def handle(self, *args, **kwargs):
    """
    Write one CSV row per contributor for every repo in ``self.repo_list``.

    Runs the parent command's ``handle`` and ``self.set_options()`` first,
    then writes ``contributors.csv`` inside ``self.data_dir``. Each row
    holds the repository name plus the contributor attributes listed in
    ``self.headers``. Every row is also pretty-printed as it is written.

    Side effects: sets ``self.headers``, ``self.outfile_path`` and
    ``self.outfile``. The underlying file is closed before this method
    returns, so ``self.outfile`` must not be written to afterwards.
    """
    super(Command, self).handle(*args, **kwargs)
    self.set_options()
    self.header("Analyzing contributors")

    self.headers = [
        'repo',
        'login',
        'name',
        'email',
        'company',
        'location',
        'bio',
        'avatar_url',
        'contributions',
    ]
    self.outfile_path = os.path.join(self.data_dir, 'contributors.csv')

    # The context manager guarantees the file is flushed and closed even if
    # a lookup fails part-way through; previously the handle was leaked.
    # NOTE(review): the 'wb' mode is kept from the original; confirm it
    # matches what this CSVKitDictWriter version expects.
    with open(self.outfile_path, 'wb') as outfile_handle:
        self.outfile = CSVKitDictWriter(
            outfile_handle,
            fieldnames=self.headers
        )
        self.outfile.writeheader()

        for repo in self.repo_list:
            repo_name = repo.name
            self.log(" - Sifting through %s" % repo_name)
            contributor_list = repo.get_contributors()
            for contrib in contributor_list:
                d = dict(
                    repo=repo_name,
                    login=contrib.login,
                    name=contrib.name,
                    email=contrib.email,
                    company=contrib.company,
                    location=contrib.location,
                    bio=contrib.bio,
                    avatar_url=contrib.avatar_url,
                    contributions=contrib.contributions,
                )
                pprint(d)
                self.outfile.writerow(d)
                # NOTE(review): pause applied once per contributor,
                # presumably to throttle remote lookups; confirm the
                # intended loop level.
                time.sleep(2)
def html_to_csv(path):
    """
    Parse the HTML file at ``path`` and write its rows to stdout as CSV.

    A ``System`` column is inserted as the third header. For each row the
    ``FICE`` value is split on ``' ('``: the first piece becomes the new
    ``FICE`` value, and the second piece (if any), stripped of ``)`` and
    spaces, becomes ``System``. Rows with an empty ``FICE`` are skipped.
    """
    fieldnames, rows = parse_html(path)

    # The extra column goes in before the writer is built so that it is
    # part of the emitted header row.
    fieldnames.insert(2, 'System')
    csv_out = CSVKitDictWriter(sys.stdout, fieldnames)
    csv_out.writeheader()

    for record in rows:
        raw_fice = record.pop('FICE')
        if raw_fice:
            # The system name, when present, follows the ID in parentheses.
            pieces = raw_fice.split(' (')
            record['FICE'] = pieces[0]
            record['System'] = (
                pieces[1].strip(') ') if len(pieces) > 1 else None
            )
            csv_out.writerow(record)