def main(filename):
    """
    Read `filename`, parse its contents and dump the rows to stdout as CSV.

    The CSV header is taken from the keys of the first parsed row, so every
    row is presumably a dict sharing those keys (verify against parse_data).
    When parsing yields no rows, nothing is written.
    """
    raw_json = read_file(filename)
    parsed_data = parse_data(raw_json)

    # Without at least one row there is no header to derive; indexing
    # parsed_data[0] would raise IndexError on empty input.
    if not parsed_data:
        return

    writer = CSVKitDictWriter(sys.stdout, parsed_data[0].keys())
    writer.writeheader()
    for row in parsed_data:
        writer.writerow(row)
class Command(CalAccessCommand):
    """
    Management command that writes one CSV row per (repository, contributor)
    pair for the organization's GitHub repositories.
    """
    help = 'Analyze GitHub contributors across our repositories'

    def set_options(self, *args, **kwargs):
        """
        Hook up with the GitHub API

        Sets `self.data_dir` (created if missing), `self.gh`, `self.org`
        and `self.repo_list`.
        """
        # Set the output directory
        self.data_dir = os.path.join(
            settings.BASE_DIR,
            'network-analysis'
        )
        if not os.path.exists(self.data_dir):
            os.mkdir(self.data_dir)

        # Get our GitHub repos, authenticating with the GITHUB_TOKEN
        # environment variable
        self.gh = Github(os.getenv('GITHUB_TOKEN'))
        self.org = self.gh.get_organization("california-civic-data-coalition")
        self.repo_list = self.org.get_repos()

    def handle(self, *args, **kwargs):
        """
        Make it happen.

        Writes `contributors.csv` inside `self.data_dir`, with the columns
        listed in `self.headers`.
        """
        super(Command, self).handle(*args, **kwargs)
        self.set_options()
        self.header("Analyzing Code Rush contributors")

        self.headers = [
            'repo',
            'name',
            'company',
            'location',
            'avatar_url',
            'contributions'
        ]
        self.outfile_path = os.path.join(
            self.data_dir,
            'contributors.csv'
        )

        # Open the file in a context manager so the handle is flushed and
        # closed even if a GitHub request fails midway through.
        with open(self.outfile_path, 'wb') as outfile:
            self.outfile = CSVKitDictWriter(
                outfile,
                fieldnames=self.headers
            )
            self.outfile.writeheader()

            for repo in self.repo_list:
                self.log(" - Sifting through %s" % repo.name)
                contributor_list = repo.get_contributors()
                for contrib in contributor_list:
                    d = dict(
                        repo=repo.name,
                        name=contrib.login,
                        company=contrib.company,
                        location=contrib.location,
                        avatar_url=contrib.avatar_url,
                        contributions=contrib.contributions
                    )
                    self.outfile.writerow(d)
                    # NOTE(review): pause between rows, presumably to stay
                    # under the GitHub API rate limit. The original layout
                    # was lost, so confirm whether this belongs per
                    # contributor or per repository.
                    time.sleep(1)
class Command(CalAccessCommand):
    """
    Management command that writes one CSV row per (repository, contributor)
    pair for the organization's GitHub repositories, including profile
    fields such as name, email and bio.
    """
    help = 'Analyze GitHub contributors across our repositories'

    def set_options(self, *args, **kwargs):
        """
        Hook up with the GitHub API

        Sets `self.data_dir` (created if missing), `self.gh`, `self.org`
        and `self.repo_list`.
        """
        # Set the output directory
        self.data_dir = os.path.join(settings.BASE_DIR, 'network-analysis')
        if not os.path.exists(self.data_dir):
            os.mkdir(self.data_dir)

        # Get our GitHub repos, authenticating with the client key/secret
        # configured in the Django settings
        self.gh = Github(
            client_id=settings.SOCIAL_AUTH_GITHUB_ORG_KEY,
            client_secret=settings.SOCIAL_AUTH_GITHUB_ORG_SECRET
        )
        self.org = self.gh.get_organization("california-civic-data-coalition")
        self.repo_list = self.org.get_repos()

    def handle(self, *args, **kwargs):
        """
        Make it happen.

        Writes `contributors.csv` inside `self.data_dir`, with the columns
        listed in `self.headers`, and pretty-prints each row as it goes.
        """
        super(Command, self).handle(*args, **kwargs)
        self.set_options()
        self.header("Analyzing contributors")

        self.headers = [
            'repo',
            'login',
            'name',
            'email',
            'company',
            'location',
            'bio',
            'avatar_url',
            'contributions'
        ]
        self.outfile_path = os.path.join(self.data_dir, 'contributors.csv')

        # Open the file in a context manager so the handle is flushed and
        # closed even if a GitHub request fails midway through.
        with open(self.outfile_path, 'wb') as outfile:
            self.outfile = CSVKitDictWriter(outfile, fieldnames=self.headers)
            self.outfile.writeheader()

            for repo in self.repo_list:
                repo_name = repo.name
                self.log(" - Sifting through %s" % repo_name)
                contributor_list = repo.get_contributors()
                for contrib in contributor_list:
                    d = dict(
                        repo=repo_name,
                        login=contrib.login,
                        name=contrib.name,
                        email=contrib.email,
                        company=contrib.company,
                        location=contrib.location,
                        bio=contrib.bio,
                        avatar_url=contrib.avatar_url,
                        contributions=contrib.contributions
                    )
                    pprint(d)
                    self.outfile.writerow(d)
                    # NOTE(review): pause between rows, presumably to stay
                    # under the GitHub API rate limit. The original layout
                    # was lost, so confirm whether this belongs per
                    # contributor or per repository.
                    time.sleep(2)
def html_to_csv(path):
    """
    Convert the HTML table at `path` into CSV written to stdout.

    A 'System' column is inserted as the third header. For every row, the
    'FICE' value is split on ' (': the leading part stays in 'FICE' and any
    parenthesised remainder becomes 'System' (None when absent). Rows whose
    'FICE' value is empty are dropped.
    """
    header, data = parse_html(path)

    # The extra column has to be in the header before the writer is built
    header.insert(2, 'System')
    writer = CSVKitDictWriter(sys.stdout, header)
    writer.writeheader()

    for record in data:
        raw_fice = record.pop('FICE')
        if not raw_fice:
            # No identifier, nothing worth emitting
            continue

        # "12345 (Some System)" -> ["12345", "Some System)"]
        pieces = raw_fice.split(' (')
        record['FICE'] = pieces[0]
        record['System'] = pieces[1].strip(') ') if len(pieces) > 1 else None
        writer.writerow(record)
def writefile(parsed_data):
    """
    Write `parsed_data` to stdout as CSV.

    The header is taken from the keys of the first row, so `parsed_data` is
    expected to be an indexable sequence of dict-like rows. An empty
    sequence produces no output.
    """
    # Without at least one row there is no header to derive; indexing
    # parsed_data[0] would raise IndexError on empty input.
    if not parsed_data:
        return

    writer = CSVKitDictWriter(sys.stdout, parsed_data[0].keys())
    writer.writeheader()
    for row in parsed_data:
        writer.writerow(row)