Python CSVKitDictWriter Examples, csvkit.CSVKitDictWriter Python Examples

Example #1

0

Show file

File: california_wells.py Project: USATODAY/groundwater

def main(filename):
    """Read ``filename``, parse it and print the rows to stdout as CSV.

    The CSV columns are the keys of the first parsed row, so the parsed
    data must contain at least one row.
    """
    records = parse_data(read_file(filename))
    columns = records[0].keys()
    csv_out = CSVKitDictWriter(sys.stdout, columns)
    csv_out.writeheader()
    for record in records:
        csv_out.writerow(record)

Example #2

0

Show file

    def handle(self, *args, **kwargs):
        """
        Make it happen.

        Writes one CSV row per (repository, contributor) pair to
        ``contributors.csv`` inside ``self.data_dir``. Each row is also
        pretty-printed as it is written.
        """
        super(Command, self).handle(*args, **kwargs)
        self.set_options()
        self.header("Analyzing contributors")

        self.headers = [
            'repo', 'login', 'name', 'email', 'company', 'location', 'bio',
            'avatar_url', 'contributions'
        ]
        self.outfile_path = os.path.join(self.data_dir, 'contributors.csv')

        # Open the file in a ``with`` block so it is flushed and closed even
        # if an error is raised part-way through the loop.
        with open(self.outfile_path, 'wb') as csv_file:
            self.outfile = CSVKitDictWriter(csv_file,
                                            fieldnames=self.headers)
            self.outfile.writeheader()

            for repo in self.repo_list:
                repo_name = repo.name
                self.log(" - Sifting through %s" % repo_name)
                contributor_list = repo.get_contributors()
                for contrib in contributor_list:
                    d = dict(repo=repo_name,
                             login=contrib.login,
                             name=contrib.name,
                             email=contrib.email,
                             company=contrib.company,
                             location=contrib.location,
                             bio=contrib.bio,
                             avatar_url=contrib.avatar_url,
                             contributions=contrib.contributions)
                    pprint(d)
                    self.outfile.writerow(d)
                    # Pause between rows; presumably throttles API
                    # requests -- confirm.
                    time.sleep(2)

Example #3

0

Show file

File: downloadcontributors.py Project: alabarga/django-calaccess-raw-data

class Command(CalAccessCommand):
    help = 'Analyze GitHub contributors across our repositories'

    def set_options(self, *args, **kwargs):
        """
        Hook up with the GitHub API

        Sets ``self.data_dir`` (created if missing), ``self.gh``,
        ``self.org`` and ``self.repo_list``.
        """
        # Set the output directory
        self.data_dir = os.path.join(
            settings.BASE_DIR,
            'network-analysis'
        )
        if not os.path.exists(self.data_dir):
            os.mkdir(self.data_dir)
        # Get our GitHub repos
        self.gh = Github(os.getenv('GITHUB_TOKEN'))
        self.org = self.gh.get_organization("california-civic-data-coalition")
        self.repo_list = self.org.get_repos()

    def handle(self, *args, **kwargs):
        """
        Make it happen.

        Writes one CSV row per (repository, contributor) pair to
        ``contributors.csv`` inside ``self.data_dir``.
        """
        super(Command, self).handle(*args, **kwargs)
        self.set_options()
        self.header("Analyzing Code Rush contributors")

        self.headers = [
            'repo',
            'name',
            'company',
            'location',
            'avatar_url',
            'contributions'
        ]
        self.outfile_path = os.path.join(
            self.data_dir,
            'contributors.csv'
        )

        # Open the file in a ``with`` block so it is flushed and closed even
        # if an error is raised part-way through the loop.
        with open(self.outfile_path, 'wb') as csv_file:
            self.outfile = CSVKitDictWriter(
                csv_file,
                fieldnames=self.headers
            )
            self.outfile.writeheader()

            for repo in self.repo_list:
                self.log(" - Sifting through %s" % repo.name)
                contributor_list = repo.get_contributors()
                for contrib in contributor_list:
                    # NOTE(review): the 'name' column is filled with the
                    # contributor's login -- confirm this is intended.
                    d = dict(
                        repo=repo.name,
                        name=contrib.login,
                        company=contrib.company,
                        location=contrib.location,
                        avatar_url=contrib.avatar_url,
                        contributions=contrib.contributions
                    )
                    self.outfile.writerow(d)
                    # Pause between rows; presumably throttles API
                    # requests -- confirm.
                    time.sleep(1)

Example #4

0

Show file

class Command(CalAccessCommand):
    help = 'Analyze GitHub contributors across our repositories'

    def set_options(self, *args, **kwargs):
        """
        Hook up with the GitHub API

        Sets ``self.data_dir`` (created if missing), ``self.gh``,
        ``self.org`` and ``self.repo_list``.
        """
        # Set the output directory
        self.data_dir = os.path.join(settings.BASE_DIR, 'network-analysis')
        if not os.path.exists(self.data_dir):
            os.mkdir(self.data_dir)
        # Get our GitHub repos, authenticating with the client id/secret
        # pair held in settings.
        self.gh = Github(client_id=settings.SOCIAL_AUTH_GITHUB_ORG_KEY,
                         client_secret=settings.SOCIAL_AUTH_GITHUB_ORG_SECRET)
        self.org = self.gh.get_organization("california-civic-data-coalition")
        self.repo_list = self.org.get_repos()

    def handle(self, *args, **kwargs):
        """
        Make it happen.

        Writes one CSV row per (repository, contributor) pair to
        ``contributors.csv`` inside ``self.data_dir``. Each row is also
        pretty-printed as it is written.
        """
        super(Command, self).handle(*args, **kwargs)
        self.set_options()
        self.header("Analyzing contributors")

        self.headers = [
            'repo', 'login', 'name', 'email', 'company', 'location', 'bio',
            'avatar_url', 'contributions'
        ]
        self.outfile_path = os.path.join(self.data_dir, 'contributors.csv')

        # Open the file in a ``with`` block so it is flushed and closed even
        # if an error is raised part-way through the loop.
        with open(self.outfile_path, 'wb') as csv_file:
            self.outfile = CSVKitDictWriter(csv_file,
                                            fieldnames=self.headers)
            self.outfile.writeheader()

            for repo in self.repo_list:
                repo_name = repo.name
                self.log(" - Sifting through %s" % repo_name)
                contributor_list = repo.get_contributors()
                for contrib in contributor_list:
                    d = dict(repo=repo_name,
                             login=contrib.login,
                             name=contrib.name,
                             email=contrib.email,
                             company=contrib.company,
                             location=contrib.location,
                             bio=contrib.bio,
                             avatar_url=contrib.avatar_url,
                             contributions=contrib.contributions)
                    pprint(d)
                    self.outfile.writerow(d)
                    # Pause between rows; presumably throttles API
                    # requests -- confirm.
                    time.sleep(2)

Example #5

0

Show file

File: downloadcontributors.py Project: california-civic-data-coalition/django-calaccess-raw-data

    def handle(self, *args, **kwargs):
        """
        Make it happen.

        Writes one CSV row per (repository, contributor) pair to
        ``contributors.csv`` inside ``self.data_dir``. Each row is also
        pretty-printed as it is written.
        """
        super(Command, self).handle(*args, **kwargs)
        self.set_options()
        self.header("Analyzing contributors")

        self.headers = [
            'repo',
            'login',
            'name',
            'email',
            'company',
            'location',
            'bio',
            'avatar_url',
            'contributions'
        ]
        self.outfile_path = os.path.join(
            self.data_dir,
            'contributors.csv'
        )

        # Open the file in a ``with`` block so it is flushed and closed even
        # if an error is raised part-way through the loop.
        with open(self.outfile_path, 'wb') as csv_file:
            self.outfile = CSVKitDictWriter(
                csv_file,
                fieldnames=self.headers
            )
            self.outfile.writeheader()

            for repo in self.repo_list:
                repo_name = repo.name
                self.log(" - Sifting through %s" % repo_name)
                contributor_list = repo.get_contributors()
                for contrib in contributor_list:
                    d = dict(
                        repo=repo_name,
                        login=contrib.login,
                        name=contrib.name,
                        email=contrib.email,
                        company=contrib.company,
                        location=contrib.location,
                        bio=contrib.bio,
                        avatar_url=contrib.avatar_url,
                        contributions=contrib.contributions
                    )
                    pprint(d)
                    self.outfile.writerow(d)
                    # Pause between rows; presumably throttles API
                    # requests -- confirm.
                    time.sleep(2)

Example #6

0

Show file

File: html_to_csv.py Project: texastribune/the-dp

def html_to_csv(path):
    """Parse the HTML file at ``path`` and print its rows to stdout as CSV.

    A ``System`` column is inserted at index 2 of the parsed header. Rows
    whose ``FICE`` value is empty are skipped. For the rest, ``FICE`` is
    split on ``' ('``: the first piece becomes the ``FICE`` value and the
    second piece, if any, becomes ``System`` (otherwise ``None``).
    """
    columns, records = parse_html(path)

    # The extra column is filled in per row below.
    columns.insert(2, 'System')
    csv_out = CSVKitDictWriter(sys.stdout, columns)
    csv_out.writeheader()

    for record in records:
        raw_fice = record.pop('FICE')
        if raw_fice:
            # e.g. "<id> (<system>)" -> ["<id>", "<system>)"]
            pieces = raw_fice.split(' (')
            record['FICE'] = pieces[0]
            record['System'] = (
                pieces[1].strip(') ') if len(pieces) > 1 else None
            )
            csv_out.writerow(record)

Example #7

0

Show file

File: aquifer_clean.py Project: USATODAY/groundwater

def writefile(parsed_data):
    """Print ``parsed_data`` to stdout as CSV, header row first.

    The columns are the keys of the first row, so ``parsed_data`` must
    contain at least one row.
    """
    columns = parsed_data[0].keys()
    csv_out = CSVKitDictWriter(sys.stdout, columns)
    csv_out.writeheader()
    for record in parsed_data:
        csv_out.writerow(record)