def test_reader(self):
    reader = csvkit.CSVKitDictReader(self.f)

    self.assertEqual(reader.next(), {
        u'a': u'1',
        u'b': u'2',
        u'c': u'3'
    })
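
For context, a minimal sketch of the fixture this test assumes, with self.f as an in-memory CSV holding a header and one data row matching the assertion above; this setUp is a guess, not taken from the original suite:

import StringIO

def setUp(self):
    # Hypothetical fixture: header plus one row, matching the expected dict
    self.f = StringIO.StringIO('a,b,c\n1,2,3\n')
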
def __iter__(self):
    if not hasattr(self, '_feature_count'):
        self._feature_count = 0

    if self.filename:
        self.csv = csvkit.CSVKitDictReader(open(self.filename, 'r'),
                                           self.fieldnames,
                                           dialect=self.dialect)

        if self.skip_header:
            # Discard the header row so it isn't returned as data
            self.csv.next()

    return self
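
Since __iter__ returns self, a companion next() method is implied. A minimal sketch of what it might look like, assuming _feature_count tracks rows read; this pairing is an assumption, not from the original class:

def next(self):
    # Hypothetical companion to __iter__ above: pull the next row from the
    # underlying CSVKitDictReader and count it
    row = self.csv.next()  # raises StopIteration at end of input
    self._feature_count += 1
    return row
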
def main(argv):
    try:
        raw_in = open(argv[1], 'rb')
    except IndexError:
        print "usage: %s csvfile" % (argv[0])
        return 1
    except IOError as e:
        print "Error opening %s: %s" % (argv[1], e.strerror)
        return 1

    csv_in = csvkit.CSVKitDictReader(raw_in)

    for record in csv_in:
        print "%s: %s" % (record['docket'], record['NRC Reactor Unit Web Page'])
        r = load_reactor(record)
        print "-> saved as %d" % (r.id)
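
A typical entry point would pass sys.argv straight through to main(); this guard is an assumption about how the script is invoked:

import sys

if __name__ == '__main__':
    # main() returns 1 on error and None (exit code 0) on success
    sys.exit(main(sys.argv))
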
import os
import csvkit

# Change to the data directory
#os.chdir("..")

# Load the munic2011 CSV
f_read = open("wp_munic2011.csv", "r")

# Prepare the list of posts
f_write = open("wp_munic2011_posts.csv", "w")

reader = csvkit.CSVKitDictReader(f_read)

posts = []

for line in reader:
    f_post = line
    f_post["id"] = ''
    f_post["post_title"] = f_post["wpcf-a570"] + " - " + f_post["wpcf-a569"]
    f_post["post_type"] = "municipio"
    f_post["post_status"] = "publish"
    f_post["comment_status"] = "open"
    f_post["post_author"] = f_post["ibge"]
    # Convert decimal commas to decimal points
    f_post["lat"] = f_post["lat"].replace(",", ".")
    f_post["lng"] = f_post["lng"].replace(",", ".")
    posts.append(f_post)

writer = csvkit.CSVKitDictWriter(f_write, posts[0].keys())
writer.writeheader()
writer.writerows(posts)
f_write.close()
f_read.close()
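
One caveat worth sketching: posts[0].keys() comes back in arbitrary order in Python 2, so the output column order can vary between runs. An alternative that pins the layout to the reader's field order plus the derived fields added above (the appended list is an assumption about the desired layout):

fieldnames = reader.fieldnames + ['id', 'post_title', 'post_type',
                                  'post_status', 'comment_status',
                                  'post_author']
writer = csvkit.CSVKitDictWriter(f_write, fieldnames)
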
def run(self):
    """
    Run the loader and output summary.
    """
    print 'Loading organization names'
    self.load_organization_name_lookup()

    print 'Loading legislator demographics'
    self.load_legislators()

    print ''

    for year in range(self.first_year, datetime.datetime.today().year + 1):
        # We're always two months behind, so we won't have current year data until March
        if year == datetime.datetime.today().year:
            if datetime.datetime.today().month < 3:
                continue

        print year
        print '----'
        print ''

        print 'Loading individual expenditures'
        path = '%s/%s_individual.csv' % (app_config.LOBBYING_DATA_PATH, year)

        with open(path) as f:
            table = list(csvkit.CSVKitDictReader(f))

        self.load_individual_expenditures(year, table, False)

        print 'Loading solicitation expenditures'
        path = '%s/%s_solicitation.csv' % (app_config.LOBBYING_DATA_PATH, year)

        with open(path) as f:
            table = list(csvkit.CSVKitDictReader(f))

        self.load_individual_expenditures(year, table, True)

        print 'Loading group expenditures'
        path = '%s/%s_group.csv' % (app_config.LOBBYING_DATA_PATH, year)

        with open(path) as f:
            table = list(csvkit.CSVKitDictReader(f))

        self.load_group_expenditures(year, table)

        print ''

    if self.warnings:
        print 'WARNINGS'
        print '--------'

        for warning in self.warnings:
            print warning

        print ''

    if self.errors:
        print 'ERRORS'
        print '------'

        for error in self.errors:
            print error

        print ''

        # return

    print 'Removing %i amended IDs' % self.amended_rows

    removed = 0

    for expenditure in self.expenditures:
        # Skip any expenditure whose ethics ID was superseded by an amendment
        if expenditure.is_solicitation:
            if expenditure.ethics_id in self.amendments['solicitation']:
                removed += 1
                continue
        elif expenditure.group:
            if expenditure.ethics_id in self.amendments['group']:
                removed += 1
                continue
        else:
            if expenditure.ethics_id in self.amendments['individual']:
                removed += 1
                continue

        expenditure.save()

    print 'Removed %i rows' % removed

    print ''
    print 'SUMMARY'
    print '-------'
    print 'Processed %i individual rows' % self.individual_rows
    print 'Processed %i group rows' % self.group_rows
    print ''
    print 'Encountered %i warnings' % len(self.warnings)
    print 'Encountered %i errors' % len(self.errors)
    print ''
    print 'Imported %i expenditures' % len(self.expenditures)
    print 'Created %i lobbyists' % self.lobbyists_created
    print 'Created %i legislators' % self.legislators_created
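
The amendment filtering above keys self.amendments by filing type; a sketch of the lookup shape that implies, illustrative only and presumably built during an earlier load step:

# Assumed shape of self.amendments (sets of amended ethics IDs are a guess):
self.amendments = {
    'individual': set(),
    'solicitation': set(),
    'group': set(),
}
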
def load_legislators(self):
    """
    Load legislator demographics.
    """
    VALID_OFFICES = ['Representative', 'Senator']
    VALID_PARTIES = ['Republican', 'Democratic']

    with open(self.legislators_demographics_filename) as f:
        reader = csvkit.CSVKitDictReader(f)
        rows = list(reader)

    i = 0

    for row in rows:
        i += 1

        for k in row:
            row[k] = row[k].strip()

        office = row['office']

        # Process vacant seats
        if row['last_name'].upper() == 'VACANT':
            Legislator.create(
                first_name='',
                last_name='',
                office=office,
                district=row['district'],
                party='',
                ethics_name='',
                phone='',
                year_elected=0,
                hometown='',
                vacant=True,
                photo_filename='')

            self.legislators_created += 1

            continue

        if office not in VALID_OFFICES:
            # Demographics are not year-scoped, so there is no year to report
            self.warn('Not a valid office: "%s"' % (office), None, i)

        party = row['party']

        if not party:
            self.error('No party affiliation for "%s": "%s"' % (office, row['ethics_name']), None, i)
        elif party not in VALID_PARTIES:
            self.warn('Unknown party name: "%s"' % (party), None, i)

        year_elected = row['year_elected']

        if year_elected:
            year_elected = int(year_elected)
        else:
            self.error('No year elected for "%s": "%s"' % (office, row['ethics_name']), None, i)
            year_elected = None

        legislator = Legislator(
            first_name=row['first_name'],
            last_name=row['last_name'],
            office=office,
            district=row['district'],
            party=party,
            ethics_name=row['ethics_name'],
            phone=row['phone'],
            year_elected=year_elected,
            hometown=row['hometown'],
            vacant=False,
            photo_filename=row['photo'])

        legislator.save()

        if not os.path.exists('www/%s' % legislator.mugshot_url()):
            self.error('No mugshot for legislator: %s' % legislator.display_name())

        self.legislators_created += 1
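
The warn() and error() helpers are called with one or three arguments above, which suggests optional year/row parameters; a minimal sketch of that shape (the message formatting is a guess):

def warn(self, message, year=None, row=None):
    # Hypothetical helper inferred from the call sites above
    if year is not None or row is not None:
        message = '%s (year=%s, row=%s)' % (message, year, row)
    self.warnings.append(message)
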
def process_csv(self, filename):
    '''
    Here we have a CSV file that we need to process...
    '''
    try:
        with open(filename, 'r') as csvfile:
            data = '{0}{1}'.format(csvfile.readline(), csvfile.readline())
            logger.debug('First 2 lines of data are %s', data)
            self.dialect = csvkit.sniffer.sniff_dialect(data)
            logger.debug('Dialect is %s', self.dialect)

            if self.dialect:
                self.filename = filename
            else:
                logger.warn('Unable to determine dialect in use for CSV file (%s)', filename)
    except Exception as e:
        logger.warn('Found a CSV file (%s) with an invalid format: %s', filename, e)

    if self.filename:
        reader = csvkit.CSVKitDictReader(open(self.filename, 'r'),
                                         self.fieldnames,
                                         dialect=self.dialect)

        if self.skip_header:
            reader.next()

        self._fieldnames = reader.fieldnames

        # Here we will gather each column of values in the input CSV
        # to figure out what the data type is for each, so we can
        # properly generate the database, etc.
        valuelists = collections.defaultdict(list)
        self._fields = []

        for row in reader:
            for f in self._fieldnames:
                valuelists[f].append(row[f])

        for f in self._fieldnames:
            field_type, valuelists[f] = normalize_column_type(valuelists[f],
                                                              blanks_as_nulls=False)
            self._fields.append((f, field_type))

        latitude_field_candidates = ['latitude', 'lat']
        longitude_field_candidates = ['longitude', 'long', 'lon']

        lat = long = False

        # Case-insensitive check to see if lat/long is in the resulting
        # fields from the data.
        #
        # Now that we have the types for the fields, also ensure that the
        # field we are considering for lat/long is a float or int field,
        # otherwise it won't work as a lat/long value (even int is
        # questionable...).
        #
        # Since we also have the full range of values, we can also check
        # that they are within the acceptable range.
        for field in latitude_field_candidates:
            for this_field, field_type in self._fields:
                if field == this_field.lower() and field_type in (int, float) and \
                        min(valuelists[this_field]) >= -90 and max(valuelists[this_field]) <= 90:
                    lat = this_field
                    break

        for field in longitude_field_candidates:
            for this_field, field_type in self._fields:
                if field == this_field.lower() and field_type in (int, float) and \
                        min(valuelists[this_field]) >= -180 and max(valuelists[this_field]) <= 180:
                    long = this_field
                    break

        if lat and long:
            # Here it is assumed we have geo-data, so we will convert it
            # to a GIS format and then handle it as such going forward.
            # self._fields.remove(lat)
            # self._fields.remove(long)
            self.latitude_field = lat
            self.longitude_field = long
            self.spatial = True
            self.spatial_type = ogr.wkbPoint

            # We assume this based on the lat/long values we validate against.
            self.srid = 4326
            srs = osr.SpatialReference()
            epsg = str('EPSG:%s' % (self.srid,))
            srs.SetFromUserInput(epsg)
            self.srs = srs.ExportToWkt()
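
A sketch of driving the loader above; GeoCSVLoader and the filename are hypothetical stand-ins for whatever class actually defines process_csv():

loader = GeoCSVLoader()
loader.process_csv('sightings.csv')

# spatial is only set when lat/long columns were detected, hence getattr
if getattr(loader, 'spatial', False):
    print 'Found point data: %s / %s (SRID %d)' % (
        loader.latitude_field, loader.longitude_field, loader.srid)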