def test_reader(self):
    reader = csvkit.CSVKitDictReader(self.f)

    self.assertEqual(reader.next(), {
        u'a': u'1',
        u'b': u'2',
        u'c': u'3'
    })
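
For context, a minimal sketch of the fixture this test assumes, with self.f as an in-memory CSV holding a header and one data row matching the assertion above; this setUp is a guess, not taken from the original suite:

import StringIO

def setUp(self):
    # Hypothetical fixture: header plus one row, matching the expected dict
    self.f = StringIO.StringIO('a,b,c\n1,2,3\n')
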
def __iter__(self):
    if not hasattr(self, '_feature_count'):
        self._feature_count = 0

    if self.filename:
        self.csv = csvkit.CSVKitDictReader(open(self.filename, 'r'),
                                           self.fieldnames,
                                           dialect=self.dialect)

        if self.skip_header:
            # Discard the header row so it isn't returned as data
            self.csv.next()

    return self
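
Since __iter__ returns self, a companion next() method is implied. A minimal sketch of what it might look like, assuming _feature_count tracks rows read; this pairing is an assumption, not from the original class:

def next(self):
    # Hypothetical companion to __iter__ above: pull the next row from the
    # underlying CSVKitDictReader and count it
    row = self.csv.next()  # raises StopIteration at end of input
    self._feature_count += 1
    return row
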
def main(argv):
    try:
        raw_in = open(argv[1], 'rb')
    except IndexError:
        print "usage: %s csvfile" % (argv[0])
        return 1
    except IOError as e:
        print "Error opening %s: %s" % (argv[1], e.strerror)
        return 1

    csv_in = csvkit.CSVKitDictReader(raw_in)

    for record in csv_in:
        print "%s: %s" % (record['docket'], record['NRC Reactor Unit Web Page'])
        r = load_reactor(record)
        print "-> saved as %d" % (r.id)
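
A typical entry point would pass sys.argv straight through to main(); this guard is an assumption about how the script is invoked:

import sys

if __name__ == '__main__':
    # main() returns 1 on error and None (exit code 0) on success
    sys.exit(main(sys.argv))
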
import os
import csvkit

# Change to the data directory
#os.chdir("..")

# Load the munic2011 CSV
f_read = open("wp_munic2011.csv", "r")

# Prepare the list of posts
f_write = open("wp_munic2011_posts.csv", "w")

reader = csvkit.CSVKitDictReader(f_read)

posts = []

for line in reader:
    f_post = line
    f_post["id"] = ''
    f_post["post_title"] = f_post["wpcf-a570"] + " - " + f_post["wpcf-a569"]
    f_post["post_type"] = "municipio"
    f_post["post_status"] = "publish"
    f_post["comment_status"] = "open"
    f_post["post_author"] = f_post["ibge"]
    # Convert decimal commas to decimal points
    f_post["lat"] = f_post["lat"].replace(",", ".")
    f_post["lng"] = f_post["lng"].replace(",", ".")
    posts.append(f_post)

writer = csvkit.CSVKitDictWriter(f_write, posts[0].keys())
writer.writeheader()
writer.writerows(posts)
f_write.close()
f_read.close()
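
One caveat worth sketching: posts[0].keys() comes back in arbitrary order in Python 2, so the output column order can vary between runs. An alternative that pins the layout to the reader's field order plus the derived fields added above (the appended list is an assumption about the desired layout):

fieldnames = reader.fieldnames + ['id', 'post_title', 'post_type',
                                  'post_status', 'comment_status',
                                  'post_author']
writer = csvkit.CSVKitDictWriter(f_write, fieldnames)
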
def run(self):
    """
    Run the loader and output summary.
    """
    print 'Loading organization names'
    self.load_organization_name_lookup()

    print 'Loading legislator demographics'
    self.load_legislators()

    print ''

    for year in range(self.first_year, datetime.datetime.today().year + 1):
        # We're always two months behind, so we won't have current year data until March
        if year == datetime.datetime.today().year:
            if datetime.datetime.today().month < 3:
                continue

        print year
        print '----'
        print ''

        print 'Loading individual expenditures'
        path = '%s/%s_individual.csv' % (app_config.LOBBYING_DATA_PATH, year)

        with open(path) as f:
            table = list(csvkit.CSVKitDictReader(f))

        self.load_individual_expenditures(year, table, False)

        print 'Loading solicitation expenditures'
        path = '%s/%s_solicitation.csv' % (app_config.LOBBYING_DATA_PATH, year)

        with open(path) as f:
            table = list(csvkit.CSVKitDictReader(f))

        self.load_individual_expenditures(year, table, True)

        print 'Loading group expenditures'
        path = '%s/%s_group.csv' % (app_config.LOBBYING_DATA_PATH, year)

        with open(path) as f:
            table = list(csvkit.CSVKitDictReader(f))

        self.load_group_expenditures(year, table)

        print ''

    if self.warnings:
        print 'WARNINGS'
        print '--------'

        for warning in self.warnings:
            print warning

        print ''

    if self.errors:
        print 'ERRORS'
        print '------'

        for error in self.errors:
            print error

        print ''

        # return

    print 'Removing %i amended IDs' % self.amended_rows

    removed = 0

    for expenditure in self.expenditures:
        # Skip any expenditure whose ethics ID was superseded by an amendment
        if expenditure.is_solicitation:
            if expenditure.ethics_id in self.amendments['solicitation']:
                removed += 1
                continue
        elif expenditure.group:
            if expenditure.ethics_id in self.amendments['group']:
                removed += 1
                continue
        else:
            if expenditure.ethics_id in self.amendments['individual']:
                removed += 1
                continue

        expenditure.save()

    print 'Removed %i rows' % removed

    print ''
    print 'SUMMARY'
    print '-------'
    print 'Processed %i individual rows' % self.individual_rows
    print 'Processed %i group rows' % self.group_rows
    print ''
    print 'Encountered %i warnings' % len(self.warnings)
    print 'Encountered %i errors' % len(self.errors)
    print ''
    print 'Imported %i expenditures' % len(self.expenditures)
    print 'Created %i lobbyists' % self.lobbyists_created
    print 'Created %i legislators' % self.legislators_created
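
The amendment filtering above keys self.amendments by filing type; a sketch of the lookup shape that implies, illustrative only and presumably built during an earlier load step:

# Assumed shape of self.amendments (sets of amended ethics IDs are a guess):
self.amendments = {
    'individual': set(),
    'solicitation': set(),
    'group': set(),
}
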
def load_legislators(self):
    """
    Load legislator demographics.
    """
    VALID_OFFICES = ['Representative', 'Senator']
    VALID_PARTIES = ['Republican', 'Democratic']

    with open(self.legislators_demographics_filename) as f:
        reader = csvkit.CSVKitDictReader(f)
        rows = list(reader)

    i = 0

    for row in rows:
        i += 1

        for k in row:
            row[k] = row[k].strip()

        office = row['office']

        # Process vacant seats
        if row['last_name'].upper() == 'VACANT':
            Legislator.create(
                first_name='',
                last_name='',
                office=office,
                district=row['district'],
                party='',
                ethics_name='',
                phone='',
                year_elected=0,
                hometown='',
                vacant=True,
                photo_filename='')

            self.legislators_created += 1

            continue

        if office not in VALID_OFFICES:
            # Demographics are not year-scoped, so there is no year to report
            self.warn('Not a valid office: "%s"' % (office), None, i)

        party = row['party']

        if not party:
            self.error('No party affiliation for "%s": "%s"' % (office, row['ethics_name']), None, i)
        elif party not in VALID_PARTIES:
            self.warn('Unknown party name: "%s"' % (party), None, i)

        year_elected = row['year_elected']

        if year_elected:
            year_elected = int(year_elected)
        else:
            self.error('No year elected for "%s": "%s"' % (office, row['ethics_name']), None, i)
            year_elected = None

        legislator = Legislator(
            first_name=row['first_name'],
            last_name=row['last_name'],
            office=office,
            district=row['district'],
            party=party,
            ethics_name=row['ethics_name'],
            phone=row['phone'],
            year_elected=year_elected,
            hometown=row['hometown'],
            vacant=False,
            photo_filename=row['photo'])

        legislator.save()

        if not os.path.exists('www/%s' % legislator.mugshot_url()):
            self.error('No mugshot for legislator: %s' % legislator.display_name())

        self.legislators_created += 1
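
The warn() and error() helpers are called with one or three arguments above, which suggests optional year/row parameters; a minimal sketch of that shape (the message formatting is a guess):

def warn(self, message, year=None, row=None):
    # Hypothetical helper inferred from the call sites above
    if year is not None or row is not None:
        message = '%s (year=%s, row=%s)' % (message, year, row)
    self.warnings.append(message)
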
def process_csv(self, filename):
    '''
    Here we have a CSV file that we need to process...
    '''
    try:
        with open(filename, 'r') as csvfile:
            data = '{0}{1}'.format(csvfile.readline(), csvfile.readline())
            logger.debug('First 2 lines of data are %s', data)
            self.dialect = csvkit.sniffer.sniff_dialect(data)
            logger.debug('Dialect is %s', self.dialect)

            if self.dialect:
                self.filename = filename
            else:
                logger.warn('Unable to determine dialect in use for CSV file (%s)', filename)
    except Exception as e:
        logger.warn('Found a CSV file (%s) with an invalid format: %s', filename, e)

    if self.filename:
        reader = csvkit.CSVKitDictReader(open(self.filename, 'r'),
                                         self.fieldnames,
                                         dialect=self.dialect)

        if self.skip_header:
            reader.next()

        self._fieldnames = reader.fieldnames

        # Here we will gather each column of values in the input CSV
        # to figure out what the data type is for each, so we can
        # properly generate the database, etc.
        valuelists = collections.defaultdict(list)
        self._fields = []

        for row in reader:
            for f in self._fieldnames:
                valuelists[f].append(row[f])

        for f in self._fieldnames:
            field_type, valuelists[f] = normalize_column_type(valuelists[f],
                                                              blanks_as_nulls=False)
            self._fields.append((f, field_type))

        latitude_field_candidates = ['latitude', 'lat']
        longitude_field_candidates = ['longitude', 'long', 'lon']

        lat = long = False

        # Case-insensitive check to see if lat/long is in the resulting
        # fields from the data.
        #
        # Now that we have the types for the fields, also ensure that the
        # field we are considering for lat/long is a float or int field,
        # otherwise it won't work as a lat/long value (even int is
        # questionable...).
        #
        # Since we also have the full range of values, we can also check
        # that they are within the acceptable range.
        for field in latitude_field_candidates:
            for this_field, field_type in self._fields:
                if field == this_field.lower() and field_type in (int, float) and \
                        min(valuelists[this_field]) >= -90 and max(valuelists[this_field]) <= 90:
                    lat = this_field
                    break

        for field in longitude_field_candidates:
            for this_field, field_type in self._fields:
                if field == this_field.lower() and field_type in (int, float) and \
                        min(valuelists[this_field]) >= -180 and max(valuelists[this_field]) <= 180:
                    long = this_field
                    break

        if lat and long:
            # Here it is assumed we have geo-data, so we will convert it
            # to a GIS format and then handle it as such going forward.
            # self._fields.remove(lat)
            # self._fields.remove(long)
            self.latitude_field = lat
            self.longitude_field = long
            self.spatial = True
            self.spatial_type = ogr.wkbPoint

            # We assume this based on the lat/long values we validate against.
            self.srid = 4326
            srs = osr.SpatialReference()
            epsg = str('EPSG:%s' % (self.srid,))
            srs.SetFromUserInput(epsg)
            self.srs = srs.ExportToWkt()
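
A sketch of driving the loader above; GeoCSVLoader and the filename are hypothetical stand-ins for whatever class actually defines process_csv():

loader = GeoCSVLoader()
loader.process_csv('sightings.csv')

# spatial is only set when lat/long columns were detected, hence getattr
if getattr(loader, 'spatial', False):
    print 'Found point data: %s / %s (SRID %d)' % (
        loader.latitude_field, loader.longitude_field, loader.srid)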