def generateDoacao(arquivo):
    """Aggregate 2010 campaign donations per candidate and save to MongoDB.

    Uses the ReceitaCand.txt files from the 2010 "Prestacoes de Contas":
    http://agencia.tse.jus.br/estatistica/sead/odsele/prestacao_contas/prestacao_contas_2010.zip

    :param arquivo: path to the ISO-8859-1, ';'-separated donations CSV.

    Fixes over the original: the file handle is closed via ``with``, the
    deprecated ``dict.has_key`` is replaced by ``in``, and each donation
    value is parsed once instead of up to three times.
    """
    r = {}
    with open(arquivo, 'r') as raw:
        doacoes_raw = csvkit.DictReader(raw, encoding='iso-8859-1', delimiter=';')
        for d in doacoes_raw:
            _id = d['CPF do candidato']
            # Brazilian locale uses ',' as the decimal separator.
            valor = float(d['Valor receita'].replace(',', '.'))
            if _id not in r:
                r[_id] = {
                    '_id': _id,
                    'nome': d['Nome candidato'],
                    'numero': d[u'Número candidato'],
                    'partido': d['Sigla Partido'],
                    'uf': d['UF'],
                    'doacoes': {},
                    'total': 0
                }
            r[_id]['total'] += valor
            doador = d['CPF/CNPJ do doador']
            if doador not in r[_id]['doacoes']:
                r[_id]['doacoes'][doador] = {
                    'nome': d['Nome do doador'],
                    'valor': valor
                }
            else:
                r[_id]['doacoes'][doador]['valor'] += valor
    print('Saving...')
    mongo_save(r)
def create_by_creationdate_small(collection, filters):
    """Count filtered themes per creation year (1868 onward) and dump the
    result to 'theme_creationDate_small.csv' as (Theme, Year, Weight) rows."""
    counts = defaultdict(int)
    with open(collection, 'r') as infile:
        for record in csvkit.DictReader(infile):
            year = record['Year creation']
            # Skip rows with a non-numeric or pre-1868 creation year.
            if not year.isdigit() or int(year) < 1868:
                continue
            # Works for both single- and multi-theme rows.
            for theme in record['tag_thema'].split(', '):
                if theme in filters:
                    counts[(theme, year)] += 1
    # Write output file
    with open('theme_creationDate_small.csv', 'w') as outfile:
        writer = csvkit.DictWriter(
            outfile, fieldnames=['Theme', 'Year creation', 'Weight'])
        writer.writeheader()
        for (theme, year), weight in counts.items():
            writer.writerow({
                'Theme': theme,
                'Year creation': '01/01/%s' % year,
                'Weight': weight
            })
def _open_data(self, in_file): _regions = {} with open(in_file) as csvfile: reader = csv.DictReader(csvfile) for row in reader: _regions[row[self.key]] = row return _regions
def test_reader_alias(self):
    """csvkit.DictReader aliases a dict-style reader: the first row of
    ``self.f`` must parse into the expected header->value mapping."""
    reader = csvkit.DictReader(self.f)
    # next(reader) works on Python 2 and 3; reader.next() is 2-only.
    self.assertEqual(next(reader), {
        u'a': u'1',
        u'b': u'2',
        u'c': u'3'
    })
def iterate_and_parse_files(sub_directory):
    """Merge all CSV files in *sub_directory* (relative to the current
    working directory) into one DictList of row dicts.

    Files are read in ``os.listdir`` order.  Fixes over the original:
    ``os.path.join`` replaces fragile string concatenation (which required
    a trailing '/' on *sub_directory*), and each file handle is closed
    deterministically instead of being leaked.
    """
    data = DictList()
    folder = os.path.join(os.getcwd(), sub_directory)
    for file_name in os.listdir(folder):
        with open(os.path.join(folder, file_name)) as handle:
            data += list(csv.DictReader(handle))
    return data
def merge_csv_files(filelist):
    """Merge the given CSV files into a single ``tables.DictList``.

    :param filelist: iterable of CSV file paths, read in order.
    :return: DictList containing every row dict of every file.

    Fix over the original: the ``with`` block closes each file handle
    (the original leaked one open descriptor per input file).
    """
    data = tables.DictList()
    for file_name in filelist:
        with open(file_name) as handle:
            data += list(csv.DictReader(handle))
    return data
def get_artist_names(authors, collection):
    """Map artist ids to artist names from the *authors* CSV.

    Returns a defaultdict whose missing keys yield '' so lookups for
    unknown artists never raise.  *collection* is accepted for interface
    compatibility but is not used here.
    """
    names = defaultdict(str)
    with open(authors, 'r') as handle:
        for record in csvkit.DictReader(handle):
            names[record['Id artist']] = record['name']
    return names
def importaPrestacoes2014(arquivo, mugshot):
    # Import a 2014 campaign-finance CSV (TSE "prestacao de contas") into
    # the 'verdinha.politicos' Mongo collection, merging donation totals
    # into the matching politician document.
    # NOTE(review): `mugshot` is currently unused -- the assignment below
    # is commented out.
    from pymongo import MongoClient
    client = MongoClient()
    db = client.verdinha
    col = db.politicos
    raw = open(arquivo, 'r')
    doacoes_raw = csvkit.DictReader(raw, encoding='iso-8859-1', delimiter=';')
    c = doacoes_raw.next()
    # A header containing 'prestacao' marks a candidate that did not file
    # accounts -- nothing to import.
    if c.has_key('prestacao'):
        #print arquivo + " nao entregue"  # (accounts not delivered)
        return None
    # Strip accents so the name matches the stored politician documents.
    c['Nome do Candidato'] = unidecode.unidecode(c['Nome do Candidato'])
    p = col.find_one({'nome' : c['Nome do Candidato']})
    if p:
        raw.seek(0) #rewind!  (re-read the file from the first row)
        doacoes_raw = csvkit.DictReader(raw, encoding='iso-8859-1', delimiter=';')
        for d in doacoes_raw:
            #p['mugshot'] = mugshot
            if not p['candidaturas'].has_key('2014'):
                p['candidaturas']['2014'] = {
                    'ano' : 2014,
                    'cargo' : d['Candidatura'],
                    'situacao' : 'Candidato',
                    'numero' : d[u'Número do Candidato'],
                    'partido' : d['Partido'],
                    'uf' : estados[d['Unidade Eleitoral']],
                    'doacoes' : {},
                    'total' : 0
                }
            # 'Valor R$' looks like 'R$ 1.234,56': strip the currency
            # prefix, drop thousands separators, use '.' as decimal point.
            p['candidaturas']['2014']['total'] += float(d['Valor R$'].strip('R$ ').strip('\.').replace('.','').replace(',','.'))
            # Donor key: CPF/CNPJ with all punctuation removed.
            cnpj_id = d['CPF/CNPJ'].replace('/','').replace('-','').replace('.','')
            if not p['candidaturas']['2014']['doacoes'].has_key(cnpj_id):
                p['candidaturas']['2014']['doacoes'][cnpj_id] = {
                    'nome' : d['Doador'],
                    'cnpj' : d['CPF/CNPJ'],
                    'valor' : float(d['Valor R$'].strip('R$ ').strip('\.').replace('.','').replace(',','.'))
                }
                # TODO: handle the original donor ("doador originario")
            else:
                p['candidaturas']['2014']['doacoes'][cnpj_id]['valor'] += float(d['Valor R$'].strip('R$ ').strip('\.').replace('.','').replace(',','.'))
        # Persist the merged document once, after all rows are processed.
        col.update({'_id' : p['_id']}, p, upsert=True)
def keyword_list(collection):
    """Return the unique theme keywords used in *collection* (CSV).

    The 'themas' column holds a ', '-separated keyword list per row.
    """
    seen = set()
    with open(collection, 'r') as handle:
        for record in csvkit.DictReader(handle):
            seen.update(record['themas'].split(', '))
    return list(seen)
def get_tags_from(gephi_csv):
    """Return the unique values of the 'tag' column of *gephi_csv*.

    Bug fix: the original called ``tags.update(row['tag'])``, which treats
    the string as an iterable and collects its individual *characters*;
    ``add`` stores the whole tag value.
    """
    tags = set()
    with open(gephi_csv, 'r') as handle:
        for row in csvkit.DictReader(handle):
            tags.add(row['tag'])
    return list(tags)
def load_forms():
    """
    Load all the FilingForm objects from the source CSV.

    Builds the forms from forms.csv, then attaches every row of
    sections.csv to its parent form (matched on the form id).
    """
    here = os.path.dirname(__file__)
    # Read in forms
    with open(os.path.join(here, 'forms.csv'), 'r') as handle:
        forms = [FilingForm(**row) for row in csvkit.DictReader(handle)]
    # Read in sections
    with open(os.path.join(here, 'sections.csv'), 'r') as handle:
        for section in csvkit.DictReader(handle):
            parent = next(f for f in forms if f.id == section['form_id'])
            parent.add_section(**section)
    # Pass it out
    return forms
def handle_label(self, crs_filename, **options):
    """Import CRS activities from *crs_filename* inside a manual
    transaction, committing every 50 rows.

    The command-line arguments are the CRS file names to process.  With
    --clean, existing projects are deleted first; the import aborts when
    that deletion does not go through.

    Fix: the original aborted when ``self.delete_projects()`` returned
    True (i.e. exactly when the clean-up succeeded); the condition is now
    ``not self.delete_projects()``, consistent with the non-transactional
    variant of this command elsewhere in the file.
    """
    verbosity = options['verbosity']
    if verbosity == '0':
        self.logger.setLevel(logging.ERROR)
    elif verbosity == '1':
        self.logger.setLevel(logging.WARNING)
    elif verbosity == '2':
        self.logger.setLevel(logging.INFO)
    elif verbosity == '3':
        self.logger.setLevel(logging.DEBUG)
    self.logger.info(u"Start import %s" % YEAR)
    start_time = time.time()
    if options.get('clean') and not self.delete_projects():
        raise CommandError("Import aborted")
    # code -> pk lookup tables for every configured code list.
    self.all_codelists = dict([(cl.code_list, dict(cl.objects.values_list('code', 'pk'))) for cl in codelist_models.CODE_LISTS])
    rows = projects = activities = 0
    set_autocommit(False)
    try:
        with open(crs_filename, 'r') as crs_file:
            for rows, activity in enumerate(csvkit.DictReader(crs_file), start=1):
                activity, new_project = self.load_activity(activity, rows)
                if activity:
                    activities += 1
                    self.logger.debug("Imported row: %d" % (activities))
                    if new_project:
                        projects += 1
                if rows % 50 == 0:
                    # Flush a batch of 50 rows.
                    commit()
    except KeyboardInterrupt:
        commit()
        self.logger.critical("Command execution aborted.")
    finally:
        self.logger.info("Total projects: %d" % projects)
        self.logger.info("Total activities: %d" % activities)
        self.logger.info("Total rows: %d" % rows)
        self.logger.info("Execution time: %d seconds" % (time.time() - start_time))
        commit()
        self.logger.info(u"Finish import %s" % YEAR)
def import_flows(filename,imp_exp,c): with open(filename) as f: importscsvs=csvkit.DictReader(f) for line in importscsvs: year=line["year"] for reporting,flow in line.iteritems(): if flow !="": try: flow=float(flow.replace(",",".")) except : print year,reporting,"'%s'"%flow continue # remove 0 values if reporting!="year" and flow!=0.0: data=["FEDERICO-TENA",flow,"1000000","sterling pound",int(year),reporting,"World Federico-Tena",imp_exp,"gen","total_federicotena"] c.execute("INSERT INTO flows (source, flow, unit, currency, year, reporting, partner, export_import, special_general, world_trade_type) VALUES (?,?,?,?,?,?,?,?,?,?)",data)
def _categories_from_file(self):
    """ Get categories from cached file

    Reads vantetider/data/<dataset id>/<self.id>.csv and returns a dict
    mapping category id -> Category, with every extra CSV column set as
    an attribute on the Category object.
    """
    categories = {}
    file_path = os.path.join("vantetider/data", self.dataset.id, self.id + ".csv")
    with open(file_path) as f:
        # NOTE(review): stdlib csv.DictReader has no 'encoding' kwarg and
        # rows have no .iteritems() on Python 3 -- presumably 'csv' here is
        # csvkit (or similar) running on Python 2; confirm before porting.
        for row in csv.DictReader(f, encoding="utf-8"):
            cat = Category(row["id"], label=row["label"])
            # Every column other than id/label becomes a Category attribute.
            for attr, value in row.iteritems():
                if attr not in ["id", "label"]:
                    setattr(cat, attr, value)
            categories[cat.id] = cat
    return categories
def import_fredericotena(c):
    # Import the Federico-Tena world-trade data set: register its source,
    # create/alias its reporting entities, then load import and export
    # flow matrices via import_flows().  `c` is an open sqlite cursor.
    FT_PATH = "in_data/FredericoTena"
    ENTITIES_CSV = "FredericoTena_entities.csv"
    IMPORTS_CSV = "FredericoTena_imports.csv"
    EXPORTS_CSV = "FredericoTena_exports.csv"
    # create source done
    source_id="FEDERICO-TENA"
    source_authors="Federico G. & A. Tena-Junguito"
    source_type="estimation"
    source_edition_year="2016"
    source_url="http://www.ehes.org/EHES_93.pdf"
    source_title="World trade, 1800-1938: a new data-set, EHES Working Paper 93"
    c.execute("INSERT INTO source_types (acronym,reference,type,author,URL) VALUES (?,?,?,?,?)",(source_id,source_title,source_type,source_authors,source_url))
    c.execute("INSERT INTO sources (slug,acronym,name,edition_date) VALUES (?,?,?,?)",(source_id,source_id,source_title,source_edition_year))
    print "created FT source"
    # read entities
    # Slug helper: '&' -> '_' and spaces/parens/slashes removed.
    ricslug=lambda _: re.sub("[ ()/]","",re.sub("&","_",_))
    with open(os.path.join(FT_PATH,ENTITIES_CSV)) as f:
        entitiescsv=csvkit.DictReader(f)
        for entity in entitiescsv:
            if entity["new"]!="":
                # create new entities (only rows flagged in the 'new' column)
                print "inserting new entity %s"%entity["ricname"]
                # todo add continent (placeholder '?' stored for now)
                c.execute("INSERT OR IGNORE INTO RICentities (RICname,type,continent,COW_code,slug) VALUES (?,?,?,?,?)",(entity["ricname"],entity["rictype"],"?",entity["cow"],ricslug(entity["ricname"])))
            # todo check for the group
            # Alias: original Federico-Tena polity name -> RIC name.
            c.execute("INSERT OR IGNORE INTO entity_names (original_name,RICname) VALUES (?,?) ",(entity["Polity Federico-Tena"],entity["ricname"]))
    # add World Frederico Tena entity
    c.execute("INSERT OR IGNORE INTO entity_names (original_name,RICname) VALUES (?,?) ",("World Federico-Tena","World Federico-Tena"))
    c.execute("""INSERT OR IGNORE INTO RICentities (RICname,type,continent,slug) VALUES ("World Federico-Tena","geographical_area","World", "WorldFedericoTena")""")
    # read import
    import_flows(os.path.join(FT_PATH,IMPORTS_CSV),"imp",c)
    # read export
    import_flows(os.path.join(FT_PATH,EXPORTS_CSV),"exp",c)
def handle_label(self, crs_file, **options):
    """Translate *field* of every Activity matched by the CSV rows,
    restricted to the configured languages (or the single --lang),
    reporting per-row progress on stdout."""
    start_time = time.time()
    i = 0
    translations = 0
    field = options['field']
    # Base language codes from settings, e.g. 'en-us' -> 'en'.
    languages = [lang[0].split('-')[0] for lang in settings.LANGUAGES]
    chosen = options['lang']
    if chosen:
        if chosen not in languages:
            raise CommandError("Invalid language code '%s'. Try: %s" % (chosen, ', '.join(languages)))
        languages = [chosen]
    self.stdout.write('FIELD: %s' % field)
    self.stdout.write('LANGUAGES: %s' % languages)
    with open(crs_file, 'r') as handle:
        for i, row in enumerate(csvkit.DictReader(handle, encoding='utf-8'), start=1):
            updates, matches = self.translate(row, field, languages, override=options['override'])
            if matches == 0:
                # Row matched no Activity (message kept in Italian).
                self.stdout.write("\rRow %d non corrisponde a nessuna Activity" % (i))
                continue
            self.stdout.write("\r%s: Translated activities %d " % (i, updates), ending='')
            self.stdout.flush()
            translations += updates
    self.stdout.write("\nTotal rows: %d" % i)
    self.stdout.write("Execution time: %d seconds" % (time.time() - start_time))
def import_flows(filename,imp_exp,c,ft_entities,ft_rates,ft_source): with open(filename) as f: importscsvs=csvkit.DictReader(f) for line in importscsvs: year=line["year"] for reporting,flow in line.iteritems(): if flow !="": try: flow=float(flow.replace(",","."))*ft_rates[year] except : print year,reporting,"'%s'"%flow continue # remove 0 values if reporting!="year" and flow!=0.0: reporting = reporting.strip().lower() if reporting in ft_entities: data=[ft_source,flow,"1000000","us dollar",int(year),reporting,"World Federico-Tena",imp_exp,"gen","total_federicotena"] c.execute("INSERT INTO flows (source, flow, unit, currency, year, reporting, partner, export_import, special_general, world_trade_type) VALUES (?,?,?,?,?,?,?,?,?,?)",data) data=["us dollar",int(year),reporting,"us dollar"] c.execute("INSERT OR IGNORE INTO currencies (currency, year, reporting, modified_currency) VALUES (?,?,?,?)",data) else: print "MISSING '%s' in ft entities"%reporting
def tag_artworks(collection, output, keyword_tag_map,
                 authors_csv='/home/akira/Documents/Dev/Datasprint/authors.csv'):
    """Copy *collection* to *output*, adding a 'tag_thema' column derived
    from the 'themas' keywords and a 'name' column with the author name.

    :param collection: input CSV path.
    :param output: output CSV path.
    :param keyword_tag_map: mapping consumed by get_tags_from_field.
    :param authors_csv: authors CSV used to resolve artist names.
        Previously a hard-coded absolute path; now an overridable
        parameter whose default preserves the old behaviour.
    """
    with open(collection, 'r') as fi:
        with open(output, 'w') as fo:
            # Init csv reader
            reader = csvkit.DictReader(fi)
            header = reader.fieldnames
            # Init new csv with the two derived columns appended.
            header.append('tag_thema')
            header.append('name')
            writer = csvkit.DictWriter(fo, fieldnames=header)
            writer.writeheader()
            id_names_map = get_artist_names(authors_csv, collection)
            for input_row in reader:
                # Init dict to write
                output_row = dict.fromkeys(header)
                output_row.update(input_row)
                # Tag: map each theme keyword through keyword_tag_map.
                thema_words = input_row['themas'].split(', ')
                tags = get_tags_from_field(thema_words, keyword_tag_map)
                output_row['tag_thema'] = ', '.join(tags)
                # Add author's name, stripping stray quote characters.
                output_row['name'] = id_names_map[
                    input_row['Id artists']].replace('"', '')
                # Write row
                writer.writerow(output_row)
def run():
    # One-off loader: create/update Initiative rows from
    # initiatives_full.csv and attach existing Projects (whose number
    # starts with '<code>/') to each initiative.
    from openaid.projects.models import Initiative, Project
    for i, row in enumerate(csvkit.DictReader(open('initiatives_full.csv')), start=1):
        initiative, created = Initiative.objects.get_or_create(
            # Initiative codes are zero-padded to six characters.
            code=row['code'].zfill(6),
            defaults={
                'title_it': row['title'],
                # '(vuoto)' is the spreadsheet's marker for "empty".
                'country': row['country'] if row['country'] != '(vuoto)' else '',
                'total_project_costs': row['total'],
                'grant_amount_approved': row['grant'],
                'loan_amount_approved': row['loan'],
            })
        # Bulk-link matching projects; .update() returns the row count.
        projects = Project.objects.filter(number__startswith='%s/' % initiative.code).update(
            initiative=initiative)
        print '%d] Created %s%s' % (i, repr(initiative), (' associated with %d projects' % projects) if projects else '')
def create_by_acquisitiondate_acquisitionmode(collection):
    """Count (theme, acquisition mode, year) triples for artworks acquired
    from 1868 on and write them to 'theme_acqDate_acqMode_date.csv'."""
    counts = defaultdict(int)
    with open(collection, 'r') as infile:
        for record in csvkit.DictReader(infile):
            year = record['Year acquisition']
            mode = record['Mode acquisition (new categories)']
            # Keep only rows with a numeric year >= 1868 and a known mode
            # (isdigit is checked first so int() never sees bad input).
            if not year.isdigit() or mode == '' or int(year) < 1868:
                continue
            # Works for both single- and multi-theme rows.
            for theme in record['tag_thema'].split(', '):
                counts[(theme, mode, year)] += 1
    # Write output file
    fieldnames = [
        'Theme', 'Year acquisition',
        'Mode acquisition (new categories)', 'Weight'
    ]
    with open('theme_acqDate_acqMode_date.csv', 'w') as outfile:
        writer = csvkit.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()
        for (theme, mode, year), weight in counts.items():
            writer.writerow({
                'Theme': theme,
                'Year acquisition': '01/01/%s' % year,
                'Mode acquisition (new categories)': mode,
                'Weight': weight
            })
def handle_label(self, crs_filename, **options):
    """Import CRS activities from *crs_filename*.

    The command-line arguments are the names of the CRS files to process
    (original: "Gli argomenti forniti sono i nomi dei file CRS da
    lavorare").
    """
    start_time = time.time()
    i = 0  # NOTE(review): never used below.
    # With --clean, existing projects are removed first; abort when that
    # deletion does not go through.
    if options.get('clean') and not self.delete_projects():
        raise CommandError("Import aborted")
    # code -> pk lookup tables for every configured code list.
    self.all_codelists = dict([(cl.code_list, dict(cl.objects.values_list('code', 'pk'))) for cl in codelist_models.CODE_LISTS])
    rows = projects = activities = 0
    try:
        with open(crs_filename, 'r') as crs_file:
            for rows, activity in enumerate(csvkit.DictReader(crs_file), start=1):
                activity, new_project = self.load_activity(activity, rows)
                if activity:
                    activities += 1
                    self.stdout.write("\rImported row: %d" % (activities), ending='')
                    self.stdout.flush()
                    if new_project:
                        projects += 1
    except KeyboardInterrupt:
        self.stdout.write("\nCommand execution aborted.")
    finally:
        # Totals are printed even on abort.
        self.stdout.write("\nTotal projects: %d" % projects)
        self.stdout.write("Total activities: %d" % activities)
        self.stdout.write("Total rows: %d" % rows)
        self.stdout.write("Execution time: %d seconds" % (time.time() - start_time))
# Parse US postal addresses from input.csv with usaddress and collect the
# tagged address components for each row.
import csvkit
import usaddress

# expected format in input.csv: first column 'id', second column 'address'
with open('input.csv', 'r') as f:
    reader = csvkit.DictReader(f)
    all_rows = []
    for row in reader:
        try:
            parsed_addr = usaddress.tag(row['address'])
            # usaddress.tag returns (components_dict, address_type).
            row_dict = parsed_addr[0]
        except:
            # NOTE(review): bare except -- any failure (parse error,
            # missing column, ...) is recorded as an 'error' row.
            row_dict = {'error': 'True'}
        row_dict['id'] = row['id']
        all_rows.append(row_dict)

# Union of all component columns usaddress can emit, plus id and error.
field_list = [
    'id', 'AddressNumber', 'AddressNumberPrefix', 'AddressNumberSuffix',
    'BuildingName', 'CornerOf', 'IntersectionSeparator', 'LandmarkName',
    'NotAddress', 'OccupancyType', 'OccupancyIdentifier', 'PlaceName',
    'Recipient', 'StateName', 'StreetName', 'StreetNamePreDirectional',
    'StreetNamePreModifier', 'StreetNamePreType', 'StreetNamePostDirectional',
    'StreetNamePostModifier', 'StreetNamePostType', 'SubaddressIdentifier',
    'SubaddressType', 'USPSBoxGroupID', 'USPSBoxGroupType', 'USPSBoxID',
    'USPSBoxType', 'ZipCode', 'error'
]

with open('output.csv', 'wb') as outfile:
    writer = csvkit.DictWriter(outfile, field_list)
    # NOTE(review): writeheader()/writerows(all_rows) appear to be missing
    # (or live outside this chunk) -- as shown, nothing is ever written.
# Compare source slugs between the canonical sources.csv and new_sources.csv
# and write the symmetric differences to sourceTroubles.csv.
import csvkit

with open('../../csv_data/sources.csv', 'r') as sf:
    sources = csvkit.DictReader(sf)
    with open('new_sources.csv', 'r') as nsf:
        new_sources = csvkit.DictReader(nsf)
        # Slug sets must be built while both files are still open:
        # DictReader iterates lazily over the underlying file object.
        sourcesSlugs = set(s['slug'] for s in sources)
        newSourcesSlugs = set(s['slug'] for s in new_sources)
        inSourceNotInNew = sourcesSlugs - newSourcesSlugs
        inNewNotInSource = newSourcesSlugs - sourcesSlugs

with open('sourceTroubles.csv', 'w') as of:
    sourceTroubles = csvkit.DictWriter(of, ['source', 'set'])
    sourceTroublesData = [{
        'source': s,
        'set': 'inSourceNotInNew'
    } for s in inSourceNotInNew]
    sourceTroublesData += [{
        'source': s,
        'set': 'inNewNotInSource'
    } for s in inNewNotInSource]
    # Sort for a stable, diff-friendly output order.
    sourceTroublesData = sorted(sourceTroublesData, key=lambda e: e['source'])
    sourceTroubles.writeheader()
    sourceTroubles.writerows(sourceTroublesData)
def update_crsids(filename):
    # Re-key activities to their corrected CRS id (CSV column 'CRSID-OK'):
    # move each Activity onto the right Project (creating it when missing),
    # merge duplicate activities for the same project/year, refresh the
    # linked Initiative, and finally delete Projects left without any
    # Activity.
    for row in csvkit.DictReader(open(filename)):
        activity_id = row['openaid_id']
        new_crsid = row['CRSID-OK']
        initiative_number = row['Initiative number']
        # clean number: keep only the first token, zero-padded to 6 chars
        if len(initiative_number.split(' ')) > 0:
            initiative_number = initiative_number.split(' ')[0]
        if len(initiative_number) > 0:
            initiative_number = initiative_number.zfill(6)
        project_number = '/'.join([initiative_number, row['projectnumber']])
        updates_markers = False
        try:
            activity = Activity.objects.get(pk=activity_id)
        except Activity.DoesNotExist:
            # "Cannot find Activity.pk = ..."
            print '- Impossibile trovare Activity.pk = %s' % activity_id
            continue
        try:
            new_project = Project.objects.get(
                crsid=new_crsid, recipient__code=activity.recipient.code)
            try:
                # Another activity already holds this project/year slot:
                # merge ours into it and delete the leftover record.
                conclict_activity = new_project.activity_set.get(
                    year=activity.year)
                if conclict_activity == activity:
                    continue
                _, updates_markers = conclict_activity.merge(activity, save=False)
                activity, conclict_activity = conclict_activity, activity
                # "Deleting ... after merging into ..."
                print '- Cancello %s dopo il merge in %s' % (
                    repr(conclict_activity), repr(activity))
                conclict_activity.delete()
            except Activity.DoesNotExist:
                pass
        except Project.DoesNotExist:
            # No project with the corrected CRS id yet: create it.
            new_project = Project.objects.create(
                crsid=new_crsid,
                recipient=activity.recipient,
                start_year=activity.year,
                end_year=activity.year,
                number=project_number,
            )
            print(
                '- Nuovo progetto per Activity %s non trovato con newCRSID:%s'
                % (repr(activity), new_crsid))
        finally:
            # Runs on every path (including the `continue` above): point
            # the activity at its corrected id/project/number and persist.
            activity.crsid = new_crsid
            activity.project = new_project
            activity.number = project_number
            if updates_markers:
                activity.markers.save()
            if project_number:
                activity.number = project_number
                new_project.number = project_number
                try:
                    initiative = Initiative.objects.get(
                        code=new_project.number.split('/')[0])
                    new_project.initiative = initiative
                except Initiative.DoesNotExist:
                    # "No Initiative found with code ..."
                    print '- Nessuna Initiative trovata con codice: %s' % (
                        project_number)
            activity.save()
            new_project.update_from_activities(save=True)
            #print '- %s aggiornata' % repr(activity)
    # Delete all projects left without any Activity
    # ("cancello tutti i progetti senza Activity").
    qs = Project.objects.annotate(activities=Count('activity')).filter(
        activities=0)
    print 'Cancello %s Project senza Activity' % (qs.count(), )
    qs.delete()
# cursor to mysql conn = sqlite3.connect("../../sqlite_data/RICardo.sqlite") c = conn.cursor() # nb_flows by source slug c.execute( """ SELECT source,count(*) as nb_flows FROM flows group by source UNION SELECT source, count(*) as nb_flows from exchange_rates group by source""" ) nb_flows_by_sources = dict(r for r in c) slugs = {} # open source_types with open('source_types.csv', 'r') as stfile: # create source_types index source_types = csvkit.DictReader(stfile) source_types = dict((st['acronym'], st) for st in source_types) # open sources with open('sources.csv', 'r') as sfile: sources = list(csvkit.DictReader(sfile)) # join sources and source_types for s in sources: # keep fields for field in FIELDSTOCOPY: s[field] = source_types[s['acronym']][field] # filter out fields for field in FIELDSTODISCARD: del (s[field]) # isolate author_editor if s['author'] == s['author_editor']: s['author'] = None
import os
import csvkit
import collections

# Path to the Census Gazetteer national counties file (tab-separated,
# latin-1 encoded).
COUNTIES_PATH = os.path.join(
    os.path.dirname(__file__), 'data', '2017_Gaz_counties_national.txt'
)

# Materialize the rows and close the handle: the original left the file
# object dangling open for the whole process lifetime and exposed an
# exhausted reader as COUNTIES_LIST.
with open(COUNTIES_PATH, 'r') as _counties_file:
    COUNTIES_LIST = list(csvkit.DictReader(_counties_file,
                                           delimiter="\t",
                                           encoding="latin-1"))

# state USPS code -> {county NAME -> GEOID}
COUNTIES_DICT = collections.defaultdict(dict)
for row in COUNTIES_LIST:
    COUNTIES_DICT[row['USPS']][row['NAME']] = row['GEOID']


def county(usps, name):
    """Return the county GEOID for a state USPS code and county name.

    An unknown *usps* yields an empty dict (defaultdict), so the lookup
    then raises KeyError on *name*; an unknown *name* in a known state
    raises KeyError directly.
    """
    return COUNTIES_DICT[usps][name]
# Matches any non-word character (Unicode-aware) for slug cleaning.
nonLetters = re.compile(r'\W', re.UNICODE)


def slugify(source):
    # Build a CamelCase slug from the identifying fields of a source row:
    # each space-separated word is stripped of non-word characters and
    # capitalized; fields are joined with '_'.
    slug = lambda s: ''.join(
        [re.sub(nonLetters, '', w).capitalize() for w in s.split(' ')])
    fields = [
        'author', 'name', 'country', 'volume_date', 'volume_number', 'pages'
    ]
    # Only fields that are non-empty AND still have content after cleaning
    # contribute to the slug.
    return '_'.join(
        slug(source[f]) for f in fields if source[f] and slug(source[f]))


# read 'new_sources.csv'
with open('new_sources.csv', 'r') as f:
    new_sources = list(csvkit.DictReader(f))

swapSources = {}
toDeleteSourcesSlugs = []

# Re-run slug generation ("refaire tourner les slugs").
sources = []
for source in new_sources:
    source['new_slug'] = slugify(source)
    # create swap source slug dictionnary to update flow and currency later
    # based on to be removed column
    swapSources[source['slug']] = slugify(source)
    # remove uneeded lines: keep only rows not flagged for removal
    if source['put x to remove'] == '':
        sources.append(source)
"pays", "value", "quantit", "origine", "total", "quantity_unit", "leurvaleursubtotal_1", "leurvaleursubtotal_2", "leurvaleursubtotal_3", "prix_unitaire", "probleme", "remarks" ] headers = [] for (dirpath, dirnames, filenames) in os.walk(directory): if not sum(dirpath == os.path.join(directory, b) for b in black_list): for csv_file_name in filenames: ext = csv_file_name.split( ".")[-1] if "." in csv_file_name else None if ext == "csv": print "%s in %s" % (csv_file_name, dirpath) with open(os.path.join(dirpath, csv_file_name), "r") as source_file: r = csvkit.DictReader(source_file) headers += r.fieldnames lines = list(r) sources_aggregation += lines sources_aggregation = sorted( sources_aggregation, key=lambda e: (e["sourcetype"], e["year"], e["direction"] if "direction" in e else "", e["exportsimports"] if "exportsimports" in e else "", e["numrodeligne"] if ("numrodeligne" in e and e["numrodeligne"]) else "", e[ "marchandises"], e["pays"] if "pays" in e else "")) # Cleaning sources for row in sources_aggregation: for k in row: row[k] = clean(row[k])
import os
import json
import requests
import bs4
import csvkit

# Scrape the Roadmap Epigenomics narrowPeak directory listing and print
# (as JSON) the download entries for 'Blood & T-cell' samples.
base_url = 'http://egg2.wustl.edu/roadmap/data/byFileType/peaks/consolidated/narrowPeak/'

# extract T-cell states: collect the EIDs whose metadata group matches.
metapath = 'external_static/metadata/epigenome_roadmap/chromatin_state_samples_meta.csv'
# NOTE(review): assert is stripped under `python -O`; fine for a one-off
# script, but an explicit check would be sturdier.
assert os.path.isfile(metapath)
states = []
with open(metapath) as f:
    for x in csvkit.DictReader(f):
        if x['group'] == 'Blood & T-cell':
            states.append(x['eid'])

entries = []
html = bs4.BeautifulSoup(requests.get(base_url).content, 'html5lib')
for x in html.find_all('a'):
    # Only gzipped narrowPeak files; the EID is the filename prefix
    # before the first '-'.
    if x.attrs['href'].endswith('narrowPeak.gz'):
        filename = x.attrs['href']
        eid = filename.split('-')[0]
        if eid in states:
            entries.append(
                dict(url=requests.compat.urljoin(base_url, filename),
                     filepath=filename))

print(json.dumps(entries, sort_keys=True, indent=4))
# See if the zip database table already exists. cursor.execute( "SELECT 1 FROM sqlite_master WHERE type='table' AND name='zip'") exists = cursor.fetchone() # If the database table doesn't exist, create it. if exists is None: cursor.execute( "CREATE TABLE zip(zipcode TEXT PRIMARY KEY NOT NULL, " + "zone TEXT, temperatures TEXT, city TEXT, state TEXT, latitude INTEGER, longitude INTEGER)" ) db.commit() # Import the CSV file into the database with open('zipcodes.csv', 'rb') as zips: dr = csvkit.DictReader(zips) to_db = [(i['zipcode'], i['city'], i['state'], i['latitude'], i['longitude']) for i in dr] cursor.executemany( "INSERT INTO zip (zipcode, city, state, latitude, longitude) VALUES (?, ?, ?, ?, ?);", to_db) db.commit() # Now load our climate data. zone_files = [1, 2, 3, 4] for zone_file in zone_files: with open(str(zone_file) + '.csv', 'rb') as zips: dr = csvkit.DictReader(zips) to_db = [(i['zone'], i['trange'], i['zipcode']) for i in dr] cursor.executemany( "UPDATE zip SET zone=?, temperatures=? WHERE zipcode=?;", to_db)