databaseDirectory = "../Database/" numCountiesExpected = 88 myZipFiles = glob.glob(zipDirectory + '*.zip') myZipFiles.sort() if len(myZipFiles) == numCountiesExpected: print numCountiesExpected, "counties found - OK!" else: print numCountiesExpected, "counties expected" print len(myZipFiles), "counties found" numErrors = len(glob.glob(zipDirectory + '*.ERROR')) if numErrors > 0: print str(numErrors) + " error file(s) found" if not MyFunctions.query_yes_no("Proceed?"): print "Exiting due to user request: incorrect # of counties found!" sys.exit(0) fields = [] # Scan and see if files are consistent errors = [] for onefile in myZipFiles: # separate the filename filenameparts = onefile.split('/') # grab the actual filename filename = filenameparts[-1]
import shutil
import time
import subprocess
import zipfile
import MyFunctions

# Wall-clock start of the run.
start_time = time.time()

ZIPFILEDIRECTORY = "../ZIP/"
errors = []  # list of lists, each sublist defined as [filename, error message to display]
countiesfile = 'counties.txt'  # text file containing list of counties to download (one per line)

# If the download directory already holds files, ask before continuing.
# "no" is passed as the second argument (presumably the default answer --
# confirm against MyFunctions.query_yes_no).
if os.listdir(ZIPFILEDIRECTORY):
    if not MyFunctions.query_yes_no(
            "Directory " + ZIPFILEDIRECTORY + " is not empty. Proceed? \n"
            "(Will fill missing files, will not overwrite) -->",
            "no"):
        print("\nOk - exiting at user request")
        sys.exit(0)

# Read the counties file inside a with-block so the handle is closed even if
# reading fails part-way through.
with open(countiesfile, 'r') as f:
    # standardize each line (strip leading/trailing whitespace, make all uppercase)
    counties = [x.strip().upper() for x in f]

# cycle through counties in the list
# print " -- ", nameMatch['id'], nameMatch['last'], nameMatch['first'], nameMatch['lat'], nameMatch['long'] if row['lat'] == nameMatch['lat'] and row['long'] == nameMatch['long']: matches += 1 print "MATCH FOUND! -- ", matches, nameMatch['first'], nameMatch['last'] insertmatchsql = "insert into libertarians_national (national_id, libertarian_id) values ('" + nameMatch['id'] + "', '" + row['SOS_VOTERID'] + "')" # print insertmatchsql cur.execute(insertmatchsql) db.commit() print "Number Processed: ", numberProcessed print "Matches: ", matches db.close() if MyFunctions.query_yes_no("Copy libertarians_national table into default database for later use?"): src_conn = MyFunctions.open_db(dbfile) dest_conn = MyFunctions.open_db(defaultDBfile) MyFunctions.copy_table('libertarians_national',src_conn,dest_conn) end_time = time.time() print("\n<--- Execution time: %s seconds --->" % (end_time - start_time))
cur = db.cursor() cur.execute("PRAGMA table_info('national')") pragma = cur.fetchall() # extract db field names from national dbfields = [str(x[1]) for x in pragma] if dbfields == fields: print "DB matches file format" else: print "DB does not match file format" print len(dbfields), "fields in database" print len(fields), "fields in files" if MyFunctions.query_yes_no("Replace db schema with file schema?", "no"): print "\nOk...." print "Dropping table: national" db.execute('''DROP TABLE IF EXISTS national''') db.commit() print "Recreating table: national with new file format" sql = "CREATE TABLE national (" fields_added = 0 for afield in fields: if fields_added == 0: fieldname = "" else: fieldname = ", " print fieldname if afield == "id": optionText = " PRIMARY KEY"
print rowsProcessed, ": ", r.status_code, ": ", insertSQL if rowsProcessed % 100 == 0: db.commit() # Commit every 100 requests except: print "Uh oh! Something went wrong!" print r.url print r.json() db.commit() db.close() raise db.commit() # Do final commit db.close() if MyFunctions.query_yes_no("Copy geolocations into default database for later use?"): src_conn = MyFunctions.open_db(dbfile) dest_conn = MyFunctions.open_db(defaultDBfile) MyFunctions.copy_table('locations',src_conn,dest_conn) end_time = time.time() print("\n<--- Execution time: %s seconds --->" % (end_time - start_time))