""") create_location_table() f = open(flder+"/"+fname, "r") print f.readline() j = 0 for i,x in enumerate(f): x = x.rstrip('\r\n') rec = x.split("\t") ## if rec[9]=="P" and rec[11]!="": #if Unknown, super small right? ## if rec[9]=="P": #properly adjust country code if rec[12] in cc_iso: rec[12] = cc_iso[rec[12]] rec[22] = uniasc(unicode(rec[22], "latin-1")).upper() rec[23] = uniasc(unicode(rec[23], "latin-1")).upper() rec[24] = uniasc(unicode(rec[24], "latin-1")).upper() rec[25] = uniasc(unicode(rec[25], "latin-1")).upper() rec[25] = rec[25][:-1] #rec.extend([rec[22].upper()[:3], rec[22].upper()[::-1][:4]]) #c.execute("INSERT OR REPLACE INTO gnsloc VALUES (%s)" % ",".join(["?"]*28), rec) c.execute("INSERT OR REPLACE INTO gnsloc VALUES (%s)" % ",".join(["?"]*29), rec) if i%100000==0: conn.commit() print i, datetime.datetime.now() j = 0 ## if i==50000: ## break conn.commit()
CREATE INDEX IF NOT EXISTS uidx_zip ON usloc (Zipcode); CREATE INDEX IF NOT EXISTS uidx_cty ON usloc (City); CREATE INDEX IF NOT EXISTS uidx_st ON usloc (State); """) for x in fnameUS: c.executemany("INSERT OR REPLACE INTO usloc VALUES (?,?,?,?,?,?)", [x for x in csv.reader(open("%s/%s" % (flderUS, x), "r"))][1:]) # TYPOS typos = [ [ cityctry(x[0], x[2]), x[1], cityctry(x[0], x[2], ret="ctry"), uniasc(unicode(x[3], "latin-1")).upper(), x[4], x[5] ] #for x in csv.reader(open("Typos\Typos.csv", "rb"))][1:] for x in csv.reader(open("/var/share/patentdata/Typos/typos.csv", "rb")) ][1:] c.executescript(""" DROP TABLE IF EXISTS typos; CREATE TABLE IF NOT EXISTS typos ( City VARCHAR, State VARCHAR, Country VARCHAR, NewCity VARCHAR, NewState VARCHAR, NewCountry VARCHAR, UNIQUE(City, State, Country)); CREATE INDEX IF NOT EXISTS typos_all ON typos (City, State, Country); CREATE INDEX IF NOT EXISTS typos_ctry ON typos (Country); CREATE INDEX IF NOT EXISTS typos_ctate ON typos (City, State); CREATE INDEX IF NOT EXISTS typos_cctry ON typos (City, Country);
create_location_table() f = open(flder+"/"+fname, "r") print f.readline() j = 0 for i,x in enumerate(f): #print x x = x.rstrip('\r\n') rec = x.split("\t") ## if rec[9]=="P" and rec[11]!="": #if Unknown, super small right? ## if rec[9]=="P": #properly adjust country code if rec[12] in cc_iso: rec[12] = cc_iso[rec[12]] rec[22] = uniasc(unicode(rec[22], "latin-1")).upper() rec[23] = uniasc(unicode(rec[23], "latin-1")).upper() rec[24] = uniasc(unicode(rec[24], "latin-1")).upper() rec[25] = uniasc(unicode(rec[25], "latin-1")).upper() rec[25] = rec[25][:-1] #rec.extend([rec[22].upper()[:3], rec[22].upper()[::-1][:4]]) #c.execute("INSERT OR REPLACE INTO gnsloc VALUES (%s)" % ",".join(["?"]*28), rec) c.execute("INSERT OR REPLACE INTO gnsloc VALUES (%s)" % ",".join(["?"]*29), rec) if i%100000==0: conn.commit() print i, datetime.datetime.now() j = 0 ## if i==50000: ## break conn.commit()
StateName VARCHAR(10), UNIQUE(Zipcode, City, State)); CREATE INDEX IF NOT EXISTS uidx_all ON usloc (Zipcode, City, State); CREATE INDEX IF NOT EXISTS uidx_zip ON usloc (Zipcode); CREATE INDEX IF NOT EXISTS uidx_cty ON usloc (City); CREATE INDEX IF NOT EXISTS uidx_st ON usloc (State); """) for x in fnameUS: c.executemany("INSERT OR REPLACE INTO usloc VALUES (?,?,?,?,?,?)", [x for x in csv.reader(open("%s/%s" % (flderUS, x), "r"))][1:]) # TYPOS typos = [[cityctry(x[0], x[2]), x[1], cityctry(x[0], x[2], ret="ctry"), uniasc(unicode(x[3], "latin-1")).upper(), x[4], x[5]] #for x in csv.reader(open("Typos\Typos.csv", "rb"))][1:] for x in csv.reader(open("/var/share/patentdata/Typos/typos.csv", "rb"))][1:] c.executescript(""" DROP TABLE IF EXISTS typos; CREATE TABLE IF NOT EXISTS typos ( City VARCHAR, State VARCHAR, Country VARCHAR, NewCity VARCHAR, NewState VARCHAR, NewCountry VARCHAR, UNIQUE(City, State, Country)); CREATE INDEX IF NOT EXISTS typos_all ON typos (City, State, Country); CREATE INDEX IF NOT EXISTS typos_ctry ON typos (Country); CREATE INDEX IF NOT EXISTS typos_ctate ON typos (City, State); CREATE INDEX IF NOT EXISTS typos_cctry ON typos (City, Country); """) c.executemany("INSERT OR REPLACE INTO typos VALUES (?, ?, ?, ?, ?, ?)", typos)