""")
create_location_table()


f = open(flder+"/"+fname, "r")
print f.readline()
j = 0
for i,x in enumerate(f):
    x = x.rstrip('\r\n')
    rec = x.split("\t")
##    if rec[9]=="P" and rec[11]!="": #if Unknown, super small right?
##    if rec[9]=="P":
        #properly adjust country code
    if rec[12] in cc_iso:
        rec[12] = cc_iso[rec[12]]
    rec[22] = uniasc(unicode(rec[22], "latin-1")).upper()
    rec[23] = uniasc(unicode(rec[23], "latin-1")).upper()
    rec[24] = uniasc(unicode(rec[24], "latin-1")).upper()
    rec[25] = uniasc(unicode(rec[25], "latin-1")).upper()
    rec[25] = rec[25][:-1]
    #rec.extend([rec[22].upper()[:3], rec[22].upper()[::-1][:4]])
    #c.execute("INSERT OR REPLACE INTO gnsloc VALUES (%s)" % ",".join(["?"]*28), rec)
    c.execute("INSERT OR REPLACE INTO gnsloc VALUES (%s)" % ",".join(["?"]*29), rec)
    if i%100000==0:
        conn.commit()
        print i, datetime.datetime.now()
        j = 0
##    if i==50000:
##        break
conn.commit()
Esempio n. 2
0
    CREATE INDEX IF NOT EXISTS uidx_zip ON usloc (Zipcode);
    CREATE INDEX IF NOT EXISTS uidx_cty ON usloc (City);
    CREATE INDEX IF NOT EXISTS uidx_st  ON usloc (State);
    """)

for x in fnameUS:
    c.executemany("INSERT OR REPLACE INTO usloc VALUES (?,?,?,?,?,?)",
                  [x
                   for x in csv.reader(open("%s/%s" % (flderUS, x), "r"))][1:])

# TYPOS
typos = [
    [
        cityctry(x[0], x[2]), x[1],
        cityctry(x[0], x[2], ret="ctry"),
        uniasc(unicode(x[3], "latin-1")).upper(), x[4], x[5]
    ]
    #for x in csv.reader(open("Typos\Typos.csv", "rb"))][1:]
    for x in csv.reader(open("/var/share/patentdata/Typos/typos.csv", "rb"))
][1:]

c.executescript("""
    DROP TABLE IF EXISTS typos;
    CREATE TABLE IF NOT EXISTS typos (
        City VARCHAR,       State VARCHAR,      Country VARCHAR,
        NewCity VARCHAR,    NewState VARCHAR,   NewCountry VARCHAR,
        UNIQUE(City, State, Country));
    CREATE INDEX IF NOT EXISTS typos_all   ON typos (City, State, Country);
    CREATE INDEX IF NOT EXISTS typos_ctry  ON typos (Country);
    CREATE INDEX IF NOT EXISTS typos_ctate ON typos (City, State);
    CREATE INDEX IF NOT EXISTS typos_cctry ON typos (City, Country);
Esempio n. 3
0
create_location_table()


f = open(flder+"/"+fname, "r")
print f.readline()
j = 0
for i,x in enumerate(f):
    #print x
    x = x.rstrip('\r\n')
    rec = x.split("\t")
##    if rec[9]=="P" and rec[11]!="": #if Unknown, super small right?
##    if rec[9]=="P":
        #properly adjust country code
    if rec[12] in cc_iso:
        rec[12] = cc_iso[rec[12]]
    rec[22] = uniasc(unicode(rec[22], "latin-1")).upper()
    rec[23] = uniasc(unicode(rec[23], "latin-1")).upper()
    rec[24] = uniasc(unicode(rec[24], "latin-1")).upper()
    rec[25] = uniasc(unicode(rec[25], "latin-1")).upper()
    rec[25] = rec[25][:-1]
    #rec.extend([rec[22].upper()[:3], rec[22].upper()[::-1][:4]])
    #c.execute("INSERT OR REPLACE INTO gnsloc VALUES (%s)" % ",".join(["?"]*28), rec)
    c.execute("INSERT OR REPLACE INTO gnsloc VALUES (%s)" % ",".join(["?"]*29), rec)
    if i%100000==0:
        conn.commit()
        print i, datetime.datetime.now()
        j = 0
##    if i==50000:
##        break
conn.commit()
Esempio n. 4
0
        StateName VARCHAR(10),
        UNIQUE(Zipcode, City, State));
    CREATE INDEX IF NOT EXISTS uidx_all ON usloc (Zipcode, City, State);
    CREATE INDEX IF NOT EXISTS uidx_zip ON usloc (Zipcode);
    CREATE INDEX IF NOT EXISTS uidx_cty ON usloc (City);
    CREATE INDEX IF NOT EXISTS uidx_st  ON usloc (State);
    """)

for x in fnameUS:
    c.executemany("INSERT OR REPLACE INTO usloc VALUES (?,?,?,?,?,?)", 
            [x for x in csv.reader(open("%s/%s" % (flderUS, x), "r"))][1:])


# TYPOS
typos = [[cityctry(x[0], x[2]), x[1], cityctry(x[0], x[2], ret="ctry"),
          uniasc(unicode(x[3], "latin-1")).upper(), x[4], x[5]]
         #for x in csv.reader(open("Typos\Typos.csv", "rb"))][1:]
         for x in csv.reader(open("/var/share/patentdata/Typos/typos.csv", "rb"))][1:]

c.executescript("""
    DROP TABLE IF EXISTS typos;
    CREATE TABLE IF NOT EXISTS typos (
        City VARCHAR,       State VARCHAR,      Country VARCHAR,
        NewCity VARCHAR,    NewState VARCHAR,   NewCountry VARCHAR,
        UNIQUE(City, State, Country));
    CREATE INDEX IF NOT EXISTS typos_all   ON typos (City, State, Country);
    CREATE INDEX IF NOT EXISTS typos_ctry  ON typos (Country);
    CREATE INDEX IF NOT EXISTS typos_ctate ON typos (City, State);
    CREATE INDEX IF NOT EXISTS typos_cctry ON typos (City, Country);
    """)
c.executemany("INSERT OR REPLACE INTO typos VALUES (?, ?, ?, ?, ?, ?)", typos)