def setUp(self):
    # Start every test from a clean slate: remove artifacts a previous
    # run may have left behind (both databases and their CSV exports).
    for leftover in ("test.db", "test.csv", "test2.db", "test2.csv"):
        self.removeFile(leftover)
    # create a really basic dataset
    self.createFile(file="test.db")
    self.s = SQLite.SQLite(db="test.db", tbl="test")
    # Build a second database and attach it to the first, so tests that
    # exercise ATTACH-dependent helpers have something to work with.
    self.createFile("test2.db")
    secondary = SQLite.SQLite("test2.db", tbl="test")
    self.s.attach(secondary)
def __init__(self):
    """Open the project database and pre-build the note lookup maps."""
    self.sqlite = SQLite.SQLite()
    # NOTE(review): the wrapper's own `database` attribute is passed back
    # into it -- presumably the configured DB path; confirm in SQLite.py.
    self.connection = self.sqlite.create_connection(self.sqlite.database)
    # Lookup tables computed once at construction time.
    self.degree = self.map_degree()
    self.octave = self.map_octave()
    self.type = self.map_type()
def __init__(self, filepath, dbnames, graphml='', begin=1975, end=2010, increment=3):
    """
    Takes a filepath string and a list of dbnames.

    If graphml files already exist, take the list of files and read them
    into the graphs dict (keyed by window start year) as graph objects.

    ex:

    import DVN
    D = DVN.DVN(filepath='/home/ayu/DVN/',
                dbnames=['patent', 'invpat', 'citation', 'class'],
                graphml = ['pat_2000.graphml', 'pat_2003.graphml'])
    D.summary()
    D.create_csv_file()
    """
    self.filepath = filepath
    self.data = {}      # dbname -> SQLite wrapper over <filepath><dbname>.sqlite3
    self.graphs = {}    # window start year -> igraph.Graph loaded from graphml
    self.begin = begin
    self.end = end
    self.increment = increment
    for dbname in dbnames:
        self.data[dbname] = SQLite.SQLite(filepath + dbname + '.sqlite3', dbname)
    if graphml:
        # Pair each window start year with its graphml file.  zip()
        # replaces the original manual `i = i + 1` counter and stops at
        # the shorter sequence, so a graphml list shorter than the year
        # range loads what is available instead of raising IndexError.
        years = range(self.begin, self.end, self.increment)
        for year, fname in zip(years, graphml):
            self.graphs[year] = igraph.Graph.Read_GraphML(filepath + fname)
def __init__(self, link):
    """Compute metrics for the dataset behind *link* and upsert its graph row."""
    self.sqlite = SQLite.SQLite()
    self.connection = self.sqlite.create_connection(self.sqlite.database)
    # Fetch the dataset and derive its basic metrics.
    graph = self.retrieve_dataset(link)
    nodes = self.get_number_of_nodes(graph)
    in_degree_id = self.get_in_degree(graph)
    out_degree_id = self.get_out_degree(graph)
    metrics = (nodes, in_degree_id, out_degree_id)
    self.metrics_id = self.sqlite.create_metrics(self.connection, metrics)
    # Insert-or-update the graph row keyed by its URI.
    cursor = self.connection.cursor()
    cursor.execute("SELECT rowid FROM graph WHERE uri = ?", (link, ))
    data = cursor.fetchone()
    if data is None:
        # First time we see this URI: create a fresh graph row.
        graph_data = (link, self.metrics_id)
        self.graph_id = self.sqlite.create_graph(self.connection, graph_data)
    else:
        # Known URI: point the existing row at the new metrics.
        graph_data = (link, self.metrics_id, data[0])
        self.sqlite.update_graph(self.connection, graph_data)
        self.graph_id = data[0]
    self.connection.commit()
def __init__(self, match="patent"):
    """Open the SQLite source configured for the given match mode."""
    # Each supported mode maps to its (database path, table name) pair.
    files = {
        'patent': ['/home/ron/disambig/sqlite/invpat.sqlite3', 'invpat'],
    }
    self.match = match
    db_path, table_name = files[self.match]
    self.s = SQLite.SQLite(db=db_path, tbl=table_name)
def test___init__(self):
    """A bare instance defaults to :memory:/main; the fixture keeps its args."""
    fresh = SQLite.SQLite()
    self.assertEqual("main", fresh.tbl)
    self.assertEqual(":memory:", fresh.path)
    # The fixture built in setUp() must reflect its constructor arguments.
    self.assertEqual("test.db", self.s.path)
    self.assertEqual("test", self.s.tbl)
    self.assertFalse(self.s.output)
def __init__(self, db=None, table=None):
    """Open the invpat SQLite wrapper, defaulting db and table when omitted.

    db    -- database path (defaults to the disambig invpat file)
    table -- table name (defaults to "invpat")
    """
    import SQLite
    # Conditional expressions replace the fragile `X == None and A or B`
    # idiom (which would misfire if a falsy-but-valid value were passed),
    # and `is None` replaces the `== None` comparison.
    self.table = "invpat" if table is None else table
    self.sql = SQLite.SQLite(
        db="/home/ron/disambig/sqlite/invpat.sqlite3" if db is None else db,
        tbl=self.table)
    self.sql.open()
def handle_patent():
    """Normalize AppDate/GDate in patent.sqlite3 through the dateVert UDF."""
    p = SQLite.SQLite(db='patent.sqlite3', tbl='patent')
    # Expose the Python date normalizer to SQL as dVert().
    p.conn.create_function('dVert', 1, dateVert)
    p.c.execute(
        """update patent set AppDate=dVert(AppDate), GDate=dVert(GDate);""")
    p.commit()
    p.close()
    # t1 is a module-level start timestamp defined elsewhere in this file.
    print "DONE: Patent Date!", "\n -", datetime.datetime.now() - t1
def __init__(self, db="asg2.sqlite3", fld="Assignee", uqKey="AsgNum",
             other="NCity, NState, NCountry,", table="Assignee_2"):
    """Hold the assignee-disambiguation column configuration and open the DB."""
    self.fld = fld        # column holding the raw name being grouped
    self.uqKey = uqKey    # unique-key column propagated across a group
    self.other = other    # extra columns carried along (note trailing comma)
    self.table = table
    # NOTE(review): SQLite is called as a class here (SQLite(db)) rather
    # than SQLite.SQLite(db) as elsewhere -- confirm the import style in
    # this module (`from SQLite import SQLite`?).
    self.s = SQLite(db)
def fetch(self):
    """Crawl USPTO full-text search results for self.query.

    Pages are fetched (and cached) through PyCurl into the `tbl` table,
    then parsed into a (query, Patent, Title) list in `tbl2`; optionally
    each patent's own page is fetched into `patent_search`.
    """
    tbl = self.tbl
    tbl2 = self.tbl2
    db = self.db
    query = self.query
    category = self.category
    maxconn = self.maxconn
    #FIRST PATENT
    if category=="grant":
        base = "patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=/netahtml/PTO/search-adv.htm&r=0&f=S&l=50&d=PTXT"
    # NOTE(review): only category=="grant" assigns `base`; any other
    # category would hit NameError below -- confirm this is intended.
    params = urllib.urlencode({"p":1, "Query":query})
    url = "http://{base}&{params}".format(base=base, params=params)
    firstkey = "{query}||{page}".format(query=query, page=1)
    PyCurl.PyCurl([[firstkey, url]], maxconn=maxconn, SQLdb=db, SQLtbl=tbl, opt="M")
    self.s = SQLite.SQLite(db=db, tbl=tbl)
    #SUBSEQUENT PATENT
    # Scrape the total hit count off the first page to compute how many
    # 50-result pages need fetching.
    html = self.grab()
    pats = int(re.findall("<B>Results of Search.*?</B>: ([0-9]+) patents", html)[0])
    pages = int(math.ceil(float(pats)/50))
    print "Query: {query}\n - Patents: {pats}, Pages: {pages}".format(query=query, pats=pats, pages=pages)
    urls = []
    # Srch1 is a hidden token the site requires to page through a result set.
    Srch1 = re.findall('<INPUT TYPE="HIDDEN" NAME="Srch1" VALUE="(.*?)">', html)[0]
    for num in range(2, pages+1):
        params = urllib.urlencode({"Srch1":Srch1, "NextList{num}".format(num=num):"N"})
        urls.append(["{query}||{page}".format(query=query, page=num),
                     "http://{base}&{params}".format(base=base, params=params)])
    if len(urls)>0:
        pc = PyCurl.PyCurl(urls, maxconn=maxconn, SQLdb=db, SQLtbl=tbl, opt="M")
    # NOTE(review): `or True` forces this branch to always run, and `pc`
    # is unbound when urls is empty -- the condition looks like a
    # debugging leftover; confirm.
    if pc.new or True:
        #BUILD PATENT LIST
        self.s.chgTbl(tbl2)
        self.s.c.execute("CREATE TABLE IF NOT EXISTS {tbl} (query TEXT, Patent VARCHAR(8), Title TEXT, UNIQUE(query, Patent))".format(tbl=tbl2))
        self.s.index(["Patent"])
        patUrl = []
        for num in range(0, pages):
            html = self.grab(page=num+1)
            base = re.findall("<TABLE><TR><TD>.*?</TABLE>", html, re.S)[0]
            href = re.findall("<A HREF=.*?>(.*?)</A>", base, re.S)
            pats = []
            # Result anchors alternate: patent number, then title.
            for i in range(0, len(href), 2):
                pat = [query,
                       re.sub(",", "", href[i]),
                       re.sub(" +", " ", re.sub("\n", "", href[i+1])).strip()]
                pats.append(pat)
                # Direct link to this patent's full-text page.
                patUrl.append([pat[1],
                               "http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=%2Fnetahtml%2FPTO%2Fsearch-adv.htm&r=1&f=G&l=50&d=PTXT&p=1&p=1&S1={patent}.PN.".format(patent=pat[1])])
            self.s.c.executemany("INSERT OR IGNORE INTO {tbl} VALUES (?, ?, ?)".format(tbl=tbl2), pats)
        self.s.conn.commit()
        if self.patentGrab:
            # Pre-fetch every patent's page into the patent_search cache
            # (cache=None: never expire).
            PyCurl.PyCurl(patUrl, maxconn=maxconn, SQLdb=db, SQLtbl="patent_search", opt="M", cache=None).new
def __init__(self, db=None, sql=None, table=None):
    """Wrap an invpat SQLite handle, opening a default one when none is given.

    db    -- database path, used only when *sql* is None
    sql   -- an already-open SQLite wrapper to reuse
    table -- table name (defaults to "invpat")
    """
    # Conditional expression replaces the fragile `X == None and A or B`
    # idiom; `is None` replaces `== None`.
    self.table = "invpat" if table is None else table
    if sql is None:
        import SQLite
        self.sql = SQLite.SQLite(
            db="/home/ron/inputdata/Ron/fullset/invpatC.upper.Jan2011.sqlite3" if db is None else db,
            tbl=self.table)
        self.sql.open()
    else:
        # Reuse the caller's handle, re-pointing it at the requested table
        # (the raw `table` argument, as in the original behavior).
        self.sql = sql
        self.sql.chgTbl(table)
def setKey(self, db, table="main"):
    """Propagate the grp table's fld -> uqKey2 mapping onto *table* in *db*.

    Reads the (fld, uqKey2) pairs from this object's grp table, then
    rewrites table.uqKey in the target database through a SQL UDF.
    Unknown names map to "" (empty string), as before.
    """
    s = self.s
    s.open()
    # NOTE: table/column names are spliced in with %s -- they come from
    # trusted object configuration, not user input.
    OrgDct = dict(s.c.execute("SELECT %s, %s2 FROM grp"
                              % (self.fld, self.uqKey)).fetchall())
    s.close()
    t = SQLite(db)
    def OrgDctIt(x):
        # dict.get replaces the original manual membership test.
        return OrgDct.get(x, "")
    t.conn.create_function("OrgDct", 1, OrgDctIt)
    t.c.execute("UPDATE %s SET %s=OrgDct(%s)" % (table, self.uqKey, self.fld))
    t.conn.commit()
    t.close()
def merge(self, keys, db=None, tbl="main"):
    """Collapse a group of uqKey values onto the group's smallest key.

    keys -- list of key strings; short keys are first normalized to
    1 prefix char + 12 zero-padded digits so min() picks a stable
    representative.  When *db* is given, the same rewrite is applied to
    *tbl* in that database as well.
    """
    s = self.s
    s.open()
    if len(keys[0])<13:
        # e.g. "A123" -> "A000000000123"
        keys = ["%s%0.12d" % (x[0], int(x[1:])) for x in keys]
    k1 = min(keys)
    for k in keys:
        # NOTE(review): values are interpolated into SQL with %s rather
        # than bound parameters -- acceptable only because keys are
        # internally generated.
        s.c.execute("UPDATE grp SET %s2='%s' WHERE %s2='%s'"
                    % (self.uqKey, k1, self.uqKey, k))
    s.conn.commit()
    s.close()
    if db!=None:
        # Mirror the key rewrite into the secondary database.
        t = SQLite(db)
        for k in keys:
            t.c.execute("UPDATE %s SET %s='%s' WHERE %s='%s'"
                        % (tbl, self.uqKey, k1, self.uqKey, k))
        t.conn.commit()
        t.close()
def pat_match(self):
    #Match a query against the Patent data
    query = self.query
    category = self.category
    tbl = "{category}_list".format(category=category)
    # Work inside a scratch in-memory database with the results DB attached.
    self.t = SQLite.SQLite()
    self.t.attach(self.db)
    self.t.c.execute("""
        CREATE TABLE main AS
            SELECT Patent, Title
              FROM {tbl}
             WHERE query=?
        """.format(tbl=tbl), (query,)).fetchall()
    self.t.index(["Patent"])
    # Join the matched patents against the inventor-patent table,
    # restricted to the 1975-2001 application-year window.
    self.t.attach("/home/ron/disambig/sqlite/invpat.s3")
    self.t.c.execute("""
        CREATE TABLE invpat AS
            SELECT a.* FROM db.invpat AS a
             INNER JOIN main AS b
                ON a.Patent = b.Patent
             WHERE a.AppYearStr BETWEEN 1975 and 2001;
        """)
def handle_inventor():
    ## Clean inventor: ascit(Firstname, Lastname, Street)
    ## Create new table inventor_1 to hold prepped data
    i = SQLite.SQLite(db='inventor.sqlite3', tbl='inventor_1')
    i.conn.create_function("ascit", 1, ascit)
    i.conn.create_function("cc", 3, locFunc.cityctry)
    i.c.execute('drop table if exists inventor_1')
    i.replicate(tableTo='inventor_1', table='inventor')
    # In debug mode only copy a small sample so the pass runs quickly.
    i.c.execute('insert or ignore into inventor_1 select * from inventor %s' % (debug and "LIMIT 2500" or ""))
    # Normalize names/street to ASCII and canonicalize city/country.
    i.c.execute("""
        UPDATE inventor_1
           SET firstname = ascit(firstname),
               lastname = ascit(lastname),
               street = ascit(street),
               City = cc(city, country, 'city'),
               Country = cc(city, country, 'ctry');
        """)
    i.commit()
    # Merge geocoded location fields in from the hash-table database;
    # first pass keys on City/State/Country, second adds Zipcode.
    i.attach('hashTbl.sqlite3')
    i.merge(key=['NCity', 'NState', 'NCountry', 'NZipcode', 'NLat', 'NLong'],
            on=['City', 'State', 'Country'], tableFrom='locMerge', db='db')
    i.merge(key=['NCity', 'NState', 'NCountry', 'NZipcode', 'NLat', 'NLong'],
            on=['City', 'State', 'Country', 'Zipcode'], tableFrom='locMerge', db='db')
    i.commit()
    i.close()
    # t1 is a module-level start timestamp defined elsewhere in this file.
    print "DONE: Inv Locationize!", "\n -", datetime.datetime.now() - t1
def test_constructor_empty(self):
    """A no-argument constructor targets an in-memory db and 'main'."""
    default_wrapper = SQLite.SQLite()
    assert default_wrapper.tbl == 'main'
    assert default_wrapper.db == ':memory:'
def test_constructor_dbname(self):
    """Passing only db= sets the path and leaves the table at 'main'."""
    wrapper = SQLite.SQLite(db='foobar.sqlite3')
    assert wrapper.tbl == 'main'
    assert wrapper.db == 'foobar.sqlite3'
import sys
sys.path.append("/home/ron/PythonBase")
import SQLite

# Export 3-year co-inventorship graphs (GraphML) from the upper- and
# lower-bound invpat databases.
#file = "/media/data/edward/backup/v2/final_r7.sqlite3"
file = "/home/ayu/DVN/upper_results/invpat_uc.sqlite3"
s = SQLite.SQLite(file, 'invpat')
print "Creating upper bound files..."
for x in range(1975, 2010, 3):
    print x
    # Lower Bound results (currently marked as upr because we only have upr)
    s.igraph(where='AppYearStr BETWEEN %d AND %d' % (x, x+2), vx="invnum_N").g.save('/home/ayu/DVN/upper_results/pat_%d_uc.graphml' % x)
    # Upper Bound results
    # s.igraph(where='AppYearStr BETWEEN %d AND %d' % (x, x), vx="new_invnum_N").g.save('pat_%d_upr.graphml' % x)
# Entire US graph for all history
s.igraph(where='Country="US"', vx="invnum_N").g.save('/home/ayu/DVN/upper_results/pat_US_uc.graphml')
#changed low to upr
#s.igraph(where='Country="US"', vx="new_invnum_N").g.save('pat_US_upr.graphml')

file = "/home/ayu/DVN/lower_results/invpat_oc.sqlite3"
s = SQLite.SQLite(file, 'invpat')
print "Creating lower bound files..."
for x in range(1975, 2010, 3):
    print x
    s.igraph(where='AppYearStr BETWEEN %d AND %d' % (x, x+2), vx="invnum_N").g.save('/home/ayu/DVN/lower_results/pat_%d_oc.graphml' % x)
# Entire US graph for all history
def test__dbAdd(self):
    """_dbAdd composes 'db.tbl' names, defaulting to the main table."""
    wrapper = SQLite.SQLite()
    expectations = [
        (dict(), "main"),
        (dict(db="db"), "db.main"),
        (dict(tbl="temp"), "temp"),
        (dict(db="db", tbl="temp"), "db.temp"),
    ]
    for kwargs, expected in expectations:
        self.assertEqual(wrapper._dbAdd(**kwargs), expected)
t1 = datetime.datetime.now()
print "Start", t1

##### Run B2_LocationMatch.py
# Importing the module runs the location-match pass as a side effect.
import B2_LocationMatch
print " - Loc Merge", "\n -", datetime.datetime.now() - t1

###########################
#### A S S I G N E E  #####
###########################

### Create copy of assignee table, add column for assigneeAsc
s = SQLite.SQLite(db='assignee.sqlite3', tbl='assignee_1')
s.conn.create_function("ascit", 1, ascit)
s.conn.create_function("cc", 3, locFunc.cityctry)
s.attach(database='NBER_asg')
s.c.execute("DROP TABLE IF EXISTS assignee_1")
s.replicate(tableTo='assignee_1', table='assignee')
#s.addSQL(data='assignee', insert="IGNORE")
# In debug mode only copy a small sample so the pipeline runs quickly.
s.c.execute("INSERT INTO assignee_1 SELECT * FROM assignee %s" % (debug and "LIMIT 2500" or ""))
s.add('assigneeAsc', 'VARCHAR(30)')
# ASCII-normalized assignee name used as the merge key below.
s.c.execute("UPDATE assignee_1 SET assigneeAsc = ascit(assignee);")
s.commit()
print "DONE: assignee_1 table created in assignee.sqlite3 with new column assigneeAsc", "\n -", datetime.datetime.now() - t1
s.merge(key=[['AsgNum', 'pdpass']], on=[['assigneeAsc', 'assignee']],
def search(self, params, path, orgNum=100, orgEach=5):
    """Build (and memcache) the JSON map payload for a State/CD query.

    params  -- request parameters: 'table', 'State', 'CD', plus optional
               filters (year, amt, agency, degree, Org[], Label[])
    path    -- filesystem path used to locate the per-state sqlite cache
    orgNum  -- historical top-N cap (currently unused in the live code path)
    orgEach -- max distinct values kept per '/'-joined cell

    Returns a JSON string {'marks': ..., 'colors': ...}, or "" when no
    State was supplied.
    """
    import datetime, SQLite, senAdd, os, random, json, types
    import memcache, zipfile
    self.table = params['table']

    def infoSearch(var, typ="str", blank=False):
        # Pull `var` from params.  blank=True returns it verbatim (or
        # None); otherwise coerce to int when requested and collapse
        # ""/missing to None.
        if blank:
            if var in params:
                return params[var]
            else:
                return None
        else:
            ret = var in params and ((typ=="int") and (int)(params[var]) or params[var]) or None
            return (ret!="" and ret!=None) and ret or None

    cd = infoSearch('CD', 'int')
    state = infoSearch('State')
    if state==None:
        return ""
    # CD 0 means "whole state".
    if cd==0 or cd=='0':
        cd=None
    mc = memcache.Client(['127.0.0.1:11211'], debug=0)
    # Cache key derived from every request parameter.
    mckey = "geoPut_"+"|".join([type(params[var])==types.ListType and ",".join(params[var]) or params[var] for var in params])
    #mc.flush_all()
    #mc.delete(mckey)
    if len(mckey)<=250:  # memcached's key-length limit
        jsonout = mc.get(mckey)
    else:
        jsonout = None
    if jsonout==None or len(jsonout)<100:
        # Cache miss: (re)build the per-state sqlite working file.
        output = '%s/sqlite/%s%s-%s.s3' % ("/".join(path.split("/")[:-1]), self.table, state, cd!=None and cd or "")
        s = SQLite.SQLite(output)
        #s.c.execute("DROP TABLE %sGeo" % self.table)
        if not(s.tables("%sGeo" % self.table)):
            tbl = self.table
            def tableIt(table):
                # Clone the master table's schema into the working file.
                cols = self.m.columns(output=False, table=table)
                s.c.execute("CREATE TABLE IF NOT EXISTS %s (%s)" % (table, ", ".join([x for x in cols])))
                return cols
            tableIt('%sGeo' % tbl)
            # Pull this state's (or district's) rows from the master DB.
            if cd==0 or cd==None:
                query = "SELECT * FROM %sGeo WHERE Level=1 AND State=%%s" % tbl
                self.m.c.execute(query, (state,))
            else:
                query = "SELECT * FROM %sGeo WHERE CD=%%s AND State=%%s" % tbl
                self.m.c.execute(query, (cd, state))
            data = self.m.c.fetchall()
            s.addSQL(data=data, table='%sGeo' % tbl)
            #s.index(["IDNum"], table='%sGeo' % tbl)
            s.index(["Year"], table='%sGeo' % tbl)
            s.index(["Org"], table='%sGeo' % tbl)
            s.index(["Lat", "Lng"], table='%sGeo' % tbl)
            #---------------#
            # Assign colors #
            #---------------#
            column = s.columns(output=False, table="%sGeo" % tbl)
            data = zip(*data)
            # All ordered pairs of distinct base colors.
            colors = [x[1] for x in senAdd.colors]
            colX = []
            for w in [[[x, y] for y in colors if x!=y] for x in colors]:
                colX.extend(w)
            # Label/org frequency tables, most common first.
            lbl = senAdd.flatten([x.split("/") for x in data[column.index("Label")] if x is not None])
            lbl = sorted([[x, lbl.count(x)] for x in set(lbl)], key=lambda x:x[1], reverse=True)
            org = s.c.execute("SELECT Org, count(*) as cnt FROM %sGeo GROUP BY Org HAVING Org!='' ORDER BY cnt DESC" % tbl).fetchall()
            def setColors(lst, rand=True):
                # Map each key to a color pair; keys beyond the prepared
                # palette get a random hex plus its white gradient.
                col = rand and random.sample(colX, len(colX)) or colX
                def hexIt():
                    return hex(int(256*random.random()))[-2:].replace("x", "0")
                def colHex():
                    heX = "#%s%s%s" % (hexIt(), hexIt(), hexIt())
                    return [senAdd.colGrad("#FFFFFF", heX, 35), heX]
                lstDct = dict([[x[0], (i<len(col)) and col[i] or colHex()] for i,x in enumerate(lst)])
                lstDct[""] = ["#ffffff", "#cccccc"]
                return lstDct
            lblDct = setColors(lbl)
            orgDct = setColors(org)
            s.c.execute("CREATE TABLE IF NOT EXISTS %sColors (Key, Color1, Color2, Type, UNIQUE(Key, Type));" % tbl)
            s.c.executemany("INSERT OR IGNORE INTO %sColors VALUES (?, ?, ?, ?)" % tbl, [[x[0], x[1][0], x[1][1], 'Label'] for x in lblDct.items()])
            s.c.executemany("INSERT OR IGNORE INTO %sColors VALUES (?, ?, ?, ?)" % tbl, [[x[0], x[1][0], x[1][1], 'Org'] for x in orgDct.items()])
            s.index(['Key', 'Type'], table="%sColors" % tbl)
        #OTHER PARAMETERS GET FILTERED HERE
        year = infoSearch('year')
        amt = infoSearch('amt')
        agency = infoSearch('agency')
        org = infoSearch('Org[]', blank=True)
        label = infoSearch('Label[]', blank=True)
        if type(org)!=types.ListType and org!=None:
            org = [org]
        if type(label)!=types.ListType and label!=None:
            label = [label]
        # Build the WHERE clause from whichever filters were supplied.
        # NOTE(review): filter values are spliced into SQL with %s --
        # injection risk if params carry untrusted input; confirm upstream
        # sanitization.
        where = []
        if year!=None:
            if len(str(year).split("-"))>1:
                where.append("Year BETWEEN %s AND %s" % tuple(year.split("-")))
            else:
                where.append("Year=%s" % year)
        if amt!=None:
            # Amounts arrive in millions.
            if len(str(amt).split("-"))>1:
                where.append("Amount BETWEEN %s AND %s" % tuple([float(x)*1000000.0 for x in amt.split("-")]))
            else:
                where.append("Amount=%s" % (float(amt)*1000000.0))
        #patent specific
        if agency!=None and agency!="":
            where.append("'/'||Agency||'/' LIKE '%%/%s/%%'" % agency)
        degree = infoSearch('degree')
        if self.table=="pat" and degree!=None:
            where.append("degree <= %s" % degree.split("-")[-1])
        if org!=None:
            where.append("Org in ('%s')" % "', '".join(org))
        if label!=None:
            labelStr = []
            for x in label:
                labelStr.append("'/'||Label||'/' LIKE '%%/%s/%%'" % x)
            where.append("(%s)" % (" OR ".join(labelStr)))
        where = " AND ".join(where)
        if where!="":
            where = "WHERE %s" % where
        #Don't need colors now
        #cols = dict([["%s|%s" % (x[0], x[3]), [x[1], x[2]]] for x in s.c.execute("SELECT * FROM %sColors" % self.table).fetchall()])
        #orgs = dict([x[:2] for x in s.c.execute("SELECT Org, OrgName, count(*) as cnt FROM %sGeo GROUP BY Org ORDER BY cnt DESC LIMIT %d" % (self.table, orgNum)).fetchall()])
        orgs = dict([x for x in s.c.execute("SELECT Org, OrgName as cnt FROM %sGeo GROUP BY Org" % (self.table)).fetchall()])
        # Aggregate one row per map mark (lat/lng cluster).
        if self.table=="grant":
            column = ['City', 'Org', 'Label', 'Agency', 'Lng', 'Lat', 'cnt', 'amt']
            data = s.c.execute("SELECT City, GROUP_CONCAT(Org,'/'), GROUP_CONCAT(Label,'/'), GROUP_CONCAT(Agency,'/'), Lng, Lat, count(*) AS cnt, sum(Amount) AS amt FROM %sGeo %s GROUP BY Agency, Lat,Lng" % (self.table, where)).fetchall()
        else:
            column = ['City', 'Org', 'Label', 'Agency', 'Lng', 'Lat', 'cnt']
            data = s.c.execute("SELECT City, GROUP_CONCAT(Org,'/'), GROUP_CONCAT(Label,'/'), GROUP_CONCAT(Agency,'/'), Lng, Lat, count(*) AS cnt FROM %sGeo %s GROUP BY Lat,Lng" % (self.table, where)).fetchall()
        data = zip(*data)
        def splitCount(lists):
            # Collapse "a/b/a" concatenations into "a~2/b~1" summaries,
            # keeping the orgEach most frequent values per cell.
            retLst = []
            for lst in lists:
                if lst==None:
                    retLst.append("")
                elif len(str(lst).split("/"))==1:
                    retLst.append(lst)
                else:
                    lst = lst.split("/")
                    lst = sorted([[x, lst.count(x)] for x in set(lst)], key=lambda x:x[1], reverse=True)[:orgEach]
                    lst = "/".join(["~".join([x[0], str(x[1])]) for x in lst])
                    retLst.append(lst)
            return retLst
        marks = {}
        for i,x in enumerate(data):
            marks[column[i]] = splitCount(x)
        #Determine relevant Colors
        col = {}
        for x in set(data[column.index("Org")]):
            if x!=None:
                for y in x.split("/"):
                    k = y.split("~")[0]
                    key = "%s|%s" % (k, "Org")
                    if k in orgs and key not in col:
                        #col[key] = [cols[key], orgs[k]] ##THIS IS COLOR
                        col[key] = [[], orgs[k]] ##COLOR NOT NEEDED NOW
        #WE ONLY NEED TOP ONES.. LIKE TOP100
        clsCnt = {}
        for x in set(data[column.index("Label")]):
            if x!=None:
                for y in x.split("/"):
                    k = y.split("~")[0]
                    if k not in clsCnt:
                        clsCnt[k] = 0
                    clsCnt[k] = clsCnt[k] + 1
                    #key = "%s|%s" % (k, "Label")
                    #col[key] = [cols[key], k in lblTxt[self.table] and lblTxt[self.table][k] or k] ##THIS IS COLOR
        #clsCnt = sorted(clsCnt.items(), reverse=True, key=lambda x:x[1])[:orgNum]
        clsCnt = sorted(clsCnt.items(), reverse=True, key=lambda x:x[1])
        for x in clsCnt:
            k = x[0]
            key = "%s|%s" % (k, "Label")
            # lblTxt/lblTbl are module-level label dictionaries (defined
            # elsewhere); fall back to the raw key when unmapped.
            col[key] = [[], k in lblTxt[lblTbl[self.table]] and lblTxt[lblTbl[self.table]][k] or k]
        #########################
        s.close()
        def cleanJSON(string):
            # Shrink the payload: round float noise, squeeze whitespace.
            import re
            ## #H000123 > H123
            ## string = re.sub("([A-Z])([0]+)([0-9]+)", lambda x:x.group(1)+x.group(3), string)
            #1.599999 > 1.6
            string = re.sub("([0-9]+[.][0-9]+?)([0-8])[9]{4,}[0-9]+", lambda x:x.group(1)+str(int(x.group(2))+1), string)
            #1.100001 > 1.1
            string = re.sub("([0-9]+[.][0-9]+?)[0]{4,}[0-9]+", lambda x:x.group(1), string)
            string = re.sub(" +", " ", string)
            return string
        jsonout = cleanJSON(json.dumps({'marks':marks, 'colors':col}))
        mc.set(mckey, jsonout)
    return jsonout
def __init__(self, urls, maxconn=20, SQLdb=":memory:", SQLtbl="main",
             cache=604800, opt="F"):
    """Fetch *urls* concurrently, caching responses in a SQLite table.

    urls -- a single url string, a flat list of urls, or [[key, url], ...]
    """
    #cache = how long do we cache the results, in seconds? default = 7 day
    #cache = None, cache forever
    #opt = S(low), M(edium), F(ast). Faster we go: more potential errors
    opt = {
        pycurl.FOLLOWLOCATION: 1,
        pycurl.CONNECTTIMEOUT: {"S": 120, "M": 60, "F": 30}[opt],
        pycurl.TIMEOUT: {"S": 300, "M": 120, "F": 45}[opt],
        pycurl.NOSIGNAL: 1
    }
    #idea type: [[key, url] ... ] -- normalize every input shape to that.
    if type(urls) == types.StringType:
        urls = [[0, urls]]
    elif type(urls) == types.ListType:
        if type(urls[0]) != types.ListType:
            urls = [[i, x] for i, x in enumerate(urls)]
    self.maxconn = maxconn
    self.opt = opt
    self.list = []
    self.SQLtbl = SQLtbl
    # One cache row per key/url with the fetch timestamp.
    self.sql = SQLite.SQLite(SQLdb, tbl=SQLtbl)
    self.sql.c.execute(
        "CREATE TABLE IF NOT EXISTS {tbl} (key, url, html TEXT, created REAL)"
        .format(tbl=SQLtbl))
    self.sql.index(["key"], unique=True)
    self.sql.index(["key", "created"])
    self.sql.index(["url"])
    # Keep only the urls that are missing from the cache or, when a cache
    # window is set, whose cached copy is older than the window.
    okUrl = []
    cTime = time.time()
    for x in urls:
        if self.sql.c.execute(
                "SELECT count(*) FROM {tbl} WHERE key=?".format(
                    tbl=SQLtbl), (x[0], )).fetchone()[0] == 0:
            okUrl.append(x)
        elif cache != None:
            if self.sql.c.execute(
                    "SELECT count(*) FROM {tbl} WHERE key=? and created>?".
                    format(tbl=SQLtbl),
                    (x[0], cTime - cache)).fetchone()[0] == 0:
                okUrl.append(x)
    # self.new reports whether anything actually needed fetching.
    if len(okUrl) > 0:
        self.__init_curl(okUrl, maxconn)
        self.new = True
    else:
        self.new = False
#This file is meant to separate the patent related datasets by Year import sys, datetime, os sys.path.append("/home/ron/PythonBase") import SQLite import senAdd yr = int(sys.argv[1]) #min year src = sys.argv[2] #source directory direc = '/home/ron/disambig/sqlite/backup' print "Generating patent{yr}".format(yr=yr) s = SQLite.SQLite(db='{direc}/patent{yr}'.format(direc=direc, yr=yr)) s.optimize() for file in [ x for x in os.listdir(src) if x.split(".")[1] == "sqlite3" and x.split(".")[0] != "hashTbl" ]: print file s.attach('{src}/{file}'.format(src=src, file=file)) table = file.split(".")[0] s.replicate(tableTo=table, table=table, db="db") s.addSQL(db="db", data=table, table=table) s.close()
#This file is meant to separate the patent related datasets by Year import sys, datetime sys.path.append("/home/ron/PythonBase") import SQLite import senAdd direc = '/home/ron/disambig/sqlite/backup' table = 'patent' yrRng = range(1975, 2010) s = SQLite.SQLite(db='%s/patent2009' % direc) tables = s.tables() s.close() for table in tables: print "Generating", table s = SQLite.SQLite(db='%s/%s.sqlite3' % (direc, table), tbl=table) s.optimize() for yr in yrRng: s.attach('%s/patent%d' % (direc, yr)) #Only add if table exists if s.tables(db='db', lookup=table): s.addSQL(data=table, db='db', insert='IGNORE') print " -", yr, datetime.datetime.now() s.count() s.close()
def graph(self, vertex_list=None, where=None, flag=[], output=":memory:"):
    """Materialize a G0 working table plus vertex (vx0) and edge (ed0) tables.

    NOTE(review): `flag=[]` is a mutable default argument (shared across
    calls); it is only read here, but worth confirming upstream.
    """
    import datetime, SQLite, os
    # Skip the expensive rebuild when the output file already exists.
    oldfile = os.path.isfile(output) and True or False
    s = SQLite.SQLite(output, tbl="G0")
    if not (oldfile):
        if vertex_list != None:
            # Accept either [(id, ...), ...] row tuples or a flat iterable of ids.
            if type(vertex_list) in (types.ListType, types.TupleType):
                vx = [(x[0], ) for x in vertex_list]
            else:
                vx = [(x, ) for x in vertex_list]
            self.c.execute(
                "CREATE TEMPORARY TABLE gmerge (Invnum_N VARCHAR(255), Unique(Invnum_N));"
            )
            # NOTE(review): "INSERT IGNORE" (and DESCRIBE below) is MySQL
            # syntax -- self.c looks like a MySQL cursor while `s` is the
            # SQLite side; confirm.
            self.c.executemany("INSERT IGNORE INTO gmerge VALUES (%s)", vx)
            self.c.execute("""
                CREATE TEMPORARY TABLE G0 AS
                    SELECT a.* FROM %s AS a
                     INNER JOIN gmerge AS b ON a.Invnum_N=b.Invnum_N %s;
                """ % (self.table, (where != None) and "WHERE %s" % where or ""))
            #flag gets created here...
        elif where == None:
            # NOTE(review): this branch looks broken -- it joins against a
            # gmerge table that was never created, runs only when `where`
            # is None yet interpolates `where` into the SQL, and feeds two
            # %s placeholders a single argument (TypeError at runtime).
            self.c.execute(
                """
                CREATE TEMPORARY TABLE G0 AS
                    SELECT a.* FROM %s AS a
                     INNER JOIN gmerge AS b ON a.Invnum_N=b.Invnum_N
                     WHERE %s;""" % self.table, where)
        # CREATE SQLite Data: copy G0's schema and rows over to `s`.
        self.c.execute("DESCRIBE invpat")
        s.conn.create_function("flag", 1, lambda x: x in flag and "x" or "")
        s.c.execute("CREATE TABLE G0 (%s)" % ", ".join([" ".join(x[:2]) for x in self.c.fetchall()]))
        self.c.execute("SELECT * FROM G0")
        s.addSQL(data=self.c.fetchall())
        # Mark every row whose Invnum_N is in the caller-supplied flag set.
        s.add("flag", "")
        s.c.execute("UPDATE G0 SET flag=flag(Invnum_N);")
        #how do we incorporate new fields?
    # Rebuild the vertex table (one row per inventor) and edge table
    # (one row per co-invented patent pair, deduped by Invnum_N ordering).
    s.c.executescript("""
        DROP TABLE IF EXISTS vx0;
        DROP TABLE IF EXISTS ed0;
        CREATE INDEX IF NOT EXISTS G0_AY ON G0 (AppYear);
        CREATE INDEX IF NOT EXISTS G_id ON G0 (Patent);
        CREATE INDEX IF NOT EXISTS G_ed ON G0 (Invnum_N, Patent);
        CREATE TABLE vx0 AS
            SELECT Invnum_N AS id, count(*) AS cnt, *,
                   GROUP_CONCAT(Class) AS Classes
              FROM G0
             GROUP BY Invnum_N
             ORDER BY AppYear;
        CREATE INDEX IF NOT EXISTS vx_id ON vx0 (Invnum_N);
        CREATE TABLE ed0 AS
            SELECT a.Invnum_N AS h, b.Invnum_N AS t,
                   a.AppYear AS AppYear, a.Patent AS Patent, a.Class AS Class
              FROM G0 AS a INNER JOIN G0 AS b
                ON a.Patent=b.Patent AND a.Invnum_N<b.Invnum_N;
        """)
    # Expose vertex/edge lists (minus their key columns) on a senTab holder.
    self.tab = senTab()
    self.tab.vList = s.c.execute("SELECT * FROM vx0").fetchall()
    self.tab.vlst = s.columns(table="vx0", output=False)[1:]
    self.tab.eList = s.c.execute("SELECT * FROM ed0").fetchall()
    self.tab.elst = s.columns(table="ed0", output=False)[2:]
    s.close()
line = f.readline() DayInformationTemp = DayInformation(line) if (DayInformationTemp.RegisterType == "01"): Alldata.append(DayInformationTemp) f.close() print("Load completed") Current_PTOEXE = "" AllDailyStockInfo = [] Alldata.sort(key=Get_PTOEXE) for DayInformationin in Alldata: if Current_PTOEXE != DayInformationin.PTOEXE: InsertStock(AllDailyStockInfo) AllDailyStockInfo.clear() AllDailyStockInfo.append(DayInformationin) Current_PTOEXE = DayInformationin.PTOEXE InsertStock(AllDailyStockInfo) #Change here the name of database file!!! CONST_NAME_DATABASE = 'ActionsInfo.db' sQLite = SQLite(CONST_NAME_DATABASE) sQLite.Open() CreateTables() #Change here the name of TXT bovespa file!!! FileName = "COTAHIST_A2018.TXT" LoadFileCallAddInDB(FileName) sQLite.Close() #input("Press Enter to continue...")
import sys
sys.path.append('./lib/')
import SQLite
import datetime
import shutil

t1 = datetime.datetime.now()
print "Start", t1

##Create invpat: the denormalized inventor-patent table the rest of the
##pipeline consumes.
ip = SQLite.SQLite(db='invpat.sqlite3', tbl='invpat')
ip.c.execute("DROP TABLE IF EXISTS invpat")
ip.c.execute(
    """CREATE TABLE invpat(Firstname TEXT, Middlename TEXT, Lastname TEXT, Street TEXT, City TEXT, State TEXT, Country TEXT, Zipcode TEXT, Latitude REAL, Longitude REAL, InvSeq INT, Patent TEXT, AppYear TEXT, ApplyYear TEXT, GYear INT, AppDate TEXT, Assignee TEXT, AsgNum INT, Class TEXT, Coauthor TEXT, Invnum TEXT, Invnum_N TEXT, Unique_Record_ID TEXT);""")
##From inventor.sqlite3: Firstname, Lastname, Street, City, State, Country, Zipcode, Latitude, Longitude, InvSeq
ip.attach('inventor.sqlite3')
ip.c.execute("""INSERT INTO invpat (
    Firstname, Lastname, Street, City,
    State, Country, Zipcode, Latitude, Longitude,
def test_constructor_dbname_table(self):
    """Both db= and table= keyword arguments are honored."""
    wrapper = SQLite.SQLite(db='foobar.sqlite3', table='table_foo')
    assert wrapper.tbl == 'table_foo'
    assert wrapper.db == 'foobar.sqlite3'
def test_index(self):
    """Smoke test: building the assignee schema and seed data must not raise."""
    s = SQLite.SQLite('test.sqlite3')
    create_assignee_schema(s.c)
    initialize_assignees(s.conn)
    # Reaching this point without an exception is the whole test.
    assert (1 == 1)
#unique subclass-combiation counter import sys, datetime sys.path.append("/home/ron/PythonBase") import SQLite import senAdd s = SQLite.SQLite(db='../sqlite/class_count.sqlite3', tbl='class'); s.conn.create_function("pType", 1, senAdd.patType) s.attach('../sqlite/class.sqlite3', name='cls') s.replicate(table='class', db='cls') s.add('pat_type', 'varchar(1)', table='class') s.add('fullcls', 'text', table='class') s.index(keys=['Patent', 'FullCls'], table="class", unique=True) s.index(keys=['Pat_Type'], table="class", unique=False) s.count() s.c.execute("INSERT OR IGNORE INTO class SELECT *, pType(patent), class||'-'||subclass FROM cls.class") s.commit() s.count() #I don't want to really deal with non utility patents right now, delete them s.c.execute("DELETE FROM class WHERE pat_type!='U'") s.commit() s.count() #First CLASS-SUBCLASS combinations, elimanate these cls = s.c.execute("SELECT min(Patent), FullCls FROM class GROUP BY FullCls").fetchall() s.c.executemany("DELETE FROM class WHERE patent=? and FullCls=?", cls) s.commit() s.count() #Determine all possible pairs for each patent s.c.execute("""