def stopwords():
    cursor, conn = SQLite.connect_to_databse(database_location)
    datatable = SQLite.list_all_rows(cursor, 'papers')
    paperdata = []
    for row in datatable:
        paperdata.append(row[6])
    cleandoc = cleanupdocuments(paperdata)
    id2word = corpora.Dictionary(cleandoc)
    corpus = [id2word.doc2bow(text) for text in cleandoc]
    dictionary = corpora.Dictionary(cleandoc)
    pickle.dump(corpus, open('ALL_corpus.pkl', 'wb'))
    dictionary.save('ALL_dictionary.gensim')
    cp_all = []
    for i in range(0, len(cleandoc)):
        for j in range(0, len(cleandoc[i])):
            cp_all.append(cleandoc[i][j])
    cleandoc.insert(0, cp_all)
    print('done part dos, **thumbs up**')
    dct = Dictionary.load('ALL_dictionary.gensim')
    corpus = [dct.doc2bow(line) for line in cleandoc]
    model = TfidfModel(corpus)
    vector = model[corpus[0]]
    print('done part tres, **smiley face**')
    cp_stop = []
    for token_id, token_weight in vector:
        cp_stop.append((dct.get(token_id), token_weight))
    print('done part quatros, yeehaw!')
    import csv
    headers = ('word', 'score')
    with open('stopwords.csv', 'w', newline='', encoding='utf-8') as outFile:
        wtr = csv.writer(outFile)
        wtr.writerow(headers)
        wtr.writerows(cp_stop)
    with open('stopwords.csv', 'r', newline='', encoding='utf-8') as inFile:
        csvreader = csv.reader(inFile)
        itr = iter(csvreader)
        next(itr)
        # stopwordvalue = [row for row in itr if float(row[1]) > 0.007]
        stopwordvalue = [row for row in itr if float(row[1]) > float(sys.argv[2])]
    with open('stopwords.csv', 'w', newline='', encoding='utf-8') as OutFile:
        wt = csv.writer(OutFile)
        wt.writerow(headers)
        wt.writerows(stopwordvalue)
    print('STOP WORDS FOUND!!! Stored in stopwords.csv')
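# Usage sketch (an assumption, not part of the original file): stopwords()
# takes the TF-IDF cutoff from sys.argv[2] and expects module-level
# database_location, SQLite, cleanupdocuments, and the gensim imports
# (corpora, Dictionary, TfidfModel) to be available, so it would typically
# be driven from the command line roughly like this:
#
#   python find_stopwords.py WorkingPapers.sqlite 0.007
#
import sys

if __name__ == '__main__':
    database_location = sys.argv[1]   # hypothetical CLI argument: database path
    stopwords()                       # words whose TF-IDF weight exceeds sys.argv[2] go to stopwords.csv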
def setUp(self):
    self.removeFile("test.db")
    self.removeFile("test.csv")
    self.removeFile("test2.db")
    self.removeFile("test2.csv")
    # create a really basic dataset
    self.createFile(file="test.db")
    self.s = SQLite.SQLite(db="test.db", tbl="test")
    self.createFile("test2.db")
    s = SQLite.SQLite("test2.db", tbl="test")
    self.s.attach(s)
def setKey(self, db, table="main"):
    s = self.s
    s.open()
    OrgDct = dict(s.c.execute("SELECT %s, %s2 FROM grp" % (self.fld, self.uqKey)).fetchall())
    s.close()
    t = SQLite(db)

    def OrgDctIt(x):
        if x in OrgDct:
            return OrgDct[x]
        else:
            return ""

    t.conn.create_function("OrgDct", 1, OrgDctIt)
    t.c.execute("UPDATE %s SET %s=OrgDct(%s)" % (table, self.uqKey, self.fld))
    t.conn.commit()
    t.close()
def test___init__(self):
    s = SQLite.SQLite()
    self.assertEqual("main", s.tbl)
    self.assertEqual(":memory:", s.path)
    self.assertEqual("test.db", self.s.path)
    self.assertEqual("test", self.s.tbl)
    self.assertFalse(self.s.output)
def __init__(self, match="patent"):
    self.match = match
    files = {
        'patent': ['/home/ron/disambig/sqlite/invpat.sqlite3', 'invpat']
    }
    self.s = SQLite.SQLite(db=files[self.match][0], tbl=files[self.match][1])
def __init__(self, link):
    self.sqlite = SQLite.SQLite()
    self.connection = self.sqlite.create_connection(self.sqlite.database)
    graph = self.retrieve_dataset(link)
    nodes = self.get_number_of_nodes(graph)
    in_degree_id = self.get_in_degree(graph)
    out_degree_id = self.get_out_degree(graph)
    metrics = (nodes, in_degree_id, out_degree_id)
    self.metrics_id = self.sqlite.create_metrics(self.connection, metrics)
    cursor = self.connection.cursor()
    cursor.execute("SELECT rowid FROM graph WHERE uri = ?", (link, ))
    data = cursor.fetchone()
    if data is None:
        graph_data = (link, self.metrics_id)
        self.graph_id = self.sqlite.create_graph(self.connection, graph_data)
    else:
        graph_data = (link, self.metrics_id, data[0])
        self.sqlite.update_graph(self.connection, graph_data)
        self.graph_id = data[0]
    self.connection.commit()
def __init__(self):
    self.sqlite = SQLite.SQLite()
    self.connection = self.sqlite.create_connection(self.sqlite.database)
    self.degree = self.map_degree()
    self.octave = self.map_octave()
    self.type = self.map_type()
def __init__(self, filepath, dbnames, graphml='', begin=1975, end=2010, increment=3):
    """
    takes a filepath string and a list of dbnames
    if graphml files already exist, take the list of files and read into graph list as graph objects
    ex:
        import DVN
        D = DVN.DVN(filepath='/home/ayu/DVN/',
                    dbnames=['patent', 'invpat', 'citation', 'class'],
                    graphml=['pat_2000.graphml', 'pat_2003.graphml'])
        D.summary()
        D.create_csv_file()
    """
    self.filepath = filepath
    self.data = {}
    self.graphs = {}
    self.begin = begin
    self.end = end
    self.increment = increment
    for dbname in dbnames:
        self.data[dbname] = SQLite.SQLite(filepath + dbname + '.sqlite3', dbname)
    if graphml:
        i = 0
        for year in range(self.begin, self.end, self.increment):
            self.graphs[year] = igraph.Graph.Read_GraphML(filepath + graphml[i])
            i = i + 1
def __init__(self, db=None, table=None):
    import SQLite
    self.table = (table == None and "invpat" or table)
    self.sql = SQLite.SQLite(
        db=(db == None and "/home/ron/disambig/sqlite/invpat.sqlite3" or db),
        tbl=self.table)
    self.sql.open()
def handle_patent():
    p = SQLite.SQLite(db='patent.sqlite3', tbl='patent')
    p.conn.create_function('dVert', 1, dateVert)
    p.c.execute("""update patent set AppDate=dVert(AppDate), GDate=dVert(GDate);""")
    p.commit()
    p.close()
    print "DONE: Patent Date!", "\n -", datetime.datetime.now() - t1
def merge(self, keys, db=None, tbl="main"):
    s = self.s
    s.open()
    if len(keys[0]) < 13:
        keys = ["%s%0.12d" % (x[0], int(x[1:])) for x in keys]
    k1 = min(keys)
    for k in keys:
        s.c.execute("UPDATE grp SET %s2='%s' WHERE %s2='%s'" % (self.uqKey, k1, self.uqKey, k))
    s.conn.commit()
    s.close()
    if db != None:
        t = SQLite(db)
        for k in keys:
            t.c.execute("UPDATE %s SET %s='%s' WHERE %s='%s'" % (tbl, self.uqKey, k1, self.uqKey, k))
        t.conn.commit()
        t.close()
def ParseTime(celsStr):
    global DBtimer
    global DBinputTime
    global currentTime
    currentTime = datetime.now()
    if currentTime >= DBinputTime:
        SQLite.InsertData(celsStr)
        currentTime = datetime.now()
        DBinputTime = currentTime + timedelta(seconds=DBtimer)
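# Minimal driver sketch for ParseTime (assumptions: DBtimer is the minimum
# number of seconds between database inserts and DBinputTime is the next time
# an insert is allowed, both module-level globals as implied by the function
# body above; read_temperature() is a hypothetical sensor read).
from datetime import datetime, timedelta
import time

DBtimer = 60                    # hypothetical: insert at most once per minute
DBinputTime = datetime.now()    # first reading is written immediately

# while True:
#     ParseTime(read_temperature())
#     time.sleep(1)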
def __init__(self, db="asg2.sqlite3", fld="Assignee", uqKey="AsgNum",
             other="NCity, NState, NCountry,", table="Assignee_2"):
    self.fld = fld
    self.uqKey = uqKey
    self.other = other
    self.table = table
    self.s = SQLite(db)
def getsec():
    dr = {}
    for i in range(gim):
        dr[i] = SQLite.calc(i)
    dr = U.sortDictV(dr)
    sr = ''
    n = 0
    for k, v in dr:
        n += 1
        sr += sechtml.format(n, getpath(k), ('No.%s ' % (k + 1)) + getdes(k), v)
    return sr
def fetch(self):
    tbl = self.tbl
    tbl2 = self.tbl2
    db = self.db
    query = self.query
    category = self.category
    maxconn = self.maxconn
    #FIRST PATENT
    if category == "grant":
        base = "patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=/netahtml/PTO/search-adv.htm&r=0&f=S&l=50&d=PTXT"
    params = urllib.urlencode({"p": 1, "Query": query})
    url = "http://{base}&{params}".format(base=base, params=params)
    firstkey = "{query}||{page}".format(query=query, page=1)
    PyCurl.PyCurl([[firstkey, url]], maxconn=maxconn, SQLdb=db, SQLtbl=tbl, opt="M")
    self.s = SQLite.SQLite(db=db, tbl=tbl)
    #SUBSEQUENT PATENT
    html = self.grab()
    pats = int(re.findall("<B>Results of Search.*?</B>: ([0-9]+) patents", html)[0])
    pages = int(math.ceil(float(pats)/50))
    print "Query: {query}\n - Patents: {pats}, Pages: {pages}".format(query=query, pats=pats, pages=pages)
    urls = []
    Srch1 = re.findall('<INPUT TYPE="HIDDEN" NAME="Srch1" VALUE="(.*?)">', html)[0]
    for num in range(2, pages+1):
        params = urllib.urlencode({"Srch1": Srch1, "NextList{num}".format(num=num): "N"})
        urls.append(["{query}||{page}".format(query=query, page=num),
                     "http://{base}&{params}".format(base=base, params=params)])
    if len(urls) > 0:
        pc = PyCurl.PyCurl(urls, maxconn=maxconn, SQLdb=db, SQLtbl=tbl, opt="M")
    if pc.new or True:
        #BUILD PATENT LIST
        self.s.chgTbl(tbl2)
        self.s.c.execute("CREATE TABLE IF NOT EXISTS {tbl} (query TEXT, Patent VARCHAR(8), Title TEXT, UNIQUE(query, Patent))".format(tbl=tbl2))
        self.s.index(["Patent"])
        patUrl = []
        for num in range(0, pages):
            html = self.grab(page=num+1)
            base = re.findall("<TABLE><TR><TD>.*?</TABLE>", html, re.S)[0]
            href = re.findall("<A HREF=.*?>(.*?)</A>", base, re.S)
            pats = []
            for i in range(0, len(href), 2):
                pat = [query, re.sub(",", "", href[i]),
                       re.sub(" +", " ", re.sub("\n", "", href[i+1])).strip()]
                pats.append(pat)
                patUrl.append([pat[1], "http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=%2Fnetahtml%2FPTO%2Fsearch-adv.htm&r=1&f=G&l=50&d=PTXT&p=1&p=1&S1={patent}.PN.".format(patent=pat[1])])
            self.s.c.executemany("INSERT OR IGNORE INTO {tbl} VALUES (?, ?, ?)".format(tbl=tbl2), pats)
            self.s.conn.commit()
        if self.patentGrab:
            PyCurl.PyCurl(patUrl, maxconn=maxconn, SQLdb=db, SQLtbl="patent_search", opt="M", cache=None).new
def __init__(self, db=None, sql=None, table=None):
    self.table = (table == None and "invpat" or table)
    if sql == None:
        import SQLite
        self.sql = SQLite.SQLite(
            db=(db == None and "/home/ron/inputdata/Ron/fullset/invpatC.upper.Jan2011.sqlite3" or db),
            tbl=self.table)
        self.sql.open()
    else:
        self.sql = sql
        self.sql.chgTbl(table)
def vote(id='Error!', ip=''):
    ip = ip[0]
    if (SQLite.ip(ip)):
        ip = '<br><br><br>Warning: IP %s HAS ALREADY VOTED' % ip
    else:
        ip = ''
    id = id[0]
    sid = int(id) + 1
    sid = str(sid)
    #### Vote logic ####
    SQLite.vote(id)
    ####################
    sv = U.read(basedir + '/vote.html')
    try:
        sv = sv.replace('{id}', sid)
        sv = sv.replace('{img}', getpath(id))
        sv = sv.replace('{des}', getdes(id) + ip)
    except Exception as e:
        sv = e
    return {
        "content": sv,
        "code": 235,
        "Cache-Control": "no-cache"
    }
def pat_match(self):
    #Match a query against the Patent data
    query = self.query
    category = self.category
    tbl = "{category}_list".format(category=category)
    self.t = SQLite.SQLite()
    self.t.attach(self.db)
    self.t.c.execute("""
        CREATE TABLE main AS
            SELECT Patent, Title
              FROM {tbl}
             WHERE query=?
        """.format(tbl=tbl), (query,)).fetchall()
    self.t.index(["Patent"])
    self.t.attach("/home/ron/disambig/sqlite/invpat.s3")
    self.t.c.execute("""
        CREATE TABLE invpat AS
            SELECT a.*
              FROM db.invpat AS a
             INNER JOIN main AS b
                ON a.Patent = b.Patent
             WHERE a.AppYearStr BETWEEN 1975 and 2001;
        """)
def handle_inventor():
    ## Clean inventor: ascit(Firstname, Lastname, Street)
    ## Create new table inventor_1 to hold prepped data
    i = SQLite.SQLite(db='inventor.sqlite3', tbl='inventor_1')
    i.conn.create_function("ascit", 1, ascit)
    i.conn.create_function("cc", 3, locFunc.cityctry)
    i.c.execute('drop table if exists inventor_1')
    i.replicate(tableTo='inventor_1', table='inventor')
    i.c.execute('insert or ignore into inventor_1 select * from inventor %s' % (debug and "LIMIT 2500" or ""))
    i.c.execute("""
        UPDATE inventor_1
           SET firstname = ascit(firstname),
               lastname = ascit(lastname),
               street = ascit(street),
               City = cc(city, country, 'city'),
               Country = cc(city, country, 'ctry');
        """)
    i.commit()
    i.attach('hashTbl.sqlite3')
    i.merge(key=['NCity', 'NState', 'NCountry', 'NZipcode', 'NLat', 'NLong'],
            on=['City', 'State', 'Country'], tableFrom='locMerge', db='db')
    i.merge(key=['NCity', 'NState', 'NCountry', 'NZipcode', 'NLat', 'NLong'],
            on=['City', 'State', 'Country', 'Zipcode'], tableFrom='locMerge', db='db')
    i.commit()
    i.close()
    print "DONE: Inv Locationize!", "\n -", datetime.datetime.now() - t1
def test__dbAdd(self):
    s = SQLite.SQLite()
    self.assertEqual(s._dbAdd(), "main")
    self.assertEqual(s._dbAdd(db="db"), "db.main")
    self.assertEqual(s._dbAdd(tbl="temp"), "temp")
    self.assertEqual(s._dbAdd(db="db", tbl="temp"), "db.temp")
def test_constructor_empty(self):
    s = SQLite.SQLite()
    assert (s.db == ':memory:')
    assert (s.tbl == 'main')
def Process(self, items=None):
    items = items or self.items
    self.logger.info("OGR2OGR processing", items)

    #### OVERRIDE OGR2OGR if both source and destination are the same SQLite file
    sourceformat = items.get('sourceformat')
    datasource = items.get('datasource')
    destination = items.get('destination')
    name = items.get('name')
    sql = items.get('sql')
    sqlprocessing = (items.get('sqlprocessing', 'OGR')).upper()

    if datasource == destination and sourceformat == 'SQLite' and sql and (sqlprocessing == "SQLITE"):
        message = "# Source and Destination are the same SQLite file:\n"
        message += "# will use internal SQLite module instead of OGR\n"
        message += "# SQLite file: %s\n" % datasource
        message += "# Output table: %s\n" % name
        message += "# SQL: \n%s" % sql
        self.logger.info("#" * 60 + "\n" + message)
        sqlite = SQLite()
        conn = sqlite.connect(datasource)
        cursor = conn.cursor()
        cursor.execute('DROP TABLE IF EXISTS "%s"' % name)
        cursor.execute('CREATE TABLE "%s" AS %s' % (name, sql))
        self.logger.info("Done.")

    # ogr2ogr creates a directory for CSV output.
    # We don't like that. That's why we have ogr2ogr
    # create a temp dir, move all files out of it after
    # it is done, and then remove the temp dir.
    elif items.get('format') == 'CSV':
        # Change the destination dir to the tmpDestDir
        # (which is a subdir of the destDir)
        destDir = os.path.dirname(items['destination'])
        tmpDestDir = destDir + "/tmp" + str(os.getpid())
        items['destination'] = tmpDestDir
        command = self.CreateCommand(items)
        self.ExecuteCommand(command)

        # Move all files from tmpDestDir to destDir
        # and remove the tmpDestDir afterwards
        for file in glob.glob(tmpDestDir + '/*'):
            # Get file name from path
            fileName = os.path.basename(file)
            # Remove any pre-existing files in the destDir first
            destFile = destDir + '/' + fileName
            if os.path.isfile(destFile):
                os.remove(destFile)
            # Move file from tmpDir to destDir
            shutil.move(file, destDir)
            self.logger.info("\n# Moved '%s' to '%s'" % (file, destDir))
        # Remove tmpDestDir (should be empty now)
        os.rmdir(tmpDestDir)
    else:
        # When outputting to tab files, files cannot be overwritten easily.
        # We delete them manually if the overwrite option has been specified.
        if items.get('format') == 'MapInfo File' and items.get('overwrite', True):
            self.logger.info("\n# Overwrite option is on: will delete existing files with same name (if they exist)\n")
            (filepath, filename) = os.path.split(items['destinationstore'])
            (shortname, extension) = os.path.splitext(filename)
            # We go through the possible extensions for MapInfo files that compose a layer
            # and delete the corresponding files if they exist
            for file in (glob.glob(os.path.join(filepath, shortname) + ext) for ext in ('.tab', '.id', '.map', '.dat', '.ind')):
                #shutil.move(file, file+'.bak')
                if file and os.path.isfile(file[0]):
                    self.logger.info("# Deleting file: '%s' " % (file[0]))
                    os.remove(file[0])
        # Basic processing
        command = self.CreateCommand(items)
        self.ExecuteCommand(command)

    self.PostProcess(items)
def cleanUS(self, tbl, dbBase=None, locVar=["City", "State", "Zipcode"], reset=False):
    #Congressional District + Zipcode
    if dbBase == None:
        dbBase = "/home/ron/disambig/geo/CD_ZIP.sqlite3"
    s = SQLite(db=self.db, tbl=self.tbl)
    s.attach(dbBase)
    locStr = ", ".join(locVar)
    locQStr = "=? AND ".join(locVar) + "=?"
    s.index(locVar)
    if 'lat1' not in s.columns(output=False) or reset:
        s.merge(key=[['lat1', 'latitude'], ['lng1', 'longitude'], ['CD1', 'CD'], ['State1', 'State']],
                on=[[locVar[2], 'Zipcode']], tableFrom='congdistZip', db='db')
        s.merge(key=[[locVar[2]+"2", 'Zipcode']],
                on=[[locVar[0], 'City'], [locVar[1], 'State']], tableFrom='USCities', db='db')
        s.merge(key=[['lat2', 'latitude'], ['lng2', 'longitude'], ['CD2', 'CD'], ['State2', 'State']],
                on=[[locVar[2]+'2', 'Zipcode']], tableFrom='congdistZip', db='db')
        s.add('lat', '')
        s.add('lng', '')
        s.add('CD', '')
        s.c.execute("UPDATE %s SET lat='', lng='', CD=''" % tbl)

    ## HERE ARE MY ASSUMPTIONS TO PUT LNG/LAT INTO LOU'S GRANT FILE -- 944,549 total records
    ## 1. City, State match is more precise than Zipcode match (sometimes Zip is just wrong..) Use that as default -- (206,369) 21.8%
    ## 2. If City, State match doesn't happen, then I default to Zipcode match ... small (3,998) 0.4%
    ## 3. If CD, State match -- use Zipcode centroid. (693,922) 73.5%
    ## 4. 1-3 not capturing anything BUT city is filled (I did a quick scan, these all basically look foreign, see CSV) (7,217) 0.7%
    ## 5. Organization labeled as "UNKNOWN" (without City, State) - (30,640) 3.3%
    ## 6. Blanks
    ##    a) create frequency table of Standized_Organization with Zipcodes.
    ##    b) check if organization exists in database. If so, align it with most frequent Zipcode combo (1,356) 0.1%
    ## 7. Blank (non 6) (1007) 0.1%
    ## 8. UNKNOWN, Blank or "Foreign" -- Delete for now, although I have the CSV output saved as blankCode.csv (38,864)
    ## 9. Remaining records: (905,685) 95.8%

    if s.c.execute("SELECT count(*) FROM %s WHERE lat='' or lat is null" % tbl).fetchone()[0] > 0:
        #Update everything to reflect 2nd
        #print datetime.datetime.now()
        g = s.c.execute("SELECT lat2, lng2, State2, CD2, %s FROM %s GROUP BY %s" % (locStr, tbl, locStr)).fetchall()
        if len(g) > 0:
            s.c.executemany("UPDATE %s SET lat=?, lng=?, State=?, CD=? WHERE %s" % (tbl, locQStr), g)
        #If State,CD!= Take Lat1, Lng1 ... I trust the City, State combo more overall (not the Zipcode)
        #print datetime.datetime.now()
        g = s.c.execute("SELECT lat1, lng1, State1, CD1, %s FROM %s GROUP BY %s HAVING CD2='' or CD2 is null" % (locStr, tbl, locStr)).fetchall()
        if len(g) > 0:
            s.c.executemany("UPDATE %s SET lat=?, lng=?, State=?, CD=? WHERE %s" % (tbl, locQStr), g)
        #If State,CD= Take Lat1, Lng1
        #print datetime.datetime.now()
        g = s.c.execute("SELECT lat1, lng1, State1, CD1, %s FROM %s GROUP BY %s HAVING State1=State" % (locStr, tbl, locStr)).fetchall()
        if len(g) > 0:
            s.c.executemany("UPDATE %s SET lat=?, lng=?, State=?, CD=? WHERE %s" % (tbl, locQStr), g)
    s.close()
import matplotlib as plt
import numpy as np
import time
import sys

# =============================================================================
# User variables
# Location of SQLite database
# =============================================================================
database_location = '/home/greenbur/NLP/Python Code/WorkingPapersGOMlg.sqlite'
# Path where model will be saved
savemodelpath = '/home/greenbur/NLP/Results/GOMlgvec.txt'

# Load document data from database
# Connect to SQLite database and load data
cursor, conn = SQLite.connect_to_databse(database_location)
datatable = SQLite.list_all_rows(cursor, 'papers')

# Collect paper text and load to python list
paperdata = []
for row in datatable:
    paperdata.append(row[6])

# Clean text for processing
cleandoc = cleanupdocuments(paperdata)
print("Documents loaded and ready to process")

# This section builds the Word2Vec model and saves the model
print("Starting word2vec")
# Build Word2Vec model, params adjusted for future testing
import socket, pickle, SQLite

host = ''      # Symbolic name meaning all available interfaces
port = 12345   # Arbitrary non-privileged port

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((host, port))
s.listen(1)
conn, addr = s.accept()
print('Connection from: ', addr)

while True:
    in_data = conn.recv(1024)
    if not in_data:
        break
    sql_data = pickle.loads(in_data)
    print('Adding the Following to the DataBase: ', sql_data)
    SQLite.add_to_db('server', [sql_data[0], sql_data[1], sql_data[2], sql_data[3]])
    SQLite.close_conn()
    conn.sendall(in_data)

conn.close()
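# Companion client sketch (an assumption; only the server above is in the
# original). The host, port, and 4-element payload shape are taken from the
# server code; the actual field meanings depend on SQLite.add_to_db and are
# hypothetical here.
import socket, pickle

payload = ('2018-06-01', '12:00:00', 'sensor1', 21.5)   # hypothetical record
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client.connect(('localhost', 12345))
client.sendall(pickle.dumps(payload))      # server unpickles and stores this
echo = pickle.loads(client.recv(1024))     # server echoes the raw bytes back
print('Server acknowledged: ', echo)
client.close()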
def graph(self, vertex_list=None, where=None, flag=[], output=":memory:"):
    import datetime, SQLite, os
    oldfile = os.path.isfile(output) and True or False
    s = SQLite.SQLite(output, tbl="G0")
    if not (oldfile):
        if vertex_list != None:
            if type(vertex_list) in (types.ListType, types.TupleType):
                vx = [(x[0], ) for x in vertex_list]
            else:
                vx = [(x, ) for x in vertex_list]
            self.c.execute("CREATE TEMPORARY TABLE gmerge (Invnum_N VARCHAR(255), Unique(Invnum_N));")
            self.c.executemany("INSERT IGNORE INTO gmerge VALUES (%s)", vx)
            self.c.execute("""
                CREATE TEMPORARY TABLE G0 AS
                    SELECT a.* FROM %s AS a
                     INNER JOIN gmerge AS b ON a.Invnum_N=b.Invnum_N %s;
                """ % (self.table, (where != None) and "WHERE %s" % where or ""))
            #flag gets created here...
        elif where == None:
            self.c.execute("""
                CREATE TEMPORARY TABLE G0 AS
                    SELECT a.* FROM %s AS a
                     INNER JOIN gmerge AS b ON a.Invnum_N=b.Invnum_N
                     WHERE %s;""" % self.table, where)

        # CREATE SQLite Data
        self.c.execute("DESCRIBE invpat")
        s.conn.create_function("flag", 1, lambda x: x in flag and "x" or "")
        s.c.execute("CREATE TABLE G0 (%s)" % ", ".join([" ".join(x[:2]) for x in self.c.fetchall()]))
        self.c.execute("SELECT * FROM G0")
        s.addSQL(data=self.c.fetchall())
        s.add("flag", "")
        s.c.execute("UPDATE G0 SET flag=flag(Invnum_N);")
        #how do we incorporate new fields?
        s.c.executescript("""
            DROP TABLE IF EXISTS vx0;
            DROP TABLE IF EXISTS ed0;
            CREATE INDEX IF NOT EXISTS G0_AY ON G0 (AppYear);
            CREATE INDEX IF NOT EXISTS G_id ON G0 (Patent);
            CREATE INDEX IF NOT EXISTS G_ed ON G0 (Invnum_N, Patent);
            CREATE TABLE vx0 AS
                SELECT Invnum_N AS id, count(*) AS cnt, *, GROUP_CONCAT(Class) AS Classes
                  FROM G0 GROUP BY Invnum_N ORDER BY AppYear;
            CREATE INDEX IF NOT EXISTS vx_id ON vx0 (Invnum_N);
            CREATE TABLE ed0 AS
                SELECT a.Invnum_N AS h, b.Invnum_N AS t,
                       a.AppYear AS AppYear, a.Patent AS Patent, a.Class AS Class
                  FROM G0 AS a
                 INNER JOIN G0 AS b ON a.Patent=b.Patent AND a.Invnum_N<b.Invnum_N;
            """)
    self.tab = senTab()
    self.tab.vList = s.c.execute("SELECT * FROM vx0").fetchall()
    self.tab.vlst = s.columns(table="vx0", output=False)[1:]
    self.tab.eList = s.c.execute("SELECT * FROM ed0").fetchall()
    self.tab.elst = s.columns(table="ed0", output=False)[2:]
    s.close()
#This file is meant to separate the patent related datasets by Year
import sys, datetime, os
sys.path.append("/home/ron/PythonBase")
import SQLite
import senAdd

yr = int(sys.argv[1])   #min year
src = sys.argv[2]       #source directory
direc = '/home/ron/disambig/sqlite/backup'

print "Generating patent{yr}".format(yr=yr)
s = SQLite.SQLite(db='{direc}/patent{yr}'.format(direc=direc, yr=yr))
s.optimize()
for file in [x for x in os.listdir(src)
             if x.split(".")[1] == "sqlite3" and x.split(".")[0] != "hashTbl"]:
    print file
    s.attach('{src}/{file}'.format(src=src, file=file))
    table = file.split(".")[0]
    s.replicate(tableTo=table, table=table, db="db")
    s.addSQL(db="db", data=table, table=table)
s.close()
def test_constructor_dbname(self):
    s = SQLite.SQLite(db='foobar.sqlite3')
    assert (s.db == 'foobar.sqlite3')
    assert (s.tbl == 'main')
def test_index(self):
    s = SQLite.SQLite('test.sqlite3')
    create_assignee_schema(s.c)
    initialize_assignees(s.conn)
    assert (1 == 1)
line = f.readline()
DayInformationTemp = DayInformation(line)
if (DayInformationTemp.RegisterType == "01"):
    Alldata.append(DayInformationTemp)
f.close()
print("Load completed")

Current_PTOEXE = ""
AllDailyStockInfo = []
Alldata.sort(key=Get_PTOEXE)
for DayInformationin in Alldata:
    if Current_PTOEXE != DayInformationin.PTOEXE:
        InsertStock(AllDailyStockInfo)
        AllDailyStockInfo.clear()
    AllDailyStockInfo.append(DayInformationin)
    Current_PTOEXE = DayInformationin.PTOEXE
InsertStock(AllDailyStockInfo)

#Change here the name of database file!!!
CONST_NAME_DATABASE = 'ActionsInfo.db'
sQLite = SQLite(CONST_NAME_DATABASE)
sQLite.Open()
CreateTables()

#Change here the name of TXT bovespa file!!!
FileName = "COTAHIST_A2018.TXT"
LoadFileCallAddInDB(FileName)

sQLite.Close()
#input("Press Enter to continue...")
def test_constructor_dbname_table(self):
    s = SQLite.SQLite(db='foobar.sqlite3', table='table_foo')
    assert (s.db == 'foobar.sqlite3')
    assert (s.tbl == 'table_foo')
#unique subclass-combination counter
import sys, datetime
sys.path.append("/home/ron/PythonBase")
import SQLite
import senAdd

s = SQLite.SQLite(db='../sqlite/class_count.sqlite3', tbl='class')
s.conn.create_function("pType", 1, senAdd.patType)
s.attach('../sqlite/class.sqlite3', name='cls')
s.replicate(table='class', db='cls')
s.add('pat_type', 'varchar(1)', table='class')
s.add('fullcls', 'text', table='class')
s.index(keys=['Patent', 'FullCls'], table="class", unique=True)
s.index(keys=['Pat_Type'], table="class", unique=False)
s.count()
s.c.execute("INSERT OR IGNORE INTO class SELECT *, pType(patent), class||'-'||subclass FROM cls.class")
s.commit()
s.count()

#I don't really want to deal with non-utility patents right now, delete them
s.c.execute("DELETE FROM class WHERE pat_type!='U'")
s.commit()
s.count()

#First CLASS-SUBCLASS combinations, eliminate these
cls = s.c.execute("SELECT min(Patent), FullCls FROM class GROUP BY FullCls").fetchall()
s.c.executemany("DELETE FROM class WHERE patent=? and FullCls=?", cls)
s.commit()
s.count()

#Determine all possible pairs for each patent
s.c.execute("""
"sid_meta2_name TEXT," "sid_meta3_name TEXT," "contains_crosslink BOOLEAN)" ) # add meta columns try: cur.execute('ALTER TABLE spectrum_identifications ADD COLUMN meta1 TEXT') cur.execute('ALTER TABLE spectrum_identifications ADD COLUMN meta2 TEXT') cur.execute('ALTER TABLE spectrum_identifications ADD COLUMN meta3 TEXT') except Exception: print('{}: Meta columns exist already - not updated'.format(db_name)) try: # add precursor information from peak list file to DB cur.execute('ALTER TABLE spectra ADD COLUMN precursor_mz TEXT') cur.execute('ALTER TABLE spectra ADD COLUMN precursor_charge TEXT') except Exception: print('{}: spectrum precursor columns exist already - not updated'.format(db_name)) con.commit() return True import glob for db_name in glob.glob("./dbs/saved/*.db"): con = SQLite.connect(db_name) update_database(con, db_name)
import sys
sys.path.append('./lib/')
import SQLite
import datetime
import shutil

t1 = datetime.datetime.now()
print "Start", t1

##Create invpat
ip = SQLite.SQLite(db='invpat.sqlite3', tbl='invpat')
ip.c.execute("DROP TABLE IF EXISTS invpat")
ip.c.execute("""CREATE TABLE invpat(
    Firstname TEXT, Middlename TEXT, Lastname TEXT, Street TEXT,
    City TEXT, State TEXT, Country TEXT, Zipcode TEXT,
    Latitude REAL, Longitude REAL, InvSeq INT,
    Patent TEXT, AppYear TEXT, ApplyYear TEXT, GYear INT, AppDate TEXT,
    Assignee TEXT, AsgNum INT, Class TEXT, Coauthor TEXT,
    Invnum TEXT, Invnum_N TEXT, Unique_Record_ID TEXT);""")

##From inventor.sqlite3: Firstname, Lastname, Street, City, State, Country, Zipcode, Latitude, Longitude, InvSeq
ip.attach('inventor.sqlite3')
ip.c.execute("""INSERT INTO invpat (
    Firstname, Lastname, Street,
    City, State, Country, Zipcode,
    Latitude, Longitude,