Example #1
0
def stopwords():
    """Score every token of the ``papers`` table with TF-IDF and save the high scorers.

    Steps, in order:

    1. Read column 6 of every row of ``papers`` and clean it with
       ``cleanupdocuments``.
    2. Build a gensim dictionary and bag-of-words corpus; persist them to
       ``ALL_corpus.pkl`` and ``ALL_dictionary.gensim``.
    3. Prepend one pseudo-document holding every token of every document and
       compute its TF-IDF vector.
    4. Write all ``(word, score)`` pairs to ``stopwords.csv``, read the file
       back, and rewrite it keeping only rows whose score is greater than the
       threshold given as ``sys.argv[2]``.

    Raises:
        IndexError: if no second command-line argument was supplied.
        ValueError: if the threshold or a stored score is not a number.
    """
    import csv

    cursor, conn = SQLite.connect_to_databse(database_location)
    datatable = SQLite.list_all_rows(cursor, 'papers')
    paperdata = [row[6] for row in datatable]

    cleandoc = cleanupdocuments(paperdata)
    # One dictionary serves both the corpus and the saved artifact; the
    # original built two identical dictionaries from the same input.
    dictionary = corpora.Dictionary(cleandoc)
    corpus = [dictionary.doc2bow(text) for text in cleandoc]

    # Context manager so the pickle file is flushed and closed deterministically.
    with open('ALL_corpus.pkl', 'wb') as corpus_file:
        pickle.dump(corpus, corpus_file)
    dictionary.save('ALL_dictionary.gensim')

    # Flatten every document into a single token list and make it document 0.
    cp_all = [token for document in cleandoc for token in document]
    cleandoc.insert(0, cp_all)
    print('done part dos, **thumbs up**')

    dct = Dictionary.load('ALL_dictionary.gensim')
    corpus = [dct.doc2bow(line) for line in cleandoc]
    model = TfidfModel(corpus)
    # TF-IDF weights of the combined pseudo-document only.
    vector = model[corpus[0]]
    print('done part tres, **smiley face**')

    cp_stop = [(dct.get(token_id), token_weight)
               for token_id, token_weight in vector]
    print('done part quatros, yeehaw!')

    headers = ('word','score')

    with open('stopwords.csv','w',newline='',encoding='utf-8') as outFile:
        wtr = csv.writer(outFile)
        wtr.writerow(headers)
        wtr.writerows(cp_stop)

    # Parse the cut-off once instead of once per CSV row.
    threshold = float(sys.argv[2])

    with open('stopwords.csv', 'r', newline='', encoding='utf-8') as inFile:
        csvreader = csv.reader(inFile)
        next(csvreader)  # skip the header row
        stopwordvalue = [row for row in csvreader if float(row[1]) > threshold]

    with open('stopwords.csv','w',newline='',encoding='utf-8') as OutFile:
        wt = csv.writer(OutFile)
        wt.writerow(headers)
        wt.writerows(stopwordvalue)

    print('STOP WORDS FOUND!!! Stored in stopwords.csv')
 def setUp(self):
     """Remove leftover fixture files, then open two databases and attach the second to the first."""
     # Clear anything a previous run may have left on disk.
     for stale in ("test.db", "test.csv", "test2.db", "test2.csv"):
         self.removeFile(stale)
     # create a really basic dataset
     self.createFile(file="test.db")
     self.s = SQLite.SQLite(db="test.db", tbl="test")
     self.createFile("test2.db")
     secondary = SQLite.SQLite("test2.db", tbl="test")
     self.s.attach(secondary)
Example #3
0
 def setKey(self, db, table="main"):
     """Write the unique key stored in ``grp`` onto the rows of *table* in *db*.

     The ``(fld, uqKey2)`` pairs are read from ``grp`` through ``self.s`` and
     exposed to the target database as the SQL function ``OrgDct``; every row
     of *table* then gets ``uqKey = OrgDct(fld)``.  Values of ``fld`` that are
     not present in ``grp`` are mapped to the empty string.
     """
     source = self.s
     source.open()
     lookup_sql = "SELECT %s, %s2 FROM grp" % (self.fld, self.uqKey)
     OrgDct = dict(source.c.execute(lookup_sql).fetchall())
     source.close()

     target = SQLite(db)
     # Unknown values fall back to "" rather than raising inside SQLite.
     target.conn.create_function("OrgDct", 1, lambda value: OrgDct.get(value, ""))
     target.c.execute("UPDATE %s SET %s=OrgDct(%s)" % (table, self.uqKey, self.fld))
     target.conn.commit()
     target.close()
 def test___init__(self):
     """Check constructor defaults on a fresh object and the settings of the fixture ``self.s``."""
     default_db = SQLite.SQLite()
     self.assertEqual("main", default_db.tbl)
     self.assertEqual(":memory:", default_db.path)
     # The fixture object must keep the values it was created with.
     fixture = self.s
     self.assertEqual("test.db", fixture.path)
     self.assertEqual("test", fixture.tbl)
     self.assertFalse(fixture.output)
Example #5
0
 def __init__(self, match="patent"):
     """Open the database registered for *match*.

     ``match`` selects a ``[database path, table name]`` entry; an unknown
     value raises ``KeyError``.
     """
     self.match = match
     files = {
         'patent': ['/home/ron/disambig/sqlite/invpat.sqlite3', 'invpat']
     }
     db_path, table_name = files[self.match]
     self.s = SQLite.SQLite(db=db_path, tbl=table_name)
Example #6
0
    def __init__(self, link):
        """Compute metrics for the dataset at *link* and record them in the database.

        A metrics row is always created.  The ``graph`` row whose ``uri``
        equals *link* is then updated if it exists, otherwise created; in both
        cases ``self.graph_id`` ends up holding its id.  The work is committed
        before returning.
        """
        self.sqlite = SQLite.SQLite()
        self.connection = self.sqlite.create_connection(self.sqlite.database)

        graph = self.retrieve_dataset(link)
        metrics = (
            self.get_number_of_nodes(graph),
            self.get_in_degree(graph),
            self.get_out_degree(graph),
        )
        self.metrics_id = self.sqlite.create_metrics(self.connection, metrics)

        cursor = self.connection.cursor()
        cursor.execute("SELECT rowid FROM graph WHERE uri = ?", (link, ))
        existing = cursor.fetchone()

        if existing is not None:
            # The uri is already known: point the stored row at the new metrics.
            self.sqlite.update_graph(self.connection,
                                     (link, self.metrics_id, existing[0]))
            self.graph_id = existing[0]
        else:
            self.graph_id = self.sqlite.create_graph(self.connection,
                                                     (link, self.metrics_id))

        self.connection.commit()
Example #7
0
    def __init__(self):
        """Open the SQLite backend and store the results of the three ``map_*`` helpers."""
        backend = SQLite.SQLite()
        self.sqlite = backend
        self.connection = backend.create_connection(backend.database)

        # Each helper is called once, in this order.
        self.degree = self.map_degree()
        self.octave = self.map_octave()
        self.type = self.map_type()
Example #8
0
    def __init__(self,
                 filepath,
                 dbnames,
                 graphml='',
                 begin=1975,
                 end=2010,
                 increment=3):
        """
        Open one SQLite database per name and optionally load saved graphs.

        filepath  -- directory prefix (string) prepended to every file name
        dbnames   -- list of database names; each is opened as
                     ``filepath + name + '.sqlite3'`` with the name as table
        graphml   -- optional list of GraphML file names, one per year of
                     ``range(begin, end, increment)`` and in that order
        begin, end, increment -- define the years the graphs are keyed by

        ex:
        import DVN
        D = DVN.DVN(filepath='/home/ayu/DVN/', dbnames=['patent', 'invpat', 'citation', 'class'], graphml = ['pat_2000.graphml', 'pat_2003.graphml'])
        D.summary()
        D.create_csv_file()
        """
        self.filepath = filepath
        self.data = {}
        self.graphs = {}
        self.begin = begin
        self.end = end
        self.increment = increment
        for dbname in dbnames:
            db_file = filepath + dbname + '.sqlite3'
            self.data[dbname] = SQLite.SQLite(db_file, dbname)
        if not graphml:
            return
        # The n-th file name belongs to the n-th year of the range.
        years = range(self.begin, self.end, self.increment)
        for position, year in enumerate(years):
            self.graphs[year] = igraph.Graph.Read_GraphML(filepath +
                                                          graphml[position])
Example #9
0
 def __init__(self, db=None, table=None):
     """Open *table* of *db*, using the invpat database and table when omitted."""
     import SQLite
     self.table = "invpat" if table is None else table
     db_path = "/home/ron/disambig/sqlite/invpat.sqlite3" if db is None else db
     self.sql = SQLite.SQLite(db=db_path, tbl=self.table)
     self.sql.open()
Example #10
0
def handle_patent():
    p = SQLite.SQLite(db='patent.sqlite3', tbl='patent')
    p.conn.create_function('dVert', 1, dateVert)
    p.c.execute(
        """update patent set AppDate=dVert(AppDate), GDate=dVert(GDate);""")
    p.commit()
    p.close()
    print "DONE: Patent Date!", "\n   -", datetime.datetime.now() - t1
Example #11
0
    def merge(self, keys, db=None, tbl="main"):
        """Collapse several unique keys into the smallest one.

        Every row of ``grp`` whose ``<uqKey>2`` column holds one of *keys* is
        rewritten to the minimum key.  When *db* is given, the same rewrite is
        applied to column ``<uqKey>`` of table *tbl* in that database.

        keys -- sequence of key strings.  If the first key is shorter than 13
                characters, every key is expanded to its first character
                followed by the rest as a 12-digit zero-padded integer.
                An empty sequence is a no-op.
        db   -- optional path of a second database to update.
        tbl  -- table to update in *db*.
        """
        if not keys:
            # Nothing to merge; avoids IndexError on keys[0] below.
            return

        s = self.s
        s.open()
        if len(keys[0])<13:
            keys = ["%s%0.12d" % (x[0], int(x[1:])) for x in keys]

        k1 = min(keys)
        # Identifiers cannot be bound, so the column name is still formatted
        # in; the key values are bound as parameters so that quotes inside a
        # key cannot break or alter the statement.
        grp_sql = "UPDATE grp SET %s2=? WHERE %s2=?" % (self.uqKey, self.uqKey)
        for k in keys:
            s.c.execute(grp_sql, (k1, k))
        s.conn.commit()
        s.close()
        if db is not None:
            t = SQLite(db)
            tbl_sql = "UPDATE %s SET %s=? WHERE %s=?" % (tbl, self.uqKey, self.uqKey)
            for k in keys:
                t.c.execute(tbl_sql, (k1, k))
            t.conn.commit()
            t.close()
Example #12
0
def ParseTime(celsStr):
    """Store *celsStr* in the database once the scheduled insert time is reached.

    Updates the module-level ``currentTime`` on every call.  When it has
    reached ``DBinputTime`` the value is inserted and the next insert is
    scheduled ``DBtimer`` seconds after the time of the insert.
    """
    global DBtimer, DBinputTime, currentTime
    currentTime = datetime.now()
    if currentTime < DBinputTime:
        # Not due yet: leave the schedule untouched.
        return
    SQLite.InsertData(celsStr)
    currentTime = datetime.now()
    DBinputTime = currentTime + timedelta(seconds=DBtimer)
Example #13
0
    def __init__(self,
                 db="asg2.sqlite3", fld="Assignee", uqKey="AsgNum",
                 other="NCity, NState, NCountry,", table="Assignee_2"):
        """Remember the field, key, extra-column and table names and open *db*."""
        self.fld, self.uqKey = fld, uqKey
        self.other, self.table = other, table
        self.s = SQLite(db)
Example #14
0
File: PVote.py Project: QGB/PVote
def getsec():
	"""Return the concatenated ``sechtml`` fragments for all ``gim`` entries.

	Each entry index is scored with ``SQLite.calc``; the scores are ordered by
	``U.sortDictV`` and rendered in that order with a rank starting at 1.
	"""
	scores={i:SQLite.calc(i) for i in range(gim)}
	ranked=U.sortDictV(scores)
	fragments=[]
	for n,(k,v) in enumerate(ranked,1):
		fragments.append(sechtml.format(n,getpath(k),('No.%s '%(k+1))+getdes(k),v))
	return ''.join(fragments)
Example #15
0
    def fetch(self):
        """Download the USPTO result pages for ``self.query`` and build the patent list.

        Steps visible in this method:

        1. Build the first-page search URL and hand it to ``PyCurl.PyCurl``
           together with the target database/table (presumably it downloads
           and stores the page there -- confirm against PyCurl).
        2. Read the page back through ``self.grab()`` and parse the total
           number of patents (50 per page) and the hidden ``Srch1`` field.
        3. Hand the URLs of pages 2..N to ``PyCurl.PyCurl``.
        4. Parse every page into ``(query, Patent, Title)`` rows, insert them
           into ``self.tbl2`` and, if ``self.patentGrab`` is set, request each
           individual patent page.

        NOTE(review): ``base`` and ``params`` are only assigned when
        ``category == "grant"``; any other category fails with a NameError on
        the ``url = ...`` line.
        """
        tbl = self.tbl
        tbl2 = self.tbl2
        db = self.db
        query = self.query
        category = self.category
        maxconn = self.maxconn
        #FIRST PATENT
        if category=="grant":
            base = "patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=/netahtml/PTO/search-adv.htm&r=0&f=S&l=50&d=PTXT"
            params = urllib.urlencode({"p":1, "Query":query})
        url = "http://{base}&{params}".format(base=base, params=params)

        # Keys have the form "<query>||<page number>".
        firstkey = "{query}||{page}".format(query=query, page=1)
        PyCurl.PyCurl([[firstkey, url]], maxconn=maxconn, SQLdb=db, SQLtbl=tbl, opt="M")
        self.s = SQLite.SQLite(db=db, tbl=tbl)

        #SUBSEQUENT PATENT
        html = self.grab()
        # Raises IndexError if the result banner is missing from the page.
        pats = int(re.findall("<B>Results of Search.*?</B>: ([0-9]+) patents", html)[0])
        # 50 results per page (l=50 in the base URL).
        pages = int(math.ceil(float(pats)/50))
        print "Query: {query}\n  - Patents: {pats}, Pages: {pages}".format(query=query, pats=pats, pages=pages)

        urls = []
        Srch1 = re.findall('<INPUT TYPE="HIDDEN" NAME="Srch1" VALUE="(.*?)">', html)[0]
        for num in range(2, pages+1):
            params = urllib.urlencode({"Srch1":Srch1, "NextList{num}".format(num=num):"N"})
            urls.append(["{query}||{page}".format(query=query, page=num), "http://{base}&{params}".format(base=base, params=params)])

        # NOTE(review): everything below only runs when there is more than one
        # result page, so a single-page query never fills tbl2 -- confirm intended.
        if len(urls)>0:
            pc = PyCurl.PyCurl(urls, maxconn=maxconn, SQLdb=db, SQLtbl=tbl, opt="M")

            # NOTE(review): "or True" makes this condition always true, so
            # pc.new has no effect here.
            if pc.new or True:
                #BUILD PATENT LIST
                self.s.chgTbl(tbl2)
                self.s.c.execute("CREATE TABLE IF NOT EXISTS {tbl} (query TEXT, Patent VARCHAR(8), Title TEXT, UNIQUE(query, Patent))".format(tbl=tbl2))
                self.s.index(["Patent"])
                patUrl = []
                for num in range(0, pages):
                    html = self.grab(page=num+1)
                    # "base" is reused here for the HTML of the results table.
                    base = re.findall("<TABLE><TR><TD>.*?</TABLE>", html, re.S)[0]
                    href = re.findall("<A  HREF=.*?>(.*?)</A>", base, re.S)
                    pats = []
                    # Link texts are consumed in pairs: patent number, then title.
                    for i in range(0, len(href), 2):
                         pat = [query, re.sub(",", "", href[i]), re.sub("  +", " ", re.sub("\n", "", href[i+1])).strip()]
                         pats.append(pat)
                         patUrl.append([pat[1], "http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=%2Fnetahtml%2FPTO%2Fsearch-adv.htm&r=1&f=G&l=50&d=PTXT&p=1&p=1&S1={patent}.PN.".format(patent=pat[1])])
                    self.s.c.executemany("INSERT OR IGNORE INTO {tbl} VALUES (?, ?, ?)".format(tbl=tbl2), pats)
                self.s.conn.commit()

                if self.patentGrab:
                    # The value of .new is read and discarded.
                    PyCurl.PyCurl(patUrl, maxconn=maxconn, SQLdb=db, SQLtbl="patent_search", opt="M", cache=None).new
Example #16
0
 def __init__(self, db=None, sql=None, table=None):
     """Use the connection *sql* if given, otherwise open *db* (or the default database).

     ``self.table`` is *table*, or ``"invpat"`` when *table* is omitted.
     """
     self.table = "invpat" if table is None else table
     if sql is not None:
         # Reuse the caller's connection and switch it to the requested table.
         # NOTE(review): the raw `table` argument (possibly None) is passed
         # here, not self.table -- confirm that is intended.
         self.sql = sql
         self.sql.chgTbl(table)
         return
     import SQLite
     default_db = "/home/ron/inputdata/Ron/fullset/invpatC.upper.Jan2011.sqlite3"
     self.sql = SQLite.SQLite(db=default_db if db is None else db,
                              tbl=self.table)
     self.sql.open()
Example #17
0
File: PVote.py Project: QGB/PVote
def vote(id='Error!',ip=''):
	"""Register a vote for entry *id* and return the response for the vote page.

	Only the first element of *id* and of *ip* is used.  The vote is recorded
	even when ``SQLite.ip`` reports the address; in that case a warning is
	appended to the description.  If filling the template fails, the raised
	exception object becomes the response content.
	"""
	ip=ip[0]
	warning='<br><br><br>Warning: IP %s HAVE ALREADY VOTED'%ip if SQLite.ip(ip) else ''
	id=id[0]
	# The page shows the entry number one higher than the id.
	sid=str(int(id)+1)
	####  Vote logic####
	SQLite.vote(id)
	####################
	sv=U.read(basedir+'/vote.html')
	try:
		sv=sv.replace('{id}',sid).replace('{img}',getpath(id)).replace('{des}',getdes(id)+warning)
	except Exception as e:
		sv=e

	response={
	"content":sv,
	"code":235,
	"Cache-Control":"no-cache"
	}
	return response
Example #18
0
    def pat_match(self):
        """Match a query against the Patent data.

        Builds, in a fresh database ``self.t``, a table ``main`` holding the
        ``Patent``/``Title`` rows stored for ``self.query`` in
        ``<category>_list``, then a table ``invpat`` with the matching
        ``db.invpat`` rows whose ``AppYearStr`` lies between 1975 and 2001.
        """
        query = self.query
        list_table = "{category}_list".format(category=self.category)

        self.t = SQLite.SQLite()
        self.t.attach(self.db)
        create_main = """
            CREATE TABLE main AS
                SELECT Patent, Title FROM {tbl} WHERE query=?
            """.format(tbl=list_table)
        self.t.c.execute(create_main, (query,)).fetchall()
        self.t.index(["Patent"])
        self.t.attach("/home/ron/disambig/sqlite/invpat.s3")
        create_invpat = """
            CREATE TABLE invpat AS
                SELECT  a.*
                  FROM  db.invpat AS a
            INNER JOIN  main AS b
                    ON  a.Patent = b.Patent
                 WHERE  a.AppYearStr BETWEEN 1975 and 2001;
            """
        self.t.c.execute(create_invpat)
Example #19
0
def handle_inventor():

    ## Clean inventor: ascit(Firstname, Lastname, Street)
    ## Create new table inventor_1 to hold prepped data

    i = SQLite.SQLite(db='inventor.sqlite3', tbl='inventor_1')
    i.conn.create_function("ascit", 1, ascit)
    i.conn.create_function("cc", 3, locFunc.cityctry)
    i.c.execute('drop table if exists inventor_1')
    i.replicate(tableTo='inventor_1', table='inventor')
    i.c.execute('insert or ignore into inventor_1 select * from inventor  %s' %
                (debug and "LIMIT 2500" or ""))

    i.c.execute("""
            UPDATE  inventor_1
               SET  firstname = ascit(firstname),
                    lastname  = ascit(lastname),
                    street    = ascit(street),
                    City      = cc(city, country, 'city'),
                    Country   = cc(city, country, 'ctry');
                """)

    i.commit()

    i.attach('hashTbl.sqlite3')
    i.merge(key=['NCity', 'NState', 'NCountry', 'NZipcode', 'NLat', 'NLong'],
            on=['City', 'State', 'Country'],
            tableFrom='locMerge',
            db='db')

    i.merge(key=['NCity', 'NState', 'NCountry', 'NZipcode', 'NLat', 'NLong'],
            on=['City', 'State', 'Country', 'Zipcode'],
            tableFrom='locMerge',
            db='db')

    i.commit()
    i.close()
    print "DONE: Inv Locationize!", "\n   -", datetime.datetime.now() - t1
 def test__dbAdd(self):
     """``_dbAdd`` returns the table name, prefixed with ``<db>.`` when a db is given."""
     s = SQLite.SQLite()
     expectations = (
         ({}, "main"),
         ({"db": "db"}, "db.main"),
         ({"tbl": "temp"}, "temp"),
         ({"db": "db", "tbl": "temp"}, "db.temp"),
     )
     for kwargs, qualified in expectations:
         self.assertEqual(s._dbAdd(**kwargs), qualified)
 def test_constructor_empty(self):
     """Without arguments the object uses ``:memory:`` and table ``main``."""
     default = SQLite.SQLite()
     assert default.db == ':memory:'
     assert default.tbl == 'main'
Example #22
0
    def Process(self, items = None):
        """Run one conversion described by *items* (defaults to ``self.items``).

        Three paths, chosen from the entries of *items*:

        * source and destination are the same SQLite file, an ``sql`` entry is
          present and ``sqlprocessing`` is ``SQLITE``: the statement is run
          through the internal ``SQLite`` class as ``CREATE TABLE <name> AS <sql>``;
        * ``format`` is ``CSV``: the command writes into a temporary
          sub-directory whose files are then moved into the destination
          directory;
        * otherwise: the command is executed and ``PostProcess`` is called,
          after optionally deleting existing MapInfo files.
        """
        items = items or self.items
        # NOTE(review): the message has no placeholder for `items`; if
        # self.logger is a standard logging.Logger the dict will not appear in
        # the output -- confirm what logger type is used.
        self.logger.info("OGR2OGR processing", items)

        #### OVERRIDE OGR2OGR if both source and destination are the same SQLite file

        sourceformat = items.get('sourceformat')
        datasource = items.get('datasource')
        destination = items.get('destination')
        name = items.get('name')
        sql = items.get('sql')
        sqlprocessing = (items.get('sqlprocessing', 'OGR')).upper()

        if datasource == destination and sourceformat == 'SQLite' and sql and (sqlprocessing=="SQLITE"):
            message = "# Source and Destination are the same SQLite file:\n"
            message += "# will use internal SQLite module instead of OGR\n"
            message += "# SQLite file: %s\n" % datasource
            message += "# Output table: %s\n" % name
            message += "# SQL: \n%s" % sql

            self.logger.info("#" * 60 + "\n" + message)

            # NOTE(review): the connection is neither committed nor closed in
            # this branch, and `name`/`sql` are interpolated into the SQL text
            # as-is -- confirm both are acceptable for the callers.
            sqlite = SQLite()
            conn = sqlite.connect(datasource)
            cursor = conn.cursor()
            cursor.execute('DROP TABLE IF EXISTS "%s"' % name)
            cursor.execute('CREATE TABLE "%s" AS %s' % (name, sql))

            self.logger.info("Done.")

        # ogr2ogr creates a directory for CSV output
        # We don't like that. That's why we have ogr2ogr
        # create a temp dir, move all files out of it after
        # it is done and then remove the temp dir
        elif items.get('format') == 'CSV':

            # Change the destination dir to the tmpDestDir
            # (which is a subdir of the destDir)
            # The process id keeps the temporary directory name distinct per process.
            destDir = os.path.dirname(items['destination'])
            tmpDestDir = destDir + "/tmp" + str(os.getpid())
            items['destination'] = tmpDestDir

            command = self.CreateCommand(items)
            self.ExecuteCommand(command)

            # Move all files from tmpDestDir to destDir
            # and remove the tmpDestDir afterwards
            for file in glob.glob(tmpDestDir + '/*'):

                # Get File name from path
                fileName = os.path.basename(file)

                # Remove any pre-existing files in the
                # destDir first
                destFile = destDir + '/' + fileName
                if os.path.isfile(destFile):
                    os.remove(destFile)

                # Move file from tmpDir to destDir
                shutil.move(file, destDir)
                self.logger.info("\n# Moved '%s' to '%s'" % (file, destDir))


            # Remove tmpDestDir (should be empty now)
            # os.rmdir raises OSError if anything is still inside.
            os.rmdir(tmpDestDir)

        else:
            # When outputting to tab files, files can not be overwritten easily
            # We delete them manually if the overwrite option has been specified
            if items.get('format') == 'MapInfo File' and items.get('overwrite',True):
                self.logger.info("\n# Overwrite option is on: will delete existing files with same name (if they exist)\n")
                (filepath, filename) = os.path.split(items['destinationstore'])
                (shortname, extension) = os.path.splitext(filename)

                # We go through the possible extensions for MapInfo files that compose a layer and delete the corresponding files if they exist
                # Each `file` below is the list returned by glob.glob for one
                # extension; only its first entry is checked and deleted.
                for file in (glob.glob(os.path.join(filepath,shortname)+ext) for ext in ('.tab','.id','.map','.dat','.ind')):
                    #shutil.move(file, file+'.bak')
                    if file and os.path.isfile(file[0]):
                        self.logger.info("# Deleting file: '%s' " % (file[0]))
                        os.remove(file[0])

            # Basic processing
            command = self.CreateCommand(items)
            self.ExecuteCommand(command)
            self.PostProcess(items)
Example #23
0
    def cleanUS(self, tbl, dbBase=None, locVar=["City", "State", "Zipcode"], reset=False):
        """Fill ``lat``, ``lng``, ``State`` and ``CD`` of *tbl* from the CD/Zipcode database.

        tbl    -- table whose rows are updated
        dbBase -- path of the lookup database (defaults to CD_ZIP.sqlite3)
        locVar -- names of the city, state and zipcode columns, in that order
        reset  -- redo the merge even if the ``lat1`` column already exists

        The merge step adds the candidate columns (``lat1``/``lng1``/``CD1``/
        ``State1`` by zipcode, ``lat2``/... by the zipcode looked up from
        city and state) and blanks ``lat``, ``lng`` and ``CD``.  The update
        step only runs while some row still has an empty ``lat``.
        """
        #Congressional District + Zipcode
        if dbBase is None:
            dbBase = "/home/ron/disambig/geo/CD_ZIP.sqlite3"
        conn = SQLite(db=self.db, tbl=self.tbl)
        conn.attach(dbBase)

        locStr = ", ".join(locVar)
        locQStr = "=? AND ".join(locVar)+"=?"
        conn.index(locVar)

        needs_merge = 'lat1' not in conn.columns(output=False) or reset
        if needs_merge:
            zip_col = locVar[2]
            conn.merge(key=[['lat1', 'latitude'], ['lng1', 'longitude'], ['CD1', 'CD'], ['State1', 'State']],
                       on=[[zip_col, 'Zipcode']], tableFrom='congdistZip', db='db')
            conn.merge(key=[[zip_col+"2", 'Zipcode']],
                       on=[[locVar[0], 'City'], [locVar[1], 'State']], tableFrom='USCities', db='db')
            conn.merge(key=[['lat2', 'latitude'], ['lng2', 'longitude'], ['CD2', 'CD'], ['State2', 'State']],
                       on=[[zip_col+'2', 'Zipcode']], tableFrom='congdistZip', db='db')

            for column in ('lat', 'lng', 'CD'):
                conn.add(column, '')

            conn.c.execute("UPDATE %s SET lat='', lng='', CD=''" % tbl)
            ##    HERE ARE MY ASSUMPTIONS TO PUT LNG/LAT INTO LOU'S GRANT FILE -- 944,549 total records
            ##      1. City, State match is more precise than Zipcode match (sometimes Zip is just wrong..) Use that as default -- (206,369) 21.8%
            ##      2. If City, State match doesn't happen, then I default to Zipcode match ... small (3,998) 0.4%
            ##      3. If CD, State match -- use Zipcode centroid.  (693,922) 73.5%
            ##      4. 1-3 not capturing anything BUT city is filled (I did a quick scan, these all basically look foreign, see CSV) (7,217) 0.7%
            ##      5. Organization labeled as "UNKNOWN" (without City, State) - (30,640) 3.3%
            ##      6. Blanks
            ##         a) create frequency table of Standardized_Organization with Zipcodes.
            ##         b) check if organization exists in database.  If so, align it with most frequent Zipcode combo (1,356) 0.1%
            ##      7. Blank (non 6) (1007) 0.1%
            ##      8. UNKNOWN, Blank or "Foreign" -- Delete for now, although I have the CSV output saved as blankCode.csv (38,864)
            ##      9. Remaining records: (905,685) 95.8%

        missing = conn.c.execute("SELECT count(*) FROM %s WHERE lat='' or lat is null" % tbl).fetchone()[0]
        if missing>0:
            # Applied in order, later passes overwrite earlier ones:
            #   1. Update everything to reflect the 2nd candidate set
            #   2. Where CD2 is empty take lat1/lng1
            #   3. Where State1 equals State take lat1/lng1
            select_templates = (
                "SELECT lat2, lng2, State2, CD2, %s FROM %s GROUP BY %s",
                "SELECT lat1, lng1, State1, CD1, %s FROM %s GROUP BY %s HAVING CD2='' or CD2 is null",
                "SELECT lat1, lng1, State1, CD1, %s FROM %s GROUP BY %s HAVING State1=State",
            )
            update_sql = "UPDATE %s SET lat=?, lng=?, State=?, CD=? WHERE %s" % (tbl, locQStr)
            for template in select_templates:
                rows = conn.c.execute(template % (locStr, tbl, locStr)).fetchall()
                if rows:
                    conn.c.executemany(update_sql, rows)

        conn.close()
Example #24
0
import matplotlib as plt
import numpy as np
import time
import sys
# =============================================================================
# User variables
# =============================================================================
# Location of the SQLite database holding the papers
database_location = '/home/greenbur/NLP/Python Code/WorkingPapersGOMlg.sqlite'

# Path where the trained model will be saved
savemodelpath = '/home/greenbur/NLP/Results/GOMlgvec.txt'

# Load document data from the database:
# connect to the SQLite database and read every row of the 'papers' table
cursor, conn = SQLite.connect_to_databse(database_location)
datatable = SQLite.list_all_rows(cursor, 'papers')

# Collect the paper text (column 6 of each row) into a Python list
paperdata = []
for row in datatable:
    paperdata.append(row[6])

# Clean text for processing
cleandoc = cleanupdocuments(paperdata)
print("Documents loaded and ready to process")

# This section builds the Word2Vec model and saves the model
print("Starting word2vec")

# Build Word2Vec model, params adjusted for future testing
Example #25
0
import socket, pickle, SQLite

# Minimal single-client server: accepts one connection, stores every message
# it receives in the database and echoes the raw bytes back to the sender.
host = ''        # Symbolic name meaning all available interfaces
port = 12345     # Arbitrary non-privileged port
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    s.bind((host, port))
    s.listen(1)
    conn, addr = s.accept()
    try:
        print('Connection from: ', addr)
        while True:
            in_data = conn.recv(1024)
            if not in_data:
                # An empty read means the peer closed the connection.
                break
            # SECURITY NOTE(review): pickle.loads on bytes received from the
            # network can execute arbitrary code; only safe if every client is
            # trusted.  Consider a data-only format such as JSON.
            sql_data = pickle.loads(in_data)
            print('Adding the Following to the DataBase: ', sql_data)
            SQLite.add_to_db('server', [sql_data[0], sql_data[1], sql_data[2], sql_data[3]])
            SQLite.close_conn()
            conn.sendall(in_data)
    finally:
        # Release the client socket even if a message could not be processed.
        conn.close()
finally:
    # The listening socket was never closed before; free the port on exit.
    s.close()
Example #26
0
    def graph(self, vertex_list=None, where=None, flag=[], output=":memory:"):
        """Copy a selection of rows into an SQLite file and load vertex/edge tables from it.

        vertex_list -- optional Invnum_N values (or sequences whose first
                       element is the value) restricting the selection
        where       -- optional SQL condition added to the selection
        flag        -- Invnum_N values to mark with "x" in the ``flag`` column
                       (only read here, never modified)
        output      -- SQLite file to build; if the file already exists the
                       build is skipped and its ``vx0``/``ed0`` tables are
                       read as they are

        The result is stored on ``self.tab`` (``vList``/``vlst`` for vertices,
        ``eList``/``elst`` for edges).

        NOTE(review): ``self.c`` is presumably a MySQL-style cursor (the
        statements use INSERT IGNORE, DESCRIBE and %s placeholders) -- confirm.
        NOTE(review): ``types`` is used below but is not among the names
        imported in this function; it must come from module level.
        """
        import datetime, SQLite, os

        oldfile = os.path.isfile(output) and True or False
        s = SQLite.SQLite(output, tbl="G0")
        if not (oldfile):
            if vertex_list != None:
                # Normalise every entry to a 1-tuple for executemany.
                if type(vertex_list) in (types.ListType, types.TupleType):
                    vx = [(x[0], ) for x in vertex_list]
                else:
                    vx = [(x, ) for x in vertex_list]

                self.c.execute(
                    "CREATE TEMPORARY TABLE gmerge (Invnum_N VARCHAR(255), Unique(Invnum_N));"
                )
                self.c.executemany("INSERT IGNORE INTO gmerge VALUES (%s)", vx)
                self.c.execute("""            
                    CREATE TEMPORARY TABLE G0 AS
                        SELECT  a.*
                          FROM  %s AS a
                    INNER JOIN  gmerge AS b
                            ON  a.Invnum_N=b.Invnum_N
                            %s;
                    """ % (self.table,
                           (where != None) and "WHERE %s" % where or ""))
                #flag gets created here...
            # NOTE(review): this branch runs when `where` IS None, yet it
            # builds a WHERE clause; the template also has two %s but is
            # formatted with self.table only (`where` is passed to execute as
            # a second argument), and gmerge was not created on this path.
            # The condition was probably meant to be `where != None` -- confirm.
            elif where == None:
                self.c.execute(
                    """
                    CREATE TEMPORARY TABLE G0 AS
                        SELECT  a.*
                          FROM  %s AS a
                    INNER JOIN  gmerge AS b
                            ON  a.Invnum_N=b.Invnum_N
                         WHERE  %s;""" % self.table, where)

            # CREATE SQLite Data
            # The column list of invpat (first two fields of each DESCRIBE
            # row) becomes the column definition of the SQLite table G0.
            self.c.execute("DESCRIBE invpat")
            s.conn.create_function("flag", 1,
                                   lambda x: x in flag and "x" or "")
            s.c.execute("CREATE TABLE G0 (%s)" %
                        ", ".join([" ".join(x[:2])
                                   for x in self.c.fetchall()]))
            self.c.execute("SELECT * FROM G0")
            s.addSQL(data=self.c.fetchall())
            s.add("flag", "")
            s.c.execute("UPDATE G0 SET flag=flag(Invnum_N);")

            #how do we incorporate new fields?
            # vx0: one row per Invnum_N; ed0: one row per pair of different
            # Invnum_N values sharing a Patent (a < b, so each pair once).
            s.c.executescript("""
                DROP TABLE IF EXISTS vx0;
                DROP TABLE IF EXISTS ed0;
                CREATE INDEX IF NOT EXISTS G0_AY ON G0 (AppYear);
                CREATE INDEX IF NOT EXISTS G_id ON G0 (Patent);
                CREATE INDEX IF NOT EXISTS G_ed ON G0 (Invnum_N, Patent);
                CREATE TABLE vx0 AS
                    SELECT  Invnum_N AS id, count(*) AS cnt, *,
                            GROUP_CONCAT(Class) AS Classes
                      FROM  G0
                  GROUP BY  Invnum_N
                  ORDER BY  AppYear;
                CREATE INDEX IF NOT EXISTS vx_id ON vx0 (Invnum_N);
                CREATE TABLE ed0 AS
                    SELECT  a.Invnum_N AS h, b.Invnum_N AS t, a.AppYear AS AppYear, a.Patent AS Patent, a.Class AS Class
                      FROM  G0 AS a INNER JOIN G0 AS b
                        ON  a.Patent=b.Patent AND a.Invnum_N<b.Invnum_N;
                """)

        self.tab = senTab()
        self.tab.vList = s.c.execute("SELECT * FROM vx0").fetchall()
        # Column names without the leading id column (vertices) and without
        # the leading h/t columns (edges).
        self.tab.vlst = s.columns(table="vx0", output=False)[1:]
        self.tab.eList = s.c.execute("SELECT * FROM ed0").fetchall()
        self.tab.elst = s.columns(table="ed0", output=False)[2:]
        s.close()
#This file is meant to separate the patent related datasets by Year

import sys, datetime, os
sys.path.append("/home/ron/PythonBase")
import SQLite
import senAdd

yr = int(sys.argv[1])  #min year
src = sys.argv[2]  #source directory

direc = '/home/ron/disambig/sqlite/backup'

print "Generating patent{yr}".format(yr=yr)
s = SQLite.SQLite(db='{direc}/patent{yr}'.format(direc=direc, yr=yr))
s.optimize()
for file in [
        x for x in os.listdir(src)
        if x.split(".")[1] == "sqlite3" and x.split(".")[0] != "hashTbl"
]:
    print file
    s.attach('{src}/{file}'.format(src=src, file=file))
    table = file.split(".")[0]
    s.replicate(tableTo=table, table=table, db="db")
    s.addSQL(db="db", data=table, table=table)

s.close()
 def test_constructor_dbname(self):
     """A database name alone is stored as given and the table defaults to ``main``."""
     named = SQLite.SQLite(db='foobar.sqlite3')
     assert named.db == 'foobar.sqlite3'
     assert named.tbl == 'main'
 def test_index(self):
     """Smoke test: creating the assignee schema and loading assignees must not raise."""
     db = SQLite.SQLite('test.sqlite3')
     create_assignee_schema(db.c)
     initialize_assignees(db.conn)
     # Reaching this line is the actual check.
     assert 1 == 1
Example #30
0
        line = f.readline()
        DayInformationTemp = DayInformation(line)
        if (DayInformationTemp.RegisterType == "01"):
            Alldata.append(DayInformationTemp)

    f.close()
    print("Load completed")
    Current_PTOEXE = ""
    AllDailyStockInfo = []
    Alldata.sort(key=Get_PTOEXE)
    for DayInformationin in Alldata:
        if Current_PTOEXE != DayInformationin.PTOEXE:
            InsertStock(AllDailyStockInfo)
            AllDailyStockInfo.clear()
        AllDailyStockInfo.append(DayInformationin)
        Current_PTOEXE = DayInformationin.PTOEXE
    InsertStock(AllDailyStockInfo)


# Script entry: open the database, create its tables, load one file, close.

# Name of the database file -- change it here if needed.
CONST_NAME_DATABASE = 'ActionsInfo.db'

# Module-level handle; NOTE(review): presumably used by CreateTables and the
# insert helpers defined elsewhere in this file -- confirm before renaming.
sQLite = SQLite(CONST_NAME_DATABASE)
sQLite.Open()
CreateTables()
# Name of the Bovespa TXT file to load -- change it here if needed.
FileName = "COTAHIST_A2018.TXT"
LoadFileCallAddInDB(FileName)
sQLite.Close()
#input("Press Enter to continue...")
 def test_constructor_dbname_table(self):
     """Database and table names passed to the constructor are stored as given."""
     custom = SQLite.SQLite(db='foobar.sqlite3', table='table_foo')
     assert custom.db == 'foobar.sqlite3'
     assert custom.tbl == 'table_foo'
Example #32
0
#unique subclass-combination counter

import sys, datetime
sys.path.append("/home/ron/PythonBase")

import SQLite
import senAdd

# Working copy of the class table, extended with a patent type and the
# combined "class-subclass" string.
s = SQLite.SQLite(db='../sqlite/class_count.sqlite3', tbl='class');
# Expose senAdd.patType to SQL as the one-argument function pType.
s.conn.create_function("pType", 1, senAdd.patType)
s.attach('../sqlite/class.sqlite3', name='cls')
s.replicate(table='class', db='cls')
s.add('pat_type', 'varchar(1)', table='class')
s.add('fullcls', 'text', table='class')
s.index(keys=['Patent', 'FullCls'], table="class", unique=True)
s.index(keys=['Pat_Type'], table="class", unique=False)
s.count()
# Copy every source row, appending its patent type and "class-subclass" string.
s.c.execute("INSERT OR IGNORE INTO class SELECT *, pType(patent), class||'-'||subclass FROM cls.class")
s.commit()
s.count()
#I don't want to really deal with non utility patents right now, delete them
s.c.execute("DELETE FROM class WHERE pat_type!='U'")
s.commit()
s.count()
#First CLASS-SUBCLASS combinations, eliminate these
# (for each FullCls, the row with the smallest Patent value is removed)
cls = s.c.execute("SELECT min(Patent), FullCls FROM class GROUP BY FullCls").fetchall()
s.c.executemany("DELETE FROM class WHERE patent=? and FullCls=?", cls)
s.commit()
s.count()
#Determine all possible pairs for each patent
s.c.execute("""
Example #33
0
        "sid_meta2_name TEXT,"
        "sid_meta3_name TEXT,"
        "contains_crosslink BOOLEAN)"
    )

    # add meta columns
    try:
        cur.execute('ALTER TABLE spectrum_identifications ADD COLUMN meta1 TEXT')
        cur.execute('ALTER TABLE spectrum_identifications ADD COLUMN meta2 TEXT')
        cur.execute('ALTER TABLE spectrum_identifications ADD COLUMN meta3 TEXT')

    except Exception:
        print('{}: Meta columns exist already - not updated'.format(db_name))

    try:
        # add precursor information from peak list file to DB
        cur.execute('ALTER TABLE spectra ADD COLUMN precursor_mz TEXT')
        cur.execute('ALTER TABLE spectra ADD COLUMN precursor_charge TEXT')
    except Exception:
        print('{}: spectrum precursor columns exist already - not updated'.format(db_name))
    con.commit()

    return True


import glob

# Apply update_database to every saved database file.
# NOTE(review): the connections opened here are never closed explicitly -- confirm
# whether that is acceptable for the number of files processed.
for db_name in glob.glob("./dbs/saved/*.db"):
    con = SQLite.connect(db_name)
    update_database(con, db_name)
Example #34
0
import sys
sys.path.append('./lib/')
import SQLite
import datetime
import shutil

# Start time, printed now and kept in t1 for later use.
t1 = datetime.datetime.now()
print "Start", t1

## Create invpat (any existing invpat table is dropped first)
ip = SQLite.SQLite(db='invpat.sqlite3', tbl='invpat')
ip.c.execute("DROP TABLE IF EXISTS invpat")
ip.c.execute(
    """CREATE TABLE invpat(Firstname TEXT, Middlename TEXT, Lastname TEXT, Street TEXT,
            City TEXT, State TEXT, Country TEXT, Zipcode TEXT, Latitude REAL,
            Longitude REAL, InvSeq INT, Patent TEXT, AppYear TEXT, ApplyYear TEXT, GYear INT,
            AppDate TEXT, Assignee TEXT, AsgNum INT, Class TEXT, Coauthor TEXT, Invnum TEXT,
            Invnum_N TEXT, Unique_Record_ID TEXT);""")

## From inventor.sqlite3: Firstname, Lastname, Street, City, State, Country, Zipcode, Latitude, Longitude, InvSeq
ip.attach('inventor.sqlite3')
ip.c.execute("""INSERT INTO invpat (
                  Firstname,
                  Lastname,
                  Street,
                  City,
                  State,
                  Country,
                  Zipcode,
                  Latitude,
                  Longitude,