예제 #1
0
 def setUp(self):
     self.removeFile("test.db")
     self.removeFile("test.csv")
     self.removeFile("test2.db")
     self.removeFile("test2.csv")
     # create a really basic dataset
     self.createFile(file="test.db")
     self.s = SQLite.SQLite(db="test.db", tbl="test")
     self.createFile("test2.db")
     s = SQLite.SQLite("test2.db", tbl="test")
     self.s.attach(s)
예제 #2
0
    def __init__(self):
        self.sqlite = SQLite.SQLite()
        self.connection = self.sqlite.create_connection(self.sqlite.database)

        self.degree = self.map_degree()
        self.octave = self.map_octave()
        self.type = self.map_type()
예제 #3
0
    def __init__(self,
                 filepath,
                 dbnames,
                 graphml='',
                 begin=1975,
                 end=2010,
                 increment=3):
        """
        takes a filepath string and a list of dbnames
        if graphml files already exist, take the list of files and read into graph list as graph objects

        ex:
        import DVN
        D = DVN.DVN(filepath='/home/ayu/DVN/', dbnames=['patent', 'invpat', 'citation', 'class'], graphml = ['pat_2000.graphml', 'pat_2003.graphml'])
        D.summary()
        D.create_csv_file()
        """
        self.filepath = filepath
        self.data = {}
        self.graphs = {}
        self.begin = begin
        self.end = end
        self.increment = increment
        for dbname in dbnames:
            self.data[dbname] = SQLite.SQLite(filepath + dbname + '.sqlite3',
                                              dbname)
        if graphml:
            i = 0
            for year in range(self.begin, self.end, self.increment):
                self.graphs[year] = igraph.Graph.Read_GraphML(filepath +
                                                              graphml[i])
                i = i + 1
예제 #4
0
    def __init__(self, link):
        self.sqlite = SQLite.SQLite()
        self.connection = self.sqlite.create_connection(self.sqlite.database)

        graph = self.retrieve_dataset(link)
        nodes = self.get_number_of_nodes(graph)
        in_degree_id = self.get_in_degree(graph)
        out_degree_id = self.get_out_degree(graph)

        metrics = (nodes, in_degree_id, out_degree_id)
        self.metrics_id = self.sqlite.create_metrics(self.connection, metrics)

        cursor = self.connection.cursor()
        cursor.execute("SELECT rowid FROM graph WHERE uri = ?", (link, ))
        data = cursor.fetchone()

        if data is None:
            graph_data = (link, self.metrics_id)
            self.graph_id = self.sqlite.create_graph(self.connection,
                                                     graph_data)
        else:
            graph_data = (link, self.metrics_id, data[0])
            self.sqlite.update_graph(self.connection, graph_data)
            self.graph_id = data[0]

        self.connection.commit()
예제 #5
0
 def __init__(self, match="patent"):
     self.match = match
     files = {
         'patent': ['/home/ron/disambig/sqlite/invpat.sqlite3', 'invpat']
     }
     self.s = SQLite.SQLite(db=files[self.match][0],
                            tbl=files[self.match][1])
예제 #6
0
 def test___init__(self):
     s = SQLite.SQLite()
     self.assertEqual("main", s.tbl)
     self.assertEqual(":memory:", s.path)
     self.assertEqual("test.db", self.s.path)
     self.assertEqual("test", self.s.tbl)
     self.assertFalse(self.s.output)
예제 #7
0
 def __init__(self, db=None, table=None):
     import SQLite
     self.table = (table == None and "invpat" or table)
     self.sql = SQLite.SQLite(
         db=(db == None and "/home/ron/disambig/sqlite/invpat.sqlite3"
             or db),
         tbl=self.table)
     self.sql.open()
예제 #8
0
def handle_patent():
    p = SQLite.SQLite(db='patent.sqlite3', tbl='patent')
    p.conn.create_function('dVert', 1, dateVert)
    p.c.execute(
        """update patent set AppDate=dVert(AppDate), GDate=dVert(GDate);""")
    p.commit()
    p.close()
    print "DONE: Patent Date!", "\n   -", datetime.datetime.now() - t1
예제 #9
0
    def __init__(self,
                 db="asg2.sqlite3", fld="Assignee", uqKey="AsgNum",
                 other="NCity, NState, NCountry,", table="Assignee_2"):

        self.fld = fld
        self.uqKey = uqKey
        self.other = other
        self.table = table
        self.s = SQLite(db)
예제 #10
0
    def fetch(self):
        tbl = self.tbl
        tbl2 = self.tbl2
        db = self.db
        query = self.query
        category = self.category
        maxconn = self.maxconn
        #FIRST PATENT
        if category=="grant":
            base = "patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=/netahtml/PTO/search-adv.htm&r=0&f=S&l=50&d=PTXT"
            params = urllib.urlencode({"p":1, "Query":query})
        url = "http://{base}&{params}".format(base=base, params=params)
        
        firstkey = "{query}||{page}".format(query=query, page=1)
        PyCurl.PyCurl([[firstkey, url]], maxconn=maxconn, SQLdb=db, SQLtbl=tbl, opt="M")
        self.s = SQLite.SQLite(db=db, tbl=tbl)

        #SUBSEQUENT PATENT
        html = self.grab()
        pats = int(re.findall("<B>Results of Search.*?</B>: ([0-9]+) patents", html)[0])
        pages = int(math.ceil(float(pats)/50))
        print "Query: {query}\n  - Patents: {pats}, Pages: {pages}".format(query=query, pats=pats, pages=pages)

        urls = []
        Srch1 = re.findall('<INPUT TYPE="HIDDEN" NAME="Srch1" VALUE="(.*?)">', html)[0]
        for num in range(2, pages+1):
            params = urllib.urlencode({"Srch1":Srch1, "NextList{num}".format(num=num):"N"})
            urls.append(["{query}||{page}".format(query=query, page=num), "http://{base}&{params}".format(base=base, params=params)])

        if len(urls)>0:
            pc = PyCurl.PyCurl(urls, maxconn=maxconn, SQLdb=db, SQLtbl=tbl, opt="M")

            if pc.new or True:
                #BUILD PATENT LIST
                self.s.chgTbl(tbl2)
                self.s.c.execute("CREATE TABLE IF NOT EXISTS {tbl} (query TEXT, Patent VARCHAR(8), Title TEXT, UNIQUE(query, Patent))".format(tbl=tbl2))
                self.s.index(["Patent"])
                patUrl = []
                for num in range(0, pages):
                    html = self.grab(page=num+1)
                    base = re.findall("<TABLE><TR><TD>.*?</TABLE>", html, re.S)[0]
                    href = re.findall("<A  HREF=.*?>(.*?)</A>", base, re.S)
                    pats = []
                    for i in range(0, len(href), 2):
                         pat = [query, re.sub(",", "", href[i]), re.sub("  +", " ", re.sub("\n", "", href[i+1])).strip()]
                         pats.append(pat)
                         patUrl.append([pat[1], "http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&u=%2Fnetahtml%2FPTO%2Fsearch-adv.htm&r=1&f=G&l=50&d=PTXT&p=1&p=1&S1={patent}.PN.".format(patent=pat[1])])
                    self.s.c.executemany("INSERT OR IGNORE INTO {tbl} VALUES (?, ?, ?)".format(tbl=tbl2), pats)
                self.s.conn.commit()

                if self.patentGrab:
                    PyCurl.PyCurl(patUrl, maxconn=maxconn, SQLdb=db, SQLtbl="patent_search", opt="M", cache=None).new
예제 #11
0
 def __init__(self, db=None, sql=None, table=None):
     self.table = (table == None and "invpat" or table)
     if sql == None:
         import SQLite
         self.sql = SQLite.SQLite(db=(
             db == None and
             "/home/ron/inputdata/Ron/fullset/invpatC.upper.Jan2011.sqlite3"
             or db),
                                  tbl=self.table)
         self.sql.open()
     else:
         self.sql = sql
         self.sql.chgTbl(table)
예제 #12
0
 def setKey(self, db, table="main"):
     s = self.s
     s.open()
     OrgDct = dict(s.c.execute("SELECT %s, %s2 FROM grp" % (self.fld, self.uqKey)).fetchall())
     s.close()
     t = SQLite(db)
     def OrgDctIt(x):
         if x in OrgDct:
             return OrgDct[x]
         else:
             return ""
     t.conn.create_function("OrgDct", 1, OrgDctIt)
     t.c.execute("UPDATE %s SET %s=OrgDct(%s)" % (table, self.uqKey, self.fld))
     t.conn.commit()
     t.close()
예제 #13
0
    def merge(self, keys, db=None, tbl="main"):
        s = self.s
        s.open()
        if len(keys[0])<13:
            keys = ["%s%0.12d" % (x[0], int(x[1:])) for x in keys]

        k1 = min(keys)
        for k in keys:
            s.c.execute("UPDATE grp SET %s2='%s' WHERE %s2='%s'" % (self.uqKey, k1, self.uqKey, k))
        s.conn.commit()
        s.close()
        if db!=None:
            t = SQLite(db)
            for k in keys:
                t.c.execute("UPDATE %s SET %s='%s' WHERE %s='%s'" % (tbl, self.uqKey, k1, self.uqKey, k))
            t.conn.commit()
            t.close()
예제 #14
0
    def pat_match(self):
    #Match a query against the Patent data
        query = self.query
        category = self.category
        tbl  = "{category}_list".format(category=category)

        self.t = SQLite.SQLite()
        self.t.attach(self.db)
        self.t.c.execute("""
            CREATE TABLE main AS
                SELECT Patent, Title FROM {tbl} WHERE query=?
            """.format(tbl=tbl), (query,)).fetchall()
        self.t.index(["Patent"])
        self.t.attach("/home/ron/disambig/sqlite/invpat.s3")
        self.t.c.execute("""
            CREATE TABLE invpat AS
                SELECT  a.*
                  FROM  db.invpat AS a
            INNER JOIN  main AS b
                    ON  a.Patent = b.Patent
                 WHERE  a.AppYearStr BETWEEN 1975 and 2001;
            """)
예제 #15
0
def handle_inventor():

    ## Clean inventor: ascit(Firstname, Lastname, Street)
    ## Create new table inventor_1 to hold prepped data

    i = SQLite.SQLite(db='inventor.sqlite3', tbl='inventor_1')
    i.conn.create_function("ascit", 1, ascit)
    i.conn.create_function("cc", 3, locFunc.cityctry)
    i.c.execute('drop table if exists inventor_1')
    i.replicate(tableTo='inventor_1', table='inventor')
    i.c.execute('insert or ignore into inventor_1 select * from inventor  %s' %
                (debug and "LIMIT 2500" or ""))

    i.c.execute("""
            UPDATE  inventor_1
               SET  firstname = ascit(firstname),
                    lastname  = ascit(lastname),
                    street    = ascit(street),
                    City      = cc(city, country, 'city'),
                    Country   = cc(city, country, 'ctry');
                """)

    i.commit()

    i.attach('hashTbl.sqlite3')
    i.merge(key=['NCity', 'NState', 'NCountry', 'NZipcode', 'NLat', 'NLong'],
            on=['City', 'State', 'Country'],
            tableFrom='locMerge',
            db='db')

    i.merge(key=['NCity', 'NState', 'NCountry', 'NZipcode', 'NLat', 'NLong'],
            on=['City', 'State', 'Country', 'Zipcode'],
            tableFrom='locMerge',
            db='db')

    i.commit()
    i.close()
    print "DONE: Inv Locationize!", "\n   -", datetime.datetime.now() - t1
예제 #16
0
 def test_constructor_empty(self):
     s = SQLite.SQLite()
     assert (s.db == ':memory:')
     assert (s.tbl == 'main')
예제 #17
0
 def test_constructor_dbname(self):
     s = SQLite.SQLite(db='foobar.sqlite3')
     assert (s.db == 'foobar.sqlite3')
     assert (s.tbl == 'main')
예제 #18
0
import sys

sys.path.append("/home/ron/PythonBase")
import SQLite

#file = "/media/data/edward/backup/v2/final_r7.sqlite3"

file = "/home/ayu/DVN/upper_results/invpat_uc.sqlite3"
s = SQLite.SQLite(file, 'invpat')

print "Creating upper bound files..."
for x in range(1975, 2010, 3):
    print x
    # Lower Bound results (currently marked as upr because we only have upr)
    s.igraph(where='AppYearStr BETWEEN %d AND %d' % (x, x+2), vx="invnum_N").g.save('/home/ayu/DVN/upper_results/pat_%d_uc.graphml' % x)
    # Upper Bound results
#   s.igraph(where='AppYearStr BETWEEN %d AND %d' % (x, x), vx="new_invnum_N").g.save('pat_%d_upr.graphml' % x)

# Entire US graph for all history
s.igraph(where='Country="US"', vx="invnum_N").g.save('/home/ayu/DVN/upper_results/pat_US_uc.graphml') #changed low to upr
#s.igraph(where='Country="US"', vx="new_invnum_N").g.save('pat_US_upr.graphml')

file = "/home/ayu/DVN/lower_results/invpat_oc.sqlite3"
s = SQLite.SQLite(file, 'invpat')

print "Creating lower bound files..."
for x in range(1975, 2010, 3):
    print x
    s.igraph(where='AppYearStr BETWEEN %d AND %d' % (x, x+2), vx="invnum_N").g.save('/home/ayu/DVN/lower_results/pat_%d_oc.graphml' % x)

# Entire US graph for all history
예제 #19
0
 def test__dbAdd(self):
     s = SQLite.SQLite()
     self.assertEqual(s._dbAdd(), "main")
     self.assertEqual(s._dbAdd(db="db"), "db.main")
     self.assertEqual(s._dbAdd(tbl="temp"), "temp")
     self.assertEqual(s._dbAdd(db="db", tbl="temp"), "db.temp")
예제 #20
0
t1 = datetime.datetime.now()
print "Start", t1
##### Run B2_LocationMatch.py
import B2_LocationMatch

print "   - Loc Merge", "\n   -", datetime.datetime.now() - t1

##
## ###########################
#####                       ###
####     A S S I G N E E     ##
#####                       ###
## ###########################
##
### Create copy of assignee table, add column for assigneeAsc
s = SQLite.SQLite(db='assignee.sqlite3', tbl='assignee_1')
s.conn.create_function("ascit", 1, ascit)
s.conn.create_function("cc", 3, locFunc.cityctry)
s.attach(database='NBER_asg')
s.c.execute("DROP TABLE IF EXISTS assignee_1")
s.replicate(tableTo='assignee_1', table='assignee')
#s.addSQL(data='assignee', insert="IGNORE")
s.c.execute("INSERT INTO assignee_1 SELECT * FROM assignee %s" %
            (debug and "LIMIT 2500" or ""))
s.add('assigneeAsc', 'VARCHAR(30)')
s.c.execute("UPDATE assignee_1 SET assigneeAsc = ascit(assignee);")
s.commit()
print "DONE: assignee_1 table created in assignee.sqlite3 with new column assigneeAsc", "\n   -", datetime.datetime.now(
) - t1
s.merge(key=[['AsgNum', 'pdpass']],
        on=[['assigneeAsc', 'assignee']],
예제 #21
0
    def search(self, params, path, orgNum=100, orgEach=5):
        import datetime, SQLite, senAdd, os, random, json, types
        import memcache, zipfile

        self.table = params['table']

        def infoSearch(var, typ="str", blank=False):
            if blank:
                if var in params:
                    return params[var]
                else:
                    return None
            else:
                ret = var in params and ((typ=="int") and (int)(params[var]) or params[var]) or None
                return (ret!="" and ret!=None) and ret or None

        cd = infoSearch('CD', 'int')
        state = infoSearch('State')

        if state==None:
            return ""
        
        if cd==0 or cd=='0':
            cd=None

        mc = memcache.Client(['127.0.0.1:11211'], debug=0)
        mckey = "geoPut_"+"|".join([type(params[var])==types.ListType and ",".join(params[var]) or params[var] for var in params])

        #mc.flush_all()
        #mc.delete(mckey)
        if len(mckey)<=250:
            jsonout = mc.get(mckey)
        else:
            jsonout = None

        if jsonout==None or len(jsonout)<100:
            output = '%s/sqlite/%s%s-%s.s3' % ("/".join(path.split("/")[:-1]), self.table, state, cd!=None and cd or "")
            s = SQLite.SQLite(output)
            #s.c.execute("DROP TABLE %sGeo" % self.table)

            if not(s.tables("%sGeo" % self.table)):
                tbl = self.table
                def tableIt(table):
                    cols = self.m.columns(output=False, table=table)
                    s.c.execute("CREATE TABLE IF NOT EXISTS %s (%s)" % (table, ", ".join([x for x in cols])))
                    return cols
                    
                tableIt('%sGeo' % tbl)
                if cd==0 or cd==None:
                    query = "SELECT * FROM %sGeo WHERE Level=1 AND State=%%s" % tbl
                    self.m.c.execute(query, (state,))
                else:
                    query = "SELECT * FROM %sGeo WHERE CD=%%s AND State=%%s" % tbl
                    self.m.c.execute(query, (cd, state))
                data = self.m.c.fetchall()
                s.addSQL(data=data, table='%sGeo' % tbl)
                #s.index(["IDNum"], table='%sGeo' % tbl)
                s.index(["Year"],  table='%sGeo' % tbl)
                s.index(["Org"],  table='%sGeo' % tbl)
                s.index(["Lat", "Lng"],  table='%sGeo' % tbl)

                #---------------#
                # Assign colors #
                #---------------#
                column = s.columns(output=False, table="%sGeo" % tbl)
                data = zip(*data)

                colors = [x[1] for x in senAdd.colors]
                colX = []
                for w in [[[x, y] for y in colors if x!=y] for x in colors]:
                    colX.extend(w)

                lbl = senAdd.flatten([x.split("/") for x in data[column.index("Label")] if x is not None])
                lbl = sorted([[x, lbl.count(x)] for x in set(lbl)], key=lambda x:x[1], reverse=True)
                org = s.c.execute("SELECT Org, count(*) as cnt FROM %sGeo GROUP BY Org HAVING Org!='' ORDER BY cnt DESC" % tbl).fetchall()

                def setColors(lst, rand=True):
                    col = rand and random.sample(colX, len(colX)) or colX
                    def hexIt():
                        return hex(int(256*random.random()))[-2:].replace("x", "0")
                    def colHex():
                        heX = "#%s%s%s" % (hexIt(), hexIt(), hexIt())
                        return [senAdd.colGrad("#FFFFFF", heX, 35), heX]
                    lstDct = dict([[x[0], (i<len(col)) and col[i] or colHex()] for i,x in enumerate(lst)])
                    lstDct[""] = ["#ffffff", "#cccccc"]
                    return lstDct

                lblDct = setColors(lbl)
                orgDct = setColors(org)

                s.c.execute("CREATE TABLE IF NOT EXISTS %sColors (Key, Color1, Color2, Type, UNIQUE(Key, Type));" % tbl)
                s.c.executemany("INSERT OR IGNORE INTO %sColors VALUES (?, ?, ?, ?)" % tbl, [[x[0], x[1][0], x[1][1], 'Label'] for x in lblDct.items()])
                s.c.executemany("INSERT OR IGNORE INTO %sColors VALUES (?, ?, ?, ?)" % tbl, [[x[0], x[1][0], x[1][1], 'Org'] for x in orgDct.items()])
                s.index(['Key', 'Type'], table="%sColors" % tbl)

            #OTHER PARAMETERS GET FILTERED HERE

            year = infoSearch('year')
            amt = infoSearch('amt')
            agency = infoSearch('agency')
            org = infoSearch('Org[]', blank=True)
            label = infoSearch('Label[]', blank=True)
            if type(org)!=types.ListType and org!=None:
                org = [org]
            if type(label)!=types.ListType and label!=None:
                label = [label]
            where = []

            if year!=None:
                if len(str(year).split("-"))>1:
                    where.append("Year BETWEEN %s AND %s" % tuple(year.split("-")))
                else:
                    where.append("Year=%s" % year)
                    
            if amt!=None:
                if len(str(amt).split("-"))>1:
                    where.append("Amount BETWEEN %s AND %s" % tuple([float(x)*1000000.0 for x in amt.split("-")]))
                else:
                    where.append("Amount=%s" % (float(amt)*1000000.0))

            #patent specific
            if agency!=None and agency!="":
                where.append("'/'||Agency||'/' LIKE '%%/%s/%%'" % agency)
                degree = infoSearch('degree')
                if self.table=="pat" and degree!=None:
                    where.append("degree <= %s" % degree.split("-")[-1])
            if org!=None:
                where.append("Org in ('%s')" % "', '".join(org))
            if label!=None:
                labelStr = []
                for x in label:
                    labelStr.append("'/'||Label||'/' LIKE '%%/%s/%%'" % x)
                where.append("(%s)" % (" OR ".join(labelStr)))

            where = " AND ".join(where)
            if where!="":
                where = "WHERE %s" % where            

            #Don't need colors now
            #cols = dict([["%s|%s" % (x[0], x[3]), [x[1], x[2]]] for x in s.c.execute("SELECT * FROM %sColors" % self.table).fetchall()])
            #orgs = dict([x[:2] for x in s.c.execute("SELECT Org, OrgName, count(*) as cnt FROM %sGeo GROUP BY Org ORDER BY cnt DESC LIMIT %d" % (self.table, orgNum)).fetchall()])
            orgs = dict([x for x in s.c.execute("SELECT Org, OrgName as cnt FROM %sGeo GROUP BY Org" % (self.table)).fetchall()])
            if self.table=="grant":
                column = ['City', 'Org', 'Label', 'Agency', 'Lng', 'Lat', 'cnt', 'amt']
                data = s.c.execute("SELECT City, GROUP_CONCAT(Org,'/'), GROUP_CONCAT(Label,'/'), GROUP_CONCAT(Agency,'/'), Lng, Lat, count(*) AS cnt, sum(Amount) AS amt FROM %sGeo %s GROUP BY Agency, Lat,Lng" % (self.table, where)).fetchall()
            else:
                column = ['City', 'Org', 'Label', 'Agency', 'Lng', 'Lat', 'cnt']
                data = s.c.execute("SELECT City, GROUP_CONCAT(Org,'/'), GROUP_CONCAT(Label,'/'), GROUP_CONCAT(Agency,'/'), Lng, Lat, count(*) AS cnt FROM %sGeo %s GROUP BY Lat,Lng" % (self.table, where)).fetchall()
            data = zip(*data)

            def splitCount(lists):
                retLst = []
                for lst in lists:
                    if lst==None:
                        retLst.append("")
                    elif len(str(lst).split("/"))==1:
                        retLst.append(lst)
                    else:
                        lst = lst.split("/")
                        lst = sorted([[x, lst.count(x)] for x in set(lst)], key=lambda x:x[1], reverse=True)[:orgEach]
                        lst = "/".join(["~".join([x[0], str(x[1])]) for x in lst])
                        retLst.append(lst)
                return retLst

            marks = {}
            for i,x in enumerate(data):
                marks[column[i]] = splitCount(x)

            #Determine relevant Colors
            col = {}
            for x in set(data[column.index("Org")]):
                if x!=None:
                    for y in x.split("/"):
                        k = y.split("~")[0]
                        key = "%s|%s" % (k, "Org")
                        if k in orgs and key not in col:
                            #col[key] = [cols[key], orgs[k]] ##THIS IS COLOR
                            col[key] = [[], orgs[k]] ##COLOR NOT NEEDED NOW
                        
            #WE ONLY NEED TOP ONES.. LIKE TOP100
            clsCnt = {}
            for x in set(data[column.index("Label")]):
                if x!=None:
                    for y in x.split("/"):
                        k = y.split("~")[0]
                        if k not in clsCnt:
                            clsCnt[k] = 0
                        clsCnt[k] = clsCnt[k] + 1
                        #key = "%s|%s" % (k, "Label")
                        #col[key] = [cols[key], k in lblTxt[self.table] and lblTxt[self.table][k] or k] ##THIS IS COLOR
            #clsCnt = sorted(clsCnt.items(), reverse=True, key=lambda x:x[1])[:orgNum]
            clsCnt = sorted(clsCnt.items(), reverse=True, key=lambda x:x[1])
            
            for x in clsCnt:
                k = x[0]
                key = "%s|%s" % (k, "Label")
                col[key] = [[], k in lblTxt[lblTbl[self.table]] and lblTxt[lblTbl[self.table]][k] or k]
            #########################

            s.close()

            def cleanJSON(string):
                import re
##                #H000123 > H123
##                string = re.sub("([A-Z])([0]+)([0-9]+)", lambda x:x.group(1)+x.group(3), string)
                #1.599999 > 1.6
                string = re.sub("([0-9]+[.][0-9]+?)([0-8])[9]{4,}[0-9]+", lambda x:x.group(1)+str(int(x.group(2))+1), string)
                #1.100001 > 1.1
                string = re.sub("([0-9]+[.][0-9]+?)[0]{4,}[0-9]+", lambda x:x.group(1), string)
                string = re.sub("  +", " ", string)
                return string
                
            jsonout = cleanJSON(json.dumps({'marks':marks, 'colors':col}))
            mc.set(mckey, jsonout)
        return jsonout
예제 #22
0
    def __init__(self,
                 urls,
                 maxconn=20,
                 SQLdb=":memory:",
                 SQLtbl="main",
                 cache=604800,
                 opt="F"):
        #cache = how long do we cache the results, in seconds?  default = 7 day
        #cache = None, cache forever

        #opt = S(low), M(edium), F(ast).  Faster we go: more potential errors
        opt = {
            pycurl.FOLLOWLOCATION: 1,
            pycurl.CONNECTTIMEOUT: {
                "S": 120,
                "M": 60,
                "F": 30
            }[opt],
            pycurl.TIMEOUT: {
                "S": 300,
                "M": 120,
                "F": 45
            }[opt],
            pycurl.NOSIGNAL: 1
        }

        #idea type: [[key, url] ... ]
        if type(urls) == types.StringType:
            urls = [[0, urls]]
        elif type(urls) == types.ListType:
            if type(urls[0]) != types.ListType:
                urls = [[i, x] for i, x in enumerate(urls)]

        self.maxconn = maxconn
        self.opt = opt
        self.list = []

        self.SQLtbl = SQLtbl
        self.sql = SQLite.SQLite(SQLdb, tbl=SQLtbl)
        self.sql.c.execute(
            "CREATE TABLE IF NOT EXISTS {tbl} (key, url, html TEXT, created REAL)"
            .format(tbl=SQLtbl))
        self.sql.index(["key"], unique=True)
        self.sql.index(["key", "created"])
        self.sql.index(["url"])

        okUrl = []
        cTime = time.time()
        for x in urls:
            if self.sql.c.execute(
                    "SELECT count(*) FROM {tbl} WHERE key=?".format(
                        tbl=SQLtbl), (x[0], )).fetchone()[0] == 0:
                okUrl.append(x)
            elif cache != None:
                if self.sql.c.execute(
                        "SELECT count(*) FROM {tbl} WHERE key=? and created>?".
                        format(tbl=SQLtbl),
                    (x[0], cTime - cache)).fetchone()[0] == 0:
                    okUrl.append(x)
        if len(okUrl) > 0:
            self.__init_curl(okUrl, maxconn)
            self.new = True
        else:
            self.new = False
예제 #23
0
#This file is meant to separate the patent related datasets by Year

import sys, datetime, os
sys.path.append("/home/ron/PythonBase")
import SQLite
import senAdd

yr = int(sys.argv[1])  #min year
src = sys.argv[2]  #source directory

direc = '/home/ron/disambig/sqlite/backup'

print "Generating patent{yr}".format(yr=yr)
s = SQLite.SQLite(db='{direc}/patent{yr}'.format(direc=direc, yr=yr))
s.optimize()
for file in [
        x for x in os.listdir(src)
        if x.split(".")[1] == "sqlite3" and x.split(".")[0] != "hashTbl"
]:
    print file
    s.attach('{src}/{file}'.format(src=src, file=file))
    table = file.split(".")[0]
    s.replicate(tableTo=table, table=table, db="db")
    s.addSQL(db="db", data=table, table=table)

s.close()
예제 #24
0
#This file is meant to separate the patent related datasets by Year

import sys, datetime
sys.path.append("/home/ron/PythonBase")

import SQLite
import senAdd

direc = '/home/ron/disambig/sqlite/backup'
table = 'patent'
yrRng = range(1975, 2010)

s = SQLite.SQLite(db='%s/patent2009' % direc)
tables = s.tables()
s.close()

for table in tables:
    print "Generating", table
    s = SQLite.SQLite(db='%s/%s.sqlite3' % (direc, table), tbl=table)
    s.optimize()
    for yr in yrRng:
        s.attach('%s/patent%d' % (direc, yr))
        #Only add if table exists
        if s.tables(db='db', lookup=table):
            s.addSQL(data=table, db='db', insert='IGNORE')
        print "  -", yr, datetime.datetime.now()
    s.count()
    s.close()
예제 #25
0
    def graph(self, vertex_list=None, where=None, flag=[], output=":memory:"):
        import datetime, SQLite, os

        oldfile = os.path.isfile(output) and True or False
        s = SQLite.SQLite(output, tbl="G0")
        if not (oldfile):
            if vertex_list != None:
                if type(vertex_list) in (types.ListType, types.TupleType):
                    vx = [(x[0], ) for x in vertex_list]
                else:
                    vx = [(x, ) for x in vertex_list]

                self.c.execute(
                    "CREATE TEMPORARY TABLE gmerge (Invnum_N VARCHAR(255), Unique(Invnum_N));"
                )
                self.c.executemany("INSERT IGNORE INTO gmerge VALUES (%s)", vx)
                self.c.execute("""            
                    CREATE TEMPORARY TABLE G0 AS
                        SELECT  a.*
                          FROM  %s AS a
                    INNER JOIN  gmerge AS b
                            ON  a.Invnum_N=b.Invnum_N
                            %s;
                    """ % (self.table,
                           (where != None) and "WHERE %s" % where or ""))
                #flag gets created here...
            elif where == None:
                self.c.execute(
                    """
                    CREATE TEMPORARY TABLE G0 AS
                        SELECT  a.*
                          FROM  %s AS a
                    INNER JOIN  gmerge AS b
                            ON  a.Invnum_N=b.Invnum_N
                         WHERE  %s;""" % self.table, where)

            # CREATE SQLite Data
            self.c.execute("DESCRIBE invpat")
            s.conn.create_function("flag", 1,
                                   lambda x: x in flag and "x" or "")
            s.c.execute("CREATE TABLE G0 (%s)" %
                        ", ".join([" ".join(x[:2])
                                   for x in self.c.fetchall()]))
            self.c.execute("SELECT * FROM G0")
            s.addSQL(data=self.c.fetchall())
            s.add("flag", "")
            s.c.execute("UPDATE G0 SET flag=flag(Invnum_N);")

            #how do we incorporate new fields?
            s.c.executescript("""
                DROP TABLE IF EXISTS vx0;
                DROP TABLE IF EXISTS ed0;
                CREATE INDEX IF NOT EXISTS G0_AY ON G0 (AppYear);
                CREATE INDEX IF NOT EXISTS G_id ON G0 (Patent);
                CREATE INDEX IF NOT EXISTS G_ed ON G0 (Invnum_N, Patent);
                CREATE TABLE vx0 AS
                    SELECT  Invnum_N AS id, count(*) AS cnt, *,
                            GROUP_CONCAT(Class) AS Classes
                      FROM  G0
                  GROUP BY  Invnum_N
                  ORDER BY  AppYear;
                CREATE INDEX IF NOT EXISTS vx_id ON vx0 (Invnum_N);
                CREATE TABLE ed0 AS
                    SELECT  a.Invnum_N AS h, b.Invnum_N AS t, a.AppYear AS AppYear, a.Patent AS Patent, a.Class AS Class
                      FROM  G0 AS a INNER JOIN G0 AS b
                        ON  a.Patent=b.Patent AND a.Invnum_N<b.Invnum_N;
                """)

        self.tab = senTab()
        self.tab.vList = s.c.execute("SELECT * FROM vx0").fetchall()
        self.tab.vlst = s.columns(table="vx0", output=False)[1:]
        self.tab.eList = s.c.execute("SELECT * FROM ed0").fetchall()
        self.tab.elst = s.columns(table="ed0", output=False)[2:]
        s.close()
예제 #26
0
        line = f.readline()
        DayInformationTemp = DayInformation(line)
        if (DayInformationTemp.RegisterType == "01"):
            Alldata.append(DayInformationTemp)

    f.close()
    print("Load completed")
    Current_PTOEXE = ""
    AllDailyStockInfo = []
    Alldata.sort(key=Get_PTOEXE)
    for DayInformationin in Alldata:
        if Current_PTOEXE != DayInformationin.PTOEXE:
            InsertStock(AllDailyStockInfo)
            AllDailyStockInfo.clear()
        AllDailyStockInfo.append(DayInformationin)
        Current_PTOEXE = DayInformationin.PTOEXE
    InsertStock(AllDailyStockInfo)


#Change here the name of database file!!!
CONST_NAME_DATABASE = 'ActionsInfo.db'

sQLite = SQLite(CONST_NAME_DATABASE)
sQLite.Open()
CreateTables()
#Change here the name of TXT bovespa file!!!
FileName = "COTAHIST_A2018.TXT"
LoadFileCallAddInDB(FileName)
sQLite.Close()
#input("Press Enter to continue...")
예제 #27
0
import sys
sys.path.append('./lib/')
import SQLite
import datetime
import shutil

t1 = datetime.datetime.now()
print "Start", t1

##Create invpat
ip = SQLite.SQLite(db='invpat.sqlite3', tbl='invpat')
ip.c.execute("DROP TABLE IF EXISTS invpat")
ip.c.execute(
    """CREATE TABLE invpat(Firstname TEXT, Middlename TEXT, Lastname TEXT, Street TEXT,
            City TEXT, State TEXT, Country TEXT, Zipcode TEXT, Latitude REAL,
            Longitude REAL, InvSeq INT, Patent TEXT, AppYear TEXT, ApplyYear TEXT, GYear INT,
            AppDate TEXT, Assignee TEXT, AsgNum INT, Class TEXT, Coauthor TEXT, Invnum TEXT,
            Invnum_N TEXT, Unique_Record_ID TEXT);""")

##From inventor.sqlite3: Firstname, Lastname, Street, City, State, Country, Zipcode, Latitude, Longitude, InvSeq
ip.attach('inventor.sqlite3')
ip.c.execute("""INSERT INTO invpat (
                  Firstname,
                  Lastname,
                  Street,
                  City,
                  State,
                  Country,
                  Zipcode,
                  Latitude,
                  Longitude,
예제 #28
0
 def test_constructor_dbname_table(self):
     s = SQLite.SQLite(db='foobar.sqlite3', table='table_foo')
     assert (s.db == 'foobar.sqlite3')
     assert (s.tbl == 'table_foo')
예제 #29
0
 def test_index(self):
     s = SQLite.SQLite('test.sqlite3')
     create_assignee_schema(s.c)
     initialize_assignees(s.conn)
     assert (1 == 1)
예제 #30
0
#unique subclass-combiation counter

import sys, datetime
sys.path.append("/home/ron/PythonBase")

import SQLite
import senAdd

s = SQLite.SQLite(db='../sqlite/class_count.sqlite3', tbl='class');
s.conn.create_function("pType", 1, senAdd.patType)
s.attach('../sqlite/class.sqlite3', name='cls')
s.replicate(table='class', db='cls')
s.add('pat_type', 'varchar(1)', table='class')
s.add('fullcls', 'text', table='class')
s.index(keys=['Patent', 'FullCls'], table="class", unique=True)
s.index(keys=['Pat_Type'], table="class", unique=False)
s.count()
s.c.execute("INSERT OR IGNORE INTO class SELECT *, pType(patent), class||'-'||subclass FROM cls.class")
s.commit()
s.count()
#I don't want to really deal with non utility patents right now, delete them
s.c.execute("DELETE FROM class WHERE pat_type!='U'")
s.commit()
s.count()
#First CLASS-SUBCLASS combinations, elimanate these
cls = s.c.execute("SELECT min(Patent), FullCls FROM class GROUP BY FullCls").fetchall()
s.c.executemany("DELETE FROM class WHERE patent=? and FullCls=?", cls)
s.commit()
s.count()
#Determine all possible pairs for each patent
s.c.execute("""