def __init__(self, database): self.db=Database(database,password = '******',type=MYSQL) self.userTable() self.relationTable() self.retweetTable() self.originalTable() self.wordTable()
def get_database(db_name, clean=False): if clean: if os.path.exists(db_name): os.remove(db_name) if os.path.exists(db_name + '-journal'): os.remove(db_name + '-journal') db = Database(db_name) if 'room' not in db.tables: db.create('room', fields=[ pk(), field('type', STRING(10)), field('email', STRING(100)), field('contact_name', STRING(100)), field('link', STRING(200)), field('rent', STRING(50)), field('available', STRING(100)), ]) return db
class WebsiteDatabase(object): class DbException(Exception): pass def __init__(self): self.db = Database('website_history') self.createTables() def createTables(self): errMsg = ',ommitting...' try: self.db.create('websites', fields=(pk('id'), field('address', index=UNIQUE), field('domain_id'), field('connected', BOOLEAN, default=True), field('lastVisited', DATE, default=NOW))) except TableError as err: print err, errMsg try: self.db.create('links', fields=(pk('id'), field('website', INTEGER, optional=False), field('reference', INTEGER, optional=False))) except TableError as err: print err, errMsg try: self.db.create('domains', fields=(pk('id'), field('name', STRING(80), index=UNIQUE))) except TableError as err: print err, errMsg try: self.db.create('session', fields=(pk('id'), field('website_id', INTEGER, optional=False, index=UNIQUE), field('depth', INTEGER, optional=False))) except TableError as err: print err, errMsg self.db.link(self.db.domains, 'domains.id', self.db.websites, 'websites.domain_id') self.db.link(self.db.session, 'session.website_id', self.db.websites, 'websites.id') def getDomainId(self, name): results = self.db.domains.search(filters=all(eq('name', name))).rows() if len(results) > 0: return results[0][0] return None def insertDomain(self, domainName): try: self.db.domains.append(name=domainName) except sqlite3.IntegrityError: pass def insertRelation(self, parent, child): if parent is None or child is None: return parentId = self.db.websites.search( filters=all(eq('address', parent.url.string))).rows()[0][0] childId = self.db.websites.search( filters=all(eq('address', child.url.string))).rows()[0][0] currentRecords = self.db.links.search(filters=all( eq('website', parentId), eq('reference', childId))).rows() if len(currentRecords) == 0: self.db.links.append(website=parentId, reference=childId) def insertWebpage(self, page, connection=False): idDomain = None dateVisited = None if page.url.domain: self.insertDomain(page.url.domain) idDomain = self.getDomainId(page.url.domain) if connection: dateVisited = date(NOW) try: self.db.websites.append(address=page.url.string, domain_id=idDomain, connected=connection, lastVisited=dateVisited) self.insertRelation(page.parent, page) except sqlite3.IntegrityError: if connection: self.db.websites.update(all(eq('address', page.url.string)), connected=True, lastVisited=dateVisited) # for link in page.getLinks(): # if link.url.anchor and not link.isWebPage(): # continue # self.insertWebpage(link) # self.insertRelation(page, link) def readWebPage(self, urlString, depth=1, isExternal=False): webPageData = self.db.websites.search( filters=all(eq('address', WebPage.parseUrl(urlString).string))).rows() pageLinks = [] result = None if len(webPageData) == 0: return result webPageData = webPageData[0] pageId = webPageData[0] depthData = self.db.session.search('depth', all(eq('website_id', pageId))) if len(depthData) > 0: depth = depthData[0][0] result = WebPage(url=webPageData[1], depth=depth, isExternal=isExternal) query = self.db.execute( 'SELECT w.{0}, r.{0} from links join websites as w on links.{1} = w.id join websites as r on links.{2} = r.id WHERE w.id = {3};' .format(self.db.websites.fields[1], self.db.links.fields[1], self.db.links.fields[2], pageId)) for row in iter(query): pageLinks.append( WebPage(url=row[1], parent=result, depth=depth + 1)) result.links = pageLinks return result def wasPageVisited(self, page): webPageData = self.db.websites.search('lastVisited', filters=all( eq('address', page.url.string))).rows() if len(webPageData) == 0: return False dateVisited = webPageData[0][0] if dateVisited is None: return False return True def isInThisSession(self, page): webPageData = self.db.session.search('websites.address', filters=all( eq('websites.address', page.url.string))).rows() if len(webPageData) > 0: return True return False def appendSession(self, page): pageId = self.db.websites.search('id', filters=all( eq('address', page.url.string))).rows() if len(pageId) == 0: raise DbException( "Trying to append website to session without having it in website table" ) pageId = pageId[0][0] try: self.db.session.append(website_id=pageId, depth=page.depth) except sqlite3.IntegrityError as err: print "Invalid session data, cleaning session..." self.clearSession() def clearSession(self): sessionData = self.db.session.rows() for row in iter(sessionData): self.db.session.remove(row[0])
def __init__(self): self.db = Database('website_history') self.createTables()
from pattern.db import Database, SQLITE, MYSQL from pattern.db import field, pk, STRING, INTEGER, DATE, NOW from pattern.db import rel # In this example, we'll build a mini-store: # with products, customers and orders. # We can combine the data from the three tables in an invoice query. # Create a new database. # Once it is created, you can use Database(name) to access it. # SQLite will create the database file in the current folder. # MySQL databases require a username and a password. # MySQL also requires that you install MySQLdb, see the installation instructions at: # http://www.clips.ua.ac.be/pages/pattern-db db = Database("store.db", type=SQLITE) #db._delete() # PRODUCTS # Create the products table if it doesn't exist yet. # An error will be raised if the table already exists. # Add sample data. if not "products" in db: # Note: in SQLite, the STRING type is mapped to TEXT (unlimited length). # In MySQL, the length matters. Smaller fields have faster lookup. db.create( "products", fields=( pk(), # Auto-incremental id. field("description", STRING(50)), field("price", INTEGER)))
from pattern.db import field, pk, STRING, INTEGER, DATE, NOW from pattern.db import assoc from pattern.db import rel from pattern.db import pd # pd() = parent directory of current script. # In this example, we'll build a mini-store: # with products, customers and orders. # We can combine the data from the three tables in an invoice query. # Create a new database. # Once it is created, you can use Database(name) to access it. # SQLite will create the database file in the current folder. # MySQL databases require a username and a password. # MySQL also requires that you install MySQLdb, see the installation instructions at: # http://www.clips.ua.ac.be/pages/pattern-db db = Database(pd("store.db"), type=SQLITE) #db._delete() # PRODUCTS # Create the products table if it doesn't exist yet. # An error will be raised if the table already exists. # Add sample data. if not "products" in db: # Note: in SQLite, the STRING type is mapped to TEXT (unlimited length). # In MySQL, the length matters. Smaller fields have faster lookup. schema = ( pk(), # Auto-incremental id. field("description", STRING(50)), field("price", INTEGER) ) db.create("products", schema)
from pattern.db import Database, SQLITE, MYSQL from pattern.db import field, pk, STRING, INTEGER, DATE, NOW from pattern.db import assoc from pattern.db import rel # In this example, we'll build a mini-store: # with products, customers and orders. # We can combine the data from the three tables in an invoice query. # Create a new database. # Once it is created, you can use Database(name) to access it. # SQLite will create the database file in the current folder. # MySQL databases require a username and a password. # MySQL also requires that you install MySQLdb, see the installation instructions at: # http://www.clips.ua.ac.be/pages/pattern-db db = Database("store.db", type=SQLITE) #db._delete() # PRODUCTS # Create the products table if it doesn't exist yet. # An error will be raised if the table already exists. # Add sample data. if not "products" in db: # Note: in SQLite, the STRING type is mapped to TEXT (unlimited length). # In MySQL, the length matters. Smaller fields have faster lookup. schema = ( pk(), # Auto-incremental id. field("description", STRING(50)), field("price", INTEGER)) db.create("products", schema) db.products.append(description="pizza", price=15)
from pattern.db import Database, SQLITE, MYSQL from pattern.db import field, pk, STRING, INTEGER, DATE, NOW from pattern.db import rel # In this example, we'll build a mini-store: # with products, customers and orders. # We can combine the data from the three tables in an invoice query. # Create a new database. # Once it is created, you can use Database(name) to access it. # SQLite will create the database file in the current folder. # MySQL databases require a username and a password. # MySQL also requires that you install MySQLdb, see the installation instructions at: # http://www.clips.ua.ac.be/pages/pattern-db db = Database("store.db", type=SQLITE) #db._delete() # PRODUCTS # Create the products table if it doesn't exist yet. # An error will be raised if the table already exists. # Add sample data. if not "products" in db: # Note: in SQLite, the STRING type is mapped to TEXT (unlimited length). # In MySQL, the length matters. Smaller fields have faster lookup. db.create("products", fields=( pk(), # Auto-incremental id. field("description", STRING(50)), field("price", INTEGER) )) db.products.append(description="pizza", price=15)
from pattern.db import Database, field, pk, STRING, BOOLEAN, DATE, NOW, date,\ MYSQL db = Database(name='hootguy_tweetgraph', host='50.87.144.127', port=3306, username='******', password='******' #, type = MYSQL ) print 'tables = ', db.tables print 'relations = ', db.relations print 'query = ', db.query #db.create('chows', fields=[ # pk() # , field('manufacturer', STRING(80)) # , field('brand', STRING(80), index=True) # , field('price', STRING(15)) #]) # db.create('pets', fields=[ # pk(), # field('name', STRING(80), index=True), # field('type', STRING(20)), # field('tail', BOOLEAN), # field('date_birth', DATE, default=None), # field('date_created', DATE, default=NOW) # ])
class weibodb(Database): def __init__(self, database): self.db=Database(database,password = '******',type=MYSQL) self.userTable() self.relationTable() self.retweetTable() self.originalTable() self.wordTable() def userTable(self): if not "usertable" in self.db: self.db.create("usertable",fields=( pk(), # Auto-incremental id. field("mapid",INTEGER),#用户映射id field("userid",STRING(20),index=UNIQUE),#用户UID field("name",STRING(50)),#友好显示名称 field("province",STRING(20)),#用户所在省级ID field("city",STRING(20)),# int 用户所在城市ID field("location",STRING(50)),# string 用户所在地 field("description",TEXT),# string 用户个人描述 field("gender",STRING(10)),# string 性别,m:男、f:女、n:未知 field("followersCount",INTEGER),# int 粉丝数 field("friendsCount",INTEGER),# int 关注数 field("statusesCount",INTEGER),# int 微博数 field("createdAt",DATE),# string 用户创建(注册)时间 field("verified",STRING(10)),# STRING(10) 是否是微博认证用户,即加V用户,true:是,false:否 field("verifiedType",STRING(20)),# int 暂未支持 field("biFollowersCount",INTEGER)# int 用户的互粉数 )) def wordTable(self): if not "wordtable" in self.db: self.db.create("wordtable",fields=( pk(), # Auto-incremental id. field("mapid",INTEGER,index=UNIQUE), field("freq",INTEGER), field("word",TEXT) )) def relationTable(self): if not "relationtable" in self.db: self.db.create("relationtable",fields=( pk(), # Auto-incremental id. field("user_v1",INTEGER),#用户1UID field("user_v2",INTEGER),#用户2UID field("re_0",INTEGER,default=-1),#初始关系 field("re_1",INTEGER,default=-1),#关系变化 field("re_2",INTEGER,default=-1),#关系变化 field("re_3",INTEGER,default=-1),#关系变化 field("re_4",INTEGER,default=-1),#关系变化 field("re_5",INTEGER,default=-1),#关系变化 field("re_6",INTEGER,default=-1),#关系变化 field("re_7",INTEGER,default=-1),#关系变化 field("re_8",INTEGER,default=-1),#关系变化 field("re_9",INTEGER,default=-1),#关系变化 field("re_10",INTEGER,default=-1),#关系变化 field("re_11",INTEGER,default=-1),#关系变化 field("re_12",INTEGER,default=-1),#关系变化 field("re_13",INTEGER,default=-1),#关系变化 field("re_14",INTEGER,default=-1),#关系变化 field("re_15",INTEGER,default=-1),#关系变化 field("re_16",INTEGER,default=-1),#关系变化 field("re_17",INTEGER,default=-1),#关系变化 field("re_18",INTEGER,default=-1),#关系变化 field("re_19",INTEGER,default=-1),#关系变化 field("re_20",INTEGER,default=-1),#关系变化 field("re_21",INTEGER,default=-1),#关系变化 field("re_22",INTEGER,default=-1),#关系变化 field("re_23",INTEGER,default=-1),#关系变化 field("re_24",INTEGER,default=-1),#关系变化 field("re_25",INTEGER,default=-1),#关系变化 field("re_26",INTEGER,default=-1),#关系变化 field("re_27",INTEGER,default=-1),#关系变化 field("re_28",INTEGER,default=-1),#关系变化 field("re_29",INTEGER,default=-1),#关系变化 field("re_30",INTEGER,default=-1),#关系变化 field("re_31",INTEGER,default=-1),#关系变化 field("re_32",INTEGER,default=-1)#关系变化 )) def retweetTable(self): if not "retwtable" in self.db: self.db.create("retwtable",fields=( pk(), # Auto-incremental id. field("sid",STRING(20),index=UNIQUE), field("origin_sid",STRING(20)), field("uid",STRING(20)), field("createdAt",DATE), field("status",TEXT), field("mention",STRING(50)), field("rwfrom",STRING(50)), field("link",STRING(50)) )) def originalTable(self): if not "originaltable" in self.db: self.db.create("originaltable",fields=( pk(), # Auto-incremental id. field("sid",STRING(20),index=UNIQUE), field("uid",STRING(20)), field("createdAt",DATE), field("status",TEXT), field("mention",STRING(50)), field("link",STRING(50)), field("totalrw",INTEGER), field("rwnum",INTEGER) )) def userInsert(self,userdict): try: self.db.usertable.append( mapid=userdict["mapid"],#用户映射id userid=userdict["userid"]#用户UID ) except IntegrityError: print "User"+str(userdict['userid'])+" has existed!" except: print "error when inserting user recorder:"+str(userdict['userid'])+"!!!!" for kk,vv in userdict.items(): print str(kk)+'='+str(vv) pass def userUpdate(self,userdict): try: q=self.db.usertable.search(fields=['id','mapid','userid'], filters=all(filter("userid",userdict[0])))# for row in q.rows(): a=row if (a): userdict.insert(0,a[1]) userup=tuple(userdict) # self.db.usertable.update(a[0],userup) self.db.usertable.update(a[0],name=userdict[2],province=userdict[3],city=userdict[4], location=userdict[5],description=userdict[6],gender=userdict[7], followersCount=userdict[8],friendsCount=userdict[9], statusesCount=userdict[10],createdAt=userdict[11], verified=userdict[12],verifiedType=userdict[13],biFollowersCount=userdict[14]) print "User "+str(userdict[1])+" has updated!" else: userdict.insert(0,-1) userup=tuple(userdict) self.db.usertable.append(userup) # self.db.usertable.append( # # mapid=userdict["mapid"],#用户映射id # userid=userdict["userid"],#用户UID # name=userdict["name"],#友好显示名称 # province=userdict["province"],#用户所在省级ID # city=userdict["city"],# int 用户所在城市ID # location=userdict["location"],# string 用户所在地 # description=userdict["description"],# string 用户个人描述 # gender=userdict["gender"],# string 性别,m:男、f:女、n:未知 # followersCount=userdict["followersCount"],# int 粉丝数 # friendsCount=userdict["friendsCount"],# int 关注数 # statusesCount=userdict["statusesCount"],# int 微博数 # createdAt=userdict["createdAt"],# string 用户创建(注册)时间 # verified=userdict["verified"],# STRING(10) 是否是微博认证用户,即加V用户,true:是,false:否 # verifiedType=userdict["verifiedType"],# int 暂未支持 # biFollowersCount=userdict["biFollowersCount"]# int 用户的互粉数 # ) print "user "+"User "+str(userdict[1])+" has appended!" except IntegrityError: print "User"+str(userdict['userid'])+" has existed!" pass except: print "error when updating user recorder:"+str(userdict['userid'])+"!!!!" for kk,vv in userdict.items(): print str(kk)+'='+str(vv) exit(0) def relBatchInsert(self,rellist): conn = MySQLdb.connect(host='localhost',user='******',passwd='20090924',charset='utf8') cursor = conn.cursor() try: DB_NAME = 'tangdb' conn.select_db(DB_NAME) cursor.executemany('INSERT INTO relationtable values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)',rellist) # for rel in rellist: # self.db.relationtable.append((rel)) conn.commit() except: print "batch insert error!!" exit(0) cursor.close() conn.close() def wordInsert(self,wlist): conn = MySQLdb.connect(host='localhost',user='******',passwd='20090924',charset='utf8') cursor = conn.cursor() try: DB_NAME = 'tangdb' conn.select_db(DB_NAME) cursor.executemany('INSERT INTO wordtable values(%s,%s,%s,%s)',wlist) conn.commit() except: print "batch insert error!!" exit(0) cursor.close() conn.close() # def retweetInsert(self,rtlist): # conn = MySQLdb.connect(host='localhost',user='******',passwd='20090924',charset='utf8') # cursor = conn.cursor() # except: # print "batch insert error!!" # pass #============================================================================== # DB_NAME = 'tangdb' # conn.select_db(DB_NAME) # cursor.executemany('INSERT INTO retwtable values(%s,%s,%s,%s,%s,%s,%s,%s,%s)',rtlist) # conn.commit() #============================================================================== # # def originalInsert(self,orlist): # conn = MySQLdb.connect(host='localhost',user='******',passwd='20090924',charset='utf8') # cursor = conn.cursor() # def relationInsert(self,reldict): try: q=self.db.relationtable.search(fields=['id'],filters=all(filter("user_v1",reldict[0]),filter("user_v2",reldict[1]))) if (q.rows()): for row in q.rows(): a=row if (a): self.db.relationtable.update(a[0],{reldict[-1]:0}) print "Relation between User "+str(reldict[0])+" and User "+str(reldict[1])+" has updated!" else: self.db.relationtable.append({'user_v1':reldict[0],'user_v2':reldict[1],reldict[-1]:0}) print "Relation between User "+str(reldict[0])+" and User "+str(reldict[1])+" has inserted!" else: self.db.relationtable.append({'user_v1':reldict[0],'user_v2':reldict[1],reldict[-1]:0}) print "Relation between User "+str(reldict[0])+" and User "+str(reldict[1])+" has inserted!" except: print "error when inserting relation recorder:"+str(reldict[0])+' and '+str(reldict[1])+"!!!!" pass exit(0)
# coding: utf-8 from pattern.web import Twitter from pattern.db import Database, SQLITE from pattern.db import pd from pattern.db import field, pk, INTEGER, UNIQUE, STRING from sqlite3 import IntegrityError team = ['#galo', '#Galo', '#Atletico-MG', '#atletico mineiro'] twitter = Twitter() db = Database(pd('tweets.db')) if not "tweets" in db: db.create("tweets", fields = (pk(), field('code', INTEGER, UNIQUE), field('text', STRING(140)))) #query in Twitter for hashtag in team: for tweet in twitter.search(hashtag): try: db.tweets.append(code = tweet.id, text = tweet.text) except IntegrityError: pass #Separate tweets in database for data in db.tweets.filter(): print data[2] print '-'*30
import os, sys; sys.path.insert(0, os.path.join(os.path.dirname("../../"), "..", "..")) from pattern.web import Wikia from pattern.db import Database, SQLITE, MYSQL ########### math_qa config ################## math_config = qa_config.QASystemConfig() math_config.name = "Matrix Algebra" ############ database setup ######### db = Database( "db-1", host = 'localhost', port = 3306, username = '******', password = '******', type = MYSQL ) db.connect() ############ crawling ############ w = Wikia("mathematics", language = "en")