Exemple #1
0
 def __init__(self, database):
     self.db=Database(database,password = '******',type=MYSQL)
     self.userTable()
     self.relationTable()
     self.retweetTable()
     self.originalTable()
     self.wordTable()
def get_database(db_name, clean=False):
    if clean:
        if os.path.exists(db_name):
            os.remove(db_name)
        if os.path.exists(db_name + '-journal'):
            os.remove(db_name + '-journal')
    db = Database(db_name)
    if 'room' not in db.tables:
        db.create('room', fields=[
            pk(),
            field('type', STRING(10)),
            field('email', STRING(100)),
            field('contact_name', STRING(100)),
            field('link', STRING(200)),
            field('rent', STRING(50)),
            field('available', STRING(100)),
        ])
    return db
Exemple #3
0
class WebsiteDatabase(object):
    class DbException(Exception):
        pass

    def __init__(self):
        self.db = Database('website_history')
        self.createTables()

    def createTables(self):
        errMsg = ',ommitting...'

        try:
            self.db.create('websites',
                           fields=(pk('id'), field('address', index=UNIQUE),
                                   field('domain_id'),
                                   field('connected', BOOLEAN, default=True),
                                   field('lastVisited', DATE, default=NOW)))
        except TableError as err:
            print err, errMsg

        try:
            self.db.create('links',
                           fields=(pk('id'),
                                   field('website', INTEGER, optional=False),
                                   field('reference', INTEGER,
                                         optional=False)))
        except TableError as err:
            print err, errMsg

        try:
            self.db.create('domains',
                           fields=(pk('id'),
                                   field('name', STRING(80), index=UNIQUE)))
        except TableError as err:
            print err, errMsg

        try:
            self.db.create('session',
                           fields=(pk('id'),
                                   field('website_id',
                                         INTEGER,
                                         optional=False,
                                         index=UNIQUE),
                                   field('depth', INTEGER, optional=False)))
        except TableError as err:
            print err, errMsg

        self.db.link(self.db.domains, 'domains.id', self.db.websites,
                     'websites.domain_id')
        self.db.link(self.db.session, 'session.website_id', self.db.websites,
                     'websites.id')

    def getDomainId(self, name):
        results = self.db.domains.search(filters=all(eq('name', name))).rows()
        if len(results) > 0:
            return results[0][0]
        return None

    def insertDomain(self, domainName):
        try:
            self.db.domains.append(name=domainName)
        except sqlite3.IntegrityError:
            pass

    def insertRelation(self, parent, child):
        if parent is None or child is None:
            return

        parentId = self.db.websites.search(
            filters=all(eq('address', parent.url.string))).rows()[0][0]
        childId = self.db.websites.search(
            filters=all(eq('address', child.url.string))).rows()[0][0]
        currentRecords = self.db.links.search(filters=all(
            eq('website', parentId), eq('reference', childId))).rows()
        if len(currentRecords) == 0:
            self.db.links.append(website=parentId, reference=childId)

    def insertWebpage(self, page, connection=False):
        idDomain = None
        dateVisited = None

        if page.url.domain:
            self.insertDomain(page.url.domain)
            idDomain = self.getDomainId(page.url.domain)

        if connection:
            dateVisited = date(NOW)
        try:
            self.db.websites.append(address=page.url.string,
                                    domain_id=idDomain,
                                    connected=connection,
                                    lastVisited=dateVisited)
            self.insertRelation(page.parent, page)
        except sqlite3.IntegrityError:
            if connection:
                self.db.websites.update(all(eq('address', page.url.string)),
                                        connected=True,
                                        lastVisited=dateVisited)


#        for link in page.getLinks():
#            if link.url.anchor and not link.isWebPage():
#                continue
#            self.insertWebpage(link)
#            self.insertRelation(page, link)

    def readWebPage(self, urlString, depth=1, isExternal=False):
        webPageData = self.db.websites.search(
            filters=all(eq('address',
                           WebPage.parseUrl(urlString).string))).rows()
        pageLinks = []
        result = None

        if len(webPageData) == 0:
            return result

        webPageData = webPageData[0]
        pageId = webPageData[0]

        depthData = self.db.session.search('depth',
                                           all(eq('website_id', pageId)))
        if len(depthData) > 0:
            depth = depthData[0][0]

        result = WebPage(url=webPageData[1],
                         depth=depth,
                         isExternal=isExternal)

        query = self.db.execute(
            'SELECT w.{0}, r.{0} from links join websites as w on links.{1} = w.id join websites as r on links.{2} = r.id WHERE w.id = {3};'
            .format(self.db.websites.fields[1], self.db.links.fields[1],
                    self.db.links.fields[2], pageId))

        for row in iter(query):
            pageLinks.append(
                WebPage(url=row[1], parent=result, depth=depth + 1))
        result.links = pageLinks

        return result

    def wasPageVisited(self, page):
        webPageData = self.db.websites.search('lastVisited',
                                              filters=all(
                                                  eq('address',
                                                     page.url.string))).rows()
        if len(webPageData) == 0:
            return False
        dateVisited = webPageData[0][0]

        if dateVisited is None:
            return False
        return True

    def isInThisSession(self, page):
        webPageData = self.db.session.search('websites.address',
                                             filters=all(
                                                 eq('websites.address',
                                                    page.url.string))).rows()
        if len(webPageData) > 0:
            return True
        return False

    def appendSession(self, page):
        pageId = self.db.websites.search('id',
                                         filters=all(
                                             eq('address',
                                                page.url.string))).rows()
        if len(pageId) == 0:
            raise DbException(
                "Trying to append website to session without having it in website table"
            )

        pageId = pageId[0][0]
        try:
            self.db.session.append(website_id=pageId, depth=page.depth)
        except sqlite3.IntegrityError as err:
            print "Invalid session data, cleaning session..."
            self.clearSession()

    def clearSession(self):
        sessionData = self.db.session.rows()
        for row in iter(sessionData):
            self.db.session.remove(row[0])
Exemple #4
0
 def __init__(self):
     self.db = Database('website_history')
     self.createTables()
Exemple #5
0
from pattern.db import Database, SQLITE, MYSQL
from pattern.db import field, pk, STRING, INTEGER, DATE, NOW
from pattern.db import rel

# In this example, we'll build a mini-store:
# with products, customers and orders.
# We can combine the data from the three tables in an invoice query.

# Create a new database.
# Once it is created, you can use Database(name) to access it.
# SQLite will create the database file in the current folder.
# MySQL databases require a username and a password.
# MySQL also requires that you install MySQLdb, see the installation instructions at:
# http://www.clips.ua.ac.be/pages/pattern-db
db = Database("store.db", type=SQLITE)
#db._delete()

# PRODUCTS
# Create the products table if it doesn't exist yet.
# An error will be raised if the table already exists.
# Add sample data.
if not "products" in db:
    # Note: in SQLite, the STRING type is mapped to TEXT (unlimited length).
    # In MySQL, the length matters. Smaller fields have faster lookup.
    db.create(
        "products",
        fields=(
            pk(),  # Auto-incremental id.
            field("description", STRING(50)),
            field("price", INTEGER)))
Exemple #6
0
from pattern.db import field, pk, STRING, INTEGER, DATE, NOW
from pattern.db import assoc
from pattern.db import rel
from pattern.db import pd # pd() = parent directory of current script.

# In this example, we'll build a mini-store:
# with products, customers and orders.
# We can combine the data from the three tables in an invoice query.

# Create a new database. 
# Once it is created, you can use Database(name) to access it.
# SQLite will create the database file in the current folder.
# MySQL databases require a username and a password.
# MySQL also requires that you install MySQLdb, see the installation instructions at:
# http://www.clips.ua.ac.be/pages/pattern-db
db = Database(pd("store.db"), type=SQLITE)
#db._delete()

# PRODUCTS
# Create the products table if it doesn't exist yet.
# An error will be raised if the table already exists.
# Add sample data.
if not "products" in db:
    # Note: in SQLite, the STRING type is mapped to TEXT (unlimited length).
    # In MySQL, the length matters. Smaller fields have faster lookup.
    schema = (
        pk(), # Auto-incremental id.
        field("description", STRING(50)),
        field("price", INTEGER)    
    )
    db.create("products", schema)
Exemple #7
0
from pattern.db import Database, SQLITE, MYSQL
from pattern.db import field, pk, STRING, INTEGER, DATE, NOW
from pattern.db import assoc
from pattern.db import rel

# In this example, we'll build a mini-store:
# with products, customers and orders.
# We can combine the data from the three tables in an invoice query.

# Create a new database.
# Once it is created, you can use Database(name) to access it.
# SQLite will create the database file in the current folder.
# MySQL databases require a username and a password.
# MySQL also requires that you install MySQLdb, see the installation instructions at:
# http://www.clips.ua.ac.be/pages/pattern-db
db = Database("store.db", type=SQLITE)
#db._delete()

# PRODUCTS
# Create the products table if it doesn't exist yet.
# An error will be raised if the table already exists.
# Add sample data.
if not "products" in db:
    # Note: in SQLite, the STRING type is mapped to TEXT (unlimited length).
    # In MySQL, the length matters. Smaller fields have faster lookup.
    schema = (
        pk(),  # Auto-incremental id.
        field("description", STRING(50)),
        field("price", INTEGER))
    db.create("products", schema)
    db.products.append(description="pizza", price=15)
Exemple #8
0
from pattern.db import Database, SQLITE, MYSQL
from pattern.db import field, pk, STRING, INTEGER, DATE, NOW
from pattern.db import rel

# In this example, we'll build a mini-store:
# with products, customers and orders.
# We can combine the data from the three tables in an invoice query.

# Create a new database. 
# Once it is created, you can use Database(name) to access it.
# SQLite will create the database file in the current folder.
# MySQL databases require a username and a password.
# MySQL also requires that you install MySQLdb, see the installation instructions at:
# http://www.clips.ua.ac.be/pages/pattern-db
db = Database("store.db", type=SQLITE)
#db._delete()

# PRODUCTS
# Create the products table if it doesn't exist yet.
# An error will be raised if the table already exists.
# Add sample data.
if not "products" in db:
    # Note: in SQLite, the STRING type is mapped to TEXT (unlimited length).
    # In MySQL, the length matters. Smaller fields have faster lookup.
    db.create("products", fields=(
        pk(), # Auto-incremental id.
        field("description", STRING(50)),
        field("price", INTEGER)
    ))
    db.products.append(description="pizza", price=15)
Exemple #9
0
from pattern.db import Database, field, pk, STRING, BOOLEAN, DATE, NOW, date,\
    MYSQL

db = Database(name='hootguy_tweetgraph',
              host='50.87.144.127',
              port=3306,
              username='******',
              password='******'
              #, type = MYSQL
              )

print 'tables = ', db.tables
print 'relations = ', db.relations
print 'query = ', db.query

#db.create('chows', fields=[
#    pk()
#    , field('manufacturer', STRING(80))
#    , field('brand', STRING(80), index=True)
#    , field('price', STRING(15))
#])

# db.create('pets', fields=[
#           pk(),
#           field('name', STRING(80), index=True),
#           field('type', STRING(20)),
#           field('tail', BOOLEAN),
#           field('date_birth', DATE, default=None),
#           field('date_created', DATE, default=NOW)
# ])
Exemple #10
0
class weibodb(Database):
    def __init__(self, database):
        self.db=Database(database,password = '******',type=MYSQL)
        self.userTable()
        self.relationTable()
        self.retweetTable()
        self.originalTable()
        self.wordTable()
    def userTable(self):
        if not "usertable" in self.db:
            self.db.create("usertable",fields=(
             pk(), # Auto-incremental id.
             field("mapid",INTEGER),#用户映射id
             field("userid",STRING(20),index=UNIQUE),#用户UID
             field("name",STRING(50)),#友好显示名称
             field("province",STRING(20)),#用户所在省级ID
             field("city",STRING(20)),#	int	用户所在城市ID
             field("location",STRING(50)),#	string	用户所在地
             field("description",TEXT),#	string	用户个人描述
             field("gender",STRING(10)),#	string	性别,m:男、f:女、n:未知
             field("followersCount",INTEGER),#	int	粉丝数
             field("friendsCount",INTEGER),#	int	关注数
             field("statusesCount",INTEGER),#	int	微博数
             field("createdAt",DATE),#	string	用户创建(注册)时间
             field("verified",STRING(10)),#	STRING(10)	是否是微博认证用户,即加V用户,true:是,false:否
             field("verifiedType",STRING(20)),#	int	暂未支持
             field("biFollowersCount",INTEGER)#	int	用户的互粉数
            ))
            
    def wordTable(self):
        if not "wordtable" in self.db:
            self.db.create("wordtable",fields=(
             pk(), # Auto-incremental id.
             field("mapid",INTEGER,index=UNIQUE),
             field("freq",INTEGER),
             field("word",TEXT)
             ))
    def relationTable(self):
        if not "relationtable" in self.db:
            self.db.create("relationtable",fields=(
             pk(), # Auto-incremental id.
             field("user_v1",INTEGER),#用户1UID
             field("user_v2",INTEGER),#用户2UID
             field("re_0",INTEGER,default=-1),#初始关系
             field("re_1",INTEGER,default=-1),#关系变化
             field("re_2",INTEGER,default=-1),#关系变化
             field("re_3",INTEGER,default=-1),#关系变化
             field("re_4",INTEGER,default=-1),#关系变化
             field("re_5",INTEGER,default=-1),#关系变化
             field("re_6",INTEGER,default=-1),#关系变化
             field("re_7",INTEGER,default=-1),#关系变化
             field("re_8",INTEGER,default=-1),#关系变化
             field("re_9",INTEGER,default=-1),#关系变化
             field("re_10",INTEGER,default=-1),#关系变化
             field("re_11",INTEGER,default=-1),#关系变化
             field("re_12",INTEGER,default=-1),#关系变化
             field("re_13",INTEGER,default=-1),#关系变化
             field("re_14",INTEGER,default=-1),#关系变化
             field("re_15",INTEGER,default=-1),#关系变化
             field("re_16",INTEGER,default=-1),#关系变化
             field("re_17",INTEGER,default=-1),#关系变化
             field("re_18",INTEGER,default=-1),#关系变化
             field("re_19",INTEGER,default=-1),#关系变化
             field("re_20",INTEGER,default=-1),#关系变化
             field("re_21",INTEGER,default=-1),#关系变化
             field("re_22",INTEGER,default=-1),#关系变化
             field("re_23",INTEGER,default=-1),#关系变化
             field("re_24",INTEGER,default=-1),#关系变化
             field("re_25",INTEGER,default=-1),#关系变化
             field("re_26",INTEGER,default=-1),#关系变化
             field("re_27",INTEGER,default=-1),#关系变化
             field("re_28",INTEGER,default=-1),#关系变化
             field("re_29",INTEGER,default=-1),#关系变化
             field("re_30",INTEGER,default=-1),#关系变化
             field("re_31",INTEGER,default=-1),#关系变化
             field("re_32",INTEGER,default=-1)#关系变化
            ))
    def retweetTable(self):
        if not "retwtable" in self.db:
            self.db.create("retwtable",fields=(
                pk(), # Auto-incremental id.
                field("sid",STRING(20),index=UNIQUE),
                field("origin_sid",STRING(20)),                
                field("uid",STRING(20)),
                field("createdAt",DATE),
                field("status",TEXT),
                field("mention",STRING(50)),
                field("rwfrom",STRING(50)),
                field("link",STRING(50))        
                ))
    def originalTable(self):
        if not "originaltable" in self.db:
            self.db.create("originaltable",fields=(
                pk(), # Auto-incremental id.
                field("sid",STRING(20),index=UNIQUE),
                field("uid",STRING(20)),
                field("createdAt",DATE),
                field("status",TEXT),
                field("mention",STRING(50)),
                field("link",STRING(50)),
                field("totalrw",INTEGER),
                field("rwnum",INTEGER)
                ))        
            
    def userInsert(self,userdict):
        try:
            self.db.usertable.append(
             mapid=userdict["mapid"],#用户映射id
             userid=userdict["userid"]#用户UID
             )
        except IntegrityError:
            print "User"+str(userdict['userid'])+" has existed!"
        except:
            print "error when inserting user recorder:"+str(userdict['userid'])+"!!!!"
            for kk,vv in userdict.items():
                print str(kk)+'='+str(vv)
            pass   
    def userUpdate(self,userdict):
        try:
            q=self.db.usertable.search(fields=['id','mapid','userid'],
                                       filters=all(filter("userid",userdict[0])))#
            for row in q.rows():
                a=row
            if (a):
                userdict.insert(0,a[1])
                userup=tuple(userdict)
#                self.db.usertable.update(a[0],userup)
                self.db.usertable.update(a[0],name=userdict[2],province=userdict[3],city=userdict[4],
                                         location=userdict[5],description=userdict[6],gender=userdict[7],
                                            followersCount=userdict[8],friendsCount=userdict[9],
                                            statusesCount=userdict[10],createdAt=userdict[11],
                                            verified=userdict[12],verifiedType=userdict[13],biFollowersCount=userdict[14])
                print "User "+str(userdict[1])+" has updated!"            
            else:
                userdict.insert(0,-1)
                userup=tuple(userdict)
                self.db.usertable.append(userup)
#                self.db.usertable.append(
#    #             mapid=userdict["mapid"],#用户映射id
#                 userid=userdict["userid"],#用户UID
#                 name=userdict["name"],#友好显示名称
#                 province=userdict["province"],#用户所在省级ID
#                 city=userdict["city"],#	int	用户所在城市ID
#                 location=userdict["location"],#	string	用户所在地
#                 description=userdict["description"],#	string	用户个人描述
#                 gender=userdict["gender"],#	string	性别,m:男、f:女、n:未知
#                 followersCount=userdict["followersCount"],#	int	粉丝数
#                 friendsCount=userdict["friendsCount"],#	int	关注数
#                 statusesCount=userdict["statusesCount"],#	int	微博数
#                 createdAt=userdict["createdAt"],#	string	用户创建(注册)时间
#                 verified=userdict["verified"],#	STRING(10)	是否是微博认证用户,即加V用户,true:是,false:否
#                 verifiedType=userdict["verifiedType"],#	int	暂未支持
#                 biFollowersCount=userdict["biFollowersCount"]#	int	用户的互粉数
#                 )
                print "user "+"User "+str(userdict[1])+" has appended!"
        except IntegrityError:
            print "User"+str(userdict['userid'])+" has existed!"         
            pass
        except:
            print "error when updating user recorder:"+str(userdict['userid'])+"!!!!"
            for kk,vv in userdict.items():
                print str(kk)+'='+str(vv)
            exit(0)
    def relBatchInsert(self,rellist):
        conn = MySQLdb.connect(host='localhost',user='******',passwd='20090924',charset='utf8')
        cursor = conn.cursor()
        try:
            DB_NAME = 'tangdb'
            conn.select_db(DB_NAME)
            cursor.executemany('INSERT INTO relationtable values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)',rellist)
#            for rel in rellist:           
#                self.db.relationtable.append((rel))
            conn.commit()
        except:
            print "batch insert error!!"
            exit(0)
        cursor.close()
        conn.close()
        
    def wordInsert(self,wlist):
        conn = MySQLdb.connect(host='localhost',user='******',passwd='20090924',charset='utf8')
        cursor = conn.cursor()
        try:
            DB_NAME = 'tangdb'
            conn.select_db(DB_NAME)
            cursor.executemany('INSERT INTO wordtable values(%s,%s,%s,%s)',wlist)
            conn.commit()
        except:
            print "batch insert error!!"
            exit(0)
        cursor.close()
        conn.close()
#    def retweetInsert(self,rtlist):
#        conn = MySQLdb.connect(host='localhost',user='******',passwd='20090924',charset='utf8')
#        cursor = conn.cursor()        
        
#        except:
#            print "batch insert error!!"
#            pass
#==============================================================================
#         DB_NAME = 'tangdb'
#         conn.select_db(DB_NAME)
#         cursor.executemany('INSERT INTO retwtable values(%s,%s,%s,%s,%s,%s,%s,%s,%s)',rtlist)
#         conn.commit()        
#==============================================================================
#
#    def originalInsert(self,orlist):
#        conn = MySQLdb.connect(host='localhost',user='******',passwd='20090924',charset='utf8')
#        cursor = conn.cursor()
#        
       
    def relationInsert(self,reldict):
        try:
            q=self.db.relationtable.search(fields=['id'],filters=all(filter("user_v1",reldict[0]),filter("user_v2",reldict[1])))
            if (q.rows()):
                for row in q.rows():
                    a=row
                if (a):
                    self.db.relationtable.update(a[0],{reldict[-1]:0})
                    print "Relation between User "+str(reldict[0])+" and User "+str(reldict[1])+" has updated!"
                else:
                    self.db.relationtable.append({'user_v1':reldict[0],'user_v2':reldict[1],reldict[-1]:0})
                    print "Relation between User "+str(reldict[0])+" and User "+str(reldict[1])+" has inserted!"
            else:
                self.db.relationtable.append({'user_v1':reldict[0],'user_v2':reldict[1],reldict[-1]:0})
                print "Relation between User "+str(reldict[0])+" and User "+str(reldict[1])+" has inserted!"
        except:
            print "error when inserting relation recorder:"+str(reldict[0])+' and '+str(reldict[1])+"!!!!"
            pass
            exit(0)
# coding: utf-8

from pattern.web import Twitter
from pattern.db import Database, SQLITE
from pattern.db import pd
from pattern.db import field, pk, INTEGER, UNIQUE, STRING
from sqlite3 import IntegrityError

team = ['#galo', '#Galo', '#Atletico-MG', '#atletico mineiro']

twitter = Twitter()
db = Database(pd('tweets.db'))

if not "tweets" in db:	
	db.create("tweets", fields = (pk(), field('code', INTEGER, UNIQUE), field('text', STRING(140))))

#query in Twitter
for hashtag in team:
	for tweet in twitter.search(hashtag):
		try:
			db.tweets.append(code = tweet.id, text = tweet.text)
		except IntegrityError:
			pass

#Separate tweets in database
for data in db.tweets.filter():
	print data[2]
	print '-'*30

Exemple #12
0
import os, sys; sys.path.insert(0, os.path.join(os.path.dirname("../../"), "..", ".."))
from pattern.web import Wikia
from pattern.db import Database, SQLITE, MYSQL

########### math_qa config ##################

math_config = qa_config.QASystemConfig()
math_config.name = "Matrix Algebra"

############ database setup #########

db = Database(
			  "db-1",
			  host = 'localhost',
			  port = 3306,
			  username = '******',
			  password = '******',
			  type = MYSQL
			  )

db.connect()


############ crawling ############

w = Wikia("mathematics", language = "en")