예제 #1
0
def load_doc_labels(dbfile):
    """Return the topic rowid of every document whose category names a topic.

    Joins ``document`` to ``topic`` on ``document.cats = topic.name`` and
    collects ``topic.rowid`` for each joined row, in the order the query
    yields them.

    Args:
        dbfile: Database location handed to ``dbutils.connect_db``.

    Returns:
        list: One ``topic.rowid`` value per joined row.
    """
    dbcon = dbutils.connect_db(dbfile)
    try:
        cur = dbcon.execute(
            "select t.rowid from document d join topic t on d.cats=t.name")
        return [r[0] for r in cur]
    finally:
        # Release the connection even if the query or the iteration raises.
        dbcon.close()
예제 #2
0
def transfer_data_file(dbfile, outputfile):
    """Export per-document keyword weights as a tab-separated matrix.

    Reads ``document.kw_content`` twice: once to collect the vocabulary and
    once to write one row per document.  Each ``kw_content`` value is a
    space-separated list of ``word/weight`` tokens.

    Output layout (every line ends with CRLF):
      1. the vocabulary, tab-separated, one column per word;
      2. a ``c`` for every column (presumably a "continuous" column-type
         row for the consuming tool -- TODO confirm);
      3. an empty line;
      4. one line per document holding the weight of each word, ``0.0``
         for words the document does not contain.

    NOTE(review): the header is encoded to UTF-8 bytes before being written
    to a text-mode file, which only works on Python 2; revisit when porting.

    Args:
        dbfile: Database location handed to ``dbutils.connect_db``.
        outputfile: Path of the file to create or overwrite.

    Raises:
        ValueError: If a token does not split into exactly ``word`` and
            ``weight`` on ``/``, or the weight is not a valid float.
    """
    con = dbutils.connect_db(dbfile)
    try:
        # Pass 1: vocabulary with occurrence counts.  The counts would only
        # matter if rare words were pruned, which is currently not done.
        words = dict()
        for d in dbutils.iterRec(con, 'document', 'kw_content'):
            for ww in d[0].split(' '):
                word, _weight = ww.split('/')
                words[word] = words.get(word, 0) + 1

        # Freeze the column order once so the header and every row agree.
        columns = list(words)

        with open(outputfile, 'w') as out:
            out.write('\t'.join(columns).encode('utf8'))
            out.write('\r\n')
            out.write('\t'.join(['c' for _ in columns]))
            out.write('\r\n\r\n')

            # Pass 2: one row of weights per document.
            for d in dbutils.iterRec(con, 'document', 'kw_content'):
                weights = dict.fromkeys(columns, 0.0)
                for ww in d[0].split(' '):
                    word, weight = ww.split('/')
                    # Ignore words that appeared only after pass 1.
                    if word in weights:
                        weights[word] = float(weight)
                out.write('\t'.join([str(weights[w]) for w in columns]))
                out.write('\r\n')
    finally:
        # Close the connection even when parsing or writing fails.
        con.close()
예제 #3
0
def transfer_data_file(dbfile, outputfile):
    """Export per-document keyword weights as a tab-separated matrix.

    Reads ``document.kw_content`` twice: once to collect the vocabulary and
    once to write one row per document.  Each ``kw_content`` value is a
    space-separated list of ``word/weight`` tokens.

    Output layout (every line ends with CRLF):
      1. the vocabulary, tab-separated, one column per word;
      2. a ``c`` for every column (presumably a "continuous" column-type
         row for the consuming tool -- TODO confirm);
      3. an empty line;
      4. one line per document holding the weight of each word, ``0.0``
         for words the document does not contain.

    NOTE(review): the header is encoded to UTF-8 bytes before being written
    to a text-mode file, which only works on Python 2; revisit when porting.

    Args:
        dbfile: Database location handed to ``dbutils.connect_db``.
        outputfile: Path of the file to create or overwrite.

    Raises:
        ValueError: If a token does not split into exactly ``word`` and
            ``weight`` on ``/``, or the weight is not a valid float.
    """
    con = dbutils.connect_db(dbfile)
    try:
        # Pass 1: vocabulary with occurrence counts.  The counts would only
        # matter if rare words were pruned, which is currently not done.
        words = dict()
        for d in dbutils.iterRec(con, 'document', 'kw_content'):
            for ww in d[0].split(' '):
                word, _weight = ww.split('/')
                words[word] = words.get(word, 0) + 1

        # Freeze the column order once so the header and every row agree.
        columns = list(words)

        with open(outputfile, 'w') as out:
            out.write('\t'.join(columns).encode('utf8'))
            out.write('\r\n')
            out.write('\t'.join(['c' for _ in columns]))
            out.write('\r\n\r\n')

            # Pass 2: one row of weights per document.
            for d in dbutils.iterRec(con, 'document', 'kw_content'):
                weights = dict.fromkeys(columns, 0.0)
                for ww in d[0].split(' '):
                    word, weight = ww.split('/')
                    # Ignore words that appeared only after pass 1.
                    if word in weights:
                        weights[word] = float(weight)
                out.write('\t'.join([str(weights[w]) for w in columns]))
                out.write('\r\n')
    finally:
        # Close the connection even when parsing or writing fails.
        con.close()
예제 #4
0
 def load_doc_labels(self, docs):
     """Return the topic rowid for each document in ``docs``, in order.

     For every document, looks up the ``topic`` row whose ``name`` equals
     the document's ``cats`` value (matched by ``docid``) and takes the
     first returned ``rowid``.

     Args:
         docs: Iterable of objects exposing ``get_docid()``.

     Returns:
         list: One ``topic.rowid`` per element of ``docs``.

     Raises:
         TypeError: If a document has no matching topic row -- assuming a
             DB-API cursor, ``fetchone()`` then returns ``None`` and the
             subscript fails.
     """
     query = 'select t.rowid from document d join topic t on d.cats=t.name and d.docid=?'
     dbcon = dbutils.connect_db(self.dbfile)
     try:
         labels = []
         for d in docs:
             cur = dbcon.execute(query, (d.get_docid(),))
             try:
                 labels.append(cur.fetchone()[0])
             finally:
                 # Close the cursor even when the lookup finds no row.
                 cur.close()
         return labels
     finally:
         # Release the connection on success and on failure alike.
         dbcon.close()
예제 #5
0
    def iter_document(self):
        """Yield a ``Document`` for every record of the ``document`` table.

        Each record's second field is parsed as whitespace-separated
        ``word/weight`` tokens into a ``{word: float(weight)}`` dict, which
        is passed to ``Document`` together with the record's first field.
        If a word repeats within a record, the last weight wins.

        Yields:
            Document: Built from ``(r[0], word_dict)`` for each record.

        Raises:
            IndexError: If a token contains no ``/``.
            ValueError: If a weight is not a valid float.
        """
        dbcon = dbutils.connect_db(self.dbfile)
        try:
            for r in dbutils.iterRec(dbcon, 'document', 'docid kw_content'):
                word_dict = {}
                for ww in r[1].split():
                    parts = ww.split('/')
                    word_dict[parts[0]] = float(parts[1])
                yield Document(r[0], word_dict)
        finally:
            # Runs on exhaustion, on errors, and when the consumer stops
            # early (generator close), so the connection is always released.
            dbcon.close()
예제 #6
0
 def load_doc_labels(self, docs):
     """Return the topic rowid for each document in ``docs``, in order.

     For every document, looks up the ``topic`` row whose ``name`` equals
     the document's ``cats`` value (matched by ``docid``) and takes the
     first returned ``rowid``.

     Args:
         docs: Iterable of objects exposing ``get_docid()``.

     Returns:
         list: One ``topic.rowid`` per element of ``docs``.

     Raises:
         TypeError: If a document has no matching topic row -- assuming a
             DB-API cursor, ``fetchone()`` then returns ``None`` and the
             subscript fails.
     """
     query = 'select t.rowid from document d join topic t on d.cats=t.name and d.docid=?'
     dbcon = dbutils.connect_db(self.dbfile)
     try:
         labels = []
         for d in docs:
             cur = dbcon.execute(query, (d.get_docid(), ))
             try:
                 labels.append(cur.fetchone()[0])
             finally:
                 # Close the cursor even when the lookup finds no row.
                 cur.close()
         return labels
     finally:
         # Release the connection on success and on failure alike.
         dbcon.close()
예제 #7
0
    def iter_document(self):
        """Yield a ``Document`` for every record of the ``document`` table.

        Each record's second field is parsed as whitespace-separated
        ``word/weight`` tokens into a ``{word: float(weight)}`` dict, which
        is passed to ``Document`` together with the record's first field.
        If a word repeats within a record, the last weight wins.

        Yields:
            Document: Built from ``(r[0], word_dict)`` for each record.

        Raises:
            IndexError: If a token contains no ``/``.
            ValueError: If a weight is not a valid float.
        """
        dbcon = dbutils.connect_db(self.dbfile)
        try:
            for r in dbutils.iterRec(dbcon, 'document', 'docid kw_content'):
                word_dict = {}
                for ww in r[1].split():
                    parts = ww.split('/')
                    word_dict[parts[0]] = float(parts[1])
                yield Document(r[0], word_dict)
        finally:
            # Runs on exhaustion, on errors, and when the consumer stops
            # early (generator close), so the connection is always released.
            dbcon.close()
예제 #8
0
def loadTitleWordnet(dbfile='../data/cn-topic.db', min_coocur=2, min_weight=1e-3):
    """Build an igraph graph from the word pairs stored in ``t_wordpair``.

    Every record returned by ``dbutils.iterRec`` becomes one edge dict with
    ``source`` (``word1``), ``target`` (``word2``) and ``weight``.  The
    condition ``coocur_num>=? and weight>?`` is passed to ``iterRec`` with
    ``(min_coocur, min_weight)`` as its parameters.

    Args:
        dbfile: Database location handed to ``dbutils.connect_db``.
        min_coocur: Value bound to the ``coocur_num>=?`` placeholder.
        min_weight: Value bound to the ``weight>?`` placeholder.

    Returns:
        The graph produced by ``igraph.Graph.DictList`` from the edge list,
        with no explicit vertex list supplied.
    """
    dbcon = dbutils.connect_db(dbfile)
    try:
        edgelist = [
            {'source': r[0], 'target': r[1], 'weight': r[2]}
            for r in dbutils.iterRec(
                dbcon, 't_wordpair', ['word1', 'word2', 'weight'],
                'coocur_num>=? and weight>?', (min_coocur, min_weight))
        ]
    finally:
        # Release the connection even if reading the records fails.
        dbutils.close_db(dbcon)
    return igraph.Graph.DictList(vertices=None, edges=edgelist)
예제 #9
0
def loadTitleWordnet(dbfile='../data/cn-topic.db',
                     min_coocur=2,
                     min_weight=1e-3):
    """Build an igraph graph from the word pairs stored in ``t_wordpair``.

    Every record returned by ``dbutils.iterRec`` becomes one edge dict with
    ``source`` (``word1``), ``target`` (``word2``) and ``weight``.  The
    condition ``coocur_num>=? and weight>?`` is passed to ``iterRec`` with
    ``(min_coocur, min_weight)`` as its parameters.

    Args:
        dbfile: Database location handed to ``dbutils.connect_db``.
        min_coocur: Value bound to the ``coocur_num>=?`` placeholder.
        min_weight: Value bound to the ``weight>?`` placeholder.

    Returns:
        The graph produced by ``igraph.Graph.DictList`` from the edge list,
        with no explicit vertex list supplied.
    """
    dbcon = dbutils.connect_db(dbfile)
    try:
        edgelist = [
            {'source': r[0], 'target': r[1], 'weight': r[2]}
            for r in dbutils.iterRec(
                dbcon, 't_wordpair', ['word1', 'word2', 'weight'],
                'coocur_num>=? and weight>?', (min_coocur, min_weight))
        ]
    finally:
        # Release the connection even if reading the records fails.
        dbutils.close_db(dbcon)
    return igraph.Graph.DictList(vertices=None, edges=edgelist)
예제 #10
0
    def iter_title_words(self):
        """Yield one ``Document`` per record, built from its title keywords.

        The first field of each ``document`` record (``kw_title``) is split
        on single spaces; every resulting token becomes a key with value
        ``0``.  All yielded documents are created with ``0`` as their first
        argument.

        Yields:
            Document: ``Document(0, {token: 0, ...})`` for each record.
        """
        dbcon = dbutils.connect_db(self.dbfile)
        try:
            for r in dbutils.iterRec(dbcon, 'document', 'kw_title'):
                yield Document(0, {w: 0 for w in r[0].split(' ')})
        finally:
            # Also runs when the consumer abandons the generator early, so
            # the connection is not left open.
            dbcon.close()
예제 #11
0
    def iter_title_words(self):
        """Yield one ``Document`` per record, built from its title keywords.

        The first field of each ``document`` record (``kw_title``) is split
        on single spaces; every resulting token becomes a key with value
        ``0``.  All yielded documents are created with ``0`` as their first
        argument.

        Yields:
            Document: ``Document(0, {token: 0, ...})`` for each record.
        """
        dbcon = dbutils.connect_db(self.dbfile)
        try:
            for r in dbutils.iterRec(dbcon, 'document', 'kw_title'):
                yield Document(0, {w: 0 for w in r[0].split(' ')})
        finally:
            # Also runs when the consumer abandons the generator early, so
            # the connection is not left open.
            dbcon.close()
예제 #12
0
# Communicating with the database

import datetime
#from dbutils import *
import dbutils

# Demonstrates SELECT / INSERT / UPDATE through a DB-API cursor obtained
# from the project helper ``dbutils.connect_db``.
connection, cursor = dbutils.connect_db()

# SELECT: fetch and print the single row with city_id 92.
sql = " SELECT * FROM public.city WHERE city_id=92"
cursor.execute(sql)
row = cursor.fetchone()
print(row)

# SELECT: fetch and print every row with city_id up to 10.
print("=" * 70)
sql = " SELECT * FROM public.city WHERE city_id<=10"
cursor.execute(sql)
rows = cursor.fetchall()
print(rows)

# INSERT: values are passed separately so the driver binds them.
sql = "INSERT INTO public.city (city, last_update, country_id) VALUES ( %s, %s, 15 ) "
data = ('Nibylandia-MW', datetime.datetime.today())
cursor.execute(sql, data)
connection.commit()

# UPDATE: rename the city inserted above.
sql = " UPDATE public.city SET city=%s WHERE city=%s "
data = ('Nibylandia2-MW', 'Nibylandia-MW')
cursor.execute(sql, data)
# Commit the UPDATE as well; unless the connection is in autocommit mode
# the change would otherwise never be persisted by this script.
connection.commit()
예제 #13
0
from datetime import datetime

# Example of using the database through the native driver
from dbutils import connect_db

# Walks through SELECT, INSERT and UPDATE using the connection/cursor pair
# returned by the project helper ``connect_db``.
conn, cursor = connect_db()

# SELECT -- films with film_id up to 10; kept in `rows`, not printed.
query = "SELECT film_id, title, description FROM public.film WHERE film_id<=10"
cursor.execute(query)
rows = cursor.fetchall()

# SELECT -- the film with film_id 1, printed as fetched.
query = "SELECT film_id, title, description FROM public.film WHERE film_id=1"
cursor.execute(query)
row = cursor.fetchone()
print(row)

# INSERT -- add a city with bound parameters, then commit.
query = (
    "INSERT INTO public.city "
    "(city, last_update, country_id ) values (%s, %s, %s)"
)
params = ("Baku2A", datetime.today(), 10)
cursor.execute(query, params)
conn.commit()

# UPDATE -- rename the city inserted above, then commit.
query = "UPDATE public.city SET city=%s WHERE city=%s "
params = ("Baku2AA", "Baku2A")
cursor.execute(query, params)
conn.commit()