Esempio n. 1
0
def insert2(user_id, body, device_type, device_no, local_id, created_date, last_update):
    """Insert a row for ``user_id`` unless a recent row already has this body.

    The 80 rows with the highest ``pk_id`` for the user are scanned.  If one
    of them has a ``body`` equal to the given ``body``, its ``pk_id`` is
    returned and nothing is written.  Otherwise a new row is inserted with an
    empty ``subject``; ``created_date`` and ``plan_start_date`` are filled in
    by the database via ``now()``, while the caller-supplied ``created_date``
    is stored in ``app_created_date``.

    Returns:
        The ``pk_id`` of the existing duplicate row, or the value returned by
        ``dbw.insert`` for the newly created row.
    """
    recent_rows = list(dbw.select(
        table_name,
        what="pk_id,body",
        where="user_id=$user_id",
        # Pass only the variable the WHERE clause actually references
        # instead of exposing every local through locals().
        vars={"user_id": user_id},
        order="pk_id desc",
        limit=80,
    ))
    for row in recent_rows:
        # Plain equality instead of ``cmp(...) == 0``: the ``cmp`` builtin
        # does not exist on Python 3.
        if row.body == body:
            return row.pk_id
    return dbw.insert(
        table_name,
        user_id=user_id,
        subject="",
        body=body,
        device_no=device_no,
        local_id=local_id,
        device_type=device_type,
        app_created_date=created_date,
        last_update=last_update,
        created_date=web.SQLLiteral('now()'),
        plan_start_date=web.SQLLiteral('now()'),
    )
Esempio n. 2
0
def compute_tf_idf():
    """Recompute term statistics (document count, IDF, TF, TF-IDF) in SQL.

    Runs four bulk UPDATE statements through ``dbw``, in this order:

    1. ``terms.count_domain`` := number of ``term_doc`` rows per ``term_id``.
    2. ``terms.idf_domain``   := LOG(total subjects / (count_domain + 1)).
    3. ``term_doc.tf``        := term_count / sum of term_count for the doc.
    4. ``term_doc.tf_idf``    := tf * idf_domain of the matching term.
    """
    # This approach is only suitable for very small data sets; with a large
    # amount of data a Hadoop script would have to be written instead.
    count_domain_sql = """update terms as t, (SELECT term_id, count(*) as count FROM term_doc group by term_id) as tmp
        set t.count_domain = tmp.count
        where t.term_id=tmp.term_id;"""
    dbw.query(count_domain_sql)

    # Update each term's idf.
    # Equivalent SQL: select count(*) as count from subjects;
    count_rows = dbw.select('subjects', what="count(*) as count")
    doc_total_count = count_rows[0].count
    idf_sql = "update terms set idf_domain=LOG(%s/(count_domain+1))" % doc_total_count
    dbw.query(idf_sql)

    # Update each term's tf within its document.
    tf_sql = """update term_doc as t,
    (SELECT doc_id, sum(term_count) as doc_term_count FROM term_doc group by doc_id) as tmp
    set t.tf = t.term_count/tmp.doc_term_count
    where t.doc_id = tmp.doc_id"""
    dbw.query(tf_sql)

    # Update each term's tf-idf.
    tf_idf_sql = """update term_doc as td,
    terms as t
    set td.tf_idf = td.tf*t.idf_domain
    where td.term_id = t.term_id """
    dbw.query(tf_idf_sql)
Esempio n. 3
0
def compute_tf_idf():
    """Refresh count_domain, idf_domain, tf and tf_idf with bulk SQL updates.

    The statements are issued through ``dbw`` one after another: first the
    per-term document count on ``terms``, then the IDF derived from the total
    number of rows in ``subjects``, then the per-document TF on ``term_doc``
    and finally the TF-IDF product.
    """
    # Fine for a very small amount of data; a large data set would require
    # writing a Hadoop script instead.
    dbw.query("""update terms as t, (SELECT term_id, count(*) as count FROM term_doc group by term_id) as tmp
        set t.count_domain = tmp.count
        where t.term_id=tmp.term_id;""")

    # Term idf: needs the total number of documents, i.e.
    # select count(*) as count from subjects;
    doc_total_count = dbw.select('subjects', what="count(*) as count")[0].count
    dbw.query(
        "update terms set idf_domain=LOG(%s/(count_domain+1))" % doc_total_count
    )

    # The remaining two statements take no parameters and run in order:
    # first the term's tf, then the term's tf-idf (which reads that tf).
    remaining_statements = (
        """update term_doc as t,
    (SELECT doc_id, sum(term_count) as doc_term_count FROM term_doc group by doc_id) as tmp
    set t.tf = t.term_count/tmp.doc_term_count
    where t.doc_id = tmp.doc_id""",
        """update term_doc as td,
    terms as t
    set td.tf_idf = td.tf*t.idf_domain
    where td.term_id = t.term_id """,
    )
    for statement in remaining_statements:
        dbw.query(statement)
Esempio n. 4
0
def load_last_one(user_id):
    """Return the row with the highest ``pk_id`` for ``user_id``, or ``False``.

    The selected columns are ``pk_id``, ``subject``, ``body`` and
    ``task_status``.

    The query is ordered by ``pk_id`` descending and limited to one row.
    Without an explicit ordering the database is free to return rows in any
    order, so taking the first row did not reliably yield the "last" one, and
    every row of the user was fetched just to use a single one.
    NOTE(review): this assumes a larger ``pk_id`` means a newer row - confirm
    against the table definition.

    Returns:
        The matching row object, or ``False`` when the user has no rows.
    """
    rows = list(dbw.select(
        table_name,
        what="pk_id,subject,body,task_status",
        where='user_id=$user_id',
        vars={"user_id": user_id},
        order="pk_id desc",
        limit=1,
    ))
    if rows:
        return rows[0]
    return False