Esempio n. 1
0
def get_patterns( boards ):
    """Return the per-board pattern records, reusing a cached copy if present.

    The "all_patterns" collection of the "foolpin" database is first queried
    for the record whose ``_id`` is 999999.  When that query yields an empty
    list, the mapping is rebuilt: for every name in *boards* the record named
    ``<board>_taipei_filtered`` is loaded and its first result is stored under
    the board name (the empty result itself is stored when nothing is found).
    The rebuilt mapping is then saved back under ``_id`` 999999.

    Side effects: sets the ``G2_PATH`` environment variable and, while
    rebuilding, prints each board name.

    boards -- iterable of board-name strings.

    Returns the ``'patterns'`` value of the cached record, or the freshly
    built dict keyed by board name.
    """
    os.environ["G2_PATH"] = "/home/lipuyeh/projects/foolpin/foolpindata/keyword_analyzer"

    database = "foolpin"
    db = osedb.osedb_cl(
        { 'DBPATH' : "./sqldbs", "encrypt" : False, 'type' : "sqlite", 'database' : database }
    )

    # NOTE(review): presumably db.load returns a list of matching records --
    # confirm against osedb.
    cached = db.load( database, "all_patterns", {'_id' : 999999 } )
    cache_missing = ( cached == [] )
    if not cache_missing:
        # A cached record exists: hand back its stored mapping directly.
        return cached[0]['patterns']

    # No cached record yet: collect one entry per board.
    patterns = {}
    for a_board in boards:
        # Single parenthesized argument prints the same as the statement form.
        print(a_board)
        found = db.load( database, "all_patterns", {'name': a_board+"_taipei_filtered" } )
        # Keep the empty result as-is; otherwise keep only the first record.
        patterns[ a_board ] = found if found == [] else found[0]

    # Store the assembled mapping so the next call takes the cached path.
    db.save( database, "all_patterns", [ { '_id' : 999999, 'patterns' : patterns } ] )
    return patterns
Esempio n. 2
0
# -*- coding: utf-8 -*-
import set_tag_to_para as set_tag
import os
import g2tools.db_tools.osedb as osedb


# Script entry point: loads the discussions stored under the name given on the
# command line, then fetches board patterns and per-board sums via set_tag.
if __name__ == "__main__":

    import sys
    #print sys.argv
    # The first command-line argument selects which discussions record to
    # load; running without it raises IndexError here.
    name = sys.argv[1]

    # Point G2_PATH at the keyword_analyzer data directory and open the
    # "foolpin" database through osedb with a sqlite-type configuration.
    os.environ["G2_PATH"] = "/home/lipuyeh/projects/foolpin/foolpindata/keyword_analyzer"
    database = "foolpin"
    config = { 'DBPATH' : "./sqldbs", "encrypt" : False, 'type' : "sqlite", 'database' : database }
    db = osedb.osedb_cl( config )

    # Take the last record returned for this name and keep only its
    # 'discussions' field.
    # NOTE(review): if load returns an empty list this indexing raises
    # IndexError -- confirm whether a missing name can occur.
    discussions = db.load( database, "discussions", {'name' : name} )
    discussions = discussions[ len(discussions) - 1  ]['discussions']
    # Report how many discussions were loaded.
    print len( discussions )


    # boards = [ "Gossiping", "joke", "MenTalk"]
    # final char, 1. garbage 2. funny 3. publicissue 4. foreign 5. KMT 6. DPP

    # The first (longer) board list is immediately overridden by the second
    # assignment, so only the shorter list takes effect.
    boards = [ "Gossiping", "joke", "PublicIssue", "HatePolitics", "FuMouDiscuss", "CrossStrait", "C_Chat", "StupidClown", "MRT", "home-sale", "Stock", "gay", "Neihu", "Nangang", "politics", "KMT", "Taipei", "LoL", "Zastrology", "poem", "ADS", "PresidentLi", "Elephants", "Monkeys", "Salary", "PublicServan", "ComeHere", "WomenTalk", "MenTalk", "ask", "movie", "Militarylife", "DPP" ]
    boards = [ "Gossiping", "joke", "PublicIssue", "HatePolitics", "FuMouDiscuss", "CrossStrait", "StupidClown", "MRT", "home-sale", "Stock", "gay",  "politics", "KMT", "DPP" ]
    # Six category labels, in the same order as the "final char" comment
    # above; not referenced in the visible part of this script.
    character = [ "garbage", "funny", "public", "foreign", "KMT", "DPP" ]

    # Both helpers live in set_tag_to_para (imported as set_tag); the pattern
    # data for the chosen boards is loaded first and then passed on to
    # compute_sum_boards.
    patterns = set_tag.get_patterns( boards )
    sum_boards = set_tag.compute_sum_boards( boards, patterns )