Esempio n. 1
0
def main():
    """Rebuild the ``rc`` and/or ``tokenizedrc`` tables from the command line.

    Command-line flags (both default to off, so with no flag nothing is
    written to the database):

    * ``-r`` / ``--result``: drop and recreate table ``rc``, then load the
      three result files found under ``<settings.PROJECT_DIR>/result/raw/``
      (``result.sentence.json.txt``, ``result.tuple.json.txt``,
      ``result.coref.json.txt``) through ``insert``.
    * ``-t`` / ``--tokenize``: drop and recreate table ``tokenizedrc``, then
      store the pairs produced by ``loader.Loader`` through
      ``insertTokenFunc``.
    """
    from optparse import OptionParser

    # Command-line options.
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-r", "--result", dest='insertResult', action='store_true',
                      help="insert json result", default=False)
    parser.add_option("-t", "--tokenize", dest='insertToken', action='store_true',
                      help="insert tokenized rc pairs", default=False)

    (options, args) = parser.parse_args()
    insertResult = options.insertResult
    insertToken = options.insertToken

    CONN_STRING = mydb.get_CONN()
    con = mydb.getCon(CONN_STRING)
    dir_path = os.path.join(settings.PROJECT_DIR, 'result/raw/')

    if insertResult:
        # Recreate the target table from scratch.
        querys = [
            'drop table if exists rc;',
            'create table rc(id int,pairs text, tuples text, coref text);',
        ]
        mydb.executeManyQuery(con, querys, False)

        # Insert pairs: this step creates the rows, the next two update them.
        # Fixed: the path was built with `os.path,join(...)` (comma instead of
        # dot), which passed the `os.path` module as the file argument and
        # called an undefined bare `join`.
        query = 'insert into rc(id,pairs) values(%s, %s)'
        insert(con, os.path.join(dir_path, 'result.sentence.json.txt'), query, 'sen_pairs', False)
        # Insert tuples.
        # NOTE(review): the key passed here is 'pairs' while the column being
        # filled is `tuples` -- confirm against `insert` and the file format.
        query = 'update rc set tuples = %s where id = %s'
        insert(con, os.path.join(dir_path, 'result.tuple.json.txt'), query, 'pairs', True)
        # Insert coref.
        query = 'update rc set coref = %s where id = %s'
        insert(con, os.path.join(dir_path, 'result.coref.json.txt'), query, 'coref', True)

    if insertToken:
        # Recreate the tokenized-pairs table from scratch.
        querys = [
            'drop table if exists tokenizedrc;',
            'create table tokenizedrc(id char(50),tknPair text);',
        ]
        mydb.executeManyQuery(con, querys, False)

        data = loader.Loader.load()
        pairs = loader.Loader.sent2pair(data)
        query = 'insert into tokenizedrc(id,tknPair) values(%s,%s);'
        insertTokenFunc(con, pairs, query)
Esempio n. 2
0
def clean_review_clauses():
    """Null out ``review_clauses`` on rows where it diverges from ``review_text``.

    Reads every ``review`` row whose ``review_clauses`` is not null, strips the
    ``###`` markers from the clause text and compares it with ``review_text``
    using ``jaccard_distance``. Rows whose third returned value is below 0.6
    are reported on stdout (value followed by id) and afterwards have
    ``review_clauses`` set back to NULL, one UPDATE per row.
    """
    CONN_STRING = mydb.get_CONN()
    con = mydb.getCon(CONN_STRING)
    query = 'select id,review_text,review_clauses from review where review_clauses is not null'
    records = mydb.executeQueryResult(con, query, False)

    idxs = []
    for record in records:
        idx = record[0]
        review_text = record[1]
        review_clauses = record[2].replace('###', '')
        # Only the third element of the result is used here.
        _, _, jd = jaccard_distance(review_text, review_clauses)
        if jd < 0.6:  # we think this would be a bad one
            # Single-argument print: same "jd idx" output on Python 2 and 3
            # (the former `print jd,idx` statement is a SyntaxError on 3).
            print('%s %s' % (jd, idx))
            idxs.append(idx)

    # The ids come straight from the SELECT above, not from user input.
    query = 'update review set review_clauses = NULL where id = __idx__'
    for idx in idxs:
        query_n = query.replace('__idx__', str(idx))
        mydb.executeQuery(con, query_n, False)
Esempio n. 3
0
    return (False, token)
        

def getClauseWord(loc_clause_cur, clauseLst):
    """Return the entry at ``loc_clause_cur`` of ``clauseLst`` without its ``###`` marker.

    * Entry ends with ``###``: everything before the first ``###`` is returned
      (so ``'###x###'`` yields ``''``).
    * Otherwise, entry starts with ``###``: the leading marker is dropped.
    * Otherwise the entry is returned unchanged.
    """
    marker = '###'
    token = clauseLst[loc_clause_cur]

    # The trailing-marker check comes first, so it wins when both apply.
    if token.endswith(marker):
        return token[:token.index(marker)]
    if token.startswith(marker):
        return token[len(marker):]
    return token


# Script entry point: read the input file name from the command line and
# hand it to merge().
if __name__ == '__main__':
    from optparse import OptionParser

    # Command-line options: a single -f/--file option naming the input file.
    usage="usage: %prog [options]"
    parser = OptionParser(usage=usage)

    parser.add_option("-f","--file",dest ='file', help="input file")
    (options,args) = parser.parse_args()

    # No default is declared for the option, so this is None when -f is
    # omitted. Note that the name shadows the Python 2 builtin `file`.
    file = options.file
    
    # Bound as a module-level global but not passed to merge() here.
    # NOTE(review): presumably read elsewhere in the module -- confirm before
    # removing or renaming.
    CONN_STRING = mydb.get_CONN()
    merge(file)