Esempio n. 1
0
def getMergeNews():
    """Dump every merged news row to ``news_file`` as segmented text.

    Prints the value returned by ``tablemerge.getAllCount`` for
    ``dbconfig.mergetable``, then fetches the rows with
    ``tablemerge.getTitleSummary``.  For each row the title (``row[1]``,
    stripped) and the summary (``row[2]``, with all whitespace removed) are
    segmented with ``jieba.cut``, joined with single spaces, lower-cased,
    passed through ``delpunc`` and encoded as UTF-8.  One line of the form
    ``"<n> <title> <summary>"`` is written per row, where ``<n>`` is a
    1-based running counter.

    When ``getTitleSummary`` returns the ``-1`` sentinel, ``news_file`` is
    not opened and nothing is written.

    NOTE(review): the UTF-8 encoded values are interpolated with ``%s`` and
    written to a text-mode file, which relies on Python 2 ``str``
    semantics -- confirm before porting to Python 3.
    """
    print(tablemerge.getAllCount(dbconfig.mergetable))
    rows = tablemerge.getTitleSummary(dbconfig.mergetable)
    if rows == -1:
        return

    # Compile once instead of re-parsing the pattern for every row.
    whitespace = re.compile(r'\s+')
    with open(news_file, 'w') as fout:
        for count, row in enumerate(rows, 1):
            # Row layout according to the original author's note:
            # id, title, summary, ctime, source.  Only title and summary
            # are needed for this dump.
            title = row[1].strip()
            summary = whitespace.sub('', row[2])
            title = delpunc(' '.join(jieba.cut(title)).lower()).encode('utf-8')
            summary = delpunc(' '.join(jieba.cut(summary)).lower()).encode('utf-8')
            fout.write('%s %s %s\n' % (count, title, summary))
Esempio n. 2
0
def getFirstRmDuplicationResult(split=True):
    """Write merged rows to the title and summary files, one line per row.

    Rows are fetched with ``tablemerge.getTitleSummary`` for
    ``dbconfig.mergetable``.  For every row a line
    ``"<n> <id> <time> <title>"`` is written to ``merge1_title_file`` and a
    line ``"<n> <id> <time> <summary>"`` to ``merge1_summary_file``; both
    lines are also printed.  ``<n>`` is a 1-based running counter, ``<id>``
    is ``row[0]`` and ``<time>`` is ``row[3]`` formatted as
    ``YYYY-mm-dd HH:MM:SS`` in local time (``row[3]`` is handed to
    ``time.localtime``, so it is expected to be an epoch timestamp).

    Args:
        split: when true, title and summary are lower-cased, passed through
            ``delpunc``, segmented with ``jieba.cut``, joined with spaces
            and UTF-8 encoded.  When false they are only stripped.

    When ``getTitleSummary`` returns the ``-1`` sentinel, neither file is
    opened and nothing is written.

    NOTE(review): with ``split=False`` the values are written exactly as the
    database layer returned them, without explicit encoding -- verify they
    are byte strings, otherwise non-ASCII text may fail on write.
    """
    rows = tablemerge.getTitleSummary(dbconfig.mergetable)
    if rows == -1:
        return

    # Both handles are owned by ``with`` blocks so they are closed even when
    # processing a row raises part-way through the loop.
    with open(merge1_summary_file, 'w') as f_summary:
        with open(merge1_title_file, 'w') as fout:
            for count, row in enumerate(rows, 1):
                mtid = row[0]
                title = row[1].strip()
                summary = row[2].strip()
                ctime = row[3]
                if split:
                    title = ' '.join(jieba.cut(delpunc(title.lower()))).encode('utf-8')
                    summary = ' '.join(jieba.cut(delpunc(summary.lower()))).encode('utf-8')
                timeStr = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ctime))
                msg_t = '%s %s %s %s' % (count, mtid, timeStr, title)
                msg_s = '%s %s %s %s' % (count, mtid, timeStr, summary)
                print(msg_t)
                print(msg_s)
                fout.write(msg_t + '\n')
                f_summary.write(msg_s + '\n')