Beispiel #1
0
forbidCodeErr()
from com.office.util.sqlUtil import save
from com.office.util import fileUtil
from com.office.template2 import DBNAME
from com.office.util.excelUtil import read

if __name__ == "__main__":
    # Build the list of keywords from the source file: the first column of
    # "Sheet1" when the path looks like an Excel workbook, otherwise the
    # pieces of the text file's content split on line endings.
    keywords = []
    srcpathname = r"/Users/yangjie/Downloads/qq/prescription_use_method"
    if srcpathname.endswith("xls") or srcpathname.endswith("xlsx"):
        tableData = read(srcpathname, "Sheet1")
        for row in tableData:
            keywords.append(row[0])
    else:
        f1 = fileUtil.readFile(srcpathname)
        # Try each line-ending convention in turn ("\r\n", then "\r", then
        # "\n") until one of them splits the text into at least two entries.
        keywords = f1.split("\r\n")
        if len(keywords) < 2:
            keywords = f1.split("\r")
        if len(keywords) < 2:
            keywords = f1.split("\n")

    # The code above converts the raw data into a list.

    # State for inserting the keywords into the "keys" table of DBNAME.
    data = []
    save_sql = """insert into "main"."keys" ( "status", "keyword") values ( ?, ?);"""
    import sqlite3
    conn = sqlite3.connect(DBNAME)
    # text_factory selects the Python type used for TEXT values read back
    # from the database.
    conn.text_factory = str
    count = 0
    # NOTE(review): the body of this loop is not present in this excerpt, so
    # how data, save_sql, conn and count are used cannot be documented here.
    for keyword in keywords:
from com.office.stitch.db import DATABASE_PATHNAME
from com.office.util.excelUtil import read
from com.office.util.sqlUtil import save
from com.office.util import fileUtil

if __name__ == "__main__":
    # Load keywords from the "omim-id" text file (one keyword per line) and
    # insert them into the "keys" table with status 0, in batches of rows.
    #
    # An earlier variant of this script (previously kept here as
    # commented-out code) took the keywords from the first column of
    # "Sheet1" in an Excel workbook via excelUtil.read instead.
    import sqlite3
    from contextlib import closing

    BATCH_SIZE = 100
    save_sql = """insert into "main"."keys" ( "status", "keyword") values ( ?, ?);"""

    # splitlines() handles "\r\n", "\r" and "\n" line endings alike and does
    # not produce a trailing empty entry when the file ends with a line
    # terminator. Splitting on "\r" alone left a leading "\n" on every
    # keyword of a "\r\n" file and did not split "\n" files at all.
    keywords = fileUtil.readFile("omim-id").splitlines()

    # closing() releases the connection even if one of the saves raises.
    with closing(sqlite3.connect(DATABASE_PATHNAME)) as conn:
        data = []
        for keyword in keywords:
            data.append((0, keyword))
            if len(data) >= BATCH_SIZE:
                save(conn, save_sql, data)
                data = []
        # Flush the last partial batch; nothing to do if it is empty.
        if data:
            save(conn, save_sql, data)
Beispiel #3
0
from com.office.util.fileUtil import writeFile, readFile
sys.path.append('../')

import jieba.analyse

# file_name = "/Users/yangjie/Downloads/chrome/511.txt"
# Settings passed to jieba's tag extraction below: how many tags to request
# and whether each returned tag is paired with its weight.
topK = 1000
withWeight = True
import codecs
# fp = codecs.open("/Users/yangjie/Downloads/qq/新建文本文档.txt", "r", 'gbk');
#
# alllines = fp.readlines();
# content = ""
# for line in alllines:
#     content += line
content = readFile("tmp.txt")
# Remove these two words from the text before extraction and write the
# cleaned text to tmp2.txt.
content = content.replace(u"老公", "").replace(u"老婆", "")
writeFile("tmp2.txt", content)
tags = jieba.analyse.extract_tags(content, topK=topK, withWeight=withWeight)

# Accumulates one "<tag> <int(weight * 1000)>" line per kept tag.
result = ""
if withWeight is True:
    for tag in tags:
        # These two tags are skipped entirely: neither recorded nor printed.
        if tag[0] in [u"杨大爷", u"孔敏佳"]:
            continue
        result += tag[0]
        result += " "
        result += str(int(tag[1] * 1000))
        result += "\n"
        print("tag: %s\t\t weight: %f" % (tag[0], tag[1]))
# NOTE(review): the body of this else branch is not present in this excerpt.
else:
Beispiel #4
0
from com.office.util.fileUtil import writeFile, readFile
sys.path.append('../')

import jieba.analyse

# Keyword extraction over a text file with jieba.
#
# Requests the top-ranked tags of the input text. With weights enabled, each
# kept tag is printed and recorded as a "<tag> <int(weight * 1000)>" line,
# and the recorded lines are written to ss.txt. With weights disabled, the
# tags are only printed as a comma-separated list and ss.txt is written empty.
topK = 1000
withWeight = True
import codecs

content = readFile("/Users/yangjie/Downloads/qq/tmp.txt")
tags = jieba.analyse.extract_tags(content, topK=topK, withWeight=withWeight)

result = ""
if withWeight is not True:
    print(",".join(tags))
else:
    # Tags that must appear neither in the console output nor in the file.
    excluded = [u"杨大爷", u"孔敏佳"]
    pieces = []
    for word, weight in tags:
        if word in excluded:
            continue
        pieces.append(word + " " + str(int(weight * 1000)) + "\n")
        print("tag: %s\t\t weight: %f" % (word, weight))
    result = "".join(pieces)
writeFile("ss.txt", result)