forbidCodeErr() from com.office.util.sqlUtil import save from com.office.util import fileUtil from com.office.template2 import DBNAME from com.office.util.excelUtil import read if __name__ == "__main__": keywords = [] srcpathname = r"/Users/yangjie/Downloads/qq/prescription_use_method" if srcpathname.endswith("xls") or srcpathname.endswith("xlsx"): tableData = read(srcpathname, "Sheet1") for row in tableData: keywords.append(row[0]) else: f1 = fileUtil.readFile(srcpathname) keywords = f1.split("\r\n") if len(keywords) < 2: keywords = f1.split("\r") if len(keywords) < 2: keywords = f1.split("\n") #上面的代码就是将原始数据转换城一个列表 data = [] save_sql = """insert into "main"."keys" ( "status", "keyword") values ( ?, ?);""" import sqlite3 conn = sqlite3.connect(DBNAME) conn.text_factory = str count = 0 for keyword in keywords:
from com.office.stitch.db import DATABASE_PATHNAME
from com.office.util.excelUtil import read
from com.office.util.sqlUtil import save
from com.office.util import fileUtil

# Maximum number of rows handed to save() in a single call.
_BATCH_SIZE = 100

# Parameterized insert into the "keys" table; values are bound by save().
_SAVE_SQL = """insert into "main"."keys" ( "status", "keyword") values ( ?, ?);"""

# NOTE: the same load can be fed from an Excel workbook instead of a text
# file: read(<path to .xls>, "Sheet1") yields rows whose first cell (row[0])
# is the keyword. That is the only use of the `read` import in this script.


def _read_keywords(pathname):
    """Return the non-blank lines of the text file at *pathname*.

    str.splitlines() is used instead of split("\r") so that files with
    "\r\n", "\r" or "\n" line endings all yield one clean keyword per line.
    Blank lines (including the one produced by a trailing newline) are
    dropped so that no empty keyword is inserted.
    """
    text = fileUtil.readFile(pathname)
    return [line for line in text.splitlines() if line.strip()]


def main():
    """Insert every keyword from the "omim-id" file into the "keys" table.

    Each keyword is stored with status 0 and rows are passed to save() in
    batches of at most _BATCH_SIZE.
    """
    import sqlite3

    conn = sqlite3.connect(DATABASE_PATHNAME)
    try:
        rows = [(0, keyword) for keyword in _read_keywords("omim-id")]
        # NOTE(review): save() is presumed to execute the statement for each
        # row and commit -- confirm in com.office.util.sqlUtil.
        for start in range(0, len(rows), _BATCH_SIZE):
            save(conn, _SAVE_SQL, rows[start:start + _BATCH_SIZE])
    finally:
        # Release the database handle even if reading or saving fails.
        conn.close()


if __name__ == "__main__":
    main()
from com.office.util.fileUtil import writeFile, readFile sys.path.append('../') import jieba.analyse # file_name = "/Users/yangjie/Downloads/chrome/511.txt" topK = 1000 withWeight = True import codecs # fp = codecs.open("/Users/yangjie/Downloads/qq/新建文本文档.txt", "r", 'gbk'); # # alllines = fp.readlines(); # content = "" # for line in alllines: # content += line content = readFile("tmp.txt") content = content.replace(u"老公", "").replace(u"老婆", "") writeFile("tmp2.txt", content) tags = jieba.analyse.extract_tags(content, topK=topK, withWeight=withWeight) result = "" if withWeight is True: for tag in tags: if tag[0] in [u"杨大爷", u"孔敏佳"]: continue result += tag[0] result += " " result += str(int(tag[1] * 1000)) result += "\n" print("tag: %s\t\t weight: %f" % (tag[0], tag[1])) else:
import codecs
import sys

from com.office.util.fileUtil import writeFile, readFile

# Extend the module search path before jieba is imported.
sys.path.append('../')
import jieba.analyse

# Number of keywords requested from jieba, and whether weights are returned.
topK = 1000
withWeight = True

# Words that are left out of both the printed listing and the output file.
_EXCLUDED_TAGS = (u"杨大爷", u"孔敏佳")

# NOTE: a GBK-encoded input file can be loaded with
# codecs.open(path, "r", 'gbk') and its lines concatenated into `content`;
# that alternative is the only use of the `codecs` import in this script.
content = readFile("/Users/yangjie/Downloads/qq/tmp.txt")

tags = jieba.analyse.extract_tags(content, topK=topK, withWeight=withWeight)

# Collect output lines in a list and join once, instead of growing a string
# with repeated "+=".
lines = []
if withWeight:
    # With weights enabled each tag is indexed as (word, weight).
    for word, weight in tags:
        if word in _EXCLUDED_TAGS:
            continue
        # One "<word> <weight * 1000 truncated to int>" entry per line.
        lines.append(word + " " + str(int(weight * 1000)) + "\n")
        print("tag: %s\t\t weight: %f" % (word, weight))
else:
    # Without weights the tags are joined as plain strings; nothing is
    # collected, so the output file is written empty.
    print(",".join(tags))

result = "".join(lines)
writeFile("ss.txt", result)