def plot(game_name, game_id):
    """Build a tag image from the keywords of one game's comments.

    Every ``comment`` row whose ``game_id`` matches is fetched, up to three
    keywords are extracted from column 2 of each row with
    ``jieba.analyse.extract_tags``, and the occurrences of each keyword are
    counted.  Keywords shorter than two characters or present in the
    module-level ``stop`` collection are ignored.

    Args:
        game_name: Interpolated into the output path ``c:/<game_name>.png``.
        game_id: Interpolated into the SQL ``where game_id = ...`` filter.
    """
    counts = {}
    comments = DbUtil.getAllResult(
        "select * from comment where game_id = %s" % game_id)
    for comment in comments:
        for word in jieba.analyse.extract_tags(comment[2], topK=3):
            if len(word) < 2 or word in stop:
                continue
            counts[word] = counts.get(word, 0) + 1
    print(counts)

    ranked = sorted(counts.items(), key=itemgetter(1), reverse=True)
    # The slice starts at index 1, so the entry with the highest count is
    # skipped and at most 49 entries are kept.
    # NOTE(review): confirm that dropping the top keyword is intentional.
    ranked = ranked[1:50]

    tags = make_tags(
        ranked,
        minsize=30,
        maxsize=120,
        colors=random.choice(list(COLOR_SCHEMES.values())),
    )
    create_tag_image(
        tags,
        'c:/%s.png' % game_name,
        background=(0, 0, 0, 255),
        size=(900, 600),
        fontname='SimHei',
    )
    print('having save file to disk')
def cut():
    """Segment comments with jieba and write them to ``test1.txt``.

    Fetches up to 300000 ``comment`` rows, tokenises column 2 of each row
    with ``jieba.cut`` and drops tokens that are in the module-level ``stop``
    collection or are a single space.  Each row that still has tokens is
    written as one line of space-separated tokens (UTF-8).
    """
    comments = DbUtil.getAllResult("select * from comment limit 300000")
    # The context manager closes the file even when segmentation or a write
    # raises part-way through.
    with open("test1.txt", "w", encoding="utf-8") as out:
        for comment in comments:
            words = [
                word for word in jieba.cut(comment[2])
                if word not in stop and word != ' '
            ]
            if words:
                out.write(" ".join(words))
                out.write("\n")
# plot(game_name, game_id): fetches the `comment` rows for `game_id`, extracts
# up to three keywords from column 2 of each row with
# jieba.analyse.extract_tags, and counts keywords that are at least two
# characters long and not in `stop`.  The (word, count) pairs are sorted by
# count in descending order and sliced with [1:50], so the entry with the
# highest count is skipped.  The result goes through make_tags (minsize=30,
# maxsize=100, a randomly chosen COLOR_SCHEMES value) and create_tag_image,
# which is given the path 'C:/Users/Administrator/Desktop/<game_name>_<game_id>.png'.
# NOTE(review): the local name `dict` shadows the builtin inside this function.
# The commented-out statements would have stored str(dict) in the `keyword`
# table through `cur`/`conn`.
# The final two statements read one `keyword` value back and pass it to eval().
# NOTE(review): eval() on database content executes arbitrary expressions; if
# the stored value is a plain dict literal, ast.literal_eval is presumably
# sufficient -- confirm.  It is not clear from this collapsed line whether
# those two statements belong to plot() or to module level.
def plot(game_name, game_id): dict = {} comments = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id) for comment in comments: result = jieba.analyse.extract_tags(comment[2], topK=3) for word in result: if len(word) < 2: continue elif word in stop: continue if word not in dict: dict[word] = 1 else: dict[word] += 1 swd = sorted(dict.items(), key=itemgetter(1), reverse=True) swd = swd[1:50] tags = make_tags(swd, minsize=30, maxsize=100, colors=random.choice(list(COLOR_SCHEMES.values()))) create_tag_image(tags, 'C:/Users/Administrator/Desktop/%s_%s.png' % (game_name, game_id), background=(0, 0, 0, 255), size=(900, 600), fontname='SimHei') print('create file ---%s' % game_name) # dict = {} # # for (k, v) in swd: # dict[k] = v # print('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s"' % (game_id, str(dict))) # cur.execute('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s")' % (game_id, str(dict))) # conn.commit() word = DbUtil.getOneResult('select keyword from keyword limit 1') print(eval(word[0]))
def toDict(list):
    """Return ``{word: True}`` for every word of ``list`` found in ``best_words``.

    The parameter name shadows the builtin ``list``; it is kept unchanged so
    that callers passing it by keyword keep working.
    """
    return {word: True for word in list if word in best_words}


def features(feature_extraction_method):
    """Apply ``feature_extraction_method`` to each entry of the global ``lists``.

    Returns:
        A list holding the result for every entry, in the same order.
    """
    return [feature_extraction_method(words) for words in lists]


if __name__ == '__main__':
    getStop()
    comments = list(DbUtil.getAllResult("select * from comment"))
    shuffle(comments)
    conn, cur = DbUtil.getConn()
    try:
        for i in range(0, 1065000, 1000):
            print(i)
            # Every row of a 1000-row batch gets the same argument, a value
            # in 1..30 derived from the batch number.
            day = (i // 1000 + 1) % 30 + 1
            for comment in comments[i:i + 1000]:
                # Build the statement once so the printed text is exactly
                # what gets executed, even if Ran.getTime returns a different
                # value on each call.
                statement = (
                    'UPDATE comment set comment_time = "%s" where id =%s'
                    % (Ran.getTime(day), comment[0])
                )
                print(statement)
                cur.execute(statement)
            conn.commit()
    finally:
        # Release the connection even when an update fails part-way.
        DbUtil.close(conn, cur)
    # lists = []
    # for comment in comments:
    #     list = []
# The first two statements (create_tag_image / print) are the tail of a
# function whose header is not visible on this line.
# The script entry point then:
#   * reads "../StopWords.txt" line by line and appends each line (newline
#     removed) to `stop`; the loop ends at the first empty line, so a blank
#     line inside the file also ends the read.  The file object `f` is not
#     closed anywhere on this line;
#   * loads the jieba user dictionary from "c:/dict.txt";
#   * queries the 50 game ids with the most `comment` rows together with
#     their names;
#   * calls plot(name, id) once per distinct game name, using the list `l`
#     to remember names already handled.
create_tag_image(tags, 'c:/%s.png' % game_name, background=(0, 0, 0, 255), size=(900, 600), fontname='SimHei') print('having save file to dick') if __name__ == '__main__': f = open("../StopWords.txt", encoding="utf-8") jieba.load_userdict("c:/dict.txt") while True: line = f.readline().replace("\n", '') if not line: break stop.append(line) games = DbUtil.getAllResult( "select game_id,games.game_name from `comment` join games on game_id = games.id GROUP BY game_id ORDER BY count(game_id) desc limit 50" ) l = [] for game in games: if game[1] not in l: plot(game[1], game[0]) l.append(game[1])
# This line starts inside a call whose opening is not visible: `colors=` is
# its last keyword argument, a randomly chosen COLOR_SCHEMES value.  The
# create_tag_image / print statements that follow belong to the same
# enclosing function, whose header is also not visible.
# The script entry point then:
#   * reads "../StopWords.txt" line by line and appends each line (newline
#     removed) to `stop`; the loop ends at the first empty line, so a blank
#     line inside the file also ends the read.  The file object `f` is not
#     closed anywhere on this line;
#   * loads the jieba user dictionary from "c:/dict.txt";
#   * queries the 50 game ids with the most `comment` rows together with
#     their names;
#   * calls plot(name, id) once per distinct game name, using the list `l`
#     to remember names already handled.
colors=random.choice(list(COLOR_SCHEMES.values()))) create_tag_image(tags, 'c:/%s.png' % game_name, background=(0, 0, 0, 255), size=(900, 600), fontname='SimHei') print('having save file to dick') if __name__ == '__main__': f = open("../StopWords.txt", encoding="utf-8") jieba.load_userdict("c:/dict.txt") while True: line = f.readline().replace("\n", '') if not line: break stop.append(line) games = DbUtil.getAllResult("select game_id,games.game_name from `comment` join games on game_id = games.id GROUP BY game_id ORDER BY count(game_id) desc limit 50") l =[] for game in games: if game[1] not in l: plot(game[1], game[0]) l.append(game[1])
if __name__ == '__main__':
    # Script entry point: load the pickled review lists, drop their empty
    # entries, then tokenise a random sample of 100 comments into `lists`.
    getStop()

    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # these files must come from a trusted source.
    with open("pos_review.pkl", 'rb') as pos_file:
        pos = pickle.load(pos_file)
    with open("neg_review.pkl", 'rb') as neg_file:
        neg = pickle.load(neg_file)
    print(len(pos))
    print(len(neg))

    # Remove empty entries in a single pass; slice assignment keeps the same
    # list objects, as the original in-place pop() did.
    pos[:] = [review for review in pos if review != []]
    neg[:] = [review for review in neg if review != []]

    comments = list(
        DbUtil.getAllResult("select * from comment limit 10000 offset 600000"))
    shuffle(comments)
    comments = comments[:100]

    # One token list per sampled comment.  The per-comment list is not named
    # `list`, so the builtin stays usable for the rest of the script.
    lists = []
    for comment in comments:
        words = [
            word for word in jieba.cut(comment[2])
            if word not in stop and word != ' '
        ]
        lists.append(words)

    count = 0
    frame = tkinter.Tk()
# The leading print(neg) belongs to an enclosing scope that is not visible on
# this line.
# The script entry point then:
#   * calls getStop();
#   * unpickles "pos_review.pkl" and "neg_review.pkl" into `pos` and `neg`
#     (the file objects passed to pickle.load are never closed explicitly)
#     and prints both lengths;
#   * removes every entry equal to [] from `pos` and `neg`, one pop() per
#     pass of the while loop;
#   * fetches 10000 `comment` rows at offset 600000, shuffles them and keeps
#     the first 100;
#   * tokenises column 2 of each kept row with jieba.cut, drops tokens that
#     are in `stop` or are a single space, and appends the token list to
#     `lists`;
#   * sets `count` to 0 and creates a tkinter.Tk() root in `frame`.
# NOTE(review): the loop variable `list` rebinds the builtin name for the
# rest of the module once the loop has run.
print(neg) if __name__ == '__main__': getStop() pos = pickle.load(open("pos_review.pkl", 'rb')) neg = pickle.load(open("neg_review.pkl", 'rb')) print(len(pos)) print(len(neg)) while [] in pos: pos.pop(pos.index([])) while [] in neg: neg.pop(neg.index([])) comments = list(DbUtil.getAllResult("select * from comment limit 10000 offset 600000")) shuffle(comments) comments = comments[:100] lists = [] for comment in comments: list = [] result = jieba.cut(comment[2]) for word in result: if word not in stop and word != ' ': list.append(word) lists.append(list) count = 0 frame = tkinter.Tk()
def toDict(list):
    """Return ``{word: True}`` for every word of ``list`` found in ``best_words``.

    The parameter name shadows the builtin ``list``; it is kept unchanged so
    that callers passing it by keyword keep working.
    """
    return {word: True for word in list if word in best_words}


def features(feature_extraction_method):
    """Apply ``feature_extraction_method`` to each entry of the global ``lists``.

    Returns:
        A list holding the result for every entry, in the same order.
    """
    return [feature_extraction_method(words) for words in lists]


if __name__ == '__main__':
    getStop()
    comments = list(DbUtil.getAllResult("select * from comment"))
    shuffle(comments)
    conn, cur = DbUtil.getConn()
    try:
        for i in range(0, 1065000, 1000):
            print(i)
            # Every row of a 1000-row batch gets the same argument, a value
            # in 1..30 derived from the batch number.
            day = (i // 1000 + 1) % 30 + 1
            for comment in comments[i:i + 1000]:
                # Build the statement once so the printed text is exactly
                # what gets executed, even if Ran.getTime returns a different
                # value on each call.
                statement = (
                    'UPDATE comment set comment_time = "%s" where id =%s'
                    % (Ran.getTime(day), comment[0])
                )
                print(statement)
                cur.execute(statement)
            conn.commit()
    finally:
        # Release the connection even when an update fails part-way.
        DbUtil.close(conn, cur)
    # lists = []
    # for comment in comments:
    #     list = []
    #     result = jieba.cut(comment[2])
    #     for word in result: