def plot(game_name, game_id):
    """Draw a keyword tag cloud for one game's comments.

    Every ``comment`` row matching *game_id* is fetched, up to three
    keywords are extracted from column index 2 of each row with
    ``jieba.analyse.extract_tags``, and the keyword frequencies are
    rendered to ``c:/<game_name>.png``.

    Args:
        game_name: Used only to build the output file name.
        game_id: Interpolated into the WHERE clause of the comment query.
    """
    frequency = {}
    rows = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id)
    for row in rows:
        for keyword in jieba.analyse.extract_tags(row[2], topK=3):
            # Count only keywords of two or more characters that are not
            # stop words.
            if len(keyword) >= 2 and keyword not in stop:
                frequency[keyword] = frequency.get(keyword, 0) + 1

    print(frequency)

    # Most frequent first; the slice starts at index 1, so the single
    # top-ranked keyword is left out and at most 49 entries remain.
    ranked = sorted(frequency.items(), key=itemgetter(1), reverse=True)[1:50]

    palette = random.choice(list(COLOR_SCHEMES.values()))
    tags = make_tags(ranked, minsize=30, maxsize=120, colors=palette)

    create_tag_image(
        tags,
        'c:/%s.png' % game_name,
        background=(0, 0, 0, 255),
        size=(900, 600),
        fontname='SimHei',
    )

    print('having save file to dick')
Exemple #2
0
def plot(game_name, game_id):
    """Build and save a tag-cloud image from the comments of one game.

    The comment rows for *game_id* are loaded, ``jieba.analyse.extract_tags``
    yields at most three keywords for column index 2 of each row, and the
    resulting keyword counts are drawn into ``c:/<game_name>.png``.

    Args:
        game_name: Inserted into the output image path.
        game_id: Inserted into the comment query's WHERE clause.
    """
    query = "select * from comment where game_id = %s" % game_id
    counts = {}
    for row in DbUtil.getAllResult(query):
        extracted = jieba.analyse.extract_tags(row[2], topK=3)
        for term in extracted:
            # Single-character terms and stop words are not counted.
            if len(term) < 2 or term in stop:
                continue
            counts[term] = counts.get(term, 0) + 1

    print(counts)

    by_count = sorted(counts.items(), key=itemgetter(1), reverse=True)
    # Index 0 (the most frequent term) is excluded by this slice.
    selected = by_count[1:50]

    scheme = random.choice(list(COLOR_SCHEMES.values()))
    tags = make_tags(selected, minsize=30, maxsize=120, colors=scheme)

    create_tag_image(
        tags,
        'c:/%s.png' % game_name,
        background=(0, 0, 0, 255),
        size=(900, 600),
        fontname='SimHei',
    )

    print('having save file to dick')
Exemple #3
0
def cut():
    """Segment up to 300000 comments with jieba and write them to ``test1.txt``.

    Column index 2 of each comment row is tokenised with ``jieba.cut``.
    Tokens found in ``stop`` and tokens equal to a single space are dropped.
    Each comment that still has tokens becomes one line of space-separated
    tokens; comments with no remaining tokens produce no line.
    """
    comments = DbUtil.getAllResult("select * from comment limit 300000")

    # The context manager closes the file even when segmentation or writing
    # raises part-way through, which the manual open()/close() pair did not.
    with open("test1.txt", "w", encoding="utf-8") as out:
        for comment in comments:
            tokens = [
                word for word in jieba.cut(comment[2])
                if word not in stop and word != ' '
            ]
            if tokens:
                out.write(" ".join(tokens) + "\n")
def plot(game_name, game_id):
    """Draw a keyword tag cloud for one game and print a stored keyword row.

    Comment rows for *game_id* are fetched, up to three keywords are
    extracted from column index 2 of each row, and the keyword counts are
    rendered to ``C:/Users/Administrator/Desktop/<game_name>_<game_id>.png``.
    Afterwards the first row of the ``keyword`` table is read, evaluated
    and printed.

    Args:
        game_name: Used in the output file name and the progress message.
        game_id: Used in the comment query and the output file name.
    """
    query = "select * from comment where game_id = %s" % game_id
    frequency = {}
    for row in DbUtil.getAllResult(query):
        for keyword in jieba.analyse.extract_tags(row[2], topK=3):
            # Only keywords of length >= 2 that are not stop words count.
            if len(keyword) >= 2 and keyword not in stop:
                frequency[keyword] = frequency.get(keyword, 0) + 1

    # Most frequent first; the slice skips index 0 (the top keyword).
    ranked = sorted(frequency.items(), key=itemgetter(1), reverse=True)[1:50]

    palette = random.choice(list(COLOR_SCHEMES.values()))
    tags = make_tags(ranked, minsize=30, maxsize=100, colors=palette)

    create_tag_image(
        tags,
        'C:/Users/Administrator/Desktop/%s_%s.png' % (game_name, game_id),
        background=(0, 0, 0, 255),
        size=(900, 600),
        fontname='SimHei',
    )
    print('create file ---%s' % game_name)

    # Disabled persistence step, kept as found:
    # dict = {}
    #
    # for (k, v) in swd:
    #     dict[k] = v
    # print('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s"' % (game_id, str(dict)))
    # cur.execute('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s")' % (game_id, str(dict)))
    # conn.commit()

    stored = DbUtil.getOneResult('select keyword from keyword limit 1')
    # NOTE(review): eval() executes whatever text is stored in the keyword
    # column. If that column only ever holds str(dict) output (as the
    # disabled INSERT above suggests), ast.literal_eval would be a safer
    # parser -- confirm before changing.
    print(eval(stored[0]))
def plot(game_name, game_id):
    """Save a tag-cloud image for one game, then print a stored keyword row.

    The comments belonging to *game_id* are read, at most three keywords
    per comment are extracted from column index 2, and their counts are
    drawn into ``C:/Users/Administrator/Desktop/<game_name>_<game_id>.png``.
    Finally the first ``keyword`` row in the database is evaluated and
    printed.

    Args:
        game_name: Part of the output file name and of the progress message.
        game_id: Part of the comment query and of the output file name.
    """
    counts = {}
    rows = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id)
    for row in rows:
        extracted = jieba.analyse.extract_tags(row[2], topK=3)
        for term in extracted:
            # Single-character terms and stop words are ignored.
            if len(term) < 2 or term in stop:
                continue
            counts[term] = counts.get(term, 0) + 1

    by_count = sorted(counts.items(), key=itemgetter(1), reverse=True)
    # Index 0 (the most frequent term) is excluded by this slice.
    selected = by_count[1:50]

    scheme = random.choice(list(COLOR_SCHEMES.values()))
    tags = make_tags(selected, minsize=30, maxsize=100, colors=scheme)

    create_tag_image(
        tags,
        'C:/Users/Administrator/Desktop/%s_%s.png' % (game_name, game_id),
        background=(0, 0, 0, 255),
        size=(900, 600),
        fontname='SimHei',
    )
    print('create file ---%s' % game_name)

    # Disabled persistence step, kept as found:
    # dict = {}
    #
    # for (k, v) in swd:
    #     dict[k] = v
    # print('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s"' % (game_id, str(dict)))
    # cur.execute('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s")' % (game_id, str(dict)))
    # conn.commit()

    keyword_row = DbUtil.getOneResult('select keyword from keyword limit 1')
    # NOTE(review): eval() runs arbitrary expressions read from the
    # database. If the column only contains str(dict) text, as the disabled
    # INSERT above suggests, ast.literal_eval would be safer -- confirm
    # before changing.
    print(eval(keyword_row[0]))
def toDict(list):
    """Map every word of *list* that is in ``best_words`` to ``True``.

    Words absent from ``best_words`` are dropped; a word occurring more
    than once yields a single key.
    """
    return {word: True for word in list if word in best_words}


def features(feature_extraction_method):
    """Apply *feature_extraction_method* to every entry of the global ``lists``.

    Args:
        feature_extraction_method: Callable invoked once per entry of
            ``lists``, in order.

    Returns:
        A list holding the callable's result for each entry. No label is
        attached to the results here.
    """
    return [feature_extraction_method(entry) for entry in lists]


if __name__ == '__main__':
    # Script entry point: rewrite comment_time for the comment rows,
    # walking the shuffled rows in batches of 1000 and committing once
    # per batch.
    getStop()

    comments = list(DbUtil.getAllResult("select * from comment"))

    shuffle(comments)
    conn, cur = DbUtil.getConn()
    try:
        # NOTE(review): 1065000 is a hard-coded upper bound; rows beyond
        # that index are never updated -- confirm it matches the table size.
        for i in range(0, 1065000, 1000):
            print(i)
            # Argument passed to Ran.getTime for every row of this batch;
            # the expression cycles through the values 1..30.
            time_arg = (i // 1000 + 1) % 30 + 1
            for comment in comments[i:i + 1000]:
                # Build the statement once so the logged text is exactly
                # what gets executed (two separate Ran.getTime calls could
                # produce different values for the log and the update).
                sql = 'UPDATE comment set comment_time = "%s" where id =%s' % (
                    Ran.getTime(time_arg), comment[0])
                print(sql)
                cur.execute(sql)
            conn.commit()
    finally:
        # Release the connection even when an update or commit fails.
        DbUtil.close(conn, cur)
    # lists = []
    # for comment in comments:
    #     list = []
Exemple #7
0
    create_tag_image(tags,
                     'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')

    print('having save file to dick')


if __name__ == '__main__':
    # Script entry point: load the stop words and the jieba user
    # dictionary, then draw one tag cloud per distinct game name among the
    # 50 games with the most comments.

    with open("../StopWords.txt", encoding="utf-8") as f:
        for line in f:
            line = line.replace("\n", '')
            # Skip blank lines instead of treating the first one as end of
            # file, which silently dropped every stop word after it.
            if line:
                stop.append(line)

    jieba.load_userdict("c:/dict.txt")

    # Each row is (game_id, game_name).
    games = DbUtil.getAllResult(
        "select game_id,games.game_name from `comment` join games on game_id = games.id GROUP BY game_id ORDER BY count(game_id) desc limit 50"
    )

    # Plot each game name only once, even if it appears under several ids.
    seen_names = set()
    for game in games:
        if game[1] not in seen_names:
            plot(game[1], game[0])
            seen_names.add(game[1])
                     colors=random.choice(list(COLOR_SCHEMES.values())))

    create_tag_image(tags,
                     'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')

    print('having save file to dick')


if __name__ == '__main__':
    # Script entry point: read the stop-word list, register the jieba user
    # dictionary, and render a tag cloud for each distinct game name among
    # the 50 most-commented games.

    with open("../StopWords.txt", encoding="utf-8") as f:
        for line in f:
            line = line.replace("\n", '')
            # A blank line no longer ends the load early; it is skipped so
            # the stop words after it are still read.
            if line:
                stop.append(line)

    jieba.load_userdict("c:/dict.txt")

    # Each row is (game_id, game_name).
    games = DbUtil.getAllResult("select game_id,games.game_name from `comment` join games on game_id = games.id GROUP BY game_id ORDER BY count(game_id) desc limit 50")

    # Game names already plotted, so a repeated name is drawn only once.
    seen_names = set()
    for game in games:
        if game[1] not in seen_names:
            plot(game[1], game[0])
            seen_names.add(game[1])
Exemple #9
0

if __name__ == '__main__':
    # Script entry point: load the pickled positive/negative reviews,
    # tokenise a random sample of 100 comments into ``lists``, and create
    # the Tk root window.
    getStop()
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this is only safe while the .pkl files are trusted local artifacts.
    with open("pos_review.pkl", 'rb') as pos_file:
        pos = pickle.load(pos_file)
    with open("neg_review.pkl", 'rb') as neg_file:
        neg = pickle.load(neg_file)
    print(len(pos))
    print(len(neg))

    # Drop empty reviews in a single pass instead of repeated
    # membership-test / index / pop scans.
    pos = [review for review in pos if review != []]
    neg = [review for review in neg if review != []]

    comments = list(
        DbUtil.getAllResult("select * from comment limit 10000 offset 600000"))

    shuffle(comments)
    comments = comments[:100]
    lists = []
    for comment in comments:
        # Named ``words`` rather than ``list`` so the builtin stays usable
        # for everything that runs after this loop.
        words = [
            word for word in jieba.cut(comment[2])
            if word not in stop and word != ' '
        ]
        lists.append(words)
    count = 0

    frame = tkinter.Tk()
    print(neg)


if __name__ == '__main__':
    # Script entry point: load the pickled positive/negative reviews,
    # tokenise a random sample of 100 comments into ``lists``, and create
    # the Tk root window.
    getStop()
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this is only safe while the .pkl files are trusted local artifacts.
    with open("pos_review.pkl", 'rb') as pos_file:
        pos = pickle.load(pos_file)
    with open("neg_review.pkl", 'rb') as neg_file:
        neg = pickle.load(neg_file)
    print(len(pos))
    print(len(neg))

    # Remove empty reviews in one pass rather than rescanning the list for
    # every removal.
    pos = [review for review in pos if review != []]
    neg = [review for review in neg if review != []]

    comments = list(DbUtil.getAllResult("select * from comment limit 10000 offset 600000"))

    shuffle(comments)
    comments = comments[:100]
    lists = []
    for comment in comments:
        # Avoid rebinding the builtin ``list`` at module level.
        words = [
            word for word in jieba.cut(comment[2])
            if word not in stop and word != ' '
        ]
        lists.append(words)
    count = 0

    frame = tkinter.Tk()
def toDict(list):
    """Return a dict keyed by the words of *list* found in ``best_words``.

    Every retained word maps to ``True``; words not in ``best_words`` are
    omitted, and duplicates collapse into one key.
    """
    return {word: True for word in list if word in best_words}


def features(feature_extraction_method):
    """Collect the result of *feature_extraction_method* for each item of ``lists``.

    Args:
        feature_extraction_method: Callable applied, in order, to every
            entry of the module-level ``lists``.

    Returns:
        A list with one result per entry of ``lists``. The results are
        returned as produced; no label is added.
    """
    return [feature_extraction_method(entry) for entry in lists]


if __name__ == '__main__':
    # Script entry point: assign a new comment_time to the comment rows,
    # processing the shuffled rows 1000 at a time with one commit per
    # batch.
    getStop()

    comments = list(DbUtil.getAllResult("select * from comment"))

    shuffle(comments)
    conn, cur = DbUtil.getConn()
    try:
        # NOTE(review): the upper bound 1065000 is hard-coded; rows past
        # that index are left untouched -- confirm it matches the table size.
        for i in range(0, 1065000, 1000):
            print(i)
            # Value handed to Ran.getTime for the whole batch; the
            # expression cycles through 1..30.
            time_arg = (i // 1000 + 1) % 30 + 1
            for comment in comments[i:i + 1000]:
                # Format the statement once so the printed text is the
                # statement that is actually executed.
                sql = 'UPDATE comment set comment_time = "%s" where id =%s' % (
                    Ran.getTime(time_arg), comment[0])
                print(sql)
                cur.execute(sql)
            conn.commit()
    finally:
        # Close the connection whether or not the updates succeeded.
        DbUtil.close(conn, cur)
    # lists = []
    # for comment in comments:
    #     list = []
    #     result = jieba.cut(comment[2])
    #     for word in result: