def getData(id, package_name):
    """Scrape one game from the Meizu (flyme) store and save it with its reviews.

    Reads the review total from the ``evaluate/list`` JSON endpoint and the
    score, download count and title from the HTML detail page, inserts one
    ``games`` row (``from_store='meizu'``) and then one ``comment`` row per
    review. Everything is committed once at the end; the connection is closed
    even when an error interrupts the run.

    Args:
        id: Application id placed in the ``app_id`` query parameter.
        package_name: Package name placed in the detail-page query string.

    Raises:
        ValueError: If the detail page yields no usable ``<h3>`` title.
    """
    total = BsUtil.praseJson(
        'http://app.flyme.cn/apps/public/evaluate/list?app_id=%s&start=0&max=1'
        % id)
    totalComCount = total['value']['totalCount']

    # Total download count and score come from the HTML detail page.
    soup = BsUtil.praseHtml(
        'http://app.flyme.cn/games/public/detail?package_name=%s' %
        package_name)
    totalScore = soup.find('div', class_="star_bg").attrs['data-num']
    totalDownload = soup.find(
        text="下      载:").parent.next_sibling.next_sibling.string

    # The game title is the <h3> that is a direct child of "detail_top".
    game_name = None
    for child in soup.find('div', class_="detail_top").children:
        if child.name == 'h3':
            game_name = child.string
    if game_name is None:
        # Previously this surfaced as an UnboundLocalError at the INSERT.
        raise ValueError(
            'no <h3> title found on the detail page of %r' % (package_name,))

    conn, cur = DbUtil.getConn()
    try:
        # Values travel as query parameters so the driver escapes them;
        # scraped text containing quotes or backslashes can no longer break
        # or alter the statement.
        # NOTE(review): assumes the cursor uses the "%s" (format) DB-API
        # paramstyle, as MySQL drivers do -- confirm against DbUtil.
        cur.execute(
            'INSERT INTO games(game_name,from_store, total_comment_count, '
            'total_score, total_download, data_date) '
            'VALUES (%s, %s, %s, %s, %s, %s)',
            (str(game_name), 'meizu', totalComCount, str(totalScore),
             str(totalDownload), str(DateUtil.currentDate())))
        game_id = cur.lastrowid

        # Ask for `totalComCount` items so all reviews arrive in one request.
        value = BsUtil.praseJson(
            'http://app.flyme.cn/apps/public/evaluate/list?app_id=%s&start=0&max=%s'
            % (id, totalComCount))

        for com in value['value']['list']:
            # The double-quote replacement is kept so new rows look like the
            # ones written by earlier runs.
            comment = html.unescape(com['comment']).replace("\"", "'")
            created = com['create_time']
            author = html.unescape(com['user_name']).replace("\"", "'")
            score = com['star']

            try:
                cur.execute(
                    'INSERT INTO comment(game_id, content, comment_time, '
                    'author, score) VALUES (%s, %s, %s, %s, %s)',
                    (game_id, comment, created, author, int(score)))
            except Exception as exc:
                # Best effort: one bad review must not abort the import, but
                # it should not vanish silently either.
                print('skipping comment by %r: %s' % (author, exc))

        conn.commit()
    finally:
        DbUtil.close(conn, cur)
# Example #2
# 0
def plot(game_name, game_id):
    """Draw a keyword tag cloud for one game's comments and save it as a PNG.

    Loads every ``comment`` row of ``game_id``, extracts up to three keywords
    per row with jieba, counts them (ignoring one-character words and entries
    of the module-level ``stop`` list), prints the counts and renders the
    ranked keywords to ``c:/<game_name>.png``.

    Args:
        game_name: Used only to build the output file name.
        game_id: Value interpolated into the ``game_id`` filter of the query.
    """
    # Named `word_counts` rather than `dict` so the builtin stays usable.
    word_counts = {}
    comments = DbUtil.getAllResult("select * from comment where game_id = %s" %
                                   game_id)
    for comment in comments:
        # comment[2] is presumably the comment text column -- TODO confirm
        # against the table schema.
        for word in jieba.analyse.extract_tags(comment[2], topK=3):
            if len(word) < 2 or word in stop:
                continue
            word_counts[word] = word_counts.get(word, 0) + 1

    print(word_counts)

    swd = sorted(word_counts.items(), key=itemgetter(1), reverse=True)
    # NOTE(review): the slice starts at index 1, so the single most frequent
    # keyword is left out of the cloud. Kept as-is; confirm it is intentional.
    swd = swd[1:50]
    tags = make_tags(swd,
                     minsize=30,
                     maxsize=120,
                     colors=random.choice(list(COLOR_SCHEMES.values())))

    create_tag_image(tags,
                     'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')

    # Message typo fixed: it previously ended in "dick" instead of "disk".
    print('having save file to disk')
def plot(game_name, game_id):
    """Render the most frequent comment keywords of a game as a tag-cloud PNG.

    Every ``comment`` row of ``game_id`` is passed through jieba's keyword
    extraction (three keywords per row). Keywords shorter than two characters
    or present in the module-level ``stop`` list are ignored. The counts are
    printed, ranked, and drawn to ``c:/<game_name>.png``.

    Args:
        game_name: Used only to build the output file name.
        game_id: Value interpolated into the ``game_id`` filter of the query.
    """
    # A descriptive local name instead of `dict`, which hid the builtin.
    keyword_counts = {}
    comments = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id)
    for comment in comments:
        # comment[2] is presumably the comment text column -- TODO confirm.
        for word in jieba.analyse.extract_tags(comment[2], topK=3):
            if len(word) < 2 or word in stop:
                continue
            keyword_counts[word] = keyword_counts.get(word, 0) + 1

    print(keyword_counts)

    swd = sorted(keyword_counts.items(), key=itemgetter(1), reverse=True)
    # NOTE(review): starting the slice at 1 skips the top-ranked keyword.
    # Behaviour preserved; confirm whether that is deliberate.
    swd = swd[1:50]
    tags = make_tags(swd,
                     minsize=30,
                     maxsize=120,
                     colors=random.choice(list(COLOR_SCHEMES.values())))

    create_tag_image(tags,
                     'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')

    # Message typo fixed: "dick" -> "disk".
    print('having save file to disk')
def getData(id, name, totalScore):
    """Scrape all comments of one Xiaomi market game and store them.

    Inserts one ``games`` row (``from_store='xiaomi'``) and then walks the
    paginated comment endpoint until the response's ``hasMore`` is falsy,
    inserting one ``comment`` row per entry. Everything is committed once at
    the end; the connection is closed even if an error interrupts the run.

    Args:
        id: Application id placed in the comment-list URL.
        name: Game name stored in ``games.game_name``.
        totalScore: Store score; it is multiplied by 10 before being stored.
    """
    url = ('http://market.xiaomi.com/apm/comment/list/%s?'
           'clientId=2bb48bb54747e03a6ab667ab7b51050a&co=CN'
           '&la=zh&os=1461822601&page=%s&sdk=22')
    game_sql = ('INSERT INTO games(game_name,from_store, total_comment_count, '
                'total_score, total_download, data_date) '
                'VALUES (%s, %s, %s, %s, %s, %s)')
    comment_sql = ('INSERT INTO comment(game_id, content, comment_time, '
                   'author, score) VALUES (%s, %s, %s, %s, %s)')

    page = 0
    result = BsUtil.praseJson(url % (id, page))
    totalComCount = result['pointCount']

    conn, cur = DbUtil.getConn()
    try:
        # Insert the game. Values are sent as query parameters so the driver
        # escapes them instead of the text being spliced into the SQL.
        # NOTE(review): assumes the cursor uses the "%s" (format) DB-API
        # paramstyle, as MySQL drivers do -- confirm against DbUtil.
        game_row = (name, 'xiaomi', totalComCount, totalScore * 10, 0,
                    str(DateUtil.currentDate()))
        print(game_sql, game_row)
        cur.execute(game_sql, game_row)
        game_id = cur.lastrowid

        while True:
            # The first pass reuses the page-0 response fetched above rather
            # than requesting the same page a second time.
            for comment in result['comments']:
                content = comment['commentValue'].replace("\"", "'").replace(" ", "")
                score = comment['pointValue']
                updated = comment['updateTime']
                author = comment['nickname'].replace("\"", "'")
                # Insert the comment; a failing row is reported and skipped.
                try:
                    # updateTime is divided by 1000 before conversion --
                    # presumably milliseconds to seconds; TODO confirm.
                    row = (game_id, content,
                           DateUtil.longToStrTime(updated / 1000), author,
                           int(score))
                    print(comment_sql, row)
                    cur.execute(comment_sql, row)
                except Exception as exc:
                    print(type(exc), ":", exc)

            if not result['hasMore']:
                break
            page += 1
            result = BsUtil.praseJson(url % (id, page))

        conn.commit()
    finally:
        DbUtil.close(conn, cur)
def plot(game_name, game_id):
    """Save a keyword tag cloud for one game to the Administrator desktop.

    Counts the jieba keywords (three per comment row, at least two characters
    long, not in the module-level ``stop`` list) of every ``comment`` row of
    ``game_id``, renders the ranked keywords to
    ``C:/Users/Administrator/Desktop/<game_name>_<game_id>.png`` and finally
    prints the evaluated first ``keyword`` value stored in the database.

    Args:
        game_name: Used in the output file name and the progress message.
        game_id: Interpolated into the query and the output file name.
    """
    frequency = {}
    query = "select * from comment where game_id = %s" % game_id
    for row in DbUtil.getAllResult(query):
        # row[2] is presumably the comment text column -- TODO confirm.
        for tag in jieba.analyse.extract_tags(row[2], topK=3):
            if len(tag) >= 2 and tag not in stop:
                frequency[tag] = frequency.get(tag, 0) + 1

    # NOTE(review): the [1:50] slice leaves out the top-ranked keyword.
    ranked = sorted(frequency.items(), key=itemgetter(1), reverse=True)[1:50]

    palette = random.choice(list(COLOR_SCHEMES.values()))
    tags = make_tags(ranked, minsize=30, maxsize=100, colors=palette)

    target = 'C:/Users/Administrator/Desktop/%s_%s.png' % (game_name, game_id)
    create_tag_image(tags,
                     target,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')
    print('create file ---%s' % game_name)

    # Disabled: persisting the ranked keywords into the `keyword` table.
    # dict = {}
    #
    # for (k, v) in swd:
    #     dict[k] = v
    # print('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s"' % (game_id, str(dict)))
    # cur.execute('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s")' % (game_id, str(dict)))
    # conn.commit()

    # NOTE(review): eval() runs whatever text the column holds as Python code;
    # if it only ever contains a dict literal, ast.literal_eval is the safer
    # choice -- confirm before changing.
    stored = DbUtil.getOneResult('select keyword from keyword limit 1')
    print(eval(stored[0]))
def plot(game_name, game_id):
    """Create the desktop tag-cloud image for a single game.

    For every ``comment`` row of ``game_id`` up to three jieba keywords are
    extracted. Keywords of fewer than two characters or listed in the
    module-level ``stop`` list are discarded, the rest are counted. The
    ranked counts are drawn to
    ``C:/Users/Administrator/Desktop/<game_name>_<game_id>.png``, after which
    the first stored ``keyword`` value is evaluated and printed.

    Args:
        game_name: Used in the output file name and the progress message.
        game_id: Interpolated into the query and the output file name.
    """
    tally = {}
    rows = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id)
    for row in rows:
        # row[2] is presumably the comment text column -- TODO confirm.
        keywords = jieba.analyse.extract_tags(row[2], topK=3)
        for keyword in keywords:
            if len(keyword) < 2 or keyword in stop:
                continue
            tally[keyword] = tally.get(keyword, 0) + 1

    by_count = sorted(tally.items(), key=itemgetter(1), reverse=True)
    # NOTE(review): index 0 (the most frequent keyword) is skipped here.
    by_count = by_count[1:50]

    scheme = random.choice(list(COLOR_SCHEMES.values()))
    tags = make_tags(by_count, minsize=30, maxsize=100, colors=scheme)

    out_path = 'C:/Users/Administrator/Desktop/%s_%s.png' % (game_name, game_id)
    create_tag_image(tags,
                     out_path,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')
    print('create file ---%s' % game_name)

    # Disabled: persisting the ranked keywords into the `keyword` table.
    # dict = {}
    #
    # for (k, v) in swd:
    #     dict[k] = v
    # print('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s"' % (game_id, str(dict)))
    # cur.execute('INSERT INTO keyword (game_id, keyword) VALUES (%s, "%s")' % (game_id, str(dict)))
    # conn.commit()

    # NOTE(review): eval() executes the stored text as Python code; prefer
    # ast.literal_eval if the column only holds a dict literal -- confirm.
    first_keyword = DbUtil.getOneResult('select keyword from keyword limit 1')
    print(eval(first_keyword[0]))
def getData(id, package_name):
    """Import one Meizu (flyme) store game and all of its reviews.

    The review total is read from the ``evaluate/list`` JSON endpoint; score,
    download count and title are read from the HTML detail page. One ``games``
    row (``from_store='meizu'``) and one ``comment`` row per review are
    inserted and committed together. The connection is always closed.

    Args:
        id: Application id placed in the ``app_id`` query parameter.
        package_name: Package name placed in the detail-page query string.

    Raises:
        ValueError: If the detail page yields no usable ``<h3>`` title.
    """
    total = BsUtil.praseJson('http://app.flyme.cn/apps/public/evaluate/list?app_id=%s&start=0&max=1' % id)
    totalComCount = total['value']['totalCount']

    # Total download count and score come from the HTML detail page.
    soup = BsUtil.praseHtml('http://app.flyme.cn/games/public/detail?package_name=%s' % package_name)
    totalScore = soup.find('div', class_="star_bg").attrs['data-num']
    totalDownload = soup.find(text="下      载:").parent.next_sibling.next_sibling.string

    # The game title is the <h3> that is a direct child of "detail_top".
    game_name = None
    for child in soup.find('div', class_="detail_top").children:
        if child.name == 'h3':
            game_name = child.string
    if game_name is None:
        # Previously this surfaced as an UnboundLocalError at the INSERT.
        raise ValueError('no <h3> title found on the detail page of %r' % (package_name,))

    games_sql = ('INSERT INTO games(game_name,from_store, total_comment_count, '
                 'total_score, total_download, data_date) '
                 'VALUES (%s, %s, %s, %s, %s, %s)')
    comment_sql = ('INSERT INTO comment(game_id, content, comment_time, author, score) '
                   'VALUES (%s, %s, %s, %s, %s)')

    conn, cur = DbUtil.getConn()
    try:
        # Parameters are escaped by the driver, so quotes or backslashes in
        # scraped text cannot break or alter the statement.
        # NOTE(review): assumes the cursor uses the "%s" (format) DB-API
        # paramstyle, as MySQL drivers do -- confirm against DbUtil.
        cur.execute(games_sql, (str(game_name), 'meizu', totalComCount, str(totalScore),
                                str(totalDownload), str(DateUtil.currentDate())))
        game_id = cur.lastrowid

        # Ask for `totalComCount` items so all reviews arrive in one request.
        value = BsUtil.praseJson('http://app.flyme.cn/apps/public/evaluate/list?app_id=%s&start=0&max=%s' % (id, totalComCount))

        for com in value['value']['list']:
            # Quote replacement kept so new rows match those of earlier runs.
            comment = html.unescape(com['comment']).replace("\"", "'")
            created = com['create_time']
            author = html.unescape(com['user_name']).replace("\"", "'")
            score = com['star']

            try:
                cur.execute(comment_sql, (game_id, comment, created, author, int(score)))
            except Exception as exc:
                # Best effort per review, but report what was dropped.
                print('skipping comment by %r: %s' % (author, exc))

        conn.commit()
    finally:
        DbUtil.close(conn, cur)
# Example #8
# 0
def cut():
    """Segment stored comments with jieba and dump them to ``test1.txt``.

    Reads up to 300000 ``comment`` rows. Each row is tokenised with
    ``jieba.cut``; tokens found in the module-level ``stop`` list and
    single-space tokens are dropped. Every row that still has tokens becomes
    one output line of space-separated tokens (UTF-8). The file is
    overwritten on each call.
    """
    comments = DbUtil.getAllResult("select * from comment limit 300000")

    # `with` closes (and flushes) the file even if segmentation raises.
    with open("test1.txt", "w", encoding="utf-8") as out:
        for comment in comments:
            # comment[2] is presumably the comment text column -- TODO confirm.
            tokens = [word for word in jieba.cut(comment[2])
                      if word not in stop and word != ' ']
            if tokens:
                out.write(" ".join(tokens))
                out.write("\n")
import random
from operator import itemgetter
import jieba
import jieba.analyse
from pytagcloud import make_tags, create_tag_image
from pytagcloud.colors import COLOR_SCHEMES

from site.mybzz.util import DbUtil

# Words excluded from keyword counting; starts empty and is read by plot().
stop = []
# Connection/cursor pair obtained as soon as this module is imported.
conn, cur = DbUtil.getConn()


def plot(game_name, game_id):
    """Count jieba keywords over all comments of one game.

    Keywords shorter than two characters or present in ``stop`` are ignored.

    NOTE(review): as visible here the counts are built and then discarded
    (nothing is returned and ``game_name`` is unused) -- the function looks
    cut short; confirm against the full source.
    """
    frequency = {}
    query = "select * from comment where game_id = %s" % game_id
    for row in DbUtil.getAllResult(query):
        # row[2] is presumably the comment text column -- TODO confirm.
        for tag in jieba.analyse.extract_tags(row[2], topK=3):
            if len(tag) >= 2 and tag not in stop:
                frequency[tag] = frequency.get(tag, 0) + 1
def toDict(list):
    """Return ``{word: True}`` for every word of ``list`` found in ``best_words``.

    ``best_words`` is a module-level collection defined elsewhere. Words that
    occur more than once collapse into a single key.

    The parameter keeps its original name (it shadows the builtin ``list``)
    so that keyword callers continue to work.
    """
    # A dict comprehension skips the throw-away list of tuples and does not
    # rely on the name `dict`, which may be rebound at module level.
    return {word: True for word in list if word in best_words}


def features(feature_extraction_method):
    """Apply ``feature_extraction_method`` to each entry of the global ``lists``.

    Returns:
        A list with one result per entry of ``lists``, in the same order.

    NOTE(review): the original comment spoke of tagging positive text as
    "pos", but no label is attached here.
    """
    return [feature_extraction_method(i) for i in lists]


if __name__ == '__main__':
    # Script entry: shuffle every comment row, then rewrite comment_time in
    # batches of 1000 rows, committing after each batch.
    getStop()

    comments = list(DbUtil.getAllResult("select * from comment"))
    shuffle(comments)

    conn, cur = DbUtil.getConn()
    update_sql = 'UPDATE comment set comment_time = "%s" where id =%s'
    for i in range(0, 1065000, 1000):
        print(i)
        # Batch number folded into the range 1..30 and handed to Ran.getTime.
        slot = int(i / 1000 + 1) % 30 + 1
        for comment in comments[i:i + 1000]:
            # Ran.getTime is called once for the log line and once for the
            # executed statement, exactly as before.
            print(update_sql % (Ran.getTime(slot), comment[0]))
            cur.execute(update_sql % (Ran.getTime(slot), comment[0]))
        conn.commit()
    DbUtil.close(conn, cur)
    # lists = []
    # for comment in comments:
    #     list = []
    # NOTE(review): `neg` is not assigned anywhere in this script as shown.
    print(neg)


if __name__ == '__main__':
    # Script entry: load the pickled review sets, tokenise a random sample of
    # 100 comments into `lists`, and create the Tk root window.
    getStop()
    # NOTE(review): unpickling can run arbitrary code; only load files that
    # this project produced itself.
    # `with` closes the pickle files, which the inline open() calls never did.
    with open("pos_review.pkl", 'rb') as pos_file:
        pos = pickle.load(pos_file)
    with open("neg_review.pkl", 'rb') as neg_file:
        neg = pickle.load(neg_file)
    print(len(pos))
    print(len(neg))

    # Drop empty reviews in one pass each.
    pos = [review for review in pos if review != []]
    neg = [review for review in neg if review != []]

    comments = list(DbUtil.getAllResult("select * from comment limit 10000 offset 600000"))

    shuffle(comments)
    comments = comments[:100]
    # One token list per sampled comment. Building it with a comprehension
    # also stops the builtin name `list` from being rebound at module level.
    # comment[2] is presumably the comment text column -- TODO confirm.
    lists = [
        [word for word in jieba.cut(comment[2])
         if word not in stop and word != ' ']
        for comment in comments
    ]
    count = 0

    frame = tkinter.Tk()
# Example #12
# 0

if __name__ == '__main__':
    # Script entry: load the pickled review sets, tokenise a random sample of
    # 100 comments into `lists`, and create the Tk root window.
    getStop()
    # NOTE(review): unpickling can run arbitrary code; only load files that
    # this project produced itself.
    # The files are now closed deterministically instead of being left open.
    with open("pos_review.pkl", 'rb') as pos_file:
        pos = pickle.load(pos_file)
    with open("neg_review.pkl", 'rb') as neg_file:
        neg = pickle.load(neg_file)
    print(len(pos))
    print(len(neg))

    # Drop empty reviews in one pass each.
    pos = [review for review in pos if review != []]
    neg = [review for review in neg if review != []]

    comments = list(
        DbUtil.getAllResult("select * from comment limit 10000 offset 600000"))

    shuffle(comments)
    comments = comments[:100]
    # One token list per sampled comment; no module-level variable named
    # `list` is created any more, so the builtin stays intact.
    # comment[2] is presumably the comment text column -- TODO confirm.
    lists = [
        [word for word in jieba.cut(comment[2])
         if word not in stop and word != ' ']
        for comment in comments
    ]
    count = 0

    frame = tkinter.Tk()
# Example #13
# 0
    create_tag_image(tags,
                     'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')

    print('having save file to dick')


if __name__ == '__main__':
    # Script entry: load the stop words and the jieba user dictionary, then
    # draw one tag cloud per distinct game name among the 50 games with the
    # most comments.
    jieba.load_userdict("c:/dict.txt")

    # Read the whole stop-word file. The previous readline() loop stopped at
    # the first blank line, silently dropping every word after it, and never
    # closed the file.
    with open("../StopWords.txt", encoding="utf-8") as f:
        for line in f:
            line = line.replace("\n", '')
            if line:
                stop.append(line)

    games = DbUtil.getAllResult(
        "select game_id,games.game_name from `comment` join games on game_id = games.id GROUP BY game_id ORDER BY count(game_id) desc limit 50"
    )

    # Each row is (game_id, game_name); a name is plotted once, first hit wins.
    plotted_names = set()
    for game_id, game_name in games:
        if game_name not in plotted_names:
            plot(game_name, game_id)
            plotted_names.add(game_name)
def toDict(list):
    """Map each word of ``list`` that is in ``best_words`` to ``True``.

    ``best_words`` is a module-level collection defined elsewhere. Repeated
    words collapse into a single key.

    The parameter name is unchanged (it shadows the builtin ``list``) so that
    existing keyword callers keep working.
    """
    # The dict comprehension builds the mapping directly and does not depend
    # on the name `dict`, which may be rebound at module level.
    return {word: True for word in list if word in best_words}


def features(feature_extraction_method):
    """Run ``feature_extraction_method`` over every entry of the global ``lists``.

    Returns:
        The results as a list, in the same order as ``lists``.

    NOTE(review): the original comment spoke of tagging positive text as
    "pos", but no label is attached here.
    """
    return [feature_extraction_method(entry) for entry in lists]


if __name__ == '__main__':
    # Script entry: shuffle every comment row, then rewrite comment_time in
    # batches of 1000 rows, committing after each batch.
    getStop()

    comments = list(DbUtil.getAllResult("select * from comment"))
    shuffle(comments)

    conn, cur = DbUtil.getConn()
    update_sql = 'UPDATE comment set comment_time = "%s" where id =%s'
    for i in range(0, 1065000, 1000):
        print(i)
        # Batch number folded into the range 1..30 and handed to Ran.getTime.
        slot = int(i / 1000 + 1) % 30 + 1
        for comment in comments[i:i + 1000]:
            # Ran.getTime is called once for the log line and once for the
            # executed statement, exactly as before.
            print(update_sql % (Ran.getTime(slot), comment[0]))
            cur.execute(update_sql % (Ran.getTime(slot), comment[0]))
        conn.commit()
    DbUtil.close(conn, cur)
    # lists = []
    # for comment in comments:
    #     list = []
    #     result = jieba.cut(comment[2])
    #     for word in result:
import sys

from site.mybzz.util import DbUtil
from site.mybzz.util import BsUtil
from site.mybzz.util import DateUtil

# Connection/cursor pair obtained as soon as this module is imported.
conn, cur = DbUtil.getConn()


def getData(name, id, score, totalDownload):
    commentUrl = "http://comment.mobilem.360.cn/comment/getComments?baike=%s&start=%s&count=%s"
    start, count = 0, 50
    result = BsUtil.praseJson(commentUrl % (id, start, 1))
    totalComCount = result['data']['total']

    print(
        'INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) '
        'VALUES ("%s", "%s", "%s", %d, "%s", "%s");' %
        (name, '360', totalComCount,
         (score * 10) / 2, totalDownload, DateUtil.currentDate()))
    cur.execute(
        'INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) '
        'VALUES ("%s", "%s", "%s", %d, "%s", "%s");' %
        (name, '360', totalComCount,
         (score * 10) / 2, totalDownload, DateUtil.currentDate()))
    game_id = cur.lastrowid

    while (True):
        try:
            result = BsUtil.praseJson(commentUrl % (id, start, count))
        except:
                     colors=random.choice(list(COLOR_SCHEMES.values())))

    create_tag_image(tags,
                     'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')

    print('having save file to dick')


if __name__ == '__main__':
    # Script entry: load the stop words and the jieba user dictionary, then
    # draw one tag cloud per distinct game name among the 50 games with the
    # most comments.
    jieba.load_userdict("c:/dict.txt")

    # Iterate over the file so that a blank line no longer ends the loading
    # early (the old readline() loop broke on the first empty line), and let
    # `with` close the handle.
    with open("../StopWords.txt", encoding="utf-8") as f:
        for line in f:
            line = line.replace("\n", '')
            if line:
                stop.append(line)

    games = DbUtil.getAllResult("select game_id,games.game_name from `comment` join games on game_id = games.id GROUP BY game_id ORDER BY count(game_id) desc limit 50")

    # Each row is (game_id, game_name); a name is plotted once, first hit wins.
    plotted_names = set()
    for game_id, game_name in games:
        if game_name not in plotted_names:
            plot(game_name, game_id)
            plotted_names.add(game_name)
# cur = conn.cursor()
#
# cur.execute("INSERT INTO comment(game_name, content, comment_time, author, score)"
#             " VALUES ('游戏名123', '内容123', '2016-05-19 15:56:07', 'ban', '44');")
# conn.commit()
# print("VALUES (%s, %s, %s, %s, %d);" % ('游戏名123', '内容123', '2016-05-19 15:56:07', 'ban', 44))
# print(time.localtime(1463739856))
# print(DateUtil.lomgToStrTime(1463739856))
# statement = "select * from comment"
#
# data =DbUtil.getAllResult(statement)

# for d in data:
#     print("游戏名:%s,内容:%s,时间:%s" % (d[1],d[2],d[3]))

# Connection/cursor pair obtained as soon as this module is imported.
conn, cur = DbUtil.getConn()


# Disabled: dump of up to 10000 comments of game 275 into a GBK text file.
# if __name__ == '__main__':
#     comments = DbUtil.getAllResult("select * from comment where game_id = 275 limit 10000")
#     file = open("c:/穿越火线_输入.txt", "w", encoding = "GBK")
#     for comment in comments:
#         try:
#             print(comment[2])
#             file.write(comment[2])
#         except:
#             pass

# Fetch the first stored `keyword` value, print it evaluated, and keep it.
# NOTE(review): eval() runs the stored text as Python code; if the column
# only ever holds a dict literal, ast.literal_eval would be the safer call --
# confirm before changing.
word = DbUtil.getOneResult('select keyword from keyword limit 1')
print(eval(word[0]))
# NOTE(review): this rebinds the builtin name `dict` for the rest of the module.
dict = eval(word[0])
import random
from operator import itemgetter
import jieba
import jieba.analyse
from pytagcloud import make_tags, create_tag_image
from pytagcloud.colors import COLOR_SCHEMES

from site.mybzz.util import DbUtil

# Words excluded from keyword counting; starts empty and is read by plot().
stop = []
# Connection/cursor pair obtained as soon as this module is imported.
conn, cur = DbUtil.getConn()

def plot(game_name, game_id):
    """Count and rank jieba keywords over all comments of one game.

    Keywords shorter than two characters or present in ``stop`` are ignored.

    NOTE(review): as visible here the ranked list is computed and then
    dropped (nothing is returned and ``game_name`` is unused) -- the function
    looks cut short; confirm against the full source.
    """
    tally = {}
    rows = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id)
    for row in rows:
        # row[2] is presumably the comment text column -- TODO confirm.
        for keyword in jieba.analyse.extract_tags(row[2], topK=3):
            if len(keyword) >= 2 and keyword not in stop:
                tally[keyword] = tally.get(keyword, 0) + 1

    # Highest count first; ties keep their first-seen order.
    swd = sorted(tally.items(), key=itemgetter(1), reverse=True)
import sys

from site.mybzz.util import DbUtil
from site.mybzz.util import BsUtil
from site.mybzz.util import DateUtil

# Connection/cursor pair obtained as soon as this module is imported.
conn, cur = DbUtil.getConn()


def getData(name, downloadCount, score, packageName):
    contextData = ''
    url = "http://sj.qq.com/myapp/app/comment.htm?apkName=%s&contextData=%s"

    totalComCount = 0
    while totalComCount == 0:
        try:
            result = BsUtil.praseJson(url % (packageName, contextData))
            totalComCount = result['obj']['total']
        except:
            pass

    print(
        'INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) '
        'VALUES ("%s", "%s", "%s", %d, "%s", "%s");' % (
            name, 'qq', totalComCount, score, downloadCount, DateUtil.currentDate()))
    cur.execute('INSERT INTO games(game_name,from_store, total_comment_count, total_score, total_download, data_date) '
                'VALUES ("%s", "%s", "%s", %d, "%s", "%s");' % (
                    name, 'qq', totalComCount, score, downloadCount, DateUtil.currentDate()))
    game_id = cur.lastrowid
    while (True):
        try:
# Example #20
# 0
# cur = conn.cursor()
#
# cur.execute("INSERT INTO comment(game_name, content, comment_time, author, score)"
#             " VALUES ('游戏名123', '内容123', '2016-05-19 15:56:07', 'ban', '44');")
# conn.commit()
# print("VALUES (%s, %s, %s, %s, %d);" % ('游戏名123', '内容123', '2016-05-19 15:56:07', 'ban', 44))
# print(time.localtime(1463739856))
# print(DateUtil.lomgToStrTime(1463739856))
# statement = "select * from comment"
#
# data =DbUtil.getAllResult(statement)

# for d in data:
#     print("游戏名:%s,内容:%s,时间:%s" % (d[1],d[2],d[3]))

# Connection/cursor pair obtained as soon as this module is imported.
conn, cur = DbUtil.getConn()

# Disabled: dump of up to 10000 comments of game 275 into a GBK text file.
# if __name__ == '__main__':
#     comments = DbUtil.getAllResult("select * from comment where game_id = 275 limit 10000")
#     file = open("c:/穿越火线_输入.txt", "w", encoding = "GBK")
#     for comment in comments:
#         try:
#             print(comment[2])
#             file.write(comment[2])
#         except:
#             pass

# Fetch the first stored `keyword` value, print it evaluated, and keep it.
# NOTE(review): eval() runs the stored text as Python code; if the column
# only ever holds a dict literal, ast.literal_eval would be the safer call --
# confirm before changing.
word = DbUtil.getOneResult('select keyword from keyword limit 1')
print(eval(word[0]))
# NOTE(review): this rebinds the builtin name `dict` for the rest of the module.
dict = eval(word[0])