예제 #1
0
def getAllUIDs():
    """Return the distinct user ids found in the two posts tables.

    Connects through ``getConnection()``, selects the ``uid`` column (cast to
    a signed integer by the SQL) from ``hupu_bxj_advocate_posts_1`` and from
    ``hupu_bxj_foll_posts_1``, and merges the first column of every returned
    row into one set. The number of distinct ids is printed before returning.

    Returns:
        set: the de-duplicated first-column values of both result sets.
    """
    mysqlcon = getConnection()
    # NOTE(review): connection settings, including the password, are
    # hard-coded here; consider loading them from configuration instead.
    mysqlcon.setConnecter(host='192.168.1.199', port=3306, user="******",
                          passwd='1q2w3e4r', db='test',
                          use_unicode=True, charset="utf8")

    # The SQL text is kept exactly as before (including spacing).
    advocate_sql = ('select cast(uid as signed int) '
                    'from hupu_bxj_advocate_posts_1 ')
    follow_sql = ('select cast(uid as signed int) '
                  ' from hupu_bxj_foll_posts_1')

    advocate_rows = mysqlcon.queryWithReturn(advocate_sql)
    follow_rows = mysqlcon.queryWithReturn(follow_sql)

    # Build the set directly from both result sets: no intermediate
    # concatenated sequence, and it works even if the two results are of
    # different sequence types.
    uids = {row[0]
            for rows in (advocate_rows, follow_rows)
            for row in rows}

    print('目前为止出现的用户数量是', len(uids))
    return uids
예제 #2
0
def numOfPosts():
    """Load (post_id, post_time) rows of main posts and show them on a timeline.

    Queries ``hupu_bxj_advocate_posts_1`` for ``post_id`` (cast to a signed
    integer by the SQL) and ``post_time``, wraps the rows in a DataFrame with
    columns ``'data'`` and ``'timestamp'``, and hands it to
    ``showDataInTimeLine`` with ``10`` as the second argument. The time spent
    on the query plus DataFrame construction is printed afterwards.
    """
    conn = getConnection()
    # NOTE(review): hard-coded connection settings and password.
    conn.setConnecter(host='192.168.1.199', port=3306, user="******",
                      passwd='1q2w3e4r', db='test',
                      use_unicode=True, charset="utf8")

    query = ''.join([
        'select cast(post_id as signed int), post_time ',
        'from hupu_bxj_advocate_posts_1',
    ])

    # The timed window covers the query and the conversion to a DataFrame,
    # but not the call to showDataInTimeLine.
    started = time.time()
    rows = conn.queryWithReturn(query)
    frame = pd.DataFrame(np.array(rows), columns=['data', 'timestamp'])
    finished = time.time()

    showDataInTimeLine(frame, 10)
    print('查询数据阶段耗时为', finished - started, '秒。')
예제 #3
0
def numOfEachUser():
    """Compute and plot per-user post-count statistics for a fixed time window.

    Selects ``(uid, post_time)`` rows from ``hupu_bxj_advocate_posts_1`` and
    ``hupu_bxj_foll_posts_1`` with ``post_time`` strictly between 1041350400
    and 1072886400 (as Unix epoch seconds these are 2003-01-01 and 2004-01-01
    at UTC+8; NOTE(review): confirm the column's unit/timezone), counts the
    rows per uid across both tables, prints the counts and the summary, and
    shows a 100-bin histogram of the counts.

    Returns:
        dict: with the keys
            ``'人均发帖数'`` (mean count per user),
            ``'用户发帖数中位数'`` (median count),
            ``'用户发帖数四分位数'`` (``[25th percentile, 75th percentile]``),
            ``'发帖最多的用户'`` (dict holding ``'用户id'``, the result of
            ``searchDictByValue`` for the maximum count, and ``'帖子数'``,
            the maximum count).

    NOTE(review): an empty result set is not handled; ``np.max`` is expected
    to raise on an empty sequence.
    """
    from collections import Counter

    mysqlcon = getConnection()
    # NOTE(review): hard-coded connection settings and password.
    mysqlcon.setConnecter(host='192.168.1.199', port=3306, user="******",
                          passwd='1q2w3e4r', db='test',
                          use_unicode=True, charset="utf8")

    # The SQL text is kept exactly as before (including spacing).
    advocate_sql = ('select cast(uid as signed int), post_time '
                    'from hupu_bxj_advocate_posts_1 '
                    ' where post_time>1041350400 and post_time<1072886400')
    follow_sql = ('select cast(uid as signed int), post_time '
                  ' from hupu_bxj_foll_posts_1'
                  ' where post_time>1041350400 and post_time<1072886400')

    data1 = mysqlcon.queryWithReturn(advocate_sql)
    data2 = mysqlcon.queryWithReturn(follow_sql)

    # Count rows per uid over both result sets without concatenating them.
    # Converted to a plain dict so searchDictByValue receives the same type
    # as before; insertion order (first appearance) is preserved.
    res = dict(Counter(row[0] for rows in (data1, data2) for row in rows))

    vs = list(res.values())
    print(vs)

    top_count = np.max(vs)  # computed once, used for both lookup and report
    stats = {'人均发帖数': np.mean(vs),
             '用户发帖数中位数': np.median(vs),
             '用户发帖数四分位数': [np.percentile(vs, 25),
                                    np.percentile(vs, 75)],
             '发帖最多的用户': {'用户id': searchDictByValue(res, top_count),
                                '帖子数': top_count}}
    print(stats)
    plt.hist(vs, 100)
    plt.show()
    return stats
예제 #4
0
def numOfUsers():
    """Load (uid, post_time) rows from both posts tables and show a timeline.

    Runs one query against ``hupu_bxj_advocate_posts_1`` and one against
    ``hupu_bxj_foll_posts_1``, concatenates the two results, wraps them in a
    DataFrame with columns ``'data'`` and ``'timestamp'``, and passes it to
    ``showDataInTimeLine`` with ``20`` as the second argument. The elapsed
    time from before the connection is created until the DataFrame is built
    is printed afterwards.
    """
    started = time.time()  # timer includes connection setup

    conn = getConnection()
    # NOTE(review): hard-coded connection settings and password.
    conn.setConnecter(host='192.168.1.199', port=3306, user="******",
                      passwd='1q2w3e4r', db='test',
                      use_unicode=True, charset="utf8")

    # Both statements share the same select list; only the table differs.
    select_clause = 'select cast(uid as signed int), post_time '
    advocate_rows = conn.queryWithReturn(
        select_clause + 'from hupu_bxj_advocate_posts_1 ')
    follow_rows = conn.queryWithReturn(
        select_clause + ' from hupu_bxj_foll_posts_1')

    combined = np.array(advocate_rows + follow_rows)
    frame = pd.DataFrame(combined, columns=['data', 'timestamp'])
    finished = time.time()

    showDataInTimeLine(frame, 20)
    print('查询数据阶段耗时为', finished - started, '秒。')
예제 #5
0
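# Per-user statistics for main posts: total posts, times posts were recommended, total gold coins on posts, total replies to posts, total replies that were "lit up" (highlighted), and total post views.
# Distribution of the user's posts across boards.
# Per-user statistics for follow-up posts (replies): total replies, total times replies were lit up, total gold coins on replies, number of highlighted replies, times highlighted replies were lit up, and times replies were quoted.
# Fetch the user's main-post data from MySQL.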
from hupu.analisys.dbconnection.getMySQL import getConnection
import time
# Module-level connection object, created when this module is imported;
# getAdvocatePosts() below issues its query through it.
mysqlcon = getConnection()
# NOTE(review): the connection settings (including the password) are
# hard-coded; consider reading them from configuration or the environment.
mysqlcon.setConnecter(host='192.168.1.198',
                      port=3306,
                      user="******",
                      passwd='1q2w3e4r',
                      db='test',
                      use_unicode=True,
                      charset="utf8")


def getAdvocatePosts(uid):
    """Return all rows of ``hupu_bxj_advocate_posts_1`` whose uid equals *uid*.

    The query is sent through the module-level ``mysqlcon`` object and the
    elapsed wall-clock time of the call is printed.

    Args:
        uid: user id; it is converted with ``str()`` and placed between
            single quotes in the SQL text.

    Returns:
        Whatever ``mysqlcon.queryWithReturn`` returns for the query.

    NOTE(review): *uid* is interpolated into the SQL text without escaping,
    so callers must only pass trusted values.
    """
    started = time.time()
    query = "select * from hupu_bxj_advocate_posts_1 where uid='{}'".format(
        str(uid))
    rows = mysqlcon.queryWithReturn(query)
    elapsed = time.time() - started
    print("耗时是", elapsed)
    return rows


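# Summarize the basic statistics of main posts.
# For a user: total posts, times posts were recommended, total gold coins on posts, total replies to posts, total replies that were lit up, and total post views.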
def countAPosts(posts):
    #对帖子去重,用post_和title
    distinct_posts = {}