def showLastYears(word, years):
    """Plot, per calendar year, how many recent posts contain ``word``.

    The window ends at the newest date returned by ``getTimeDomain`` and
    starts ``years * 365`` days before it.  One bucket is created for every
    calendar year from the start year to the end year inclusive, so labels
    can never repeat (stepping a date by 365 days, as was done before, can
    land twice in the same year once a leap year is crossed).

    Args:
        word: Substring searched for in each post's content (``post[0]``).
        years: Length of the look-back window, counted in 365-day years.
    """
    _, enddate = getTimeDomain(RFF.getDateList())
    begdate = enddate - datetime.timedelta(days=years * 365)
    spostdate = getPostDatebyTimeDomain(begdate, enddate,
                                        RFF.getPostDataList())
    print("begdate=", begdate, "enddate=", enddate)

    # One label/counter pair per calendar year covered by the window.
    first_year = begdate.year
    bucket_years = range(first_year, enddate.year + 1)
    timeline = [str(year) + "年" for year in bucket_years]
    feqlist = [0] * len(bucket_years)

    # spostdate layout: [[content, author, time], [...], ...]
    for post in spostdate:
        if word not in post[0]:
            continue
        postdate = datetime.datetime.strptime(post[2], "%Y-%m-%d %H:%M")
        satpos = postdate.year - first_year
        # Skip anything outside the window instead of letting a negative
        # index wrap around and inflate the last bucket.
        if 0 <= satpos < len(feqlist):
            feqlist[satpos] += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(len(spostdate)) + ')', timeline, feqlist,
        '时间频率图(' + str(begdate.year) + "->" + str(enddate.year) + ")")
    print('>>>>>图像加载完毕')
def showLastDays(word, days):
    """Plot, per day, how many posts from the last ``days`` days contain ``word``.

    The window ends at the newest date returned by ``getTimeDomain`` and
    starts ``days`` days before it.  Bucket ``k`` holds the posts whose
    timestamp lies ``k`` whole days after the window start and is labelled
    with the month-day of ``begdate + k`` days.  There are ``days + 1``
    buckets so that posts made on the final day have a slot of their own.

    Args:
        word: Substring searched for in each post's content (``post[0]``).
        days: Length of the look-back window in days (expected to be an int).
    """
    _, enddate = getTimeDomain(RFF.getDateList())
    begdate = enddate - datetime.timedelta(days=days)
    spostdate = getPostDatebyTimeDomain(begdate, enddate,
                                        RFF.getPostDataList())

    # Offsets 0..days are all possible inside [begdate, enddate].
    bucket_count = max(days + 1, 0)
    timeline = []
    for offset in range(bucket_count):
        day = begdate + datetime.timedelta(days=offset)
        timeline.append(str(day.month) + "-" + str(day.day))
    feqlist = [0] * bucket_count

    # spostdate layout: [[content, author, time], [...], ...]
    for post in spostdate:
        if word not in post[0]:
            continue
        posted = datetime.datetime.strptime(post[2], "%Y-%m-%d %H:%M")
        satpos = (posted - begdate).days
        # Index by the offset itself: the previous ``satpos - 1`` put every
        # post one label too early and wrapped offset 0 into the last bucket.
        if 0 <= satpos < bucket_count:
            feqlist[satpos] += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(len(spostdate)) + ')', timeline, feqlist,
        '时间频率图(' + str(begdate.date()) + "->" + str(enddate.date()) + ")")
    print('>>>>>图像加载完毕')
# Example no. 3
# 0
def showLastDays(authorname, days):
    """Plot how many posts ``authorname`` made on each day of a time window.

    With ``days > 0`` the window is the last ``days`` days before the newest
    date returned by ``getTimeDomain``; otherwise the whole time domain is
    used.  One bucket is produced per day from the window start to the
    window end inclusive, and each bucket's count is requested from
    ``getCountByDate`` for the same day that is used for its label.

    When the window is longer than 30 days only every ``ceil(span / 30)``-th
    bucket gets a "month-day" label (the rest are blank) and the final
    bucket is labelled with its full date.

    Args:
        authorname: Author whose posts are counted.
        days: Window length in days (expected to be an int); ``<= 0`` selects
            the full time domain.
    """
    print("加载任务结果文件...")
    buf = RFF.openResult()
    begdate, enddate = getTimeDomain(RFF.getDateList(buf))
    print("计算时间区间...")
    if days > 0:
        begdate = enddate - datetime.timedelta(days=days)
        span = days
    else:
        # Keep the domain's own start; previously begdate was overwritten
        # before this fallback was reached, which made it ineffective.
        span = (enddate.date() - begdate.date()).days
    print("解析回帖数据...")
    buf = RFF.getPostDataList(buf)
    spostdate = getPostDatebyTimeDomain(begdate, enddate, buf)
    del buf  # release the parsed post data as soon as it has been filtered
    print("开始统计.")
    spostdate = getPostByAuthor(authorname, spostdate)
    llen = len(spostdate)

    # Label every ``label_step``-th bucket; ``-(-a // b)`` is ceil(a / b).
    label_step = -(-span // 30) if span > 30 else 1
    feqlist = []
    timeline = []
    xdate = begdate
    for offset in range(span + 1):
        xdate = begdate + datetime.timedelta(days=offset)
        # Count for the very day being labelled (the count used to be taken
        # after the date had already been advanced by one day).
        feqlist.append(getCountByDate(xdate, spostdate))
        if offset % label_step == 0:
            timeline.append(str(xdate.month) + "-" + str(xdate.day))
        else:
            timeline.append("")
    if span > 30:
        # Always show the full date on the last tick (this was a no-op
        # ``==`` comparison before).
        timeline[-1] = str(xdate.date())

    # Draw the chart.
    drawGraphic.linePlotGraphics('时间','出现次数(帖子/回帖总数:'+str(llen)+')',timeline,feqlist,"【"+ authorname +'】的活跃程度图('+ str(begdate.date()) + "->" + str(enddate.date()) +")")
    print('>>>>>图像加载完毕')
# Example no. 4
# 0
def showLastDays(word, days):
    """Plot, per day, how many posts from the last ``days`` days contain ``word``.

    The window ends at the newest date returned by ``getTimeDomain`` and
    starts ``days`` days before it.  Bucket ``k`` holds the posts whose
    timestamp lies ``k`` whole days after the window start; ``days + 1``
    buckets are created so the final day has its own slot.

    When ``days > 30`` only every ``ceil(days / 30)``-th bucket gets a
    "month-day" label (the rest are blank) and the final bucket is labelled
    with the full end date.

    Args:
        word: Substring searched for in each post's content (``post[0]``).
        days: Length of the look-back window in days (expected to be an int).
    """
    print("加载任务结果文件...")
    buf = RFF.openResult()
    _, enddate = getTimeDomain(RFF.getDateList(buf))
    print("计算时间区间...")
    begdate = enddate - datetime.timedelta(days=days)
    print("解析回帖数据...")
    buf = RFF.getPostDataList(buf)
    spostdate = getPostDatebyTimeDomain(begdate, enddate, buf)
    del buf  # release the parsed post data as soon as it has been filtered
    print("开始统计.")

    # Build the labels and zeroed counters.
    bucket_count = max(days + 1, 0)
    # Label every ``label_step``-th bucket; ``-(-a // b)`` is ceil(a / b).
    label_step = -(-days // 30) if days > 30 else 1
    timeline = []
    for offset in range(bucket_count):
        day = begdate + datetime.timedelta(days=offset)
        if offset % label_step == 0:
            timeline.append(str(day.month) + "-" + str(day.day))
        else:
            timeline.append("")
    if days > 30:
        # Always show the full date on the last tick (this was a no-op
        # ``==`` comparison before).
        timeline[-1] = str(enddate.date())
    feqlist = [0] * bucket_count

    # spostdate layout: [[content, author, time], [...], ...]
    for post in spostdate:
        if word not in post[0]:
            continue
        posted = datetime.datetime.strptime(post[2], "%Y-%m-%d %H:%M")
        satpos = (posted - begdate).days
        # Index by the offset itself: the previous ``satpos - 1`` put every
        # post one label too early and wrapped offset 0 into the last bucket.
        if 0 <= satpos < bucket_count:
            feqlist[satpos] += 1

    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间', '出现次数(帖子/回帖总数:' + str(len(spostdate)) + ')', timeline, feqlist,
        "【" + word + '】的时间频率图(' + str(begdate.date()) + "->" +
        str(enddate.date()) + ")")
    print('>>>>>图像加载完毕')
def singleWordTF(word, datalist, scale=30):
    """Plot how often ``word`` appears in thread titles and replies over time.

    The time domain returned by ``getTimeDomain`` is cut into consecutive
    blocks of ``scale`` days.  Every thread title and every well-formed reply
    (at least three fields) that contains ``word`` increments the block its
    timestamp falls into.  Entries whose timestamp lies outside the time
    domain are ignored rather than wrapping around or raising.

    Args:
        word: Substring to look for.
        datalist: Threads shaped as
            ``[[[title, author, time], [[reply, author, time], ...]], ...]``.
        scale: Width of one block in days.
    """
    # Determine the overall time range covered by the data.
    begtime, endtime = getTimeDomain(RFF.getDateList())
    print("对比日期范围:", begtime, "->", endtime)
    blocks = int((endtime - begtime).days // scale)

    # Initialise the counters and the label of each block's first day.
    print('>>>>>开始处理.....')
    feqlist = [0] * (blocks + 1)
    timeline = [
        str((begtime + datetime.timedelta(days=scale * index)).date())
        for index in range(blocks + 1)
    ]

    def tally(text, stamp):
        # Count one entry if it mentions the word and falls inside the domain.
        if word not in text:
            return
        when = datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M")
        feqpos = int((when - begtime).days // scale)
        if 0 <= feqpos < len(feqlist):
            feqlist[feqpos] += 1

    # Number of entries examined (thread titles plus well-formed replies);
    # shown in the axis label.  The previous expression multiplied the list
    # by the length of the first title and failed on an empty ``datalist``.
    total_entries = 0
    for post in datalist:
        total_entries += 1
        tally(post[0][0], post[0][2])
        for reply in post[1]:
            if len(reply) < 3:
                continue
            total_entries += 1
            tally(reply[0], reply[2])

    print('>>>>>处理完成,加载图像中.....')
    print(str(feqlist))
    print(str(timeline), str(feqlist))
    # Draw the chart.
    drawGraphic.linePlotGraphics(
        '时间',
        '出现次数(帖子/回帖总数:' + str(total_entries) + ')',
        timeline, feqlist, '时间频率图(' + str(begtime) + "->" + str(endtime) + ")")
    print('>>>>>图像加载完毕')
# Example no. 6
# 0
def activeTimeAnaylize(authorname,days):
    """Plot at which hours of the day ``authorname`` posts.

    With ``days > 0`` only the last ``days`` days before the newest date of
    the time domain are considered; otherwise the whole domain is used.  A
    24-slot hour histogram is built for every day of activity, printed, and
    the histograms are then summed slot by slot (a total, not a mean) and
    drawn as a line chart.

    Args:
        authorname: Author whose posts are analysed.
        days: Window length in days; ``<= 0`` selects the full time domain.
    """
    raw = RFF.openResult()
    begdate, enddate = getTimeDomain(RFF.getDateList(raw))
    post_data = RFF.getPostDataList(raw)
    del raw
    if days > 0:
        # Narrow the window; otherwise the domain's own start is kept.
        begdate = enddate - datetime.timedelta(days=days)
    posts = getPostDatebyTimeDomain(begdate, enddate, post_data)
    del post_data
    posts = getPostByAuthor(authorname, posts)  # [[content, time], [...], ...]
    per_day = gatherbyDays(sortandget(posts))  # [[date, [timestamps]], ...]

    # One hour histogram per day of activity.
    hours = list(range(24))
    daily_histograms = []
    for entry in per_day:
        histogram = [0] * 24
        for stamp in entry[1]:
            histogram[stamp.hour] += 1
        daily_histograms.append(histogram)
        print(str(histogram))
    del per_day

    # Add the daily histograms together hour by hour.
    totals = [sum(day[slot] for day in daily_histograms) for slot in hours]
    print("after add up all :\n\n",str(totals))
    drawGraphic.linePlotGraphics('时间(小时)','发帖次数',hours,totals,"【"+ authorname +'】的活跃时间段图(共 '+ str(len(daily_histograms)) +" 天数据)")
# Example no. 7
# 0
def showKeyWord(authorname,days):
    """Show the top keywords in ``authorname``'s posts as a bar chart.

    With ``days > 0`` only the last ``days`` days before the newest date of
    the time domain are considered; otherwise the whole domain is used.  The
    author's post contents are concatenated (each prefixed with "。"), up to
    ten keywords are requested from ``jieba.analyse.extract_tags`` with
    ``allowPOS=('n', 'v')``, and each keyword's number of occurrences in the
    concatenated text is printed and plotted.

    Args:
        authorname: Author whose posts are analysed.
        days: Window length in days; ``<= 0`` selects the full time domain.
    """
    raw = RFF.openResult()
    begdate, enddate = getTimeDomain(RFF.getDateList(raw))
    post_data = RFF.getPostDataList(raw)
    del raw
    if days > 0:
        # Narrow the window; otherwise the domain's own start is kept.
        begdate = enddate - datetime.timedelta(days=days)
    posts = getPostDatebyTimeDomain(begdate, enddate, post_data)
    del post_data
    posts = getPostByAuthor(authorname, posts)

    # Merge every post into one text, separated by a full stop.
    merged = "".join("。" + post[0] for post in posts)
    del posts

    # Extract the keywords.
    keywords = jieba.analyse.extract_tags(merged, topK=10,allowPOS=( 'n', 'v'))
    print("\n\n贴吧ID:",authorname,":\n总计回帖长度(基于已有数据):",len(merged),"\n关键词:\n")
    for keyword in keywords:
        print(keyword,end="\t")
    print("\n\n")

    # Count how often each keyword occurs, then show the bar chart.
    counts = [merged.count(keyword) for keyword in keywords]
    print(str(counts))
    drawGraphic.barHonGraphics("关键字","出现次数",keywords,counts,"用户【"+authorname+"】的关键字")