Esempio n. 1
0
def makeexpecteddata(hour=-1):
    """Create the peer-online-time fixture, upload it and write the expected file.

    Steps:
      1. Read ``../inputdata/DailyPeerOnlineTime.txt``; every row is
         ``prefix,avatime,actcount``.
      2. Write ``../outputdata/DailyPeerOnlineTimeDBWeek1.txt``: the same rows
         with ``avatime`` randomly shifted and a literal ``NULL`` column
         inserted before ``actcount``.
      3. Upload that file to the ``output_daily_peer_online_time`` table.
      4. Write ``../inputdata/<last component of __name__>.txt`` with one row
         per prefix holding the ``actcount``-weighted average of ``avatime``
         over the source rows and the generated rows.

    Args:
        hour: Not used by this function.

    Returns:
        The path of the expected-data file as a string.

    Raises:
        ZeroDivisionError: If the summed ``actcount`` of a prefix is zero.
        ValueError / IndexError: If a row does not have the expected columns.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    source_path = base_dir + '/../inputdata/DailyPeerOnlineTime.txt'
    week_path = base_dir + '/../outputdata/DailyPeerOnlineTimeDBWeek1.txt'
    expected_path = base_dir + "/../inputdata/%s.txt" % __name__.split('.')[-1]

    with open(source_path, 'r') as source_file:
        orglines1 = source_file.readlines()

    with open(week_path, 'w') as week_file:
        for line in orglines1:
            # Edit the columns by position. A textual replace of ",<value>"
            # would also rewrite any other column starting with the same text.
            fields = line.split(',')
            avatime = float(fields[1])
            if avatime > 64000:
                avatime = avatime + float(random.randint(0, 120))
            else:
                avatime = avatime - float(random.randint(0, 180))
            fields[1] = '%s' % avatime
            fields.insert(2, 'NULL')
            week_file.write(','.join(fields))

    # Build the upload path from the absolute directory so it always names the
    # file written above, even when __file__ carries no directory component.
    # NOTE(review): the trailing 3 is passed straight to uploaddatatoodps;
    # presumably a day offset -- confirm against that helper.
    pipeofodps.uploaddatatoodps(
        'output_daily_peer_online_time',
        os.path.abspath(week_path),
        3)

    with open(week_path, 'r') as week_file:
        orglines2 = week_file.readlines()

    countlist = {}  # prefix -> sum(avatime * actcount)
    sumcount = {}   # prefix -> sum(actcount)

    for line in orglines1:
        prefix, avatime, actcount = line.split(',')
        actcount = actcount.replace('\n', '')
        # Only the first source row of a prefix is counted; later duplicates
        # in the source file are ignored (behaviour kept as it was).
        if prefix in countlist:
            continue
        countlist[prefix] = countlist.get(prefix, 0) + float(avatime) * int(actcount)
        sumcount[prefix] = sumcount.get(prefix, 0) + int(actcount)

    for line in orglines2:
        prefix, avatime, _, actcount = line.split(',')
        actcount = actcount.replace('\n', '')
        countlist[prefix] = countlist.get(prefix, 0) + float(avatime) * int(actcount)
        sumcount[prefix] = sumcount.get(prefix, 0) + int(actcount)

    with open(expected_path, "w") as expectedfile:
        for pre, avetime in countlist.items():
            expectedfile.write(
                '%s,%f,%s\n' % (pre, float(avetime / sumcount[pre]), ' '))

    return expected_path
Esempio n. 2
0
def createtabledata(classname):
    """Generate and upload the data of one test-data module.

    Looks up the already imported module ``testdata.<classname>`` in
    ``sys.modules``, walks its attributes in ``dir()`` order and uses the
    first class that is a subclass of ``Dataprovider``: the class is
    instantiated, ``make_data()`` is called, the table name is printed and the
    produced file is uploaded to that table. Nothing happens when the module
    holds no such class.

    Args:
        classname: Name of the module below the ``testdata`` package.

    Raises:
        KeyError: If ``testdata.<classname>`` has not been imported.
    """
    import inspect

    # Alternative package layout that was used at some point:
    #     sys.modules['lib.platform.dataprocess.testdata.' + classname]
    m = sys.modules['testdata.' + classname]
    for attr_name in dir(m):
        att = getattr(m, attr_name)
        # issubclass() raises TypeError for anything that is not a class
        # (strings, functions, sub-modules, ...), so filter those out first.
        if not inspect.isclass(att) or not issubclass(att, Dataprovider):
            continue
        # NOTE(review): Dataprovider itself also satisfies issubclass(); if the
        # module imports it under a name that sorts first it will be picked.
        tmpObj = att()
        tmpfile = tmpObj.make_data()
        print(tmpObj.gettablename())
        pipeofodps.uploaddatatoodps(tmpObj.gettablename(), tmpfile)
        break
Esempio n. 3
0
def createalldata():
    """Generate and upload the data of every provider module.

    Walks the attributes of the imported ``testdata.dataprovider`` module. For
    each attribute that is itself a module, the first member (in ``dir()``
    order) that is a type and a subclass of ``Dataprovider`` is instantiated;
    its ``make_data()`` result is uploaded to the table named by
    ``gettablename()``, and the table name is printed.
    """
    package = sys.modules['testdata.dataprovider']
    for module_name in dir(package):
        candidate_module = getattr(package, module_name)
        if type(candidate_module) != types.ModuleType:
            continue
        for member_name in dir(candidate_module):
            member = getattr(candidate_module, member_name)
            if type(member) != types.TypeType:
                continue
            if not issubclass(member, package.Dataprovider):
                continue
            provider = member()
            data_path = provider.make_data()
            print(provider.gettablename())
            pipeofodps.uploaddatatoodps(provider.gettablename(), data_path)
            # Only the first matching class of each module is processed.
            break
def makeexpecteddata(hour=-1):
    """Create the file-play-count fixture, upload it and write the expected file.

    Steps:
      1. Read ``../outputdata/TodayFilePlayCountDB.txt``; every row has four
         comma-separated columns ``url,username,count,<last column>``.
      2. Write ``../outputdata/TodayFilePlayCountDBtmp.txt``: the same rows
         with the ``count`` column duplicated.
      3. Upload that file to the ``output_daily_file_play_count`` table.
      4. Write ``../inputdata/<last component of __name__>.txt`` with one row
         per (url, username): the sum of ``count * (1 + e**-1)`` followed by
         an empty last column.

    Args:
        hour: Not used by this function.

    Returns:
        The path of the expected-data file as a string.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    source_path = base_dir + '/../outputdata/TodayFilePlayCountDB.txt'
    upload_path = base_dir + '/../outputdata/TodayFilePlayCountDBtmp.txt'
    expected_path = base_dir + "/../inputdata/%s.txt" % __name__.split('.')[-1]

    with open(source_path, 'r') as source_file:
        orgdownlines = source_file.readlines()

    with open(upload_path, 'w') as upload_file:
        for line in orgdownlines:
            # Duplicate the count column by position; replacing the text
            # ",<count>" would also hit other columns that start with it.
            fields = line.split(',')
            fields.insert(3, fields[2])
            upload_file.write(','.join(fields))

    # Derive the upload path from the absolute directory so that it always
    # points at the file written above.
    # NOTE(review): -1 is passed straight to uploaddatatoodps; presumably a
    # day offset -- confirm against that helper.
    pipeofodps.uploaddatatoodps(
        'output_daily_file_play_count',
        os.path.abspath(upload_path), -1)

    weight = 1.0 + math.exp(-1)
    resultlist = {}  # url -> {username -> weighted count}
    for line in orgdownlines:
        url, username, count, _ = line.split(',')
        per_user = resultlist.setdefault(url, {})
        per_user[username] = per_user.get(username, 0.0) + float(
            float(count) * weight)

    with open(expected_path, "w") as expectedfile:
        for url, users in resultlist.items():
            for user, count in users.items():
                expectedfile.write('%s,%s,%f,%s\n' % (url, user, count, ''))

    return expected_path
def makeexpecteddata(hour=-1):
    """Create the province play-count fixture, upload it and write the expected file.

    Steps:
      1. Read ``../outputdata/DailyProvinceFilePlayCountDB.txt``; every row has
         four columns ``province,username,playcount,<last column>``.
      2. Write ``../outputdata/DailyProvinceFilePlayCountDBWeek1.txt``: the
         same rows with ``playcount`` increased by 0..10 when it is even and
         decreased by 0..6 when it is odd.
      3. Upload that file to ``output_daily_province_file_play_count``.
      4. Write ``../inputdata/<last component of __name__>.txt`` with the
         summed ``playcount`` per (province, username) over both files.

    Args:
        hour: Not used by this function.

    Returns:
        The path of the expected-data file as a string.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    source_path = base_dir + '/../outputdata/DailyProvinceFilePlayCountDB.txt'
    week_path = (base_dir +
                 '/../outputdata/DailyProvinceFilePlayCountDBWeek1.txt')
    expected_path = base_dir + "/../inputdata/%s.txt" % __name__.split('.')[-1]

    with open(source_path, 'r') as source_file:
        orglines1 = source_file.readlines()

    with open(week_path, 'w') as week_file:
        for line in orglines1:
            # Rewrite the column by position; a textual replace of
            # ",<playcount>" could also alter a neighbouring column.
            fields = line.split(',')
            playcount = int(fields[2])
            if playcount % 2 == 0:
                playcount = playcount + random.randint(0, 10)
            else:
                playcount = playcount - random.randint(0, 6)
            fields[2] = '%s' % playcount
            week_file.write(','.join(fields))

    # Use the absolute directory so the uploaded path always names the file
    # written above.
    # NOTE(review): 2 is passed straight to uploaddatatoodps; presumably a day
    # offset -- confirm against that helper.
    pipeofodps.uploaddatatoodps(
        'output_daily_province_file_play_count',
        os.path.abspath(week_path), 2)

    with open(week_path, 'r') as week_file:
        orglines2 = week_file.readlines()

    resultlist = {}  # province -> {username -> total playcount}
    for line in orglines1 + orglines2:
        province, username, playcount, _ = line.split(',')
        per_user = resultlist.setdefault(province, {})
        per_user[username] = per_user.get(username, 0) + int(playcount)

    with open(expected_path, "w") as expectedfile:
        for pro, value in resultlist.items():
            for user, count in value.items():
                expectedfile.write('%s,%s,%d\n' % (pro, user, count))

    return expected_path
def makeexpecteddata(hour=-1):
    """Create the peer ISP/province fixture, upload it and write both expected files.

    Steps:
      1. Read ``../testdata/PeerInfoCleaned.txt``; every row has 15 columns,
         of which the third is the peer id and the eighth the public IP.
      2. Write the intermediate table ``../testdata/<module>tmp.txt`` with one
         ``peerid,isp,province,username`` row per peer id not seen before.
         When such a row sits on a line index divisible by 100, the last three
         characters of its peer id are replaced by ``xyz`` first.
      3. Upload that file to ``output_all_peer_isp_province_distribute``.
      4. Add a row for every peer id of the source that is still unseen. The
         first pass records the *rewritten* id, so the original id of a
         rewritten row is picked up here unless another line recorded it.
      5. Count rows per (username, isp), per isp, per (username, province)
         and per province, then write ``../inputdata/<module>ISP.txt`` and
         ``../inputdata/<module>Province.txt``. Overall totals use the
         user name ``all``.

    ``<module>`` is the last component of ``__name__``. The user name comes
    from ``datavars.name_list`` keyed by the first eight characters of the
    peer id; province and ISP are the first and second comma-separated items
    of ``datavars.ip2isp[public_ip]``.

    Args:
        hour: Not used by this function.

    Returns:
        A tuple ``(isp_path, province_path)`` of the two expected-data files.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    module_name = __name__.split('.')[-1]
    source_path = base_dir + '/../testdata/PeerInfoCleaned.txt'
    middle_path = base_dir + '/../testdata/%stmp.txt' % module_name
    isp_path = base_dir + "/../inputdata/%sISP.txt" % module_name
    province_path = base_dir + "/../inputdata/%sProvince.txt" % module_name

    def build_row(peerid, public_ip):
        # One intermediate-table row for a peer.
        username = datavars.name_list[peerid[:8]]
        ispinfo = datavars.ip2isp[public_ip].split(',')[1]
        proviceinfo = datavars.ip2isp[public_ip].split(',')[0]
        return '%s,%s,%s,%s\n' % (peerid, ispinfo, proviceinfo, username)

    with open(source_path, 'r') as resultfile:
        orglines = resultfile.readlines()

    # Prepare the intermediate table and upload it; the same rows also feed
    # the expected data below. A set keeps the "seen" test O(1) per line.
    seen_peerids = set()
    orglinestmp = []
    with open(middle_path, "w") as tmpfile:
        for linecount, line in enumerate(orglines):
            (_, _, peerid, _, _, _, _, public_ip,
             _, _, _, _, _, _, _) = line.split(',')
            if peerid not in seen_peerids:
                if linecount % 100 == 0:
                    peerid = peerid[:-3] + 'xyz'
                tmpstr = build_row(peerid, public_ip)
                orglinestmp.append(tmpstr)
                tmpfile.write(tmpstr)
            # Records the (possibly rewritten) id, as before.
            seen_peerids.add(peerid)

    # NOTE(review): -1 is passed straight to uploaddatatoodps; presumably a
    # day offset -- confirm against that helper.
    pipeofodps.uploaddatatoodps(
        'output_all_peer_isp_province_distribute', middle_path, -1)

    for line in orglines:
        (_, _, peerid, _, _, _, _, public_ip,
         _, _, _, _, _, _, _) = line.split(',')
        if peerid not in seen_peerids:
            orglinestmp.append(build_row(peerid, public_ip))
            seen_peerids.add(peerid)

    ispresultlist = {}       # username -> {isp -> count}
    ispresultlist1 = {}      # isp -> count
    proviceresultlist = {}   # username -> {province -> count}
    proviceresultlist1 = {}  # province -> count

    for tmpline in orglinestmp:
        peerid, ispinfo, proviceinfo, username = tmpline.split(',')
        username = username.replace('\n', '')

        user_isps = ispresultlist.setdefault(username, {})
        user_isps[ispinfo] = user_isps.get(ispinfo, 0) + 1
        ispresultlist1[ispinfo] = ispresultlist1.get(ispinfo, 0) + 1

        user_provinces = proviceresultlist.setdefault(username, {})
        user_provinces[proviceinfo] = user_provinces.get(proviceinfo, 0) + 1
        proviceresultlist1[proviceinfo] = proviceresultlist1.get(
            proviceinfo, 0) + 1

    with open(isp_path, "w") as expectedfile:
        for username, ipsinfos in ispresultlist.items():
            for ips, count in ipsinfos.items():
                expectedfile.write('%s,%s,%d\n' % (username, ips, count))
        for ips, count in ispresultlist1.items():
            expectedfile.write('%s,%s,%d\n' % ('all', ips, count))

    with open(province_path, "w") as expectedfile:
        for username, proviceinfos in proviceresultlist.items():
            for provice, count in proviceinfos.items():
                expectedfile.write('%s,%s,%d\n' % (username, provice, count))
        for provice, count in proviceresultlist1.items():
            expectedfile.write('%s,%s,%d\n' % ('all', provice, count))

    return (isp_path, province_path)
Esempio n. 7
0
def makeexpecteddata(hour=-1):
    """Create two peer-activity fixtures, upload them and write the expected file.

    Steps:
      1. Read ``../inputdata/DailyPeerActivity.txt``; every row is
         ``prefix,actcount``.
      2. Write ``../outputdata/DailyPeerActivityDBWeek1.txt`` and
         ``...Week2.txt``: each row becomes ``prefix,<actcount * r>,NULL``
         with a fresh random ``r`` in 150..300 per row and per file.
      3. Upload both files to the ``output_daily_peer_activity`` table.
      4. Write ``../inputdata/<last component of __name__>.txt`` with one
         ``prefix,<total / 30>`` row per prefix, where the total is the source
         count plus both generated counts. The prefix ``99999999`` is skipped.

    Args:
        hour: Not used by this function.

    Returns:
        The path of the expected-data file as a string.

    Raises:
        ValueError: If a row does not have the expected number of columns.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    source_path = base_dir + '/../inputdata/DailyPeerActivity.txt'
    week1_path = base_dir + '/../outputdata/DailyPeerActivityDBWeek1.txt'
    week2_path = base_dir + '/../outputdata/DailyPeerActivityDBWeek2.txt'
    expected_path = base_dir + "/../inputdata/%s.txt" % __name__.split('.')[-1]

    with open(source_path, 'r') as source_file:
        orglines = source_file.readlines()

    # Week1 is written completely before Week2, so the order of the random
    # draws stays the same as before.
    for week_path in (week1_path, week2_path):
        with open(week_path, 'w') as week_file:
            for line in orglines:
                randomnum = random.randint(150, 300)
                prefix, tmpcount = line.split(',')
                # int() ignores the trailing newline of the last column.
                week_file.write('%s,%s,%s\n' % (
                    prefix, str(int(tmpcount) * randomnum), 'NULL'))

    # Build the upload paths from the absolute directory so they always name
    # the files written above.
    # NOTE(review): 3 and 7 are passed straight to uploaddatatoodps;
    # presumably day offsets -- confirm against that helper.
    pipeofodps.uploaddatatoodps(
        'output_daily_peer_activity', os.path.abspath(week1_path), 3)
    pipeofodps.uploaddatatoodps(
        'output_daily_peer_activity', os.path.abspath(week2_path), 7)

    with open(week1_path, 'r') as week_file:
        orglines1 = week_file.readlines()
    with open(week2_path, 'r') as week_file:
        orglines2 = week_file.readlines()

    sumcount = {}  # prefix -> total count

    for line in orglines:
        prefix, actcount = line.split(',')
        actcount = actcount.replace('\n', '')
        # Only the first source row of a prefix is counted; later duplicates
        # in the source file are ignored (behaviour kept as it was).
        if prefix not in sumcount:
            sumcount[prefix] = sumcount.get(prefix, 0) + int(actcount)

    for line in orglines1 + orglines2:
        prefix, actcount, _ = line.split(',')
        sumcount[prefix] = sumcount.get(prefix, 0) + int(actcount)

    with open(expected_path, "w") as expectedfile:
        for pre, count in sumcount.items():
            if pre != '99999999':
                expectedfile.write('%s,%d\n' % (pre, int(count / 30)))

    return expected_path
Esempio n. 8
0
def makeexpecteddata(hour=-1):
    """Create two total-flow fixtures, upload them and write the expected file.

    Steps:
      1. Read ``../outputdata/DailyTotalFlowDB.txt``; every row is
         ``username,cdndown,p2pdown,totaldown,upload,type``.
      2. Write verbatim copies to ``../outputdata/DailyTotalFlowDBWeek1.txt``
         and ``...Week2.txt``.
      3. Upload both copies to the ``output_daily_total_flow`` table.
      4. Write ``../inputdata/<last component of __name__>.txt`` with the four
         counters summed per (username, type) over the source file and both
         copies.

    Args:
        hour: Not used by this function.

    Returns:
        The path of the expected-data file as a string.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    source_path = base_dir + '/../outputdata/DailyTotalFlowDB.txt'
    week1_path = base_dir + '/../outputdata/DailyTotalFlowDBWeek1.txt'
    week2_path = base_dir + '/../outputdata/DailyTotalFlowDBWeek2.txt'
    expected_path = base_dir + "/../inputdata/%s.txt" % __name__.split('.')[-1]

    with open(source_path, 'r') as source_file:
        orglines = source_file.readlines()

    for week_path in (week1_path, week2_path):
        with open(week_path, 'w') as week_file:
            week_file.writelines(orglines)

    # Build the upload paths from the absolute directory so they always name
    # the files written above.
    # NOTE(review): the trailing (-7, False) / (7, False) arguments are passed
    # straight to uploaddatatoodps; their meaning is defined there -- confirm.
    pipeofodps.uploaddatatoodps(
        'output_daily_total_flow', os.path.abspath(week1_path), -7, False)
    pipeofodps.uploaddatatoodps(
        'output_daily_total_flow', os.path.abspath(week2_path), 7, False)

    with open(week1_path, 'r') as week_file:
        orglines1 = week_file.readlines()
    with open(week2_path, 'r') as week_file:
        orglines2 = week_file.readlines()

    columns = ('cdndown', 'p2pdown', 'totaldown', 'upload')
    resultlist = {}  # username -> {type -> {column -> total}}

    for line in orglines + orglines1 + orglines2:
        username, cdndown, p2pdown, totaldown, upload, typ = line.split(',')
        typ = typ.replace('\n', '')
        totals = resultlist.setdefault(username, {}).setdefault(typ, {})
        # get(..., 0) lets every file introduce a new (username, type) pair;
        # indexing the counter directly would raise KeyError in that case.
        for column, raw in zip(columns, (cdndown, p2pdown, totaldown, upload)):
            totals[column] = totals.get(column, 0) + int(raw)

    with open(expected_path, "w") as expectedfile:
        for user, per_type in resultlist.items():
            for typeloc, totals in per_type.items():
                expectedfile.write(
                    '%s,%d,%d,%d,%d,%s\n' %
                    (user, totals['cdndown'], totals['p2pdown'],
                     totals['totaldown'], totals['upload'], typeloc))

    return expected_path
def makeexpecteddata(hour=-1):
    """Create two start-time-count fixtures, upload them and write the expected file.

    Steps:
      1. Read ``../outputdata/DailyStartTimeCountDB.txt``; every row has four
         columns ``username,period,count,<last column>``.
      2. Write ``../outputdata/DailyStartTimeCountDBWeek1.txt``: ``count`` is
         increased by 0..10 when ``period > 3000`` and decreased by 0..6
         otherwise. ``...Week2.txt`` does the same with the test
         ``period < 3000``.
      3. Upload both files to the ``output_daily_start_time_count`` table.
      4. Write ``../inputdata/<last component of __name__>.txt`` with the
         summed ``count`` per (username, period) over all three files,
         followed by an empty last column.

    Args:
        hour: Not used by this function.

    Returns:
        The path of the expected-data file as a string.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    source_path = base_dir + '/../outputdata/DailyStartTimeCountDB.txt'
    week1_path = base_dir + '/../outputdata/DailyStartTimeCountDBWeek1.txt'
    week2_path = base_dir + '/../outputdata/DailyStartTimeCountDBWeek2.txt'
    expected_path = base_dir + "/../inputdata/%s.txt" % __name__.split('.')[-1]

    with open(source_path, 'r') as source_file:
        orglines = source_file.readlines()

    # Week1 is written completely before Week2, so the order of the random
    # draws stays the same as before.
    for week_path, grow_above in ((week1_path, True), (week2_path, False)):
        with open(week_path, 'w') as week_file:
            for line in orglines:
                # Rewrite the count column by position. Replacing the text
                # ",<count>" would also hit the period column whenever the
                # period starts with the same digits (e.g. 3600 and 3).
                fields = line.split(',')
                period = int(fields[1])
                count = int(fields[2])
                if grow_above:
                    grows = period > 3000
                else:
                    grows = period < 3000
                if grows:
                    count = count + random.randint(0, 10)
                else:
                    count = count - random.randint(0, 6)
                fields[2] = '%s' % count
                week_file.write(','.join(fields))

    # Build the upload paths from the absolute directory so they always name
    # the files written above.
    # NOTE(review): -7 and -13 are passed straight to uploaddatatoodps;
    # presumably day offsets -- confirm against that helper.
    pipeofodps.uploaddatatoodps(
        'output_daily_start_time_count', os.path.abspath(week1_path), -7)
    pipeofodps.uploaddatatoodps(
        'output_daily_start_time_count', os.path.abspath(week2_path), -13)

    with open(week1_path, 'r') as week_file:
        orglines1 = week_file.readlines()
    with open(week2_path, 'r') as week_file:
        orglines2 = week_file.readlines()

    resultlist = {}  # username -> {period -> total count}
    for line in orglines + orglines1 + orglines2:
        username, period, count, _ = line.split(',')
        per_period = resultlist.setdefault(username, {})
        per_period[period] = per_period.get(period, 0) + int(count)

    with open(expected_path, "w") as expectedfile:
        for username, value in resultlist.items():
            for period, count in value.items():
                expectedfile.write(
                    '%s,%s,%d,%s\n' % (username, period, count, ''))

    return expected_path