Example #1
0
 def getPackageToScore(mask=None, inOrOut=True):
     """Load package scores from ``Qpackage.QPACKAGE_SCORE_TXT``.

     Records are read through ``SepFile(':')``. Field 0 of each record is
     used as the package key and field 1 is converted to ``float``.

     Args:
         mask: Optional container tested with ``in``. ``None`` disables
             filtering, so every record is kept.
         inOrOut: Only consulted when ``mask`` is given. A truthy value keeps
             the packages found in ``mask``; a falsy value keeps the packages
             that are absent from it.

     Returns:
         dict mapping package -> float score. When a package occurs more than
         once, the last record read wins.
     """
     packToScore = {}
     f = SepFile(':')
     f.open(Qpackage.QPACKAGE_SCORE_TXT, mode='txt', flag='r')
     try:
         for line in f:
             # Identity test on purpose: ``mask == None`` would dispatch to a
             # custom __eq__ if the mask type defines one.
             if mask is None or (line[0] in mask) == bool(inOrOut):
                 packToScore[line[0]] = float(line[1])
     finally:
         # Close the handle even if a malformed record raises above.
         f.close()
     return packToScore
     
     
     
     
     
     
     
     
     
         
def getQuserOpenPackage(basePath='s3://datamining.ym/dmuser/ykang/results/qUserInLast5EachDay',
                        beginDay='2016-01-24',
                        interval_='30',
                        isForward='0',
                        s3DictBasePath='s3://datamining.ym/dmuser/ykang/data/spark.ouwan.qPackageToId',
                        isDownload=True):
    """Aggregate per-package open counts and publish a package -> id table.

    Steps:
      1. Optionally copy one folder per day from ``basePath`` into the mission
         folder returned by ``MissionContext.getFolder()``.
      2. Walk that folder, read every ``.gz`` file through ``SepFile('|')``
         and sum ``int(field 2)`` per ``field 1`` (the package), skipping
         records whose ``field 0`` is one of the masked IMEI values.
      3. Write ``qUserOpenPackage.txt`` (``package|count``, highest count
         first) into the mission folder.
      4. Write ``package|id`` lines to ``Qpackage.QPACKAGE_ID_TXT`` and copy
         that file to ``s3DictBasePath/qPackageToId.txt``.

    Args:
        basePath: Source prefix; each day name is joined onto it.
        beginDay: First argument handed to ``getDaysGen``.
        interval_: Converted with ``int()`` and handed to ``getDaysGen``.
        isForward: Converted with ``int()`` and handed to ``getDaysGen``.
        s3DictBasePath: Destination prefix for the package-id table.
        isDownload: When falsy, step 1 is skipped and whatever is already in
            the mission folder is used.

    Returns:
        dict mapping package -> total open count.
    """
    mconf = MissionConf().setAppName('getQuserOpenPackage')
    msc = MissionContext(conf=mconf)
    [_, appPath] = msc.getFolder()
    if isDownload:
        for theDay in getDaysGen(beginDay, int(interval_), int(isForward)):
            BashUtil.s3Cp(os.path.join(basePath, theDay), appPath + os.sep + theDay, recursived=True)

    # Records carrying these IMEI values are ignored.
    # NOTE(review): they look like placeholder/test device ids -- confirm.
    mask = frozenset((
        'imei=333333333333333',
        'imei=123456789abcdef',
        'imei=111111111111111',
        'imei=012345678912345',
        'imei=000000000000000',
        'imei=00000000000000',
    ))

    openPackage = {}
    for (dirPath, _, files) in os.walk(appPath):
        print(dirPath)
        for gzfile in files:
            if os.path.splitext(gzfile)[1] != '.gz':
                continue
            f = SepFile('|')
            f.open(os.path.join(dirPath, gzfile), mode='gzip', flag='rb')
            try:
                for line in f:
                    if line[0] not in mask:
                        openPackage[line[1]] = openPackage.get(line[1], 0) + int(line[2])
            finally:
                # Do not leak the gzip handle when a record fails to parse.
                f.close()

    print('sorting')
    # sorted() is stable even with reverse=True, so packages with equal counts
    # keep the dict's iteration order.
    ranked = sorted(openPackage.items(), key=lambda kv: kv[1], reverse=True)
    print('sorted')

    writer = LineFile()
    writer.open(os.path.join(appPath, 'qUserOpenPackage.txt'), mode='txt', flag='w')
    try:
        for package, times in ranked:
            writer.writeLine(package + '|' + str(times))
    finally:
        writer.close()

    # qUserOpenPackageToOpenTimes could be written to this location:
    # Qpackage.QPACKAGE_ID_TXT
    # Construct and open in two steps, like the writer above, so the code
    # does not depend on LineFile.open() returning the instance.
    idWriter = LineFile()
    idWriter.open(Qpackage.QPACKAGE_ID_TXT, mode='txt', flag='w')
    try:
        # Ids are assigned in dict iteration order, starting at 0.
        for packageId, qPackage in enumerate(openPackage):
            idWriter.writeLine(qPackage + '|' + str(packageId))
    finally:
        idWriter.close()

    BashUtil.s3Cp(Qpackage.QPACKAGE_ID_TXT, dst=os.path.join(s3DictBasePath, 'qPackageToId.txt'), recursived=False)

    return openPackage
def getQpackageToOpenTimes(appPath): #idf
    """Read package open counts from every ``.gz`` file below *appPath*.

    Each ``.gz`` file is read through ``SepFile('|')``; field 0 of a record
    is the key and ``int(field 1)`` the value.
    NOTE(review): the ``#idf`` tag suggests the counts feed an IDF-style
    weighting -- confirm against callers.

    Args:
        appPath: Root directory walked recursively with ``os.walk``.

    Returns:
        dict mapping field 0 -> int(field 1). Values are overwritten, not
        summed, so the last record seen for a key wins.
    """
    qPackageToOpenTimes = {}
    for (dirPath, _, files) in os.walk(appPath):
        for gzfile in files:
            # Every file name is echoed, including the ones skipped below.
            print(gzfile)
            if os.path.splitext(gzfile)[1] != '.gz':
                continue
            reader = SepFile('|')
            reader.open(os.path.join(dirPath, gzfile), mode='gzip', flag='rb')
            try:
                for line in reader:
                    qPackageToOpenTimes[line[0]] = int(line[1])
            finally:
                # Close the handle even if a record cannot be parsed.
                reader.close()
    return qPackageToOpenTimes
Example #4
0
def getQpackageToOpenTimes(appPath):  #idf
    """Build a package -> open-count dict from the ``.gz`` files in *appPath*.

    The directory tree is walked with ``os.walk``. Files whose extension is
    ``.gz`` are opened with ``SepFile('|')`` in gzip mode; for each record,
    field 0 becomes the key and field 1 is stored as ``int``.
    NOTE(review): ``#idf`` presumably marks this as input for an IDF-like
    computation -- verify.

    Args:
        appPath: Directory to scan recursively.

    Returns:
        dict of field 0 -> int(field 1); a later record for the same key
        replaces the earlier value.
    """
    qPackageToOpenTimes = {}
    for (folder, _, files) in os.walk(appPath):
        for gzfile in files:
            # Printed for every file, not only the .gz ones.
            print(gzfile)
            extension = os.path.splitext(gzfile)[1]
            if extension != '.gz':
                continue
            reader = SepFile('|')
            reader.open(os.path.join(folder, gzfile),
                        mode='gzip',
                        flag='rb')
            try:
                for line in reader:
                    qPackageToOpenTimes[line[0]] = int(line[1])
            finally:
                # Guarantees the gzip handle is released on a bad record.
                reader.close()
    return qPackageToOpenTimes
Example #5
0
 def getPackageToScore(mask=None, inOrOut=True):
     """Read ``Qpackage.QPACKAGE_SCORE_TXT`` into a package -> score dict.

     The file is iterated through ``SepFile(':')``; for each record, field 0
     is the package and field 1 is parsed with ``float``.

     Args:
         mask: Optional container supporting ``in``. With ``None`` no
             filtering happens.
         inOrOut: Used only when ``mask`` is not ``None``. Truthy means
             "keep packages inside the mask", falsy means "keep packages
             outside the mask".

     Returns:
         dict mapping package -> float score (last record wins on
         duplicates).
     """
     packToScore = {}
     f = SepFile(':')
     f.open(Qpackage.QPACKAGE_SCORE_TXT, mode='txt', flag='r')
     try:
         for line in f:
             package = line[0]
             if mask is None:
                 # ``is None`` rather than ``== None``: identity cannot be
                 # overridden by the mask's own __eq__.
                 keep = True
             elif inOrOut:
                 keep = package in mask
             else:
                 keep = package not in mask
             if keep:
                 packToScore[package] = float(line[1])
     finally:
         # The handle is released even when float() rejects a record.
         f.close()
     return packToScore
Example #6
0
def getUserTotalNumber(s3Path, isDownload=True):
    """Count the records in all ``.gz`` files of the mission folder.

    The working folder comes from ``MissionContext.getFolder()``. When
    ``isDownload`` is truthy, ``s3Path`` is first copied into it with
    ``BashUtil.s3Cp(..., recursived=True)``. Every ``.gz`` file found by
    ``os.walk`` is then read through ``SepFile('|')`` and each record adds
    one to the total.
    NOTE(review): the name implies one record per user -- confirm against
    the data layout.

    Args:
        s3Path: Source location handed to ``BashUtil.s3Cp``.
        isDownload: Skip the copy step when falsy.

    Returns:
        int, the number of records read across all ``.gz`` files.
    """
    mconf = MissionConf().setAppName('userTotalNumber')
    msc = MissionContext(conf=mconf)
    # Only the second element of the pair is needed here.
    [_, appPath] = msc.getFolder()
    if isDownload:
        BashUtil.s3Cp(s3Path, appPath, recursived=True)
    userTotalNumber = 0
    for (dirPath, _, files) in os.walk(appPath):
        for gzfile in files:
            if os.path.splitext(gzfile)[1] != '.gz':
                continue
            reader = SepFile('|')
            reader.open(os.path.join(dirPath, gzfile), mode='gzip', flag='rb')
            try:
                userTotalNumber += sum(1 for _record in reader)
            finally:
                # Release the gzip handle even if iteration fails midway.
                reader.close()
    return userTotalNumber
 
Example #8
0
def getQuserOpenPackage(
        basePath='s3://datamining.ym/dmuser/ykang/results/qUserInLast5EachDay',
        beginDay='2016-01-24',
        interval_='30',
        isForward='0',
        s3DictBasePath='s3://datamining.ym/dmuser/ykang/data/spark.ouwan.qUserOpenPackage',
        isDownload=True):
    """Aggregate per-package open counts and upload the ranked result.

    Steps:
      1. Optionally copy one folder per day from ``basePath`` into the mission
         folder returned by ``MissionContext.getFolder()``.
      2. Walk that folder, read every ``.gz`` file through ``SepFile('|')``
         and sum ``int(field 2)`` per ``field 1`` (the package), skipping
         records whose ``field 0`` is one of the masked IMEI values.
      3. Write ``qUserOpenPackage.txt`` (``package|count``, highest count
         first) into the mission folder and copy it to
         ``s3DictBasePath/qUserOpenPackage.txt``.

    Args:
        basePath: Source prefix; each day name is joined onto it.
        beginDay: First argument handed to ``getDaysGen``.
        interval_: Converted with ``int()`` and handed to ``getDaysGen``.
        isForward: Converted with ``int()`` and handed to ``getDaysGen``.
        s3DictBasePath: Destination prefix for the uploaded result file.
        isDownload: When falsy, step 1 is skipped and the current content of
            the mission folder is used.

    Returns:
        dict mapping package -> total open count.
    """
    mconf = MissionConf().setAppName('getQuserOpenPackage')
    msc = MissionContext(conf=mconf)
    [_, appPath] = msc.getFolder()
    if isDownload:
        for theDay in getDaysGen(beginDay, int(interval_), int(isForward)):
            BashUtil.s3Cp(os.path.join(basePath, theDay),
                          appPath + os.sep + theDay,
                          recursived=True)

    # Records carrying these IMEI values are ignored.
    # NOTE(review): they look like placeholder/test device ids -- confirm.
    mask = frozenset((
        'imei=333333333333333',
        'imei=123456789abcdef',
        'imei=111111111111111',
        'imei=012345678912345',
        'imei=000000000000000',
        'imei=00000000000000',
    ))

    openPackage = {}
    for (dirPath, _, files) in os.walk(appPath):
        print(dirPath)
        for gzfile in files:
            if os.path.splitext(gzfile)[1] != '.gz':
                continue
            f = SepFile('|')
            f.open(os.path.join(dirPath, gzfile), mode='gzip', flag='rb')
            try:
                for line in f:
                    if line[0] in mask:
                        continue
                    package = line[1]
                    openPackage[package] = openPackage.get(package, 0) + int(line[2])
            finally:
                # Do not leak the gzip handle when a record fails to parse.
                f.close()

    print('sorting')
    # Stable descending sort: ties keep the dict's iteration order.
    ranked = sorted(openPackage.items(),
                    key=lambda kv: kv[1],
                    reverse=True)
    print('sorted')

    resultPath = os.path.join(appPath, 'qUserOpenPackage.txt')
    writer = LineFile()
    writer.open(resultPath, mode='txt', flag='w')
    try:
        for package, times in ranked:
            writer.writeLine(package + '|' + str(times))
    finally:
        writer.close()

    BashUtil.s3Cp(resultPath,
                  dst=os.path.join(s3DictBasePath, 'qUserOpenPackage.txt'),
                  recursived=False)
    return openPackage
Example #9
0
 # IMEI values whose records are excluded from the aggregation below. Only the
 # keys matter: the dict is used purely for ``in`` tests, the 1s are filler.
 # NOTE(review): these look like placeholder/test device ids -- confirm.
 mask = {
     'imei=333333333333333': 1,
     'imei=123456789abcdef': 1,
     'imei=111111111111111': 1,
     'imei=012345678912345': 1,
     'imei=000000000000000': 1,
     'imei=00000000000000': 1
 }
 # Alternative kept from the original: an empty mask disables the filtering.
 #mask = {}
 # Walk appPath recursively and sum, per package (field 1), the open counts
 # (field 2) found in every '|'-separated .gz file. ``appPath`` and
 # ``openPackage`` are defined before this fragment.
 for (filename, dirs, files) in os.walk(appPath):
     # ``filename`` is the directory currently being visited.
     print filename
     for gzfile in files:
         [name, ext] = os.path.splitext(gzfile)
         if ext == '.gz':
             f = SepFile('|')
             f.open(filename + os.sep + gzfile, mode='gzip', flag='rb')
             for line in f:
                 # Skip records whose first field is a masked IMEI.
                 if line[0] not in mask:
                     # First sighting initialises the total, later ones add.
                     if line[1] not in openPackage:
                         openPackage[line[1]] = int(line[2])
                     else:
                         openPackage[line[1]] += int(line[2])
             f.close()
 # Build ``openTimes`` parallel to ``packs`` so that ``index`` can hold the
 # positions of ``packs`` ordered by descending open count.
 openTimes = []
 print 'sorting'
 # Python 2: keys() returns a list, so ``packs`` can be indexed by position.
 packs = openPackage.keys()
 for key in packs:
     openTimes.append(openPackage[key])
 # index[0] is the position in ``packs`` of the most-opened package.
 index = sorted(range(len(openTimes)),
                key=lambda k: openTimes[k],
                reverse=True)