def procInterval(self, week):  # quser can only be controlled per week, not per record
    """Aggregate package open counts per user over the given week folders.

    Walks ``self.qUserPath`` and reads every file whose full path matches
    ``<qUserPath>/<one of week>/<anything>.gz``.  Each record is indexed as
    ``line[0]`` userId, ``line[1]`` packageName, ``line[2]`` openTimes.

    Args:
        week: iterable of strings; joined with ``|`` to form the directory
            alternation of the path regex.

    Returns:
        dict mapping userId -> {packageName: summed openTimes}.  Every user
        seen gets an entry (possibly empty); only packages present in
        ``self.qPackageToId`` are counted.
    """
    userToOpenPackage = {}
    match_dir = re.compile(
        os.path.join(self.qUserPath, '(' + '|'.join(week) + ')', r'.*\.gz'))
    for (dirpath, _, files) in os.walk(self.qUserPath):
        for gzfile in files:
            gzfile_dir = os.path.join(dirpath, gzfile)
            if not match_dir.search(gzfile_dir):
                continue
            f = SepFile('|').open(gzfile_dir, mode='gzip', flag='rb')
            try:
                for line in f:
                    # line[0] userId, line[1] packageName, line[2] openTimes
                    opened = userToOpenPackage.setdefault(line[0], {})
                    if line[1] in self.qPackageToId:
                        opened[line[1]] = opened.get(line[1], 0) + int(line[2])
            finally:
                # close the reader even if a malformed record raises
                f.close()
    return userToOpenPackage
Exemple #2
0
    def getIdToQuser(mask=None):
        """Load the id -> quser mapping from ``Quser.QUSER_ID_TXT``.

        The file is read as '|'-separated records; column 1 becomes the key
        and column 0 the value.

        Args:
            mask: optional container of ids.  When given, only records whose
                column 1 is in it are kept; ``None`` keeps every record.

        Returns:
            dict mapping column 1 (id) to column 0 (quser).
        """
        idToQuser = {}
        f = SepFile('|').open(Quser.QUSER_ID_TXT, 'txt', 'r')
        try:
            for line in f:
                if mask is None or line[1] in mask:
                    idToQuser[line[1]] = line[0]
        finally:
            # release the reader even when a short record raises IndexError
            f.close()
        return idToQuser


# if __name__ == '__main__':
#     user = set()
#     f = SepFile(',').open('/root/Downloads/look-alike/data/payQualityUsers/payQualityUsers.txt', 'txt', 'r')
#     for line in f:
#         username = line[0]
#         if len(line[0]) == 0 and len(line[1]) != 0:
#             username = line[1]
#         user.add(username)
#     f.close()
#     print len(user)
#     print 'return'


        
Exemple #3
0
 def getPackageToScore(mask=None, inOrOut=True):
     """Load package scores from ``Qpackage.QPACKAGE_SCORE_TXT``.

     The file is read as ':'-separated records: column 0 is the package and
     column 1 is parsed with ``float``.

     Args:
         mask: optional container of package names; ``None`` disables
             filtering.
         inOrOut: with a mask, truthy keeps only packages in the mask and
             falsy keeps only packages not in it.

     Returns:
         dict mapping package -> float score.
     """
     packToScore = {}
     f = SepFile(':')
     f.open(Qpackage.QPACKAGE_SCORE_TXT, mode='txt', flag='r')
     try:
         for line in f:
             if mask is None:
                 wanted = True
             elif inOrOut:
                 wanted = line[0] in mask
             else:
                 wanted = line[0] not in mask
             if wanted:
                 # the score is only parsed for records that are kept
                 packToScore[line[0]] = float(line[1])
     finally:
         f.close()
     return packToScore
     
     
     
     
     
     
     
     
     
         
 def procInterval(self, weekInterval):  # differs from Quser: we do not need to add up the 7 days
     """Yield per-user package open counts in batches.

     Walks ``self.candPath`` and reads every file whose full path matches
     ``<candPath>/<one of weekInterval>/<anything>.gz``.  Records are indexed
     as ``line[0]`` user, ``line[1]`` package, ``line[2]`` integer count.
     Every user seen gets an entry; only packages present in
     ``self.qPackageToId`` are stored, and repeated packages are summed.

     A batch is yielded, and a fresh one started, when a user not yet in the
     current batch arrives while the batch already holds
     ``self.max_record`` users.  The last batch is always yielded, even when
     it is empty.

     Args:
         weekInterval: iterable of strings; joined with ``|`` to form the
             directory alternation of the path regex.

     Yields:
         dict mapping user -> {package: summed count}.
     """
     userToOpenPackage = {}
     num_record = 0  # number of users in the current batch
     match_dir = re.compile(os.path.join(
         self.candPath, '(' + '|'.join(weekInterval) + ')', r'.*\.gz'))
     for (dirpath, _, files) in os.walk(self.candPath):
         for gzfile in files:
             gzfile_dir = os.path.join(dirpath, gzfile)
             if not match_dir.search(gzfile_dir):
                 continue
             f = SepFile('|').open(gzfile_dir, mode='gzip', flag='rb')
             try:
                 for line in f:
                     if line[0] not in userToOpenPackage:
                         if num_record >= self.max_record:
                             yield userToOpenPackage
                             userToOpenPackage = {}
                             num_record = 0
                         userToOpenPackage[line[0]] = {}
                         num_record += 1
                     if line[1] in self.qPackageToId:
                         opened = userToOpenPackage[line[0]]
                         opened[line[1]] = opened.get(line[1], 0) + int(line[2])
             finally:
                 # also runs when the consumer abandons the generator mid-file
                 f.close()
     yield userToOpenPackage
Exemple #5
0
 def writeQpackageToId():
     """Assign sequential ids to the packages listed in the score file.

     Reads ``Qpackage.QPACKAGE_SCORE_TXT`` as ':'-separated records and
     writes one ``<column 0>|<idx>`` line per record to
     ``Qpackage.QPACKAGE_ID_TXT``; ``idx`` starts at 0 and follows file
     order.
     """
     f = SepFile(':').open(Qpackage.QPACKAGE_SCORE_TXT, 'txt', 'r')
     try:
         writer = LineFile().open(Qpackage.QPACKAGE_ID_TXT, 'txt', 'w')
         try:
             for idx, line in enumerate(f):
                 writer.writeLine(line[0] + '|' + str(idx))
         finally:
             writer.close()
     finally:
         # both handles are released even if a record is malformed
         f.close()
Exemple #6
0
 def writeQpackageToId():
     """Write a ``package|id`` line for every record of the score file.

     Source is ``Qpackage.QPACKAGE_SCORE_TXT`` (':'-separated), target is
     ``Qpackage.QPACKAGE_ID_TXT``.  Ids count up from 0 in reading order.
     """
     reader = SepFile(':').open(Qpackage.QPACKAGE_SCORE_TXT, 'txt', 'r')
     out = LineFile().open(Qpackage.QPACKAGE_ID_TXT, 'txt', 'w')
     for position, record in enumerate(reader):
         out.writeLine(record[0] + '|' + str(position))
     out.close()
     reader.close()
Exemple #7
0
 def getQuserToId(mask=None):
     """Load the quser -> id mapping from ``Quser.QUSER_ID_TXT``.

     The file is read as '|'-separated records; column 0 becomes the key and
     column 1 the value.

     Args:
         mask: optional container of qusers.  When given, only records whose
             column 0 is in it are kept; ``None`` keeps every record.

     Returns:
         dict mapping column 0 (quser) to column 1 (id).
     """
     qUserToId = {}
     f = SepFile('|').open(Quser.QUSER_ID_TXT, 'txt', 'r')
     try:
         for line in f:
             if mask is None or line[0] in mask:
                 qUserToId[line[0]] = line[1]
     finally:
         f.close()
     return qUserToId
def getQuserOpenPackage(basePath='s3://datamining.ym/dmuser/ykang/results/qUserInLast5EachDay', 
                        beginDay='2016-01-24', 
                        interval_='30',
                        isForward='0',
                        s3DictBasePath='s3://datamining.ym/dmuser/ykang/data/spark.ouwan.qPackageToId',
                        isDownload=True):
    """Total the open counts per package and publish a package -> id file.

    Steps:
      1. Optionally copy one folder per day (from ``getDaysGen``) from
         ``basePath`` into the mission folder.
      2. Read every ``.gz`` file below the mission folder as '|'-separated
         records (column 0 user, column 1 package, column 2 integer count)
         and sum the counts per package, skipping the users in ``mask``.
      3. Write ``qUserOpenPackage.txt`` in the mission folder, one
         ``package|total`` line, highest total first.
      4. Write ``package|index`` lines to ``Qpackage.QPACKAGE_ID_TXT`` and
         copy that file to ``<s3DictBasePath>/qPackageToId.txt``.

    Args:
        basePath: source prefix holding one sub-folder per day.
        beginDay, interval_, isForward: forwarded to ``getDaysGen``
            (``interval_`` and ``isForward`` are converted with ``int``).
        s3DictBasePath: destination prefix for the id file.
        isDownload: when falsy, step 1 is skipped.

    Returns:
        dict mapping package -> summed open count.
    """
    mconf = MissionConf().setAppName('getQuserOpenPackage')
    msc = MissionContext(conf=mconf)
    [_, appPath] = msc.getFolder()
    if isDownload:
        for theDay in getDaysGen(beginDay, int(interval_), int(isForward)):
            BashUtil.s3Cp(os.path.join(basePath, theDay), appPath + os.sep + theDay, recursived=True)
    openPackage = {}
    # user ids excluded from the totals (they look like placeholder IMEIs)
    mask = {'imei=333333333333333':1, 'imei=123456789abcdef':1, 'imei=111111111111111':1, 'imei=012345678912345':1, 'imei=000000000000000':1, 'imei=00000000000000':1}
    for (dirpath, _, files) in os.walk(appPath):
        print(dirpath)
        for gzfile in files:
            if os.path.splitext(gzfile)[1] != '.gz':
                continue
            f = SepFile('|')
            f.open(dirpath + os.sep + gzfile, mode='gzip', flag='rb')
            try:
                for line in f:
                    if line[0] not in mask:
                        openPackage[line[1]] = openPackage.get(line[1], 0) + int(line[2])
            finally:
                f.close()

    print('sorting')
    # stable sort: packages with equal totals keep their dict iteration order
    ranked = sorted(openPackage.items(), key=lambda item: item[1], reverse=True)
    print('sorted')

    writer = LineFile()
    writer.open(os.path.join(appPath, 'qUserOpenPackage.txt'), mode='txt', flag='w')
    try:
        for key, value in ranked:
            writer.writeLine(key + '|' + str(value))
    finally:
        writer.close()

    # qUserOpenPackageToOpenTimes can be written to this location: Qpackage.QPACKAGE_ID_TXT
    f = LineFile().open(Qpackage.QPACKAGE_ID_TXT, mode='txt', flag='w')
    try:
        for index, qPackage in enumerate(openPackage):
            f.writeLine(qPackage + '|' + str(index))
    finally:
        f.close()

    BashUtil.s3Cp(Qpackage.QPACKAGE_ID_TXT, dst=os.path.join(s3DictBasePath, 'qPackageToId.txt'), recursived=False)

    return openPackage
Exemple #9
0
 def getIdToPackage(mask=None):
     """Load the id -> package mapping from ``Qpackage.QPACKAGE_ID_TXT``.

     The file is read as '|'-separated records; column 1 becomes the key and
     column 0 the value.

     Args:
         mask: optional container of ids.  When given, only records whose
             column 1 is in it are kept; ``None`` keeps every record.

     Returns:
         dict mapping column 1 (id) to column 0 (package).
     """
     idToQpackage = {}
     f = SepFile('|').open(Qpackage.QPACKAGE_ID_TXT, 'txt', 'r')
     try:
         for line in f:
             if mask is None or line[1] in mask:
                 idToQpackage[line[1]] = line[0]
     finally:
         f.close()
     return idToQpackage
Exemple #10
0
 def getQpackageToId(mask=None):
     """Load the package -> id mapping from ``Qpackage.QPACKAGE_ID_TXT``.

     The file is read as '|'-separated records; column 0 becomes the key and
     column 1 the value.  A progress line is printed first.

     Args:
         mask: optional container of packages.  When given, only records
             whose column 0 is in it are kept; ``None`` keeps every record.

     Returns:
         dict mapping column 0 (package) to column 1 (id).
     """
     print('Info: loading QpackageToId')
     qPackageToId = {}
     f = SepFile('|').open(Qpackage.QPACKAGE_ID_TXT, 'txt', 'r')
     try:
         for line in f:
             if mask is None or line[0] in mask:
                 qPackageToId[line[0]] = line[1]
     finally:
         f.close()
     return qPackageToId
Exemple #11
0
 def getCandidateToId(mask=None):
     """Return the candidate -> id mapping.

     Without a mask the whole mapping is unpickled from
     ``Candidate.CANDIDATES_ID_PICKLE``.  With a mask the '|'-separated text
     file ``Candidate.CANDIDATES_ID_TXT`` is scanned instead and only
     records whose column 0 is in ``mask`` are returned.

     Args:
         mask: optional container of candidates; ``None`` selects the pickle
             path.

     Returns:
         The unpickled object, or a dict mapping column 0 -> column 1.
     """
     print('Info: Loading canditateToId')
     if mask is None:
         # NOTE: unpickling can execute arbitrary code; CANDIDATES_ID_PICKLE
         # must only ever point at a file this project wrote itself.
         with open(Candidate.CANDIDATES_ID_PICKLE, 'rb') as pickled:
             return pickle.load(pickled)
     candToId = {}
     f = SepFile('|').open(Candidate.CANDIDATES_ID_TXT, 'txt', 'r')
     try:
         for line in f:
             if line[0] in mask:
                 candToId[line[0]] = line[1]
     finally:
         f.close()
     return candToId
Exemple #12
0
 def getIdToCandidate(mask=None):
     """Load the id -> candidate mapping from ``Candidate.CANDIDATES_ID_TXT``.

     The file is read as '|'-separated records; column 1 becomes the key and
     column 0 the value.  A progress line is printed first.

     Args:
         mask: optional container of ids.  When given, only records whose
             column 1 is in it are kept; ``None`` keeps every record.

     Returns:
         dict mapping column 1 (id) to column 0 (candidate).
     """
     print('Info: Loading idToCandidate')
     idToCand = {}
     f = SepFile('|').open(Candidate.CANDIDATES_ID_TXT, 'txt', 'r')
     try:
         for line in f:
             if mask is None or line[1] in mask:
                 idToCand[line[1]] = line[0]
     finally:
         f.close()
     return idToCand
Exemple #13
0
 def getIdToCandidate(mask=None):
     """Build a dict keyed by id (column 1) with the candidate (column 0).

     Records come from the '|'-separated ``Candidate.CANDIDATES_ID_TXT``.
     If ``mask`` is supplied, records whose id is not in it are skipped.
     """
     print('Info: Loading idToCandidate')
     reader = SepFile('|').open(Candidate.CANDIDATES_ID_TXT, 'txt', 'r')
     lookup = {}
     for record in reader:
         if not (mask == None):
             if record[1] not in mask:
                 continue
         lookup[record[1]] = record[0]
     reader.close()
     return lookup
Exemple #14
0
 def getQpackageToId(mask=None):
     """Build a dict keyed by package (column 0) with its id (column 1).

     Records come from the '|'-separated ``Qpackage.QPACKAGE_ID_TXT``.
     If ``mask`` is supplied, packages not in it are skipped.
     """
     print('Info: loading QpackageToId')
     reader = SepFile('|').open(Qpackage.QPACKAGE_ID_TXT, 'txt', 'r')
     lookup = {}
     for record in reader:
         if mask == None or record[0] in mask:
             lookup[record[0]] = record[1]
     reader.close()
     return lookup
Exemple #15
0
 def getCandidateToId(mask=None):
     """Return candidate -> id, from the pickle or filtered from the text file.

     With no mask the object stored in ``Candidate.CANDIDATES_ID_PICKLE`` is
     returned as is.  Otherwise ``Candidate.CANDIDATES_ID_TXT`` is read as
     '|'-separated records and column 0 -> column 1 is collected for every
     record whose column 0 is in ``mask``.
     """
     print('Info: Loading canditateToId')
     if mask == None:
         return pickle.load(open(Candidate.CANDIDATES_ID_PICKLE, 'rb'))

     selected = {}
     reader = SepFile('|').open(Candidate.CANDIDATES_ID_TXT, 'txt', 'r')
     for record in reader:
         if record[0] not in mask:
             continue
         selected[record[0]] = record[1]
     reader.close()
     return selected
def getQpackageToOpenTimes(appPath): #idf
    """Read package -> open-times pairs from every ``.gz`` file under appPath.

    Each file name found by ``os.walk`` is printed.  Files with the ``.gz``
    extension are read as '|'-separated records; column 0 is the key and
    column 1 is parsed with ``int``.  A key seen again overwrites the
    earlier value.

    Args:
        appPath: directory to walk recursively.

    Returns:
        dict mapping column 0 -> int(column 1); empty if no ``.gz`` file is
        found.
    """
    qPackageToOpenTimes = {}
    for (dirpath, _, files) in os.walk(appPath):
        for gzfile in files:
            print(gzfile)
            [_, ext] = os.path.splitext(gzfile)
            if ext == '.gz':
                reader = SepFile('|')
                reader.open(os.path.join(dirpath, gzfile), mode='gzip', flag='rb')
                try:
                    for line in reader:
                        qPackageToOpenTimes[line[0]] = int(line[1])
                finally:
                    # close the reader even if a count fails to parse
                    reader.close()
    return qPackageToOpenTimes
Exemple #17
0
 def writeQuserToId():
     """Number the distinct user names and write them to the id file.

     Reads ``Quser.TOTAL_QUSER_TXT`` as ','-separated records.  The user
     name is the fixed prefix plus column 0, or plus column 1 when column 0
     is empty and column 1 is not.  Distinct names are then written to
     ``Quser.QUSER_ID_TXT`` as ``<name>|<index>`` lines, numbered from 0 in
     set iteration order.
     """
     qUser = set()
     f = SepFile(',').open(Quser.TOTAL_QUSER_TXT, 'txt', 'r')
     try:
         for line in f:
             # fall back to column 1 only when column 0 is empty
             if len(line[0]) == 0 and len(line[1]) != 0:
                 qUser.add('******' + line[1])
             else:
                 qUser.add('******' + line[0])
     finally:
         f.close()

     f = LineFile().open(Quser.QUSER_ID_TXT, 'txt', 'w')
     try:
         for i, username in enumerate(qUser):
             f.writeLine(username + '|' + str(i))
     finally:
         f.close()
Exemple #18
0
def getQpackageToOpenTimes(appPath):  #idf
    """Collect ``column 0 -> int(column 1)`` from all ``.gz`` files in a tree.

    Walks ``appPath`` recursively, printing every file name.  Only files
    whose extension is exactly ``.gz`` are opened; they are read as
    '|'-separated records.  When a key occurs more than once the value read
    last wins.

    Args:
        appPath: root directory of the walk.

    Returns:
        dict of the collected pairs (empty when nothing matched).
    """
    qPackageToOpenTimes = {}
    for (dirpath, _, files) in os.walk(appPath):
        for gzfile in files:
            print(gzfile)
            if os.path.splitext(gzfile)[1] != '.gz':
                continue
            reader = SepFile('|')
            reader.open(os.path.join(dirpath, gzfile),
                        mode='gzip',
                        flag='rb')
            try:
                for line in reader:
                    qPackageToOpenTimes[line[0]] = int(line[1])
            finally:
                # do not leak the handle when int() rejects a record
                reader.close()
    return qPackageToOpenTimes
Exemple #19
0
 def getIdToQuser(mask=None, inOrOut=True):
     """Load the id -> quser mapping from ``Quser.QUSER_ID_TXT``.

     The file is read as '|'-separated records; column 1 becomes the key and
     column 0 the value.

     Args:
         mask: optional container of ids; ``None`` disables filtering.
         inOrOut: with a mask, truthy keeps only ids in the mask and falsy
             keeps only ids not in it.

     Returns:
         dict mapping column 1 (id) to column 0 (quser).
     """
     idToQuser = {}
     f = SepFile('|').open(Quser.QUSER_ID_TXT, 'txt', 'r')
     try:
         for line in f:
             if mask is None:
                 wanted = True
             elif inOrOut:
                 wanted = line[1] in mask
             else:
                 wanted = line[1] not in mask
             if wanted:
                 idToQuser[line[1]] = line[0]
     finally:
         f.close()
     return idToQuser
Exemple #20
0
 def getIdToPackage(mask=None, inOrOut=True):
     """Load the id -> package mapping from ``Qpackage.QPACKAGE_ID_TXT``.

     The file is read as '|'-separated records; column 1 becomes the key and
     column 0 the value.

     Args:
         mask: optional container matched against column 0 (the package),
             not against the id; ``None`` disables filtering.
         inOrOut: with a mask, truthy keeps only packages in the mask and
             falsy keeps only packages not in it.

     Returns:
         dict mapping column 1 (id) to column 0 (package).
     """
     idToQpackage = {}
     f = SepFile('|').open(Qpackage.QPACKAGE_ID_TXT, 'txt', 'r')
     try:
         for line in f:
             # NOTE(review): the mask is compared with column 0 although the
             # result is keyed by column 1 -- confirm this is intended.
             if mask is None:
                 wanted = True
             elif inOrOut:
                 wanted = line[0] in mask
             else:
                 wanted = line[0] not in mask
             if wanted:
                 idToQpackage[line[1]] = line[0]
     finally:
         f.close()
     return idToQpackage
Exemple #21
0
 def getIdToQuser(mask=None, inOrOut=True):
     """Build a dict keyed by id (column 1) with the quser (column 0).

     Records come from the '|'-separated ``Quser.QUSER_ID_TXT``.  When
     ``mask`` is supplied, ``inOrOut`` decides whether ids inside the mask
     (truthy) or outside it (falsy) are kept.
     """
     reader = SepFile('|').open(Quser.QUSER_ID_TXT, 'txt', 'r')
     lookup = {}
     for record in reader:
         if not (mask == None):
             in_mask = record[1] in mask
             # skip when membership disagrees with the requested side
             if in_mask != bool(inOrOut):
                 continue
         lookup[record[1]] = record[0]
     reader.close()
     return lookup
Exemple #22
0
 def getPackageToScore(mask=None, inOrOut=True):
     """Build a dict of package (column 0) -> float score (column 1).

     Records come from the ':'-separated ``Qpackage.QPACKAGE_SCORE_TXT``.
     When ``mask`` is supplied, ``inOrOut`` decides whether packages inside
     the mask (truthy) or outside it (falsy) are kept.
     """
     reader = SepFile(':')
     reader.open(Qpackage.QPACKAGE_SCORE_TXT, mode='txt', flag='r')
     scores = {}
     for record in reader:
         if not (mask == None):
             if (record[0] in mask) != bool(inOrOut):
                 continue
         # the score is converted only for records that pass the filter
         scores[record[0]] = float(record[1])
     reader.close()
     return scores
Exemple #23
0
def getUserTotalNumber(s3Path, isDownload=True):
    """Count the records in all ``.gz`` files of the mission folder.

    Optionally copies ``s3Path`` recursively into the folder returned by
    ``MissionContext.getFolder()``, then walks that folder and counts every
    record that ``SepFile('|')`` yields from each ``.gz`` file.

    Args:
        s3Path: source passed to ``BashUtil.s3Cp``.
        isDownload: when falsy the copy step is skipped.

    Returns:
        int, the total number of records read.
    """
    mconf = MissionConf().setAppName('userTotalNumber')
    msc = MissionContext(conf=mconf)
    # the first element of getFolder() is not needed here
    [_, appPath] = msc.getFolder()
    if isDownload:
        BashUtil.s3Cp(s3Path, appPath, recursived=True)
    userTotalNumber = 0
    for (dirpath, _, files) in os.walk(appPath):
        for gzfile in files:
            if os.path.splitext(gzfile)[1] != '.gz':
                continue
            reader = SepFile('|')
            reader.open(os.path.join(dirpath, gzfile), mode='gzip', flag='rb')
            try:
                userTotalNumber += sum(1 for _record in reader)
            finally:
                reader.close()
    return userTotalNumber
Exemple #24
0
 def writeQpackageToId(mask=None, inOrOut=True):
     """Assign sequential ids to the selected packages of the score file.

     Reads ``Qpackage.QPACKAGE_SCORE_TXT`` as ':'-separated records and
     writes a ``<column 0>|<idx>`` line to ``Qpackage.QPACKAGE_ID_TXT`` for
     every selected record.  ``idx`` starts at 0 and only advances for
     records that are written, so the ids are contiguous.

     Args:
         mask: optional container of packages; ``None`` selects every record.
         inOrOut: with a mask, truthy selects packages in the mask and falsy
             selects packages not in it.
     """
     f = SepFile(':').open(Qpackage.QPACKAGE_SCORE_TXT, 'txt', 'r')
     try:
         writer = LineFile().open(Qpackage.QPACKAGE_ID_TXT, 'txt', 'w')
         try:
             idx = 0
             for line in f:
                 if mask is None:
                     selected = True
                 elif inOrOut:
                     selected = line[0] in mask
                 else:
                     selected = line[0] not in mask
                 if selected:
                     writer.writeLine(line[0] + '|' + str(idx))
                     idx += 1
         finally:
             writer.close()
     finally:
         f.close()
Exemple #25
0
 def writeQpackageToId(mask=None, inOrOut=True):
     """Write ``package|id`` lines for the packages that pass the filter.

     Input is the ':'-separated ``Qpackage.QPACKAGE_SCORE_TXT``; output goes
     to ``Qpackage.QPACKAGE_ID_TXT``.  Without a mask every record is
     written.  With one, ``inOrOut`` picks packages inside the mask (truthy)
     or outside it (falsy).  Ids count up from 0 over the written records.
     """
     reader = SepFile(':').open(Qpackage.QPACKAGE_SCORE_TXT, 'txt', 'r')
     next_id = 0
     out = LineFile().open(Qpackage.QPACKAGE_ID_TXT, 'txt', 'w')
     for record in reader:
         if not (mask == None):
             if (record[0] in mask) != bool(inOrOut):
                 continue
         out.writeLine(record[0] + '|' + str(next_id))
         next_id += 1
     out.close()
     reader.close()
Exemple #26
0
    def writeCandidateToId():
        """Build and persist the candidate -> id mapping.

        Collects the distinct column-0 values of every '|'-separated gzip
        file listed in ``Candidate.PART_FILE_NAME`` and numbers them from 0
        in set iteration order.  It then writes ``<candidate>|<id>`` lines
        to ``Candidate.CANDIDATES_ID_TXT`` and pickles the
        ``{candidate: str(id)}`` dict to ``Candidate.CANDIDATES_ID_PICKLE``.
        """
        print('Info: Writing canditateToId')
        candidate = set()
        for pf in Candidate.PART_FILE_NAME:
            print('Info: processing ' + pf)
            f = SepFile('|').open(pf, 'gzip', 'rb')
            try:
                for line in f:
                    candidate.add(line[0])
            finally:
                f.close()

        candToId = {}
        writer = LineFile()
        writer.open(Candidate.CANDIDATES_ID_TXT, 'txt', 'w')
        try:
            for i, cand in enumerate(candidate):
                candToId[cand] = str(i)
                writer.writeLine(cand + '|' + str(i))
        finally:
            writer.close()

        # drop the set before pickling (presumably to reduce peak memory)
        del candidate
        gc.collect()
        with open(Candidate.CANDIDATES_ID_PICKLE, 'wb') as pickled:
            # NOTE(review): True is passed as the protocol argument, which
            # is the same as protocol 1 -- confirm that is the intent.
            pickle.dump(candToId, pickled, True)
Exemple #27
0
 def writeCandidateToId():
     """Number every distinct candidate and store the result twice.

     Column 0 of each '|'-separated gzip file in
     ``Candidate.PART_FILE_NAME`` is gathered into a set.  The members are
     numbered from 0, written as ``<candidate>|<id>`` lines to
     ``Candidate.CANDIDATES_ID_TXT`` and pickled as a
     ``{candidate: str(id)}`` dict to ``Candidate.CANDIDATES_ID_PICKLE``.
     """
     print('Info: Writing canditateToId')
     pool = set()
     for part in Candidate.PART_FILE_NAME:
         print('Info: processing ' + part)
         reader = SepFile('|').open(part, 'gzip', 'rb')
         for record in reader:
             pool.add(record[0])
         reader.close()
     pool = list(pool)

     out = LineFile()
     out.open(Candidate.CANDIDATES_ID_TXT, 'txt', 'w')
     candToId = {}
     for position, cand in enumerate(pool):
         label = str(position)
         candToId[cand] = label
         out.writeLine(cand + '|' + label)
     out.close()

     del pool
     gc.collect()
     pickle.dump(candToId, open(Candidate.CANDIDATES_ID_PICKLE, 'wb'), True)
Exemple #28
0
    def writeCandidateToId():
        """Build and persist the candidate -> id mapping from a file tree.

        Walks ``Candidate.BASE_PATH`` and reads every file whose full path
        matches ``<BASE_PATH>/<anything>.gz`` as '|'-separated records,
        collecting the distinct column-0 values.  They are numbered from 0
        in set iteration order, written as ``<candidate>|<id>`` lines to
        ``Candidate.CANDIDATES_ID_TXT`` and pickled as a
        ``{candidate: str(id)}`` dict to ``Candidate.CANDIDATES_ID_PICKLE``.
        """
        candidates = set()
        match_dir = re.compile(os.path.join(Candidate.BASE_PATH, r'.*\.gz'))
        for (dirpath, _, files) in os.walk(Candidate.BASE_PATH):
            for gzfile in files:
                gzfile_dir = os.path.join(dirpath, gzfile)
                if not match_dir.search(gzfile_dir):
                    continue
                f = SepFile('|').open(gzfile_dir, mode='gzip', flag='r')
                try:
                    for line in f:
                        candidates.add(line[0])
                finally:
                    f.close()

        candToId = {}
        writer = LineFile().open(Candidate.CANDIDATES_ID_TXT, mode='txt', flag='w')
        try:
            for i, cand in enumerate(candidates):
                candToId[cand] = str(i)
                writer.writeLine(cand + '|' + str(i))
        finally:
            writer.close()

        # drop the set before pickling (presumably to reduce peak memory)
        del candidates
        gc.collect()
        with open(Candidate.CANDIDATES_ID_PICKLE, 'wb') as pickled:
            # NOTE(review): True is passed as the protocol argument, which
            # is the same as protocol 1 -- confirm that is the intent.
            pickle.dump(candToId, pickled, True)
Exemple #29
0
    def writeQuserToId(mask=None, inOrOut=True):
        """Number the selected distinct user names and write the id file.

        Reads ``Quser.TOTAL_QUSER_TXT`` as ','-separated records.  The user
        name is the fixed prefix plus column 0, or plus column 1 when
        column 0 is empty and column 1 is not.  Selected distinct names are
        written to ``Quser.QUSER_ID_TXT`` as ``<name>|<index>`` lines,
        numbered from 0 in set iteration order.

        Args:
            mask: optional container of prefixed user names; ``None``
                selects every name.
            inOrOut: with a mask, truthy selects names in the mask and
                falsy selects names not in it.
        """
        qUser = set()
        f = SepFile(',').open(Quser.TOTAL_QUSER_TXT, 'txt', 'r')
        try:
            for line in f:
                # fall back to column 1 only when column 0 is empty
                if len(line[0]) == 0 and len(line[1]) != 0:
                    username = '******' + line[1]
                else:
                    username = '******' + line[0]
                if mask is None:
                    selected = True
                elif inOrOut:
                    selected = username in mask
                else:
                    selected = username not in mask
                if selected:
                    qUser.add(username)
        finally:
            f.close()

        f = LineFile().open(Quser.QUSER_ID_TXT, 'txt', 'w')
        try:
            for i, username in enumerate(qUser):
                f.writeLine(username + '|' + str(i))
        finally:
            f.close()
Exemple #30
0
 def writeQuserToId(mask=None, inOrOut=True):
     """Write ``name|index`` lines for the distinct, filtered user names.

     Names are derived from the ','-separated ``Quser.TOTAL_QUSER_TXT``:
     the fixed prefix plus column 0, or plus column 1 if column 0 is empty
     while column 1 is not.  Without a mask all names are kept.  With one,
     ``inOrOut`` picks names inside the mask (truthy) or outside it (falsy).
     Output goes to ``Quser.QUSER_ID_TXT`` with indices starting at 0.
     """
     names = set()
     reader = SepFile(',').open(Quser.TOTAL_QUSER_TXT, 'txt', 'r')
     for record in reader:
         name = '******' + record[0]
         if len(record[0]) == 0 and len(record[1]) != 0:
             name = '******' + record[1]
         if not (mask == None):
             if (name in mask) != bool(inOrOut):
                 continue
         names.add(name)
     names = list(names)
     reader.close()

     out = LineFile().open(Quser.QUSER_ID_TXT, 'txt', 'w')
     for position, name in enumerate(names):
         out.writeLine(name + '|' + str(position))
     out.close()
 def procInterval(self, weekInterval):  # differs from Quser: we do not need to add up the 7 days
     """Generate batches of ``{user: {package: count}}`` dicts.

     Files below ``self.candPath`` whose path matches
     ``<candPath>/<one of weekInterval>/<anything>.gz`` are read as
     '|'-separated records (column 0 user, column 1 package, column 2
     integer count).  Each user seen gets an entry; counts are stored and
     summed only for packages found in ``self.qPackageToId``.

     When a user that is not yet in the batch shows up and the batch already
     contains ``self.max_record`` users, the batch is yielded and a new one
     begins with that user.  The remaining batch is yielded at the end.
     """
     batch = {}
     users_in_batch = 0
     pattern = re.compile(
         os.path.join(self.candPath, '(' + '|'.join(weekInterval) + ')',
                      r'.*\.gz'))
     for (root, _, names) in os.walk(self.candPath):
         for name in names:
             path = os.path.join(root, name)
             if not pattern.search(path):
                 continue
             reader = SepFile('|').open(path, mode='gzip', flag='rb')
             for record in reader:
                 if record[0] not in batch:
                     if users_in_batch >= self.max_record:
                         # batch is full: hand it over and start afresh
                         yield batch
                         batch = {}
                         users_in_batch = 0
                     batch[record[0]] = {}
                     users_in_batch += 1
                 if record[1] in self.qPackageToId:
                     counts = batch[record[0]]
                     if record[1] in counts:
                         counts[record[1]] += int(record[2])
                     else:
                         counts[record[1]] = int(record[2])
             reader.close()
     yield batch
Exemple #32
0
    def writeCandidateToId():
        """Number every distinct candidate found under ``Candidate.BASE_PATH``.

        Files whose path matches ``<BASE_PATH>/<anything>.gz`` are read as
        '|'-separated records and column 0 is gathered into a set.  The
        members are numbered from 0, written as ``<candidate>|<id>`` lines
        to ``Candidate.CANDIDATES_ID_TXT`` and pickled as a
        ``{candidate: str(id)}`` dict to ``Candidate.CANDIDATES_ID_PICKLE``.
        """
        pool = set()
        pattern = re.compile(os.path.join(Candidate.BASE_PATH, r'.*\.gz'))
        for (root, _, names) in os.walk(Candidate.BASE_PATH):
            for name in names:
                path = os.path.join(root, name)
                if not pattern.search(path):
                    continue
                reader = SepFile('|').open(path, mode='gzip', flag='r')
                for record in reader:
                    pool.add(record[0])
                reader.close()
        pool = list(pool)

        out = LineFile().open(Candidate.CANDIDATES_ID_TXT,
                              mode='txt',
                              flag='w')
        candToId = {}
        for position, cand in enumerate(pool):
            label = str(position)
            candToId[cand] = label
            out.writeLine(cand + '|' + label)
        out.close()

        del pool
        gc.collect()
        pickle.dump(candToId, open(Candidate.CANDIDATES_ID_PICKLE, 'wb'), True)
 def procInterval(self, week):  # quser can only be controlled per week, not per record
     """Sum package open counts per user across the selected week folders.

     Files below ``self.qUserPath`` whose path matches
     ``<qUserPath>/<one of week>/<anything>.gz`` are read as '|'-separated
     records.  The result maps each user seen to a dict of
     ``package -> summed count``, restricted to packages found in
     ``self.qPackageToId`` (a user with none of them maps to ``{}``).
     """
     pattern = re.compile(
         os.path.join(self.qUserPath, '(' + '|'.join(week) + ')', r'.*\.gz'))
     totals = {}
     for (root, _, names) in os.walk(self.qUserPath):
         for name in names:
             path = os.path.join(root, name)
             if pattern.search(path) is None:
                 continue
             reader = SepFile('|').open(path, mode='gzip', flag='rb')
             for record in reader:
                 # record[0] userId, record[1] packageName, record[2] openTimes
                 if record[0] not in totals:
                     totals[record[0]] = {}
                 if record[1] not in self.qPackageToId:
                     continue
                 per_user = totals[record[0]]
                 if record[1] in per_user:
                     per_user[record[1]] += int(record[2])
                 else:
                     per_user[record[1]] = int(record[2])
             reader.close()
     return totals
Exemple #34
0
    }
    #mask = {}
    for (filename, dirs, files) in os.walk(appPath):
        print filename
        for gzfile in files:
            [name, ext] = os.path.splitext(gzfile)
            if ext == '.gz':
                f = SepFile('|')
                f.open(filename + os.sep + gzfile, mode='gzip', flag='rb')
                for line in f:
                    if line[0] not in mask:
                        if line[1] not in openPackage:
                            openPackage[line[1]] = int(line[2])
                        else:
                            openPackage[line[1]] += int(line[2])
                f.close()
    openTimes = []
    print 'sorting'
    packs = openPackage.keys()
    for key in packs:
        openTimes.append(openPackage[key])
    index = sorted(range(len(openTimes)),
                   key=lambda k: openTimes[k],
                   reverse=True)
    print 'sorted'

    writer = LineFile()
    writer.open(appPath + os.sep + 'qUserOpenPackage.txt',
                mode='txt',
                flag='w')
    for i in index:
Exemple #35
0
def getQuserOpenPackage(
        basePath='s3://datamining.ym/dmuser/ykang/results/qUserInLast5EachDay',
        beginDay='2016-01-24',
        interval_='30',
        isForward='0',
        s3DictBasePath='s3://datamining.ym/dmuser/ykang/data/spark.ouwan.qUserOpenPackage',
        isDownload=True):
    """Total the open counts per package and upload the ranked list.

    If ``isDownload`` is truthy, one folder per day produced by
    ``getDaysGen(beginDay, int(interval_), int(isForward))`` is copied from
    ``basePath`` into the mission folder.  Every ``.gz`` file below that
    folder is then read as '|'-separated records (column 0 user, column 1
    package, column 2 integer count) and counts are summed per package,
    ignoring the users listed in the local mask.  The totals are written to
    ``qUserOpenPackage.txt`` in the mission folder, highest first, and that
    file is copied to ``<s3DictBasePath>/qUserOpenPackage.txt``.

    Returns:
        dict mapping package -> summed open count.
    """
    msc = MissionContext(conf=MissionConf().setAppName('getQuserOpenPackage'))
    _, appPath = msc.getFolder()
    if isDownload:
        for theDay in getDaysGen(beginDay, int(interval_), int(isForward)):
            BashUtil.s3Cp(os.path.join(basePath, theDay),
                          appPath + os.sep + theDay,
                          recursived=True)

    # users whose records are left out of the totals
    ignored = dict.fromkeys([
        'imei=333333333333333',
        'imei=123456789abcdef',
        'imei=111111111111111',
        'imei=012345678912345',
        'imei=000000000000000',
        'imei=00000000000000',
    ], 1)
    openPackage = {}
    for (folder, _, names) in os.walk(appPath):
        print(folder)
        for name in names:
            if os.path.splitext(name)[1] != '.gz':
                continue
            reader = SepFile('|')
            reader.open(folder + os.sep + name, mode='gzip', flag='rb')
            for record in reader:
                if record[0] in ignored:
                    continue
                if record[1] in openPackage:
                    openPackage[record[1]] += int(record[2])
                else:
                    openPackage[record[1]] = int(record[2])
            reader.close()

    print('sorting')
    # stable descending sort: ties keep the dict's iteration order
    ranked = sorted(openPackage.items(),
                    key=lambda item: item[1],
                    reverse=True)
    print('sorted')

    localCopy = os.path.join(appPath, 'qUserOpenPackage.txt')
    writer = LineFile()
    writer.open(localCopy, mode='txt', flag='w')
    for package, total in ranked:
        writer.writeLine(package + '|' + str(total))
    writer.close()

    BashUtil.s3Cp(localCopy,
                  dst=os.path.join(s3DictBasePath, 'qUserOpenPackage.txt'),
                  recursived=False)
    return openPackage
    mask = {'imei=333333333333333':1, 'imei=123456789abcdef':1, 'imei=111111111111111':1, 'imei=012345678912345':1, 'imei=000000000000000':1, 'imei=00000000000000':1}
    #mask = {}
    for (filename, dirs, files) in os.walk(appPath):
        print filename
        for gzfile in files:
            [name, ext] = os.path.splitext(gzfile)
            if ext == '.gz':
                f = SepFile('|')
                f.open(filename+os.sep+gzfile, mode='gzip', flag='rb')
                for line in f:
                    if line[0] not in mask:
                        if line[1] not in openPackage:
                            openPackage[line[1]] = int(line[2])
                        else:
                            openPackage[line[1]] += int(line[2])
                f.close()
    openTimes = []
    print 'sorting'
    packs = openPackage.keys()
    for key in packs:
        openTimes.append(openPackage[key])
    index = sorted(range(len(openTimes)), key=lambda k: openTimes[k], reverse=True)
    print 'sorted'
 
    writer = LineFile()
    writer.open(appPath+os.sep+'qUserOpenPackage.txt', mode='txt', flag='w')
    for i in index:
        key = packs[i]
        value = openPackage[key]
        writer.writeLine(key + '|' + str(value))
    writer.close()