Beispiel #1
0
 def generate_nextround_task(self, cntRound):
     """Build the task list for round *cntRound* from the previous round's leftovers.

     For every task directory of round cntRound-1, reads either its
     'errorlist' (task done -> retry only the failures) or its 'filelist'
     (task unfinished -> redo everything), and repacks the records into
     new tasks bounded by self.taskSize bytes / self.taskFileNumbers files.

     Returns True on success.
     """
     self.currentRound = cntRound
     roundPrefix = (self.synctask + '/' + self.jobType + '/round' +
                    str(self.currentRound))
     # Remove any partial output of an earlier attempt at this round.
     log.info('clean ' + roundPrefix + ' first')
     clean.clean_with_prefix(self.desClient, self.destination['bucketName'],
                             roundPrefix)
     log.info('start to generate tasks(round ' + str(self.currentRound) +
              ') for ' + self.jobType)
     taskNumber = 0
     taskPrefix = roundPrefix + '/task'
     currentSize = 0
     currentFileNumbers = 0
     filelist = list()
     oldTaskDirs = self.desClient.get_alldir(
         self.destination['bucketName'], self.synctask + '/' +
         self.jobType + '/round' + str(self.currentRound - 1) + '/', '')
     for oldTaskDir in oldTaskDirs:
         lines = readdata.readlines(self.desClient,
                                    self.destination['bucketName'],
                                    oldTaskDir + 'taskstatus')
         line = (lines.readline()).strip('\n').strip()
         # A finished task only needs its failures retried; an unfinished
         # one must be redone completely.
         if line == 'done':
             fileName = 'errorlist'
         else:
             fileName = 'filelist'
         lines = readdata.readlines(self.desClient,
                                    self.destination['bucketName'],
                                    oldTaskDir + fileName)
         line = lines.readline()  # skip the summary header line
         line = lines.readline()
         while line:
             line = line.strip('\n')
             # Each record is '<name>\t<size>...'.
             objectSize = line.split('\t')[1].strip()
             filelist.append(line)
             currentSize += long(objectSize)
             currentFileNumbers += 1
             if currentSize >= self.taskSize or currentFileNumbers >= self.taskFileNumbers:
                 self.generate_a_task(filelist, currentFileNumbers,
                                      currentSize, taskNumber, taskPrefix)
                 # Task 0 is used to probe sensible task attributes when
                 # the user did not set them explicitly.
                 if taskNumber == 0 and not self.is_user_set_task_attributes:
                     self.waitfor_task0_finished()
                 taskNumber += 1
                 currentFileNumbers = 0
                 currentSize = 0
                 filelist[:] = []
             line = lines.readline()
     # Flush the final, possibly not-full task.
     if currentSize != 0 or currentFileNumbers != 0:
         self.generate_a_task(filelist, currentFileNumbers, currentSize,
                              taskNumber, taskPrefix)
     log.info('generate tasks successfully')
     return True
Beispiel #2
0
 def recover_generating_round0_task_info(self, taskPrefix):
     """Scan the round0 tasks already generated and rebuild the generator state.

     Counts consecutive existing tasks under *taskPrefix* and accumulates
     their file counts and sizes into self.allFiles / self.allSize; the
     per-task limits are taken from the last task seen.

     Returns the number of the next task to generate.
     """
     taskNumber = 0
     self.allFiles = 0
     self.allSize = 0
     statusKey = taskPrefix + str(taskNumber) + '/taskstatus'
     while self.desClient.does_object_exists(
             self.destination['bucketName'], statusKey):
         reader = readdata.readlines(
             self.desClient, self.destination['bucketName'],
             taskPrefix + str(taskNumber) + '/filelist')
         header = reader.readline().strip('\n').strip()
         fileNumbers, size = header.split('\t')
         # The last task seen defines the per-task limits.
         self.taskFileNumbers = long(fileNumbers)
         self.taskSize = long(size)
         self.allFiles += long(fileNumbers)
         self.allSize += long(size)
         taskNumber += 1
         statusKey = taskPrefix + str(taskNumber) + '/taskstatus'
     # Only a single task was recovered: discard it and start over.
     if taskNumber == 1:
         self.nextmarker = ''
         taskNumber = 0
         self.allFiles = 0
         self.allSize = 0
     log.info(
         'recover generating round0 task info successfully, taskNumber:' +
         str(taskNumber) + ' allfiles:' + str(self.allFiles) + ' allsize:' +
         str(self.allSize) + ' nextmarker:' + self.nextmarker)
     return taskNumber
Beispiel #3
0
def generate_errorlist(client, bucketName,jobType,synctask,currentRound,errorFileName):
    log.info('start to generate errorlist for '+jobType)
    try:
        errorFile = open(errorFileName,'a')
        errorFile.write('#################### '+synctask+' failed list ####################\n')
        taskDirs = client.get_alldir(bucketName,synctask+'/'+jobType+'/round'+str(currentRound)+'/')
        existFailedFile = False
        for mydir in taskDirs:
            lines = readdata.readlines(client, bucketName, mydir+'errorlist')
            line = lines.readline()
            line = lines.readline()
            while line:
                errorFile.write(line)
                line = lines.readline()
                existFailedFile = True
            errorFile.write('\n')
                
        errorFile.close()
        if not existFailedFile:
            log.info('there are no file failed,congratulation!')
        else:
            log.info('some file failed,and we save them in '+errorFileName+'. you can find why failed with it.')
        log.info('generate errorlist success')
    except (ossexception.GetObjectFailed,IOError, Exception), e:
        log.info(e)
        log.info('generate errorlist failed')
Beispiel #4
0
 def get_this_round_status(self, bucketName, keyName):
     try:
         lines = readdata.readlines(self.desClient, bucketName, keyName)
         line = (lines.readline()).strip('\n').strip()
         return line
     except (ossexception.GetObjectFailed, Exception), e:
         log.info(e)
         log.info(
             'the upper means that round is not finished, we will do it')
         return None
Beispiel #5
0
 def upload_list(self):
     """Upload this task's error list (and md5 list for check jobs).

     Verifies the task still belongs to this worker by comparing the
     taskID recorded in 'taskstatus' with self.taskID, then writes the
     accumulated error/md5 lists back next to the task.

     Raises taskexception.TaskIDIsNotTheSame when another worker has
     taken the task over.
     """
     lines = readdata.readlines(self.desClient, self.destination['bucketName'],self.keyForTaskPath+'taskstatus')
     lines.readline()  # first line is the task status, not the ID
     # Bug fix: readline() keeps the trailing newline, so the recorded ID
     # could never match self.taskID unless it was the file's last,
     # unterminated line; strip it like the other status readers do.
     taskID = lines.readline().strip('\n').strip()
     if taskID != self.taskID:
         raise taskexception.TaskIDIsNotTheSame(taskID + ' and ' + self.taskID + ' are not the same!')
     if self.errorFileList:
         # Prepend the summary line '<failedCount>\t<failedSize>'.
         self.errorFileList.insert(0, str(self.failed)+'\t'+str(self.failedSize))
         self.desClient.put_object(self.destination['bucketName'], self.keyForTaskPath+'errorlist','\n'.join(self.errorFileList))
     if self.md5List and self.jobType == type.JobType.Check:
         self.desClient.put_object(self.destination['bucketName'], self.keyForTaskPath+'md5list','\n'.join(self.md5List))
Beispiel #6
0
 def recover_this_round_task(self, cntRound):
     """Re-enqueue the unfinished tasks of round *cntRound* after a restart.

     Tasks whose 'taskstatus' reads 'done' are left alone; every other
     task is reset and pushed back onto the queue, and its file count /
     size are added back to the job totals.

     Returns True on success.
     """
     self.currentRound = cntRound
     self.startRound = cntRound
     # Split this round's task dirs into finished and unfinished ones.
     log.info('start to recover the round' + str(self.currentRound) +
              ' for ' + self.jobType)
     taskDirs = self.desClient.get_alldir(
         self.destination['bucketName'], self.synctask + '/' +
         self.jobType + '/round' + str(self.startRound) + '/', '')
     for taskdir in taskDirs:
         # Dir layout: <synctask>/<jobType>/round<N>/task<M>/ ->
         # component 4 is the task directory name.
         pair = taskdir.split('/')
         taskNumber = pair[4].strip()
         lines = readdata.readlines(self.desClient,
                                    self.destination['bucketName'],
                                    taskdir + 'taskstatus')
         line = (lines.readline()).strip('\n').strip()
         if line == 'done':
             log.info(taskNumber + ',finished already')
         else:
             reset_taskinfo(self.desClient, self.destination['bucketName'],
                            taskdir)
             lines = readdata.readlines(self.desClient,
                                        self.destination['bucketName'],
                                        taskdir + 'filelist')
             # Fix: the original called .strip().strip() (the same no-op
             # twice); use the file-wide .strip('\n').strip() convention.
             line = (lines.readline()).strip('\n').strip()
             # Summary line is '<fileNumbers>\t<taskSize>'.
             pair = line.split('\t')
             self.allFiles += long(pair[0].strip())
             self.allSize += long(pair[1].strip())
             task = TransferOrCheckTask(self.synctask, self.jobType,
                                        self.source, self.destination,
                                        taskdir, self.master_ip_port,
                                        self.check['mode'], self.sync)
             task.set_status(status.TASK.ready)
             self.put(task)
             log.info(
                 str(taskNumber) + ',fileNumbers:' + str(pair[0].strip()) +
                 ',taskSize:' + str(pair[1].strip()))
     log.info('recover the round' + str(self.currentRound) +
              ' successfully')
     return True
Beispiel #7
0
 def is_this_job_has_finished(self, jobType):
     try:
         lines = readdata.readlines(
             self.desClient, self.destination['bucketName'],
             self.synctask + '/' + jobType + '/' + jobType + 'status')
         line = lines.readline()
         if line == 'done':
             return True
     except (ossexception.GetObjectFailed, Exception), e:
         log.info(e)
         log.info('the upper means the ' + jobType +
                  ' is not finished, we will do it')
         return False
Beispiel #8
0
 def __init__(self, task):
     # Identity and configuration are copied from the task object.
     self.taskID = task.get_ID()
     self.sync = task.get_sync()
     self.source = task.get_source()
     self.destination = task.get_destination()
     self.fileType = self.source['filetype']
     self.jobType = task.get_jobType()
     self.keyForTaskPath = task.get_keyForTaskPath()
     self.checkMode = task.get_checkmode()
     # Disk-file sources additionally carry the local root path.
     if self.fileType == type.FileType.DiskFile:
         self.absolutePath = self.source['absolutepath']
     [self.srcClient, self.desClient] = self.create_clients()
     # The task's 'filelist' starts with a '<fileNumbers>\t<totalSize>'
     # summary line; the remaining records stay in self.lines for later
     # consumption.
     self.lines = readdata.readlines(self.desClient, self.destination['bucketName'], task.get_keyForTaskPath()+"filelist")
     line = self.lines.readline()
     pair = line.split('\t')
     self.fileNumbers = long(pair[0].strip())
     self.taskSize = long(pair[1].strip())
     self.initialClassVar()
Beispiel #9
0
 def __recover_generateDiskFileListTask_info(self, desClient, bucketName, prefix):
     try:
         filelistNumber = 0
         queue = deque()
         if desClient.does_object_exists(bucketName, prefix+'dirqueue'):
             lines = readdata.readlines(desClient, bucketName, prefix + 'dirqueue')
             line = lines.readline().strip('\n').strip()
             filelistNumber = long(line)
             line = lines.readline()
             while line:
                 line = line.strip('\n').strip()
                 if line == '':
                     continue
                 queue.append(line)
                 line = lines.readline()
         return [filelistNumber, queue]
     except Exception, e:
         return [filelistNumber, queue]
Beispiel #10
0
 def generate_checktask_from_transfer_round0(self):
     log.info('generate checktask from transfer round0')
     log.info('start to generate tasks(round ' + str(self.currentRound) +
              ') for ' + self.jobType)
     taskNumber = 0
     taskPrefix = self.synctask + '/' + self.jobType + '/round0/task'
     currentSize = 0
     currentFileNumbers = 0
     filelist = list()
     transferTaskDirs = self.desClient.get_alldir(
         self.destination['bucketName'],
         self.synctask + '/' + type.JobType.Transfer + '/round0/', '')
     try:
         for transferTaskDir in transferTaskDirs:
             lines = readdata.readlines(self.desClient,
                                        self.destination['bucketName'],
                                        transferTaskDir + 'filelist')
             line = lines.readline()
             line = lines.readline()
             while line:
                 line = line.strip('\n')
                 objectSize = line.split('\t')[1].strip()
                 filelist.append(line)
                 currentSize += long(objectSize)
                 currentFileNumbers += 1
                 if currentSize >= self.taskSize or currentFileNumbers >= self.taskFileNumbers:
                     self.generate_a_task(filelist, currentFileNumbers,
                                          currentSize, taskNumber,
                                          taskPrefix)
                     taskNumber += 1
                     currentFileNumbers = 0
                     currentSize = 0
                     filelist[:] = []
                 line = lines.readline()
         if currentSize != 0 or currentFileNumbers != 0:
             self.generate_a_task(filelist, currentFileNumbers, currentSize,
                                  taskNumber, taskPrefix)
         if self.allFiles == 0:
             return False
         return True
     except ossexception.GetObjectFailed, e:
         log.info(e)
         return False
Beispiel #11
0
def generate_md5list(client, bucketName,synctask,md5FileName):
    log.info('start to generate md5list')
    try:
        success = 0
        md5File = open(md5FileName,'a')
        md5File.write('#################### '+synctask+' md5 list ####################\nls')
        roundDirs = client.get_alldir(bucketName,synctask+'/'+type.JobType.Check+'/')
        for roundDir in roundDirs:
            if 'round' in roundDir:
                taskDirs = client.get_alldir(bucketName,roundDir)
                for mydir in taskDirs:
                    lines = readdata.readlines(client, bucketName, mydir+'md5list')
                    line = lines.readline()
                    if line.strip('\n').strip() == '':
                        continue
                    while line:
                        success += 1
                        md5File.write(line)
                        line = lines.readline()
                    md5File.write('\n')
        md5File.close()
        log.info('generate md5list success')
        return success
    except (ossexception.GetObjectFailed,IOError, Exception), e:
        log.info(e)
        log.info('generate md5list failed')
        return 0

    
    
        
        
        

        
                
                
                
                
                
                
                
                
Beispiel #12
0
 def get_filelist(self, client, bucketname, filelistName,
                  filelistStatusName):
     while True:
         try:
             lines = readdata.readlines(client, bucketname, filelistName)
             return lines
         except ossexception.GetObjectFailed, e:
             if client.does_object_exists(bucketname,
                                          filelistStatusName) == True:
                 log.info(e)
                 log.info(
                     'no more filelist can be read, the last is the last one'
                 )
                 return None
             else:
                 log.info(e)
                 log.info(
                     'the upper means generating disk-filelist is not finished'
                 )
                 sleep(10)