def generate_nextround_task(self, cntRound):
    """Build the task set for round `cntRound` from the previous round's output.

    For every task directory of round `cntRound - 1`: if that task finished
    ('done'), only its 'errorlist' entries (failed files) are carried forward;
    otherwise the whole 'filelist' is retried.  Entries are re-packed into new
    tasks capped by self.taskSize / self.taskFileNumbers.

    :param cntRound: round number to generate (previous round must exist)
    :return: True on success
    """
    self.currentRound = cntRound
    roundPrefix = self.synctask + '/' + self.jobType + '/round' + str(self.currentRound)
    # Remove any partial output of this round before regenerating it.
    log.info('clean ' + roundPrefix + ' first')
    clean.clean_with_prefix(self.desClient, self.destination['bucketName'], roundPrefix)
    log.info('start to generate tasks(round ' + str(self.currentRound) + ') for ' + self.jobType)
    taskNumber = 0
    taskPrefix = self.synctask + '/' + self.jobType + '/round' + str(self.currentRound) + '/task'
    currentSize = 0          # bytes accumulated in the task being built
    currentFileNumbers = 0   # entries accumulated in the task being built
    filelist = list()
    oldTaskDirs = self.desClient.get_alldir(
        self.destination['bucketName'],
        self.synctask + '/' + self.jobType + '/round' + str(self.currentRound - 1) + '/', '')
    for oldTaskDir in oldTaskDirs:
        lines = readdata.readlines(self.desClient, self.destination['bucketName'],
                                   oldTaskDir + 'taskstatus')
        line = (lines.readline()).strip('\n').strip()
        if line == 'done':
            # Task completed: only its failures need another pass.
            fileName = 'errorlist'
        else:
            # Task never finished: retry everything it was given.
            fileName = 'filelist'
        lines = readdata.readlines(self.desClient, self.destination['bucketName'],
                                   oldTaskDir + fileName)
        # First readline() skips the "<count>\t<size>" header; the second
        # fetches the first real entry ("<name>\t<size>").
        line = lines.readline()
        line = lines.readline()
        while line:
            line = line.strip('\n')
            objectSize = line.split('\t')[1].strip()
            filelist.append(line)
            currentSize += long(objectSize)
            currentFileNumbers += 1
            if currentSize >= self.taskSize or currentFileNumbers >= self.taskFileNumbers:
                self.generate_a_task(filelist, currentFileNumbers, currentSize,
                                     taskNumber, taskPrefix)
                # When the user did not pin task attributes, wait for task0 to
                # finish so its measured throughput can tune later tasks.
                if taskNumber == 0 and self.is_user_set_task_attributes == False:
                    self.waitfor_task0_finished()
                taskNumber += 1
                currentFileNumbers = 0
                currentSize = 0
                filelist[:] = []  # clear in place; generate_a_task saw the old contents
            line = lines.readline()
    # Flush the final, partially filled task (if any entries remain).
    if currentSize != 0 or currentFileNumbers != 0:
        self.generate_a_task(filelist, currentFileNumbers, currentSize,
                             taskNumber, taskPrefix)
    log.info('generate tasks successfully')
    return True
def recover_generating_round0_task_info(self, taskPrefix):
    """Rebuild round0 generation progress from tasks already written.

    Walks taskPrefix0, taskPrefix1, ... while a 'taskstatus' object exists,
    re-reading each task's filelist header to restore the per-task limits and
    the running totals.  A single recovered task is treated as not worth
    keeping: all counters (and self.nextmarker) are reset so generation
    restarts from scratch.

    :param taskPrefix: key prefix of round0 tasks, e.g. '<sync>/<job>/round0/task'
    :return: number of tasks considered recovered
    """
    bucket = self.destination['bucketName']
    recovered = 0
    self.allFiles = 0
    self.allSize = 0
    while True:
        if not self.desClient.does_object_exists(
                bucket, taskPrefix + str(recovered) + '/taskstatus'):
            break
        header = readdata.readlines(
            self.desClient, bucket,
            taskPrefix + str(recovered) + '/filelist').readline()
        countField, sizeField = header.strip('\n').strip().split('\t')
        self.taskFileNumbers = long(countField)
        self.taskSize = long(sizeField)
        self.allFiles += long(countField)
        self.allSize += long(sizeField)
        recovered += 1
    # Discard the recovered state when only a single task was found.
    if recovered == 1:
        self.nextmarker = ''
        recovered = 0
        self.allFiles = 0
        self.allSize = 0
    log.info(
        'recover generating round0 task info successfully, taskNumber:' +
        str(recovered) + ' allfiles:' + str(self.allFiles) + ' allsize:' +
        str(self.allSize) + ' nextmarker:' + self.nextmarker)
    return recovered
def generate_errorlist(client, bucketName,jobType,synctask,currentRound,errorFileName): log.info('start to generate errorlist for '+jobType) try: errorFile = open(errorFileName,'a') errorFile.write('#################### '+synctask+' failed list ####################\n') taskDirs = client.get_alldir(bucketName,synctask+'/'+jobType+'/round'+str(currentRound)+'/') existFailedFile = False for mydir in taskDirs: lines = readdata.readlines(client, bucketName, mydir+'errorlist') line = lines.readline() line = lines.readline() while line: errorFile.write(line) line = lines.readline() existFailedFile = True errorFile.write('\n') errorFile.close() if not existFailedFile: log.info('there are no file failed,congratulation!') else: log.info('some file failed,and we save them in '+errorFileName+'. you can find why failed with it.') log.info('generate errorlist success') except (ossexception.GetObjectFailed,IOError, Exception), e: log.info(e) log.info('generate errorlist failed')
def get_this_round_status(self, bucketName, keyName): try: lines = readdata.readlines(self.desClient, bucketName, keyName) line = (lines.readline()).strip('\n').strip() return line except (ossexception.GetObjectFailed, Exception), e: log.info(e) log.info( 'the upper means that round is not finished, we will do it') return None
def upload_list(self):
    """Upload this task's error list (and md5 list for check jobs) to OSS.

    Verifies the remote taskstatus still carries our task ID before writing,
    so a re-assigned task does not clobber another worker's results.

    :raises taskexception.TaskIDIsNotTheSame: remote task ID differs from ours
    """
    lines = readdata.readlines(self.desClient, self.destination['bucketName'],
                               self.keyForTaskPath + 'taskstatus')
    lines.readline()  # skip the status line; the task ID is on line 2
    # Fix: readline() keeps the trailing newline, so the comparison below
    # could never match self.taskID; strip it like every other status read.
    taskID = lines.readline().strip('\n').strip()
    if taskID != self.taskID:
        raise taskexception.TaskIDIsNotTheSame(taskID + ' and ' + self.taskID + ' are not the same!')
    if self.errorFileList:
        # Prepend the "<failed>\t<failedSize>" header expected by readers.
        self.errorFileList.insert(0, str(self.failed) + '\t' + str(self.failedSize))
        self.desClient.put_object(self.destination['bucketName'],
                                  self.keyForTaskPath + 'errorlist',
                                  '\n'.join(self.errorFileList))
    if self.md5List and self.jobType == type.JobType.Check:
        self.desClient.put_object(self.destination['bucketName'],
                                  self.keyForTaskPath + 'md5list',
                                  '\n'.join(self.md5List))
def recover_this_round_task(self, cntRound):
    """Resume an interrupted round: re-queue its unfinished tasks.

    Tasks whose taskstatus reads 'done' are left alone; every other task is
    reset and pushed back onto the work queue, and its file/size counts are
    added to the running totals.

    :param cntRound: the round number to recover
    :return: True on success
    """
    self.currentRound = cntRound
    self.startRound = cntRound
    # Collect the finished and unfinished tasks from this round.
    log.info('start to recover the round' + str(self.currentRound) + ' for ' + self.jobType)
    taskDirs = self.desClient.get_alldir(
        self.destination['bucketName'],
        self.synctask + '/' + self.jobType + '/round' + str(self.startRound) + '/', '')
    for taskdir in taskDirs:
        pair = taskdir.split('/')
        # assumes keys look like '<sync>/<job>/roundN/taskM/' so index 4 is
        # the task directory name — TODO confirm against get_alldir output
        taskNumber = pair[4].strip()
        lines = readdata.readlines(self.desClient, self.destination['bucketName'],
                                   taskdir + 'taskstatus')
        line = (lines.readline()).strip('\n').strip()
        if line == 'done':
            log.info(taskNumber + ',finished already')
        else:
            # Not done: reset its bookkeeping and schedule it again.
            reset_taskinfo(self.desClient, self.destination['bucketName'], taskdir)
            lines = readdata.readlines(self.desClient, self.destination['bucketName'],
                                       taskdir + 'filelist')
            # NOTE(review): `.strip().strip()` is redundant — probably meant
            # `.strip('\n').strip()` like the other reads; harmless as-is.
            line = (lines.readline()).strip().strip()
            pair = line.split('\t')
            self.allFiles += long(pair[0].strip())
            self.allSize += long(pair[1].strip())
            task = TransferOrCheckTask(self.synctask, self.jobType, self.source,
                                       self.destination, taskdir, self.master_ip_port,
                                       self.check['mode'], self.sync)
            task.set_status(status.TASK.ready)
            self.put(task)
            log.info(
                str(taskNumber) + ',fileNumbers:' + str(pair[0].strip()) +
                ',taskSize:' + str(pair[1].strip()))
    log.info('recover the round' + str(self.currentRound) + ' successfully')
    return True
def is_this_job_has_finished(self, jobType): try: lines = readdata.readlines( self.desClient, self.destination['bucketName'], self.synctask + '/' + jobType + '/' + jobType + 'status') line = lines.readline() if line == 'done': return True except (ossexception.GetObjectFailed, Exception), e: log.info(e) log.info('the upper means the ' + jobType + ' is not finished, we will do it') return False
def __init__(self, task):
    """Initialize a worker from a task description.

    Copies the task's identity/endpoints, creates the source and destination
    clients, then reads the task's filelist header to learn how many files
    and bytes this task covers.

    :param task: task object exposing get_ID/get_source/... accessors
    """
    self.taskID = task.get_ID()
    self.sync = task.get_sync()
    self.source = task.get_source()
    self.destination = task.get_destination()
    self.fileType = self.source['filetype']
    self.jobType = task.get_jobType()
    self.keyForTaskPath = task.get_keyForTaskPath()
    self.checkMode = task.get_checkmode()
    # Disk sources carry a local root path; object sources do not.
    if self.fileType == type.FileType.DiskFile:
        self.absolutePath = self.source['absolutepath']
    [self.srcClient, self.desClient] = self.create_clients()
    self.lines = readdata.readlines(self.desClient,
                                    self.destination['bucketName'],
                                    task.get_keyForTaskPath()+"filelist")
    # The filelist's first line is the "<count>\t<size>" header.
    line = self.lines.readline()
    pair = line.split('\t')
    self.fileNumbers = long(pair[0].strip())
    self.taskSize = long(pair[1].strip())
    self.initialClassVar()
def __recover_generateDiskFileListTask_info(self, desClient, bucketName, prefix): try: filelistNumber = 0 queue = deque() if desClient.does_object_exists(bucketName, prefix+'dirqueue'): lines = readdata.readlines(desClient, bucketName, prefix + 'dirqueue') line = lines.readline().strip('\n').strip() filelistNumber = long(line) line = lines.readline() while line: line = line.strip('\n').strip() if line == '': continue queue.append(line) line = lines.readline() return [filelistNumber, queue] except Exception, e: return [filelistNumber, queue]
def generate_checktask_from_transfer_round0(self):
    """Build round0 check tasks from the transfer job's round0 filelists.

    Re-packs every entry of each transfer task's filelist into new check
    tasks capped by self.taskSize / self.taskFileNumbers.

    :return: True on success; False when nothing was transferred
             (self.allFiles == 0) or a filelist could not be read
    """
    log.info('generate checktask from transfer round0')
    log.info('start to generate tasks(round ' + str(self.currentRound) + ') for ' + self.jobType)
    taskNumber = 0
    taskPrefix = self.synctask + '/' + self.jobType + '/round0/task'
    currentSize = 0          # bytes accumulated in the task being built
    currentFileNumbers = 0   # entries accumulated in the task being built
    filelist = list()
    transferTaskDirs = self.desClient.get_alldir(
        self.destination['bucketName'],
        self.synctask + '/' + type.JobType.Transfer + '/round0/', '')
    try:
        for transferTaskDir in transferTaskDirs:
            lines = readdata.readlines(self.desClient, self.destination['bucketName'],
                                       transferTaskDir + 'filelist')
            # First readline() skips the "<count>\t<size>" header; the second
            # fetches the first real entry ("<name>\t<size>").
            line = lines.readline()
            line = lines.readline()
            while line:
                line = line.strip('\n')
                objectSize = line.split('\t')[1].strip()
                filelist.append(line)
                currentSize += long(objectSize)
                currentFileNumbers += 1
                if currentSize >= self.taskSize or currentFileNumbers >= self.taskFileNumbers:
                    self.generate_a_task(filelist, currentFileNumbers, currentSize,
                                         taskNumber, taskPrefix)
                    taskNumber += 1
                    currentFileNumbers = 0
                    currentSize = 0
                    filelist[:] = []  # clear in place; generate_a_task saw the old contents
                line = lines.readline()
        # Flush the final, partially filled task (if any entries remain).
        if currentSize != 0 or currentFileNumbers != 0:
            self.generate_a_task(filelist, currentFileNumbers, currentSize,
                                 taskNumber, taskPrefix)
        if self.allFiles == 0:
            return False
        return True
    except ossexception.GetObjectFailed, e:
        log.info(e)
        return False
def generate_md5list(client, bucketName,synctask,md5FileName): log.info('start to generate md5list') try: success = 0 md5File = open(md5FileName,'a') md5File.write('#################### '+synctask+' md5 list ####################\nls') roundDirs = client.get_alldir(bucketName,synctask+'/'+type.JobType.Check+'/') for roundDir in roundDirs: if 'round' in roundDir: taskDirs = client.get_alldir(bucketName,roundDir) for mydir in taskDirs: lines = readdata.readlines(client, bucketName, mydir+'md5list') line = lines.readline() if line.strip('\n').strip() == '': continue while line: success += 1 md5File.write(line) line = lines.readline() md5File.write('\n') md5File.close() log.info('generate md5list success') return success except (ossexception.GetObjectFailed,IOError, Exception), e: log.info(e) log.info('generate md5list failed') return 0
def get_filelist(self, client, bucketname, filelistName, filelistStatusName): while True: try: lines = readdata.readlines(client, bucketname, filelistName) return lines except ossexception.GetObjectFailed, e: if client.does_object_exists(bucketname, filelistStatusName) == True: log.info(e) log.info( 'no more filelist can be read, the last is the last one' ) return None else: log.info(e) log.info( 'the upper means generating disk-filelist is not finished' ) sleep(10)