def down(target, inputFile, fileStart, fileEnd, N, outputFile):
    """Sort the range fileStart..fileEnd of inputFile into outputFile.

    A range whose length is at most N is copied into outputFile and sorted
    directly. A longer range is split at the point returned by getMidPoint;
    each part is handed to a child ``down`` job writing to its own temporary
    file, and an ``up`` follow-on job is scheduled to merge the two temporary
    files into outputFile.

    target is the job object; it supplies logToMaster, getGlobalTempDir,
    addChildTargetFn and setFollowOnTargetFn.
    """
    # Deliberately injected failure, raised on roughly half of the calls.
    # This error is a test error, it does not mean the tests have failed.
    if random.random() > 0.5:
        raise RuntimeError()

    length = fileEnd - fileStart
    message = "Am running a down target with length: %i from input file: %s" % (
        length, inputFile)
    target.logToMaster(message)
    assert length >= 0

    if length <= N:
        # Small enough: sort this bit of the file in place in the output.
        copySubRangeOfFile(inputFile, fileStart, fileEnd, outputFile)
        sort(outputFile)
        return

    # Too large: subdivide the range around the mid point.
    midPoint = getMidPoint(inputFile, fileStart, fileEnd)
    assert midPoint >= fileStart
    assert midPoint + 1 < fileEnd
    # One is added to the mid point to avoid the newline, so the boundary
    # between the two halves sits just after it.
    boundary = midPoint + 1

    lowerSorted = getTempFile(rootDir=target.getGlobalTempDir())
    upperSorted = getTempFile(rootDir=target.getGlobalTempDir())
    target.addChildTargetFn(
        down, (inputFile, fileStart, boundary, N, lowerSorted))
    target.addChildTargetFn(
        down, (inputFile, boundary, fileEnd, N, upperSorted))
    # The follow-on combines the two sorted halves into the final output.
    target.setFollowOnTargetFn(up, (lowerSorted, upperSorted, outputFile))
def down(target, inputFile, fileStart, fileEnd, N, outputFile):
    """Recursively sort part of a file.

    Takes an input file, a range (fileStart to fileEnd) within it, and the
    location of the output file that receives the sorted range. When the
    range is longer than the threshold N it is cut in two, each half is
    given to a child job running this same function, and a follow-on job
    running ``up`` merges the halves back together. Otherwise the range is
    copied to outputFile and sorted there.
    """
    if random.random() > 0.5:
        # This error is a test error, it does not mean the tests have failed.
        raise RuntimeError()

    length = fileEnd - fileStart
    target.logToMaster("Am running a down target with length: %i from input file: %s" % (length, inputFile))
    assert length >= 0

    if length > N:
        midPoint = getMidPoint(inputFile, fileStart, fileEnd)
        assert midPoint >= fileStart
        assert midPoint+1 < fileEnd
        # Add one to the mid point to avoid the newline.
        splitPoint = midPoint + 1
        subRanges = ((fileStart, splitPoint), (splitPoint, fileEnd))
        # Allocate both temporary outputs before scheduling any child job.
        sortedParts = [getTempFile(rootDir=target.getGlobalTempDir())
                       for _ in subRanges]
        for (partStart, partEnd), partFile in zip(subRanges, sortedParts):
            target.addChildTargetFn(
                down, (inputFile, partStart, partEnd, N, partFile))
        target.setFollowOnTargetFn(
            up, (sortedParts[0], sortedParts[1], outputFile))
    else:
        # We can sort this bit of the file directly.
        copySubRangeOfFile(inputFile, fileStart, fileEnd, outputFile)
        sort(outputFile)
def testCopySubRangeOfFile(self):
    """Check copySubRangeOfFile against a plain slice of the source file.

    Repeated self.testNo times: a file is generated with makeFileToSort, a
    random range with 0 <= fileStart <= fileEnd < fileSize is picked, the
    range is copied with copySubRangeOfFile, and the copy is compared via
    checkEqual with contents[fileStart:fileEnd] of the generated file.
    """
    for _ in xrange(self.testNo):
        tempDir = getTempDirectory(os.getcwd())
        tempFile = getTempFile(rootDir=tempDir)
        outputFile = getTempFile(rootDir=tempDir)
        makeFileToSort(tempFile)
        fileSize = os.path.getsize(tempFile)
        assert fileSize > 0
        # fileEnd may equal fileStart, in which case the range is empty.
        fileStart = random.choice(xrange(0, fileSize))
        fileEnd = random.choice(xrange(fileStart, fileSize))
        copySubRangeOfFile(tempFile, fileStart, fileEnd, outputFile)
        # Read through context managers so the handles are closed on every
        # iteration instead of being left for the garbage collector.
        with open(outputFile, 'r') as copiedHandle:
            copied = copiedHandle.read()
        with open(tempFile, 'r') as sourceHandle:
            expected = sourceHandle.read()[fileStart:fileEnd]
        checkEqual(copied, expected)
        # Remove this iteration's scratch directory and everything in it.
        system("rm -rf %s" % tempDir)
def run(self):
    """Sort this target's range of the input file into its output file.

    Reads self.inputFile, self.fileStart, self.fileEnd, self.N and
    self.outputFile. A range no longer than self.N is copied to the output
    file and sorted there. A longer range is split at the point returned by
    getMidPoint into two child Down targets, each writing to its own
    temporary file, followed by an Up target built from those two files and
    the output file.
    """
    source = self.inputFile
    start, end = self.fileStart, self.fileEnd
    length = end - start
    self.logToMaster("Am running a down target with length: %i from input file: %s" % (length, source))
    assert length >= 0

    if length <= self.N:
        # We can sort this bit of the file directly.
        copySubRangeOfFile(source, start, end, self.outputFile)
        sort(self.outputFile)
        return

    # We will subdivide the file.
    midPoint = getMidPoint(source, start, end)
    assert midPoint >= start
    assert midPoint + 1 < end
    # Add one to the mid point to avoid the newline.
    boundary = midPoint + 1

    lowerSorted = getTempFile(rootDir=self.getGlobalTempDir())
    upperSorted = getTempFile(rootDir=self.getGlobalTempDir())
    self.addChildTarget(Down(source, start, boundary, self.N, lowerSorted))
    self.addChildTarget(Down(source, boundary, end, self.N, upperSorted))
    self.setFollowOnTarget(Up(lowerSorted, upperSorted, self.outputFile))