def down(target, inputFile, fileStart, fileEnd, N, outputFile):
    """Sort the range fileStart..fileEnd of inputFile into outputFile.

    When the length of the range exceeds the threshold N, the range is cut
    at a mid point: two child ``down`` targets are scheduled to sort each
    part into its own temporary file, and a follow-on ``up`` target is
    scheduled with both temporary files and outputFile so the results can be
    merged back. Otherwise the range is copied into outputFile and sorted
    there directly.
    """
    # Deliberately injected random failure: this error is a test error, it
    # does not mean the tests have failed.
    if random.random() > 0.5:
        raise RuntimeError()

    length = fileEnd - fileStart
    target.logToMaster(
        "Am running a down target with length: %i from input file: %s"
        % (length, inputFile))
    assert length >= 0

    if length <= N:
        # Small enough: we can sort this bit of the file right here.
        copySubRangeOfFile(inputFile, fileStart, fileEnd, outputFile)
        sort(outputFile)
        return

    # Too large: we will subdivide the file.
    midPoint = getMidPoint(inputFile, fileStart, fileEnd)
    assert midPoint >= fileStart
    assert midPoint + 1 < fileEnd
    # One past the mid point, to avoid the newline
    # (presumably midPoint is the offset of a line break -- TODO confirm).
    splitAt = midPoint + 1

    leftSorted = getTempFile(rootDir=target.getGlobalTempDir())
    rightSorted = getTempFile(rootDir=target.getGlobalTempDir())
    target.addChildTargetFn(
        down, (inputFile, fileStart, splitAt, N, leftSorted))
    target.addChildTargetFn(
        down, (inputFile, splitAt, fileEnd, N, rightSorted))
    target.setFollowOnTargetFn(up, (leftSorted, rightSorted, outputFile))
def down(target, inputFile, fileStart, fileEnd, N, outputFile):
    """Recursive "divide" step of the file sort.

    Takes an input file, a range (fileStart, fileEnd) into it and the
    location where the sorted result should be written. A range longer than
    the threshold N is divided in two, each part being handed to a child
    ``down`` target, with a follow-on ``up`` target created to merge the two
    partial results into outputFile. A range no longer than N is copied to
    outputFile and sorted in place.
    """
    shouldFail = random.random() > 0.5
    if shouldFail:
        # This error is a test error, it does not mean the tests have failed.
        raise RuntimeError()

    length = fileEnd - fileStart
    message = "Am running a down target with length: %i from input file: %s" % (
        length, inputFile)
    target.logToMaster(message)
    assert length >= 0

    if length > N:
        # We will subdivide the file.
        midPoint = getMidPoint(inputFile, fileStart, fileEnd)
        assert midPoint >= fileStart
        assert midPoint + 1 < fileEnd

        firstPartFile = getTempFile(rootDir=target.getGlobalTempDir())
        secondPartFile = getTempFile(rootDir=target.getGlobalTempDir())

        # The boundary between the parts is midPoint + 1: add one to avoid
        # the newline.
        firstPartArgs = (inputFile, fileStart, midPoint + 1, N, firstPartFile)
        secondPartArgs = (inputFile, midPoint + 1, fileEnd, N, secondPartFile)
        target.addChildTargetFn(down, firstPartArgs)
        target.addChildTargetFn(down, secondPartArgs)

        mergeArgs = (firstPartFile, secondPartFile, outputFile)
        target.setFollowOnTargetFn(up, mergeArgs)
    else:
        # We can sort this bit of the file.
        copySubRangeOfFile(inputFile, fileStart, fileEnd, outputFile)
        sort(outputFile)
def testSort(self):
    """Check that ``sort`` orders a file the same way ``list.sort`` does.

    Repeated ``self.testNo`` times: a scratch file is created in a fresh
    temporary directory under the current working directory and filled by
    ``makeFileToSort``. Its loaded contents are sorted in memory as the
    expected result, then the file itself is sorted with ``sort`` and
    reloaded, and the two are compared with ``checkEqual``.
    """
    # Local import so this block does not depend on the file-level imports.
    import shutil

    # The loop index is not used; ``range`` is valid on Python 2 and 3.
    for _ in range(self.testNo):
        tempDir = getTempDirectory(os.getcwd())
        try:
            tempFile1 = getTempFile(rootDir=tempDir)
            makeFileToSort(tempFile1)

            # Expected result: contents sorted in memory.
            lines1 = loadFile(tempFile1)
            lines1.sort()

            # Actual result: file sorted on disk, then reloaded.
            sort(tempFile1)
            lines2 = loadFile(tempFile1)

            checkEqual(lines1, lines2)
        finally:
            # Always remove the scratch directory, even when the comparison
            # fails, and do so without going through a shell so that paths
            # containing spaces or shell metacharacters are handled safely.
            shutil.rmtree(tempDir)
def run(self):
    """Sort the range self.fileStart..self.fileEnd of self.inputFile.

    A range longer than self.N is split at a mid point: two child ``Down``
    targets are added, each writing to its own temporary file, and an ``Up``
    follow-on target is set with both temporary files and self.outputFile.
    A range no longer than self.N is copied into self.outputFile and sorted
    there directly.
    """
    start, end = self.fileStart, self.fileEnd
    length = end - start
    self.logToMaster(
        "Am running a down target with length: %i from input file: %s"
        % (length, self.inputFile))
    assert length >= 0

    if length <= self.N:
        # We can sort this bit of the file.
        copySubRangeOfFile(self.inputFile, start, end, self.outputFile)
        sort(self.outputFile)
        return

    # We will subdivide the file.
    midPoint = getMidPoint(self.inputFile, start, end)
    assert midPoint >= start
    assert midPoint + 1 < end
    # Add one to avoid the newline.
    boundary = midPoint + 1

    lowerFile = getTempFile(rootDir=self.getGlobalTempDir())
    upperFile = getTempFile(rootDir=self.getGlobalTempDir())
    self.addChildTarget(
        Down(self.inputFile, start, boundary, self.N, lowerFile))
    self.addChildTarget(
        Down(self.inputFile, boundary, end, self.N, upperFile))
    self.setFollowOnTarget(Up(lowerFile, upperFile, self.outputFile))