def get_last_step_directory(total_partitions, local_directory=None):
    """
    Gets the directory associated with the most recent step executed in
    this job, or None if none exists.

    total_partitions: forwarded to the last-step filename lookup.
    local_directory: optional destination path. If it contains a '%'
        placeholder (e.g. 'steps.%d') the last step number is formatted
        into it; a path without a placeholder is used unchanged. When
        omitted (or empty), a fresh temporary directory whose name ends in
        '.<step>' is created.

    Returns the local directory the files were downloaded into, or None
    when no last step was found (nothing is created or downloaded then).
    """
    last_step, filenames = \
        PartitionUtilities.__get_last_step_filenames(total_partitions)

    # Without a step there is nothing to download; bail out before creating
    # a temporary directory that would never be returned to the caller.
    if last_step is None:
        return None

    if local_directory:
        # Only treat the caller's path as a template when it actually has
        # a placeholder; formatting a plain path would raise TypeError.
        if '%' in local_directory:
            local_directory = local_directory % last_step
    else:
        # Format the suffix *before* creating the directory so the path we
        # return is the directory that really exists on disk.
        local_directory = tempfile.mkdtemp(suffix='.%d' % last_step)

    HDFSUtilities.download_files(filenames, local_directory)
    return local_directory
def get_step_dictionary(total_partitions):
    """
    Builds a dictionary mapping each step number to the partition files
    found for it, keeping only the steps whose collected files number
    exactly `total_partitions`.

    The file names are extracted from the listing of '/' using
    PartitionUtilities.filename_pattern; the step number is the integer
    captured by group 1 of PartitionUtilities.part_pattern.

    NOTE(review): another definition of get_step_dictionary follows this
    one in the file; if both share a scope, the later one takes effect.
    """
    listing = HDFSUtilities.list_files('/')

    # Accumulate every listed file under its step number.
    steps = {}
    for found in re.findall(PartitionUtilities.filename_pattern, listing,
                            re.MULTILINE):
        name = found.strip()
        step = int(re.match(PartitionUtilities.part_pattern, name).group(1))
        steps = PartitionUtilities.__insert(steps, step, name)

    # Drop steps that do not have the full set of partitions.
    complete = {}
    for step, files in steps.items():
        if len(files) == total_partitions:
            complete[step] = files
    return complete
def get_step_dictionary(total_partitions):
    """
    Returns a dictionary keyed by step number whose values are the
    partition files listed for that step. Steps whose file collection does
    not have length `total_partitions` are left out.

    File names come from matching PartitionUtilities.filename_pattern
    against the listing of '/'; each step number is parsed from group 1 of
    PartitionUtilities.part_pattern.

    NOTE(review): an earlier definition with the same name and logic
    appears in this file; one of the two is presumably redundant.
    """
    listing = HDFSUtilities.list_files('/')
    stripped_names = (
        raw.strip()
        for raw in re.findall(PartitionUtilities.filename_pattern,
                              listing, re.MULTILINE))

    # Group the file names by the step number embedded in each name.
    by_step = {}
    for name in stripped_names:
        number = int(
            re.match(PartitionUtilities.part_pattern, name).group(1))
        by_step = PartitionUtilities.__insert(by_step, number, name)

    # Keep only the steps for which every partition is present.
    return dict(
        (number, files)
        for number, files in by_step.items()
        if len(files) == total_partitions)