Example #1
def discover_files(output_name, tool_provided_metadata, extra_file_collectors,
                   job_working_directory, matchable):
    """Yield DiscoveredFile objects for an output, either from tool-provided
    metadata entries or by walking the job working directory with the
    configured extra-file collectors."""
    if extra_file_collectors and extra_file_collectors[0].discover_via == "tool_provided_metadata":
        # Just load entries from tool-provided metadata; only one collector
        # may be configured in this mode.
        assert len(extra_file_collectors) == 1
        extra_file_collector = extra_file_collectors[0]
        target_directory = discover_target_directory(extra_file_collector.directory, job_working_directory)
        for dataset in tool_provided_metadata.get_new_datasets(output_name):
            filename = dataset["filename"]
            path = os.path.join(target_directory, filename)
            yield DiscoveredFile(
                path,
                extra_file_collector,
                JsonCollectedDatasetMatch(dataset, extra_file_collector, filename, path=path),
            )
    else:
        for match, collector in walk_over_file_collectors(extra_file_collectors, job_working_directory, matchable):
            yield DiscoveredFile(match.path, collector, match)
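
The first branch above only needs an object whose get_new_datasets(output_name) returns dict-like entries containing a "filename" key. A minimal, hypothetical stand-in illustrating that shape (StubToolProvidedMetadata and the paths below are assumptions for this sketch, not Galaxy's real metadata class):

import os

class StubToolProvidedMetadata:
    # Hypothetical stand-in: mimics only the single method the snippet calls.
    def __init__(self, new_datasets_by_output):
        self.new_datasets_by_output = new_datasets_by_output

    def get_new_datasets(self, output_name):
        return self.new_datasets_by_output.get(output_name, [])

metadata = StubToolProvidedMetadata({"out1": [{"filename": "table.tsv"}]})
for dataset in metadata.get_new_datasets("out1"):
    # discover_files joins each filename onto the resolved target directory.
    print(os.path.join("/tmp/job_working_directory", dataset["filename"]))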
Example #2
def walk_over_extra_files(target_dir, extra_file_collector,
                          job_working_directory, matchable):
    """
    Walks through all files in a given directory, and returns all files that
    match the given collector's match criteria. If the collector has the
    recurse flag enabled, will also recursively descend into child folders.
    """
    matches = []
    directory = discover_target_directory(target_dir, job_working_directory)
    if os.path.isdir(directory):
        for filename in os.listdir(directory):
            path = os.path.join(directory, filename)
            if os.path.isdir(path) and extra_file_collector.recurse:
                # The current directory is already validated, so use that as the next job_working_directory when recursing
                for match in walk_over_extra_files(filename,
                                                   extra_file_collector,
                                                   directory, matchable):
                    yield match
            else:
                match = extra_file_collector.match(matchable,
                                                   filename,
                                                   path=path)
                if match:
                    matches.append(match)

    # Matches gathered at this directory level are emitted in the collector's sort order.
    for match in extra_file_collector.sort(matches):
        yield match
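
To make the collector protocol concrete, here is a self-contained sketch of the same recursive walk-and-match pattern. GlobCollector and walk are hypothetical stand-ins that only mirror the interface the snippet relies on (recurse, match, sort); they are not Galaxy's API. Note the same quirk as above: matches found while recursing are yielded immediately, while matches at the current level are sorted before being yielded.

import os
import tempfile

class GlobCollector:
    # Hypothetical stand-in collector, not Galaxy's real collector class.
    def __init__(self, suffix, recurse=True):
        self.suffix = suffix
        self.recurse = recurse

    def match(self, matchable, filename, path=None):
        # Return the path when the filename matches, None otherwise.
        return path if filename.endswith(self.suffix) else None

    def sort(self, matches):
        return sorted(matches)

def walk(directory, collector, matchable=None):
    matches = []
    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        if os.path.isdir(path) and collector.recurse:
            # Nested matches are yielded as soon as they are found...
            yield from walk(path, collector, matchable)
        else:
            match = collector.match(matchable, filename, path=path)
            if match:
                matches.append(match)
    # ...while matches at this level are sorted before being yielded.
    yield from collector.sort(matches)

with tempfile.TemporaryDirectory() as root:
    os.makedirs(os.path.join(root, "sub"))
    for name in ["b.txt", "a.txt", os.path.join("sub", "c.txt")]:
        open(os.path.join(root, name), "w").close()
    print(list(walk(root, GlobCollector(".txt"))))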
Example #3
def _walk(target_dir, extra_file_collector, job_working_directory, matchable, parent_paths):
    """Recursively walk target_dir, tracking the chain of parent directory
    names and passing it to the collector's match method as parent_paths."""
    directory = discover_target_directory(target_dir, job_working_directory)
    if os.path.isdir(directory):
        for filename in os.listdir(directory):
            path = os.path.join(directory, filename)
            if os.path.isdir(path):
                if extra_file_collector.recurse:
                    new_parent_paths = parent_paths[:]
                    new_parent_paths.append(filename)
                    # The current directory is already validated, so use that as the next job_working_directory when recursing
                    yield from _walk(
                        filename, extra_file_collector, directory, matchable, parent_paths=new_parent_paths
                    )
            else:
                match = extra_file_collector.match(matchable, filename, path=path, parent_paths=parent_paths)
                if match:
                    yield match
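
In this variant, parent_paths grows by one directory name per recursion level, so a match for nested/deeper/file.txt sees parent_paths == ["nested", "deeper"]. A small self-contained sketch of that accumulation (walk_with_parents is a hypothetical illustration, not the function above):

import os
import tempfile

def walk_with_parents(directory, parent_paths=None):
    parent_paths = parent_paths or []
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        if os.path.isdir(path):
            # Each recursion level appends exactly one directory name.
            yield from walk_with_parents(path, parent_paths + [filename])
        else:
            yield filename, parent_paths

with tempfile.TemporaryDirectory() as root:
    os.makedirs(os.path.join(root, "nested", "deeper"))
    open(os.path.join(root, "nested", "deeper", "file.txt"), "w").close()
    print(list(walk_with_parents(root)))
    # -> [('file.txt', ['nested', 'deeper'])]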