Esempio n. 1
0
def test_constructor__requirements():
    """Check that 'requirements' is exposed as a frozenset.

    Covers a bare callable, a single-item list and a two-item list.
    """
    cases = (
        (id, [id]),
        ([id], [id]),
        ([id, str], [id, str]),
    )
    for (requirements, expected) in cases:
        node = Node(requirements=requirements)
        assert_equal(node.requirements, frozenset(expected))
Esempio n. 2
0
    def __init__(self, fasta_files, sequences, destination, dependencies=()):
        """
        fasta_files  -- { taxon_name_1 : filename_1, ... }
        sequences    -- { interval_name_1, ... }
        destination  -- folder receiving one "<name>.fasta" per sequence
        dependencies -- forwarded unchanged to Node.__init__
        """
        self._infiles = copy.deepcopy(fasta_files)
        self._sequences = utilities.safe_coerce_to_frozenset(sequences)
        self._destination = copy.copy(destination)

        outfiles = []
        for name in self._sequences:
            outfiles.append(os.path.join(destination, name + ".fasta"))
        self._outfiles = outfiles

        # Each FASTA file is listed first, followed by the matching ".fai"
        # filename for every one of them.
        fasta_paths = list(self._infiles.itervalues())
        input_files = fasta_paths + [path + ".fai" for path in fasta_paths]

        summary = (len(self._sequences), len(self._infiles), self._destination)
        desc = "<CollectSequences: %i sequences from %i files -> '%s'>" % summary

        Node.__init__(self,
                      description=desc,
                      input_files=input_files,
                      output_files=self._outfiles,
                      dependencies=dependencies)
Esempio n. 3
0
    def __init__(self, config, target_name, input_files, output_file,
                 intervals_file = None, print_stats = False,
                 max_contigs = _MAX_CONTIGS, dependencies = ()):
        """Set up a depth-histogram node for one or more input BAM files.

        The node's input files are the BAMs, one ".bai" name per BAM (via
        swap_ext) and, if given, the intervals file. Executables and auxiliary
        files are collected from the commands in the first element returned by
        concatenate_input_bams.
        """
        self._target_name = target_name
        self._input_files = safe_coerce_to_tuple(input_files)
        self._output_file = output_file
        self._intervals   = intervals_file
        self._print_stats = print_stats
        self._max_contigs = max_contigs
        self._max_contigs_reached = False

        bam_files = self._input_files
        required_files = list(bam_files)
        required_files.extend(swap_ext(bam_file, ".bai") for bam_file in bam_files)
        if intervals_file:
            required_files.append(intervals_file)

        # The choice of tool depends on whether an intervals file is given
        if intervals_file:
            executables = ["coverageBed"]
        else:
            executables = ["genomeCoverageBed"]

        auxiliary_files = []
        commands = concatenate_input_bams(config, bam_files)[0]
        for command in commands:
            executables.extend(command.executables)
            auxiliary_files.extend(command.auxiliary_files)

        description = "<DepthHistogram: %s -> '%s'>" \
            % (describe_files(bam_files), self._output_file)

        Node.__init__(self,
                      description  = description,
                      input_files  = required_files,
                      output_files = self._output_file,
                      dependencies = dependencies,
                      executables  = executables,
                      auxiliary_files = auxiliary_files)
Esempio n. 4
0
def test_is_done__subnode_output_changes(temp_folder):
    """A node with a subnode is only done once the subnode's output exists."""
    output_path = os.path.join(temp_folder, "file.txt")
    parent = Node(subnodes=Node(output_files=output_path))

    assert not parent.is_done
    set_file_contents(output_path, "foo")
    assert parent.is_done
Esempio n. 5
0
def test_is_outdated__updates():
    """is_outdated should be set only when the input is the 'younger' file."""
    older = "tests/data/timestamp_a_older"
    younger = "tests/data/timestamp_a_younger"

    current = Node(input_files=older, output_files=younger)
    assert not current.is_outdated

    stale = Node(input_files=younger, output_files=older)
    assert stale.is_outdated
Esempio n. 6
0
    def _teardown(self, config, temp):
        """Clean up the temporary folder and publish the output file.

        Deletes the input file path rerooted into 'temp' together with its
        ".bai" companion, moves the rerooted output file to its final
        location, and then defers to Node._teardown.
        """
        staged_input = reroot_path(temp, self._input_file)
        for path in (staged_input, staged_input + ".bai"):
            os.remove(path)

        staged_output = reroot_path(temp, self._output_file)
        move_file(staged_output, self._output_file)

        Node._teardown(self, config, temp)
Esempio n. 7
0
 def __init__(self, input_files, output_file, dependencies=()):
     """Register the input/output files and a description with Node."""
     file_summary = describe_files(input_files)
     description = "<Detect Input Duplication: %s>" % file_summary

     Node.__init__(self,
                   description=description,
                   input_files=input_files,
                   output_files=output_file,
                   dependencies=dependencies)
Esempio n. 8
0
    def __init__(self, infiles, out_prefix, exclude_groups=(), reduce=False,
                 dependencies=(), file_dependencies=()):
        """
        infiles = {names : {"partitions" : ..., "filenames" : [...]}}

        Raises TypeError if 'infiles' is not a dictionary of dictionaries,
        and ValueError if an entry contains keys outside of _VALID_KEYS or
        if its "filenames" value is not a list.
        """
        is_dict_of_dicts = isinstance(infiles, dict) \
            and all(isinstance(dd, dict) for dd in infiles.values())
        if not is_dict_of_dicts:
            raise TypeError("'infiles' must be a dictionary of dictionaries")

        input_filenames = []
        for (name, subdd) in infiles.iteritems():
            unknown_keys = set(subdd) - _VALID_KEYS
            if unknown_keys:
                raise ValueError("Invalid keys found for %r: %s"
                                 % (name, ", ".join(unknown_keys)))

            filenames = subdd["filenames"]
            if not isinstance(filenames, list):
                raise ValueError("filenames must be a list of strings")
            input_filenames.extend(filenames)

        # Optional file dependencies; used to depend on the list of sequences
        input_filenames.extend(safe_coerce_to_tuple(file_dependencies))

        self._reduce = bool(reduce)
        self._infiles = copy.deepcopy(infiles)
        self._out_prefix = out_prefix
        self._excluded = safe_coerce_to_frozenset(exclude_groups)

        if reduce:
            mode = " (reducing)"
        else:
            mode = ""
        description = "<FastaToPartitionedPhy%s: %i file(s) -> '%s.*'>" % \
            (mode, len(infiles), out_prefix)

        output_files = [out_prefix + ".phy", out_prefix + ".partitions"]
        Node.__init__(self,
                      description=description,
                      input_files=input_filenames,
                      output_files=output_files,
                      dependencies=dependencies)
Esempio n. 9
0
 def __init__(self, input_files, output_file, dependencies=()):
     """Pass the files, dependencies and a description on to Node."""
     described = describe_files(input_files)

     Node.__init__(self,
                   description="<Detect Input Duplication: %s>" % described,
                   input_files=input_files,
                   output_files=output_file,
                   dependencies=dependencies)
Esempio n. 10
0
    def __init__(self,
                 infiles,
                 out_partitions,
                 partition_by="123",
                 dependencies=()):
        """Validate 'infiles' and register the node with Node.__init__.

        infiles        -- dictionary of dictionaries; each inner dictionary
                          must contain "name", may contain a 3-entry
                          "partition_by", and may only use keys found in
                          _VALID_KEYS. The outer keys are used as input files.
        out_partitions -- output file for the node.
        partition_by   -- default partitioning; must be 3 entries long.
        dependencies   -- forwarded unchanged to Node.__init__.

        Raises TypeError if 'infiles' is not a dictionary of dictionaries, and
        ValueError for any of the other violations listed above.
        """
        if len(partition_by) != 3:
            raise ValueError("Default 'partition_by' must be 3 entires long!")
        elif not isinstance(infiles, dict):
            raise TypeError("'infiles' must be a dictionary")
        # The type check has to come before any use of dict methods on the
        # values, or a malformed value fails with an AttributeError instead.
        elif not all(isinstance(dd, dict) for dd in infiles.values()):
            raise TypeError("'infiles' must be a dictionary of dictionaries")
        elif any(
                len(dd.get("partition_by", "123")) != 3
                for dd in infiles.values()):
            raise ValueError("'partition_by' must be 3 entires long!")
        # Every entry needs a name, not merely one of them
        elif not all(("name" in dd) for dd in infiles.values()):
            raise ValueError("'name' must be specified for all input files")

        # Collected explicitly; the loop variable of a generator expression
        # is not available when building the error message.
        invalid_keys = set()
        for dd in infiles.values():
            invalid_keys.update(set(dd) - _VALID_KEYS)
        if invalid_keys:
            raise ValueError("Invalid keys found: %s" %
                             ", ".join(sorted(str(key) for key in invalid_keys)))

        self._infiles = infiles
        self._out_part = out_partitions
        self._part_by = partition_by

        description  = "<FastaToPartitions (default: %s): %i file(s) -> '%s'>" % \
            (partition_by, len(infiles), out_partitions)

        Node.__init__(self,
                      description=description,
                      input_files=infiles.keys(),
                      output_files=out_partitions,
                      dependencies=dependencies)
Esempio n. 11
0
    def __init__(self, infiles, out_prefix, partition_by = "123", add_flag = False, exclude_groups = (), dependencies = ()):
        """Validate 'infiles' and register the node with Node.__init__.

        infiles        -- dictionary of dictionaries; each inner dictionary
                          must contain "name", may contain a 3-entry
                          "partition_by", and may only use keys found in
                          _VALID_KEYS.
        out_prefix     -- prefix of the ".phy" and ".partitions" output files.
        partition_by   -- default partitioning; must be 3 entries long.
        add_flag       -- stored as-is on the node.
        exclude_groups -- stored as-is on the node.
        dependencies   -- forwarded unchanged to Node.__init__.

        Raises TypeError if 'infiles' is not a dictionary of dictionaries, and
        ValueError for any of the other violations listed above.
        """
        if len(partition_by) != 3:
            raise ValueError("Default 'partition_by' must be 3 entires long!")
        elif not isinstance(infiles, dict):
            raise TypeError("'infiles' must be a dictionary")
        # The type check has to come before any use of dict methods on the
        # values, or a malformed value fails with an AttributeError instead.
        elif not all(isinstance(dd, dict) for dd in infiles.values()):
            raise TypeError("'infiles' must be a dictionary of dictionaries")
        elif any(len(dd.get("partition_by", "123")) != 3 for dd in infiles.values()):
            raise ValueError("'partition_by' must be 3 entires long!")
        # Every entry needs a name, not merely one of them
        elif not all(("name" in dd) for dd in infiles.values()):
            raise ValueError("'name' must be specified for all input files")

        # Collected explicitly; the loop variable of a generator expression
        # is not available when building the error message.
        invalid_keys = set()
        for dd in infiles.values():
            invalid_keys.update(set(dd) - _VALID_KEYS)
        if invalid_keys:
            raise ValueError("Invalid keys found: %s"
                             % ", ".join(sorted(str(key) for key in invalid_keys)))

        self._infiles    = infiles
        self._out_prefix = out_prefix
        self._part_by    = partition_by
        self._add_flag   = add_flag
        self._excluded   = exclude_groups

        description  = "<FastaToPartitionedPhy (default: %s): %i file(s) -> '%s.*'>" % \
            (partition_by, len(infiles), out_prefix)

        Node.__init__(self,
                      description  = description,
                      input_files  = infiles,
                      output_files = [out_prefix + ".phy", out_prefix + ".partitions"],
                      dependencies = dependencies)
Esempio n. 12
0
    def __init__(self, infiles, out_partitions, partition_by = "123", dependencies = ()):
        """Validate 'infiles' and register the node with Node.__init__.

        infiles        -- dictionary of dictionaries; each inner dictionary
                          must contain "name", may contain a 3-entry
                          "partition_by", and may only use keys found in
                          _VALID_KEYS. The outer keys are used as input files.
        out_partitions -- output file for the node.
        partition_by   -- default partitioning; must be 3 entries long.
        dependencies   -- forwarded unchanged to Node.__init__.

        Raises TypeError if 'infiles' is not a dictionary of dictionaries, and
        ValueError for any of the other violations listed above.
        """
        if len(partition_by) != 3:
            raise ValueError("Default 'partition_by' must be 3 entires long!")
        elif not isinstance(infiles, dict):
            raise TypeError("'infiles' must be a dictionary")
        # The type check has to come before any use of dict methods on the
        # values, or a malformed value fails with an AttributeError instead.
        elif not all(isinstance(dd, dict) for dd in infiles.values()):
            raise TypeError("'infiles' must be a dictionary of dictionaries")
        elif any(len(dd.get("partition_by", "123")) != 3 for dd in infiles.values()):
            raise ValueError("'partition_by' must be 3 entires long!")
        # Every entry needs a name, not merely one of them
        elif not all(("name" in dd) for dd in infiles.values()):
            raise ValueError("'name' must be specified for all input files")

        # Collected explicitly; the loop variable of a generator expression
        # is not available when building the error message.
        invalid_keys = set()
        for dd in infiles.values():
            invalid_keys.update(set(dd) - _VALID_KEYS)
        if invalid_keys:
            raise ValueError("Invalid keys found: %s"
                             % ", ".join(sorted(str(key) for key in invalid_keys)))

        self._infiles   = infiles
        self._out_part  = out_partitions
        self._part_by   = partition_by

        description  = "<FastaToPartitions (default: %s): %i file(s) -> '%s'>" % \
            (partition_by, len(infiles), out_partitions)

        Node.__init__(self,
                      description  = description,
                      input_files  = infiles.keys(),
                      output_files = out_partitions,
                      dependencies = dependencies)
Esempio n. 13
0
def test_metanode__nodes():
    """MetaNode should accept iterators and expose frozensets of the nodes."""
    expected_subnodes = [Node(), Node()]
    expected_dependencies = [Node(), Node()]

    metanode = MetaNode(subnodes=iter(expected_subnodes),
                        dependencies=iter(expected_dependencies))

    assert_equal(metanode.subnodes, frozenset(expected_subnodes))
    assert_equal(metanode.dependencies, frozenset(expected_dependencies))
Esempio n. 14
0
 def __init__(self, input_files, output_file, offset, dependencies=()):
     """Store the offset and register files and description with Node."""
     self._offset = offset

     file_summary = describe_files(input_files)
     description = "<Validate FASTQ Files: %s>" % file_summary

     Node.__init__(self,
                   description=description,
                   input_files=input_files,
                   output_files=output_file,
                   dependencies=dependencies)
Esempio n. 15
0
 def __init__(self, input_files, output_file, offset, dependencies=()):
     """Keep 'offset' on the node and hand everything else to Node."""
     self._offset = offset

     described = describe_files(input_files)
     Node.__init__(self,
                   description="<Validate FASTQ Files: %s>" % described,
                   input_files=input_files,
                   output_files=output_file,
                   dependencies=dependencies)
Esempio n. 16
0
    def __init__(self, input_files, output_file, dependencies=()):
        """Record the output file and register the node with Node.__init__."""
        self._output_file = output_file

        summary = (describe_files(input_files), self._output_file)
        description = "<MergeCoverage: '%s' -> '%s'>" % summary

        Node.__init__(self,
                      description=description,
                      input_files=input_files,
                      output_files=self._output_file,
                      dependencies=dependencies)
Esempio n. 17
0
    def __init__(self, input_files, output_file, dependencies=()):
        """Register the node and require that it has exactly one output file."""
        file_summary = describe_files(input_files)
        description = "<Validate FASTA Files: %s>" % file_summary

        Node.__init__(self,
                      description=description,
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)

        # Checked against self.output_files, as set up by Node.__init__
        assert len(self.output_files) == 1, self.output_files
Esempio n. 18
0
    def __init__(self, input_files, output_file, dependencies=()):
        """Initialize via Node, then check that one output file was given."""
        described = describe_files(input_files)

        Node.__init__(self,
                      description="<Validate FASTA Files: %s>" % described,
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)

        # Checked against self.output_files, as set up by Node.__init__
        assert len(self.output_files) == 1, self.output_files
Esempio n. 19
0
    def __init__(self, input_files, output_file, dependencies=()):
        """Store the output file and pass the node setup on to Node."""
        self._output_file = output_file

        described = describe_files(input_files)
        description = "<MergeCoverage: '%s' -> '%s'>" \
            % (described, self._output_file)

        Node.__init__(self,
                      description=description,
                      input_files=input_files,
                      output_files=self._output_file,
                      dependencies=dependencies)
Esempio n. 20
0
    def __init__(self, reference, bedfile, outfile, dependencies=()):
        """Store the three paths; reference and bedfile are the node's inputs."""
        self._reference = reference
        self._bedfile = bedfile
        self._outfile = outfile

        Node.__init__(
            self,
            description="<ExtractReference: '%s' -> '%s'>" % (reference, outfile),
            input_files=[reference, bedfile],
            output_files=[outfile],
            dependencies=dependencies)
Esempio n. 21
0
    def __init__(self, reference, bedfile, outfile, dependencies=()):
        """Record the paths and register inputs/outputs with Node.__init__."""
        self._reference, self._bedfile, self._outfile = \
            reference, bedfile, outfile

        inputs = [reference, bedfile]
        outputs = [outfile]
        label = "<ExtractReference: '%s' -> '%s'>" % (reference, outfile)

        Node.__init__(self,
                      description=label,
                      input_files=inputs,
                      output_files=outputs,
                      dependencies=dependencies)
Esempio n. 22
0
    def __init__(self, input_file, output_file, exclude_groups, dependencies = ()):
        """Store the file paths and the excluded groups (coerced to a tuple)."""
        self._input_file = input_file
        self._output_file = output_file
        self._excluded = safe_coerce_to_tuple(exclude_groups)

        Node.__init__(
            self,
            description="<FastaToPAMLPhy: '%s' -> '%s'>" % (input_file, output_file),
            input_files=[input_file],
            output_files=[output_file],
            dependencies=dependencies)
Esempio n. 23
0
    def __init__(self, infiles, out_phy, add_flag = False, dependencies = ()):
        """Store the flag and output path; 'infiles' are the node's inputs."""
        self._add_flag = add_flag
        self._out_phy = out_phy

        # The description only mentions the flag when it is enabled
        if add_flag:
            flag_note = " (w/ flag)"
        else:
            flag_note = ""
        description = "<FastaToInterleavedPhy: %i file(s) -> '%s'%s>" \
            % (len(infiles), out_phy, flag_note)

        Node.__init__(self,
                      description=description,
                      input_files=infiles,
                      output_files=[out_phy],
                      dependencies=dependencies)
Esempio n. 24
0
    def __init__(self, infiles, out_phy, add_flag=False, dependencies=()):
        """Record 'add_flag' and 'out_phy', then register the node with Node."""
        self._add_flag = add_flag
        self._out_phy = out_phy

        # Suffix is only shown in the description when the flag is set
        suffix = " (w/ flag)" if add_flag else ""
        template = "<FastaToInterleavedPhy: %i file(s) -> '%s'%s>"

        Node.__init__(self,
                      description=template % (len(infiles), out_phy, suffix),
                      input_files=infiles,
                      output_files=[out_phy],
                      dependencies=dependencies)
Esempio n. 25
0
    def __init__(self, input_alignment, input_partition, output_alignment,
                 seed = None, dependencies = ()):
        """Store the paths and seed; alignment and partition are the inputs."""
        self._input_phy = input_alignment
        self._input_part = input_partition
        self._output_phy = output_alignment
        self._seed = seed

        description = "<PHYLIPBootstrap: %r -> %r>" \
            % (input_alignment, output_alignment)

        Node.__init__(self,
                      description=description,
                      input_files=(input_alignment, input_partition),
                      output_files=(output_alignment,),
                      dependencies=dependencies)
Esempio n. 26
0
    def __init__(self, main_tree_files, support_tree_files, output_file, dependencies = ()):
        """Store both sets of tree files as tuples and register the node.

        The node's input files are the main tree files followed by the
        support tree files; the description only covers the main tree files.
        """
        self._output_file = output_file
        self._main_tree_files = safe_coerce_to_tuple(main_tree_files)
        self._support_tree_files = safe_coerce_to_tuple(support_tree_files)

        all_tree_files = self._main_tree_files + self._support_tree_files
        described = describe_files(main_tree_files)

        Node.__init__(self,
                      description="<NewickSupport: %s>" % (described,),
                      input_files=all_tree_files,
                      output_files=output_file,
                      dependencies=dependencies)
Esempio n. 27
0
    def __init__(self, description, destination, source_nodes):
        """Pair each output file of the source nodes with a rerooted path.

        The source nodes' output files are this node's inputs; its outputs
        are those paths passed through reroot_path with 'destination'. The
        source nodes are also registered as dependencies.
        """
        source_nodes = safe_coerce_to_tuple(source_nodes)

        sources = []
        for node in source_nodes:
            sources.extend(node.output_files)

        targets = []
        for source in sources:
            targets.append(reroot_path(destination, source))

        self._files = zip(sources, targets)

        label = "<Copy %s output to %r>" % (description, destination)
        Node.__init__(self,
                      description=label,
                      input_files=sources,
                      output_files=targets,
                      dependencies=source_nodes)
Esempio n. 28
0
    def __init__(self, d_bam, rl, halfresolution, dependencies=()):
        """Derive the output path from 'd_bam' and 'rl' and register the node.

        The output is placed in d_bam.bam_temp_local and named from the BAM
        name, GC_NAME and 'rl'; the input is d_bam.baminfo["BamPath"].
        """
        self.halfresolution = halfresolution

        out_name = d_bam.bam_name + GC_NAME + '_' + str(rl)
        self.dest = os.path.join(d_bam.bam_temp_local, out_name)

        self.rl = rl
        self.d_bam = d_bam

        chrom_used = self.d_bam.opts['GCcorrect']['--ChromUsed']
        template = "<Gccorrect: '%s' window length: '%s' based on chromosome '%s' >"
        description = template % (self.dest, rl, chrom_used)

        Node.__init__(self,
                      description=description,
                      input_files=self.d_bam.baminfo["BamPath"],
                      output_files=self.dest,
                      dependencies=dependencies)

        # Checked against self.output_files, as set up by Node.__init__
        assert len(self.output_files) == 1, self.output_files
Esempio n. 29
0
    def __init__(self, input_file, output_file, filter_by, dependencies):
        """Store the paths and a normalized copy of 'filter_by'.

        Each key of 'filter_by' is added to its own group, stored as a set.
        Raises RuntimeError if a group then holds nothing but the key itself.
        """
        self._input_file = input_file
        self._output_file = output_file
        self._filter_by = dict(filter_by)

        # Iterate over a snapshot of the keys, since values are replaced
        for to_filter in list(self._filter_by):
            members = set(self._filter_by[to_filter])
            members.add(to_filter)
            if len(members) == 1:
                raise RuntimeError("Singleton filtering must involve at least one other group")
            self._filter_by[to_filter] = members

        description = "<FilterSingleton: '%s' -> '%s'>" \
            % (input_file, output_file)

        Node.__init__(self,
                      description=description,
                      input_files=[input_file],
                      output_files=[output_file],
                      dependencies=dependencies)
Esempio n. 30
0
 def __init__(self, config, d_make, bedn, dependencies=()):
     """Replace a bed file entry in 'd_make' with the node's output names.

     The output names come from self._createbednames(), which is called
     after temp_local, inbedfile and no_subbed have been set. Note that
     d_make.bedfiles[bedn] is overwritten with those names.
     """
     self.temp_local = config.temp_local
     self.inbedfile = d_make.bedfiles[bedn]
     self.no_subbed = config.max_threads

     # Must follow the assignments above; see the docstring
     output_names = self._createbednames()
     self.outputnames = output_names
     d_make.bedfiles[bedn] = output_names

     template = "<SplitBedFile: '%s' to '%s', Splitted in %s sub bed files"
     description = template \
         % (self.inbedfile, self.temp_local, str(self.no_subbed))

     Node.__init__(self,
                   description=description,
                   input_files=self.inbedfile,
                   output_files=self.outputnames,
                   dependencies=dependencies)

     assert isinstance(self.outputnames, list), \
         "output has to be a list of strings"
Esempio n. 31
0
    def __init__(self,
                 input_file,
                 output_file,
                 exclude_groups,
                 dependencies=()):
        """Record the paths and excluded groups, then register with Node."""
        self._input_file = input_file
        self._output_file = output_file
        self._excluded = safe_coerce_to_tuple(exclude_groups)

        template = "<FastaToPAMLPhy: '%s' -> '%s'>"
        Node.__init__(self,
                      description=template % (input_file, output_file),
                      input_files=[input_file],
                      output_files=[output_file],
                      dependencies=dependencies)
Esempio n. 32
0
def test_run__exception__create_temp_dir():
    """An OSError from _create_temp_dir should surface as NodeUnhandledException."""
    cfg_mock = flexmock(temp_root=_DUMMY_TEMP_ROOT)
    node_mock = flexmock(Node())

    (node_mock.should_receive('_create_temp_dir')
     .with_args(cfg_mock)
     .and_raise(OSError())
     .ordered.once)

    assert_raises(NodeUnhandledException, node_mock.run, cfg_mock)  # pylint: disable=E1103
Esempio n. 33
0
    def __init__(self, fasta_files, sequences, destination, dependencies = ()):
        """
        fasta_files -- { taxon_name_1 : filename_1, ... }
        sequences   -- { interval_name_1 : { taxon_name_1 : interval_name_1.1, ... }, ... }

        One "<interval_name>.fasta" output file is placed in 'destination'
        for every key in 'sequences'.
        """
        self._infiles = copy.deepcopy(fasta_files)
        self._sequences = copy.deepcopy(sequences)
        self._destination = copy.copy(destination)

        outfiles = []
        for name in self._sequences:
            outfiles.append(os.path.join(destination, name + ".fasta"))
        self._outfiles = outfiles

        summary = (len(self._sequences), len(self._infiles), self._destination)
        description = "<CollectSequences: %i sequences from %i files -> '%s'>" % summary

        Node.__init__(self,
                      description=description,
                      input_files=self._infiles.values(),
                      output_files=self._outfiles,
                      dependencies=dependencies)
Esempio n. 34
0
        def test_function():
            """Run a node whose 'key' method raises and check the outcome.

            Uses cfg_mock, key, exception and expectation from the enclosing
            scope. The temporary directory must not be removed.
            """
            node_mock = flexmock(Node())

            (node_mock.should_receive('_create_temp_dir')
             .with_args(cfg_mock)
             .and_return(_DUMMY_TEMP)
             .ordered.once)
            node_mock.should_receive(key).and_raise(exception).ordered.once
            node_mock.should_receive('_remove_temp_dir').never

            assert_raises(expectation, node_mock.run, cfg_mock)  # pylint: disable=E1103
Esempio n. 35
0
    def __init__(self, tree_files, output_file, taxa = (), dependencies = ()):
        """Store the tree files and taxa as tuples and register the node.

        The description names the sorted taxa, or "midpoint" when no taxa
        were given.
        """
        self._output_file = output_file
        self._tree_files = safe_coerce_to_tuple(tree_files)
        self._reroot_on_taxa = safe_coerce_to_tuple(taxa)

        if self._reroot_on_taxa:
            sorted_taxa = sorted(self._reroot_on_taxa)
            reroot_on = repr("', '".join(sorted_taxa))
        else:
            reroot_on = "midpoint"

        described = describe_files(tree_files)
        description = "<NewickReroot (on %s): %s>" % (reroot_on, described)

        Node.__init__(self,
                      description=description,
                      input_files=self._tree_files,
                      output_files=self._output_file,
                      dependencies=dependencies)
Esempio n. 36
0
    def __init__(self,
                 input_alignment,
                 input_partition,
                 output_alignment,
                 seed=None,
                 dependencies=()):
        """Record the alignment/partition paths and seed, then register with Node."""
        self._input_phy = input_alignment
        self._input_part = input_partition
        self._output_phy = output_alignment
        self._seed = seed

        template = "<PHYLIPBootstrap: %r -> %r>"
        inputs = (input_alignment, input_partition)
        outputs = (output_alignment,)

        Node.__init__(self,
                      description=template % (input_alignment, output_alignment),
                      input_files=inputs,
                      output_files=outputs,
                      dependencies=dependencies)
Esempio n. 37
0
    def __init__(self, input_file, output_file, filter_by, dependencies):
        """Keep the paths and build the per-key filtering groups.

        Each group in 'filter_by' is stored as a set that also contains its
        key. Raises RuntimeError if a group contains no member besides the key.
        """
        self._input_file = input_file
        self._output_file = output_file
        self._filter_by = dict(filter_by)

        # Iterate over a snapshot of the keys, since values are replaced
        for key in list(self._filter_by):
            group = set(self._filter_by[key]) | set([key])
            if len(group) == 1:
                message = "Singleton filtering must involve at least one other group"
                raise RuntimeError(message)
            self._filter_by[key] = group

        template = "<FilterSingleton: '%s' -> '%s'>"
        Node.__init__(self,
                      description=template % (input_file, output_file),
                      input_files=[input_file],
                      output_files=[output_file],
                      dependencies=dependencies)
Esempio n. 38
0
def test_is_done__output_changes(temp_folder):
    """A node is only done once every one of its output files exists."""
    first = os.path.join(temp_folder, "file_1.txt")
    second = os.path.join(temp_folder, "file_2.txt")
    node = Node(output_files=(first, second))

    assert not node.is_done

    # Only one of the two outputs exists at this point
    set_file_contents(first, "foo")
    assert not node.is_done

    set_file_contents(second, "bar")
    assert node.is_done
Esempio n. 39
0
def test_run__order():
    """run() should call _setup, _run and _teardown once each, in that order."""
    cfg_mock = flexmock(temp_root="/tmp")
    node_mock = flexmock(Node())

    # Expectations are registered in the order the calls are expected
    for method in ("_setup", "_run", "_teardown"):
        node_mock.should_receive(method).with_args(cfg_mock, "/tmp/xTMPx").ordered.once

    with MonkeypatchCreateTempDir():
        node_mock.run(cfg_mock) # pylint: disable=E1103
Esempio n. 40
0
    def __init__(self, description, destination, source_nodes):
        """Map the source nodes' output files onto paths under 'destination'.

        The collected output files become this node's inputs, and their
        reroot_path results its outputs; the (input, output) pairs are kept
        in self._files. The source nodes are registered as dependencies.
        """
        source_nodes = safe_coerce_to_tuple(source_nodes)

        collected = []
        for node in source_nodes:
            collected.extend(node.output_files)

        rerooted = [reroot_path(destination, path) for path in collected]
        self._files = zip(collected, rerooted)

        label = "<Copy %s output to %r>" % (description, destination)
        Node.__init__(self,
                      description=label,
                      input_files=collected,
                      output_files=rerooted,
                      dependencies=source_nodes)
Esempio n. 41
0
    def __init__(self,
                 config,
                 makefile,
                 target,
                 cov_for_lanes,
                 cov_for_libs,
                 dependencies=()):
        """Collect the input files of the summary node for 'target'.

        The output is "<target.name>.summary" in config.destination. Input
        files are every file listed in 'cov_for_lanes' and 'cov_for_libs',
        plus the 'stats' of each lane that has reads and stats. The source
        of the raw reads is recorded per (sample, library, lane) name in
        self._in_raw_read.
        """
        self._target = target.name
        summary_name = self._target + ".summary"
        self._output_file = os.path.join(config.destination, summary_name)
        self._prefixes = makefile["Prefixes"]
        self._makefile = makefile["Statistics"]

        self._in_raw_bams = cov_for_lanes
        self._in_lib_bams = cov_for_libs
        input_files = set()
        for coverage in (self._in_raw_bams, self._in_lib_bams):
            for filenames in coverage.values():
                input_files.update(filenames)

        self._in_raw_read = collections.defaultdict(list)
        for prefix in target.prefixes:
            for sample in prefix.samples:
                for library in sample.libraries:
                    for lane in library.lanes:
                        reads = lane.reads
                        if not reads:
                            value = _BAMS
                        elif reads.stats:
                            # Stats files are inputs of the summary as well
                            value = reads.stats
                            input_files.add(value)
                        elif set(reads.files) & _PE_READS:
                            value = _PE_READS
                        elif set(reads.files) & _SE_READS:
                            value = _SE_READS
                        else:
                            assert False

                        key = (sample.name, library.name, lane.name)
                        self._in_raw_read[key] = value

        Node.__init__(self,
                      description="<Summary: %s>" % self._output_file,
                      input_files=filter(None, input_files),
                      output_files=[self._output_file],
                      dependencies=dependencies)
Esempio n. 42
0
 def __init__(self, d_bam, anal, bedn, dependencies=()):
     """Register a node with the output files of 'dependencies' as input.

     d_bam        -- provides bam_output, bam_name and fmt, which are used
                     to build the destination path.
     anal         -- stored as-is on the node.
     bedn         -- passed to d_bam.fmt.format when naming the output.
     dependencies -- nodes whose output files are the inputs of this node;
                     at least one is required.

     Raises AssertionError if 'dependencies' yields no input files.
     """
     self.infiles = [''.join(n.output_files) for n in dependencies]
     # Checked before the first element is accessed below; otherwise an
     # empty list fails with an IndexError and this message is never shown.
     assert self.infiles, "No temporary files to merge"

     self.anal = anal
     analname = self._check_gccorr_name(self.infiles[0])
     self.dest = os.path.join(
         d_bam.bam_output, d_bam.fmt.format(d_bam.bam_name, analname, bedn))

     if len(self.infiles) > 1:
         description = "<MergeDataFiles: '%s' ... '%s' ->  '%s'" % \
                       (os.path.basename(self.infiles[0]),
                        os.path.basename(self.infiles[-1]),
                        self.dest)
     else:
         description = "<MergeDataFiles: '%s' -> '%s'" % \
                       (os.path.basename(self.infiles[0]), self.dest)
     Node.__init__(self,
                   description=description,
                   input_files=self.infiles,
                   output_files=self.dest,
                   dependencies=dependencies)
Esempio n. 43
0
    def __init__(self, input_file, output_file, filter_by, dependencies):
        """Store the paths and the groups used for singleton filtering.

        Each value of 'filter_by' is stored as a frozenset with its own key
        removed. Raises RuntimeError if nothing remains in a group.
        """
        self._input_file = input_file
        self._output_file = output_file
        self._filter_by = dict(filter_by)

        coerce = utilities.safe_coerce_to_frozenset
        # Iterate over a snapshot of the keys, since values are replaced
        for to_filter in list(self._filter_by):
            # The taxa to be filtered is implied to be part of the group,
            # but is not needed when actually carrying out the filtering
            others = coerce(self._filter_by[to_filter]) - coerce(to_filter)
            if not others:
                raise RuntimeError("Singleton filtering must involve at least one other taxa")
            self._filter_by[to_filter] = others

        description = "<FilterSingleton: '%s' -> '%s'>" \
            % (input_file, output_file)

        Node.__init__(self,
                      description=description,
                      input_files=[input_file],
                      output_files=[output_file],
                      dependencies=dependencies)
Esempio n. 44
0
    def __init__(self, fasta_files, sequences, destination, dependencies=()):
        """
        fasta_files -- { taxon_name_1 : filename_1, ... }
        sequences   -- { interval_name_1 : { taxon_name_1 : interval_name_1.1, ... }, ... }

        The node writes one "<interval_name>.fasta" file to 'destination'
        for each key in 'sequences'; the FASTA filenames are its inputs.
        """
        self._infiles = copy.deepcopy(fasta_files)
        self._sequences = copy.deepcopy(sequences)
        self._destination = copy.copy(destination)

        def _output_path(name):
            return os.path.join(destination, name + ".fasta")

        self._outfiles = [_output_path(name) for name in self._sequences]

        counts = (len(self._sequences), len(self._infiles), self._destination)
        description = "<CollectSequences: %i sequences from %i files -> '%s'>" % counts

        Node.__init__(self,
                      description=description,
                      input_files=self._infiles.values(),
                      output_files=self._outfiles,
                      dependencies=dependencies)
Esempio n. 45
0
    def __init__(self,
                 input_file,
                 target_name,
                 output_file=None,
                 intervals_file=None,
                 max_contigs=_MAX_CONTIGS,
                 dependencies=()):
        """Set up a coverage node for a single input file.

        If no 'output_file' is given, the output is the input path with its
        extension swapped for ".coverage". The input file and the path with
        its extension swapped for ".bai" are the node's inputs.
        """
        self._target_name = target_name
        self._input_file = input_file
        if output_file:
            self._output_file = output_file
        else:
            self._output_file = swap_ext(input_file, ".coverage")
        self._intervals_file = intervals_file
        self._max_contigs = max_contigs
        self._max_contigs_reached = False

        description = "<Coverage: '%s' -> '%s'>" \
            % (input_file, self._output_file)
        index_file = swap_ext(input_file, ".bai")

        Node.__init__(self,
                      description=description,
                      input_files=(input_file, index_file),
                      output_files=self._output_file,
                      dependencies=dependencies)
Esempio n. 46
0
    def __init__(self, input_file, output_file, filter_by, dependencies):
        """Keep the paths and normalize the 'filter_by' groups.

        Every group is stored as a frozenset that excludes its own key.
        Raises RuntimeError if a group is empty after that removal.
        """
        self._input_file = input_file
        self._output_file = output_file
        self._filter_by = dict(filter_by)

        # Iterate over a snapshot of the keys, since values are replaced
        for key in list(self._filter_by):
            # The taxa to be filtered is implied to be part of the group,
            # but is not needed when actually carrying out the filtering
            group = utilities.safe_coerce_to_frozenset(self._filter_by[key])
            excluded = utilities.safe_coerce_to_frozenset(key)
            remaining = group - excluded

            if not remaining:
                message = "Singleton filtering must involve at least one other taxa"
                raise RuntimeError(message)
            self._filter_by[key] = remaining

        template = "<FilterSingleton: '%s' -> '%s'>"
        Node.__init__(self,
                      description=template % (input_file, output_file),
                      input_files=[input_file],
                      output_files=[output_file],
                      dependencies=dependencies)
Esempio n. 47
0
def test_run__temp_dirs():
    """Check that _setup, _run and _teardown all receive '/tmp/xTMPx'.

    The node is run with a config whose 'temp_root' is '/tmp', inside the
    MonkeypatchCreateTempDir context.
    """
    expected_path = "/tmp/xTMPx"

    def check_temp_dir(_, path):
        # The first argument is irrelevant here; only the path is verified
        assert_equal(path, expected_path)

    config = flexmock(temp_root="/tmp")
    node = flexmock(Node(),
                    _setup=check_temp_dir,
                    _run=check_temp_dir,
                    _teardown=check_temp_dir)

    with MonkeypatchCreateTempDir():
        node.run(config)  # pylint: disable=E1103
Esempio n. 48
0
    def __init__(self, config, makefile, target, cov_for_lanes, cov_for_libs, dependencies=()):
        """
        config        -- object providing 'destination', the folder in which
                         '<target.name>.summary' is written
        makefile      -- mapping providing the "Prefixes" and "Statistics" keys
        target        -- object providing 'name' and 'prefixes'; the prefixes
                         are walked down through samples, libraries and lanes
        cov_for_lanes -- mapping whose values are iterables of filenames
        cov_for_libs  -- mapping whose values are iterables of filenames
        dependencies  -- passed on to Node.__init__

        Raises AssertionError if a lane has reads without stats, and with
        files matching neither _PE_READS nor _SE_READS.
        """
        self._target = target.name
        self._output_file = os.path.join(config.destination, self._target + ".summary")
        self._prefixes = makefile["Prefixes"]
        self._makefile = makefile["Statistics"]

        self._in_raw_bams = cov_for_lanes
        self._in_lib_bams = cov_for_libs

        # Every file listed in either table is an input of this node; the set
        # is updated directly rather than via an intermediate flattened list.
        input_files = set()
        for coverage_table in (self._in_raw_bams, self._in_lib_bams):
            for filenames in coverage_table.values():
                input_files.update(filenames)

        self._in_raw_read = collections.defaultdict(list)
        for prefix in target.prefixes:
            for sample in prefix.samples:
                for library in sample.libraries:
                    for lane in library.lanes:
                        key = (sample.name, library.name, lane.name)

                        if not lane.reads:
                            value = _BAMS
                        elif lane.reads.stats:
                            # The stats value is also registered as an input
                            value = lane.reads.stats
                            input_files.add(value)
                        elif set(lane.reads.files) & _PE_READS:
                            value = _PE_READS
                        elif set(lane.reads.files) & _SE_READS:
                            value = _SE_READS
                        else:
                            # Raised explicitly, since a bare 'assert' is
                            # stripped under 'python -O', which would leave
                            # 'value' unbound or stale from the previous lane.
                            raise AssertionError(
                                "Unexpected read files for lane %r: %r"
                                % (key, lane.reads.files))

                        self._in_raw_read[key] = value

        Node.__init__(
            self,
            description="<Summary: %s>" % self._output_file,
            # Falsy entries (e.g. None) are not valid input files
            input_files=[filename for filename in input_files if filename],
            output_files=[self._output_file],
            dependencies=dependencies,
        )
Esempio n. 49
0
    def __init__(self, fasta_files, sequences, destination, dependencies=()):
        """
        fasta_files  -- { taxon_name_1 : filename_1, ... }
        sequences    -- { interval_name_1, ... }
        destination  -- folder in which one '<name>.fasta' file is expected
                        per name in 'sequences'
        dependencies -- passed on to Node.__init__
        """
        self._infiles = copy.deepcopy(fasta_files)
        self._sequences = utilities.safe_coerce_to_frozenset(sequences)
        self._destination = copy.copy(destination)

        outfiles = []
        for name in self._sequences:
            outfiles.append(os.path.join(destination, name + ".fasta"))
        self._outfiles = outfiles

        # Each input file is accompanied by a '.fai' file, which must be
        # present as well; the plain filenames come first, then the '.fai's.
        fasta_paths = list(self._infiles.itervalues())
        index_paths = [path + ".fai" for path in fasta_paths]

        counts = (len(self._sequences), len(self._infiles), self._destination)
        description = "<CollectSequences: %i sequences from %i files -> '%s'>" % counts

        Node.__init__(self,
                      description=description,
                      input_files=fasta_paths + index_paths,
                      output_files=self._outfiles,
                      dependencies=dependencies)
Esempio n. 50
0
 def __init__(self):
     """Attach a lambda to the instance, then run Node's initializer."""
     # 'a_property' holds a function object (a lambda returning None), and is
     # set before Node.__init__ is invoked without further arguments.
     # NOTE(review): presumably a test fixture that needs a node carrying a
     # function-valued attribute -- confirm against the code using this class.
     self.a_property = lambda: None # pragma: no coverage
     Node.__init__(self)