Ejemplo n.º 1
0
def _build_coverage(config, makefile, target, make_summary):
    """Assemble the coverage nodes for `target` and return the coverage table.

    The table returned by `_build_coverage_nodes(target)` is extended with a
    "Node" entry: a MetaNode grouping (a) the partial per-library (and, if
    `make_summary` is true, per-lane) coverage nodes and (b) one
    MergeCoverageNode per prefix / area-of-interest pair.

    `makefile` is accepted for interface compatibility but is not used here.
    """
    coverage = _build_coverage_nodes(target)

    final_coverage = []
    for prefix in target.prefixes:
        # The filename yielded alongside each AOI name is not needed here
        for (aoi_name, _) in _get_aoi(prefix):
            label = _get_prefix_label(prefix.label, aoi_name)
            if aoi_name:
                postfix = "%s.%s" % (prefix.name, aoi_name)
            else:
                postfix = prefix.name

            selected = _aggregate_for_prefix(coverage["Libraries"], label)
            basename = "%s.%s.coverage" % (target.name, postfix)
            destination = os.path.join(config.destination, basename)

            final_coverage.append(
                MergeCoverageNode(input_files=selected.keys(),
                                  output_file=destination,
                                  dependencies=selected.values()))

    # Partial results: always the libraries, plus the lanes for summaries
    partial = _aggregate_for_prefix(coverage["Libraries"], None)
    if make_summary:
        partial_description = "Lanes and libraries"
        partial = _aggregate_for_prefix(coverage["Lanes"], None, into=partial)
    else:
        partial_description = "Libraries"

    partial_meta = MetaNode(description=partial_description,
                            subnodes=partial.values())
    final_meta = MetaNode(description="Final coverage",
                          subnodes=final_coverage)

    coverage["Node"] = MetaNode(description="Coverage",
                                subnodes=(partial_meta, final_meta))
    return coverage
Ejemplo n.º 2
0
    def __init__(self, config, prefix, samples, features, target):
        """Collect the BAMs of `samples` for one prefix and build its node.

        If "Raw BAM" and/or "Realigned BAM" is listed in `features`, the
        corresponding final BAMs are built from the per-sample BAMs and
        exposed via `self.bams`; otherwise `self.bams` holds the per-sample
        BAMs themselves. `self.node` depends on the node of every sample.
        """
        prefix_name = prefix["Name"]

        self.name = prefix_name
        self.label = prefix.get("Label") or prefix_name
        self.reference = prefix["Reference"]
        self.aoi = prefix.get("AreasOfInterest", {})

        self.samples = safe_coerce_to_tuple(samples)
        self.bams = {}
        self.folder = config.destination
        self.target = target

        # Every per-sample BAM, mapped to whatever the sample stored for it
        sample_bams = {}
        for sample in self.samples:
            sample_bams.update(sample.bams.iteritems())

        final_bam_builders = (
            ("Raw BAM", self._build_raw_bam),
            ("Realigned BAM", self._build_realigned_bam),
        )
        for (feature, builder) in final_bam_builders:
            if feature in features:
                self.bams.update(builder(config, prefix, sample_bams))

        dependencies = [sample.node for sample in self.samples]
        if self.bams:
            self.node = MetaNode(description="Final BAMs: %s" % prefix_name,
                                 subnodes=self.bams.values(),
                                 dependencies=dependencies)
        else:
            # No final BAMs were requested; fall back to the sample BAMs
            for sample in self.samples:
                self.bams.update(sample.bams)

            self.node = MetaNode(description="Prefix: %s" % prefix_name,
                                 dependencies=dependencies)
Ejemplo n.º 3
0
    def __init__(self, config, prefix, node, log_file=None):
        """Wrap `node` in a MetaNode that additionally validates its BAM.

        An index node is inserted when `_get_input_file` reports that the
        BAM has no index. The resulting MetaNode keeps the dependencies of
        the wrapped node. `prefix` is only referenced by the disabled MD-tag
        check described below.
        """
        input_file, has_index = self._get_input_file(node)
        dependencies = node.dependencies
        subnodes = [node]

        # Validation runs after indexing when an index has to be built,
        # otherwise directly after the wrapped node.
        validate_after = node
        if not has_index:
            validate_after = BAMIndexNode(infile=input_file,
                                          dependencies=node)
            subnodes.append(validate_after)

        validation = ValidateBAMNode.customize(config=config,
                                               input_bam=input_file,
                                               output_log=log_file,
                                               dependencies=validate_after)

        # FIXME: Checking MD tags against the reference sequence (by passing
        #        prefix["Reference"] via the "R" option) is disabled due to
        #        issues with Picard/Samtools disagreeing, and for backwards
        #        compatibility.
        ignored_checks = (
            # We filter out misses and low-quality hits during mapping, which
            # leads to a large proportion of missing mates for PE reads.
            "MATE_NOT_FOUND",
            # High rate of false positives for lanes with few hits, where
            # high-quality reads may cause ValidateSamFile to mis-identify
            # the qualities.
            "INVALID_QUALITY_FORMAT",
        )
        for check in ignored_checks:
            validation.command.add_option("IGNORE", check, sep="=")
        subnodes.append(validation.build_node())

        MetaNode.__init__(self,
                          description="<w/Validation: " + str(node)[1:],
                          subnodes=subnodes,
                          dependencies=dependencies)
Ejemplo n.º 4
0
def build_phylogeny_nodes(options, settings, filtering, dependencies):
    """Build the nodes for every run listed under "PhylogeneticInference".

    Runs with "PerGeneTrees" enabled get one MetaNode grouping the nodes
    built for each of their regions of interest; other runs contribute the
    nodes returned by `_build_examl_regions_nodes` directly. All of them
    become dependencies of the returned "Phylogenetic Inference" MetaNode.
    """
    inference_nodes = []
    for (run_name, run_dd) in settings["PhylogeneticInference"].iteritems():
        destination = os.path.join(options.destination, "phylogenies",
                                   run_name)

        if not run_dd["PerGeneTrees"]:
            inference_nodes.extend(
                _build_examl_regions_nodes(options, settings, run_dd,
                                           destination, filtering,
                                           dependencies))
            continue

        # One sub-folder (and set of nodes) per region of interest
        per_gene_nodes = []
        for roi in run_dd["RegionsOfInterest"].itervalues():
            per_gene_nodes.extend(
                _build_examl_per_gene_nodes(
                    options, settings, run_dd, roi,
                    os.path.join(destination, roi["Name"]),
                    filtering, dependencies))

        inference_nodes.append(MetaNode(description=run_name,
                                        subnodes=per_gene_nodes,
                                        dependencies=dependencies))

    return MetaNode("Phylogenetic Inference", dependencies=inference_nodes)
Ejemplo n.º 5
0
    def __init__(self, config, reference, infiles, outfile, intervals=None,
                 dependencies=()):
        """Chain an indel trainer and an indel realigner for `infiles`.

        The trainer writes the intervals file (by default `outfile` plus
        ".intervals"); the realigner depends on the trainer and writes
        `outfile`. Both are exposed as subnodes of this MetaNode.
        """
        intervals = intervals or (outfile + ".intervals")
        infiles = safe_coerce_to_tuple(infiles)

        # Arguments shared by the training and the realignment steps
        shared = {"config": config,
                  "reference": reference,
                  "infiles": infiles}

        trainer = _IndelTrainerNode(outfile=intervals,
                                    dependencies=dependencies,
                                    **shared)
        aligner = _IndelRealignerNode(intervals=intervals,
                                      outfile=outfile,
                                      dependencies=trainer,
                                      **shared)

        MetaNode.__init__(
            self,
            description="<GATK Indel Realigner: %i files -> '%s'>"
            % (len(infiles), outfile),
            subnodes=[trainer, aligner],
            dependencies=dependencies)
Ejemplo n.º 6
0
    def __init__(self, config, prefix, node, log_file=None):
        """Create a MetaNode that runs BAM validation on top of `node`.

        The subnodes are the wrapped node, an index node (only if
        `_get_input_file` reports a missing index) and the validation node;
        the dependencies are those of the wrapped node. `prefix` is unused
        while the MD-tag check described below stays disabled.
        """
        input_file, has_index = self._get_input_file(node)
        wrapped, dependencies = node, node.dependencies

        members = [wrapped]
        last_step = wrapped
        if not has_index:
            last_step = BAMIndexNode(infile=input_file, dependencies=wrapped)
            members.append(last_step)

        params = ValidateBAMNode.customize(config=config,
                                           input_bam=input_file,
                                           output_log=log_file,
                                           dependencies=last_step)

        # FIXME: The check of MD tags against the reference sequence (option
        #   "R" with prefix["Reference"]) is disabled due to issues with
        #   Picard/Samtools disagreeing, and for backwards compatibility.
        #   See the discussion at
        #     http://sourceforge.net/mailarchive/message.php?msg_id=31348639

        # MATE_NOT_FOUND: we may filter out misses and low-quality hits during
        # mapping, which leads to a large proportion of missing PE mates.
        params.command.add_option("IGNORE", "MATE_NOT_FOUND", sep="=")
        # INVALID_QUALITY_FORMAT: high rate of false positives for lanes with
        # few hits, where high-quality reads may cause mis-identification of
        # the qualities.
        params.command.add_option("IGNORE", "INVALID_QUALITY_FORMAT", sep="=")
        members.append(params.build_node())

        MetaNode.__init__(self,
                          description="<w/Validation: " + str(wrapped)[1:],
                          subnodes=members,
                          dependencies=dependencies)
Ejemplo n.º 7
0
def build_codeml_nodes(options, settings, interval, taxa, filtering,
                       dependencies):
    """Build the PHYLIP conversion and codeml nodes for one interval.

    Each sequence collected for `interval` is converted to the PHYLIP
    format accepted by codeml, and one CodemlNode is then created per
    (control file, sequence) pair.

    The input/output folder postfixes and the input extension depend on
    whether any filter in `filtering` is enabled and on whether multiple
    sequence alignment is enabled in `settings`.

    Returns a MetaNode with the codeml nodes as subnodes, depending on the
    MetaNode that groups the conversion nodes.
    """
    in_postfix, out_postfix, afa_ext = "", "", ".afa"
    if any(filtering.itervalues()):
        in_postfix = out_postfix = ".filtered"
    if not settings["MSAlignment"]["Enabled"]:
        out_postfix = ".unaligned" + out_postfix
        afa_ext = ".fasta"

    paml = settings["PAML"]
    sequences = common.collect_sequences(options, interval, taxa)
    sequencedir = os.path.join(options.destination, "alignments",
                               interval["Name"] + in_postfix)
    destination = os.path.join(options.destination, "paml", "codeml",
                               interval["Name"] + out_postfix)

    # Build meta-node for sequence conversion to PHYLIP format accepted by
    # codeml
    phylip_nodes = {}
    for sequence in sequences:
        input_file = os.path.join(sequencedir, sequence + afa_ext)
        output_file = os.path.join(destination, sequence + ".phy")

        phylip_nodes[sequence] = FastaToPAMLPhyNode(
            input_file=input_file,
            output_file=output_file,
            exclude_groups=paml["codeml"]["ExcludeGroups"],
            dependencies=dependencies)

    # 'afa_ext' already starts with a dot, so the pattern must not add a
    # second one (the description previously read e.g. "*..afa").
    phylip_description = "<FastaToPAMLPhyNodes: '%s/*%s' -> '%s/*.phy'>" \
        % (sequencedir, afa_ext, destination)
    phylip_meta = MetaNode(description=phylip_description,
                           subnodes=phylip_nodes.values(),
                           dependencies=dependencies)

    codeml_nodes = []
    for (ctl_name, ctl_file) in paml["codeml"]["Control Files"].iteritems():
        for (sequence, node) in phylip_nodes.iteritems():
            output_prefix = os.path.join(destination,
                                         sequence + ".%s" % (ctl_name, ))

            # next(iter(...)) rather than iter(...).next(): same result, but
            # not tied to the Python 2 iterator protocol.
            codeml = CodemlNode(control_file=ctl_file,
                                trees_file=paml["codeml"]["Tree File"],
                                sequence_file=next(iter(node.output_files)),
                                output_prefix=output_prefix,
                                dependencies=node)
            codeml_nodes.append(codeml)

    return MetaNode(description="<CodemlNodes>",
                    subnodes=codeml_nodes,
                    dependencies=phylip_meta)
Ejemplo n.º 8
0
    def __init__(self, input_files, destination, filter_by, dependencies=()):
        """Create one FilterSingletonsNode per entry of `input_files`.

        `input_files` maps each input filename to the node it depends on;
        every output is the input path re-rooted under `destination`. All
        nodes share a single dict copy of `filter_by`.
        """
        filter_by = dict(filter_by)
        subnodes = [
            FilterSingletonsNode(
                input_file=filename,
                output_file=fileutils.reroot_path(destination, filename),
                filter_by=filter_by,
                dependencies=node)
            for (filename, node) in input_files.iteritems()
        ]

        description = "<FilterSingleton: %i files -> '%s'>" \
            % (len(subnodes), destination)
        MetaNode.__init__(self,
                          description=description,
                          subnodes=subnodes,
                          dependencies=dependencies)
Ejemplo n.º 9
0
    def __init__(self, input_files, destination, filter_by, dependencies=()):
        """Group singleton-filtering nodes for all of `input_files`.

        For each (filename, node) pair in `input_files`, a
        FilterSingletonsNode is built that depends on `node` and writes to
        the path obtained by re-rooting `filename` under `destination`.
        """
        # A single copy of the filter settings is shared by all subnodes
        filters = dict(filter_by)

        filter_nodes = []
        for (source, source_node) in input_files.iteritems():
            target_path = fileutils.reroot_path(destination, source)
            filter_node = FilterSingletonsNode(input_file=source,
                                               output_file=target_path,
                                               filter_by=filters,
                                               dependencies=source_node)
            filter_nodes.append(filter_node)

        summary = "<FilterSingleton: %i files -> '%s'>" % (len(filter_nodes),
                                                           destination)
        MetaNode.__init__(self,
                          description=summary,
                          subnodes=filter_nodes,
                          dependencies=dependencies)
Ejemplo n.º 10
0
    def __init__(self, config, prefixes, name):
        """Record the target's name and prefixes and build its alignment node.

        The "Alignments:" MetaNode depends on the node of every prefix.
        `config` is not used here.
        """
        self.name = name
        self.prefixes = safe_coerce_to_tuple(prefixes)

        prefix_nodes = [prefix.node for prefix in self.prefixes]
        self._nodes_alignment = MetaNode(description="Alignments:",
                                         dependencies=prefix_nodes)
        self._nodes_extras = {}
Ejemplo n.º 11
0
    def __init__(self, rootdir, sequences, preset = "auto", subnodes = (), dependencies = ()):
        """Create one MAFFTNode per sequence name found under `rootdir`.

        For every name in `sequences`, "<rootdir>/<name>.fasta" is aligned
        into "<rootdir>/<name>.afa" using the given `preset`.

        `subnodes` is an iterable of additional nodes to group under this
        MetaNode. It used to be overwritten with an empty list, so anything
        the caller passed was silently dropped; it is now kept, with the
        alignment nodes appended after it. The default of `()` behaves
        exactly as before.
        """
        subnodes = list(subnodes)
        for sequence in sequences:
            prefix = os.path.join(rootdir, sequence)
            subnodes.append(MAFFTNode(infile=prefix + ".fasta",
                                      outfile=prefix + ".afa",
                                      preset=preset,
                                      dependencies=dependencies))

        description = "<MAFFTAlignSequences (%s): In '%s'>" \
            % (preset, rootdir)
        MetaNode.__init__(self,
                          description=description,
                          subnodes=subnodes,
                          dependencies=dependencies)
Ejemplo n.º 12
0
def build_taxa_nodes(options, genotyping, intervals, taxa, dependencies=()):
    """Build the genotyping nodes of one taxon for every interval.

    Each interval is deep-copied before it is specialised for `taxa`, so
    the entries of `intervals` are left unmodified. The builder is picked
    by the taxon's "Genotyping Method" (default "samtools",
    case-insensitive); intervals contribute no nodes if the method is not
    one of the three handled below.

    Returns a MetaNode named after the taxon that depends on all nodes.
    """
    taxa_nodes = []
    for template in intervals.itervalues():
        interval = deepcopy(template)
        # Override default genome (BAM file) if specified
        interval["Genome"] = common.get_genome_for_interval(interval, taxa)
        # Enforce homozygous contigs based on gender tag
        contigs_by_gender = interval["Homozygous Contigs"]
        interval["Homozygous Contigs"] = contigs_by_gender[taxa["Gender"]]

        method = taxa.get("Genotyping Method", "samtools").lower()
        if method == "samtools":
            built = build_genotyping_nodes(options, genotyping, taxa,
                                           interval, dependencies)
        elif method == "random sampling":
            built = build_sampling_nodes(options, genotyping, taxa, interval,
                                         dependencies)
        elif method == "reference sequence":
            built = build_reference_nodes(options, taxa, interval,
                                          dependencies)
        else:
            # Unrecognized methods are skipped without an error
            built = ()

        taxa_nodes.extend(built)

    return MetaNode(description=taxa["Name"], dependencies=taxa_nodes)
Ejemplo n.º 13
0
def _build_depth(config, target):
    """Build one DepthHistogramNode per prefix and region of interest.

    Output files are named "<target>.<prefix><roi_name>.depths" and placed
    in `config.destination`. Returns a MetaNode with the histogram nodes
    as subnodes.
    """
    histograms = []
    for prefix in target.prefixes:
        for (roi_name, roi_filename) in _get_roi(prefix, name_prefix="."):
            if roi_filename is None:
                # No regions file: use the BAMs of every sample
                sample_bams = {}
                for sample in prefix.samples:
                    sample_bams.update(sample.bams)
                dependencies = sample_bams.values()
                input_files = sample_bams.keys()
            else:
                # ROIs require indexed access, and hence that the final BAM
                # (either raw or realigned) has been built. The entry that
                # sorts last is used; by lexical order this is the realigned
                # BAM when both have been built.
                input_files, dependencies = sorted(prefix.bams.items())[-1]

            basename = "%s.%s%s.depths" % (target.name, prefix.name,
                                           roi_name)

            histograms.append(
                DepthHistogramNode(
                    config=config,
                    target_name=target.name,
                    input_files=input_files,
                    regions_file=roi_filename,
                    output_file=os.path.join(config.destination, basename),
                    dependencies=dependencies))

    return MetaNode(description="DepthHistograms", subnodes=histograms)
Ejemplo n.º 14
0
    def __init__(self, config, target, prefix, lanes, name):
        """Combine the lanes of one library into the library's BAMs and node.

        All lanes must share the same options and the same parent folder
        (checked by the assertions below). The steps are: collect the lane
        BAMs by type, build the data-duplication check and the duplicate
        histogram nodes, optionally remove PCR duplicates (if enabled by the
        "PCRDuplicates" option), and finally build the mapDamage nodes,
        which determine `self.bams` and `self.mapdamage`.

        `self.node` depends on every final BAM and on the data-duplication
        check.
        """
        self.name = name
        self.lanes = safe_coerce_to_tuple(lanes)
        # Read from the coerced tuple, like 'self.folder' below; indexing the
        # raw 'lanes' argument fails whenever it is not itself indexable
        # (e.g. a single lane object or an iterator).
        self.options = self.lanes[0].options
        self.folder = os.path.dirname(self.lanes[0].folder)
        self.bams = None
        self.mapdamage = None

        assert all((self.folder == os.path.dirname(lane.folder))
                   for lane in self.lanes)
        assert all((self.options == lane.options) for lane in self.lanes)

        lane_bams = self._collect_bams_by_type(self.lanes)
        self.datadup_check = self._build_dataduplication_node(lane_bams)
        self.duphist = \
            self._build_duphist_nodes(config, target, prefix, lane_bams)
        pcr_duplicates = self.options["PCRDuplicates"]
        if pcr_duplicates:
            lane_bams = self._remove_pcr_duplicates(config, prefix, lane_bams,
                                                    pcr_duplicates)

        # At this point we no longer need to differentiate between types of
        # reads
        files_and_nodes = self._collect_files_and_nodes(lane_bams)

        self.bams, self.mapdamage = \
            self._build_mapdamage_nodes(config, target, prefix,
                                        files_and_nodes)

        # list(...) keeps the concatenation valid even if values() does not
        # return a list.
        self.node = MetaNode(
            description="Library: %s" % os.path.basename(self.folder),
            dependencies=list(self.bams.values()) + [self.datadup_check])
Ejemplo n.º 15
0
def test_metanode__nodes():
    # Subnodes and dependencies are passed as one-shot iterators and are
    # expected to be exposed as frozensets of the same nodes.
    expected_subnodes = [Node(), Node()]
    expected_dependencies = [Node(), Node()]

    meta = MetaNode(subnodes=iter(expected_subnodes),
                    dependencies=iter(expected_dependencies))

    assert_equal(meta.subnodes, frozenset(expected_subnodes))
    assert_equal(meta.dependencies, frozenset(expected_dependencies))
Ejemplo n.º 16
0
def build_sample_nodes(options,
                       genotyping,
                       regions_sets,
                       sample,
                       dependencies=()):
    """Build the genotyping nodes of one sample for every set of regions.

    Each set of regions is deep-copied before it is specialised for the
    sample, so the entries of `regions_sets` are left unmodified. The
    builder is looked up in `_GENOTYPING_METHODS` using the lower-cased
    "GenotypingMethod" of the sample.

    Returns a MetaNode named after the sample that depends on all nodes.

    Raises AssertionError if the genotyping method is not known.
    """
    nodes = []
    for regions in regions_sets.itervalues():
        regions = deepcopy(regions)

        # Enforce homozygous contigs based on gender tag
        regions["HomozygousContigs"] \
            = regions["HomozygousContigs"][sample["Gender"]]

        genotyping_method = sample["GenotypingMethod"].lower()
        if genotyping_method not in _GENOTYPING_METHODS:
            # Raised explicitly rather than via 'assert False', which is
            # stripped under 'python -O' and would leave a bare KeyError
            # from the lookup below.
            raise AssertionError(
                "Unexpected genotyping method %r for sample %r"
                % (genotyping_method, sample["Name"]))

        genotyping_function = _GENOTYPING_METHODS[genotyping_method]
        node = genotyping_function(options=options,
                                   genotyping=genotyping[regions["Name"]],
                                   sample=sample["Name"],
                                   regions=regions,
                                   dependencies=dependencies)
        nodes.extend(node)

    return MetaNode(description=sample["Name"], dependencies=nodes)
Ejemplo n.º 17
0
def test_metanode__properties():
    # A MetaNode created without arguments is expected to report an empty
    # frozenset for each of these properties.
    meta = MetaNode()
    empty = frozenset()

    for attribute in ("input_files",
                      "output_files",
                      "executables",
                      "auxiliary_files",
                      "requirements"):
        assert_equal(getattr(meta, attribute), empty)
Ejemplo n.º 18
0
def test_bwa(config):
    """Return a "BWA" MetaNode depending on the aln/sw SE and PE test nodes.

    All four builders share the index node built by `_bwa_index`.
    """
    index = _bwa_index(config)
    builders = (_bwa_aln_se, _bwa_aln_pe, _bwa_sw_se, _bwa_sw_pe)
    dependencies = tuple(build(config, index) for build in builders)

    return MetaNode(description="BWA", dependencies=dependencies)
Ejemplo n.º 19
0
def test_bwa(config):
    """Return a "BWA" MetaNode covering SE and PE runs of each algorithm.

    For each of "backtrack", "bwasw" and "mem", the single-end node is
    built first, followed by the paired-end node; all share one index.
    """
    index = _bwa_index(config)
    dependencies = [
        build(config, index, algorithm)
        for algorithm in ("backtrack", "bwasw", "mem")
        for build in (_bwa_se, _bwa_pe)
    ]

    return MetaNode(description="BWA", dependencies=dependencies)
Ejemplo n.º 20
0
def index_references(config, makefiles):
    """Attach reference-indexing nodes to every prefix of every makefile.

    Each distinct "Reference" path is processed once; prefixes sharing a
    reference share the same MetaNodes. Three entries are written to each
    prefix dict:
      "Node"         - validation, faidx and sequence dictionary
      "Node:BWA"     - the above plus the BWA index
      "Node:Bowtie2" - the above plus the Bowtie2 index
    """
    # reference path -> (common node, BWA node, Bowtie2 node)
    indexed = {}
    for makefile in makefiles:
        for subdd in makefile["Prefixes"].itervalues():
            reference = subdd["Reference"]
            if reference not in indexed:
                # Validation of the FASTA file; not blocking for the other
                # steps, as it is only expected to fail very rarely, but will
                # block subsequent analyses depending on the FASTA.
                validated = ValidateFASTAFilesNode(
                    input_files=reference,
                    output_file=reference + ".validated")
                # Indexing of FASTA file using 'samtools faidx'
                faidx = FastaIndexNode(reference)
                # Indexing of FASTA file using 'BuildSequenceDictionary.jar'
                seq_dict = BuildSequenceDictNode(config=config,
                                                 reference=reference,
                                                 dependencies=(validated, ))
                # Indexing of FASTA file using 'bwa index'
                bwa_index = BWAIndexNode(input_file=reference,
                                         dependencies=(validated, ))
                # Indexing of FASTA file for Bowtie2
                bowtie2_index = Bowtie2IndexNode(input_file=reference,
                                                 dependencies=(validated, ))

                shared = (validated, faidx, seq_dict)
                node_common = MetaNode(description="Reference Sequence",
                                       dependencies=shared)
                node_bwa = MetaNode(description="Reference Sequence",
                                    dependencies=shared + (bwa_index, ))
                node_bowtie2 = MetaNode(
                    description="Reference Sequence",
                    dependencies=shared + (bowtie2_index, ))

                indexed[reference] = (node_common, node_bwa, node_bowtie2)

            node_common, node_bwa, node_bowtie2 = indexed[reference]
            subdd["Node"] = node_common
            subdd["Node:BWA"] = node_bwa
            subdd["Node:Bowtie2"] = node_bowtie2
Ejemplo n.º 21
0
    def __init__(self, config, prefixes, name):
        """Set up the alignment node and the optional extra nodes of a target.

        The "Alignments:" MetaNode depends on the node of every prefix.
        Extra nodes are then registered for the "mapdamage" and "duphist"
        attributes of the libraries. `config` is not used here.
        """
        self.name = name
        self.prefixes = safe_coerce_to_tuple(prefixes)

        self._nodes_extras = {}
        prefix_nodes = [prefix.node for prefix in self.prefixes]
        self._nodes_alignment = MetaNode(description="Alignments:",
                                         dependencies=prefix_nodes)

        # (display name, library attribute) of each kind of extra node
        extras = (("mapDamage", "mapdamage"),
                  ("Duplicate Histogram", "duphist"))
        for (extra_name, attribute) in extras:
            self._setup_extra_nodes(extra_name, attribute)
Ejemplo n.º 22
0
def build_variant_nodes(options, reference, group, dependencies=()):
    """Call variants for `group` with GATK and SAMtools and intersect them.

    Both callers read "<BaseDir>/<ind>/<ind>.<Label>.realigned.bam" for
    every individual in group['Inds'] and write raw VCFs to the makefile's
    'OutDir'. A filtering node that depends on both callers then takes the
    GATK VCF as input, with its "--intersect_vcf" argument derived from the
    GATK filename via the makefile's 'intersect_vcf' replace/with setting.

    Returns a MetaNode that depends on the filtering node.

    NOTE(review): `dependencies` is accepted but is not forwarded to any of
    the nodes built here -- confirm whether that is intended.
    """
    makefile = options.makefile
    label = reference['Label']

    def _raw_vcf(caller):
        # "<OutDir>/<caller>.<group>.<label>.raw.vcf"
        filename = "{}.{}.{}.raw.vcf".format(caller, group['Group'], label)
        return os.path.join(makefile['OutDir'], filename)

    def _realigned_bams():
        # A fresh list per caller, as each node is handed its own list
        return [os.path.join(makefile['BaseDir'],
                             ind,
                             ind + "." + label + ".realigned.bam")
                for ind in group['Inds']]

    # Build the GATK Variant Calling Node
    gatk_outfile = _raw_vcf("gatk")
    gatk_params = UnifiedGenotyperNode.customize(reference=reference['Path'],
                                                 infiles=_realigned_bams(),
                                                 outfile=gatk_outfile,
                                                 options=options)

    # Build the SAMTOOLs Variant Calling Node
    samtools_outfile = _raw_vcf("samtools")
    samtools_params = VariantNode.customize(reference=reference['Path'],
                                            infiles=_realigned_bams(),
                                            outfile=samtools_outfile,
                                            options=options)

    samtools_variants = samtools_params.build_node()
    gatk_variants = gatk_params.build_node()

    # Build the Variant Filtering Nodes
    intersect_setting = makefile['intersect_vcf']
    other_vcf = gatk_outfile.replace(intersect_setting['replace'],
                                     intersect_setting['with'])
    intersect_outfile = gatk_outfile.replace(".raw.vcf",
                                             ".gatk_samtools_intersect.vcf")
    intersect_params = VariantFilterNode.customize(
        reference=reference['Path'],
        infile=gatk_outfile,
        outfile=intersect_outfile,
        filters={
            "--intersect_vcf": other_vcf,
            "--emit": "pass",
        },
        options=options,
        dependencies=[gatk_variants, samtools_variants])
    intersect_variants = intersect_params.build_node()

    return MetaNode(description="Variant Intersection",
                    dependencies=[intersect_variants])
Ejemplo n.º 23
0
    def __init__(self, config, reference, infiles, outfile, intervals=None, dependencies=()):
        """Build the two-step GATK indel realignment as a single MetaNode.

        Step one (training) writes the intervals file, which defaults to
        `outfile` + ".intervals"; step two (realignment) depends on step one
        and writes `outfile`.
        """
        intervals_file = intervals if intervals else outfile + ".intervals"
        input_bams = safe_coerce_to_tuple(infiles)

        training_step = _IndelTrainerNode(config=config,
                                          reference=reference,
                                          infiles=input_bams,
                                          outfile=intervals_file,
                                          dependencies=dependencies)
        realignment_step = _IndelRealignerNode(config=config,
                                               reference=reference,
                                               intervals=intervals_file,
                                               infiles=input_bams,
                                               outfile=outfile,
                                               dependencies=training_step)

        summary = "<GATK Indel Realigner: %i files -> '%s'>" % (len(input_bams), outfile)
        MetaNode.__init__(self,
                          description=summary,
                          subnodes=[training_step, realignment_step],
                          dependencies=dependencies)
Ejemplo n.º 24
0
def index_references(config, makefiles):
    """Attach a "Reference Sequence" node to every prefix of every makefile.

    Nodes are cached by the real path of the reference, so prefixes whose
    "Reference" resolves to the same file share one MetaNode. The indexing
    nodes themselves are created with the path as written in the makefile.
    """
    nodes_by_realpath = {}
    for makefile in makefiles:
        for dd in makefile["Prefixes"].itervalues():
            realpath = os.path.realpath(dd["Reference"])
            if realpath not in nodes_by_realpath:
                faidx_node = FastaIndexNode(dd["Reference"])
                dict_node = BuildSequenceDictNode(config=config,
                                                  reference=dd["Reference"])
                nodes_by_realpath[realpath] = MetaNode(
                    description="Reference Sequence",
                    dependencies=(faidx_node, dict_node))

            dd["Node"] = nodes_by_realpath[realpath]
Ejemplo n.º 25
0
    def __init__(self, config, prefix, libraries, name):
        """Merge the BAMs of `libraries` and build the node for one sample.

        The sample folder is the parent of the first library's folder, and
        `self.node` depends on the node of every library. `config` and
        `prefix` are not used here.
        """
        self.name = name
        self.bams = {}
        self.libraries = safe_coerce_to_tuple(libraries)

        for library in self.libraries:
            self.bams.update(library.bams.iteritems())

        first_library = self.libraries[0]
        self.folder = os.path.dirname(first_library.folder)

        library_nodes = [library.node for library in self.libraries]
        sample_label = os.path.basename(self.folder)
        self.node = MetaNode(description="Sample: %s" % sample_label,
                             dependencies=library_nodes)
Ejemplo n.º 26
0
def _adapterremoval_se(config):
    """Build a default and a customized single-end AdapterRemoval test node.

    Both nodes read the same two FASTQ files; the customized node sets
    "--minlength" to 30. Returns a MetaNode depending on both.
    """
    input_files = ("tests/data/raw_reads/se_reads_R1_001.fastq.gz",
                   "tests/data/raw_reads/se_reads_R1_002.fastq.gz")
    node_params = dict(input_files=input_files,
                       dependencies=config.dependencies)

    standard_prefix = os.path.join(config.destination, "se_standard")
    standard = SE_AdapterRemovalNode(output_prefix=standard_prefix,
                                     **node_params)

    custom_prefix = os.path.join(config.destination, "se_custom")
    custom = SE_AdapterRemovalNode.customize(output_prefix=custom_prefix,
                                             **node_params)
    custom.command.set_option("--minlength", 30)

    return MetaNode(description="AdapterRemoval_SE",
                    dependencies=[standard, custom.build_node()])
Ejemplo n.º 27
0
    def _setup_extra_nodes(self, name, key):
        """Register the extra nodes found under attribute `key` of libraries.

        For each prefix, the truthy values of `getattr(library, key)` over
        all libraries of all samples are grouped in a MetaNode named after
        the prefix; prefixes without any such value are skipped. The
        resulting nodes (if any) are registered under `name`.
        """
        extra_nodes = []
        for prefix in self.prefixes:
            candidates = (getattr(library, key)
                          for sample in prefix.samples
                          for library in sample.libraries)
            found = [value for value in candidates if value]

            # Only truthy values were collected, so a non-empty list means
            # that there is something to group for this prefix.
            if found:
                extra_nodes.append(MetaNode(description=prefix.name,
                                            subnodes=found))

        if extra_nodes:
            self.add_extra_nodes(name, extra_nodes)
Ejemplo n.º 28
0
def _bwa_se(config, index, algorithm):
    """Build a default and a customized single-end BWA test node.

    Output goes to "<destination>/<algorithm>_se_<kind>/output.bam", where
    kind is "standard" or "custom"; the customized node adds read-group
    options to its "convert" command. Returns a MetaNode depending on both.
    """
    node_params = dict(
        input_file_1="tests/data/sim_reads/mate_1.fastq.gz",
        prefix=os.path.join(config.destination, "rCRS"),
        reference="tests/data/rCRS.fasta",
        algorithm=algorithm,
        dependencies=(config.dependencies, index),
    )
    template = os.path.join(config.destination, "%s_se_%s", "output.bam")

    def _output_bam(kind):
        return template % (algorithm, kind)

    standard = BWANode(output_file=_output_bam("standard"),
                       **node_params).build_node()

    custom_params = BWANode(output_file=_output_bam("custom"), **node_params)
    for (option, value) in (("--rg-id", "myRG"), ("--rg", "PL:Illumina")):
        custom_params.commands["convert"].add_option(option, value)
    custom = custom_params.build_node()

    return MetaNode(description="BWA %s SE" % algorithm,
                    dependencies=[standard, custom])
Ejemplo n.º 29
0
def _bwa_sw_se(config, index):
    """Build a default and a customized single-end BWA-SW test node.

    The customized node sets "-z" to 10 on its "aln" command. Returns a
    MetaNode depending on both nodes.
    """
    node_params = dict(
        input_file_1="tests/data/sim_reads/mate_1.fastq.gz",
        prefix=os.path.join(config.destination, "rCRS"),
        reference="tests/data/rCRS.fasta",
        dependencies=(config.dependencies, index),
    )

    standard_bam = os.path.join(config.destination, "sw_se_standard",
                                "output.bam")
    standard = BWASWNode(output_file=standard_bam, **node_params)

    custom_bam = os.path.join(config.destination, "sw_se_custom",
                              "output.bam")
    custom = BWASWNode.customize(output_file=custom_bam, **node_params)
    custom.commands["aln"].set_option("-z", 10)

    return MetaNode(description="BWA SW SE",
                    dependencies=[standard, custom.build_node()])
Ejemplo n.º 30
0
def _bowtie2_aln_pe(config, index):
    """Build a default and a customized paired-end Bowtie2 test node.

    The customized node enables "--very-sensitive" on its "aln" command.
    Returns a MetaNode depending on both nodes.
    """
    node_params = dict(
        input_file_1="tests/data/sim_reads/mate_1.fastq.gz",
        input_file_2="tests/data/sim_reads/mate_2.fastq.gz",
        prefix=os.path.join(config.destination, "rCRS"),
        reference="tests/data/rCRS.fasta",
        dependencies=(config.dependencies, index),
    )

    standard_bam = os.path.join(config.destination, "aln_pe_standard",
                                "output.bam")
    standard = Bowtie2Node(output_file=standard_bam, **node_params)

    custom_bam = os.path.join(config.destination, "aln_pe_custom",
                              "output.bam")
    custom = Bowtie2Node.customize(output_file=custom_bam, **node_params)
    custom.commands["aln"].set_option("--very-sensitive")

    return MetaNode(description="Bowtie2 aln PE",
                    dependencies=[standard, custom.build_node()])
Ejemplo n.º 31
0
def _add_mapdamage_nodes(config, makefile, target):
    """Register one mapDamage MetaNode per prefix on `target`.

    Nothing is done unless "mapDamage" is listed among the makefile's
    features. Each library gets a MapDamageNode writing to
    "<destination>/<target>.<prefix>.mapDamage/<library>"; the nodes of a
    prefix are grouped in a MetaNode named after the prefix (which is
    created even when the prefix has no libraries).
    """
    if "mapDamage" not in makefile["Options"]["Features"]:
        return

    prefix_nodes = []
    for prefix in target.prefixes:
        library_nodes = [
            MapDamageNode(
                config=config,
                reference=prefix.reference,
                input_files=library.bams.keys(),
                output_directory=os.path.join(
                    config.destination,
                    "%s.%s.mapDamage" % (target.name, prefix.name),
                    library.name),
                dependencies=library.bams.values())
            for sample in prefix.samples
            for library in sample.libraries
        ]

        prefix_nodes.append(MetaNode(description=prefix.name,
                                     subnodes=library_nodes))

    target.add_extra_nodes("mapDamage", prefix_nodes)
Ejemplo n.º 32
0
def _bwa_aln_se(config, index):
    """Build a default and a customized single-end BWA aln test node.

    The customized node passes a read-group header to its "samse" command
    via "-r". Returns a MetaNode depending on both nodes.
    """
    node_params = dict(
        input_file="tests/data/sim_reads/mate_1.fastq.gz",
        prefix=os.path.join(config.destination, "rCRS"),
        reference="tests/data/rCRS.fasta",
        dependencies=(config.dependencies, index),
    )

    standard_bam = os.path.join(config.destination, "aln_se_standard",
                                "output.bam")
    standard = SEBWANode(output_file=standard_bam, **node_params)

    custom_bam = os.path.join(config.destination, "aln_se_custom",
                              "output.bam")
    custom = SEBWANode.customize(output_file=custom_bam, **node_params)
    read_group = \
        "@RG\tID:1\tPL:Illumina\tPU:123456\tLB:Library_1\tSM:Sample_1"
    custom.commands["samse"].set_option("-r", read_group)

    return MetaNode(description="BWA aln SE",
                    dependencies=[standard, custom.build_node()])
Ejemplo n.º 33
0
def build_snp_list(groups, prefix, options, dependencies=()):
    """Merge the recalibrated group VCFs and filter them into a SNP list.

    All files matching "gatk*group_only*.vcf" in the makefile's 'RecalDir'
    are merged into "RECAL_MERGED.<Label>.vcf", which is then filtered into
    "RECAL_FILTERED.<Label>.vcf". The filtering node depends on the merge
    node in addition to `dependencies`.

    `dependencies` may be any iterable of nodes; it is copied into a list
    before the merge node is appended. Previously the default `()` raised a
    TypeError, as a list cannot be concatenated to a tuple.

    Returns a MetaNode that depends on the filtering node.

    NOTE(review): `groups` is not used by this function.
    """
    recal_dir = options.makefile['RecalDir']
    recal_files = glob.glob(os.path.join(recal_dir, "gatk*group_only*.vcf"))

    # The cutoff is not used below; it is still parsed so that a missing or
    # non-numeric 'VQSLOD_cutoff' setting fails here, as it did before.
    float(options.makefile['VQSLOD_cutoff'])

    # Output of the merge step and input of the filtering step
    merged_vcf = os.path.join(
        recal_dir, 'RECAL_MERGED.{}.vcf'.format(prefix['Label']))
    filtered_vcf = os.path.join(
        recal_dir, "RECAL_FILTERED.{}.vcf".format(prefix['Label']))

    recal_merge = VariantMergeNode.customize(
        vcf_list=recal_files,
        outfile=merged_vcf,
        reference=prefix['Path'],
        options=options,
        dependencies=dependencies,
    )
    recal_merge = recal_merge.build_node()

    recal_filter = VariantFilterNode.customize(
        reference=prefix['Path'],
        infile=merged_vcf,
        outfile=filtered_vcf,
        filters={
            "--not_within": '20',
            "--allelic": '2',
            "--repetitive": "Repeat_Regions.txt",
            "--emit": "pass",
            "--field": 'AC',
            "--thresh": '2',
        },
        options=options,
        dependencies=list(dependencies) + [recal_merge],
    )
    recal_filter = recal_filter.build_node()

    return MetaNode(description="SNP List Node",
                    dependencies=[recal_filter])