Exemple #1
0
    def __init__(self, config, prefix, samples, features, target):
        """Gather the BAMs of all samples and build the node for a prefix.

        config   -- provides `destination`; also passed to the BAM builders.
        prefix   -- mapping with "Name" and "Reference", and optionally
                    "Label" and "AreasOfInterest".
        samples  -- sample object(s) exposing `bams` and `node`; passed
                    through safe_coerce_to_tuple.
        features -- container checked for "Raw BAM" and "Realigned BAM".
        target   -- stored unchanged as `self.target`.
        """
        self.name = prefix["Name"]
        # A missing or empty "Label" falls back to the prefix name
        self.label = prefix.get("Label") or self.name
        self.reference = prefix["Reference"]
        self.aoi = prefix.get("AreasOfInterest", {})

        self.samples = safe_coerce_to_tuple(samples)
        self.bams = {}
        self.folder = config.destination
        self.target = target

        # Pool the BAM mappings of every sample; later samples win on clashes
        pooled_bams = {}
        for sample in self.samples:
            pooled_bams.update(sample.bams.iteritems())

        if "Raw BAM" in features:
            raw_bams = self._build_raw_bam(config, prefix, pooled_bams)
            self.bams.update(raw_bams)
        if "Realigned BAM" in features:
            realigned_bams = self._build_realigned_bam(config, prefix,
                                                       pooled_bams)
            self.bams.update(realigned_bams)

        sample_nodes = [sample.node for sample in self.samples]
        if self.bams:
            # Final BAMs were requested; they become subnodes of this prefix
            self.node = MetaNode(description="Final BAMs: %s" % prefix["Name"],
                                 subnodes=self.bams.values(),
                                 dependencies=sample_nodes)
        else:
            # No final BAMs requested: expose the per-sample BAMs instead
            for sample in self.samples:
                self.bams.update(sample.bams)

            self.node = MetaNode(description="Prefix: %s" % prefix["Name"],
                                 dependencies=sample_nodes)
Exemple #2
0
def _build_coverage(config, makefile, target, make_summary):
    """Build the coverage nodes for `target` and return the coverage mapping.

    One MergeCoverageNode is created per (prefix, area of interest) pair,
    writing "<target>.<prefix>[.<aoi>].coverage" under `config.destination`.
    The returned mapping comes from _build_coverage_nodes, with an added
    "Node" entry wrapping the partial and the merged nodes. When
    `make_summary` is true, the "Lanes" coverage is aggregated as well.
    `makefile` is not used by this function.
    """
    coverage = _build_coverage_nodes(target)

    merged_nodes = []
    for prefix in target.prefixes:
        for (aoi_name, _) in _get_aoi(prefix):
            label = _get_prefix_label(prefix.label, aoi_name)
            if aoi_name:
                postfix = "%s.%s" % (prefix.name, aoi_name)
            else:
                postfix = prefix.name

            per_prefix = _aggregate_for_prefix(coverage["Libraries"], label)
            merged_fname = "%s.%s.coverage" % (target.name, postfix)
            merged_fpath = os.path.join(config.destination, merged_fname)

            merged_nodes.append(
                MergeCoverageNode(input_files=per_prefix.keys(),
                                  output_file=merged_fpath,
                                  dependencies=per_prefix.values()))

    partial = _aggregate_for_prefix(coverage["Libraries"], None)
    if make_summary:
        description = "Lanes and libraries"
        partial = _aggregate_for_prefix(coverage["Lanes"], None, into=partial)
    else:
        description = "Libraries"

    partial_nodes = MetaNode(description=description,
                             subnodes=partial.values())
    final_nodes = MetaNode(description="Final coverage", subnodes=merged_nodes)

    coverage["Node"] = MetaNode(description="Coverage",
                                subnodes=(partial_nodes, final_nodes))

    return coverage
Exemple #3
0
def build_phylogeny_nodes(options, settings, filtering, dependencies):
    """Return a MetaNode depending on all phylogenetic inference runs.

    Each entry of settings["PhylogeneticInference"] gets its own folder,
    "<options.destination>/phylogenies/<run name>". Runs with "PerGeneTrees"
    enabled are built once per region of interest and grouped under a
    MetaNode named after the run; other runs add their nodes directly.
    """
    inference_nodes = []
    for (run_name, run_dd) in settings["PhylogeneticInference"].iteritems():
        destination = os.path.join(options.destination, "phylogenies",
                                   run_name)

        if not run_dd["PerGeneTrees"]:
            inference_nodes.extend(
                _build_examl_regions_nodes(options, settings, run_dd,
                                           destination, filtering,
                                           dependencies))
            continue

        per_gene_nodes = []
        for roi in run_dd["RegionsOfInterest"].itervalues():
            # Every region of interest gets a sub-folder of the run folder
            roi_destination = os.path.join(destination, roi["Name"])
            per_gene_nodes.extend(
                _build_examl_per_gene_nodes(options, settings, run_dd, roi,
                                            roi_destination, filtering,
                                            dependencies))

        inference_nodes.append(MetaNode(description=run_name,
                                        subnodes=per_gene_nodes,
                                        dependencies=dependencies))

    return MetaNode("Phylogenetic Inference", dependencies=inference_nodes)
Exemple #4
0
def build_codeml_nodes(options, settings, interval, taxa, filtering,
                       dependencies):
    """Build the PHYLIP conversion and codeml nodes for one interval.

    Input alignments are read from
    "<options.destination>/alignments/<interval name>[.filtered]" and the
    results are written below
    "<options.destination>/paml/codeml/<interval name>[.unaligned][.filtered]".
    One FastaToPAMLPhyNode is built per sequence, and one CodemlNode per
    (control file, sequence) pair.

    Returns a MetaNode holding the codeml nodes as subnodes and depending
    on a MetaNode that groups the conversion nodes.
    """
    in_postfix, out_postfix, afa_ext = "", "", ".afa"
    if any(filtering.itervalues()):
        in_postfix = out_postfix = ".filtered"
    if not settings["MSAlignment"]["Enabled"]:
        # Without alignment, the plain FASTA files are used as input
        out_postfix = ".unaligned" + out_postfix
        afa_ext = ".fasta"

    paml = settings["PAML"]
    sequences = common.collect_sequences(options, interval, taxa)
    sequencedir = os.path.join(options.destination, "alignments",
                               interval["Name"] + in_postfix)
    destination = os.path.join(options.destination, "paml", "codeml",
                               interval["Name"] + out_postfix)

    # Build meta-node for sequence conversion to PHYLIP format accepted by codeml
    phylip_nodes = {}
    for sequence in sequences:
        input_file = os.path.join(sequencedir, sequence + afa_ext)
        output_file = os.path.join(destination, sequence + ".phy")

        phylip_nodes[sequence] = FastaToPAMLPhyNode(
            input_file=input_file,
            output_file=output_file,
            exclude_groups=paml["codeml"]["ExcludeGroups"],
            dependencies=dependencies)

    # 'afa_ext' already starts with a dot, so it is appended directly to
    # the glob; a "*.%s" pattern would render as e.g. "*..afa".
    phylip_description = "<FastaToPAMLPhyNodes: '%s/*%s' -> '%s/*.phy'>" \
        % (sequencedir, afa_ext, destination)
    phylip_meta = MetaNode(description=phylip_description,
                           subnodes=phylip_nodes.values(),
                           dependencies=dependencies)

    codeml_nodes = []
    for (ctl_name, ctl_file) in paml["codeml"]["Control Files"].iteritems():
        for (sequence, node) in phylip_nodes.iteritems():
            output_prefix = os.path.join(destination,
                                         sequence + ".%s" % (ctl_name, ))

            # Take an (arbitrary) first element of the node's output files;
            # NOTE(review): presumably each conversion node has exactly one.
            codeml = CodemlNode(control_file=ctl_file,
                                trees_file=paml["codeml"]["Tree File"],
                                sequence_file=next(iter(node.output_files)),
                                output_prefix=output_prefix,
                                dependencies=node)
            codeml_nodes.append(codeml)

    return MetaNode(description="<CodemlNodes>",
                    subnodes=codeml_nodes,
                    dependencies=phylip_meta)
Exemple #5
0
    def __init__(self, config, target, prefix, lanes, name):
        """Build the nodes of one library from its lanes.

        config, target, prefix -- passed on to the node-building helpers.
        lanes -- lane object(s) exposing `options` and `folder`; passed
                 through safe_coerce_to_tuple. All lanes are asserted to
                 share the same parent folder and the same options.
        name  -- stored as `self.name`.

        Sets `self.bams`, `self.mapdamage`, `self.duphist`,
        `self.datadup_check` and `self.node`.
        """
        self.name = name
        self.lanes = safe_coerce_to_tuple(lanes)
        # Read from the coerced tuple, like 'folder' below, so that the raw
        # 'lanes' argument does not itself need to support indexing.
        self.options = self.lanes[0].options
        self.folder = os.path.dirname(self.lanes[0].folder)
        self.bams = None
        self.mapdamage = None

        assert all((self.folder == os.path.dirname(lane.folder))
                   for lane in self.lanes)
        assert all((self.options == lane.options) for lane in self.lanes)

        lane_bams = self._collect_bams_by_type(self.lanes)
        self.datadup_check = self._build_dataduplication_node(lane_bams)
        self.duphist = \
            self._build_duphist_nodes(config, target, prefix, lane_bams)
        pcr_duplicates = self.options["PCRDuplicates"]
        if pcr_duplicates:
            lane_bams = self._remove_pcr_duplicates(config, prefix, lane_bams,
                                                    pcr_duplicates)

        # At this point we no longer need to differentiate between types of reads
        files_and_nodes = self._collect_files_and_nodes(lane_bams)

        self.bams, self.mapdamage = \
            self._build_mapdamage_nodes(config, target, prefix,
                                        files_and_nodes)

        # list(...) keeps the concatenation valid whether values() returns
        # a list or a view object.
        self.node = MetaNode(
            description="Library: %s" % os.path.basename(self.folder),
            dependencies=list(self.bams.values()) + [self.datadup_check])
Exemple #6
0
    def __init__(self, config, prefixes, name):
        """Store the name and prefixes, and build the alignment meta-node.

        `prefixes` is passed through safe_coerce_to_tuple; the alignment
        node depends on the `node` of every prefix. `config` is not used.
        """
        self.name = name
        self.prefixes = safe_coerce_to_tuple(prefixes)

        prefix_nodes = [prefix.node for prefix in self.prefixes]
        self._nodes_alignment = MetaNode(description="Alignments:",
                                         dependencies=prefix_nodes)
        self._nodes_extras = {}
Exemple #7
0
def test_metanode__nodes():
    """Nodes passed to MetaNode as iterators are exposed as frozensets."""
    expected_subnodes = [Node(), Node()]
    expected_dependencies = [Node(), Node()]

    # Iterators are passed in on purpose, rather than the lists themselves
    node = MetaNode(subnodes=iter(expected_subnodes),
                    dependencies=iter(expected_dependencies))

    assert_equal(node.subnodes, frozenset(expected_subnodes))
    assert_equal(node.dependencies, frozenset(expected_dependencies))
Exemple #8
0
def build_taxa_nodes(options, genotyping, intervals, taxa, dependencies=()):
    """Return a MetaNode depending on the genotyping nodes of one taxa.

    Each interval is deep-copied before being adjusted for this taxa, so the
    mappings in `intervals` are left unmodified. The builder is picked from
    the lower-cased "Genotyping Method" of `taxa` (default "samtools");
    methods other than the three handled below add no nodes.
    """
    taxa_nodes = []
    for template in intervals.itervalues():
        interval = deepcopy(template)
        # Override default genome (BAM file) if specified
        interval["Genome"] = common.get_genome_for_interval(interval, taxa)
        # Enforce homozygous contigs based on gender tag
        interval["Homozygous Contigs"] = interval["Homozygous Contigs"][
            taxa["Gender"]]

        method = taxa.get("Genotyping Method", "samtools").lower()
        if method == "reference sequence":
            built = build_reference_nodes(options, taxa, interval,
                                          dependencies)
        elif method == "random sampling":
            built = build_sampling_nodes(options, genotyping, taxa, interval,
                                         dependencies)
        elif method == "samtools":
            built = build_genotyping_nodes(options, genotyping, taxa,
                                           interval, dependencies)
        else:
            # Unrecognized methods contribute nothing for this interval
            continue

        taxa_nodes.extend(built)

    return MetaNode(description=taxa["Name"], dependencies=taxa_nodes)
Exemple #9
0
def _build_depth(config, target):
    """Return a MetaNode wrapping one DepthHistogramNode per prefix / ROI.

    Output files are named "<target>.<prefix><roi name>.depths" and placed
    in `config.destination`.
    """
    histogram_nodes = []
    for prefix in target.prefixes:
        for (roi_name, roi_filename) in _get_roi(prefix, name_prefix="."):
            if roi_filename is None:
                # No regions file: use the BAMs of every sample directly
                sample_bams = {}
                for sample in prefix.samples:
                    sample_bams.update(sample.bams)
                dependencies = sample_bams.values()
                input_files = sample_bams.keys()
            else:
                # ROIs require indexed access, and hence that the final BAM
                # (either raw or realigned) has been built. By default, the
                # realigned BAM is used (based on lexical order).
                final_bams = sorted(prefix.bams.items())
                input_files, dependencies = final_bams[-1]

            output_fname = "%s.%s%s.depths" % (target.name, prefix.name,
                                               roi_name)

            histogram_nodes.append(
                DepthHistogramNode(config=config,
                                   target_name=target.name,
                                   input_files=input_files,
                                   regions_file=roi_filename,
                                   output_file=os.path.join(
                                       config.destination, output_fname),
                                   dependencies=dependencies))

    return MetaNode(description="DepthHistograms", subnodes=histogram_nodes)
Exemple #10
0
def build_sample_nodes(options,
                       genotyping,
                       regions_sets,
                       sample,
                       dependencies=()):
    """Return a MetaNode depending on the genotyping nodes of one sample.

    Every set of regions is deep-copied and adjusted for the sample's
    gender before being passed to the builder registered in
    _GENOTYPING_METHODS under the sample's lower-cased "GenotypingMethod".
    """
    sample_nodes = []
    for template in regions_sets.itervalues():
        regions = deepcopy(template)

        # Enforce homozygous contigs based on gender tag
        homozygous_by_gender = regions["HomozygousContigs"]
        regions["HomozygousContigs"] = homozygous_by_gender[sample["Gender"]]

        method = sample["GenotypingMethod"].lower()
        assert method in _GENOTYPING_METHODS, \
            "Unexpected genotyping method %r for sample %r" \
            % (method, sample["Name"])

        build_nodes = _GENOTYPING_METHODS[method]
        sample_nodes.extend(
            build_nodes(options=options,
                        genotyping=genotyping[regions["Name"]],
                        sample=sample["Name"],
                        regions=regions,
                        dependencies=dependencies))

    return MetaNode(description=sample["Name"], dependencies=sample_nodes)
Exemple #11
0
def test_metanode__properties():
    """A default-constructed MetaNode has empty file/requirement sets."""
    node = MetaNode()
    empty_properties = ("input_files", "output_files", "executables",
                        "auxiliary_files", "requirements")
    for name in empty_properties:
        assert_equal(getattr(node, name), frozenset())
Exemple #12
0
def test_bwa(config):
    """Return a MetaNode covering SE and PE runs of every BWA algorithm."""
    index = _bwa_index(config)

    algorithm_nodes = []
    for algorithm in ("backtrack", "bwasw", "mem"):
        # Single-end first, then paired-end, for each algorithm
        algorithm_nodes.extend((_bwa_se(config, index, algorithm),
                                _bwa_pe(config, index, algorithm)))

    return MetaNode(description="BWA", dependencies=algorithm_nodes)
Exemple #13
0
def test_bwa(config):
    """Return a MetaNode covering the aln / sw, SE / PE BWA test nodes."""
    index = _bwa_index(config)

    # Built in the order: aln SE, aln PE, sw SE, sw PE
    builders = (_bwa_aln_se, _bwa_aln_pe, _bwa_sw_se, _bwa_sw_pe)
    bwa_nodes = tuple(build(config, index) for build in builders)

    return MetaNode(description="BWA", dependencies=bwa_nodes)
Exemple #14
0
def index_references(config, makefiles):
    """Attach reference-preparation nodes to every prefix of every makefile.

    One set of validation / indexing nodes is created per distinct
    "Reference" value and shared between the prefixes using it. Each prefix
    mapping is updated in place with the keys "Node", "Node:BWA" and
    "Node:Bowtie2"; nothing is returned.
    """
    plain_refs = {}
    bwa_refs = {}
    bowtie2_refs = {}
    for makefile in makefiles:
        for prefix_dd in makefile["Prefixes"].itervalues():
            fasta = prefix_dd["Reference"]
            if fasta not in plain_refs:
                # Validation of the FASTA file; not blocking for the other
                # steps, as it is only expected to fail very rarely, but will
                # block subsequent analyses depending on the FASTA.
                valid_node = ValidateFASTAFilesNode(
                    input_files=fasta,
                    output_file=fasta + ".validated")
                # Indexing of FASTA file using 'samtools faidx'
                faidx_node = FastaIndexNode(fasta)
                # Indexing of FASTA file using 'BuildSequenceDictionary.jar'
                dict_node = BuildSequenceDictNode(config=config,
                                                  reference=fasta,
                                                  dependencies=(valid_node, ))

                # Indexing of FASTA file using 'bwa index'
                bwa_node = BWAIndexNode(input_file=fasta,
                                        dependencies=(valid_node, ))
                # Indexing of FASTA file for Bowtie2 (Bowtie2IndexNode)
                bowtie2_node = Bowtie2IndexNode(input_file=fasta,
                                                dependencies=(valid_node, ))

                # Nodes needed regardless of which aligner is used
                shared_nodes = (valid_node, faidx_node, dict_node)

                plain_refs[fasta] = MetaNode(
                    description="Reference Sequence",
                    dependencies=shared_nodes)
                bwa_refs[fasta] = MetaNode(
                    description="Reference Sequence",
                    dependencies=shared_nodes + (bwa_node, ))
                bowtie2_refs[fasta] = MetaNode(
                    description="Reference Sequence",
                    dependencies=shared_nodes + (bowtie2_node, ))

            prefix_dd["Node"] = plain_refs[fasta]
            prefix_dd["Node:BWA"] = bwa_refs[fasta]
            prefix_dd["Node:Bowtie2"] = bowtie2_refs[fasta]
Exemple #15
0
    def __init__(self, config, prefixes, name):
        """Store name and prefixes, then set up alignment and extra nodes.

        `prefixes` is passed through safe_coerce_to_tuple; the alignment
        node depends on the `node` of every prefix. `config` is not used.
        """
        self.name = name
        self.prefixes = safe_coerce_to_tuple(prefixes)

        # Created before the _setup_extra_nodes calls below
        self._nodes_extras = {}
        prefix_nodes = [prefix.node for prefix in self.prefixes]
        self._nodes_alignment = MetaNode(description="Alignments:",
                                         dependencies=prefix_nodes)

        for (label, attribute) in (("mapDamage", "mapdamage"),
                                   ("Duplicate Histogram", "duphist")):
            self._setup_extra_nodes(label, attribute)
Exemple #16
0
def build_variant_nodes(options, reference, group, dependencies=()):
    """Return a MetaNode for the GATK / samtools variant intersection.

    Raw variants are called for the realigned BAMs of every individual in
    group['Inds'], once via UnifiedGenotyperNode and once via VariantNode;
    the GATK calls are then filtered through VariantFilterNode, which
    depends on both callers.

    NOTE(review): `dependencies` is accepted but not used by this function.
    """
    makefile = options.makefile
    out_dir = makefile['OutDir']
    group_name = group['Group']
    label = reference['Label']

    def _realigned_bams():
        # "<BaseDir>/<ind>/<ind>.<label>.realigned.bam" for each individual;
        # a fresh list is built per call so the callers do not share one.
        return [os.path.join(makefile['BaseDir'], ind,
                             ind + "." + label + ".realigned.bam")
                for ind in group['Inds']]

    # Build the GATK Variant Calling Node
    gatk_outfile = os.path.join(
        out_dir, "gatk.{}.{}.raw.vcf".format(group_name, label))
    gatk_variants = UnifiedGenotyperNode.customize(
        reference=reference['Path'],
        infiles=_realigned_bams(),
        outfile=gatk_outfile,
        options=options)

    # Build the SAMTOOLs Variant Calling Node
    samtools_outfile = os.path.join(
        out_dir, "samtools.{}.{}.raw.vcf".format(group_name, label))
    samtools_variants = VariantNode.customize(
        reference=reference['Path'],
        infiles=_realigned_bams(),
        outfile=samtools_outfile,
        options=options)

    samtools_variants = samtools_variants.build_node()
    gatk_variants = gatk_variants.build_node()

    # Build the Variant Filtering Nodes
    intersect_outfile = gatk_outfile.replace(
        ".raw.vcf", ".gatk_samtools_intersect.vcf")
    # The VCF to intersect with is derived from the GATK output path using
    # the replacement configured under 'intersect_vcf' in the makefile.
    intersect_with = gatk_outfile.replace(
        makefile['intersect_vcf']['replace'],
        makefile['intersect_vcf']['with'])

    intersect_variants = VariantFilterNode.customize(
        reference=reference['Path'],
        infile=gatk_outfile,
        outfile=intersect_outfile,
        filters={
            "--intersect_vcf": intersect_with,
            "--emit": "pass",
        },
        options=options,
        dependencies=[gatk_variants, samtools_variants])
    intersect_variants = intersect_variants.build_node()

    return MetaNode(description="Variant Intersection",
                    dependencies=[intersect_variants])
Exemple #17
0
def index_references(config, makefiles):
    """Attach a shared "Reference Sequence" node to every prefix.

    Prefixes are grouped by the real path of their "Reference", so that
    different spellings of the same file share one node; the nodes
    themselves are built from the path as written in the first prefix
    seen. Each prefix mapping gets a "Node" entry; nothing is returned.
    """
    nodes_by_realpath = {}
    for makefile in makefiles:
        for dd in makefile["Prefixes"].itervalues():
            realpath = os.path.realpath(dd["Reference"])
            if realpath not in nodes_by_realpath:
                faidx_node = FastaIndexNode(dd["Reference"])
                dict_node = BuildSequenceDictNode(config=config,
                                                  reference=dd["Reference"])
                nodes_by_realpath[realpath] = MetaNode(
                    description="Reference Sequence",
                    dependencies=(faidx_node, dict_node))

            dd["Node"] = nodes_by_realpath[realpath]
Exemple #18
0
    def __init__(self, config, prefix, libraries, name):
        """Pool the BAMs of all libraries and build the sample meta-node.

        `libraries` is passed through safe_coerce_to_tuple; each library
        must expose `bams`, `folder` and `node`. The sample folder is the
        parent of the first library's folder. `config` and `prefix` are
        not used here.
        """
        self.name = name
        self.bams = {}
        self.libraries = safe_coerce_to_tuple(libraries)

        # Later libraries win if the same key occurs more than once
        for library in self.libraries:
            self.bams.update(library.bams.iteritems())

        first_library = self.libraries[0]
        self.folder = os.path.dirname(first_library.folder)

        description = "Sample: %s" % os.path.basename(self.folder)
        library_nodes = [library.node for library in self.libraries]
        self.node = MetaNode(description=description,
                             dependencies=library_nodes)
def _adapterremoval_se(config):
    """Return a MetaNode with a default and a customized SE trimming node.

    The customized node has "--minlength" set to 30; both nodes read the
    same pair of single-end FASTQ files.
    """
    shared_params = {
        "input_files": ("tests/data/raw_reads/se_reads_R1_001.fastq.gz",
                        "tests/data/raw_reads/se_reads_R1_002.fastq.gz"),
        "dependencies": config.dependencies,
    }

    standard_prefix = os.path.join(config.destination, "se_standard")
    standard = SE_AdapterRemovalNode(output_prefix=standard_prefix,
                                     **shared_params)

    custom_prefix = os.path.join(config.destination, "se_custom")
    custom = SE_AdapterRemovalNode.customize(output_prefix=custom_prefix,
                                             **shared_params)
    custom.command.set_option("--minlength", 30)

    return MetaNode(description="AdapterRemoval_SE",
                    dependencies=[standard, custom.build_node()])
Exemple #20
0
    def _setup_extra_nodes(self, name, key):
        """Register the non-empty `key` attributes of all libraries.

        For each prefix, the truthy values of `getattr(library, key)` are
        grouped under a MetaNode named after the prefix; prefixes without
        any such value are skipped. If at least one MetaNode was created,
        the list is passed to `self.add_extra_nodes` under `name`.
        """
        per_prefix_nodes = []
        for prefix in self.prefixes:
            candidates = (getattr(library, key)
                          for sample in prefix.samples
                          for library in sample.libraries)
            found = [candidate for candidate in candidates if candidate]

            if not any(found):
                continue

            per_prefix_nodes.append(MetaNode(description=prefix.name,
                                             subnodes=found))

        if per_prefix_nodes:
            self.add_extra_nodes(name, per_prefix_nodes)
Exemple #21
0
def _bwa_sw_se(config, index):
    """Return a MetaNode with a default and a customized SE BWASWNode.

    The customized node has option "-z" of its "aln" command set to 10.
    """
    shared_params = {
        "input_file_1": "tests/data/sim_reads/mate_1.fastq.gz",
        "prefix": os.path.join(config.destination, "rCRS"),
        "reference": "tests/data/rCRS.fasta",
        "dependencies": (config.dependencies, index),
    }

    standard_bam = os.path.join(config.destination, "sw_se_standard",
                                "output.bam")
    standard = BWASWNode(output_file=standard_bam, **shared_params)

    custom_bam = os.path.join(config.destination, "sw_se_custom",
                              "output.bam")
    custom = BWASWNode.customize(output_file=custom_bam, **shared_params)
    custom.commands["aln"].set_option("-z", 10)

    return MetaNode(description="BWA SW SE",
                    dependencies=[standard, custom.build_node()])
Exemple #22
0
def _bwa_se(config, index, algorithm):
    """Return a MetaNode with a default and a customized SE BWANode.

    Outputs are written to "<destination>/<algorithm>_se_<variant>/output.bam"
    where variant is "standard" or "custom". The customized node gets two
    extra read-group options on its "convert" command.
    """
    shared_params = {
        "input_file_1": "tests/data/sim_reads/mate_1.fastq.gz",
        "prefix": os.path.join(config.destination, "rCRS"),
        "reference": "tests/data/rCRS.fasta",
        "algorithm": algorithm,
        "dependencies": (config.dependencies, index),
    }
    bam_template = os.path.join(config.destination, "%s_se_%s", "output.bam")

    standard_bam = bam_template % (algorithm, "standard")
    standard_node = BWANode(output_file=standard_bam, **shared_params)
    standard_node = standard_node.build_node()

    custom_bam = bam_template % (algorithm, "custom")
    custom_builder = BWANode(output_file=custom_bam, **shared_params)
    convert_command = custom_builder.commands["convert"]
    convert_command.add_option("--rg-id", "myRG")
    custom_builder.commands["convert"].add_option("--rg", "PL:Illumina")
    custom_node = custom_builder.build_node()

    return MetaNode(description="BWA %s SE" % algorithm,
                    dependencies=[standard_node, custom_node])
Exemple #23
0
def _bowtie2_aln_pe(config, index):
    """Return a MetaNode with a default and a customized PE Bowtie2Node.

    The customized node has "--very-sensitive" set on its "aln" command.
    """
    shared_params = {
        "input_file_1": "tests/data/sim_reads/mate_1.fastq.gz",
        "input_file_2": "tests/data/sim_reads/mate_2.fastq.gz",
        "prefix": os.path.join(config.destination, "rCRS"),
        "reference": "tests/data/rCRS.fasta",
        "dependencies": (config.dependencies, index),
    }

    standard_bam = os.path.join(config.destination, "aln_pe_standard",
                                "output.bam")
    standard = Bowtie2Node(output_file=standard_bam, **shared_params)

    custom_bam = os.path.join(config.destination, "aln_pe_custom",
                              "output.bam")
    custom = Bowtie2Node.customize(output_file=custom_bam, **shared_params)
    custom.commands["aln"].set_option("--very-sensitive")

    return MetaNode(description="Bowtie2 aln PE",
                    dependencies=[standard, custom.build_node()])
Exemple #24
0
def _bwa_aln_se(config, index):
    """Return a MetaNode with a default and a customized SEBWANode.

    The customized node has a read-group line set through option "-r" of
    its "samse" command.
    """
    shared_params = {
        "input_file": "tests/data/sim_reads/mate_1.fastq.gz",
        "prefix": os.path.join(config.destination, "rCRS"),
        "reference": "tests/data/rCRS.fasta",
        "dependencies": (config.dependencies, index),
    }

    standard_bam = os.path.join(config.destination, "aln_se_standard",
                                "output.bam")
    standard = SEBWANode(output_file=standard_bam, **shared_params)

    custom_bam = os.path.join(config.destination, "aln_se_custom",
                              "output.bam")
    custom = SEBWANode.customize(output_file=custom_bam, **shared_params)

    read_group = "@RG\tID:1\tPL:Illumina\tPU:123456\tLB:Library_1\tSM:Sample_1"
    custom.commands["samse"].set_option("-r", read_group)

    return MetaNode(description="BWA aln SE",
                    dependencies=[standard, custom.build_node()])
Exemple #25
0
def _add_mapdamage_nodes(config, makefile, target):
    """Register one MapDamageNode per library on `target`.

    Does nothing unless "mapDamage" is listed in the makefile's
    Options -> Features. Results for each library are written to
    "<destination>/<target>.<prefix>.mapDamage/<library>", and the nodes
    are grouped by prefix before being passed to `target.add_extra_nodes`.
    """
    enabled_features = makefile["Options"]["Features"]
    if "mapDamage" not in enabled_features:
        return

    per_prefix_nodes = []
    for prefix in target.prefixes:
        library_nodes = []
        for sample in prefix.samples:
            for library in sample.libraries:
                prefix_folder = "%s.%s.mapDamage" % (target.name, prefix.name)
                output_directory = os.path.join(config.destination,
                                                prefix_folder,
                                                library.name)

                library_nodes.append(
                    MapDamageNode(config=config,
                                  reference=prefix.reference,
                                  input_files=library.bams.keys(),
                                  output_directory=output_directory,
                                  dependencies=library.bams.values()))

        per_prefix_nodes.append(MetaNode(description=prefix.name,
                                         subnodes=library_nodes))

    target.add_extra_nodes("mapDamage", per_prefix_nodes)
Exemple #26
0
def build_snp_list(groups, prefix, options, dependencies=()):
    """Return a MetaNode for the merged and filtered recalibrated SNP list.

    All "gatk*group_only*.vcf" files found in the makefile's 'RecalDir' are
    merged into "RECAL_MERGED.<label>.vcf", which is then filtered into
    "RECAL_FILTERED.<label>.vcf" in the same folder.

    groups       -- not used by this function.
    prefix       -- mapping providing 'Label' and 'Path'.
    options      -- object whose `makefile` mapping provides 'RecalDir' and
                    'VQSLOD_cutoff'; also passed on to the nodes.
    dependencies -- any iterable of nodes (tuple, list, ...) that the merge
                    and filter steps depend on.
    """
    recal_dir = options.makefile['RecalDir']
    recal_files = glob.glob(os.path.join(recal_dir, "gatk*group_only*.vcf"))

    # The cutoff is not applied in this function; the conversion is kept so
    # that a missing or non-numeric 'VQSLOD_cutoff' still fails here.
    float(options.makefile['VQSLOD_cutoff'])

    # The merge output is the filter input, so the path is built only once
    merged_vcf = os.path.join(
        recal_dir, 'RECAL_MERGED.{}.vcf'.format(prefix['Label']))
    filtered_vcf = os.path.join(
        recal_dir, "RECAL_FILTERED.{}.vcf".format(prefix['Label']))

    recal_merge = VariantMergeNode.customize(
        vcf_list=recal_files,
        outfile=merged_vcf,
        reference=prefix['Path'],
        options=options,
        dependencies=dependencies
    )
    recal_merge = recal_merge.build_node()

    recal_filter = VariantFilterNode.customize(
        reference=prefix['Path'],
        infile=merged_vcf,
        outfile=filtered_vcf,
        filters={
            "--not_within": '20',
            "--allelic": '2',
            "--repetitive": "Repeat_Regions.txt",
            "--emit": "pass",
            "--field": 'AC',
            "--thresh": '2'
        },
        options=options,
        # 'dependencies' defaults to a tuple, and tuple + list raises a
        # TypeError; normalize to a list before appending the merge node.
        dependencies=list(dependencies) + [recal_merge]
    )
    recal_filter = recal_filter.build_node()

    return MetaNode(description="SNP List Node",
                    dependencies=[recal_filter])
Exemple #27
0
def _build_depth(config, makefile, target):
    """Return a MetaNode wrapping one DepthHistogramNode per prefix / AOI.

    Every node reads the pooled BAMs of all samples of its prefix and writes
    "<target>.<prefix><aoi name>.depths" in `config.destination`.
    `makefile` is not used by this function.
    """
    histogram_nodes = []
    for prefix in target.prefixes:
        # Pool the BAMs of every sample; later samples win on key clashes
        pooled_bams = {}
        for sample in prefix.samples:
            pooled_bams.update(sample.bams)

        for (aoi_name, aoi_filename) in _get_aoi(prefix, name_prefix="."):
            depths_fname = "%s.%s%s.depths" % (target.name, prefix.name,
                                               aoi_name)
            depths_fpath = os.path.join(config.destination, depths_fname)

            histogram_nodes.append(
                DepthHistogramNode(config=config,
                                   target_name=target.name,
                                   input_files=pooled_bams.keys(),
                                   intervals_file=aoi_filename,
                                   output_file=depths_fpath,
                                   dependencies=pooled_bams.values()))

    return MetaNode(description="DepthHistograms", subnodes=histogram_nodes)
Exemple #28
0
def build_msa_nodes(options, settings, regions, filtering, dependencies):
    """Build sequence collection, optional MAFFT and optional filter nodes.

    Sequences are collected into "<options.destination>/alignments/<name>".
    If settings["Enabled"] is true, one MAFFT node is built per sequence
    (".fasta" -> ".afa"); if any value of `filtering` is true, a
    FilterSingletonsMetaNode writing to "<folder>.filtered" is added last.
    The last node built is returned.

    Raises RuntimeError if settings["Program"] is not "mafft"
    (case-insensitive).
    """
    if settings["Program"].lower() != "mafft":
        raise RuntimeError("Only MAFFT support has been implemented!")

    sequencedir = os.path.join(options.destination, "alignments",
                               regions["Name"])
    # Run on full set of sequences
    sequences = regions["Sequences"][None]

    collect_node = CollectSequencesNode(fasta_files=regions["Genotypes"],
                                        destination=sequencedir,
                                        sequences=sequences,
                                        dependencies=dependencies)
    result_node = collect_node
    # Every collected file maps to the node which produces it
    fasta_files = dict.fromkeys(collect_node.output_files, collect_node)

    if settings["Enabled"]:
        mafft_settings = settings["MAFFT"]
        algorithm = mafft_settings["Algorithm"]

        # The aligned files replace the unaligned ones for later steps
        fasta_files = {}
        for sequence in sequences:
            unaligned = os.path.join(sequencedir, sequence + ".fasta")
            aligned = os.path.join(sequencedir, sequence + ".afa")

            mafft = MAFFTNode.customize(input_file=unaligned,
                                        output_file=aligned,
                                        algorithm=algorithm,
                                        dependencies=collect_node)
            apply_options(mafft.command, settings["MAFFT"])
            fasta_files[aligned] = mafft.build_node()

        result_node = MetaNode(description="MAFFT",
                               subnodes=fasta_files.values(),
                               dependencies=collect_node)

    if any(filtering.itervalues()):
        result_node = FilterSingletonsMetaNode(
            input_files=fasta_files,
            destination=sequencedir + ".filtered",
            filter_by=filtering,
            dependencies=result_node)

    return result_node
Exemple #29
0
    def __init__(self, config, prefix, lanes, name):
        """Build the nodes of one library from its lanes.

        config, prefix -- passed on to the duplicate-removal and rescaling
                          helpers.
        lanes -- lane object(s) exposing `options` and `folder`; passed
                 through safe_coerce_to_tuple. All lanes are asserted to
                 share the same parent folder.
        name  -- stored as `self.name`.

        PCR duplicates are removed and qualities rescaled only if the
        "PCRDuplicates" / "RescaleQualities" options are set.
        """
        self.name = name
        self.lanes = safe_coerce_to_tuple(lanes)
        # Read from the coerced tuple, like 'folder' below, so that the raw
        # 'lanes' argument does not itself need to support indexing.
        self.options = self.lanes[0].options
        self.folder = os.path.dirname(self.lanes[0].folder)
        self.is_rmdupped = self.options["PCRDuplicates"]
        self.is_rescaled = self.options["RescaleQualities"]
        assert all((self.folder == os.path.dirname(lane.folder))
                   for lane in self.lanes)

        bams = self._collect_bams(self.lanes)
        if self.is_rmdupped:
            bams = self._remove_pcr_duplicates(config, prefix, bams)

        if self.is_rescaled:
            bams = self._rescale_quality_scores(config, prefix, bams)

        # Flatten the grouped BAMs into a single files -> nodes mapping
        self.bams = {}
        for files_and_nodes in bams.itervalues():
            self.bams.update(files_and_nodes)

        self.node = MetaNode(
            description="Library: %s" % os.path.basename(self.folder),
            dependencies=self.bams.values())
Exemple #30
0
def _build_examl_bootstraps(options, phylo, destination, input_alignment,
                            input_partition, dependencies):
    """Build the bootstrap replicates of an ExaML run.

    phylo["ExaML"]["Bootstraps"] replicates are generated as
    "<destination>/bootstraps/bootstrap.NNNN.phy", each with a random seed,
    converted to ".binary" and passed to _examl_nodes. Returns the result
    of _build_rerooted_trees on a MetaNode grouping the replicates, or None
    if no replicates were requested.
    """
    replicate_count = phylo["ExaML"]["Bootstraps"]
    alignment_template = os.path.join(destination, "bootstraps",
                                      "bootstrap.%04i.phy")

    replicates = []
    for replicate_num in xrange(replicate_count):
        replicate_phy = alignment_template % (replicate_num, )
        resample_node = PHYLIPBootstrapNode(
            input_alignment=input_alignment,
            input_partition=input_partition,
            output_alignment=replicate_phy,
            seed=random.randint(1, 2**32 - 1),
            dependencies=dependencies)

        replicate_binary = swap_ext(replicate_phy, ".binary")
        # The "%s" is left in place; it is passed on as 'output_template'
        replicate_template = swap_ext(replicate_phy, ".%s")
        parser_node = ExaMLParserNode(input_alignment=replicate_phy,
                                      input_partition=input_partition,
                                      output_file=replicate_binary,
                                      dependencies=resample_node)

        replicates.append(
            _examl_nodes(options=options,
                         settings=phylo,
                         input_alignment=replicate_phy,
                         input_partitions=input_partition,
                         input_binary=replicate_binary,
                         output_template=replicate_template,
                         dependencies=parser_node))

    if not replicates:
        return None

    meta = MetaNode(description="Bootstraps",
                    subnodes=replicates,
                    dependencies=dependencies)
    return _build_rerooted_trees(meta, phylo["RootTreesOn"])