Example #1
0
def _determine_lane_type(prefixes, data, path):
    """Classify the value given for a single lane in the makefile.

    Arguments:
      prefixes -- container of known prefix names; a dict whose keys are all
                  found here is classified as "BAMs".
      data     -- the value to classify; expected to be a string or a dict.
      path     -- sequence of strings locating the lane in the makefile;
                  only used when building error messages.

    Returns "Raw" if `data` is a string, "Trimmed" if it is a dict whose keys
    are all found in _READ_TYPES, and "BAMs" if it is a dict whose keys are
    all found in `prefixes`.

    Raises MakefileError if a read-type disagrees with whether its path is
    paired-end (as reported by paths.is_paired_end), or if `data` matches
    none of the layouts above.
    """
    if isinstance(data, types.StringTypes):
        return "Raw"
    elif isinstance(data, types.DictType):
        if all((key in _READ_TYPES) for key in data):
            for (key, files) in data.iteritems():
                is_paired = paths.is_paired_end(files)

                if is_paired and (key != "Paired"):
                    raise MakefileError("Error at Barcode level; Path "
                                        "includes {Pair} key, but read-type "
                                        "is not Paired:\n    "
                                        "%s:%s" % (":".join(path), key))
                elif not is_paired and (key == "Paired"):
                    raise MakefileError("Error at Barcode level; Paired pre-"
                                        "trimmed reads specified, but path "
                                        "does not contain {Pair} key:\n    "
                                        "%s:%s" % (":".join(path), key))

            return "Trimmed"
        elif all((key in prefixes) for key in data):
            return "BAMs"

        # Keys are not guaranteed to be strings (e.g. numbers in the
        # makefile), and str.join() rejects anything else; string keys are
        # passed through untouched so the message is unchanged for them.
        found = ", ".join(key if isinstance(key, types.StringTypes)
                          else repr(key) for key in data)
    else:
        # Neither a string nor a dict (e.g. a list, a number, or None);
        # joining such a value could raise TypeError and mask the real
        # problem, so it is reported via its repr instead.
        found = repr(data)

    raise MakefileError("Error at Barcode level; keys must either be "
                        "prefix-names, OR 'Paired', 'Single' or 'Collapsed'. "
                        "Found: %s" % (found,))
Example #2
0
def _build_bwa_nodes(config, parameters, input_filename, tags, options):
    """Configure a BWA alignment for one lane and return the built node.

    `input_filename` is either a plain path, in which case the "aln" and
    "samse" commands are configured, or a template containing a {Pair}
    field, in which case "aln_1", "aln_2" and "sampe" are configured.
    The read-group line is filled in from `tags`; aligner settings are read
    from options["Aligners"]["BWA"] and options["QualityOffset"].
    """
    if paths.is_paired_end(input_filename):
        # A single template provides both mates; {Pair} is substituted below.
        mate_templates = (input_filename, input_filename)
        aln_keys = ("aln_1", "aln_2")
        sam_key = "sampe"
    else:
        mate_templates = (input_filename, "")
        aln_keys = ("aln",)
        sam_key = "samse"

    first_mate, second_mate = mate_templates
    params = BWANode.customize(input_file_1=first_mate.format(Pair=1),
                               input_file_2=second_mate.format(Pair=2),
                               threads=config.bwa_max_threads,
                               **parameters)

    bwa_options = options["Aligners"]["BWA"]
    for aln_key in aln_keys:
        if not bwa_options["UseSeed"]:
            # NOTE(review): presumably a seed length this large effectively
            # turns seeding off -- confirm against the BWA manual.
            params.commands[aln_key].set_option("-l", 2 ** 16 - 1)
        if options["QualityOffset"] in (64, "Solexa"):
            params.commands[aln_key].set_option("-I")
    _apply_aln_user_parameters(bwa_options, params, aln_keys)

    rg_template = "@RG\tID:{ID}\tSM:{SM}\tLB:{LB}\tPU:{PU_src}\tPL:{PL}\tPG:{PG_lc}"
    read_group = rg_template.format(PG_lc=tags["PG"].lower(), **tags)
    params.commands[sam_key].set_option("-r", read_group)

    min_quality = options["Aligners"]["BWA"]["MinQuality"]
    params.commands["convert"].set_option('-q', min_quality)

    # Literal percent signs in the command-line tag are doubled before the
    # tag is handed to the "convert" command.
    escaped_cl = _build_bwa_cl_tag(options, sam_key).replace("%", "%%")
    params.commands["convert"].add_option("--update-pg-tag",
                                          "bwa:CL:%s" % (escaped_cl,))

    return params.build_node()
Example #3
0
def _build_bowtie2_nodes(config, parameters, input_filename, tags, options):
    """Configure a Bowtie2 alignment for one lane and return the built node.

    `input_filename` is either a plain path or a template containing a
    {Pair} field; in the latter case both mate filenames are derived from it.
    Aligner settings are read from options["Aligners"]["Bowtie2"] and
    options["QualityOffset"]; read-group fields are taken from `tags`.
    """
    first_template = input_filename
    second_template = ""
    if paths.is_paired_end(input_filename):
        # A single template provides both mates; {Pair} is substituted below.
        second_template = input_filename

    params = Bowtie2Node.customize(
        input_file_1=first_template.format(Pair=1),
        input_file_2=second_template.format(Pair=2),
        threads=config.bowtie2_max_threads,
        **parameters)

    min_quality = options["Aligners"]["Bowtie2"]["MinQuality"]
    params.commands["convert"].set_option('-q', min_quality)

    # Any offset other than 64 or 33 falls back to Solexa qualities.
    if options["QualityOffset"] == 64:
        quality_flag = "--phred64"
    elif options["QualityOffset"] == 33:
        quality_flag = "--phred33"
    else:
        quality_flag = "--solexa-quals"
    params.commands["aln"].set_option(quality_flag)
    _apply_aln_user_parameters(options["Aligners"]["Bowtie2"], params, ("aln",))

    # Literal percent signs in the command-line tag are doubled before the
    # tag is handed to the "convert" command.
    escaped_cl = _build_bowtie_cl_tag(options).replace("%", "%%")
    params.commands["convert"].add_option("--update-pg-tag",
                                          "bowtie2:CL:%s" % (escaped_cl,))

    params.commands["aln"].set_option("--rg-id", tags["ID"])
    # (read-group field, key in `tags` holding its value)
    rg_sources = (("SM", "SM"), ("LB", "LB"), ("PU", "PU_src"),
                  ("PL", "PL"), ("PG", "PG"))
    for (rg_name, source_key) in rg_sources:
        rg_value = tags[source_key]
        if rg_name == "PG":
            rg_value = rg_value.lower()
        params.commands["aln"].add_option("--rg", "%s:%s" % (rg_name, rg_value))

    return params.build_node()
Example #4
0
def _determine_lane_type(prefixes, data, path):
    """Determine which kind of input a lane entry in the makefile describes.

    Arguments:
      prefixes -- container of known prefix names.
      data     -- the lane's value; expected to be a string or a dict.
      path     -- sequence of strings locating the lane in the makefile;
                  only used when building error messages.

    Returns:
      "Raw"     -- `data` is a string.
      "Trimmed" -- `data` is a dict and every key is found in _READ_TYPES.
      "BAMs"    -- `data` is a dict and every key is found in `prefixes`.

    Raises MakefileError if a read-type disagrees with whether its path is
    paired-end (as reported by paths.is_paired_end), or if `data` fits none
    of the layouts above.
    """
    if isinstance(data, types.StringTypes):
        return "Raw"

    if isinstance(data, types.DictType):
        if all((key in _READ_TYPES) for key in data):
            for (key, files) in data.iteritems():
                is_paired = paths.is_paired_end(files)

                if is_paired and (key != "Paired"):
                    raise MakefileError("Error at Barcode level; Path "
                                        "includes {Pair} key, but read-type "
                                        "is not Paired:\n    "
                                        "%s:%s" % (":".join(path), key))
                elif not is_paired and (key == "Paired"):
                    raise MakefileError("Error at Barcode level; Paired pre-"
                                        "trimmed reads specified, but path "
                                        "does not contain {Pair} key:\n    "
                                        "%s:%s" % (":".join(path), key))

            return "Trimmed"
        elif all((key in prefixes) for key in data):
            return "BAMs"

        # str.join() only accepts strings, but keys may be of other types
        # (e.g. numbers); string keys are left as-is so the message is
        # unchanged for them, anything else is shown via its repr.
        described = ", ".join(key if isinstance(key, types.StringTypes)
                              else repr(key) for key in data)
    else:
        # Not a string and not a dict (e.g. a list, a number, or None).
        # Joining such a value could raise TypeError and hide the actual
        # makefile error, so the value's repr is reported instead.
        described = repr(data)

    raise MakefileError("Error at Barcode level; keys must either be "
                        "prefix-names, OR 'Paired', 'Single' or 'Collapsed'. "
                        "Found: %s" % (described, ))
Example #5
0
def _build_bwa_nodes(config, parameters, input_filename, tags, options):
    """Set up BWA for a single lane and return the resulting node.

    For a paired-end `input_filename` (a template with a {Pair} field) the
    "aln_1", "aln_2" and "sampe" commands are configured; otherwise "aln"
    and "samse" are. Read-group information comes from `tags`, and aligner
    settings from options["Aligners"]["BWA"] and options["QualityOffset"].
    """
    is_paired = paths.is_paired_end(input_filename)
    if is_paired:
        # Both mates are derived from the same {Pair} template.
        second_template = input_filename
        aln_keys, sam_key = ("aln_1", "aln_2"), "sampe"
    else:
        second_template = ""
        aln_keys, sam_key = ("aln", ), "samse"

    params = BWANode.customize(
        input_file_1=input_filename.format(Pair=1),
        input_file_2=second_template.format(Pair=2),
        threads=config.bwa_max_threads,
        **parameters)

    for aln_key in aln_keys:
        seeding_disabled = not options["Aligners"]["BWA"]["UseSeed"]
        if seeding_disabled:
            # NOTE(review): presumably a seed length this large effectively
            # turns seeding off -- confirm against the BWA manual.
            params.commands[aln_key].set_option("-l", 2**16 - 1)

        if options["QualityOffset"] in (64, "Solexa"):
            params.commands[aln_key].set_option("-I")
    _apply_aln_user_parameters(options["Aligners"]["BWA"], params, aln_keys)

    pg_lowercase = tags["PG"].lower()
    read_group = "@RG\tID:{ID}\tSM:{SM}\tLB:{LB}\tPU:{PU_src}\tPL:{PL}\tPG:{PG_lc}".format(
        PG_lc=pg_lowercase, **tags)
    params.commands[sam_key].set_option("-r", read_group)
    params.commands["convert"].set_option(
        '-q', options["Aligners"]["BWA"]["MinQuality"])

    # Percent signs in the command-line tag are doubled before the tag is
    # passed on to the "convert" command.
    cl_tag = _build_bwa_cl_tag(options, sam_key)
    cl_tag = cl_tag.replace("%", "%%")
    params.commands["convert"].add_option("--update-pg-tag",
                                          "bwa:CL:%s" % (cl_tag, ))

    return params.build_node()
Example #6
0
    def _init_unaligned_lane(self, config, prefix, record):
        """Create alignment and validation nodes for every file in self.reads.

        The aligner is picked by _select_aligner from record["Options"]. For
        each (key, filename) pair in self.reads.files, an aligner object is
        configured, built into a node, passed to index_and_validate_bam, and
        the result is stored in self.bams[key] as {output filename: node}.
        """
        options = record["Options"]
        aln_key, aln_func = _select_aligner(options)
        prefix_key = "Node:%s" % (aln_key, )

        # Settings that are encoded in the name of the output BAM.
        name_parts = ["minQ%i" % options["Aligners"][aln_key]["MinQuality"]]
        if not options["Aligners"][aln_key].get("UseSeed", True):
            name_parts.append("noSeed")
        suffix = ".".join(name_parts)

        for (key, input_filename) in self.reads.files.iteritems():
            # Common parameters between BWA / Bowtie2
            output_filename = os.path.join(
                self.folder, "%s.%s.bam" % (key.lower(), suffix))
            parameters = {
                "output_file": output_filename,
                "prefix": prefix["Path"],
                "reference": prefix["Reference"],
                "input_file_1": input_filename,
                "input_file_2": None,
                "dependencies": self.reads.nodes + (prefix[prefix_key], )
            }

            if paths.is_paired_end(input_filename):
                # Expand the {Pair} template into one filename per mate.
                parameters.update(
                    input_file_1=input_filename.format(Pair=1),
                    input_file_2=input_filename.format(Pair=2))

            alignment_obj = aln_func(config=config,
                                     parameters=parameters,
                                     tags=self.tags,
                                     options=options)

            alignment_opts = options["Aligners"][aln_key]
            convert_cmd = alignment_obj.commands["convert"]
            convert_cmd.set_option('-q', alignment_opts["MinQuality"])
            if alignment_opts["FilterUnmappedReads"]:
                convert_cmd.set_option('-F', "0x4")

            validated_node = index_and_validate_bam(
                config, prefix, alignment_obj.build_node())

            self.bams[key] = {output_filename: validated_node}
Example #7
0
def _build_bowtie2_nodes(config, parameters, input_filename, tags, options):
    """Set up Bowtie2 for a single lane and return the resulting node.

    A paired-end `input_filename` (a template with a {Pair} field) is
    expanded into one filename per mate; otherwise the second input is the
    empty string. Aligner settings come from options["Aligners"]["Bowtie2"]
    and options["QualityOffset"]; read-group fields come from `tags`.
    """
    is_paired = paths.is_paired_end(input_filename)
    # Both mates are derived from the same {Pair} template when paired.
    mate_2_template = input_filename if is_paired else ""

    params = Bowtie2Node.customize(
        input_file_1=input_filename.format(Pair=1),
        input_file_2=mate_2_template.format(Pair=2),
        threads=config.bowtie2_max_threads,
        **parameters)

    bowtie2_min_quality = options["Aligners"]["Bowtie2"]["MinQuality"]
    params.commands["convert"].set_option('-q', bowtie2_min_quality)

    # Offsets other than 64 and 33 are treated as Solexa qualities.
    if options["QualityOffset"] == 64:
        params.commands["aln"].set_option("--phred64")
    else:
        if options["QualityOffset"] == 33:
            params.commands["aln"].set_option("--phred33")
        else:
            params.commands["aln"].set_option("--solexa-quals")
    _apply_aln_user_parameters(options["Aligners"]["Bowtie2"], params,
                               ("aln", ))

    # Percent signs in the command-line tag are doubled before the tag is
    # passed on to the "convert" command.
    cl_tag = _build_bowtie_cl_tag(options)
    cl_tag = cl_tag.replace("%", "%%")
    params.commands["convert"].add_option("--update-pg-tag",
                                          "bowtie2:CL:%s" % (cl_tag, ))

    params.commands["aln"].set_option("--rg-id", tags["ID"])
    for tag_name in ("SM", "LB", "PU", "PL", "PG"):
        # PU is stored under "PU_src", and PG is written in lower-case;
        # the remaining fields are copied verbatim.
        if tag_name == "PU":
            tag_value = tags["PU_src"]
        elif tag_name == "PG":
            tag_value = tags["PG"].lower()
        else:
            tag_value = tags[tag_name]

        rg_field = "%s:%s" % (tag_name, tag_value)
        params.commands["aln"].add_option("--rg", rg_field)

    return params.build_node()
Example #8
0
    def _init_unaligned_lane(self, config, prefix, record):
        """Build one aligned, validated BAM node per entry in self.reads.files.

        The aligner key and factory function are obtained from
        _select_aligner(record["Options"]). Each resulting node is run
        through index_and_validate_bam and recorded in self.bams under the
        same key as the input, as a one-item dict {output filename: node}.
        """
        aln_key, aln_func = _select_aligner(record["Options"])
        prefix_key = "Node:%s" % (aln_key,)

        # The output filename records the settings used for the alignment.
        aligner_settings = record["Options"]["Aligners"][aln_key]
        postfix = ["minQ%i" % aligner_settings["MinQuality"]]
        use_seed = record["Options"]["Aligners"][aln_key].get("UseSeed", True)
        if not use_seed:
            postfix.append("noSeed")

        for (key, input_filename) in self.reads.files.iteritems():
            # Common parameters between BWA / Bowtie2
            bam_name = "%s.%s.bam" % (key.lower(), ".".join(postfix))
            output_filename = os.path.join(self.folder, bam_name)
            dependencies = self.reads.nodes + (prefix[prefix_key],)
            parameters = {"output_file": output_filename,
                          "prefix": prefix["Path"],
                          "reference": prefix["Reference"],
                          "input_file_1": input_filename,
                          "input_file_2": None,
                          "dependencies": dependencies}

            if paths.is_paired_end(input_filename):
                # Expand the {Pair} template into one filename per mate.
                for mate in (1, 2):
                    mate_key = "input_file_%i" % (mate,)
                    parameters[mate_key] = input_filename.format(Pair=mate)

            alignment_obj = aln_func(config=config,
                                     parameters=parameters,
                                     tags=self.tags,
                                     options=record["Options"])

            alignment_opts = record["Options"]["Aligners"][aln_key]
            min_quality = alignment_opts["MinQuality"]
            alignment_obj.commands["convert"].set_option('-q', min_quality)
            if alignment_opts["FilterUnmappedReads"]:
                alignment_obj.commands["convert"].set_option('-F', "0x4")

            alignment_node = alignment_obj.build_node()
            self.bams[key] = {
                output_filename: index_and_validate_bam(config, prefix,
                                                        alignment_node),
            }