def _determine_lane_type(prefixes, data, path):
    """Classify the input given for a single barcode/lane entry.

    Arguments:
      prefixes -- container of prefix names; dict keys are tested against
                  it using `in`.
      data     -- the value found at the barcode level; either a string or
                  a dict keyed by read-types (members of _READ_TYPES) or by
                  prefix names.
      path     -- sequence of strings locating the entry; only used when
                  building error messages.

    Returns "Raw" if `data` is a string, "Trimmed" if it is a dict whose
    keys are all read-types, and "BAMs" if it is a dict whose keys are all
    prefix names. An empty dict is classified as "Trimmed", since `all()`
    is vacuously true for it.

    Raises MakefileError if a read-type key disagrees with whether or not
    its path is paired-end (per `paths.is_paired_end`), or if `data`
    matches none of the layouts above.
    """
    if isinstance(data, types.StringTypes):
        return "Raw"
    elif isinstance(data, types.DictType):
        if all((key in _READ_TYPES) for key in data):
            for (key, files) in data.iteritems():
                is_paired = paths.is_paired_end(files)

                if is_paired and (key != "Paired"):
                    raise MakefileError("Error at Barcode level; Path "
                                        "includes {Pair} key, but read-type "
                                        "is not Paired:\n "
                                        "%s:%s" % (":".join(path), key))
                elif not is_paired and (key == "Paired"):
                    raise MakefileError("Error at Barcode level; Paired pre-"
                                        "trimmed reads specified, but path "
                                        "does not contain {Pair} key:\n "
                                        "%s:%s" % (":".join(path), key))

            return "Trimmed"
        elif all((key in prefixes) for key in data):
            return "BAMs"

    # The value may be of any type at this point (e.g. None, a number, or a
    # dict with non-string keys). A plain ", ".join(data) would then fail
    # with a TypeError and hide the real problem, so non-string keys are
    # shown using repr, and non-iterable values are shown as a whole.
    try:
        found = ", ".join(key if isinstance(key, types.StringTypes)
                          else repr(key)
                          for key in data)
    except TypeError:
        found = repr(data)

    raise MakefileError("Error at Barcode level; keys must either be "
                        "prefix-names, OR 'Paired', 'Single' or 'Collapsed'. "
                        "Found: %s" % (found,))
def _build_bwa_nodes(config, parameters, input_filename, tags, options):
    """Build the BWA alignment node for a single input filename.

    If `paths.is_paired_end` reports the filename as paired-end, it is
    expanded with Pair=1 and Pair=2 and the "aln_1"/"aln_2"/"sampe" commands
    are customized; otherwise the filename is used for the first input
    only (the second is an empty string) together with "aln"/"samse".

    Arguments:
      config         -- object providing `bwa_max_threads`.
      parameters     -- dict of additional keyword arguments passed on to
                        `BWANode.customize`.
      input_filename -- input path, possibly containing a {Pair} field.
      tags           -- dict providing the ID, SM, LB, PU_src, PL and PG
                        values used for the read-group.
      options        -- dict providing "QualityOffset" and the BWA settings
                        under ["Aligners"]["BWA"].

    Returns the result of `build_node()` on the customized parameters.
    """
    is_paired = paths.is_paired_end(input_filename)
    aln_keys = ("aln_1", "aln_2") if is_paired else ("aln",)
    sam_key = "sampe" if is_paired else "samse"
    # For single-ended input there is no second file; formatting the empty
    # string simply yields an empty string.
    mate_2_template = input_filename if is_paired else ""

    params = BWANode.customize(input_file_1=input_filename.format(Pair=1),
                               input_file_2=mate_2_template.format(Pair=2),
                               threads=config.bwa_max_threads,
                               **parameters)

    bwa_options = options["Aligners"]["BWA"]
    for aln_key in aln_keys:
        if not bwa_options["UseSeed"]:
            # NOTE(review): presumably a seed length this large is meant to
            # effectively disable seeding -- confirm against the BWA manual.
            params.commands[aln_key].set_option("-l", 2 ** 16 - 1)

        if options["QualityOffset"] in (64, "Solexa"):
            params.commands[aln_key].set_option("-I")

    _apply_aln_user_parameters(bwa_options, params, aln_keys)

    pg_lowercase = tags["PG"].lower()
    read_group = "@RG\tID:{ID}\tSM:{SM}\tLB:{LB}\tPU:{PU_src}\tPL:{PL}\tPG:{PG_lc}".format(PG_lc=pg_lowercase, **tags)
    params.commands[sam_key].set_option("-r", read_group)

    params.commands["convert"].set_option('-q', bwa_options["MinQuality"])

    # Percent signs are doubled in the command-line tag before it is passed
    # on as an option value.
    escaped_cl_tag = _build_bwa_cl_tag(options, sam_key).replace("%", "%%")
    pg_tags = "bwa:CL:%s" % (escaped_cl_tag,)
    params.commands["convert"].add_option("--update-pg-tag", pg_tags)

    return params.build_node()
def _build_bowtie2_nodes(config, parameters, input_filename, tags, options):
    """Build the Bowtie2 alignment node for a single input filename.

    If `paths.is_paired_end` reports the filename as paired-end, it is
    expanded with Pair=1 and Pair=2 for the two inputs; otherwise the
    second input is an empty string.

    Arguments:
      config         -- object providing `bowtie2_max_threads`.
      parameters     -- dict of additional keyword arguments passed on to
                        `Bowtie2Node.customize`.
      input_filename -- input path, possibly containing a {Pair} field.
      tags           -- dict providing the ID, SM, LB, PU_src, PL and PG
                        values used for the read-group.
      options        -- dict providing "QualityOffset" and the Bowtie2
                        settings under ["Aligners"]["Bowtie2"].

    Returns the result of `build_node()` on the customized parameters.
    """
    if paths.is_paired_end(input_filename):
        mate_2_template = input_filename
    else:
        mate_2_template = ""

    params = Bowtie2Node.customize(
        input_file_1=input_filename.format(Pair=1),
        input_file_2=mate_2_template.format(Pair=2),
        threads=config.bowtie2_max_threads,
        **parameters)

    params.commands["convert"].set_option(
        '-q', options["Aligners"]["Bowtie2"]["MinQuality"])

    # Any offset other than 64 or 33 is treated as Solexa qualities.
    quality_offset = options["QualityOffset"]
    if quality_offset == 64:
        quality_flag = "--phred64"
    elif quality_offset == 33:
        quality_flag = "--phred33"
    else:
        quality_flag = "--solexa-quals"
    params.commands["aln"].set_option(quality_flag)

    _apply_aln_user_parameters(options["Aligners"]["Bowtie2"],
                               params, ("aln",))

    # Percent signs are doubled in the command-line tag before it is passed
    # on as an option value.
    escaped_cl_tag = _build_bowtie_cl_tag(options).replace("%", "%%")
    pg_tag = "bowtie2:CL:%s" % (escaped_cl_tag,)
    params.commands["convert"].add_option("--update-pg-tag", pg_tag)

    params.commands["aln"].set_option("--rg-id", tags["ID"])

    # (read-group field, key in `tags`); PU is read from "PU_src", and the
    # PG value is written in lower-case.
    read_group_fields = (("SM", "SM"),
                         ("LB", "LB"),
                         ("PU", "PU_src"),
                         ("PL", "PL"),
                         ("PG", "PG"))
    for (tag_name, source_key) in read_group_fields:
        tag_value = tags[source_key]
        if tag_name == "PG":
            tag_value = tag_value.lower()

        params.commands["aln"].add_option("--rg",
                                          "%s:%s" % (tag_name, tag_value))

    return params.build_node()
def _determine_lane_type(prefixes, data, path):
    """Classify the input given for a single barcode/lane entry.

    Arguments:
      prefixes -- container of prefix names; dict keys are tested against
                  it using `in`.
      data     -- the value found at the barcode level; either a string or
                  a dict keyed by read-types (members of _READ_TYPES) or by
                  prefix names.
      path     -- sequence of strings locating the entry; only used when
                  building error messages.

    Returns "Raw" if `data` is a string, "Trimmed" if it is a dict whose
    keys are all read-types, and "BAMs" if it is a dict whose keys are all
    prefix names. An empty dict is classified as "Trimmed", since `all()`
    is vacuously true for it.

    Raises MakefileError if a read-type key disagrees with whether or not
    its path is paired-end (per `paths.is_paired_end`), or if `data`
    matches none of the layouts above.
    """
    if isinstance(data, types.StringTypes):
        return "Raw"
    elif isinstance(data, types.DictType):
        if all((key in _READ_TYPES) for key in data):
            for (key, files) in data.iteritems():
                is_paired = paths.is_paired_end(files)

                if is_paired and (key != "Paired"):
                    raise MakefileError("Error at Barcode level; Path "
                                        "includes {Pair} key, but read-type "
                                        "is not Paired:\n "
                                        "%s:%s" % (":".join(path), key))
                elif not is_paired and (key == "Paired"):
                    raise MakefileError("Error at Barcode level; Paired pre-"
                                        "trimmed reads specified, but path "
                                        "does not contain {Pair} key:\n "
                                        "%s:%s" % (":".join(path), key))

            return "Trimmed"
        elif all((key in prefixes) for key in data):
            return "BAMs"

    # The value may be of any type at this point (e.g. None, a number, or a
    # dict with non-string keys). A plain ", ".join(data) would then fail
    # with a TypeError and hide the real problem, so non-string keys are
    # shown using repr, and non-iterable values are shown as a whole.
    try:
        found = ", ".join(key if isinstance(key, types.StringTypes)
                          else repr(key)
                          for key in data)
    except TypeError:
        found = repr(data)

    raise MakefileError("Error at Barcode level; keys must either be "
                        "prefix-names, OR 'Paired', 'Single' or 'Collapsed'. "
                        "Found: %s" % (found,))
def _build_bwa_nodes(config, parameters, input_filename, tags, options):
    """Build the BWA alignment node for a single input filename.

    If `paths.is_paired_end` reports the filename as paired-end, it is
    expanded with Pair=1 and Pair=2 and the "aln_1"/"aln_2"/"sampe" commands
    are customized; otherwise the filename is used for the first input
    only (the second is an empty string) together with "aln"/"samse".

    Arguments:
      config         -- object providing `bwa_max_threads`.
      parameters     -- dict of additional keyword arguments passed on to
                        `BWANode.customize`.
      input_filename -- input path, possibly containing a {Pair} field.
      tags           -- dict providing the ID, SM, LB, PU_src, PL and PG
                        values used for the read-group.
      options        -- dict providing "QualityOffset" and the BWA settings
                        under ["Aligners"]["BWA"].

    Returns the result of `build_node()` on the customized parameters.
    """
    is_paired = paths.is_paired_end(input_filename)
    aln_keys = ("aln_1", "aln_2") if is_paired else ("aln",)
    sam_key = "sampe" if is_paired else "samse"
    # For single-ended input there is no second file; formatting the empty
    # string simply yields an empty string.
    mate_2_template = input_filename if is_paired else ""

    params = BWANode.customize(input_file_1=input_filename.format(Pair=1),
                               input_file_2=mate_2_template.format(Pair=2),
                               threads=config.bwa_max_threads,
                               **parameters)

    bwa_options = options["Aligners"]["BWA"]
    for aln_key in aln_keys:
        if not bwa_options["UseSeed"]:
            # NOTE(review): presumably a seed length this large is meant to
            # effectively disable seeding -- confirm against the BWA manual.
            params.commands[aln_key].set_option("-l", 2 ** 16 - 1)

        if options["QualityOffset"] in (64, "Solexa"):
            params.commands[aln_key].set_option("-I")

    _apply_aln_user_parameters(bwa_options, params, aln_keys)

    pg_lowercase = tags["PG"].lower()
    read_group = "@RG\tID:{ID}\tSM:{SM}\tLB:{LB}\tPU:{PU_src}\tPL:{PL}\tPG:{PG_lc}".format(PG_lc=pg_lowercase, **tags)
    params.commands[sam_key].set_option("-r", read_group)

    params.commands["convert"].set_option('-q', bwa_options["MinQuality"])

    # Percent signs are doubled in the command-line tag before it is passed
    # on as an option value.
    escaped_cl_tag = _build_bwa_cl_tag(options, sam_key).replace("%", "%%")
    pg_tags = "bwa:CL:%s" % (escaped_cl_tag,)
    params.commands["convert"].add_option("--update-pg-tag", pg_tags)

    return params.build_node()
def _init_unaligned_lane(self, config, prefix, record):
    """Set up alignment and validation nodes for each read file of the lane.

    The aligner is chosen by `_select_aligner` from record["Options"]. For
    every (key, filename) pair in `self.reads.files`, an alignment object is
    created, its "convert" command is configured, and the built node is
    passed to `index_and_validate_bam`. The result is stored in `self.bams`
    under the same key, as a dict mapping the output BAM filename to the
    validated node.

    Arguments:
      config -- passed through to the aligner function and to
                `index_and_validate_bam`.
      prefix -- dict providing "Path", "Reference" and the "Node:<aligner>"
                entry that is added to the dependencies.
      record -- dict whose "Options" entry holds the aligner settings.
    """
    options = record["Options"]
    aln_key, aln_func = _select_aligner(options)
    prefix_key = "Node:%s" % (aln_key,)
    aligner_opts = options["Aligners"][aln_key]

    # Settings which are made part of the output filename.
    postfix = ["minQ%i" % aligner_opts["MinQuality"]]
    if not aligner_opts.get("UseSeed", True):
        postfix.append("noSeed")
    filename_suffix = ".".join(postfix)

    for (key, input_filename) in self.reads.files.iteritems():
        bam_basename = "%s.%s.bam" % (key.lower(), filename_suffix)
        output_filename = os.path.join(self.folder, bam_basename)

        # Common parameters between BWA / Bowtie2
        parameters = {
            "output_file": output_filename,
            "prefix": prefix["Path"],
            "reference": prefix["Reference"],
            "input_file_1": input_filename,
            "input_file_2": None,
            "dependencies": self.reads.nodes + (prefix[prefix_key],),
        }

        if paths.is_paired_end(input_filename):
            # Paired-end filenames are expanded into one file per mate.
            parameters["input_file_1"] = input_filename.format(Pair=1)
            parameters["input_file_2"] = input_filename.format(Pair=2)

        alignment_obj = aln_func(config=config,
                                 parameters=parameters,
                                 tags=self.tags,
                                 options=options)

        alignment_obj.commands["convert"].set_option(
            '-q', aligner_opts["MinQuality"])
        if aligner_opts["FilterUnmappedReads"]:
            alignment_obj.commands["convert"].set_option('-F', "0x4")

        validated_node = index_and_validate_bam(config, prefix,
                                                alignment_obj.build_node())

        self.bams[key] = {output_filename: validated_node}
def _build_bowtie2_nodes(config, parameters, input_filename, tags, options):
    """Build the Bowtie2 alignment node for a single input filename.

    If `paths.is_paired_end` reports the filename as paired-end, it is
    expanded with Pair=1 and Pair=2 for the two inputs; otherwise the
    second input is an empty string.

    Arguments:
      config         -- object providing `bowtie2_max_threads`.
      parameters     -- dict of additional keyword arguments passed on to
                        `Bowtie2Node.customize`.
      input_filename -- input path, possibly containing a {Pair} field.
      tags           -- dict providing the ID, SM, LB, PU_src, PL and PG
                        values used for the read-group.
      options        -- dict providing "QualityOffset" and the Bowtie2
                        settings under ["Aligners"]["Bowtie2"].

    Returns the result of `build_node()` on the customized parameters.
    """
    if paths.is_paired_end(input_filename):
        mate_2_template = input_filename
    else:
        mate_2_template = ""

    params = Bowtie2Node.customize(
        input_file_1=input_filename.format(Pair=1),
        input_file_2=mate_2_template.format(Pair=2),
        threads=config.bowtie2_max_threads,
        **parameters)

    params.commands["convert"].set_option(
        '-q', options["Aligners"]["Bowtie2"]["MinQuality"])

    # Any offset other than 64 or 33 is treated as Solexa qualities.
    quality_offset = options["QualityOffset"]
    if quality_offset == 64:
        quality_flag = "--phred64"
    elif quality_offset == 33:
        quality_flag = "--phred33"
    else:
        quality_flag = "--solexa-quals"
    params.commands["aln"].set_option(quality_flag)

    _apply_aln_user_parameters(options["Aligners"]["Bowtie2"],
                               params, ("aln",))

    # Percent signs are doubled in the command-line tag before it is passed
    # on as an option value.
    escaped_cl_tag = _build_bowtie_cl_tag(options).replace("%", "%%")
    pg_tag = "bowtie2:CL:%s" % (escaped_cl_tag,)
    params.commands["convert"].add_option("--update-pg-tag", pg_tag)

    params.commands["aln"].set_option("--rg-id", tags["ID"])

    # (read-group field, key in `tags`); PU is read from "PU_src", and the
    # PG value is written in lower-case.
    read_group_fields = (("SM", "SM"),
                         ("LB", "LB"),
                         ("PU", "PU_src"),
                         ("PL", "PL"),
                         ("PG", "PG"))
    for (tag_name, source_key) in read_group_fields:
        tag_value = tags[source_key]
        if tag_name == "PG":
            tag_value = tag_value.lower()

        params.commands["aln"].add_option("--rg",
                                          "%s:%s" % (tag_name, tag_value))

    return params.build_node()
def _init_unaligned_lane(self, config, prefix, record):
    """Set up alignment and validation nodes for each read file of the lane.

    The aligner is chosen by `_select_aligner` from record["Options"]. For
    every (key, filename) pair in `self.reads.files`, an alignment object is
    created, its "convert" command is configured, and the built node is
    passed to `index_and_validate_bam`. The result is stored in `self.bams`
    under the same key, as a dict mapping the output BAM filename to the
    validated node.

    Arguments:
      config -- passed through to the aligner function and to
                `index_and_validate_bam`.
      prefix -- dict providing "Path", "Reference" and the "Node:<aligner>"
                entry that is added to the dependencies.
      record -- dict whose "Options" entry holds the aligner settings.
    """
    options = record["Options"]
    aln_key, aln_func = _select_aligner(options)
    prefix_key = "Node:%s" % (aln_key,)
    aligner_opts = options["Aligners"][aln_key]

    # Settings which are made part of the output filename.
    postfix = ["minQ%i" % aligner_opts["MinQuality"]]
    if not aligner_opts.get("UseSeed", True):
        postfix.append("noSeed")
    filename_suffix = ".".join(postfix)

    for (key, input_filename) in self.reads.files.iteritems():
        bam_basename = "%s.%s.bam" % (key.lower(), filename_suffix)
        output_filename = os.path.join(self.folder, bam_basename)

        # Common parameters between BWA / Bowtie2
        parameters = {
            "output_file": output_filename,
            "prefix": prefix["Path"],
            "reference": prefix["Reference"],
            "input_file_1": input_filename,
            "input_file_2": None,
            "dependencies": self.reads.nodes + (prefix[prefix_key],),
        }

        if paths.is_paired_end(input_filename):
            # Paired-end filenames are expanded into one file per mate.
            parameters["input_file_1"] = input_filename.format(Pair=1)
            parameters["input_file_2"] = input_filename.format(Pair=2)

        alignment_obj = aln_func(config=config,
                                 parameters=parameters,
                                 tags=self.tags,
                                 options=options)

        alignment_obj.commands["convert"].set_option(
            '-q', aligner_opts["MinQuality"])
        if aligner_opts["FilterUnmappedReads"]:
            alignment_obj.commands["convert"].set_option('-F', "0x4")

        validated_node = index_and_validate_bam(config, prefix,
                                                alignment_obj.build_node())

        self.bams[key] = {output_filename: validated_node}