def __init__(self, config, prefix, node, log_file=None):
    """Group `node` with an optional index node and a validation node.

    The input file and its index status are obtained from
    `self._get_input_file(node)`. If no index is reported, a BAMIndexNode
    is created on top of `node`, and the validation node is made to depend
    on that index node instead of on `node` directly.

    Arguments:
      config   -- passed through to ValidateBAMNode.customize.
      prefix   -- currently unused; only referenced by the disabled
                  reference (MD tag) check below.
      node     -- the node to be wrapped; its dependencies become the
                  dependencies of this meta-node.
      log_file -- passed to ValidateBAMNode.customize as 'output_log'.
    """
    bam_path, indexed = self._get_input_file(node)
    upstream = node.dependencies
    members = [node]

    # The validation step depends on the last node in the chain; that is
    # either the wrapped node itself, or the index node built on top of it.
    last_node = node
    if not indexed:
        last_node = BAMIndexNode(infile=bam_path, dependencies=node)
        members.append(last_node)

    validator = ValidateBAMNode.customize(
        config=config,
        input_bam=bam_path,
        output_log=log_file,
        dependencies=last_node,
    )

    # Check MD tags against reference sequence
    # FIXME: Disabled due to issues with Picard/Samtools disagreeing,
    #   backwards compatibility. See the discussion at
    #     http://sourceforge.net/mailarchive/message.php?msg_id=31348639
    # validator.command.set_kwargs(IN_REF=prefix["Reference"])
    # validator.command.add_option("R", "%(IN_REF)s", sep="=")

    ignored_checks = (
        # Misses and low-quality hits may be filtered during mapping,
        # which leaves a large proportion of PE reads without mates.
        "MATE_NOT_FOUND",
        # High rate of false positives for lanes with few hits, where
        # high-quality reads may cause mis-identification of qualities.
        "INVALID_QUALITY_FORMAT",
    )
    for check in ignored_checks:
        validator.command.add_option("IGNORE", check, sep="=")

    members.append(validator.build_node())

    # The first character of the wrapped node's string form is dropped
    # (presumably a leading '<' -- confirm against the node's __str__).
    label = "<w/Validation: " + str(members[0])[1:]
    MetaNode.__init__(self,
                      description=label,
                      subnodes=members,
                      dependencies=upstream)
def __init__(self, config, prefix, node, log_file=None):
    """Wrap `node`, an index node (if required) and a validation node.

    `self._get_input_file(node)` supplies the input file and whether it
    is already indexed. When it is not, a BAMIndexNode depending on `node`
    is added, and validation is made to depend on that node instead.

    Arguments:
      config   -- forwarded to ValidateBAMNode.customize.
      prefix   -- not used at present; only the disabled reference check
                  below refers to it.
      node     -- node being wrapped; the meta-node inherits its
                  dependencies.
      log_file -- forwarded to ValidateBAMNode.customize as 'output_log'.
    """
    bam_file, already_indexed = self._get_input_file(node)
    inherited_dependencies = node.dependencies
    wrapped = [node]

    # Validation runs after whichever node comes last: the wrapped node,
    # or the index node created for it.
    validation_dependency = node
    if not already_indexed:
        validation_dependency = BAMIndexNode(
            infile=bam_file,
            dependencies=node,
        )
        wrapped.append(validation_dependency)

    customized = ValidateBAMNode.customize(
        config=config,
        input_bam=bam_file,
        output_log=log_file,
        dependencies=validation_dependency,
    )

    # Check MD tags against reference sequence
    # FIXME: Disabled due to issues with Picard/Samtools disagreeing,
    #        backwards compatibility.
    # customized.command.set_kwargs(IN_REFERENCE = prefix["Reference"])
    # customized.command.add_option("R", "%(IN_REFERENCE)s", sep = "=")

    # Misses and low-quality hits are filtered out during mapping, which
    # leads to a large proportion of missing mates for PE reads.
    customized.command.add_option(
        "IGNORE", "MATE_NOT_FOUND", sep="=")

    # High rate of false positives for lanes with few hits, where
    # high-quality reads may cause ValidateSamFile to mis-identify the
    # quality format.
    customized.command.add_option(
        "IGNORE", "INVALID_QUALITY_FORMAT", sep="=")

    wrapped.append(customized.build_node())

    # Drops the first character of the wrapped node's string form
    # (presumably a leading '<' -- confirm against the node's __str__).
    wrapped_label = str(wrapped[0])[1:]
    MetaNode.__init__(
        self,
        description="<w/Validation: " + wrapped_label,
        subnodes=wrapped,
        dependencies=inherited_dependencies,
    )
def index_and_validate_bam(config, prefix, node, log_file=None):
    """Return a validation node for the file produced by `node`.

    The input file and its index status come from `_get_input_file(node)`.
    If no index is reported, a BAMIndexNode is inserted between `node` and
    the validation node, so that the returned node depends on it.

    Arguments:
      config   -- passed through to ValidateBAMNode.customize.
      prefix   -- currently unused; only referenced by the disabled
                  reference (MD tag) check below.
      node     -- the node whose output is to be indexed and validated.
      log_file -- passed to ValidateBAMNode.customize as 'output_log'.
    """
    bam_path, indexed = _get_input_file(node)

    if indexed:
        upstream = node
    else:
        upstream = BAMIndexNode(infile=bam_path, dependencies=node)

    builder = ValidateBAMNode.customize(
        config=config,
        input_bam=bam_path,
        output_log=log_file,
        dependencies=upstream,
    )

    # Check MD tags against reference sequence
    # FIXME: Disabled due to issues with Picard/Samtools disagreeing,
    #   backwards compatibility. See the discussion at
    #     http://sourceforge.net/mailarchive/message.php?msg_id=31348639
    # builder.command.set_kwargs(IN_REF=prefix["Reference"])
    # builder.command.add_option("R", "%(IN_REF)s", sep="=")

    ignored_checks = (
        # Misses and low-quality hits may be filtered during mapping,
        # which leaves a large proportion of PE reads without mates.
        "MATE_NOT_FOUND",
        # High rate of false positives for lanes with few hits, where
        # high-quality reads may cause mis-identification of qualities.
        "INVALID_QUALITY_FORMAT",
    )
    for check in ignored_checks:
        builder.command.add_option("IGNORE", check, sep="=")

    return builder.build_node()
def _test_validate_bams(config):
    """Build a meta-node running ValidateBAMNode in two configurations.

    One node is constructed directly; the other is built via `customize`
    with the IGNORE_WARNINGS option set. Both read the same test BAM and
    write their log to separate folders under `config.destination`.
    """
    shared_params = {
        "config": config,
        "input_bam": "tests/data/alignments/library_1.bam",
        "dependencies": config.dependencies,
    }

    standard_log = os.path.join(config.destination,
                                "validate_standard",
                                "log.txt")
    standard_node = ValidateBAMNode(output_log=standard_log,
                                    **shared_params)

    custom_log = os.path.join(config.destination,
                              "validate_custom",
                              "log.txt")
    custom_params = ValidateBAMNode.customize(output_log=custom_log,
                                              **shared_params)
    custom_params.command.set_option("IGNORE_WARNINGS", "True", sep="=")
    custom_node = custom_params.build_node()

    return MetaNode(description="ValidateSamFile",
                    dependencies=[standard_node, custom_node])