Beispiel #1
0
    def __init__(self, config, prefix, node, log_file=None):
        """Bundle `node` with BAM indexing (when needed) and BAM validation.

        The resulting meta-node contains `node` itself, a BAMIndexNode if
        `_get_input_file` reports that no index is available, and a
        validation node built from ValidateBAMNode. The meta-node inherits
        the dependencies of `node`.

        `prefix` is currently unused; it is only required by the disabled
        MD-tag check described below. `log_file` is passed on as the
        validation log (`output_log`).
        """
        bam_path, indexed = self._get_input_file(node)
        members = [node]
        upstream = node.dependencies

        # The validation step must run after indexing, if indexing is needed;
        # otherwise it depends directly on the wrapped node.
        last_step = node
        if not indexed:
            last_step = BAMIndexNode(infile=bam_path, dependencies=node)
            members.append(last_step)

        builder = ValidateBAMNode.customize(config=config,
                                            input_bam=bam_path,
                                            output_log=log_file,
                                            dependencies=last_step)

        # FIXME: Checking MD tags against the reference sequence (option
        #   "R", pointing at prefix["Reference"]) is disabled due to issues
        #   with Picard/Samtools disagreeing, and for backwards
        #   compatibility. See the discussion at
        #     http://sourceforge.net/mailarchive/message.php?msg_id=31348639

        ignored_checks = (
            # Misses and low-quality hits may be filtered during mapping,
            # which leaves a large proportion of PE reads without mates.
            "MATE_NOT_FOUND",
            # High rate of false positives for lanes with few hits, where
            # high-quality reads may cause mis-identification of qualities.
            "INVALID_QUALITY_FORMAT",
        )
        for check in ignored_checks:
            builder.command.add_option("IGNORE", check, sep="=")

        members.append(builder.build_node())

        # The first character of the wrapped node's string form is dropped
        # and replaced by the "<w/Validation: " prefix.
        MetaNode.__init__(self,
                          description="<w/Validation: " + str(members[0])[1:],
                          subnodes=members,
                          dependencies=upstream)
Beispiel #2
0
    def __init__(self, config, prefix, node, log_file=None):
        """Create a meta-node that indexes (if required) and validates a BAM.

        Sub-nodes are, in order: the wrapped `node`, a BAMIndexNode when
        `_get_input_file` reports that the BAM has no index, and the
        validation node produced by ValidateBAMNode. Dependencies are taken
        over from `node`.

        `prefix` is not used by the active code; it would supply the
        reference for the MD-tag check, which is currently disabled.
        `log_file` is forwarded as the validation `output_log`.
        """
        input_bam, already_indexed = self._get_input_file(node)
        inherited_deps = node.dependencies
        children = [node]

        # Validation depends on the index node when one is created, and on
        # the wrapped node otherwise.
        validation_dep = node
        if not already_indexed:
            validation_dep = BAMIndexNode(infile=input_bam,
                                          dependencies=node)
            children.append(validation_dep)

        params = ValidateBAMNode.customize(config=config,
                                           input_bam=input_bam,
                                           output_log=log_file,
                                           dependencies=validation_dep)
        command = params.command

        # FIXME: The check of MD tags against the reference sequence (the
        #   "R" option, using prefix["Reference"]) is disabled due to issues
        #   with Picard/Samtools disagreeing, and for backwards
        #   compatibility.

        # Misses and low-quality hits are filtered out during mapping, so a
        # large proportion of PE reads end up without mates; do not report.
        command.add_option("IGNORE", "MATE_NOT_FOUND", sep="=")

        # Lanes with few hits produce many false positives here, since
        # high-quality reads may cause ValidateSamFile to mis-identify the
        # quality encoding; do not report.
        command.add_option("IGNORE", "INVALID_QUALITY_FORMAT", sep="=")

        children.append(params.build_node())

        # Replace the first character of the wrapped node's description
        # with the validation prefix.
        label = "<w/Validation: " + str(children[0])[1:]
        MetaNode.__init__(self,
                          description=label,
                          subnodes=children,
                          dependencies=inherited_deps)
Beispiel #3
0
def index_and_validate_bam(config, prefix, node, log_file=None):
    """Return a validation node for the BAM produced by `node`.

    If `_get_input_file` reports that the BAM lacks an index, a BAMIndexNode
    is inserted between `node` and the validation step. The returned node is
    built from ValidateBAMNode and writes its log to `log_file`.

    `prefix` is currently unused; it is only needed by the disabled MD-tag
    check described below.
    """
    bam_path, indexed = _get_input_file(node)

    # Validation runs after indexing when indexing is required.
    upstream = node
    if not indexed:
        upstream = BAMIndexNode(infile=bam_path, dependencies=node)

    builder = ValidateBAMNode.customize(config=config,
                                        input_bam=bam_path,
                                        output_log=log_file,
                                        dependencies=upstream)

    # FIXME: Checking MD tags against the reference sequence (option "R",
    #   pointing at prefix["Reference"]) is disabled due to issues with
    #   Picard/Samtools disagreeing, and for backwards compatibility.
    #   See the discussion at
    #     http://sourceforge.net/mailarchive/message.php?msg_id=31348639

    ignored_checks = (
        # Misses and low-quality hits may be filtered out during mapping,
        # which leads to a large proportion of missing PE mates.
        "MATE_NOT_FOUND",
        # High rate of false positives for lanes with few hits, where
        # high-quality reads may cause mis-identification of qualities.
        "INVALID_QUALITY_FORMAT",
    )
    for check in ignored_checks:
        builder.command.add_option("IGNORE", check, sep="=")

    return builder.build_node()
Beispiel #4
0
def _test_validate_bams(config):
    """Build a meta-node running two BAM validations on the test alignment.

    One validation node is created directly from ValidateBAMNode with its
    standard settings; the other is created via `customize` with
    IGNORE_WARNINGS=True set on the command. Each writes its log to its own
    sub-folder of `config.destination`.
    """
    shared_kwargs = dict(
        config=config,
        input_bam="tests/data/alignments/library_1.bam",
        dependencies=config.dependencies,
    )

    # Validation using the node's standard settings
    standard_log = os.path.join(config.destination,
                                "validate_standard", "log.txt")
    standard_node = ValidateBAMNode(output_log=standard_log, **shared_kwargs)

    # Validation with an additional command-line option
    custom_log = os.path.join(config.destination,
                              "validate_custom", "log.txt")
    custom_builder = ValidateBAMNode.customize(output_log=custom_log,
                                               **shared_kwargs)
    custom_builder.command.set_option("IGNORE_WARNINGS", "True", sep="=")

    validators = [standard_node, custom_builder.build_node()]
    return MetaNode(description="ValidateSamFile",
                    dependencies=validators)