def __init__(self, config, prefix, node, log_file=None):
    """Group `node` with an optional BAM index step and a validation step.

    config   -- forwarded to ValidateBAMNode.customize.
    prefix   -- currently unused; only the disabled reference check below
                refers to it.
    node     -- node whose BAM file (as reported by `_get_input_file`) is
                to be validated.
    log_file -- forwarded to the validation node as `output_log`.
    """
    bam_file, indexed = self._get_input_file(node)
    upstream = node.dependencies
    members = [node]

    # The validation step depends on the index node when one has to be
    # created, and directly on `node` otherwise.
    validation_dep = node
    if not indexed:
        validation_dep = BAMIndexNode(infile=bam_file, dependencies=node)
        members.append(validation_dep)

    params = ValidateBAMNode.customize(config=config,
                                       input_bam=bam_file,
                                       output_log=log_file,
                                       dependencies=validation_dep)

    # Check MD tags against reference sequence
    # FIXME: Disabled due to issues with Picard/Samtools disagreeing,
    #        backwards compatibility.
    # params.command.set_kwargs(IN_REFERENCE = prefix["Reference"])
    # params.command.add_option("R", "%(IN_REFERENCE)s", sep = "=")

    ignored_checks = (
        # Ignored since we filter out misses and low-quality hits during
        # mapping, which leads to a large proportion of missing mates for
        # PE reads.
        "MATE_NOT_FOUND",
        # Ignored due to high rate of false positives for lanes with few
        # hits, where high-quality reads may cause ValidateSamFile to
        # mis-identify the qualities
        "INVALID_QUALITY_FORMAT",
    )
    for check in ignored_checks:
        params.command.add_option("IGNORE", check, sep="=")

    members.append(params.build_node())

    # Reuse the wrapped node's own description, minus its first character.
    label = "<w/Validation: " + str(node)[1:]
    MetaNode.__init__(self,
                      description=label,
                      subnodes=members,
                      dependencies=upstream)
def __init__(self, config, prefix, node, log_file=None):
    """Build a meta-node that indexes (if needed) and validates a BAM.

    The BAM path and whether it already has an index are obtained from
    `self._get_input_file(node)`. `prefix` is not used at present (see the
    disabled reference check below); `log_file` is handed to the
    validation node as its `output_log`.
    """
    input_file, has_index = self._get_input_file(node)
    dependencies = node.dependencies

    # An index node is inserted between `node` and the validation step
    # only when no index exists yet.
    index_nodes = []
    if not has_index:
        index_nodes.append(BAMIndexNode(infile=input_file,
                                        dependencies=node))
    last_node = index_nodes[-1] if index_nodes else node

    validator = ValidateBAMNode.customize(config=config,
                                          input_bam=input_file,
                                          output_log=log_file,
                                          dependencies=last_node)
    add_option = validator.command.add_option

    # Check MD tags against reference sequence
    # FIXME: Disabled due to issues with Picard/Samtools disagreeing,
    #        backwards compatibility. See the discussion at
    #        http://sourceforge.net/mailarchive/message.php?msg_id=31348639
    # validator.command.set_kwargs(IN_REF=prefix["Reference"])
    # validator.command.add_option("R", "%(IN_REF)s", sep="=")

    # Ignored since we may filter out misses and low-quality hits during
    # mapping, which leads to a large proportion of missing PE mates.
    add_option("IGNORE", "MATE_NOT_FOUND", sep="=")
    # Ignored due to high rate of false positives for lanes with few hits,
    # where high-quality reads may cause mis-identification of qualities
    add_option("IGNORE", "INVALID_QUALITY_FORMAT", sep="=")

    subnodes = [node] + index_nodes + [validator.build_node()]
    description = "<w/Validation: " + str(node)[1:]
    MetaNode.__init__(self,
                      description=description,
                      subnodes=subnodes,
                      dependencies=dependencies)
def __init__(self, config, reference, infiles, outfile, intervals=None,
             dependencies=()):
    """Chain an indel trainer node and an indel realigner node.

    config, reference -- forwarded unchanged to both sub-nodes.
    infiles      -- one or more input files; normalised with
                    safe_coerce_to_tuple.
    outfile      -- output path of the realigner node.
    intervals    -- output path of the trainer node; defaults to
                    `outfile + ".intervals"` when falsy.
    dependencies -- dependencies of the trainer and of the meta-node.
    """
    intervals = intervals or (outfile + ".intervals")
    infiles = safe_coerce_to_tuple(infiles)

    # Arguments that both sub-nodes receive unchanged.
    shared = dict(config=config, reference=reference, infiles=infiles)

    trainer = _IndelTrainerNode(outfile=intervals,
                                dependencies=dependencies,
                                **shared)
    # The realigner reads the intervals written by the trainer, so it
    # depends on the trainer rather than on the external dependencies.
    aligner = _IndelRealignerNode(intervals=intervals,
                                  outfile=outfile,
                                  dependencies=trainer,
                                  **shared)

    description = "<GATK Indel Realigner: %i files -> '%s'>" \
        % (len(infiles), outfile)
    MetaNode.__init__(self,
                      description=description,
                      subnodes=[trainer, aligner],
                      dependencies=dependencies)
def __init__(self, input_files, destination, filter_by, dependencies=()):
    """Create one FilterSingletonsNode per input file, grouped in a meta-node.

    input_files  -- mapping of input filename -> node that the matching
                    filter node depends on.
    destination  -- passed with each filename to fileutils.reroot_path to
                    obtain the output filename.
    filter_by    -- mapping (or iterable of key/value pairs); converted to
                    a dict once and handed to every filter node.
    dependencies -- dependencies of the meta-node itself.
    """
    filter_by = dict(filter_by)

    # `items()` is available on both Python 2 and Python 3 mappings,
    # whereas `iteritems()` only exists on Python 2 dicts.
    subnodes = [
        FilterSingletonsNode(input_file=filename,
                             output_file=fileutils.reroot_path(destination,
                                                               filename),
                             filter_by=filter_by,
                             dependencies=node)
        for filename, node in input_files.items()
    ]

    description = "<FilterSingleton: %i files -> '%s'>" \
        % (len(subnodes), destination)
    MetaNode.__init__(self,
                      description=description,
                      subnodes=subnodes,
                      dependencies=dependencies)
def __init__(self, rootdir, sequences, preset="auto", subnodes=(),
             dependencies=()):
    """Create one MAFFTNode per sequence name and group them in a meta-node.

    For every name in `sequences`, the node reads
    `<rootdir>/<name>.fasta` and writes `<rootdir>/<name>.afa`.

    rootdir      -- directory joined with each sequence name.
    sequences    -- iterable of sequence names.
    preset       -- forwarded to each MAFFTNode.
    subnodes     -- accepted but never read (see note below).
    dependencies -- given to every MAFFTNode and to the meta-node.
    """
    # NOTE(review): the `subnodes` argument is ignored; only the nodes
    # built here are registered. Confirm with callers whether that is
    # intended before changing it.
    prefixes = (os.path.join(rootdir, name) for name in sequences)
    alignment_nodes = [
        MAFFTNode(infile=prefix + ".fasta",
                  outfile=prefix + ".afa",
                  preset=preset,
                  dependencies=dependencies)
        for prefix in prefixes
    ]

    description = "<MAFFTAlignSequences (%s): In '%s'>" % (preset, rootdir)
    MetaNode.__init__(self,
                      description=description,
                      subnodes=alignment_nodes,
                      dependencies=dependencies)
def __init__(self, config, reference, infiles, outfile, intervals=None, dependencies=()):
    """Set up indel realignment as a trainer step followed by a realigner.

    The trainer writes `intervals` (defaulting to `outfile + ".intervals"`
    when no value is given) and the realigner, which depends on the
    trainer, writes `outfile`. `infiles` may be a single value or a
    sequence; it is normalised with safe_coerce_to_tuple.
    """
    input_files = safe_coerce_to_tuple(infiles)
    intervals_file = intervals if intervals else outfile + ".intervals"

    training_node = _IndelTrainerNode(
        config=config,
        reference=reference,
        infiles=input_files,
        outfile=intervals_file,
        dependencies=dependencies,
    )
    realigner_node = _IndelRealignerNode(
        config=config,
        reference=reference,
        intervals=intervals_file,
        infiles=input_files,
        outfile=outfile,
        dependencies=training_node,
    )

    file_count = len(input_files)
    MetaNode.__init__(
        self,
        description="<GATK Indel Realigner: %i files -> '%s'>" % (file_count, outfile),
        subnodes=[training_node, realigner_node],
        dependencies=dependencies,
    )