def validate_sam(self, samfile, reads_fastq, reference_fasta, global_alignment=True): self.assertTrue(os.path.exists(samfile)) self.assertTrue(os.path.exists(reads_fastq)) self.assertTrue(os.path.exists(reference_fasta)) return ReadAlignmentStats.getReadAlignmentStats(samFile=samfile, readFastqFile=reads_fastq, referenceFastaFile=reference_fasta, globalAlignment=global_alignment)
def validateSam(self, samFile, readFastqFile, referenceFastaFile): """Checks if a sam file is valid. """ # Check if samfile exists self.assertTrue(os.path.isfile(samFile)) #The call calculate identity will run a lot of internal consistency checks #as it calculates the alignment identity. return ReadAlignmentStats.getReadAlignmentStats(samFile, readFastqFile, referenceFastaFile, globalAlignment=True)
def validateSamfile(self, global_alignment=True): self.assertTrue(self.is_file(self.out_sam)) self.assertTrue(self.is_file(self.test_reads)) self.assertTrue(self.is_file(self.references)) return ReadAlignmentStats.getReadAlignmentStats( samFile=self.out_sam, readFastqFile=self.test_reads, referenceFastaFile=self.references, globalAlignment=global_alignment)
def main(): #Parse the inputs args/options parser = OptionParser( usage="usage: samFile, readFastqFile, referenceFastaFile [options]", version="%prog 0.1") #Options parser.add_option("--identity", dest="identity", help="Print identity of alignments", default=False, action="store_true") parser.add_option("--readCoverage", dest="readCoverage", help="Print read coverage of alignments", default=False, action="store_true") parser.add_option("--mismatchesPerAlignedBase", dest="mismatchesPerAlignedBase", help="Print mismatches per aligned base", default=False, action="store_true") parser.add_option("--deletionsPerReadBase", dest="deletionsPerReadBase", help="Print deletions per base of alignments", default=False, action="store_true") parser.add_option("--insertionsPerReadBase", dest="insertionsPerReadBase", help="Print insertions per base of alignments", default=False, action="store_true") parser.add_option( "--localAlignment", dest="localAlignment", help= "Ignore unaligned prefix and suffix of each read in making calculation", default=False, action="store_true") parser.add_option( "--printValuePerReadAlignment", dest="printValuePerReadAlignment", help="Prints the value of statistics for each read alignment", default=False, action="store_true") parser.add_option( "--noStats", dest="noStats", help= "Do not print stats (avg, median, min, max, mode) of desired statistic", default=False, action="store_true") parser.add_option( "--printAlignmentData", dest="printAlignmentData", help= "Print all stats for each read alignment in tabular format; include unaligned with --includeUnaligned", default=False, action="store_true") parser.add_option( "--includeUnaligned", dest="includeUnaligned", help="Includes unaligned reads when printing alignment data", default=False, action="store_true") addLoggingOptions(parser) #Parse the options/arguments options, args = parser.parse_args() #Setup logging setLoggingFromOptions(options) #Print help message if no input if len(sys.argv) == 1: parser.print_help() sys.exit(0) #Exit if the arguments are not what we expect if len(args) != 3: raise RuntimeError("Expected three arguments, got: %s" % " ".join(args)) #Now do the stats calculation samFile, readFastqFile, referenceFastaFile = args readAlignmentStats = ReadAlignmentStats.getReadAlignmentStats( samFile, readFastqFile, referenceFastaFile, globalAlignment=not options.localAlignment, includeUnaligned=options.includeUnaligned) def report(values, statisticName): if not options.noStats: print "Average" + statisticName, numpy.average(values) print "Median" + statisticName, numpy.median(values) print "Min" + statisticName, min(values) print "Max" + statisticName, max(values) if options.printValuePerReadAlignment: print "Values" + statisticName, "\t".join(map(str, values)) def report_alignment_data(): name = map(lambda rAS: rAS.readName(), readAlignmentStats) ref_id = map(lambda rAS: rAS.referenceID(), readAlignmentStats) read_type = map(lambda rAS: rAS.readType(), readAlignmentStats) length = map(lambda rAS: rAS.readLength(), readAlignmentStats) identity = map(lambda rAS: rAS.identity(), readAlignmentStats) read_coverage = map(lambda rAS: rAS.readCoverage(), readAlignmentStats) ref_coverage = map(lambda rAS: rAS.referenceCoverage(), readAlignmentStats) mismatch = map(lambda rAS: rAS.mismatchesPerAlignedBase(), readAlignmentStats) insertion = map(lambda rAS: rAS.insertionsPerReadBase(), readAlignmentStats) deletion = map(lambda rAS: rAS.deletionsPerReadBase(), readAlignmentStats) mean_quality = map(lambda rAS: rAS.readMeanQuality(), readAlignmentStats) aligned = map(lambda rAS: rAS.isAligned(), readAlignmentStats) aligned_length = map(lambda rAS: rAS.alignedReadLength(), readAlignmentStats) ref_c_content = map(lambda rAS: rAS.getRefCContent(), readAlignmentStats) ref_gc_content = map(lambda rAS: rAS.getRefGcContent(), readAlignmentStats) print "\t".join(["Name", "ReferenceID", "ReadType", "Length", "Aligned", \ "AlignedLength", "Identity", "ReadCoverage", \ "ReferenceCoverage", "MismatchPerBase", \ "InsertionPerBase", "DeletionPerBase", "MeanQuality", "RefCContent", "RefGcContent"]) for read in zip(name, ref_id, read_type, length, aligned, aligned_length, \ identity, read_coverage, ref_coverage, mismatch, insertion,\ deletion, mean_quality, ref_c_content, ref_gc_content): print "\t".join(map(str, read)) if options.printAlignmentData: report_alignment_data() else: if options.identity: report(map(lambda rAS: rAS.identity(), readAlignmentStats), "Identity") if options.readCoverage: report(map(lambda rAS: rAS.readCoverage(), readAlignmentStats), "ReadCoverage") if options.mismatchesPerAlignedBase: report( map(lambda rAS: rAS.mismatchesPerAlignedBase(), readAlignmentStats), "MismatchesPerAlignedBase") if options.deletionsPerReadBase: report( map(lambda rAS: rAS.deletionsPerReadBase(), readAlignmentStats), "DeletionsPerReadBase") if options.insertionsPerReadBase: report( map(lambda rAS: rAS.insertionsPerReadBase(), readAlignmentStats), "InsertionsPerReadBase")
def main(): #Parse the inputs args/options parser = OptionParser(usage="usage: samFile, readFastqFile, referenceFastaFile [options]", version="%prog 0.1") #Options parser.add_option("--identity", dest="identity", help="Print identity of alignments", default=False, action="store_true") parser.add_option("--readCoverage", dest="readCoverage", help="Print read coverage of alignments", default=False, action="store_true") parser.add_option("--mismatchesPerAlignedBase", dest="mismatchesPerAlignedBase", help="Print mismatches per aligned base", default=False, action="store_true") parser.add_option("--deletionsPerReadBase", dest="deletionsPerReadBase", help="Print deletions per base of alignments", default=False, action="store_true") parser.add_option("--insertionsPerReadBase", dest="insertionsPerReadBase", help="Print insertions per base of alignments", default=False, action="store_true") parser.add_option("--localAlignment", dest="localAlignment", help="Ignore unaligned prefix and suffix of each read in making calculation", default=False, action="store_true") parser.add_option("--printValuePerReadAlignment", dest="printValuePerReadAlignment", help="Prints the value of statistics for each read alignment", default=False, action="store_true") parser.add_option("--noStats", dest="noStats", help="Do not print stats (avg, median, min, max, mode) of desired statistic", default=False, action="store_true") addLoggingOptions(parser) #Parse the options/arguments options, args = parser.parse_args() #Setup logging setLoggingFromOptions(options) #Print help message if no input if len(sys.argv) == 1: parser.print_help() sys.exit(0) #Exit if the arguments are not what we expect if len(args) != 3: raise RuntimeError("Expected three arguments, got: %s" % " ".join(args)) #Now do the stats calculation samFile, readFastqFile, referenceFastaFile = args readAlignmentStats = ReadAlignmentStats.getReadAlignmentStats(samFile, readFastqFile, referenceFastaFile, globalAlignment=not options.localAlignment) def report(values, statisticName): if not options.noStats: print "Average" + statisticName, numpy.average(values) print "Median" + statisticName, numpy.median(values) print "Min" + statisticName, min(values) print "Max" + statisticName, max(values) if options.printValuePerReadAlignment: print "Values" + statisticName, "\t".join(map(str, values)) if options.identity: report(map(lambda rAS : rAS.identity(), readAlignmentStats), "Identity") if options.readCoverage: report(map(lambda rAS : rAS.readCoverage(), readAlignmentStats), "ReadCoverage") if options.mismatchesPerAlignedBase: report(map(lambda rAS : rAS.mismatchesPerAlignedBase(), readAlignmentStats), "MismatchesPerAlignedBase") if options.deletionsPerReadBase: report(map(lambda rAS : rAS.deletionsPerReadBase(), readAlignmentStats), "DeletionsPerReadBase") if options.insertionsPerReadBase: report(map(lambda rAS : rAS.insertionsPerReadBase(), readAlignmentStats), "InsertionsPerReadBase")
def main(): #Parse the inputs args/options parser = OptionParser(usage="usage: samFile, readFastqFile, referenceFastaFile [options]", version="%prog 0.1") #Options parser.add_option("--readIdentity", dest="readIdentity", help="Print readIdentity of alignments", default=False, action="store_true") parser.add_option("--alignmentIdentity", dest="alignmentIdentity", help="Print alignmentIdentity", default=False, action="store_true") parser.add_option("--readCoverage", dest="readCoverage", help="Print read coverage of alignments", default=False, action="store_true") parser.add_option("--mismatchesPerAlignedBase", dest="mismatchesPerAlignedBase", help="Print mismatches per aligned base", default=False, action="store_true") parser.add_option("--deletionsPerReadBase", dest="deletionsPerReadBase", help="Print deletions per base of alignments", default=False, action="store_true") parser.add_option("--insertionsPerReadBase", dest="insertionsPerReadBase", help="Print insertions per base of alignments", default=False, action="store_true") parser.add_option("--readLength", dest="readLength", help="Print read lengths of aligned reads", default=False, action="store_true") parser.add_option("--localAlignment", dest="localAlignment", help="Ignore unaligned prefix and suffix of each read in making calculation", default=False, action="store_true") parser.add_option("--printValuePerReadAlignment", dest="printValuePerReadAlignment", help="Prints the value of statistics for each read alignment", default=False, action="store_true") parser.add_option("--noStats", dest="noStats", help="Do not print stats (avg, median, min, max, mode) of desired statistic", default=False, action="store_true") addLoggingOptions(parser) #Parse the options/arguments options, args = parser.parse_args() #Setup logging setLoggingFromOptions(options) #Print help message if no input if len(sys.argv) == 1: parser.print_help() sys.exit(0) #Exit if the arguments are not what we expect if len(args) != 3: raise RuntimeError("Expected three arguments, got: %s" % " ".join(args)) #Now do the stats calculation samFile, readFastqFile, referenceFastaFile = args readAlignmentStats = ReadAlignmentStats.getReadAlignmentStats(samFile, readFastqFile, referenceFastaFile, globalAlignment=not options.localAlignment) def report(values, statisticName): if not options.noStats: print "Average" + statisticName, numpy.average(values) print "Median" + statisticName, numpy.median(values) print "Min" + statisticName, min(values) print "Max" + statisticName, max(values) if options.printValuePerReadAlignment: print "Values" + statisticName, "\t".join(map(str, values)) if options.readIdentity: report(map(lambda rAS : rAS.readIdentity(), readAlignmentStats), "ReadIdentity") if options.alignmentIdentity: report(map(lambda rAS : rAS.alignmentIdentity(), readAlignmentStats), "AlignmentIdentity") if options.readCoverage: report(map(lambda rAS : rAS.readCoverage(), readAlignmentStats), "ReadCoverage") if options.mismatchesPerAlignedBase: report(map(lambda rAS : rAS.mismatchesPerAlignedBase(), readAlignmentStats), "MismatchesPerAlignedBase") if options.deletionsPerReadBase: report(map(lambda rAS : rAS.deletionsPerReadBase(), readAlignmentStats), "DeletionsPerReadBase") if options.insertionsPerReadBase: report(map(lambda rAS : rAS.insertionsPerReadBase(), readAlignmentStats), "InsertionsPerReadBase") if options.readLength: report(map(lambda rAS : rAS.readLength(), readAlignmentStats), "ReadLength")