def parseFASTAFilteringCommandLineOptions(args, reads):
    """
    Apply the FASTA filtering options given on the command line to a set of
    reads.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @return: The filtered C{Reads} instance.
    """
    def zeroBasedOrNone(rangeString):
        # An option that was not given (or is empty) is passed to the
        # filter as None.
        if not rangeString:
            return None
        return parseRangeString(rangeString, convertToZeroBased=True)

    # The range strings are parsed before any other option is read.
    sequencesToKeep = zeroBasedOrNone(args.keepSequences)
    sequencesToRemove = zeroBasedOrNone(args.removeSequences)

    filterOptions = dict(
        minLength=args.minLength,
        maxLength=args.maxLength,
        whitelist=set(args.whitelist) if args.whitelist else None,
        blacklist=set(args.blacklist) if args.blacklist else None,
        whitelistFile=args.whitelistFile,
        blacklistFile=args.blacklistFile,
        titleRegex=args.titleRegex,
        negativeTitleRegex=args.negativeTitleRegex,
        keepSequences=sequencesToKeep,
        removeSequences=sequencesToRemove,
        head=args.head,
        removeDuplicates=args.removeDuplicates,
        removeDuplicatesById=args.removeDuplicatesById,
        randomSubset=args.randomSubset,
        trueLength=args.trueLength,
        sampleFraction=args.sampleFraction,
        sequenceNumbersFile=args.sequenceNumbersFile,
    )

    return reads.filter(**filterOptions)
def parseFASTAEditingCommandLineOptions(args, reads):
    """
    Examine parsed FASTA editing command-line options and return the
    edited reads.

    Sites to keep or remove can be given directly on the command line
    and/or in files holding one range string per line. When both are
    given for the same option, the resulting sets of sites are merged.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @raise ValueError: If a line of the keep sites file or of the remove
        sites file cannot be parsed. The message gives the file name and
        the (1-based) number of the offending line.
    @return: The filtered C{Reads} instance.
    """
    removeGaps = args.removeGaps
    removeDescriptions = args.removeDescriptions
    truncateTitlesAfter = args.truncateTitlesAfter

    keepSites = (
        parseRangeString(args.keepSites, convertToZeroBased=True)
        if args.keepSites else None)

    if args.keepSitesFile:
        keepSites = keepSites or set()
        with open(args.keepSitesFile) as fp:
            # Count lines from 1 so the error message points at the line
            # number a user will see in an editor.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    keepSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Keep sites file %r line %d could not be parsed: '
                        '%s' % (args.keepSitesFile, lineNumber, e))

    removeSites = (
        parseRangeString(args.removeSites, convertToZeroBased=True)
        if args.removeSites else None)

    if args.removeSitesFile:
        removeSites = removeSites or set()
        with open(args.removeSitesFile) as fp:
            # Line numbers are 1-based here too, for the same reason.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    removeSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Remove sites file %r line %d parse error: %s'
                        % (args.removeSitesFile, lineNumber, e))

    return reads.filter(
        removeGaps=removeGaps,
        truncateTitlesAfter=truncateTitlesAfter,
        removeDescriptions=removeDescriptions,
        idLambda=args.idLambda, readLambda=args.readLambda,
        keepSites=keepSites, removeSites=removeSites,
        reverse=args.reverse, reverseComplement=args.reverseComplement)
def parseFASTAEditingCommandLineOptions(args, reads):
    """
    Examine parsed FASTA editing command-line options and return the
    edited reads.

    Sites to keep or remove may come from the command line, from a file
    with one range string per line, or from both (in which case the sets
    of sites are merged).

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @raise ValueError: If a line of the keep sites file or of the remove
        sites file cannot be parsed. The message gives the file name and
        the (1-based) number of the offending line.
    @return: The filtered C{Reads} instance.
    """
    removeGaps = args.removeGaps
    removeDescriptions = args.removeDescriptions
    truncateTitlesAfter = args.truncateTitlesAfter

    keepSites = (parseRangeString(args.keepSites, convertToZeroBased=True)
                 if args.keepSites else None)

    if args.keepSitesFile:
        keepSites = keepSites or set()
        with open(args.keepSitesFile) as fp:
            # Start counting at 1: the number is only used in the error
            # message, where users expect the first line to be line 1.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    keepSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Keep sites file %r line %d could not be parsed: '
                        '%s' % (args.keepSitesFile, lineNumber, e))

    removeSites = (parseRangeString(args.removeSites, convertToZeroBased=True)
                   if args.removeSites else None)

    if args.removeSitesFile:
        removeSites = removeSites or set()
        with open(args.removeSitesFile) as fp:
            # 1-based line numbers, as for the keep sites file.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    removeSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Remove sites file %r line %d parse error: %s'
                        % (args.removeSitesFile, lineNumber, e))

    return reads.filter(removeGaps=removeGaps,
                        truncateTitlesAfter=truncateTitlesAfter,
                        removeDescriptions=removeDescriptions,
                        idLambda=args.idLambda,
                        readLambda=args.readLambda,
                        keepSites=keepSites,
                        removeSites=removeSites,
                        reverse=args.reverse,
                        reverseComplement=args.reverseComplement)
def testZeroConversion(self):
    """
    When zero-based conversion is requested, every number in the result
    must be one less than the number given in the range string.
    """
    converted = parseRangeString('6-8,9,10-12,4', convertToZeroBased=True)
    self.assertEqual({3, 5, 6, 7, 8, 9, 10, 11}, converted)
def parseFASTAFilteringCommandLineOptions(args, reads):
    """
    Filter reads according to the parsed FASTA filtering command-line
    options.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @return: The filtered C{Reads} instance.
    """
    # Sequence number ranges are converted to zero-based sets. A missing
    # (falsy) option is passed on as None.
    if args.keepSequences:
        wanted = parseRangeString(args.keepSequences,
                                  convertToZeroBased=True)
    else:
        wanted = None

    if args.removeSequences:
        unwanted = parseRangeString(args.removeSequences,
                                    convertToZeroBased=True)
    else:
        unwanted = None

    # The id lists are passed on as sets, or as None when not given.
    whitelist = set(args.whitelist) if args.whitelist else None
    blacklist = set(args.blacklist) if args.blacklist else None

    return reads.filter(
        minLength=args.minLength,
        maxLength=args.maxLength,
        whitelist=whitelist,
        blacklist=blacklist,
        whitelistFile=args.whitelistFile,
        blacklistFile=args.blacklistFile,
        titleRegex=args.titleRegex,
        negativeTitleRegex=args.negativeTitleRegex,
        keepSequences=wanted,
        removeSequences=unwanted,
        head=args.head,
        removeDuplicates=args.removeDuplicates,
        removeDuplicatesById=args.removeDuplicatesById,
        randomSubset=args.randomSubset,
        trueLength=args.trueLength,
        sampleFraction=args.sampleFraction,
        sequenceNumbersFile=args.sequenceNumbersFile)
def testSingleRange(self):
    """
    Parsing one hyphenated range must give every number from its start
    to its end, both included.
    """
    expected = {6, 7, 8, 9, 10}
    self.assertEqual(expected, parseRangeString('6-10'))
def testSingleNumberSpaceBeforeAndAfter(self):
    """
    Whitespace preceding and following a single number must be ignored,
    giving a set holding just that number.
    """
    parsed = parseRangeString(' 6 ')
    self.assertEqual({6}, parsed)
def testSingleNumber(self):
    """
    Parsing a lone number must give a set containing only that number.
    """
    parsed = parseRangeString('6')
    self.assertEqual({6}, parsed)
# Optionally align the two reads before comparing them, reporting their
# lengths as they were before alignment.
if args.align:
    len1, len2 = map(len, reads)
    if len1 == len2:
        print('Pre-alignment, sequence lengths were identical: %s' % len1)
    else:
        print('Pre-alignment, sequence lengths: %d, %d (difference %d)' %
              (len1, len2, abs(len1 - len2)))

    # Align.
    reads = needle(reads)

    if args.alignmentFile:
        # Save outside the assert: assert statements are removed when
        # Python runs with -O, which would otherwise silently skip writing
        # the alignment file.
        savedCount = reads.save(args.alignmentFile)
        assert savedCount == 2

# Restrict the comparison to the given sites, if any (None otherwise).
offsets = (parseRangeString(args.sites, convertToZeroBased=True)
           if args.sites else None)

read1, read2 = reads
len1, len2 = map(len, reads)
identicalLengths = len1 == len2

# Sanity check.
if args.align:
    assert identicalLengths

match = compareDNAReads(read1, read2, matchAmbiguous=(not args.strict),
                        offsets=offsets)
def testTwoRangesAndTwoNumbers(self):
    """
    A comma-separated mix of two ranges and two single numbers must give
    the union of all the numbers they cover.
    """
    expected = {4, 6, 7, 8, 9, 10, 11, 12}
    self.assertEqual(expected, parseRangeString('6-8,9,10-12,4'))
def testTwoOverlappingRanges(self):
    """
    Ranges that overlap must be merged, with the numbers they share
    appearing just once in the resulting set.
    """
    expected = {6, 7, 8, 9, 10}
    self.assertEqual(expected, parseRangeString('6-9,7-10'))
def testTwoRanges(self):
    """
    Two comma-separated ranges must give the union of the numbers in both
    ranges.
    """
    expected = {6, 7, 8, 9, 10}
    self.assertEqual(expected, parseRangeString('6-8,9-10'))
def testSingleRangeWithSpaceBeforeAfterHyphen(self):
    """
    Spaces on either side of the hyphen in a range must not change the
    resulting set.
    """
    expected = {6, 7, 8, 9, 10}
    self.assertEqual(expected, parseRangeString('6 - 10'))
def parseFASTAFilteringCommandLineOptions(args, reads):
    """
    Examine parsed command-line options and return the filtered reads.

    Sequence numbers and sites given as range strings are converted to
    zero-based sets. Sites to keep or remove may also be read from files
    holding one range string per line, and are merged with any sites given
    directly on the command line.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @raise ValueError: If a line of the keep sites file or of the remove
        sites file cannot be parsed. The message gives the file name and
        the (1-based) number of the offending line.
    @return: The filtered C{Reads} instance.
    """
    keepSequences = (
        parseRangeString(args.keepSequences, convertToZeroBased=True)
        if args.keepSequences else None)

    removeSequences = (
        parseRangeString(args.removeSequences, convertToZeroBased=True)
        if args.removeSequences else None)

    keepSites = (
        parseRangeString(args.keepSites, convertToZeroBased=True)
        if args.keepSites else None)

    if args.keepSitesFile:
        keepSites = keepSites or set()
        with open(args.keepSitesFile) as fp:
            # Count lines from 1 so the error message matches the line
            # numbers a user will see in an editor.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    keepSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Keep sites file %r line %d could not be parsed: %s'
                        % (args.keepSitesFile, lineNumber, e))

    removeSites = (
        parseRangeString(args.removeSites, convertToZeroBased=True)
        if args.removeSites else None)

    if args.removeSitesFile:
        removeSites = removeSites or set()
        with open(args.removeSitesFile) as fp:
            # 1-based line numbers, as for the keep sites file.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    removeSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Remove sites file %r line %d parse error: %s'
                        % (args.removeSitesFile, lineNumber, e))

    return reads.filter(
        minLength=args.minLength, maxLength=args.maxLength,
        removeGaps=args.removeGaps,
        whitelist=set(args.whitelist) if args.whitelist else None,
        blacklist=set(args.blacklist) if args.blacklist else None,
        whitelistFile=args.whitelistFile, blacklistFile=args.blacklistFile,
        titleRegex=args.titleRegex,
        negativeTitleRegex=args.negativeTitleRegex,
        truncateTitlesAfter=args.truncateTitlesAfter,
        keepSequences=keepSequences, removeSequences=removeSequences,
        head=args.head, removeDuplicates=args.removeDuplicates,
        removeDuplicatesById=args.removeDuplicatesById,
        removeDescriptions=args.removeDescriptions,
        randomSubset=args.randomSubset, trueLength=args.trueLength,
        sampleFraction=args.sampleFraction,
        sequenceNumbersFile=args.sequenceNumbersFile,
        idLambda=args.idLambda, readLambda=args.readLambda,
        keepSites=keepSites, removeSites=removeSites)
def testTwoRangesAndANumber(self):
    """
    Parsing the range string '6-8,10' must give the numbers in the range
    plus the single number.
    """
    expected = {6, 7, 8, 10}
    self.assertEqual(expected, parseRangeString('6-8,10'))
if args.align: len1, len2 = map(len, reads) if len1 == len2: print('Pre-alignment, sequence lengths were identical: %s' % len1) else: print('Pre-alignment, sequence lengths: %d, %d (difference %d)' % ( len1, len2, abs(len1 - len2))) # Align. reads = needle(reads) if args.alignmentFile: assert reads.save(args.alignmentFile) == 2 offsets = (parseRangeString(args.sites, convertToZeroBased=True) if args.sites else None) read1, read2 = reads len1, len2 = map(len, reads) identicalLengths = len1 == len2 # Sanity check. if args.align: assert identicalLengths match = compareDNAReads(read1, read2, matchAmbiguous=(not args.strict), offsets=offsets) x = 'Post-alignment, sequence' if args.align else 'Sequence' if identicalLengths: