def parseFASTAFilteringCommandLineOptions(args, reads):
    """
    Apply the FASTA filtering options given on the command line to some
    reads.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @return: The result of calling C{reads.filter} with the filtering
        options taken from C{args}.
    """
    # Options that were not given (or are otherwise false) are passed on to
    # the filter as None.
    keepSequences = None
    if args.keepSequences:
        keepSequences = parseRangeExpression(args.keepSequences,
                                             convertToZeroBased=True)

    removeSequences = None
    if args.removeSequences:
        removeSequences = parseRangeExpression(args.removeSequences,
                                               convertToZeroBased=True)

    whitelist = set(args.whitelist) if args.whitelist else None
    blacklist = set(args.blacklist) if args.blacklist else None

    filterOptions = dict(
        minLength=args.minLength,
        maxLength=args.maxLength,
        maxNFraction=args.maxNFraction,
        whitelist=whitelist,
        blacklist=blacklist,
        whitelistFile=args.whitelistFile,
        blacklistFile=args.blacklistFile,
        titleRegex=args.titleRegex,
        negativeTitleRegex=args.negativeTitleRegex,
        keepSequences=keepSequences,
        removeSequences=removeSequences,
        head=args.head,
        removeDuplicates=args.removeDuplicates,
        removeDuplicatesById=args.removeDuplicatesById,
        removeDuplicatesUseMD5=args.removeDuplicatesUseMD5,
        randomSubset=args.randomSubset,
        trueLength=args.trueLength,
        sampleFraction=args.sampleFraction,
        sequenceNumbersFile=args.sequenceNumbersFile,
    )

    return reads.filter(**filterOptions)
def _addSitesFromFile(filename, sites, errorFormat):
    """
    Add the sites given by the range expressions in a file to a set.

    @param filename: The C{str} name of a file, each line of which is passed
        to C{parseRangeExpression} (with conversion to zero-based values).
    @param sites: The C{set} to update, in place, with the parsed sites.
    @param errorFormat: A C{str} %-format that is given the file name, the
        1-based line number, and the underlying error, to make the message
        of the C{ValueError} raised for a line that cannot be parsed.
    @raise ValueError: If parsing a line of the file raises C{ValueError}.
    """
    with open(filename) as fp:
        # Count lines from 1 so the number in an error message matches what
        # a text editor shows for the offending line.
        for lineNumber, line in enumerate(fp, 1):
            try:
                sites.update(
                    parseRangeExpression(line, convertToZeroBased=True))
            except ValueError as e:
                raise ValueError(errorFormat % (filename, lineNumber, e))


def parseFASTAEditingCommandLineOptions(args, reads):
    """
    Examine parsed FASTA editing command-line options and return the
    edited reads.

    Sites to keep (or remove) may be given directly on the command line, in
    a file of range expressions (one per line), or both, in which case the
    two sets of sites are combined.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @raise ValueError: If a line in a keep sites or remove sites file cannot
        be parsed. The message includes the file name and the 1-based line
        number.
    @return: The result of calling C{reads.filter} with the editing options
        taken from C{args}.
    """
    removeGaps = args.removeGaps
    removeDescriptions = args.removeDescriptions
    truncateTitlesAfter = args.truncateTitlesAfter

    keepSites = (parseRangeExpression(args.keepSites, convertToZeroBased=True)
                 if args.keepSites else None)

    if args.keepSitesFile:
        keepSites = keepSites or set()
        _addSitesFromFile(
            args.keepSitesFile, keepSites,
            'Keep sites file %r line %d could not be parsed: %s')

    removeSites = (
        parseRangeExpression(args.removeSites, convertToZeroBased=True)
        if args.removeSites else None)

    if args.removeSitesFile:
        removeSites = removeSites or set()
        _addSitesFromFile(
            args.removeSitesFile, removeSites,
            'Remove sites file %r line %d parse error: %s')

    return reads.filter(removeGaps=removeGaps,
                        truncateTitlesAfter=truncateTitlesAfter,
                        removeDescriptions=removeDescriptions,
                        idLambda=args.idLambda,
                        readLambda=args.readLambda,
                        keepSites=keepSites,
                        removeSites=removeSites,
                        reverse=args.reverse,
                        reverseComplement=args.reverseComplement)
def testDoubleParens(self):
    """
    An intersection of two parenthesized unions such as
    '(3-5 | 7-9) & (5-7 | 9-11)' must produce the expected set.
    """
    expression = '(3-5 | 7-9) & (5-7 | 9-11)'
    self.assertEqual({5, 7, 9}, parseRangeExpression(expression))
def testParens(self):
    """
    An intersection of a parenthesized union and a range such as
    '(3-5 | 7-9) & 5-7' must produce the expected set.
    """
    expression = '(3-5 | 7-9) & 5-7'
    self.assertEqual({5, 7}, parseRangeExpression(expression))
def testDifferenceWithSpaces(self):
    """
    A set difference written with surrounding spaces, such as 6-10 - 7-8,
    must produce the expected set.
    """
    expression = '6-10 - 7-8'
    self.assertEqual({6, 9, 10}, parseRangeExpression(expression))
def testIntersection(self):
    """
    Intersecting two ranges, as in 3-4 & 4-8, must produce the expected
    set.
    """
    expression = '3-4 & 4-8'
    self.assertEqual({4}, parseRangeExpression(expression))
def testUnion(self):
    """
    The union of two ranges, as in 3-4 | 6-8, must produce the expected
    set.
    """
    expression = '3-4 | 6-8'
    self.assertEqual({3, 4, 6, 7, 8}, parseRangeExpression(expression))
def testTwoRangesWithSpace(self):
    """
    Two ranges separated by a comma and a space, as in 3-4, 6-8, must
    produce the expected set.
    """
    expression = '3-4, 6-8'
    self.assertEqual({3, 4, 6, 7, 8}, parseRangeExpression(expression))
def testCommasAndRange(self):
    """
    Comma-separated numbers followed by a range, as in 3,4,5-7, must
    produce the expected set.
    """
    expression = '3,4,5-7'
    self.assertEqual({3, 4, 5, 6, 7}, parseRangeExpression(expression))
def testCommas(self):
    """
    Comma-separated single numbers, as in 3,4,5, must produce the expected
    set.
    """
    expression = '3,4,5'
    self.assertEqual({3, 4, 5}, parseRangeExpression(expression))
def testOneRangeZeroBased(self):
    """
    A single range, 3-4, must produce the expected (one lower) values when
    a true second argument is passed to request zero-based conversion.
    """
    expression = '3-4'
    self.assertEqual({2, 3}, parseRangeExpression(expression, True))
def testOneRange(self):
    """
    A single range, 3-4, must produce the expected set.
    """
    expression = '3-4'
    self.assertEqual({3, 4}, parseRangeExpression(expression))
def testEmptyString(self):
    """
    Parsing the empty string must result in an empty set.
    """
    expression = ''
    self.assertEqual(set(), parseRangeExpression(expression))
if args.align:
    # Report the sequence lengths as they were before alignment.
    len1, len2 = map(len, reads)
    if len1 == len2:
        print('Pre-alignment, sequence lengths were identical: %s' % len1)
    else:
        print('Pre-alignment, sequence lengths: %d, %d (difference %d)' % (
            len1, len2, abs(len1 - len2)))

    # Align.
    reads = needle(reads)

    if args.alignmentFile:
        # Do the save outside the assert statement: assert statements are
        # removed when Python runs with -O, which would otherwise mean the
        # alignment file silently never gets written.
        savedCount = reads.save(args.alignmentFile)
        assert savedCount == 2

# Sites to restrict the comparison to (converted to zero-based offsets), or
# None if no sites were given.
offsets = (parseRangeExpression(args.sites, convertToZeroBased=True)
           if args.sites else None)

read1, read2 = reads
len1, len2 = map(len, reads)
identicalLengths = len1 == len2

# Sanity check.
if args.align:
    assert identicalLengths

match = compareAaReads(read1, read2, offsets=offsets)

x = 'Post-alignment, sequence' if args.align else 'Sequence'
if identicalLengths:
    print('%s lengths are identical: %s' % (x, len1))
parser.add_argument(
    'range', metavar='NUMBER,RANGE,...',
    help=('The ranges of directory inc numbers that should be printed. E.g., '
          '1-3,5 will output just the 1st, 2nd, 3rd, and 5th directory '
          'names. All others will be omitted. This option can include '
          'parentheses and Python set operators, e.g. '
          '"(3-5 | 10-12) - 5-10".'))

args = parser.parse_args()

result = []
missing = []

# Iterate in sorted order. The parsed range is used here as an unordered
# collection of numbers, so without sorting the order of the printed names
# would depend on set iteration order.
for n in sorted(parseRangeExpression(args.range)):
    files = glob('[DW]_[0-9][0-9][0-9][0-9][0-9][0-9]_%d_*' % n)
    if len(files) == 1:
        result.append(files[0])
    else:
        # Either nothing matched or the match was ambiguous (more than one
        # name). Both cases are reported as missing.
        missing.append(n)

if missing:
    print('Found no matching directories for %s %s.' % (
        'index' if len(missing) == 1 else 'indices',
        ', '.join(map(str, sorted(missing)))), file=sys.stderr)
    sys.exit(1)
else:
    print('\n'.join(result))