Beispiel #1
0
def parseFASTAFilteringCommandLineOptions(args, reads):
    """
    Filter reads according to the FASTA filtering options given on the
    command line.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @return: The result of calling C{reads.filter} with the options taken
        from C{args}.
    """
    # Range strings are only parsed when given; an unset (falsy) option is
    # handed to the filter as None.
    keepSequences = None
    if args.keepSequences:
        keepSequences = parseRangeString(
            args.keepSequences, convertToZeroBased=True)

    removeSequences = None
    if args.removeSequences:
        removeSequences = parseRangeString(
            args.removeSequences, convertToZeroBased=True)

    # Title lists become sets; an empty or missing list becomes None.
    whitelist = blacklist = None
    if args.whitelist:
        whitelist = set(args.whitelist)
    if args.blacklist:
        blacklist = set(args.blacklist)

    filterOptions = {
        'minLength': args.minLength,
        'maxLength': args.maxLength,
        'whitelist': whitelist,
        'blacklist': blacklist,
        'whitelistFile': args.whitelistFile,
        'blacklistFile': args.blacklistFile,
        'titleRegex': args.titleRegex,
        'negativeTitleRegex': args.negativeTitleRegex,
        'keepSequences': keepSequences,
        'removeSequences': removeSequences,
        'head': args.head,
        'removeDuplicates': args.removeDuplicates,
        'removeDuplicatesById': args.removeDuplicatesById,
        'randomSubset': args.randomSubset,
        'trueLength': args.trueLength,
        'sampleFraction': args.sampleFraction,
        'sequenceNumbersFile': args.sequenceNumbersFile,
    }
    return reads.filter(**filterOptions)
Beispiel #2
0
def parseFASTAEditingCommandLineOptions(args, reads):
    """
    Examine parsed FASTA editing command-line options and return the reads
    with those edits applied.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @raise ValueError: If a line in C{args.keepSitesFile} or
        C{args.removeSitesFile} cannot be parsed as a range string. The
        message gives the file name and the (1-based) line number.
    @return: The filtered C{Reads} instance.
    """
    removeGaps = args.removeGaps
    removeDescriptions = args.removeDescriptions
    truncateTitlesAfter = args.truncateTitlesAfter
    keepSites = (
        parseRangeString(args.keepSites, convertToZeroBased=True)
        if args.keepSites else None)

    if args.keepSitesFile:
        # Sites from the file are added to any given on the command line.
        keepSites = keepSites or set()
        with open(args.keepSitesFile) as fp:
            # Count lines from 1 so that error messages agree with what
            # a text editor shows.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    keepSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Keep sites file %r line %d could not be parsed: '
                        '%s' % (args.keepSitesFile, lineNumber, e))

    removeSites = (
        parseRangeString(args.removeSites, convertToZeroBased=True)
        if args.removeSites else None)

    if args.removeSitesFile:
        # Sites from the file are added to any given on the command line.
        removeSites = removeSites or set()
        with open(args.removeSitesFile) as fp:
            # Count lines from 1 so that error messages agree with what
            # a text editor shows.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    removeSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Remove sites file %r line %d parse error: %s'
                        % (args.removeSitesFile, lineNumber, e))

    return reads.filter(
        removeGaps=removeGaps,
        truncateTitlesAfter=truncateTitlesAfter,
        removeDescriptions=removeDescriptions,
        idLambda=args.idLambda, readLambda=args.readLambda,
        keepSites=keepSites, removeSites=removeSites,
        reverse=args.reverse, reverseComplement=args.reverseComplement)
Beispiel #3
0
def parseFASTAEditingCommandLineOptions(args, reads):
    """
    Examine parsed FASTA editing command-line options and return the reads
    with those edits applied.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @raise ValueError: If a line in C{args.keepSitesFile} or
        C{args.removeSitesFile} cannot be parsed as a range string. The
        message gives the file name and the (1-based) line number.
    @return: The filtered C{Reads} instance.
    """
    removeGaps = args.removeGaps
    removeDescriptions = args.removeDescriptions
    truncateTitlesAfter = args.truncateTitlesAfter
    keepSites = (parseRangeString(args.keepSites, convertToZeroBased=True)
                 if args.keepSites else None)

    if args.keepSitesFile:
        # Sites from the file are added to any given on the command line.
        keepSites = keepSites or set()
        with open(args.keepSitesFile) as fp:
            # Number lines from 1 (not 0) so a reported line number can be
            # looked up directly in the file.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    keepSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Keep sites file %r line %d could not be parsed: '
                        '%s' % (args.keepSitesFile, lineNumber, e))

    removeSites = (parseRangeString(args.removeSites, convertToZeroBased=True)
                   if args.removeSites else None)

    if args.removeSitesFile:
        # Sites from the file are added to any given on the command line.
        removeSites = removeSites or set()
        with open(args.removeSitesFile) as fp:
            # Number lines from 1 (not 0) so a reported line number can be
            # looked up directly in the file.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    removeSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Remove sites file %r line %d parse error: %s' %
                        (args.removeSitesFile, lineNumber, e))

    return reads.filter(removeGaps=removeGaps,
                        truncateTitlesAfter=truncateTitlesAfter,
                        removeDescriptions=removeDescriptions,
                        idLambda=args.idLambda,
                        readLambda=args.readLambda,
                        keepSites=keepSites,
                        removeSites=removeSites,
                        reverse=args.reverse,
                        reverseComplement=args.reverseComplement)
Beispiel #4
0
 def testZeroConversion(self):
     """
     When zero-based conversion is requested, every number in the parsed
     set must be one less than the number given in the string.
     """
     expected = {3, 5, 6, 7, 8, 9, 10, 11}
     actual = parseRangeString('6-8,9,10-12,4', convertToZeroBased=True)
     self.assertEqual(expected, actual)
Beispiel #5
0
 def testZeroConversion(self):
     """
     Requesting zero-based conversion must shift each parsed number down
     by one.
     """
     rangeString = '6-8,9,10-12,4'
     converted = parseRangeString(rangeString, convertToZeroBased=True)
     self.assertEqual({3, 5, 6, 7, 8, 9, 10, 11}, converted)
Beispiel #6
0
def parseFASTAFilteringCommandLineOptions(args, reads):
    """
    Filter reads according to the FASTA filtering options given on the
    command line.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @return: The result of calling C{reads.filter} with the options taken
        from C{args}.
    """
    # Range strings are only parsed when given; an unset (falsy) option is
    # handed to the filter as None.
    keepSequences = None
    if args.keepSequences:
        keepSequences = parseRangeString(
            args.keepSequences, convertToZeroBased=True)

    removeSequences = None
    if args.removeSequences:
        removeSequences = parseRangeString(
            args.removeSequences, convertToZeroBased=True)

    # Title lists become sets; an empty or missing list becomes None.
    whitelist = blacklist = None
    if args.whitelist:
        whitelist = set(args.whitelist)
    if args.blacklist:
        blacklist = set(args.blacklist)

    filterOptions = {
        'minLength': args.minLength,
        'maxLength': args.maxLength,
        'whitelist': whitelist,
        'blacklist': blacklist,
        'whitelistFile': args.whitelistFile,
        'blacklistFile': args.blacklistFile,
        'titleRegex': args.titleRegex,
        'negativeTitleRegex': args.negativeTitleRegex,
        'keepSequences': keepSequences,
        'removeSequences': removeSequences,
        'head': args.head,
        'removeDuplicates': args.removeDuplicates,
        'removeDuplicatesById': args.removeDuplicatesById,
        'randomSubset': args.randomSubset,
        'trueLength': args.trueLength,
        'sampleFraction': args.sampleFraction,
        'sequenceNumbersFile': args.sequenceNumbersFile,
    }
    return reads.filter(**filterOptions)
Beispiel #7
0
 def testSingleRange(self):
     """
     Parsing one hyphenated range must give all the integers from its
     start to its end, both included.
     """
     expected = {6, 7, 8, 9, 10}
     self.assertEqual(expected, parseRangeString('6-10'))
Beispiel #8
0
 def testSingleNumberSpaceBeforeAndAfter(self):
     """
     Whitespace before and after a single number must be ignored, giving
     a set containing just that number.
     """
     padded = ' 6  '
     self.assertEqual({6}, parseRangeString(padded))
Beispiel #9
0
 def testSingleNumber(self):
     """
     Parsing a string holding one number must give a set containing only
     that number.
     """
     parsed = parseRangeString('6')
     self.assertEqual({6}, parsed)
if args.align:
    # Report the sequence lengths as they are before the alignment is made.
    len1, len2 = map(len, reads)
    if len1 == len2:
        print('Pre-alignment, sequence lengths were identical: %s' % len1)
    else:
        print('Pre-alignment, sequence lengths: %d, %d (difference %d)' %
              (len1, len2, abs(len1 - len2)))

    # Align.
    reads = needle(reads)

    if args.alignmentFile:
        # Perform the save outside the assert statement. Asserts are
        # removed when Python is run with -O, which would otherwise mean
        # the alignment file was silently never written.
        savedCount = reads.save(args.alignmentFile)
        assert savedCount == 2

# The sites to compare, converted to zero-based offsets (None if no sites
# were given).
offsets = (parseRangeString(args.sites, convertToZeroBased=True)
           if args.sites else None)

read1, read2 = reads
len1, len2 = map(len, reads)
identicalLengths = len1 == len2

# Sanity check.
if args.align:
    assert identicalLengths

match = compareDNAReads(read1,
                        read2,
                        matchAmbiguous=(not args.strict),
                        offsets=offsets)
Beispiel #11
0
 def testTwoRangesAndTwoNumbers(self):
     """
     A string mixing two ranges with two single numbers must give the
     set of all the numbers mentioned.
     """
     expected = {4, 6, 7, 8, 9, 10, 11, 12}
     actual = parseRangeString('6-8,9,10-12,4')
     self.assertEqual(expected, actual)
Beispiel #12
0
 def testTwoOverlappingRanges(self):
     """
     When two ranges overlap, the result must be the set of numbers
     covered by either of them.
     """
     expected = {6, 7, 8, 9, 10}
     self.assertEqual(expected, parseRangeString('6-9,7-10'))
Beispiel #13
0
 def testTwoOverlappingRanges(self):
     """
     Ranges that share some numbers must be merged into a single set
     covering both.
     """
     overlapping = '6-9,7-10'
     parsed = parseRangeString(overlapping)
     self.assertEqual({6, 7, 8, 9, 10}, parsed)
Beispiel #14
0
 def testTwoRanges(self):
     """
     Two comma-separated ranges must give the set of all numbers in
     either range.
     """
     expected = {6, 7, 8, 9, 10}
     self.assertEqual(expected, parseRangeString('6-8,9-10'))
Beispiel #15
0
 def testSingleRangeWithSpaceBeforeAfterHyphen(self):
     """
     Spaces on either side of the hyphen in a range must not change the
     resulting set.
     """
     expected = {6, 7, 8, 9, 10}
     self.assertEqual(expected, parseRangeString('6 - 10'))
Beispiel #16
0
 def testSingleRange(self):
     """
     One hyphenated range must parse to the set of integers between its
     two endpoints, endpoints included.
     """
     parsed = parseRangeString('6-10')
     self.assertEqual({6, 7, 8, 9, 10}, parsed)
Beispiel #17
0
 def testSingleNumberSpaceBeforeAndAfter(self):
     """
     A single number surrounded by whitespace must parse to a set holding
     only that number.
     """
     parsed = parseRangeString(' 6  ')
     self.assertEqual({6}, parsed)
Beispiel #18
0
 def testSingleNumber(self):
     """
     A string that is just one number must parse to a one-element set.
     """
     expected = {6}
     self.assertEqual(expected, parseRangeString('6'))
Beispiel #19
0
 def testSingleRangeWithSpaceBeforeAfterHyphen(self):
     """
     A range written with a space before and after its hyphen must parse
     to the same set as the range without spaces.
     """
     spaced = '6 - 10'
     parsed = parseRangeString(spaced)
     self.assertEqual({6, 7, 8, 9, 10}, parsed)
Beispiel #20
0
 def testTwoRanges(self):
     """
     A string with two ranges separated by a comma must parse to the
     union of the two ranges.
     """
     parsed = parseRangeString('6-8,9-10')
     self.assertEqual({6, 7, 8, 9, 10}, parsed)
Beispiel #21
0
def parseFASTAFilteringCommandLineOptions(args, reads):
    """
    Examine parsed command-line options and return the reads filtered
    according to them.

    @param args: An argparse namespace, as returned by the argparse
        C{parse_args} function.
    @param reads: A C{Reads} instance to filter.
    @raise ValueError: If a line in C{args.keepSitesFile} or
        C{args.removeSitesFile} cannot be parsed as a range string. The
        message gives the file name and the (1-based) line number.
    @return: The filtered C{Reads} instance.
    """
    keepSequences = (
        parseRangeString(args.keepSequences, convertToZeroBased=True)
        if args.keepSequences else None)

    removeSequences = (
        parseRangeString(args.removeSequences, convertToZeroBased=True)
        if args.removeSequences else None)

    keepSites = (
        parseRangeString(args.keepSites, convertToZeroBased=True)
        if args.keepSites else None)

    if args.keepSitesFile:
        # Sites from the file are added to any given on the command line.
        keepSites = keepSites or set()
        with open(args.keepSitesFile) as fp:
            # Count lines from 1 so that error messages agree with what
            # a text editor shows.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    keepSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Keep sites file %r line %d could not be parsed: %s'
                        % (args.keepSitesFile, lineNumber, e))

    removeSites = (
        parseRangeString(args.removeSites, convertToZeroBased=True)
        if args.removeSites else None)

    if args.removeSitesFile:
        # Sites from the file are added to any given on the command line.
        removeSites = removeSites or set()
        with open(args.removeSitesFile) as fp:
            # Count lines from 1 so that error messages agree with what
            # a text editor shows.
            for lineNumber, line in enumerate(fp, 1):
                try:
                    removeSites.update(
                        parseRangeString(line, convertToZeroBased=True))
                except ValueError as e:
                    raise ValueError(
                        'Remove sites file %r line %d parse error: %s'
                        % (args.removeSitesFile, lineNumber, e))

    return reads.filter(
        minLength=args.minLength, maxLength=args.maxLength,
        removeGaps=args.removeGaps,
        whitelist=set(args.whitelist) if args.whitelist else None,
        blacklist=set(args.blacklist) if args.blacklist else None,
        whitelistFile=args.whitelistFile, blacklistFile=args.blacklistFile,
        titleRegex=args.titleRegex,
        negativeTitleRegex=args.negativeTitleRegex,
        truncateTitlesAfter=args.truncateTitlesAfter,
        keepSequences=keepSequences, removeSequences=removeSequences,
        head=args.head, removeDuplicates=args.removeDuplicates,
        removeDuplicatesById=args.removeDuplicatesById,
        removeDescriptions=args.removeDescriptions,
        randomSubset=args.randomSubset, trueLength=args.trueLength,
        sampleFraction=args.sampleFraction,
        sequenceNumbersFile=args.sequenceNumbersFile,
        idLambda=args.idLambda, readLambda=args.readLambda,
        keepSites=keepSites, removeSites=removeSites)
Beispiel #22
0
 def testTwoRangesAndANumber(self):
     """
     A range followed by a single number must give the set holding the
     numbers in the range plus that number.
     """
     expected = {6, 7, 8, 10}
     self.assertEqual(expected, parseRangeString('6-8,10'))
Beispiel #23
0
 def testTwoRangesAndTwoNumbers(self):
     """
     Two ranges interleaved with two single numbers must parse to the
     set of every number given.
     """
     rangeString = '6-8,9,10-12,4'
     parsed = parseRangeString(rangeString)
     self.assertEqual({4, 6, 7, 8, 9, 10, 11, 12}, parsed)
Beispiel #24
0
 def testTwoRangesAndANumber(self):
     """
     Parsing a range and a separate single number must give the numbers
     of the range together with the single number.
     """
     parsed = parseRangeString('6-8,10')
     self.assertEqual({6, 7, 8, 10}, parsed)
Beispiel #25
0
if args.align:
    # Report the sequence lengths as they are before the alignment is made.
    len1, len2 = map(len, reads)
    if len1 == len2:
        print('Pre-alignment, sequence lengths were identical: %s' % len1)
    else:
        print('Pre-alignment, sequence lengths: %d, %d (difference %d)' % (
            len1, len2, abs(len1 - len2)))

    # Align.
    reads = needle(reads)

    if args.alignmentFile:
        # Perform the save outside the assert statement. Asserts are
        # removed when Python is run with -O, which would otherwise mean
        # the alignment file was silently never written.
        savedCount = reads.save(args.alignmentFile)
        assert savedCount == 2

# The sites to compare, converted to zero-based offsets (None if no sites
# were given).
offsets = (parseRangeString(args.sites, convertToZeroBased=True)
           if args.sites else None)

read1, read2 = reads
len1, len2 = map(len, reads)
identicalLengths = len1 == len2

# Sanity check.
if args.align:
    assert identicalLengths

match = compareDNAReads(read1, read2, matchAmbiguous=(not args.strict),
                        offsets=offsets)

x = 'Post-alignment, sequence' if args.align else 'Sequence'
if identicalLengths: