Example #1
0
 def testNoUpperCaseSequencesOnly(self):
     """
     When combineReads is called without an upperCase argument, the case
     of a sequence passed in the sequences list must be left untouched.
     """
     sequences = ['id aCGt']
     result = list(combineReads(None, sequences))
     expected = [Read('id', 'aCGt')]
     self.assertEqual(expected, result)
Example #2
0
 def testUpperCaseSequencesOnly(self):
     """
     When combineReads is called with upperCase=True, a lower case sequence
     passed in the sequences list must come back in upper case.
     """
     sequences = ['id acgt']
     result = list(combineReads(None, sequences, upperCase=True))
     expected = [Read('id', 'ACGT')]
     self.assertEqual(expected, result)
Example #3
0
 def testNoneNone(self):
     """
     Passing C{None} for both the FASTA file name and the sequences must
     produce no reads at all.
     """
     result = list(combineReads(None, None))
     self.assertEqual([], result)
Example #4
0
 def testNoneEmpty(self):
     """
     Passing C{None} for the FASTA file name together with an empty list
     of sequences must produce no reads at all.
     """
     combined = combineReads(None, [])
     self.assertEqual([], list(combined))
Example #5
0
 def testSequencesOnly(self):
     """
     With a C{None} FASTA file name, a single sequence that carries an id
     must result in the one expected C{AARead}.
     """
     sequences = ['id ACGTSSS']
     result = list(combineReads(None, sequences, readClass=AARead))
     expected = [AARead('id', 'ACGTSSS')]
     self.assertEqual(expected, result)
Example #6
0
 def testNoUpperCaseSequencesOnly(self):
     """
     When combineReads is called without an upperCase argument, the case
     of a sequence passed in the sequences list must be left untouched.
     """
     sequences = ['id aCGt']
     result = list(combineReads(None, sequences))
     expected = [Read('id', 'aCGt')]
     self.assertEqual(expected, result)
Example #7
0
 def testSequencesOnly(self):
     """
     With a C{None} FASTA file name, a single sequence that carries an id
     must result in the one expected C{AARead}.
     """
     sequences = ['id ACGTSSS']
     result = list(combineReads(None, sequences, readClass=AARead))
     expected = [AARead('id', 'ACGTSSS')]
     self.assertEqual(expected, result)
Example #8
0
 def testNoneEmpty(self):
     """
     Passing C{None} for the FASTA file name together with an empty list
     of sequences must produce no reads at all.
     """
     combined = combineReads(None, [])
     self.assertEqual([], list(combined))
Example #9
0
 def testNoneNone(self):
     """
     Passing C{None} for both the FASTA file name and the sequences must
     produce no reads at all.
     """
     result = list(combineReads(None, None))
     self.assertEqual([], result)
Example #10
0
 def testUpperCaseSequencesOnly(self):
     """
     When combineReads is called with upperCase=True, a lower case sequence
     passed in the sequences list must come back in upper case.
     """
     sequences = ['id acgt']
     result = list(combineReads(None, sequences, upperCase=True))
     expected = [Read('id', 'ACGT')]
     self.assertEqual(expected, result)
Example #11
0
 def testDefaultReadIdPrefix(self):
     """
     With a C{None} FASTA file name, a sequence given without an id must
     result in a read whose id is the default prefix followed by its
     (1-based) number.
     """
     sequences = ['ACGT']
     result = list(combineReads(None, sequences))
     expected = [Read('command-line-read-1', 'ACGT')]
     self.assertEqual(expected, result)
Example #12
0
 def testDefaultReadIdPrefix(self):
     """
     With a C{None} FASTA file name, a sequence given without an id must
     result in a read whose id is the default prefix followed by its
     (1-based) number.
     """
     sequences = ['ACGT']
     result = list(combineReads(None, sequences))
     expected = [Read('command-line-read-1', 'ACGT')]
     self.assertEqual(expected, result)
Example #13
0
 def testUpperCaseFileOnly(self):
     """
     When a FASTA file is given and upperCase=True is passed, the sequence
     of the resulting read must be in upper case.
     """
     fasta = '\n'.join(['>id1', 'acgt'])
     opener = mock_open(read_data=fasta)
     # Replace the built-in open so no real file is needed.
     with patch.object(builtins, 'open', opener):
         result = list(combineReads('filename.fasta', None, upperCase=True))
         expected = [Read('id1', 'ACGT')]
         self.assertEqual(expected, result)
Example #14
0
 def testCustomReadIdPrefix(self):
     """
     With a C{None} FASTA file name, a sequence given without an id must
     result in a read whose id starts with the custom prefix passed via
     idPrefix.
     """
     sequences = ['ACGTSSS']
     combined = combineReads(
         None, sequences, idPrefix='prefix-', readClass=AARead)
     expected = [AARead('prefix-1', 'ACGTSSS')]
     self.assertEqual(expected, list(combined))
Example #15
0
 def testFileOnly(self):
     """
     When a FASTA file is given and sequences is C{None}, the read found in
     the file must be the only result.
     """
     fasta = '\n'.join(['>id1', 'ACGT'])
     opener = mock_open(read_data=fasta)
     # Replace the built-in open so no real file is needed.
     with patch.object(builtins, 'open', opener):
         result = list(combineReads('filename.fasta', None))
         expected = [Read('id1', 'ACGT')]
         self.assertEqual(expected, result)
Example #16
0
 def testNoUpperCaseFileOnly(self):
     """
     When a FASTA file is given and upperCase is not passed, the sequence
     of the resulting read must keep the case it has in the file.
     """
     fasta = '\n'.join(['>id1', 'AcgT'])
     opener = mock_open(read_data=fasta)
     # Replace the built-in open so no real file is needed.
     with patch.object(builtins, 'open', opener):
         result = list(combineReads('filename.fasta', None))
         expected = [Read('id1', 'AcgT')]
         self.assertEqual(expected, result)
Example #17
0
 def testFileOnly(self):
     """
     When a FASTA file is given and sequences is C{None}, the read found in
     the file must be the only result.
     """
     fasta = '\n'.join(['>id1', 'ACGT'])
     # Replace the built-in open so no real file is needed.
     with patch.object(builtins, 'open', mockOpen(read_data=fasta)):
         result = list(combineReads('filename.fasta', None))
         expected = [Read('id1', 'ACGT')]
         self.assertEqual(expected, result)
Example #18
0
 def testNoUpperCaseFileOnly(self):
     """
     When a FASTA file is given and upperCase is not passed, the sequence
     of the resulting read must keep the case it has in the file.
     """
     fasta = '\n'.join(['>id1', 'AcgT'])
     # Replace the built-in open so no real file is needed.
     with patch.object(builtins, 'open', mockOpen(read_data=fasta)):
         result = list(combineReads('filename.fasta', None))
         expected = [Read('id1', 'AcgT')]
         self.assertEqual(expected, result)
Example #19
0
 def testUpperCaseFileOnly(self):
     """
     When a FASTA file is given and upperCase=True is passed, the sequence
     of the resulting read must be in upper case.
     """
     fasta = '\n'.join(['>id1', 'acgt'])
     # Replace the built-in open so no real file is needed.
     with patch.object(builtins, 'open', mockOpen(read_data=fasta)):
         result = list(combineReads('filename.fasta', None, upperCase=True))
         expected = [Read('id1', 'ACGT')]
         self.assertEqual(expected, result)
Example #20
0
 def testCustomReadIdPrefix(self):
     """
     With a C{None} FASTA file name, a sequence given without an id must
     result in a read whose id starts with the custom prefix passed via
     idPrefix.
     """
     sequences = ['ACGTSSS']
     combined = combineReads(
         None, sequences, idPrefix='prefix-', readClass=AARead)
     expected = [AARead('prefix-1', 'ACGTSSS')]
     self.assertEqual(expected, list(combined))
Example #21
0
 def testSpecificReadClass(self):
     """
     A specific read class must result in reads of that class, both for
     the read that comes from a FASTA file and for the read that comes
     from an individually specified sequence.
     """
     data = '\n'.join(['>id1', 'ACGT'])
     # Replace the built-in open so no real file is needed.
     with patch.object(builtins, 'open', mock_open(read_data=data)):
         reads = list(
             combineReads('filename.fasta', ['ACGT'], readClass=RNARead))
         # assertIsInstance reports the offending object and the expected
         # class on failure, which assertTrue(isinstance(...)) does not.
         self.assertIsInstance(reads[0], RNARead)
         self.assertIsInstance(reads[1], RNARead)
Example #22
0
 def testSpecificReadClass(self):
     """
     A specific read class must result in reads of that class, both for
     the read that comes from a FASTA file and for the read that comes
     from an individually specified sequence.
     """
     data = '\n'.join(['>id1', 'ACGT'])
     mockOpener = mockOpen(read_data=data)
     # Replace the built-in open so no real file is needed.
     with patch.object(builtins, 'open', mockOpener):
         reads = list(combineReads('filename.fasta', ['ACGT'],
                                   readClass=RNARead))
         # assertIsInstance reports the offending object and the expected
         # class on failure, which assertTrue(isinstance(...)) does not.
         self.assertIsInstance(reads[0], RNARead)
         self.assertIsInstance(reads[1], RNARead)
                  '"--k N" to specify the desired number of clusters.' %
                  basename(sys.argv[0]),
                  file=sys.stderr)
            sys.exit(1)
    else:
        if args.k is not None:
            print(
                '%s: If you use "--algorithm affinityPropagation", you cannot '
                'also use "--k N" to specify a desired number of clusters. '
                'The --k option only applies to clustering via '
                '"--algorithm kMeans".' % basename(sys.argv[0]),
                file=sys.stderr)
            sys.exit(1)

    database = databaseSpecifier.getDatabaseFromArgs(args)
    reads = combineReads(args.fastaFile, args.sequences, readClass=AAReadWithX)
    labels = {}

    # Process labels from --label arguments.
    if args.labels:
        for labelArg in args.labels:
            fields = labelArg.split(None, 1)
            if len(fields) != 2:
                raise ValueError('Bad label %r. --label arguments must have '
                                 'a label, a space, then a read id.' %
                                 labelArg)
            try:
                labels[fields[1]] = int(fields[0])
            except ValueError:
                print('%s: Non-integer label in --label argument %r.' %
                      (basename(sys.argv[0]), labelArg),
Example #24
0
    def getDatabaseFromArgs(self, args, dbParams=None):
        """
        Obtain a database as directed by parsed command-line arguments.

        The following are tried in order, stopping at the first one that
        yields a database:

          1. If C{args.filePrefix} is set and a database save file with that
             prefix exists, restore the database from its save files.
          2. If WAMP is allowed, make a database with a WAMP server
             connector (if C{args.wampServer}) or get a WAMP client
             database (if C{args.wampClient}).
          3. If creation is allowed, make a new database.
          4. If WAMP is allowed, try for a WAMP client database even though
             C{args.wampClient} was not given.

        No check is made for conflicting options (e.g., restoring from a
        file while also asking for a WAMP server); the first applicable
        step above silently wins.

        If population is allowed, the reads found via C{args.databaseFasta}
        and C{args.databaseSequences} are then added to the database as
        subjects.

        @param args: Command line arguments as returned by the C{argparse}
            C{parse_args} method.
        @param dbParams: A C{DatabaseParameters} instance, or a false value
            (such as C{None}) to use default parameters.
        @raise RuntimeError: If a database save file exists but the
            connector or backend save file needed to restore it does not,
            or if none of the steps above produced a database.
        @return: The database that was restored, created or connected to.
        """
        if not dbParams:
            dbParams = DatabaseParameters()

        prefix = args.filePrefix
        database = None

        if prefix:
            # Work out which save files are present, to decide whether a
            # restore is possible and which kind of connector was in use.
            fileExists = os.path.exists
            dbFile = prefix + Database.SAVE_SUFFIX
            simpleFile = prefix + SimpleConnector.SAVE_SUFFIX
            # The WAMP connector is only considered under Python 3.
            wampFile = (prefix + WampServerConnector.SAVE_SUFFIX
                        if six.PY3 else None)
            backendFile = prefix + Backend.SAVE_SUFFIX

            if fileExists(dbFile):
                if fileExists(simpleFile):
                    if not fileExists(backendFile):
                        raise RuntimeError(
                            'A database save file (%s) and simple connector '
                            'save file (%s) are both present, but no backend '
                            'save file (%s) exists!' %
                            (dbFile, simpleFile, backendFile))
                    database = Database.restore(prefix)
                elif wampFile is not None and fileExists(wampFile):
                    # No backend save file check is made for a WAMP
                    # database, as those files may be on another machine.
                    database = Database.restore(prefix)
                elif wampFile is not None:
                    raise RuntimeError(
                        'A database save file (%s) is present, but no '
                        'simple connector save file (%s) or WAMP '
                        'connector save file (%s) exists!' %
                        (dbFile, simpleFile, wampFile))
                else:
                    raise RuntimeError(
                        'A database save file (%s) is present, but no '
                        'simple connector save file (%s) exists!' %
                        (dbFile, simpleFile))

        if database is None and self._allowWamp:
            if args.wampServer:
                wampConnector = WampServerConnector(dbParams,
                                                    filePrefix=prefix)
                database = Database(dbParams, connector=wampConnector,
                                    filePrefix=prefix)
            elif args.wampClient:
                database = getWampClientDatabase(args)

        if database is None and self._allowCreation:
            # Make a new database, leaving the choice of connector to the
            # Database class.
            database = Database(dbParams, filePrefix=prefix)

        if database is None and self._allowWamp:
            # Final attempt: maybe a WAMP database is wanted, even though
            # --wampClient was not given.
            database = getWampClientDatabase(args)

        if database is None:
            if self._allowCreation:
                creationNote = ''
            else:
                creationNote = 'database creation is not enabled, '
            raise RuntimeError(
                'Not enough information given to specify a database, %sand no '
                'remote WAMP database could be found.' % creationNote)

        if self._allowPopulation:
            subjects = combineReads(args.databaseFasta,
                                    args.databaseSequences,
                                    readClass=AAReadWithX,
                                    upperCase=True)
            for subject in subjects:
                database.addSubject(subject)

        return database