def testNoUpperCaseSequencesOnly(self):
    """
    When combineReads is called without an upperCase argument, the
    sequence of each resulting read must keep its original case.
    """
    expected = [Read('id', 'aCGt')]
    sequences = ['id aCGt']
    self.assertEqual(expected, list(combineReads(None, sequences)))
def testUpperCaseSequencesOnly(self):
    """
    When combineReads is called with upperCase=True, the sequence of
    each resulting read must be upper cased.
    """
    expected = [Read('id', 'ACGT')]
    sequences = ['id acgt']
    combined = combineReads(None, sequences, upperCase=True)
    self.assertEqual(expected, list(combined))
def testNoneNone(self):
    """
    Passing C{None} for both the FASTA file name and the sequences must
    produce a result that contains no reads.
    """
    self.assertEqual([], list(combineReads(None, None)))
def testNoneEmpty(self):
    """
    Passing C{None} for the FASTA file name and an empty list of
    sequences must produce a result that contains no reads.
    """
    noSequences = []
    combined = combineReads(None, noSequences)
    self.assertEqual([], list(combined))
def testSequencesOnly(self):
    """
    Passing C{None} for the FASTA file name and a one-element sequences
    list must produce exactly the expected read, of the requested class.
    """
    expected = [AARead('id', 'ACGTSSS')]
    combined = combineReads(None, ['id ACGTSSS'], readClass=AARead)
    self.assertEqual(expected, list(combined))
def testDefaultReadIdPrefix(self):
    """
    A sequence given without an id (and with no FASTA file name) must
    be turned into a read whose id is 'command-line-read-1'.
    """
    expected = [Read('command-line-read-1', 'ACGT')]
    combined = combineReads(None, ['ACGT'])
    self.assertEqual(expected, list(combined))
def testUpperCaseFileOnly(self):
    """
    When upperCase=True is passed along with a FASTA file name, the
    sequence of the read taken from the file must be in upper case.
    """
    # The built-in open is patched so that no real file is needed.
    fastaText = '>id1\nacgt'
    opener = mock_open(read_data=fastaText)
    with patch.object(builtins, 'open', opener):
        combined = combineReads('filename.fasta', None, upperCase=True)
        reads = list(combined)
        self.assertEqual([Read('id1', 'ACGT')], reads)
def testCustomReadIdPrefix(self):
    """
    A sequence given without an id (and with no FASTA file name) must
    be turned into a read whose id starts with the custom idPrefix
    passed to combineReads.
    """
    expected = [AARead('prefix-1', 'ACGTSSS')]
    combined = combineReads(
        None, ['ACGTSSS'], idPrefix='prefix-', readClass=AARead)
    self.assertEqual(expected, list(combined))
def testFileOnly(self):
    """
    When a FASTA file name is given and sequences is C{None}, the
    result must contain exactly the read found in the file.
    """
    # The built-in open is patched so that no real file is needed.
    fastaText = '>id1\nACGT'
    opener = mock_open(read_data=fastaText)
    with patch.object(builtins, 'open', opener):
        reads = list(combineReads('filename.fasta', None))
        self.assertEqual([Read('id1', 'ACGT')], reads)
def testNoUpperCaseFileOnly(self):
    """
    When a FASTA file name is given and upperCase is not passed, the
    read taken from the file must keep its original (mixed) case.
    """
    # The built-in open is patched so that no real file is needed.
    fastaText = '>id1\nAcgT'
    opener = mock_open(read_data=fastaText)
    with patch.object(builtins, 'open', opener):
        reads = list(combineReads('filename.fasta', None))
        self.assertEqual([Read('id1', 'AcgT')], reads)
def testFileOnly(self):
    """
    When a FASTA file name is given and sequences is C{None}, the
    result must contain exactly the read found in the file.
    """
    # The built-in open is patched so that no real file is needed.
    fastaText = '>id1\nACGT'
    with patch.object(builtins, 'open', mockOpen(read_data=fastaText)):
        combined = combineReads('filename.fasta', None)
        reads = list(combined)
        self.assertEqual([Read('id1', 'ACGT')], reads)
def testNoUpperCaseFileOnly(self):
    """
    When a FASTA file name is given and upperCase is not passed, the
    read taken from the file must keep its original (mixed) case.
    """
    # The built-in open is patched so that no real file is needed.
    fastaText = '>id1\nAcgT'
    with patch.object(builtins, 'open', mockOpen(read_data=fastaText)):
        combined = combineReads('filename.fasta', None)
        reads = list(combined)
        self.assertEqual([Read('id1', 'AcgT')], reads)
def testUpperCaseFileOnly(self):
    """
    When upperCase=True is passed along with a FASTA file name, the
    sequence of the read taken from the file must be in upper case.
    """
    # The built-in open is patched so that no real file is needed.
    fastaText = '>id1\nacgt'
    with patch.object(builtins, 'open', mockOpen(read_data=fastaText)):
        combined = combineReads('filename.fasta', None, upperCase=True)
        reads = list(combined)
        self.assertEqual([Read('id1', 'ACGT')], reads)
def testCustomReadIdPrefix(self):
    """
    A sequence given without an id (and with no FASTA file name) must
    be turned into a read whose id starts with the custom idPrefix
    passed to combineReads.
    """
    options = dict(idPrefix='prefix-', readClass=AARead)
    combined = combineReads(None, ['ACGTSSS'], **options)
    expected = [AARead('prefix-1', 'ACGTSSS')]
    self.assertEqual(expected, list(combined))
def testSpecificReadClass(self):
    """
    A specific read class must result in reads of that class, both for
    reads from a FASTA file and from individually specified sequences.
    """
    data = '\n'.join(['>id1', 'ACGT'])
    # The built-in open is patched so that no real file is needed.
    with patch.object(builtins, 'open', mock_open(read_data=data)):
        reads = list(
            combineReads('filename.fasta', ['ACGT'], readClass=RNARead))
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(reads[0], RNARead)
        self.assertIsInstance(reads[1], RNARead)
def testSpecificReadClass(self):
    """
    A specific read class must result in reads of that class, both for
    reads from a FASTA file and from individually specified sequences.
    """
    data = '\n'.join(['>id1', 'ACGT'])
    # The built-in open is patched so that no real file is needed.
    mockOpener = mockOpen(read_data=data)
    with patch.object(builtins, 'open', mockOpener):
        reads = list(combineReads('filename.fasta', ['ACGT'],
                                  readClass=RNARead))
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(reads[0], RNARead)
        self.assertIsInstance(reads[1], RNARead)
'"--k N" to specify the desired number of clusters.' % basename(sys.argv[0]), file=sys.stderr) sys.exit(1) else: if args.k is not None: print( '%s: If you use "--algorithm affinityPropagation", you cannot ' 'also use "--k N" to specify a desired number of clusters. ' 'The --k option only applies to clustering via ' '"--algorithm kMeans".' % basename(sys.argv[0]), file=sys.stderr) sys.exit(1) database = databaseSpecifier.getDatabaseFromArgs(args) reads = combineReads(args.fastaFile, args.sequences, readClass=AAReadWithX) labels = {} # Process labels from --label arguments. if args.labels: for labelArg in args.labels: fields = labelArg.split(None, 1) if len(fields) != 2: raise ValueError('Bad label %r. --label arguments must have ' 'a label, a space, then a read id.' % labelArg) try: labels[fields[1]] = int(fields[0]) except ValueError: print('%s: Non-integer label in --label argument %r.' % (basename(sys.argv[0]), labelArg),
def getDatabaseFromArgs(self, args, dbParams=None):
    """
    Restore a previously saved database or obtain a new one, based on the
    command-line arguments in C{args} and the parameters in C{dbParams}.

    The possibilities are tried in this order, stopping at the first one
    that yields a database:

      1. Restore from save files, if C{args.filePrefix} is set and the
         database save file for that prefix exists.
      2. If WAMP is allowed: create a database with a WAMP server
         connector (C{args.wampServer}) or get a WAMP client database
         (C{args.wampClient}).
      3. If creation is allowed: create a new database.
      4. If WAMP is allowed: make a final attempt to get a WAMP client
         database, even though C{args.wampClient} was not set.

    No check is made for conflicting options (e.g., restoring from a file
    while also asking for a WAMP server); the highest priority option is
    silently acted on.

    If population is allowed, the reads given by C{args.databaseFasta} and
    C{args.databaseSequences} are upper-cased and added to the database as
    subjects. This happens however the database was obtained.

    @param args: Command line arguments as returned by the C{argparse}
        C{parse_args} method.
    @param dbParams: A C{DatabaseParameters} instance or C{None} to use
        default parameters.
    @raise RuntimeError: If a database save file exists without the
        connector/backend save files needed to restore it, or if no
        database could be restored, created or connected to.
    @return: The database that was restored, created or connected to.
    """
    dbParams = dbParams or DatabaseParameters()
    filePrefix = args.filePrefix
    database = None

    if filePrefix:
        # Work out which save files are present. That tells us whether a
        # restore is possible and what kind of connector was involved.
        dbSaveFile = filePrefix + Database.SAVE_SUFFIX
        scSaveFile = filePrefix + SimpleConnector.SAVE_SUFFIX
        if six.PY3:
            # The WAMP connector is only considered under Python 3.
            wcSaveFile = filePrefix + WampServerConnector.SAVE_SUFFIX
        beSaveFile = filePrefix + Backend.SAVE_SUFFIX

        if os.path.exists(dbSaveFile):
            if os.path.exists(scSaveFile):
                # A simple connector requires a backend save file too.
                if not os.path.exists(beSaveFile):
                    raise RuntimeError(
                        'A database save file (%s) and simple connector '
                        'save file (%s) are both present, but no backend '
                        'save file (%s) exists!' %
                        (dbSaveFile, scSaveFile, beSaveFile))
            elif not (six.PY3 and os.path.exists(wcSaveFile)):
                # Neither kind of connector save file was found.
                if six.PY3:
                    message = (
                        'A database save file (%s) is present, but no '
                        'simple connector save file (%s) or WAMP '
                        'connector save file (%s) exists!' %
                        (dbSaveFile, scSaveFile, wcSaveFile))
                else:
                    message = (
                        'A database save file (%s) is present, but no '
                        'simple connector save file (%s) exists!' %
                        (dbSaveFile, scSaveFile))
                raise RuntimeError(message)

            # Either the simple connector and backend save files both
            # exist, or a WAMP connector save file exists. In the WAMP
            # case backend save files are not checked for, as they may
            # be on another machine.
            database = Database.restore(filePrefix)

    if database is None and self._allowWamp:
        if args.wampServer:
            wampConnector = WampServerConnector(dbParams,
                                                filePrefix=filePrefix)
            database = Database(dbParams, connector=wampConnector,
                                filePrefix=filePrefix)
        elif args.wampClient:
            database = getWampClientDatabase(args)

    if database is None and self._allowCreation:
        # A new database. No connector is passed, so Database uses its
        # default (per the original author's note: an in-memory database
        # with a simple connector and a local backend).
        database = Database(dbParams, filePrefix=filePrefix)

    if database is None and self._allowWamp:
        # Last try: guess that the user might want to talk to a WAMP
        # database, even though --wampClient wasn't specified.
        database = getWampClientDatabase(args)

    if database is None:
        if self._allowCreation:
            creationNote = ''
        else:
            creationNote = 'database creation is not enabled, '
        raise RuntimeError(
            'Not enough information given to specify a database, %sand no '
            'remote WAMP database could be found.' % creationNote)

    if self._allowPopulation:
        subjects = combineReads(args.databaseFasta, args.databaseSequences,
                                readClass=AAReadWithX, upperCase=True)
        for subject in subjects:
            database.addSubject(subject)

    return database