def _readsForSpec(self, spec):
    """
    Yield reads for a given specification.

    This is a generator. It builds C{spec.get('count', 1)} reads, either by
    concatenating the reads made for each entry of C{spec['sections']} or
    by making a single read directly from C{spec}. Every read built is
    stored in C{self._sequences} under its final id, and is yielded unless
    C{spec} has a true 'skip' value.

    @param spec: A C{dict} with information about the sequences
        to be produced.
    @raise ValueError: If the id of a produced read has already been
        registered in C{self._sequences}.
    """
    alphabet = None
    previousRead = None
    nSequences = spec.get('count', 1)

    for _ in range(nSequences):
        id_ = None
        if 'sections' in spec:
            # Each section is built against the same previous (whole)
            # read. The alphabet of the first section ever built for this
            # spec is used for all the reads the spec produces.
            parts = []
            for section in spec['sections']:
                read = self._specToRead(section, previousRead)
                parts.append(read.sequence)
                if alphabet is None:
                    alphabet = read.alphabet
            sequence = ''.join(parts)
        else:
            read = self._specToRead(spec, previousRead)
            sequence = read.sequence
            # The read may already carry an id (it is None otherwise).
            id_ = read.id
            alphabet = read.alphabet

        if id_ is None:
            try:
                id_ = spec['id']
            except KeyError:
                # No explicit id: number the read within its id prefix,
                # counting from 1.
                prefix = spec.get('id prefix', self._defaultIdPrefix)
                prefixCount = self._idPrefixCount.get(prefix, 0) + 1
                self._idPrefixCount[prefix] = prefixCount
                id_ = '%s%d' % (prefix, prefixCount)

        # NOTE: the description becomes part of the id, and therefore part
        # of the key under which the read is registered below.
        try:
            id_ = id_ + ' ' + spec['description']
        except KeyError:
            pass

        read = Read(id_, sequence)
        read.alphabet = alphabet

        # The duplicate check must look at the same dict the read is
        # stored in, otherwise a repeated id silently replaces the
        # earlier read.
        if id_ in self._sequences:
            raise ValueError(
                "Sequence id '%s' has already been used." % id_)
        self._sequences[id_] = read

        if not spec.get('skip'):
            yield read

        # Skipped reads still become the previous read for the next
        # iteration.
        previousRead = read
def _specToRead(self, spec, previousRead=None):
    """
    Get a sequence from a specification.

    The source of the sequence is the first of these that applies:

      1. A true 'ratchet' value together with a (truthy) C{previousRead}:
         the previous read's sequence and alphabet are reused.
      2. A 'from id' key: a region of an already registered read is
         copied, selected by the optional 1-based 'start' and 'length'.
      3. A 'sequence' key: the given sequence is used as is.
      4. A 'sequence file' key: the first read in the file is used.
      5. Otherwise a random sequence is generated, of length
         C{spec['length']} (default C{self._defaultLength}), drawn from
         C{spec['alphabet']} if that is true, else from C{self.AA} if
         'random aa' is true, else from C{self.NT}.

    If C{spec} has a 'mutation rate' key, the resulting sequence is then
    passed through C{self._mutate}.

    @param spec: A C{dict} with keys/values specifying a sequence.
    @param previousRead: If not C{None}, a {dark.Read} instance containing
        the last read this method returned. This is only used when
        'ratchet' is given for a specification, in which case we
        generate a mutant based on the previous read.
    @raise ValueError: If the section spec refers to a non-existent other
        sequence, or to part of another sequence but the requested part
        exceeds the bounds of the other sequence (including a 'start'
        offset less than 1). Or if a given sequence file is empty or
        cannot be read.
    @return: A C{dark.Read} instance, with its C{alphabet} attribute set.
    """
    alphabet = self.NT
    length = spec.get('length', self._defaultLength)

    if spec.get('ratchet') and previousRead:
        read = Read(None, previousRead.sequence)
        alphabet = previousRead.alphabet
    elif 'from id' in spec:
        fromId = spec['from id']
        try:
            fromRead = self._sequences[fromId]
        except KeyError:
            raise ValueError("Sequence section refers to the id '%s' of "
                             "non-existent other sequence." % fromId)

        # The start offset in the spec is 1-based. Reject anything less
        # than 1 explicitly: converted to a negative 0-based index it
        # would make the slice below count from the end of the sequence
        # and quietly select the wrong region.
        start = int(spec.get('start', 1))
        if start < 1:
            raise ValueError(
                "Sequence specification refers to sequence id '%s', "
                "starting at index %d, but start offsets are 1-based "
                "and must be at least 1." % (fromId, start))
        index = start - 1

        # Use the given length (if any) else the length of the
        # named read.
        length = spec.get('length', len(fromRead))
        sequence = fromRead.sequence[index:index + length]
        alphabet = fromRead.alphabet

        # A slice that runs off the end comes back short.
        if len(sequence) != length:
            raise ValueError(
                "Sequence specification refers to sequence id '%s', "
                "starting at index %d with length %d, but sequence "
                "'%s' is not long enough to support that." %
                (fromId, index + 1, length, fromId))
        read = Read(None, sequence)
    elif 'sequence' in spec:
        read = Read(None, spec['sequence'])
    elif 'sequence file' in spec:
        # Python 2 has no FileNotFoundError.
        noFileClass = builtins.FileNotFoundError if PY3 else IOError
        reads = iter(FastaReads(spec['sequence file']))
        try:
            # Only the first read in the file is used.
            read = next(reads)
        except StopIteration:
            raise ValueError("Sequence file '%s' is empty." %
                             spec['sequence file'])
        except noFileClass:
            raise ValueError("Sequence file '%s' could not be read." %
                             spec['sequence file'])
    else:
        # Random sequence. Pick the alphabet, falling back to the
        # self.NT default assigned above.
        if spec.get('alphabet'):
            alphabet = spec['alphabet']
        elif spec.get('random aa'):
            alphabet = self.AA
        read = Read(None, ''.join(choice(alphabet) for _ in range(length)))

    if 'mutation rate' in spec:
        read.sequence = self._mutate(
            read.sequence, spec['mutation rate'], alphabet)

    read.alphabet = alphabet
    return read