Esempio n. 1
0
    def _readsForSpec(self, spec):
        """
        Yield reads for a given specification.

        @param spec: A C{dict} with information about the sequences
            to be produced.
        @raise ValueError: If the id of a produced read has already been
            used for an earlier read.
        @return: A generator that yields C{Read} instances, one for each of
            the spec's 'count' (default 1) sequences. If the spec has a true
            'skip' value, reads are still made and recorded but are not
            yielded.
        """
        alphabet = None
        previousRead = None

        for _ in range(spec.get('count', 1)):
            id_ = None
            if 'sections' in spec:
                # Build the sequence from its sections. The alphabet is
                # that of the first section read that has one.
                parts = []
                for section in spec['sections']:
                    sectionRead = self._specToRead(section, previousRead)
                    parts.append(sectionRead.sequence)
                    if alphabet is None:
                        alphabet = sectionRead.alphabet
                sequence = ''.join(parts)
            else:
                specRead = self._specToRead(spec, previousRead)
                sequence = specRead.sequence
                id_ = specRead.id
                alphabet = specRead.alphabet

            if id_ is None:
                try:
                    id_ = spec['id']
                except KeyError:
                    # No explicit id: make one from the prefix and a
                    # per-prefix counter that starts at 1.
                    prefix = spec.get('id prefix', self._defaultIdPrefix)
                    prefixCount = self._idPrefixCount.get(prefix, 0) + 1
                    self._idPrefixCount[prefix] = prefixCount
                    id_ = '%s%d' % (prefix, prefixCount)

            if 'description' in spec:
                id_ = id_ + ' ' + spec['description']

            read = Read(id_, sequence)
            read.alphabet = alphabet

            # Ids are recorded in self._sequences (which is also where
            # _specToRead looks up 'from id' references), so that is the
            # mapping that must be consulted to detect a reused id.
            if id_ in self._sequences:
                raise ValueError(
                    "Sequence id '%s' has already been used." % id_)
            self._sequences[id_] = read

            if not spec.get('skip'):
                yield read
                # Only a read that was yielded becomes the previous read
                # passed to _specToRead on the next iteration.
                previousRead = read
Esempio n. 2
0
    def _specToRead(self, spec, previousRead=None):
        """
        Get a sequence from a specification.

        The first of the following that applies determines the sequence: a
        true 'ratchet' value (when a previous read is available), a 'from id'
        reference to an already-made sequence, a literal 'sequence', a
        'sequence file' (whose first read is used), or random generation
        from an alphabet. A 'mutation rate', if given, is applied afterwards.

        @param spec: A C{dict} with keys/values specifying a sequence.
        @param previousRead: If not C{None}, a C{dark.Read} instance holding
            a previously made read, as supplied by the caller. This is only
            used when 'ratchet' is given for a specification, in which case
            we generate a mutant based on the previous read.
        @raise ValueError: If the section spec refers to a non-existent other
            sequence, or to part of another sequence but the requested part
            exceeds the bounds of the other sequence (including a 'start'
            that is less than 1). Or if a 'sequence file' is empty or cannot
            be read.
        @return: A C{dark.Read} instance.
        """
        alphabet = self.NT
        length = spec.get('length', self._defaultLength)

        # Compare against None explicitly. Reads support len() (it is used
        # in the 'from id' handling below), so a truthiness test could treat
        # an empty previous read as though no previous read had been given.
        if spec.get('ratchet') and previousRead is not None:
            read = Read(None, previousRead.sequence)
            alphabet = previousRead.alphabet

        elif 'from id' in spec:
            fromId = spec['from id']
            try:
                fromRead = self._sequences[fromId]
            except KeyError:
                raise ValueError("Sequence section refers to the id '%s' of "
                                 "non-existent other sequence." % fromId)

            # The start offset in the spec is 1-based. Convert to 0-based.
            index = int(spec.get('start', 1)) - 1
            # Use the given length (if any) else the length of the
            # named read.
            length = spec.get('length', len(fromRead))
            sequence = fromRead.sequence[index:index + length]
            alphabet = fromRead.alphabet

            # A negative index would make the slice count from the end of
            # the sequence and could produce a full-length but wrong
            # subsequence, so reject it as being out of bounds too.
            if index < 0 or len(sequence) != length:
                raise ValueError(
                    "Sequence specification refers to sequence id '%s', "
                    "starting at index %d with length %d, but sequence "
                    "'%s' is not long enough to support that." %
                    (fromId, index + 1, length, fromId))

            read = Read(None, sequence)

        elif 'sequence' in spec:
            read = Read(None, spec['sequence'])

        elif 'sequence file' in spec:
            noFileClass = builtins.FileNotFoundError if PY3 else IOError
            reads = iter(FastaReads(spec['sequence file']))
            try:
                # Only the first read in the file is used.
                read = next(reads)
            except StopIteration:
                raise ValueError("Sequence file '%s' is empty." %
                                 spec['sequence file'])
            except noFileClass:
                raise ValueError("Sequence file '%s' could not be read." %
                                 spec['sequence file'])

        else:
            # Make a random sequence. An explicit (non-empty) alphabet takes
            # precedence over 'random aa'; otherwise the default alphabet
            # set above is used.
            if spec.get('alphabet'):
                alphabet = spec['alphabet']
            elif spec.get('random aa'):
                alphabet = self.AA
            read = Read(None, ''.join(choice(alphabet) for _ in range(length)))

        if 'mutation rate' in spec:
            read.sequence = self._mutate(
                read.sequence, spec['mutation rate'], alphabet)

        read.alphabet = alphabet

        return read