Esempi in Python per MutableSeq, esempi in Python per SAP.Bio.Seq.MutableSeq

Esempio n. 1

0

Mostra file

File: Organism.py Progetto: kaspermunch/sap

def random_population(genome_alphabet, genome_size, num_organisms,
                      fitness_calculator):
    """Generate a population of individuals with randomly set genomes.

    Arguments:

    o genome_alphabet -- An Alphabet object describing all of the
    possible letters that could potentially be in the genome of an
    organism. Its letters attribute must be a non-empty sequence; the
    type of its first item decides how each genome is stored.

    o genome_size -- The size of each organism's genome.

    o num_organisms -- The number of organisms we want in the population.

    o fitness_calculator -- A function that will calculate the fitness
    of the organism when given the organism's genome.

    Returns a list holding num_organisms Organism objects.

    Raises ValueError if the first letter of the alphabet is not a str,
    an int, or a float.
    """
    letters = genome_alphabet.letters
    first_letter = letters[0]

    # pick the array typecode matching the kind of letters in the alphabet
    if isinstance(first_letter, str):
        if sys.version_info[0] >= 3:
            alphabet_type = "u"  # Use unicode string on Python 3
        else:
            alphabet_type = "c"  # Use byte string on Python 2
    elif isinstance(first_letter, int):
        alphabet_type = "i"
    elif isinstance(first_letter, float):
        alphabet_type = "d"
    else:
        # wrap the letters in a one-item tuple so that a tuple of letters
        # is shown as a whole instead of being unpacked as format arguments
        raise ValueError(
            "Alphabet type is unsupported: %s" % (letters,))

    # a random number generator to get letters for the genome
    letter_rand = random.Random()

    all_orgs = []
    for _ in range(num_organisms):
        new_genome = MutableSeq(array.array(alphabet_type), genome_alphabet)

        # generate the genome randomly, one letter at a time
        for _ in range(genome_size):
            new_genome.append(letter_rand.choice(letters))

        # add the new organism with this genome
        all_orgs.append(Organism(new_genome, fitness_calculator))

    return all_orgs

Esempio n. 2

0

Mostra file

File: Organism.py Progetto: cbirdlab/sap

def random_population(genome_alphabet, genome_size, num_organisms,
                      fitness_calculator):
    """Generate a population of individuals with randomly set genomes.

    Arguments:

    o genome_alphabet -- An Alphabet object describing all of the
    possible letters that could potentially be in the genome of an
    organism. Its letters attribute must be a non-empty sequence; the
    type of its first item decides how each genome is stored.

    o genome_size -- The size of each organism's genome.

    o num_organisms -- The number of organisms we want in the population.

    o fitness_calculator -- A function that will calculate the fitness
    of the organism when given the organism's genome.

    Returns a list holding num_organisms Organism objects.

    Raises ValueError if the first letter of the alphabet is not a str,
    an int, or a float.
    """
    letters = genome_alphabet.letters
    first_letter = letters[0]

    # figure out what type of characters are in the alphabet
    if isinstance(first_letter, str):
        if sys.version_info[0] >= 3:
            alphabet_type = "u"  # Use unicode string on Python 3
        else:
            alphabet_type = "c"  # Use byte string on Python 2
    elif isinstance(first_letter, int):
        alphabet_type = "i"
    elif isinstance(first_letter, float):
        alphabet_type = "d"
    else:
        # a bare "%s" % letters would unpack a tuple of letters as format
        # arguments and fail with TypeError, so pass a one-item tuple
        raise ValueError("Alphabet type is unsupported: %s" %
                         (letters,))

    # a random number generator to get letters for the genome
    letter_rand = random.Random()

    all_orgs = []
    for _ in range(num_organisms):
        new_genome = MutableSeq(array.array(alphabet_type), genome_alphabet)

        # generate the genome randomly, one letter at a time
        for _ in range(genome_size):
            new_genome.append(letter_rand.choice(letters))

        # add the new organism with this genome
        all_orgs.append(Organism(new_genome, fitness_calculator))

    return all_orgs

Esempio n. 3

0

Mostra file

File: Schema.py Progetto: cbirdlab/sap

    def random_motif(self):
        """Create a random motif within the given parameters.

        This returns a single motif, as a MutableSeq built with the
        stored alphabet, whose letters are drawn at random from that
        alphabet. The size is picked with random.randrange, so it is at
        least min_size and strictly less than max_size. When min_size
        and max_size are equal the motif has exactly that size.
        """
        if self._min_size == self._max_size:
            # randrange raises ValueError on an empty range, so a fixed
            # size has to be handled without calling it
            motif_size = self._min_size
        else:
            motif_size = random.randrange(self._min_size, self._max_size)

        letters = self._alphabet.letters
        # join the letters in one go instead of growing a string with +=
        motif = "".join(random.choice(letters) for _ in range(motif_size))

        return MutableSeq(motif, self._alphabet)

Esempio n. 4

0

Mostra file

File: MarkovModel.py Progetto: cbirdlab/sap

    def viterbi(self, sequence, state_alphabet):
        """Find the most probable state path for a sequence of emissions.

        This is the Viterbi algorithm (the original author cites pgs 55-57
        of Durbin et al as the source of the implementation ideas). The
        values returned by self._log_transform for the initial, transition
        and emission probabilities are summed along each candidate path.

        Arguments:

        o sequence -- A Seq object with the emission sequence that we
        want to decode. It is measured with len() and read by index.

        o state_alphabet -- The alphabet of the possible state sequences
        that can be generated; its letters attribute lists the states.

        Returns a two-item tuple: the decoded state path (the result of
        calling toseq() on the traceback MutableSeq) and the score of
        that path.
        """
        # work with the log-transformed initial, transition, and emission
        # probabilities
        initial_logs = self._log_transform(self.initial_prob)
        transition_logs = self._log_transform(self.transition_prob)
        emission_logs = self._log_transform(self.emission_prob)

        states = state_alphabet.letters
        seq_length = len(sequence)

        # (state, position) -> score of the best path ending in that state
        scores = {}
        # (position - 1, state) -> best state preceding `state`
        back_pointers = {}

        # --- recursion
        # Positions run from 0 to (Length - 1), i.e. one less than the
        # 1 .. Length numbering used in Durbin et al.
        for position in range(seq_length):
            for current in states:
                # e_{l}(x_{i})
                emitted = emission_logs[(current, sequence[position])]

                if position == 0:
                    # nothing to look back to at the first position, so
                    # the initial probability takes the place of the max
                    scores[(current, position)] = (emitted +
                                                   initial_logs[current])
                    continue

                # score every state that can transition into `current`
                candidates = {}
                for previous in self.transitions_to(current):
                    # a_{kl}
                    step = transition_logs[(previous, current)]
                    # v_{k}(i - 1) + a_{kl}
                    candidates[previous] = (scores[(previous, position - 1)]
                                            + step)

                best = max(candidates.values())

                # v_{k}(i)
                scores[(current, position)] = emitted + best

                # remember the first predecessor that reaches the best score
                for previous, value in candidates.items():
                    if value == best:
                        back_pointers[(position - 1, current)] = previous
                        break

        # --- termination
        # v_{k}(L) for every state; the largest one is the path score
        final_scores = {}
        for current in states:
            final_scores[current] = scores[(current, seq_length - 1)]

        state_path_prob = max(final_scores.values())

        # find the state to start the traceback from; there is no break
        # here, so on a tie the last matching state in iteration order wins
        last_state = ''
        for current, value in final_scores.items():
            if value == state_path_prob:
                last_state = current

        assert last_state != '', "Didn't find the last state to trace from!"

        # --- traceback
        # Walk backwards from the final state; at each step the stored
        # pointer gives the most probable state preceding the current one.
        traceback_seq = MutableSeq('', state_alphabet)
        current = last_state
        traceback_seq.append(current)
        for position in range(seq_length - 1, 0, -1):
            current = back_pointers[(position - 1, current)]
            traceback_seq.append(current)

        # the path was collected end-to-start, so flip it around
        traceback_seq.reverse()

        return traceback_seq.toseq(), state_path_prob