def random_population(genome_alphabet, genome_size, num_organisms, fitness_calculator):
    """Generate a population of individuals with randomly set genomes.

    Arguments:

    o genome_alphabet -- An Alphabet object describing all of the
    possible letters that could potentially be in the genome of an
    organism. Only its ``letters`` attribute is read here, and the type
    of the first letter decides how the genome is stored.

    o genome_size -- The size of each organisms genome.

    o num_organisms -- The number of organisms we want in the population.

    o fitness_calculator -- A function that will calculate the fitness
    of the organism when given the organisms genome. It is passed
    straight through to each Organism.

    Returns a list holding num_organisms Organism objects.

    Raises ValueError if the first letter of the alphabet is not a str,
    int or float.
    """
    letters = genome_alphabet.letters

    # figure out what type of characters are in the alphabet; this picks
    # the array typecode used as backing storage for every genome
    first_letter = letters[0]
    if isinstance(first_letter, str):
        if sys.version_info[0] == 3:
            alphabet_type = "u"  # Use unicode string on Python 3
        else:
            alphabet_type = "c"  # Use byte string on Python 2
    elif isinstance(first_letter, int):
        alphabet_type = "i"
    elif isinstance(first_letter, float):
        alphabet_type = "d"
    else:
        # Wrap the letters in a 1-tuple: if they are themselves a tuple,
        # "%s" % letters would treat each letter as a separate format
        # argument and fail with TypeError instead of this ValueError.
        raise ValueError("Alphabet type is unsupported: %s" % (letters,))

    # a random number generator to get letters for the genome
    letter_rand = random.Random()

    all_orgs = []
    for _ in range(num_organisms):
        new_genome = MutableSeq(array.array(alphabet_type), genome_alphabet)

        # generate the genome randomly, one gene at a time
        for _ in range(genome_size):
            new_genome.append(letter_rand.choice(letters))

        # add the new organism with this genome
        all_orgs.append(Organism(new_genome, fitness_calculator))

    return all_orgs
def random_population(genome_alphabet, genome_size, num_organisms, fitness_calculator):
    """Generate a population of individuals with randomly set genomes.

    Arguments:

    o genome_alphabet -- An Alphabet object describing all of the
    possible letters that could potentially be in the genome of an
    organism. Only its ``letters`` attribute is read here, and the type
    of the first letter decides how the genome is stored.

    o genome_size -- The size of each organisms genome.

    o num_organisms -- The number of organisms we want in the population.

    o fitness_calculator -- A function that will calculate the fitness
    of the organism when given the organisms genome. It is passed
    straight through to each Organism.

    Returns a list holding num_organisms Organism objects.

    Raises ValueError if the first letter of the alphabet is not a str,
    int or float.
    """
    letters = genome_alphabet.letters

    # figure out what type of characters are in the alphabet; this picks
    # the array typecode used as backing storage for every genome
    first_letter = letters[0]
    if isinstance(first_letter, str):
        if sys.version_info[0] == 3:
            alphabet_type = "u"  # Use unicode string on Python 3
        else:
            alphabet_type = "c"  # Use byte string on Python 2
    elif isinstance(first_letter, int):
        alphabet_type = "i"
    elif isinstance(first_letter, float):
        alphabet_type = "d"
    else:
        # Wrap the letters in a 1-tuple: if they are themselves a tuple,
        # "%s" % letters would treat each letter as a separate format
        # argument and fail with TypeError instead of this ValueError.
        raise ValueError("Alphabet type is unsupported: %s" % (letters,))

    # a random number generator to get letters for the genome
    letter_rand = random.Random()

    all_orgs = []
    for _ in range(num_organisms):
        new_genome = MutableSeq(array.array(alphabet_type), genome_alphabet)

        # generate the genome randomly, one gene at a time
        for _ in range(genome_size):
            new_genome.append(letter_rand.choice(letters))

        # add the new organism with this genome
        all_orgs.append(Organism(new_genome, fitness_calculator))

    return all_orgs
def random_motif(self):
    """Create a random motif within the given parameters.

    The motif length is drawn with random.randrange(min_size, max_size),
    so it is at least min_size and strictly less than max_size. Each
    position is then filled with a letter picked at random from the
    alphabet's letters.

    Returns the motif as a MutableSeq over the generator's alphabet.
    """
    length = random.randrange(self._min_size, self._max_size)

    # one random.choice call per position, in order
    picked = [random.choice(self._alphabet.letters) for _ in range(length)]

    return MutableSeq("".join(picked), self._alphabet)
def viterbi(self, sequence, state_alphabet):
    """Calculate the most probable state path using the Viterbi algorithm.

    This follows the Viterbi algorithm as described in Durbin et al
    (pgs 55-57), working with the values returned by self._log_transform
    for the initial, transition and emission probabilities, so scores
    are added rather than multiplied.

    Arguments:

    o sequence -- A Seq object with the emission sequence that we want
    to decode.

    o state_alphabet -- The alphabet of the possible state sequences
    that can be generated.

    Returns a tuple of (state path as a Seq, score of that path).
    """
    log_initial = self._log_transform(self.initial_prob)
    log_trans = self._log_transform(self.transition_prob)
    log_emission = self._log_transform(self.emission_prob)

    states = state_alphabet.letters
    seq_len = len(sequence)

    scores = {}         # (state, position) -> best score ending there
    back_pointers = {}  # (position - 1, state) -> best previous state

    # --- recursion
    # Positions run 0 .. L-1 here, one less than the 1 .. L numbering
    # used in Durbin et al.
    for pos in range(seq_len):
        for cur_state in states:
            # e_{l}(x_{i})
            emission = log_emission[(cur_state, sequence[pos])]

            if pos == 0:
                # the first column has no predecessor; seed it from the
                # initial probabilities
                scores[(cur_state, pos)] = emission + log_initial[cur_state]
                continue

            # score every allowed (pos - 1)-th state leading to cur_state
            candidates = {}
            for prev_state in self.transitions_to(cur_state):
                # a_{kl}
                step = log_trans[(prev_state, cur_state)]
                # v_{k}(i - 1) + a_{kl}
                candidates[prev_state] = scores[(prev_state, pos - 1)] + step

            best = max(candidates.values())

            # v_{l}(i)
            scores[(cur_state, pos)] = emission + best

            # remember the first predecessor that achieves the maximum
            for prev_state, prob in candidates.items():
                if prob == best:
                    back_pointers[(pos - 1, cur_state)] = prev_state
                    break

    # --- termination
    # v_{k}(L) for every state; the best of these is the path score
    final_scores = {state: scores[(state, seq_len - 1)] for state in states}
    state_path_prob = max(final_scores.values())

    # find the state to trace back from; on ties the last matching
    # state in iteration order is kept
    last_state = ''
    for state, prob in final_scores.items():
        if prob == state_path_prob:
            last_state = state
    assert last_state != '', "Didn't find the last state to trace from!"

    # --- traceback
    # Walk backwards from the final state, following the stored
    # predecessor at each step, then flip the result into forward order.
    traceback_seq = MutableSeq('', state_alphabet)
    state = last_state
    traceback_seq.append(state)
    for pos in range(seq_len - 1, 0, -1):
        state = back_pointers[(pos - 1, state)]
        traceback_seq.append(state)
    traceback_seq.reverse()

    return traceback_seq.toseq(), state_path_prob