def mutant_locations(self, loc, count=1, max=False):
    '''
    Return an iterator over candidate mutation sets that fall within loc.

    The candidates come from self._mutant_locations, a dict keyed by
    location tuples whose values are the possible mutations at that
    location (it is built on first use via mutate.mutant_locations).

    loc   -- an interval object or a location tuple. An empty interval
             yields an empty iterator.
    count -- number of simultaneous mutations per emitted set: 1 emits all
             single mutations, 2 emits all pairs, and so on.
    max   -- accepted for interface compatibility; not used by this method.

    Each emitted item is a frozenset of (location_tuple, mutation) pairs.
    Sets that touch the same location twice, or that are already present in
    self.mut_sets, are skipped. If loc overlaps the first exon, only sets
    that pass mutate.check_translation against the original codon-expanded
    sequence are emitted. Ordering is delegated to util.irandomize, seeded
    with the module-level random_seed.
    '''
    # first, make sure the self._mutant_locations dict is instantiated
    if not hasattr(self, '_mutant_locations'):
        self._mutant_locations = mutate.mutant_locations(self)

    # deal with interval() versus tuple inputs: keep an interval for the
    # overlap tests and a plain hull tuple for the mutate.* helpers
    if isinstance(loc, interval):
        if len(loc) == 0:
            return iter([])
        loc_ivl = loc
        loc = interval.hull([loc_ivl]).to_tuple()
    else:
        loc_ivl = interval(loc)

    def position_mutations(ivl):
        # expand one location into its individual (location, mutation) pairs
        pos = ivl.to_tuple()
        return ((pos, mut) for mut in self._mutant_locations[pos])

    # one generator per location key that overlaps the requested range;
    # built eagerly (as a list) so lookup errors surface at call time
    pos_mut_sets = [
        position_mutations(ivl)
        for ivl in (interval(ml) for ml in self._mutant_locations.keys())
        if ivl.overlaps(loc_ivl)]

    # every size-`count` combination of the randomized candidate mutations
    candidates = util.irandomize(chain.from_iterable(pos_mut_sets),
                                 seed=random_seed)
    emit_sets = (frozenset(combo)
                 for combo in combinations(candidates, count))

    def is_unique_pos(mset):
        # reject sets that hit one location more than once, and sets that
        # have already been recorded in self.mut_sets
        positions = [m[0] for m in mset]
        return (len(positions) == len(set(positions))
                and mset not in self.mut_sets)

    shuffled_sets = util.irandomize(emit_sets, seed=random_seed)
    mut_iter = util.irandomize(
        (mset for mset in shuffled_sets if is_unique_pos(mset)),
        seed=random_seed)

    # if this feature does not overlap the first exon there is nothing
    # to check at the translation level
    exon_ivl = interval(self.exon_list[0].extract_pos())
    if not exon_ivl.overlaps(loc_ivl):
        return mut_iter

    # expand the motif to codons (clipped to the exon) so that every mutant
    # can be checked against the original sequence's translation
    codon_loc = (interval(mutate.expand_motif_to_codons(self, loc))
                 & exon_ivl).to_tuple()
    seq_str = str(self.seq)[slice(*codon_loc)]

    def is_synon(mut_tups):
        mutated = mutate.tups_to_str(seq_str, codon_loc, mut_tups)
        return mutate.check_translation(mutated.upper(), seq_str)

    return util.irandomize(
        (mset for mset in mut_iter if is_synon(mset)),
        seed=random_seed)
def mutate_all_positions(self, loc):
    '''
    Generate mutation sets that mutate every codon and/or nucleotide
    within feature bounds.

    loc -- an interval object or a location tuple.

    This is a generator. Each yielded item is a frozenset of
    ((start, end), replacement) tuples, built by making one random choice
    per exonic codon (an alternative codon from the codon tables) and one
    random choice per overlapping intronic key of self._mutant_locations.

    A given mutation set is yielded at most once. The generator ends once
    more than 20 already-yielded sets have been drawn in total, which
    bounds the search when the space of distinct sets is small.
    '''
    # first, make sure the self._mutant_locations dict is instantiated
    if not hasattr(self, '_mutant_locations'):
        self._mutant_locations = mutate.mutant_locations(self)

    # deal with interval() versus tuple inputs
    if isinstance(loc, interval):
        loc = interval.hull([loc]).to_tuple()

    (e_coords, i_coords) = mutate.get_motif_boundaries(loc, self)

    def codon_chooser(options, rgen):
        # pick one alternative codon (as a tuple of mut tuples) per call
        return lambda: rgen.choice(options)

    def intron_chooser(pos, rgen):
        # pick one single-position mutation per call; the option list is
        # rebuilt on every call so it always reflects _mutant_locations
        return lambda: rgen.choice(
            [((pos, mut),) for mut in self._mutant_locations[pos]])

    # one zero-argument chooser per codon / intronic position; each has its
    # own independently seeded random generator, so call order is irrelevant
    mutant_choices = []

    # the codon tables do not depend on the position, so fetch them once
    bckt = mutate.codon_back_table()
    fwdt = mutate.codon_fwd_table()

    for codon_loc in e_coords:
        # go through every codon in codon_loc
        for c_loc in range(codon_loc[0], codon_loc[1], 3):
            codon = str(self.seq[c_loc:(c_loc + 3)]).upper()
            # other codons sharing this codon's forward-table entry
            # (presumably the synonymous codons)
            other_codons = bckt[fwdt[codon]].difference((codon,))
            if not other_codons:
                continue
            # convert these codons into mut tuples (one codon might be
            # one, two or even three tuples)
            cmut_tuples = tuple(
                tuple(((c_loc + diff, c_loc + diff + 1), other_cod[diff])
                      for diff in util.str_diff(other_cod, codon))
                for other_cod in other_codons)
            # unique-state random generator for this position
            rgen = random.Random()
            rgen.seed(random_seed ^ hash(cmut_tuples) ^ hash(loc))
            mutant_choices.append(codon_chooser(cmut_tuples, rgen))

    for intron_loc in i_coords:
        intron_ivl = interval(intron_loc)
        for ml in self._mutant_locations.keys():
            ivl = interval(ml)
            hit = ivl.overlaps(intron_ivl)
            if not hit:
                continue
            # independently seeded generator for each position.
            # NOTE(review): the seed is a tuple containing an interval and
            # the overlap result, kept exactly as before so random streams
            # do not change; newer Python versions reject non-scalar seeds.
            rgen = random.Random()
            rgen.seed((random_seed, (ivl, hit)))
            mutant_choices.append(intron_chooser(ivl.to_tuple(), rgen))

    # now that we have one chooser for every codon/nt, repeatedly call each
    # of them once to build a candidate set. The bookkeeping lives outside
    # the loop so that duplicates are actually detected and counted.
    max_repeats = 20
    yielded = set()
    seen_count = 0
    while True:
        next_mut = frozenset(
            chain.from_iterable(choose() for choose in mutant_choices))
        if next_mut not in yielded:
            yielded.add(next_mut)
            yield next_mut
        elif seen_count < max_repeats:
            seen_count += 1
        else:
            # too many repeats: end the generator (a bare return, since
            # raising StopIteration inside a generator is an error under
            # PEP 479)
            return