def build_initial_best_motifs():
    """Pick one randomly positioned k-length slice from every DNA string.

    Neither ``dna_strings`` nor ``k`` is a parameter; both are looked up
    outside this function.  The upper bound for the random start index is
    derived from the length of the first string only and is reused for
    every string.

    Returns:
        A list holding, for each string in ``dna_strings``, the slice
        ``dna_string[i: i + k]`` with ``i`` drawn via ``randrange``.
    """
    first_length = len(fst(dna_strings))

    motifs = []
    for dna_string in dna_strings:
        # A fresh start index is drawn for every string.
        start = randrange(first_length - k + 1)
        motifs.append(dna_string[start: start + k])
    return motifs
def detect(filename):
    """Return the capitalized key with the largest response for ``filename``.

    Keywords and model coefficients are read through ``cfg`` from paths
    resolved relative to this module, the inputs built from ``filename`` are
    passed to ``compute_neurons``, and the resulting mapping is printed
    before the winning key is selected.

    Args:
        filename: Passed straight to ``inputs_dict``.

    Returns:
        The first element of the ``(key, response)`` item whose second
        element is the maximum, with ``.capitalize()`` applied.
    """
    keywords_path = relative_path(KEYWORDS_DIR, __file__)
    langs, kws = cfg.read_kws(keywords_path)
    inputs, _ = inputs_dict(filename, kws, langs)

    model_path = relative_path(MODEL_CONFIG, __file__)
    model_coeffs = cfg.read_config(model_path)

    responses_dict = compute_neurons(inputs, model_coeffs)
    print(responses_dict)

    # The winner is the item with the HIGHEST response value.
    strongest_item = max(responses_dict.iteritems(), key=snd)
    winner = fst(strongest_item)
    return winner.capitalize()
def matcher(token_tuple):
    """Tell whether a token should be discarded.

    A token is discarded when its value is empty, when its type is
    ``NUMBER``, ``STRING`` or ``N_TOKENS``, or when its value both starts
    and ends with one of the entries of ``QUOTES``.

    Args:
        token_tuple: Token whose type is obtained with ``fst`` and whose
            value is obtained with ``snd``.

    Returns:
        ``True`` if the token should be dropped, ``False`` otherwise.
    """
    kind = fst(token_tuple)
    text = snd(token_tuple)

    if not text:
        return True
    if kind in (NUMBER, STRING, N_TOKENS):
        return True
    # Remaining case: the value is wrapped in a matching pair of quotes.
    return any(text.startswith(q) and text.endswith(q) for q in QUOTES)
def clean_tokens(readline):
    """Tokenize ``readline`` and keep only (type, value) of wanted tokens.

    Tokens produced by ``generate_tokens(readline)`` are dropped when their
    value is empty, when their type is ``NUMBER``, ``STRING`` or
    ``N_TOKENS``, or when their value starts and ends with the same entry
    of ``QUOTES``.  Each surviving token is reduced to a 2-tuple.

    Args:
        readline: Callable handed unchanged to ``generate_tokens``.

    Returns:
        The result of ``map`` over the surviving tokens, each mapped to
        ``(fst(token), snd(token))``.
    """

    def is_noise(token):
        # True for tokens that must not appear in the result.
        kind = fst(token)
        text = snd(token)
        if not text:
            return True
        if kind in (NUMBER, STRING, N_TOKENS):
            return True
        return any(text.startswith(q) and text.endswith(q) for q in QUOTES)

    def as_pair(token):
        # Keep only the first two fields of the token.
        return fst(token), snd(token)

    kept = ifilterfalse(is_noise, generate_tokens(readline))
    return map(as_pair, kept)
def greedy_motif_search(dna_strings, k, profile_builder=build_profile_columns,
                        initial_motifs_builder=first_kmer_builder):
    """Greedily assemble a set of k-mers, one per DNA string.

    Every k-mer of the first string seeds a candidate set.  The set is
    extended string by string: a profile is built from the motifs chosen so
    far and the most probable k-mer of the next string under that profile
    is appended.  A finished candidate replaces the current best whenever
    the current best's ``matrix_score`` is strictly greater than the
    candidate's.

    Args:
        dna_strings: The strings to search; the first one supplies the
            seeds, the rest (via ``tail``) supply the following motifs.
        k: Motif length.
        profile_builder: Callable turning a list of motifs into a profile.
        initial_motifs_builder: Callable ``(dna_strings, k)`` giving the
            starting best motifs.

    Returns:
        The best motif list found.
    """
    best_motifs = initial_motifs_builder(dna_strings, k)

    for seed in all_possible_kmers_from_text(fst(dna_strings), k):
        score_to_beat = matrix_score(best_motifs)

        candidate = [seed]
        for text in tail(dna_strings):
            # The profile is rebuilt from everything selected so far.
            candidate.append(
                most_probable_kmer(text, k, profile_builder(candidate)))

        if score_to_beat > matrix_score(candidate):
            best_motifs = candidate

    return best_motifs
def column_consensus(column):
    """Return the consensus entry of ``column``.

    Each element of ``column`` is paired with the number of times it occurs
    in ``column``; the pairs are handed to ``top_by_snd_and_ignore`` with
    ``descending=True`` and the first component of its result is returned.

    Args:
        column: Sequence supporting iteration and ``.count``.
    """
    # Lazily yields one (element, occurrences) pair per position.
    counted = ((symbol, column.count(symbol)) for symbol in column)
    winner = top_by_snd_and_ignore(counted, descending=True)
    return fst(winner)
def mapper(read):
    """Format ``read`` as ``(first|second)``.

    Args:
        read: Pair whose components are obtained with ``fst`` and ``snd``.

    Returns:
        The two components joined by ``|`` and wrapped in parentheses.
    """
    left = fst(read)
    right = snd(read)
    return '({0}|{1})'.format(left, right)
def most_probable_kmer(dna_string, k, probabilities):
    """Return the k-mer of ``dna_string`` ranked first by probability score.

    Every k-mer from ``all_possible_kmers_from_text`` is paired with its
    ``probability_score`` under ``probabilities``; the list of pairs goes to
    ``top_by_snd_and_ignore`` with ``descending=True`` and the first
    component of its result is returned.

    Args:
        dna_string: Text the k-mers are taken from.
        k: Length of each k-mer.
        probabilities: Passed unchanged to ``probability_score``.
    """
    scored = [
        (candidate, probability_score(candidate, probabilities))
        for candidate in all_possible_kmers_from_text(dna_string, k)
    ]
    best = top_by_snd_and_ignore(scored, descending=True)
    return fst(best)
def space_paired_read(read, d):
    """Join the two halves of ``read`` with ``d`` spaces between them.

    Args:
        read: Pair whose components are obtained with ``fst`` and ``snd``.
        d: Number of space characters inserted between the components.

    Returns:
        First component, then ``d`` spaces, then second component.
    """
    padded_head = fst(read) + ' ' * d
    return padded_head + snd(read)