def run_test_matchmaker_elongated_seed():
    """Check degenerate matching of two elongated motifs against two sequences.

    Two motifs are built from strings, some of their structure positions are
    switched to ``glob_var._loop``, and every motif is tested against every
    sequence with ``is_degenerate=True``.  The test passes when motif ``i``
    matches sequence ``i`` and nothing else (identity match matrix).
    """
    motif_a = structures.w_motif(5, 6)
    motif_a.from_string("NNVBGNSBGNN")
    motif_a.change_structure_position(0, glob_var._loop)

    motif_b = structures.w_motif(6, 6)
    motif_b.from_string("ANGAGCAANNNA")
    for position in (1, 3):
        motif_b.change_structure_position(position, glob_var._loop)

    raw_a = 'AAGGGAGGGAACCCU'
    sequence_a = structures.w_sequence(len(raw_a))
    sequence_a.from_sequence(raw_a)

    raw_b = 'ACGAGCAAAAAAGCCU'
    sequence_b = structures.w_sequence(len(raw_b))
    sequence_b.from_sequence(raw_b)

    n_motifs = type_conversions.w_to_n_motifs_list([motif_a, motif_b])
    n_sequences = type_conversions.w_to_n_sequences_list([sequence_a, sequence_b])

    # Expected outcome: a 2x2 identity matrix (motif i <-> sequence i only).
    expected = np.eye(2, dtype=bool)
    observed = np.zeros(shape=(2, 2), dtype=bool)
    for row, motif in enumerate(n_motifs):
        for col, sequence in enumerate(n_sequences):
            observed[row, col] = matchmaker.is_there_motif_instance(
                motif, sequence, is_degenerate=True)

    assert np.array_equal(observed, expected)
def run_test_elongated_seed(seqs_of_interest, discr_exp_profile, nbins, N, do_print=True):
    """Profile one elongated seed and print its matches in the first N hits.

    Args:
        seqs_of_interest: indexable collection of sequences to scan; each
            matching one must expose an ``nts`` attribute that can be sliced.
        discr_exp_profile: discretized expression profile passed to
            ``MI.mut_info`` together with the seed's match profile.
        nbins: number of bins used for the expression profile (``y_bins``).
        N: how many of the matching sequences to inspect for motif instances.
        do_print: forwarded to ``create_one_seed``; also controls whether the
            mutual information value is printed.

    Returns:
        None.  Results are only printed.
    """
    seed = create_one_seed(do_print)
    profile, _elapsed = matchmaker.calculate_profile_one_motif(
        seed, seqs_of_interest, is_degenerate=True)

    # Keep the sequences whose profile entry is truthy.
    hits = []
    for idx in range(profile.values.shape[0]):
        if profile.values[idx]:
            hits.append(seqs_of_interest[idx])

    mi_value = MI.mut_info(profile.values, discr_exp_profile,
                           x_bins=2, y_bins=nbins)
    if do_print:
        print(mi_value)

    # NOTE(review): the per-match lines below are printed even when do_print
    # is False - confirm that this is intended.
    match_number = 0
    window_length = seed.linear_length
    for hit in hits[:N]:
        starts = matchmaker.find_all_motif_instances(
            seed, hit, is_degenerate=True)
        for start in starts:
            match_number += 1
            # Cut the matched window out of the sequence to display it.
            window = structures.w_sequence(window_length)
            window.nts = hit.nts[start:start + window_length]
            print("Match %d: %s" % (match_number,
                                    window.print(return_string=True)))
def time_reading_fasta(fasta_file):
    """Time the sequence-object operations for every record of a FASTA file.

    The file is read in one go and split on ``'>'``.  For each non-empty
    record, four operations are timed with ``timeit`` (100 runs each) and the
    totals are printed: creating a ``w_sequence``, filling it from the string,
    compressing it, and compressing it through ``IO.compress_named_sequences``.

    Args:
        fasta_file: path of the FASTA file to read.

    Returns:
        A ``(tr_dict_loc, seqs_order)`` tuple.  Both containers are returned
        empty, because the statements that would populate them are commented
        out below.
    """
    tr_dict_loc = {}
    seqs_order = []

    with open(fasta_file, 'r') as f:
        split_string = f.read().split('>')

    for entry in split_string:
        if entry == '':
            continue

        # partition() copes with a record that has no newline (header only):
        # find() would return -1 there, chopping the last header character
        # and treating the whole header as sequence data.
        annotation, _, body = entry.partition('\n')
        sequence_string = body.replace('\n', '')

        current_sequence = structures.w_sequence(len(sequence_string))
        current_sequence.from_sequence(sequence_string)

        time_create_object = timeit.timeit(
            lambda: structures.w_sequence(len(sequence_string)),
            number=100)
        time_fill_object = timeit.timeit(
            lambda: current_sequence.from_sequence(sequence_string),
            number=100)
        time_compress_object = timeit.timeit(
            lambda: current_sequence.compress(),
            number=100)
        time_compress_named_object = timeit.timeit(
            lambda: IO.compress_named_sequences(
                {annotation: current_sequence}, [annotation]),
            number=100)

        print("Create object: %.5f" % time_create_object)
        print("Fill object: %.5f" % time_fill_object)
        print("Compress object: %.5f" % time_compress_object)
        print("Compress named object: %.5f" % time_compress_named_object)
        print()

        # curr_timing = timeit.timeit('current_sequence.from_sequence(sequence_string)',
        #                             'from __main__ import current_sequence, sequence_string')
        # print(curr_timing)
        #
        # tr_dict_loc[annotation] = current_sequence
        # seqs_order.append(annotation)

    return tr_dict_loc, seqs_order
def run_test_matchmaker_non_degenerate():
    """Check non-degenerate matching of three motifs against three sequences.

    Each of the three sequences contains instances of exactly one of the three
    motifs.  Every motif is tested against every sequence, and both the
    boolean match matrix and the list of match start indices (collected
    motif by motif, sequence by sequence) are compared with expected values.
    """
    # Build the three motifs (all created with w_motif(4, 6)).
    w_motifs = []
    for pattern in ("GNCANCNNUU", "AAUNNGNGNU", "NNACGNNCUU"):
        motif = structures.w_motif(4, 6)
        motif.from_string(pattern)
        w_motifs.append(motif)
    n_motifs = type_conversions.w_to_n_motifs_list(w_motifs)

    raw_sequences = (
        'UUUUUUUGACAACAAUUTGTCUUUUU',  # instance of motif 1 at 7
        "GGCAUCAGUUUUUUAAUGUGUGAUCAUUGGGUUCCCCCUUUUU",  # instance of motif 2 at 14
        "AAUUAAAACCCCCCCAAACGCCCUUGUUUCCCACCACGGGCUUGUGGAAAAUUUUUU",  # instances of motif 3 at 15 and 33
    )
    w_sequences = []
    for raw in raw_sequences:
        sequence = structures.w_sequence(len(raw))
        sequence.from_sequence(raw)
        w_sequences.append(sequence)
    n_sequences = type_conversions.w_to_n_sequences_list(w_sequences)

    # Motif i is expected to match sequence i only.
    expected_flags = np.eye(3, dtype=bool)
    expected_indices = [[7], [], [], [], [14], [], [], [], [15, 33]]

    observed_flags = np.zeros(shape=(3, 3), dtype=bool)
    observed_indices = []
    for row, motif in enumerate(n_motifs):
        for col, sequence in enumerate(n_sequences):
            found = matchmaker.is_there_motif_instance(motif, sequence)
            starts = matchmaker.find_all_motif_instances(motif, sequence)
            observed_flags[row, col] = found
            observed_indices.append(starts)

    assert np.array_equal(observed_flags, expected_flags)
    assert observed_indices == expected_indices
def create_single_pair(stem=4, loop=7, motif_str="NGCAUNGNANN", seq_str="UGCAUUGUAUGUGUG"):
    """Build one motif and one sequence and convert both with type_conversions.

    Args:
        stem: first argument passed to ``structures.w_motif``.
        loop: second argument passed to ``structures.w_motif``.
        motif_str: string the motif is filled from.
        seq_str: string the sequence is filled from; its length sizes the
            ``w_sequence`` object.

    Returns:
        A ``(motif, sequence)`` tuple holding the results of
        ``w_to_n_motif`` and ``w_to_n_sequence``.
    """
    w_motif = structures.w_motif(stem, loop)
    w_motif.from_string(motif_str)

    w_sequence = structures.w_sequence(len(seq_str))
    w_sequence.from_sequence(seq_str)

    return (type_conversions.w_to_n_motif(w_motif),
            type_conversions.w_to_n_sequence(w_sequence))
def run_test_current_pair(stem=4, loop=7, motif_str="NGCAUNGNANN", seq_str="UGCAUUGUAUGUGUG"):
    """Report whether one sequence matches one motif, and where.

    The motif / sequence pair is built by ``create_single_pair`` (the same
    construction this function used to repeat inline).  If the sequence
    contains the motif, the start indices of all instances are printed;
    otherwise a "does not match" message is printed.

    Args:
        stem: first argument passed to ``structures.w_motif``.
        loop: second argument passed to ``structures.w_motif``.
        motif_str: string the motif is filled from.
        seq_str: string the sequence is filled from.

    Returns:
        None.  The outcome is only printed.
    """
    n_test_motif, n_test_sequence = create_single_pair(
        stem, loop, motif_str, seq_str)

    if not matchmaker.is_there_motif_instance(n_test_motif, n_test_sequence):
        print("Sequence %s DOES NOT matches the motif %s" % (seq_str, motif_str))
        return

    print("Sequence %s matches the motif %s" % (seq_str, motif_str))
    motif_instances = matchmaker.find_all_motif_instances(
        n_test_motif, n_test_sequence)
    print("Motif instances are: ", ", ".join(str(x) for x in motif_instances))