def run_test_matchmaker_elongated_seed():
    """Check degenerate matching of two elongated seeds against two sequences.

    Two motifs are built, some of their structure positions are switched to
    ``glob_var._loop``, and each motif is tested against each sequence with
    ``is_degenerate=True``. The expected outcome is that each motif matches
    only the sequence with the same index.
    """
    # (stem, loop, motif string, positions switched to glob_var._loop)
    motif_specs = (
        (5, 6, "NNVBGNSBGNN", (0,)),
        (6, 6, "ANGAGCAANNNA", (1, 3)),
    )
    w_motifs = []
    for stem, loop, motif_str, loop_positions in motif_specs:
        motif = structures.w_motif(stem, loop)
        motif.from_string(motif_str)
        for position in loop_positions:
            motif.change_structure_position(position, glob_var._loop)
        w_motifs.append(motif)

    w_sequences = []
    for seq_str in ('AAGGGAGGGAACCCU', 'ACGAGCAAAAAAGCCU'):
        sequence = structures.w_sequence(len(seq_str))
        sequence.from_sequence(seq_str)
        w_sequences.append(sequence)

    n_motifs = type_conversions.w_to_n_motifs_list(w_motifs)
    n_sequences = type_conversions.w_to_n_sequences_list(w_sequences)

    # Rows are motifs, columns are sequences.
    expected = np.array([[1, 0], [0, 1]], dtype=bool)
    observed = np.zeros(shape=(2, 2), dtype=bool)
    for row, motif in enumerate(n_motifs):
        for col, sequence in enumerate(n_sequences):
            observed[row, col] = matchmaker.is_there_motif_instance(
                motif, sequence, is_degenerate=True)

    assert(np.array_equal(observed, expected))
def convert_motif_text_file(inp_filename, out_filename):
    """Convert a text file of motifs into a single compressed binary file.

    The input is read as alternating lines: even-numbered lines (0-based)
    hold a structure string and the following odd-numbered line holds the
    matching sequence string. For every such pair a ``structures.w_motif``
    is built, compressed, and its ``bytestring`` is appended to the output.

    Args:
        inp_filename: path of the text file to read.
        out_filename: path of the binary file to write (overwritten).

    Raises:
        ValueError: if a structure line contains no ``'.'`` character
            (raised by ``str.index``).
    """
    # Collect the per-motif byte strings and join once at the end instead of
    # growing a bytes object with ``+=`` (which re-copies the buffer each time).
    compressed_chunks = []
    structure_str = ''

    with open(inp_filename, 'r') as rf:
        for i, line in enumerate(rf):
            stripped_line = line.rstrip()
            if i % 2 == 0:
                # Structure line: remember it until its sequence line arrives.
                structure_str = stripped_line
                continue

            # Sequence line: the stem length is the index of the first '.'
            # in the structure line; the loop length is the remainder of it.
            stem_length = structure_str.index('.')
            loop_length = len(structure_str) - stem_length
            motif = structures.w_motif(stem_length, loop_length)
            motif.from_string(stripped_line)
            motif.compress()
            compressed_chunks.append(motif.bytestring)

    with open(out_filename, 'wb') as wf:
        wf.write(b''.join(compressed_chunks))
def create_one_seed(do_print=True):
    """Build one fixed test motif and return its converted (``n``) form.

    The motif is created with ``structures.w_motif(5, 4)`` from the string
    ``"BSHNVBCNU"``, and structure position 0 is switched to
    ``glob_var._loop``.

    Args:
        do_print: when true, print the motif via its ``print`` and
            ``print_linear`` methods before converting it.

    Returns:
        The result of ``type_conversions.w_to_n_motif`` for the motif.
    """
    seed = structures.w_motif(5, 4)
    seed.from_string("BSHNVBCNU")
    seed.change_structure_position(0, glob_var._loop)

    if do_print:
        print("Test motif:")
        seed.print()
        seed.print_linear()

    return type_conversions.w_to_n_motif(seed)
def run_test_matchmaker_non_degenerate():
    """Check non-degenerate matching of three motifs against three sequences.

    Every motif/sequence pair is tested with both
    ``matchmaker.is_there_motif_instance`` and
    ``matchmaker.find_all_motif_instances``. Each motif is expected to match
    only the sequence with the same index, at the positions noted below.
    """
    motif_strings = ("GNCANCNNUU", "AAUNNGNGNU", "NNACGNNCUU")
    sequence_strings = (
        # instance of motif 1 at 7
        'UUUUUUUGACAACAAUUTGTCUUUUU',
        # instance of motif 2 at 14
        "GGCAUCAGUUUUUUAAUGUGUGAUCAUUGGGUUCCCCCUUUUU",
        # instances of motif 3 at 15 and 33
        "AAUUAAAACCCCCCCAAACGCCCUUGUUUCCCACCACGGGCUUGUGGAAAAUUUUUU",
    )

    # Create all motif objects first, then fill them from their strings.
    w_motifs = [structures.w_motif(4, 6) for _ in motif_strings]
    for w_motif, motif_str in zip(w_motifs, motif_strings):
        w_motif.from_string(motif_str)
    n_motifs = type_conversions.w_to_n_motifs_list(w_motifs)

    # Same two-step construction for the sequences.
    w_sequences = [structures.w_sequence(len(seq_str)) for seq_str in sequence_strings]
    for w_sequence, seq_str in zip(w_sequences, sequence_strings):
        w_sequence.from_sequence(seq_str)
    n_sequences = type_conversions.w_to_n_sequences_list(w_sequences)

    # Rows are motifs, columns are sequences.
    expected_matches = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=bool)
    observed_matches = np.zeros(shape=(3, 3), dtype=bool)
    # One entry per (motif, sequence) pair, in row-major order.
    expected_indices = [[7], [], [], [], [14], [], [], [], [15, 33]]
    observed_indices = []

    for row, motif in enumerate(n_motifs):
        for col, sequence in enumerate(n_sequences):
            found = matchmaker.is_there_motif_instance(motif, sequence)
            positions = matchmaker.find_all_motif_instances(motif, sequence)
            observed_matches[row, col] = found
            observed_indices.append(positions)

    assert(np.array_equal(observed_matches, expected_matches))
    assert(observed_indices == expected_indices)
def create_single_pair(stem=4, loop=7, motif_str="NGCAUNGNANN", seq_str="UGCAUUGUAUGUGUG"):
    """Build one motif and one sequence and return both in converted form.

    Args:
        stem: first argument passed to ``structures.w_motif``.
        loop: second argument passed to ``structures.w_motif``.
        motif_str: string the motif is filled from.
        seq_str: string the sequence is filled from; its length sizes the
            ``structures.w_sequence`` object.

    Returns:
        A ``(motif, sequence)`` tuple holding the results of
        ``type_conversions.w_to_n_motif`` and
        ``type_conversions.w_to_n_sequence``.
    """
    w_motif = structures.w_motif(stem, loop)
    w_motif.from_string(motif_str)
    converted_motif = type_conversions.w_to_n_motif(w_motif)

    w_sequence = structures.w_sequence(len(seq_str))
    w_sequence.from_sequence(seq_str)
    converted_sequence = type_conversions.w_to_n_sequence(w_sequence)

    return converted_motif, converted_sequence
def prepare_known_seeds(args):
    """Build the converted motif and sequence lists for the known-seed test.

    The motif strings, motif shape and sequence file are all obtained from
    ``define_constants(args)``; sequences are read with
    ``IO.read_rna_bin_file`` and kept in the order that call reports.

    Args:
        args: passed through unchanged to ``define_constants``.

    Returns:
        A ``(n_motifs_list, n_seqs_list)`` tuple produced by
        ``type_conversions.w_to_n_motifs_list`` and
        ``type_conversions.w_to_n_sequences_list``.
    """
    # The fourth value returned by define_constants is not needed here.
    seqs_shape, seqs_to_test, bin_file_to_test, _ = define_constants(args)

    def build_motif(motif_str):
        # Every motif shares the shape given by the first two entries of seqs_shape.
        motif = structures.w_motif(seqs_shape[0], seqs_shape[1])
        motif.from_string(motif_str)
        return motif

    w_motifs_list = [build_motif(motif_str) for motif_str in seqs_to_test]

    seqs_dict, seqs_order = IO.read_rna_bin_file(bin_file_to_test)
    w_seqs_list = []
    for name in seqs_order:
        w_seqs_list.append(seqs_dict[name])

    n_motifs_list = type_conversions.w_to_n_motifs_list(w_motifs_list)
    n_seqs_list = type_conversions.w_to_n_sequences_list(w_seqs_list)
    return n_motifs_list, n_seqs_list
def run_test_current_pair(stem=4, loop=7, motif_str="NGCAUNGNANN", seq_str="UGCAUUGUAUGUGUG"):
    """Report on stdout whether ``seq_str`` contains an instance of the motif.

    When ``matchmaker.is_there_motif_instance`` reports a match, the values
    returned by ``matchmaker.find_all_motif_instances`` are printed as well.

    Args:
        stem: first argument passed to ``structures.w_motif``.
        loop: second argument passed to ``structures.w_motif``.
        motif_str: string the motif is filled from.
        seq_str: string the sequence is filled from.
    """
    w_motif = structures.w_motif(stem, loop)
    w_motif.from_string(motif_str)
    n_motif = type_conversions.w_to_n_motif(w_motif)

    w_sequence = structures.w_sequence(len(seq_str))
    w_sequence.from_sequence(seq_str)
    n_sequence = type_conversions.w_to_n_sequence(w_sequence)

    # Handle the no-match case first so the match path reads straight down.
    if not matchmaker.is_there_motif_instance(n_motif, n_sequence):
        print("Sequence %s DOES NOT matches the motif %s" % (seq_str, motif_str))
        return

    print("Sequence %s matches the motif %s" % (seq_str, motif_str))
    instances = matchmaker.find_all_motif_instances(n_motif, n_sequence)
    print("Motif instances are: ", ", ".join(map(str, instances)))