def run_test_matchmaker_elongated_seed():
    """Check degenerate matching of two seeds against two sequences.

    Two motifs are built with ``structures.w_motif`` and some of their
    structure positions are reassigned to ``glob_var._loop``. Every
    (motif, sequence) pair is passed to
    ``matchmaker.is_there_motif_instance`` with ``is_degenerate=True``.
    The test asserts that motif ``i`` matches sequence ``i`` only.
    """
    # First seed: w_motif(5, 6) with structure position 0 set to glob_var._loop.
    seed_a = structures.w_motif(5, 6)
    seed_a.from_string("NNVBGNSBGNN")
    seed_a.change_structure_position(0, glob_var._loop)

    # Second seed: w_motif(6, 6) with positions 1 and 3 set to glob_var._loop.
    seed_b = structures.w_motif(6, 6)
    seed_b.from_string("ANGAGCAANNNA")
    for position in (1, 3):
        seed_b.change_structure_position(position, glob_var._loop)

    w_sequences = []
    for raw_sequence in ('AAGGGAGGGAACCCU', 'ACGAGCAAAAAAGCCU'):
        w_seq = structures.w_sequence(len(raw_sequence))
        w_seq.from_sequence(raw_sequence)
        w_sequences.append(w_seq)

    n_motifs = type_conversions.w_to_n_motifs_list([seed_a, seed_b])
    n_sequences = type_conversions.w_to_n_sequences_list(w_sequences)

    # Rows are motifs, columns are sequences.
    expected = np.array([[1, 0], [0, 1]], dtype=bool)
    observed = np.zeros(shape=(2, 2), dtype=bool)

    for row, motif in enumerate(n_motifs):
        for col, sequence in enumerate(n_sequences):
            observed[row, col] = matchmaker.is_there_motif_instance(
                motif, sequence, is_degenerate=True)

    assert np.array_equal(observed, expected)
def convert_motif_text_file(inp_filename, out_filename):
    """Convert a text file of motifs into one concatenated binary file.

    The input is consumed as consecutive line pairs: the first line of a
    pair is a structure string, the second is the sequence string. For
    each pair a ``structures.w_motif`` is built, compressed, and its
    ``bytestring`` is appended to the output. A trailing structure line
    without a following sequence line is ignored.

    Args:
        inp_filename: path of the text file to read.
        out_filename: path of the binary file to write (overwritten).

    Raises:
        ValueError: if a structure line contains no ``'.'`` character
            (raised by ``str.index``).
    """
    # Collect the chunks and join them once; repeated ``bytes +=`` re-copies
    # the whole accumulated buffer on every iteration.
    compressed_chunks = []
    current_structure_str = ''

    with open(inp_filename, 'r') as rf:
        for i, line in enumerate(rf):
            stripped_line = line.rstrip()

            if i % 2 == 0:
                # Even lines carry the structure; wait for the sequence line.
                current_structure_str = stripped_line
                continue

            # Everything before the first '.' is counted as the stem; the
            # remainder of the structure string is counted as the loop.
            stem_length = current_structure_str.index('.')
            loop_length = len(current_structure_str) - stem_length

            current_motif = structures.w_motif(stem_length, loop_length)
            current_motif.from_string(stripped_line)

            current_motif.compress()
            compressed_chunks.append(current_motif.bytestring)

    with open(out_filename, 'wb') as wf:
        wf.write(b''.join(compressed_chunks))
def create_one_seed(do_print=True):
    """Build one fixed test seed and return its converted form.

    The seed is ``structures.w_motif(5, 4)`` loaded from ``"BSHNVBCNU"``
    with structure position 0 set to ``glob_var._loop``.

    Args:
        do_print: when true, print a header and both textual renderings
            of the motif (``print`` and ``print_linear``).

    Returns:
        The result of ``type_conversions.w_to_n_motif`` for the seed.
    """
    seed = structures.w_motif(5, 4)
    seed.from_string("BSHNVBCNU")
    seed.change_structure_position(0, glob_var._loop)

    if do_print:
        print("Test motif:")
        seed.print()
        seed.print_linear()

    return type_conversions.w_to_n_motif(seed)
def run_test_matchmaker_non_degenerate():
    """Check the matchmaking functions on three motifs and three sequences.

    Each sequence contains instances of exactly one of the motifs. For
    every (motif, sequence) pair the test records the boolean answer of
    ``matchmaker.is_there_motif_instance`` and the list returned by
    ``matchmaker.find_all_motif_instances``, then compares both against
    the expected values.
    """
    motif_strings = ("GNCANCNNUU", "AAUNNGNGNU", "NNACGNNCUU")
    sequence_strings = (
        # instance of motif 1 at 7
        'UUUUUUUGACAACAAUUTGTCUUUUU',
        # instance of motif 2 at 14
        "GGCAUCAGUUUUUUAAUGUGUGAUCAUUGGGUUCCCCCUUUUU",
        # instances of motif 3 at 15 and 33
        "AAUUAAAACCCCCCCAAACGCCCUUGUUUCCCACCACGGGCUUGUGGAAAAUUUUUU",
    )

    w_motifs = []
    for motif_string in motif_strings:
        w_motif = structures.w_motif(4, 6)
        w_motif.from_string(motif_string)
        w_motifs.append(w_motif)
    n_motifs = type_conversions.w_to_n_motifs_list(w_motifs)

    w_sequences = []
    for sequence_string in sequence_strings:
        w_sequence = structures.w_sequence(len(sequence_string))
        w_sequence.from_sequence(sequence_string)
        w_sequences.append(w_sequence)
    n_sequences = type_conversions.w_to_n_sequences_list(w_sequences)

    # Rows are motifs, columns are sequences.
    expected_hits = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=bool)
    observed_hits = np.zeros(shape=(3, 3), dtype=bool)
    # One entry per (motif, sequence) pair, in the same order as the loops below.
    expected_positions = [[7], [], [], [], [14], [], [], [], [15, 33]]
    observed_positions = []

    for row, motif in enumerate(n_motifs):
        for col, sequence in enumerate(n_sequences):
            observed_hits[row, col] = matchmaker.is_there_motif_instance(motif, sequence)
            observed_positions.append(
                matchmaker.find_all_motif_instances(motif, sequence))

    assert np.array_equal(observed_hits, expected_hits)
    assert observed_positions == expected_positions
# Example #5
# 0
def create_single_pair(stem=4,
                       loop=7,
                       motif_str="NGCAUNGNANN",
                       seq_str="UGCAUUGUAUGUGUG"):
    """Build one converted (motif, sequence) pair from plain strings.

    Args:
        stem: first argument passed to ``structures.w_motif``.
        loop: second argument passed to ``structures.w_motif``.
        motif_str: string loaded into the motif via ``from_string``.
        seq_str: string loaded into the sequence via ``from_sequence``;
            its length sizes the ``structures.w_sequence``.

    Returns:
        A tuple ``(n_motif, n_sequence)`` as produced by
        ``type_conversions.w_to_n_motif`` and
        ``type_conversions.w_to_n_sequence``.
    """
    w_motif = structures.w_motif(stem, loop)
    w_motif.from_string(motif_str)
    n_motif = type_conversions.w_to_n_motif(w_motif)

    w_seq = structures.w_sequence(len(seq_str))
    w_seq.from_sequence(seq_str)
    n_seq = type_conversions.w_to_n_sequence(w_seq)

    return n_motif, n_seq
def prepare_known_seeds(args):
    """Build the converted motif and sequence lists for the known-seeds test.

    The motif strings, their shape and the binary sequence file all come
    from ``define_constants(args)``.

    Args:
        args: passed through unchanged to ``define_constants``.

    Returns:
        A tuple ``(n_motifs_list, n_seqs_list)`` produced by
        ``type_conversions.w_to_n_motifs_list`` and
        ``type_conversions.w_to_n_sequences_list``.
    """
    # The fourth value from define_constants is not needed here.
    seqs_shape, seqs_to_test, bin_file_to_test, _desired_numbers = define_constants(args)

    # Append as we go instead of pre-filling a list with placeholder ints.
    w_motifs_list = []
    for seq in seqs_to_test:
        curr_test_motif = structures.w_motif(seqs_shape[0], seqs_shape[1])
        curr_test_motif.from_string(seq)
        w_motifs_list.append(curr_test_motif)

    # seqs_order fixes the order in which sequences are taken from the dict.
    seqs_dict, seqs_order = IO.read_rna_bin_file(bin_file_to_test)
    w_seqs_list = [seqs_dict[name] for name in seqs_order]

    n_motifs_list = type_conversions.w_to_n_motifs_list(w_motifs_list)
    n_seqs_list = type_conversions.w_to_n_sequences_list(w_seqs_list)
    return n_motifs_list, n_seqs_list
def run_test_current_pair(stem = 4, loop = 7,
                      motif_str = "NGCAUNGNANN",
                      seq_str = "UGCAUUGUAUGUGUG"):
    """Report whether one sequence matches one motif.

    Builds the motif and the sequence from the given strings, converts
    both with ``type_conversions``, and asks
    ``matchmaker.is_there_motif_instance`` whether there is a match. On a
    match, it also prints the values returned by
    ``matchmaker.find_all_motif_instances``.

    Args:
        stem: first argument passed to ``structures.w_motif``.
        loop: second argument passed to ``structures.w_motif``.
        motif_str: string loaded into the motif.
        seq_str: string loaded into the sequence.
    """
    w_motif = structures.w_motif(stem, loop)
    w_motif.from_string(motif_str)
    n_motif = type_conversions.w_to_n_motif(w_motif)

    w_seq = structures.w_sequence(len(seq_str))
    w_seq.from_sequence(seq_str)
    n_seq = type_conversions.w_to_n_sequence(w_seq)

    if not matchmaker.is_there_motif_instance(n_motif, n_seq):
        print("Sequence %s DOES NOT matches the motif %s" % (seq_str, motif_str))
        return

    print("Sequence %s matches the motif %s" % (seq_str, motif_str))
    found = matchmaker.find_all_motif_instances(n_motif, n_seq)
    print("Motif instances are: ", ", ".join(map(str, found)))