Ejemplo n.º 1
0
def run_test_matchmaker_elongated_seed():
    """Check degenerate matching of two motifs against two sequences.

    Two motifs are built from strings and some of their structure positions
    are switched to ``glob_var._loop``.  Every motif is then tested against
    every sequence with ``matchmaker.is_there_motif_instance`` in degenerate
    mode.  The test passes when motif ``i`` matches sequence ``k`` exactly
    when ``i == k``.

    Raises:
        AssertionError: if the observed match matrix differs from the
            expected one.
    """
    first_motif = structures.w_motif(5, 6)
    first_motif.from_string("NNVBGNSBGNN")
    first_motif.change_structure_position(0, glob_var._loop)

    second_motif = structures.w_motif(6, 6)
    second_motif.from_string("ANGAGCAANNNA")
    for position in (1, 3):
        second_motif.change_structure_position(position, glob_var._loop)

    # Wrap each raw string in a sequence object of matching length.
    w_sequences = []
    for raw_string in ('AAGGGAGGGAACCCU', 'ACGAGCAAAAAAGCCU'):
        wrapped = structures.w_sequence(len(raw_string))
        wrapped.from_sequence(raw_string)
        w_sequences.append(wrapped)

    n_motifs = type_conversions.w_to_n_motifs_list([first_motif, second_motif])
    n_sequences = type_conversions.w_to_n_sequences_list(w_sequences)

    # Rows are motifs, columns are sequences.
    expected = np.array([[1, 0], [0, 1]], dtype=bool)
    observed = np.zeros(shape=(2, 2), dtype=bool)
    for row, motif in enumerate(n_motifs):
        for col, sequence in enumerate(n_sequences):
            observed[row, col] = matchmaker.is_there_motif_instance(
                motif, sequence, is_degenerate=True)

    assert np.array_equal(observed, expected)
Ejemplo n.º 2
0
def run_test_elongated_seed(seqs_of_interest,
                            discr_exp_profile,
                            nbins,
                            N,
                            do_print=True):
    """Profile one elongated seed and print its instances in matching sequences.

    A seed is obtained from ``create_one_seed(do_print)`` and its profile over
    ``seqs_of_interest`` is computed in degenerate mode.  The mutual
    information between that profile and ``discr_exp_profile`` is computed
    with ``MI.mut_info`` and printed when ``do_print`` is true.  For the first
    ``N`` sequences whose profile entry is truthy, every motif instance is
    printed as ``Match <n>: <sequence>``; these lines are printed regardless
    of ``do_print``.

    Args:
        seqs_of_interest: indexable collection of sequence objects; each must
            expose ``nts`` (presumably ``structures.w_sequence`` -- confirm).
        discr_exp_profile: second argument passed to ``MI.mut_info``.
        nbins: passed to ``MI.mut_info`` as ``y_bins``.
        N: maximum number of matching sequences to report on.
        do_print: forwarded to ``create_one_seed`` and gates printing of the
            mutual information value.
    """
    elong_seed = create_one_seed(do_print)
    profile, _elapsed = matchmaker.calculate_profile_one_motif(
        elong_seed, seqs_of_interest, is_degenerate=True)

    # Keep the sequences whose profile entry is set.
    hits = []
    for idx in range(profile.values.shape[0]):
        if profile.values[idx]:
            hits.append(seqs_of_interest[idx])

    mutual_information = MI.mut_info(profile.values,
                                     discr_exp_profile,
                                     x_bins=2,
                                     y_bins=nbins)
    if do_print:
        print(mutual_information)

    # The match number runs across all reported sequences, starting at 1.
    match_number = 0
    for seq in hits[:N]:
        instance_starts = matchmaker.find_all_motif_instances(
            elong_seed, seq, is_degenerate=True)
        for start in instance_starts:
            match_number += 1
            # Copy the matched window into a fresh sequence object so it can
            # be rendered with that object's own print method.
            window = structures.w_sequence(elong_seed.linear_length)
            window.nts = seq.nts[start:start + elong_seed.linear_length]
            rendered = window.print(return_string=True)
            print("Match %d: %s" % (match_number, rendered))
Ejemplo n.º 3
0
def time_reading_fasta(fasta_file):
    """Time sequence-object operations for every record of a FASTA file.

    The whole file is read and split on ``'>'``.  For each non-empty record,
    the text before the first newline is taken as the annotation and the
    remainder, with newlines removed, as the sequence string.  Four operations
    are each timed over 100 repetitions and the totals are printed:
    creating a ``structures.w_sequence``, filling it with ``from_sequence``,
    calling ``compress`` on it, and calling ``IO.compress_named_sequences``.

    Args:
        fasta_file: path of the FASTA file to read.

    Returns:
        A ``(dict, list)`` tuple.  NOTE: nothing is ever stored in either
        container, so both are always returned empty.
    """
    tr_dict_loc = {}
    seqs_order = []

    with open(fasta_file, 'r') as handle:
        records = handle.read().split('>')

    repetitions = 100
    for record in records:
        if not record:
            continue

        newline_at = record.find('\n')
        annotation = record[:newline_at]
        sequence_string = record[newline_at + 1:].replace('\n', '')

        current_sequence = structures.w_sequence(len(sequence_string))
        current_sequence.from_sequence(sequence_string)

        # The timings run in this fixed order on the same sequence object.
        time_create_object = timeit.timeit(
            lambda: structures.w_sequence(len(sequence_string)),
            number=repetitions)
        time_fill_object = timeit.timeit(
            lambda: current_sequence.from_sequence(sequence_string),
            number=repetitions)
        time_compress_object = timeit.timeit(
            lambda: current_sequence.compress(),
            number=repetitions)
        time_compress_named_object = timeit.timeit(
            lambda: IO.compress_named_sequences(
                {annotation: current_sequence}, [annotation]),
            number=repetitions)

        print("Create object: %.5f" % time_create_object)
        print("Fill object: %.5f" % time_fill_object)
        print("Compress object: %.5f" % time_compress_object)
        print("Compress named object: %.5f" % time_compress_named_object)
        print()

        # NOTE: parsed sequences are intentionally not stored in tr_dict_loc
        # or seqs_order here; this function only reports timings.

    return tr_dict_loc, seqs_order
Ejemplo n.º 4
0
def run_test_matchmaker_non_degenerate():
    """Check non-degenerate matching of three motifs against three sequences.

    Each sequence is expected to contain instances of exactly one motif.
    Both ``matchmaker.is_there_motif_instance`` and
    ``matchmaker.find_all_motif_instances`` are called for every
    (motif, sequence) pair, and the results are compared with the expected
    match matrix and the expected instance positions.

    Raises:
        AssertionError: if either the boolean matrix or the list of instance
            indices differs from the expected values.
    """
    motif_patterns = ("GNCANCNNUU", "AAUNNGNGNU", "NNACGNNCUU")
    sequence_strings = (
        'UUUUUUUGACAACAAUUTGTCUUUUU',  # instance of motif 1 at 7
        "GGCAUCAGUUUUUUAAUGUGUGAUCAUUGGGUUCCCCCUUUUU",  # instance of motif 2 at 14
        "AAUUAAAACCCCCCCAAACGCCCUUGUUUCCCACCACGGGCUUGUGGAAAAUUUUUU",  # instances of motif 3 at 15 and 33
    )

    w_motifs = []
    for pattern in motif_patterns:
        motif = structures.w_motif(4, 6)
        motif.from_string(pattern)
        w_motifs.append(motif)
    n_motifs = type_conversions.w_to_n_motifs_list(w_motifs)

    w_sequences = []
    for raw_string in sequence_strings:
        wrapped = structures.w_sequence(len(raw_string))
        wrapped.from_sequence(raw_string)
        w_sequences.append(wrapped)
    n_sequences = type_conversions.w_to_n_sequences_list(w_sequences)

    # Rows are motifs, columns are sequences.
    expected_matches = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=bool)
    observed_matches = np.zeros(shape=(3, 3), dtype=bool)
    # One entry per (motif, sequence) pair, in row-major order.
    expected_indices = [[7], [], [], [], [14], [], [], [], [15, 33]]
    observed_indices = []

    for row, motif in enumerate(n_motifs):
        for col, sequence in enumerate(n_sequences):
            observed_matches[row, col] = matchmaker.is_there_motif_instance(
                motif, sequence)
            observed_indices.append(
                matchmaker.find_all_motif_instances(motif, sequence))

    assert np.array_equal(observed_matches, expected_matches)
    assert observed_indices == expected_indices
Ejemplo n.º 5
0
def create_single_pair(stem=4,
                       loop=7,
                       motif_str="NGCAUNGNANN",
                       seq_str="UGCAUUGUAUGUGUG"):
    """Build one motif and one sequence and return their converted forms.

    Args:
        stem: first argument passed to ``structures.w_motif``.
        loop: second argument passed to ``structures.w_motif``.
        motif_str: string the motif is filled from.
        seq_str: string the sequence is filled from; its length sizes the
            sequence object.

    Returns:
        A ``(motif, sequence)`` tuple holding the results of
        ``type_conversions.w_to_n_motif`` and
        ``type_conversions.w_to_n_sequence``.
    """
    w_motif = structures.w_motif(stem, loop)
    w_motif.from_string(motif_str)

    w_sequence = structures.w_sequence(len(seq_str))
    w_sequence.from_sequence(seq_str)

    return (type_conversions.w_to_n_motif(w_motif),
            type_conversions.w_to_n_sequence(w_sequence))
Ejemplo n.º 6
0
def run_test_current_pair(stem=4, loop=7,
                          motif_str="NGCAUNGNANN",
                          seq_str="UGCAUUGUAUGUGUG"):
    """Report whether a single motif matches a single sequence.

    The motif and the sequence are built from the given strings, converted
    with ``type_conversions`` and checked with
    ``matchmaker.is_there_motif_instance``.  On a match, the positions
    returned by ``matchmaker.find_all_motif_instances`` are printed as well.

    Args:
        stem: first argument passed to ``structures.w_motif``.
        loop: second argument passed to ``structures.w_motif``.
        motif_str: string the motif is filled from.
        seq_str: string the sequence is filled from.
    """
    w_motif = structures.w_motif(stem, loop)
    w_motif.from_string(motif_str)
    n_motif = type_conversions.w_to_n_motif(w_motif)

    w_sequence = structures.w_sequence(len(seq_str))
    w_sequence.from_sequence(seq_str)
    n_sequence = type_conversions.w_to_n_sequence(w_sequence)

    if not matchmaker.is_there_motif_instance(n_motif, n_sequence):
        print("Sequence %s DOES NOT matches the motif %s" % (seq_str, motif_str))
        return

    print("Sequence %s matches the motif %s" % (seq_str, motif_str))
    instance_positions = matchmaker.find_all_motif_instances(n_motif, n_sequence)
    print("Motif instances are: ", ", ".join(map(str, instance_positions)))