Esempio n. 1
0
def test_instance_from_file(filename, paramsconfig):
    '''
    Parse one instance from ``filename``, run ``test_instance`` on it and
    print the elapsed wall-clock time.

    An instance here means one reference sequence together with one or
    more query sequences.

    The model name passed to ``test_instance`` is ``filename`` with its
    last four characters cut off (for names of at least four characters).
    Only the ``test_instance`` call is timed; parsing is not included.
    Nothing is returned.
    '''
    parsed = helpers.parse_instance(filename)
    cut = len(filename) - 4
    name = filename[:cut]
    started = time.time()
    test_instance(name, parsed, paramsconfig)
    elapsed = time.time() - started
    print("total processing time: " + str(elapsed) + " seconds.")
Esempio n. 2
0
def test_instance_from_file(filename, paramsconfig):
    '''
    Parse one instance from ``filename``, run ``test_instance`` on it and
    print the elapsed wall-clock time.

    An instance here means one reference sequence together with one or
    more query sequences.

    The model name passed to ``test_instance`` is ``filename`` with its
    last four characters cut off (for names of at least four characters).
    Only the ``test_instance`` call is timed; parsing is not included.
    Nothing is returned.
    '''
    parsed = helpers.parse_instance(filename)
    cut = len(filename) - 4
    name = filename[:cut]
    started = time.time()
    test_instance(name, parsed, paramsconfig)
    elapsed = time.time() - started
    print("total processing time: " + str(elapsed) + " seconds.")
Esempio n. 3
0
def test_batch(folder_path, output_name, paramstuple):
    '''
    Run ``test_instance`` on every file in ``folder_path`` and append a
    per-query report to ``output_name``.

    Parameters:
        folder_path: directory whose entries are each parsed with
            ``helpers.parse_instance``; a trailing "/" is added if missing.
        output_name: path of the report file, opened in append mode.
        paramstuple: passed through unchanged to ``test_instance``.

    Returns a tuple ``(stats, aggregated_ratios, score_ratios)`` where
    ``score_ratios`` holds one ratio per query, ``aggregated_ratios`` is
    ``helpers.aggregate_ratios(score_ratios)`` and ``stats`` is a list of
    11 counters:
        stats[0..3]  queries with exactly 0, 1, 2 or 3 frameshifts
        stats[4]     queries with 4 or more frameshifts
        stats[5]     not updated by this function
        stats[6]     queries whose predicted alignment matches the original
        stats[7]     queries whose predicted alignment does not match
        stats[8]     files in which some, but not all, queries matched
        stats[9]     queries whose predicted exon start equals the original
        stats[10]    queries whose predicted exon end equals the original
    '''
    if folder_path[-1] != "/":
        folder_path += "/"
    stats = [0] * 11
    score_ratios = []
    # open() replaces the Python-2-only file() builtin, and the with-block
    # guarantees the report is flushed and closed even if a file fails.
    with open(output_name, 'a') as f:
        f.write("--------------------------------")
        f.write("\n\nCurrently processing folder " + folder_path + "\n\n")
        for filename in os.listdir(folder_path):
            instance = helpers.parse_instance(folder_path + filename)
            print("Currently testing: " + filename)
            print("length of reference sequence: " + str(len(instance[2])))
            print("number of query sequences: " + str(len(instance[3])))
            # The full file name (extension included) is the model name.
            results, m = test_instance(filename, instance, paramstuple)
            f.write("\n\n\n" + filename + ":\n")
            match_results = []
            for query_index, result in enumerate(results):
                query_alignment = instance[1][query_index]
                predicted_pair = result[1]

                frame_shifts = locate_frameshifts(instance[3][query_index],
                                                  result[0])
                # Buckets 0-3 count exact numbers; bucket 4 is "4 or more".
                stats[min(len(frame_shifts), 4)] += 1

                alignments_match = _alignments_match(
                    (predicted_pair[0], predicted_pair[1]),
                    (instance[0], query_alignment))
                match_results.append(alignments_match)
                if alignments_match:
                    stats[6] += 1
                else:
                    stats[7] += 1

                # NOTE(review): despite the name, this path is decoded from
                # the predicted pair (result[1]) - confirm that is intended.
                path_decoded_from_original = helpers.pair_to_path(
                    m, predicted_pair)

                # Scoring the original alignment is currently disabled (it
                # used model.score_alignment_against_model), so the score
                # is fixed at 0 and every ratio comes out as 0.
                score_original_alignment = 0
                if score_original_alignment == 0:
                    score_ratio = 0
                else:
                    score_ratio = result[2] / score_original_alignment
                score_ratios.append(score_ratio)

                acc_site_correct = (
                    helpers.get_exon_start(query_alignment) ==
                    helpers.get_exon_start(predicted_pair[1]))
                do_site_correct = (
                    helpers.get_exon_end(query_alignment) ==
                    helpers.get_exon_end(predicted_pair[1]))
                if acc_site_correct:
                    stats[9] += 1
                if do_site_correct:
                    stats[10] += 1

                _write_query_details(query_index, f, instance, result,
                                     frame_shifts, alignments_match,
                                     score_original_alignment, score_ratio,
                                     path_decoded_from_original,
                                     acc_site_correct,
                                     do_site_correct)

            # "Mixed" means neither all queries matched nor none did.
            f.write("\n\nmixed results: ")
            if sum(match_results) in (0, len(match_results)):
                f.write("False")
            else:
                f.write("True")
                stats[8] += 1

    aggregated_ratios = helpers.aggregate_ratios(score_ratios)
    return stats, aggregated_ratios, score_ratios
Esempio n. 4
0
def test_batch(folder_path, output_name, paramstuple):
    '''
    Run ``test_instance`` on every file in ``folder_path`` and append a
    per-query report to ``output_name``.

    Parameters:
        folder_path: directory whose entries are each parsed with
            ``helpers.parse_instance``; a trailing "/" is added if missing.
        output_name: path of the report file, opened in append mode.
        paramstuple: passed through unchanged to ``test_instance``.

    Returns a tuple ``(stats, aggregated_ratios, score_ratios)`` where
    ``score_ratios`` holds one ratio per query, ``aggregated_ratios`` is
    ``helpers.aggregate_ratios(score_ratios)`` and ``stats`` is a list of
    11 counters:
        stats[0..3]  queries with exactly 0, 1, 2 or 3 frameshifts
        stats[4]     queries with 4 or more frameshifts
        stats[5]     not updated by this function
        stats[6]     queries whose predicted alignment matches the original
        stats[7]     queries whose predicted alignment does not match
        stats[8]     files in which some, but not all, queries matched
        stats[9]     queries whose predicted exon start equals the original
        stats[10]    queries whose predicted exon end equals the original
    '''
    if folder_path[-1] != "/":
        folder_path += "/"
    stats = [0] * 11
    score_ratios = []
    # open() replaces the Python-2-only file() builtin, and the with-block
    # guarantees the report is flushed and closed even if a file fails.
    with open(output_name, 'a') as f:
        f.write("--------------------------------")
        f.write("\n\nCurrently processing folder " + folder_path + "\n\n")
        for filename in os.listdir(folder_path):
            instance = helpers.parse_instance(folder_path + filename)
            print("Currently testing: " + filename)
            print("length of reference sequence: " + str(len(instance[2])))
            print("number of query sequences: " + str(len(instance[3])))
            # The full file name (extension included) is the model name.
            results, m = test_instance(filename, instance, paramstuple)
            f.write("\n\n\n" + filename + ":\n")
            match_results = []
            for query_index, result in enumerate(results):
                query_alignment = instance[1][query_index]
                predicted_pair = result[1]

                frame_shifts = locate_frameshifts(instance[3][query_index],
                                                  result[0])
                # Buckets 0-3 count exact numbers; bucket 4 is "4 or more".
                stats[min(len(frame_shifts), 4)] += 1

                alignments_match = _alignments_match(
                    (predicted_pair[0], predicted_pair[1]),
                    (instance[0], query_alignment))
                match_results.append(alignments_match)
                if alignments_match:
                    stats[6] += 1
                else:
                    stats[7] += 1

                # NOTE(review): despite the name, this path is decoded from
                # the predicted pair (result[1]) - confirm that is intended.
                path_decoded_from_original = helpers.pair_to_path(
                    m, predicted_pair)

                # Scoring the original alignment is currently disabled (it
                # used model.score_alignment_against_model), so the score
                # is fixed at 0 and every ratio comes out as 0.
                score_original_alignment = 0
                if score_original_alignment == 0:
                    score_ratio = 0
                else:
                    score_ratio = result[2] / score_original_alignment
                score_ratios.append(score_ratio)

                acc_site_correct = (
                    helpers.get_exon_start(query_alignment) ==
                    helpers.get_exon_start(predicted_pair[1]))
                do_site_correct = (
                    helpers.get_exon_end(query_alignment) ==
                    helpers.get_exon_end(predicted_pair[1]))
                if acc_site_correct:
                    stats[9] += 1
                if do_site_correct:
                    stats[10] += 1

                _write_query_details(query_index, f, instance, result,
                                     frame_shifts, alignments_match,
                                     score_original_alignment, score_ratio,
                                     path_decoded_from_original,
                                     acc_site_correct,
                                     do_site_correct)

            # "Mixed" means neither all queries matched nor none did.
            f.write("\n\nmixed results: ")
            if sum(match_results) in (0, len(match_results)):
                f.write("False")
            else:
                f.write("True")
                stats[8] += 1

    aggregated_ratios = helpers.aggregate_ratios(score_ratios)
    return stats, aggregated_ratios, score_ratios
Esempio n. 5
0
                  ". FILE: " + path_prefix + params.BLOSUM_FILE)
        else:
            print("substitution matrix = " + str(args.matrix) +
                  ". FILE: " + path_prefix + params.ETH_FILE)
        if args.is_first_exon:
            print(path_prefix + params.FIRST_CODON_PROFILE + " used instead of the usual acceptor profile")
        if args.is_last_exon:
            print(path_prefix + params.LAST_CODON_PROFILE +  " used instead of donor profile")
        if args.has_AC_acceptor:
            print(path_prefix + params.U12_ACC_PROFILE + " used instead of the usual acceptor profile")
        if args.has_AT_donor:
            print(path_prefix + params.U12_DONOR_PROFILE + " used instead of the usual donor profile")
            

    #parsing the FASTA input file:    
    instance = helpers.parse_instance(args.input_file)
    params.CLADE = args.clade
    paramsconfig = params.Params_config()
    paramsconfig.set_params((args.fs_prob, args.ci_prob, args.ci2_prob, args.total_cd_prob, args.matrix.upper()))
    paramsconfig.set_sequence(instance[2], instance[4], instance[5],
                              args.is_first_exon, args.is_last_exon,
                              args.has_AC_acceptor, args.has_AT_donor)

    if args.verbosity> 0:
        print("\nReference sequene (raw): " + instance[0])
        print("\nCodons in the reference sequence: " + str(paramsconfig.seq_as_codons))
        print("\nUpstream intron phase: " + str(instance[4])) 
        print("\nDownstream intron phase: " + str(instance[5]))
        print("\nBuilding a profile HMM for the reference sequence...")
        
    m = model.HMM(args.input_file, paramsconfig)
Esempio n. 6
0
                  path_prefix + params.ETH_FILE)
        if args.is_first_exon:
            print(path_prefix + params.FIRST_CODON_PROFILE +
                  " used instead of the usual acceptor profile")
        if args.is_last_exon:
            print(path_prefix + params.LAST_CODON_PROFILE +
                  " used instead of donor profile")
        if args.has_AC_acceptor:
            print(path_prefix + params.U12_ACC_PROFILE +
                  " used instead of the usual acceptor profile")
        if args.has_AT_donor:
            print(path_prefix + params.U12_DONOR_PROFILE +
                  " used instead of the usual donor profile")

    #parsing the FASTA input file:
    instance = helpers.parse_instance(args.input_file)
    params.CLADE = args.clade
    paramsconfig = params.Params_config()
    paramsconfig.set_params(
        (args.fs_prob, args.ci_prob, args.ci2_prob, args.total_cd_prob,
         args.matrix.upper()))
    paramsconfig.set_sequence(instance[2], instance[4], instance[5],
                              args.is_first_exon, args.is_last_exon,
                              args.has_AC_acceptor, args.has_AT_donor)

    if args.verbosity > 0:
        print("\nReference sequene (raw): " + instance[0])
        print("\nCodons in the reference sequence: " +
              str(paramsconfig.seq_as_codons))
        print("\nUpstream intron phase: " + str(instance[4]))
        print("\nDownstream intron phase: " + str(instance[5]))