def test_instance_from_file(filename, paramsconfig):
    """Parse one instance from *filename*, build the model, and predict
    the alignment for that single instance.

    An instance here means one reference sequence together with one or
    more query sequences.

    Args:
        filename: path to the instance file.  The model name is derived
            by stripping the last four characters (presumably a
            ".txt"-style extension -- TODO confirm with callers).
        paramsconfig: parameter configuration object forwarded to
            ``test_instance``.

    Side effects:
        Prints the total wall-clock processing time to stdout.
    """
    instance = helpers.parse_instance(filename)
    # Drop the 4-character file extension to obtain the model name.
    modelname = filename[:-4]
    t1 = time.time()
    test_instance(modelname, instance, paramsconfig)
    t2 = time.time()
    print("total processing time: " + str(t2 - t1) + " seconds.")
# NOTE(review): this is a second, identically named definition of
# test_instance_from_file in the same file; at import time the later
# definition silently shadows the earlier one.  Consider removing one copy.
def test_instance_from_file(filename, paramsconfig):
    """Parse an instance from a file, build the model, and predict the
    alignment for that one instance.

    An instance here means one reference sequence with one or more
    query sequences.  Prints total processing time to stdout.
    """
    instance = helpers.parse_instance(filename)
    # Strip the last four characters (file extension) to get the model name.
    modelname = filename[0:len(filename) - 4]
    t1 = time.time()
    test_instance(modelname, instance, paramsconfig)
    t2 = time.time()
    print("total processing time: " + str(t2 - t1) + " seconds.")
def test_batch(folder_path, output_name, paramstuple):
    """Run the model over every instance file in *folder_path* and append
    per-query statistics to the report file *output_name*.

    Args:
        folder_path: directory containing instance files (a trailing "/"
            is appended if missing).
        output_name: path of the report file, opened in append mode.
        paramstuple: parameter configuration forwarded to ``test_instance``.

    Returns:
        ``(stats, aggregated_ratios, score_ratios)`` where *stats* is an
        11-slot counter list (frame-shift counts in 0-4, alignment
        match/mismatch in 6/7, mixed results in 8, acceptor/donor site
        accuracy in 9/10), *aggregated_ratios* comes from
        ``helpers.aggregate_ratios`` and *score_ratios* is the raw list.
    """
    if folder_path[-1] != "/":
        folder_path += "/"
    stats = [0] * 11
    score_ratios = []
    # BUGFIX: the original used the Python-2-only `file(...)` builtin and
    # never closed the handle; use open() inside a `with` block instead.
    with open(output_name, 'a') as f:
        f.write("--------------------------------")
        f.write("\n\nCurrently processing folder " + folder_path + "\n\n")
        file_names = os.listdir(folder_path)
        for filename in file_names:
            instance = helpers.parse_instance(folder_path + filename)
            print("Currently testing: " + filename)
            print("length of reference sequence: " + str(len(instance[2])))
            print("number of query sequences: " + str(len(instance[3])))
            #modelname = filename[0:len(filename)-4]
            modelname = filename
            results, m = test_instance(modelname, instance, paramstuple)
            f.write("\n\n\n" + filename + ":\n")
            match_results = []
            for query_index, result in enumerate(results):
                frame_shifts = locate_frameshifts(instance[3][query_index],
                                                  result[0])
                # Bucket by number of detected frame-shifts (0..3, 4+).
                if not frame_shifts:
                    stats[0] += 1
                elif len(frame_shifts) == 1:
                    stats[1] += 1
                elif len(frame_shifts) == 2:
                    stats[2] += 1
                elif len(frame_shifts) == 3:
                    stats[3] += 1
                else:  # 4 or more frame-shifts
                    stats[4] += 1
                alignments_match = _alignments_match(
                    (result[1][0], result[1][1]),
                    (instance[0], instance[1][query_index]))
                match_results.append(alignments_match)
                if alignments_match:
                    stats[6] += 1
                else:
                    stats[7] += 1
                path_decoded_from_original = helpers.pair_to_path(m, result[1])
                # Scoring of the original alignment is currently disabled
                # (was: model.score_alignment_against_model(...)), so the
                # ratio degenerates to 0.
                score_original_alignment = 0
                if score_original_alignment == 0:
                    score_ratio = 0
                else:
                    score_ratio = result[2] / score_original_alignment
                score_ratios.append(score_ratio)
                # Compare predicted vs. original splice-site positions.
                acc_site_index_original = helpers.get_exon_start(
                    instance[1][query_index])
                acc_site_index_predicted = helpers.get_exon_start(result[1][1])
                do_site_index_original = helpers.get_exon_end(
                    instance[1][query_index])
                do_site_index_predicted = helpers.get_exon_end(result[1][1])
                acc_site_correct = (
                    acc_site_index_original == acc_site_index_predicted)
                do_site_correct = (
                    do_site_index_original == do_site_index_predicted)
                if acc_site_correct:
                    stats[9] += 1
                if do_site_correct:
                    stats[10] += 1
                _write_query_details(query_index, f, instance, result,
                                     frame_shifts, alignments_match,
                                     score_original_alignment, score_ratio,
                                     path_decoded_from_original,
                                     acc_site_correct, do_site_correct)
            f.write("\n\nmixed results: ")
            # "Mixed" means some but not all queries of this instance matched.
            matched = sum(match_results)
            if matched == 0 or matched == len(match_results):
                f.write("False")
            else:
                f.write("True")
                stats[8] += 1
            f.flush()
    aggregated_ratios = helpers.aggregate_ratios(score_ratios)
    return stats, aggregated_ratios, score_ratios
# NOTE(review): this is a second, identically named definition of test_batch
# in the same file; at import time the later definition shadows the earlier
# one.  Consider removing one copy.
def test_batch(folder_path, output_name, paramstuple):
    """Run the model over every instance file in *folder_path*, appending
    per-query statistics to the report file *output_name*.

    Returns ``(stats, aggregated_ratios, score_ratios)``: an 11-slot
    counter list, the aggregated score ratios, and the raw ratio list.
    """
    if folder_path[-1] != "/":
        folder_path += "/"
    stats = [0] * 11
    score_ratios = []
    # NOTE(review): `file(...)` is the removed Python-2 builtin (the rest of
    # the file uses Python-3 print() calls) and the handle is never closed —
    # should be `with open(output_name, 'a') as f:`.
    f = file(output_name, 'a')
    f.write("--------------------------------")
    f.write("\n\nCurrently processing folder " + folder_path + "\n\n")
    file_names = os.listdir(folder_path)
    for filename in file_names:
        instance = helpers.parse_instance(folder_path + filename)
        print("Currently testing: " + filename)
        print("length of reference sequence: " + str(len(instance[2])))
        print("number of query sequences: " + str(len(instance[3])))
        #modelname = filename[0:len(filename)-4]
        modelname = filename
        results, m = test_instance(modelname, instance, paramstuple)
        f.write("\n\n\n" + filename + ":\n")
        match_results = []
        for query_index, result in enumerate(results):
            frame_shifts = locate_frameshifts(instance[3][query_index],
                                              result[0])
            # Bucket by number of detected frame-shifts (0..3, 4 or more).
            if frame_shifts == []:
                stats[0] += 1
            elif len(frame_shifts) == 1:
                stats[1] += 1
            elif len(frame_shifts) == 2:
                stats[2] += 1
            elif len(frame_shifts) == 3:
                stats[3] += 1
            else:  #4 or more frame-shifts
                stats[4] += 1
            '''
            print("\nVITERBI PATH: "+ str(result[0]))
            print("\nPREDICTED ALIGNMENT:")
            print("\n" + result[1][0])
            print("\n" + result[1][1])
            print("\nORIGINAL ALIGNMENT:")
            print("\n" + instance[0])
            print("\n" + instance[1][query_index])
            '''
            alignments_match = _alignments_match(
                (result[1][0], result[1][1]),
                (instance[0], instance[1][query_index]))
            match_results.append(alignments_match)
            if alignments_match:
                stats[6] += 1
            else:
                stats[7] += 1
            #print("in analytics: result[1] " + str(result[1]))
            path_decoded_from_original = helpers.pair_to_path(m, result[1])
            # Original-alignment scoring is disabled, so the ratio is 0.
            score_original_alignment = 0
            #( model.score_alignment_against_model(m,(instance[0],
            # instance[1][query_index]))
            if score_original_alignment == 0:
                score_ratio = 0
            else:
                score_ratio = result[2] / score_original_alignment
            score_ratios.append(score_ratio)
            # Compare predicted vs. original acceptor/donor site positions.
            acc_site_index_original = helpers.get_exon_start(
                instance[1][query_index])
            acc_site_index_predicted = helpers.get_exon_start(result[1][1])
            do_site_index_original = helpers.get_exon_end(
                instance[1][query_index])
            do_site_index_predicted = helpers.get_exon_end(result[1][1])
            acc_site_correct = (
                acc_site_index_original == acc_site_index_predicted)
            do_site_correct = (
                do_site_index_original == do_site_index_predicted)
            if acc_site_correct:
                stats[9] += 1
            if do_site_correct:
                stats[10] += 1
            _write_query_details(query_index, f, instance, result,
                                 frame_shifts, alignments_match,
                                 score_original_alignment, score_ratio,
                                 path_decoded_from_original,
                                 acc_site_correct, do_site_correct)
        f.write("\n\nmixed results: ")
        # "Mixed" = some but not all queries of this instance matched.
        if sum(match_results) == 0 or sum(match_results) == len(match_results):
            f.write("False")
        else:
            f.write("True")
            stats[8] += 1
        f.flush()
    aggregated_ratios = helpers.aggregate_ratios(score_ratios)
    return stats, aggregated_ratios, score_ratios
". FILE: " + path_prefix + params.BLOSUM_FILE) else: print("substitution matrix = " + str(args.matrix) + ". FILE: " + path_prefix + params.ETH_FILE) if args.is_first_exon: print(path_prefix + params.FIRST_CODON_PROFILE + " used instead of the usual acceptor profile") if args.is_last_exon: print(path_prefix + params.LAST_CODON_PROFILE + " used instead of donor profile") if args.has_AC_acceptor: print(path_prefix + params.U12_ACC_PROFILE + " used instead of the usual acceptor profile") if args.has_AT_donor: print(path_prefix + params.U12_DONOR_PROFILE + " used instead of the usual donor profile") #parsing the FASTA input file: instance = helpers.parse_instance(args.input_file) params.CLADE = args.clade paramsconfig = params.Params_config() paramsconfig.set_params((args.fs_prob, args.ci_prob, args.ci2_prob, args.total_cd_prob, args.matrix.upper())) paramsconfig.set_sequence(instance[2], instance[4], instance[5], args.is_first_exon, args.is_last_exon, args.has_AC_acceptor, args.has_AT_donor) if args.verbosity> 0: print("\nReference sequene (raw): " + instance[0]) print("\nCodons in the reference sequence: " + str(paramsconfig.seq_as_codons)) print("\nUpstream intron phase: " + str(instance[4])) print("\nDownstream intron phase: " + str(instance[5])) print("\nBuilding a profile HMM for the reference sequence...") m = model.HMM(args.input_file, paramsconfig)
path_prefix + params.ETH_FILE) if args.is_first_exon: print(path_prefix + params.FIRST_CODON_PROFILE + " used instead of the usual acceptor profile") if args.is_last_exon: print(path_prefix + params.LAST_CODON_PROFILE + " used instead of donor profile") if args.has_AC_acceptor: print(path_prefix + params.U12_ACC_PROFILE + " used instead of the usual acceptor profile") if args.has_AT_donor: print(path_prefix + params.U12_DONOR_PROFILE + " used instead of the usual donor profile") #parsing the FASTA input file: instance = helpers.parse_instance(args.input_file) params.CLADE = args.clade paramsconfig = params.Params_config() paramsconfig.set_params( (args.fs_prob, args.ci_prob, args.ci2_prob, args.total_cd_prob, args.matrix.upper())) paramsconfig.set_sequence(instance[2], instance[4], instance[5], args.is_first_exon, args.is_last_exon, args.has_AC_acceptor, args.has_AT_donor) if args.verbosity > 0: print("\nReference sequene (raw): " + instance[0]) print("\nCodons in the reference sequence: " + str(paramsconfig.seq_as_codons)) print("\nUpstream intron phase: " + str(instance[4])) print("\nDownstream intron phase: " + str(instance[5]))