def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 if denominator is 0."""
    if not denominator:
        return 0.0
    return float(numerator) / denominator


def _format_percent(fraction):
    """Format a fraction as a percentage string with four decimals."""
    return "{:.4f} %".format(fraction * 100.0)


def hmm_iterator(hmms, sequences, marker_position=50, marker_state="*"):
    """
    Given a list of HMMs and a set of sequences, print the sensitivity,
    specificity and accuracy of the models.

    Each HMM file is parsed with ``tgf.parse`` and all sequences are aligned
    with ``viterbi.viterbi_all``. An alignment counts as a true positive when
    its ``state_path`` holds ``marker_state`` at index ``marker_position``;
    every other alignment counts as a false negative. One tab-separated
    line per model is printed to stdout.

    The values are computed as:
        sensitivity = TP / (TP + FN)
        specificity = TP / (TP + FP)
        accuracy    = (sensitivity + specificity) / 2

    Args:
        hmms: iterable of TGF file paths. The last "/"-separated component,
            cut at its first ".", is printed as the model name.
        sequences: passed unchanged to ``viterbi.viterbi_all``.
        marker_position: index into ``state_path`` that is inspected.
            Defaults to 50, the value that used to be hard-coded.
        marker_state: state label expected at that index. Defaults to "*".

    Returns:
        None. When a model yields no alignments at all, the ratios are
        reported as 0 instead of raising ZeroDivisionError.

    NOTE(review): a ``state_path`` shorter than ``marker_position + 1``
    still raises IndexError, as before - confirm whether that can occur.
    """
    for hmm_path in hmms:
        hmm_name = hmm_path.split("/")[-1].split(".")[0]

        # Parse tgf and run viterbi algorithm
        hmm = tgf.parse(hmm_path)
        alignments = viterbi.viterbi_all(hmm, sequences)

        # Count in a single pass so that a lazy/one-shot iterable of
        # alignments is handled the same way as a list.
        total = 0
        true_positives = 0
        for align in alignments:
            total += 1
            if align.state_path[marker_position] == marker_state:
                true_positives += 1
        false_negatives = total - true_positives
        false_positives = false_negatives  # True only for this concrete project!

        # Calculate sensitivity, specificity and accuracy
        sensitivity = _safe_ratio(true_positives,
                                  true_positives + false_negatives)
        specificity = _safe_ratio(true_positives,
                                  true_positives + false_positives)
        accuracy = (specificity + sensitivity) / 2.0

        # Single-argument parenthesised print: same output whether print is
        # a statement or a function.
        print("{}\tSN: {}\tSP: {}\tavgSNSP: {}".format(
            hmm_name,
            _format_percent(sensitivity),
            _format_percent(specificity),
            _format_percent(accuracy)))
def subject(self, filename="simple.tgf"):
    """Parse the TGF fixture ``filename`` found under test/files/tgf/.

    Returns whatever ``tgf.parse`` returns for that path.
    """
    fixture_path = "test/files/tgf/{}".format(filename)
    return tgf.parse(fixture_path)
else: if is_correct: fn += 1 else: tn += 1 return (tp, tn, fp, fn) def print_roc_data_in_tsv(roc_data): print "score\ttpr\tfpr\tppv\ttp\ttn\tfp\tfn" for score, metrics in sorted(roc_data.items()): print "{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}".format( score, *metrics) hmm = tgf_parser.parse(sys.argv[1]) sequences = seq_parser.parse(sys.argv[2]) alignments = viterbi.viterbi_all(hmm, sequences) evaluated_alignments = map(evaluate_alignment, alignments) max_score = max(alignments, key=lambda align: align.score).score min_score = min(alignments, key=lambda align: align.score).score roc_data = {} step_size = (max_score - min_score) / STEPS scores_iterator = frange(float_floor(min_score), float_ceil(max_score), step_size)
def subject(self, hmm=None, observations=10):
    """Return ``sample.sample`` for ``hmm`` and ``observations``.

    When ``hmm`` is None, the fixture test/files/tgf/simple.tgf is parsed
    with ``tgf.parse`` and used instead.
    """
    model = tgf.parse("test/files/tgf/simple.tgf") if hmm is None else hmm
    return sample.sample(model, observations)
return (action, args) def get_output_filename_from_args(args, position): if len(args) > position: return args[position] if __name__ == '__main__': action, args = parse_args() if action == 'sample': if len(args) < 2: print_help_message(invalid=True) hmm = tgf.parse(args[1]) length_of_sample = int(args[0]) output_filename = get_output_filename_from_args(args, 2) mallet_writer.write([sample.sample(hmm, length_of_sample)], output_filename) elif action == 'viterbi': if len(args) < 2: print_help_message(invalid=True) hmm = tgf.parse(args[0]) sequences = seq_parser.parse(args[1]) output_filename = get_output_filename_from_args(args, 2) mallet_writer.write(viterbi.viterbi_all(hmm, sequences), output_filename)
def subject(self, hmm_filename, sequence):
    """Run ``viterbi.viterbi`` on ``sequence`` with a fixture HMM.

    The HMM is parsed with ``tgf.parse`` from test/files/tgf/<hmm_filename>.
    """
    fixture_path = "test/files/tgf/{}".format(hmm_filename)
    hmm = tgf.parse(fixture_path)
    return viterbi.viterbi(hmm, sequence)