def predict(sequence, model_or_filelike, compute_posterior=True):
    """Predict the most likely state path for ``sequence``.

    :param sequence: the observation sequence handed to ``viterbi``,
        ``forward`` and ``backward``.
    :param model_or_filelike: either an already parsed model tuple, or a
        value accepted by ``open`` naming a model file that is then read
        with ``parse``.
    :param compute_posterior: when true, additionally compute the
        per-position posterior probability of every group listed in
        ``GROUP_NAMES``.
    :return: ``path`` alone when ``compute_posterior`` is false; otherwise
        a ``(path, table)`` pair where ``table`` has one row per
        observation and three columns, each row divided by its own sum.
    """
    if isinstance(model_or_filelike, tuple):
        model = model_or_filelike
    else:
        # Use a context manager so the model file is closed once parsed
        # instead of leaking the handle.
        with open(model_or_filelike) as model_file:
            _, model = parse(model_file)

    _, path = viterbi(sequence, *model)
    if not compute_posterior:
        return path

    forward_table, constants = forward(sequence, *model)
    backward_table = backward(sequence, constants, *model)
    posterior = forward_table * backward_table

    # Only the label and name maps of the six-element model are needed here.
    _, _, _, _, label_map, name_map = model
    observations = len(sequence)
    states = len(name_map)

    table = np.zeros(shape=(observations, 3))
    for i in range(observations):
        # Sum the posterior mass of all states sharing a (lower-cased) label.
        group_probs = defaultdict(float)
        for j in range(states):
            group_probs[label_map[j].lower()] += posterior[i, j]
        for k, group in enumerate(GROUP_NAMES):
            table[i, k] = group_probs[group]

    # Normalize each row so the group probabilities sum to one.
    return path, table / table.sum(axis=1, keepdims=True)
def predict(sequence, header, model_or_filelike, compute_posterior=True):
    """Predict the most likely state path for ``sequence``.

    :param sequence: the observation sequence handed to ``viterbi``,
        ``forward`` and ``backward``.
    :param header: accepted for interface compatibility; this function
        does not read it.
    :param model_or_filelike: either an already parsed model tuple, or a
        value accepted by ``open`` naming a model file that is then read
        with ``parse``.
    :param compute_posterior: when true, additionally compute the
        per-position posterior probability of every group listed in
        ``GROUP_NAMES``.
    :return: ``path`` alone when ``compute_posterior`` is false; otherwise
        a ``(path, table)`` pair where ``table`` has one row per
        observation and three columns, each row divided by its own sum.
    """
    if isinstance(model_or_filelike, tuple):
        model = model_or_filelike
    else:
        # Use a context manager so the model file is closed once parsed
        # instead of leaking the handle.
        with open(model_or_filelike) as model_file:
            _, model = parse(model_file)

    _, path = viterbi(sequence, *model)
    if not compute_posterior:
        return path

    forward_table, constants = forward(sequence, *model)
    backward_table = backward(sequence, constants, *model)
    posterior = forward_table * backward_table

    # Only the label and name maps of the six-element model are needed here.
    _, _, _, _, label_map, name_map = model
    observations = len(sequence)
    states = len(name_map)

    table = np.zeros(shape=(observations, 3))
    for i in range(observations):
        # Sum the posterior mass of all states sharing a (lower-cased) label.
        group_probs = defaultdict(float)
        for j in range(states):
            group_probs[label_map[j].lower()] += posterior[i, j]
        for k, group in enumerate(GROUP_NAMES):
            table[i, k] = group_probs[group]

    # Normalize each row so the group probabilities sum to one.
    return path, table / table.sum(axis=1, keepdims=True)
def cli():
    """Command-line entry point.

    Parses the model given by ``--model``, then for every entry of the
    FASTA file given by ``--file`` runs ``predict`` and writes three files
    named after the entry id: ``<id>.summary``, ``<id>.annotation`` and
    ``<id>.plot``. When the ``--plot`` option is available and set, the
    plot file is additionally rendered to ``<id>.pdf``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-f', '--file',
        dest='sequence_file',
        type=argparse.FileType('r'),
        required=True,
        help='path to file in fasta format with sequences',
    )
    arg_parser.add_argument(
        '-m', '--model',
        dest='model_file',
        type=argparse.FileType('r'),
        default=DEFAULT_MODEL,
        help='path to the model to use',
    )
    # The plotting switch is only offered when matplotlib is available.
    if has_matplotlib:
        arg_parser.add_argument(
            '-p', '--plot',
            dest='plot_posterior',
            action='store_true',
            help='plot posterior probabilies',
        )
    options = arg_parser.parse_args()

    # The attribute is absent when the option was never registered.
    want_plot = getattr(options, 'plot_posterior', False)

    header, model = parse(options.model_file)
    for entry in load_fasta_file(options.sequence_file):
        path, posterior = predict(entry.sequence, header, model)

        # One line per segment reported by summarize().
        with open(entry.id + '.summary', 'w') as summary_out:
            for start, end, state in summarize(path):
                label = PRETTY_NAMES[state]
                print("{} {} {}".format(start, end, label), file=summary_out)

        # FASTA-like annotation: header line, then the path wrapped at 79.
        with open(entry.id + '.annotation', 'w') as annotation_out:
            print('>', entry.id, ' ', entry.description,
                  sep='', file=annotation_out)
            for chunk in textwrap.wrap(path, 79):
                print(chunk, file=annotation_out)

        posterior_path = entry.id + '.plot'
        with open(posterior_path, 'w') as posterior_out:
            dump_posterior_file(posterior_out, posterior)

        if want_plot:
            with open(posterior_path, 'r') as posterior_in:
                plot(posterior_in, entry.id + '.pdf')
def cli():
    """Command-line entry point.

    Parses the model given by ``--model``, then for every record read from
    the FASTA file given by ``--file`` runs ``predict`` and writes three
    files named after the record id: ``<id>.summary``, ``<id>.annotation``
    and ``<id>.plot``. When ``--plot`` is set, the plot file is additionally
    rendered to ``<id>.pdf``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file',
                        dest='sequence_file',
                        type=argparse.FileType('r'),
                        required=True,
                        help='path to file in fasta format with sequences')
    parser.add_argument('-m', '--model',
                        dest='model_file',
                        type=argparse.FileType('r'),
                        default=DEFAULT_MODEL,
                        help='path to the model to use')
    parser.add_argument('-p', '--plot',
                        dest='plot_posterior',
                        action='store_true',
                        help='plot posterior probabilies')
    args = parser.parse_args()

    header, model = parse(args.model_file)
    for record in sk.io.read(args.sequence_file, format='fasta'):
        record_id = record.metadata['id']

        # The sequence passed to predict() used to be an undefined name
        # (NameError on the first record); take it from the record itself.
        # NOTE(review): the old name suggested some normalization step
        # (e.g. upper-casing) -- confirm whether one is required.
        sequence = str(record)
        path, posterior = predict(sequence, header, model)

        # One line per segment reported by summarize().
        with open(record_id + '.summary', 'w') as summary_file:
            for start, end, state in summarize(path):
                print("{} {} {}".format(start, end, PRETTY_NAMES[state]),
                      file=summary_file)

        # FASTA-like annotation: header line, then the path wrapped at 79.
        with open(record_id + '.annotation', 'w') as ann_file:
            print('>', record_id, ' ', record.metadata['description'],
                  sep='', file=ann_file)
            for line in textwrap.wrap(path, 79):
                print(line, file=ann_file)

        plot_filename = record_id + '.plot'
        with open(plot_filename, 'w') as plot_file:
            print('inside', 'membrane', 'outside', file=plot_file)
            # predict() returns one posterior row per sequence position.
            for inside, membrane, outside in posterior:
                print('{} {} {}'.format(inside, membrane, outside),
                      file=plot_file)

        if args.plot_posterior:
            plot(plot_filename, record_id + '.pdf')