def main(args, letter_to_emission, trans, emiss):
    """
    Run Baum-Welch on one fasta file and write the requested reports.
    Each report is written only when its option in args is set,
    and each output path is the option value plus args.common_suffix.
    @param args: parsed options; the attributes read here are ticks, pbar,
    n, fasta, common_suffix, summary, log_likelihoods, posterior,
    and posterior_decoding
    @param letter_to_emission: maps each raw observation to an emission index
    @param trans: transition parameters handed to the estimation code
    @param emiss: emission parameters handed to the estimation code
    @raise ValueError: if both ticks and a progress bar are requested
    """
    # the two progress indicators are mutually exclusive
    if args.ticks and args.pbar:
        raise ValueError('it is silly to use both ticks and a progress bar')
    # NOTE(review): only one increment is visible in this function;
    # presumably the remaining 1 + args.n steps are consumed by Summary
    pbar = progress.Bar(1 + 1 + args.n) if args.pbar else None
    # read the fasta file and encode the observations as emission indices
    with open(args.fasta) as fin:
        raw_observations = fasta_to_raw_observations(fin.readlines())
    observations = np.array(
            [letter_to_emission[c] for c in raw_observations],
            dtype=np.int8)
    if pbar:
        pbar.increment()
    # do the baum welch stuff
    model = estimation.FiniteModel(trans, emiss, observations)
    summary = Summary(model, pbar, args.ticks)
    estimation.baum_welch(
            model.update, model.get_expectations,
            trans, emiss, args.n,
            callback=summary.after_baum_welch)
    summary_text = summary.finish()
    # write whichever output files were requested
    suffix = args.common_suffix
    if args.summary:
        with open(args.summary + suffix, 'w') as fout:
            print >> fout, summary_text
    if args.log_likelihoods:
        formatted = ['%f' % x for x in summary.log_likelihoods]
        with open(args.log_likelihoods + suffix, 'w') as fout:
            print >> fout, '\n'.join(formatted)
    # the posterior is requested separately for each decoding output
    if args.posterior:
        posterior_path = args.posterior + suffix
        hmm.pretty_print_posterior(
                raw_observations, model.get_posterior(),
                60, posterior_path)
    if args.posterior_decoding:
        decoding_path = args.posterior_decoding + suffix
        hmm.pretty_print_posterior_decoding(
                raw_observations, model.get_posterior(),
                60, decoding_path)
def parallel_worker(params):
    """
    Run Baum-Welch on one fasta file and write all four output files.
    The output files are named summary, log.likelihood, soft.decoding,
    and hard.decoding, each followed by a suffix built from the
    input file basename and the iteration count.
    @param params: a pair (user_params, hmm_params) where
    user_params is (f_in, d_out, niterations) and
    hmm_params is (letter_to_emission, trans, emiss)
    """
    # unpack the params
    (f_in, d_out, niterations), (letter_to_emission, trans, emiss) = params
    trans, emiss = np.array(trans), np.array(emiss)
    # define output path names
    suffix = ''.join(['.', os.path.basename(f_in), '.n%d.txt' % niterations])
    stems = ('summary', 'log.likelihood', 'soft.decoding', 'hard.decoding')
    (
            summary_name,
            log_likelihoods_name,
            posterior_name,
            posterior_decoding_name,
            ) = [os.path.join(d_out, stem + suffix) for stem in stems]
    # read the fasta file and encode the observations as emission indices
    with open(f_in) as fin:
        raw_observations = fasta_to_raw_observations(fin.readlines())
    observations = np.array(
            [letter_to_emission[c] for c in raw_observations],
            dtype=np.int8)
    # do the baum welch stuff without a progress bar or ticks
    model = estimation.FiniteModel(trans, emiss, observations)
    summary = Summary(model, None, None)
    estimation.baum_welch(
            model.update, model.get_expectations,
            trans, emiss, niterations,
            callback=summary.after_baum_welch)
    summary_text = summary.finish()
    # write the output files
    with open(summary_name, 'wt') as fout:
        print >> fout, summary_text
    formatted = ['%f' % x for x in summary.log_likelihoods]
    with open(log_likelihoods_name, 'wt') as fout:
        print >> fout, '\n'.join(formatted)
    hmm.pretty_print_posterior(
            raw_observations, model.get_posterior(),
            60, posterior_name)
    hmm.pretty_print_posterior_decoding(
            raw_observations, model.get_posterior(),
            60, posterior_decoding_name)