def plot_params(wkspacefile):
    """Load saved parameters from *wkspacefile* and plot them.

    Converts the on-disk workspace into an args namespace (which carries
    ``out_dir`` for the plot output), makes sure the output directory
    exists, then hands off to the plotting routine.

    NOTE(review): the trailing ``plot_params(args)`` call resolves, as
    written here, to this very wrapper — which would recurse with the
    wrong argument type. Presumably a plotting function of the same name
    is imported elsewhere in the module; confirm against the full file.
    """
    args = workspace_to_npy_args(wkspacefile)
    try:
        os.mkdir(args.out_dir)
    except OSError:
        # Directory already exists (the common case) or cannot be
        # created; best-effort — a truly unusable directory will surface
        # as an error when the plots are written.
        pass
    plot_params(args)
def run_gmtk_lineagehmm(args):
    """Train and decode a lineage HMM with GMTK.

    Converts .npy observations/parameters to GMTK text format, builds and
    triangulates the graphical model, runs EM iterations (locally via
    multiprocessing or on a cluster via SGE) until the relative change in
    free energy drops below ``args.epsilon_e`` or ``args.max_iterations``
    is reached, saves the learned parameters, and finally runs Viterbi
    decoding on every input file.

    Side effects: creates 'gmtk_images/', 'lineagehmm.str', 'dummy.master',
    per-input .master/.observations files, saved .npy parameter and
    'viterbi_Q_*' files; mutates many attributes on ``args``.
    """
    try:
        os.mkdir('gmtk_images')
    except:
        pass  # best-effort: directory probably exists already
    args.iteration = 'initial'
    args.observe = 'all'
    args.run_name = 'gmtk'
    args.out_params = 'gmtk_images/gmtk_{param}_{observe}'
    args.out_dir = '.'
    args.free_energy = []
    plot_params(args)

    # convert .npy parameters and data into gmtk format
    args.observe_txt = []
    for f in args.observe_matrix:
        obs, obs_txt = npy_observations_to_txt(f)
        args.observe_txt.append(obs_txt)

    # prepare and triangulate the graphical model.
    # NOTE(review): extraction garbled the line breaks inside this inline
    # master-file literal; reconstructed per GMTK master-file conventions
    # (one entry per line) — verify against the original source.
    strfile = 'lineagehmm.str'
    gmtk_master = 'dummy.master'
    with open(gmtk_master, 'w') as outfile:
        outfile.write('''
MEAN_IN_FILE inline 2
0 gauss_emit_1_mean 1 1.0
1 gauss_emit_2_mean 1 50.0

COVAR_IN_FILE inline 2
0 gauss_emit_1_covar 1 1.0
1 gauss_emit_2_covar 1 50.0

MC_IN_FILE inline 2
0 1 0 gauss_emit_probs_1 gauss_emit_1_mean gauss_emit_1_covar
1 1 0 gauss_emit_probs_2 gauss_emit_2_mean gauss_emit_2_covar

DT_IN_FILE inline 1
0 parent_val 1 -1 { p0 }
''')
    # `obs` is the last observation matrix loaded above; its dims size the model
    make_lineage_model(strfile, obs.shape[0], args.K, obs.shape[2],
                       vert_parent=args.vert_parent, mark_avail=mark_avail,
                       separate_theta=args.separate_theta)
    cmd = 'gmtkTriangulate -strFile %s -inputMasterFile %s -rePart T -findBest T -triangulation completed ' % (strfile, gmtk_master)
    #cmd = 'gmtkTriangulate -strFile %s' % strfile
    subprocess.check_call(cmd, shell=True)

    # populate theta parts if they don't exist: with separate_theta, each
    # species i >= 2 gets its own theta_<i> slice (shared when theta is 3-D)
    if args.separate_theta and any(not hasattr(args, 'theta_%s' % i) for i in range(2, args.I+1)):
        for i in range(2, args.I+1):
            if len(args.theta.shape) == 3:
                setattr(args, 'theta_%s' % i, args.theta)
            else:
                setattr(args, 'theta_%s' % i, args.theta[i-2, :, :, :])
            args.params_to_save.append('theta_%s' % i)
        #del args.theta
        #args.params_to_save.remove('theta')

    # for each iteration...
    for args.iteration in range(1, args.max_iterations+1):
        # save args to disk as a workspace, then write one master file per input
        w = npy_params_to_workspace(args)
        em_args = []
        for index in range(len(args.observe_txt)):
            gmtk_master = '%s.master' % args.observe_txt[index]
            gmtk_obs = '%s.observations' % args.observe_txt[index]
            write_workspace_simple_master(w, gmtk_master)
            em_args.append((index, args.iteration, args.observe_txt[index],
                            args.I, args.L, gmtk_master, gmtk_obs))

        # run a gmtk em iteration on each file input, accumulating results
        if args.run_local:
            try:
                pool = multiprocessing.Pool(maxtasksperchild=1)
                # .get with a huge timeout keeps the main process interruptible
                pool.map_async(do_em_from_args, em_args).get(99999999)
                #map(do_em_from_args, [(i, w, args) for i in range(len(args.observe_txt))])
            except KeyboardInterrupt:
                print "Caught KeyboardInterrupt, terminating workers"
                pool.terminate()
                pool.join()
            else:
                pool.close()
                pool.join()
        else:
            pool = sge.SGEPool()
            #jobs_handle = pool.map_async(do_em_from_args, [(i, w, args) for i in range(len(args.observe_txt))], chunksize=10)
            #jobs_handle = pool.map_async(do_em_from_args, [(i, i, i) for i in range(len(args.observe_txt))], chunksize=10)
            #jobs_handle = pool.imap_unordered(do_em_from_args, em_args, chunksize=1)
            jobs_handle = pool.map_async(do_em_from_args, em_args, chunksize=1)
            # wait for all jobs to finish
            for j in jobs_handle:
                #pass
                j.wait()

        # run one final accumulator to get params
        # (gmtk_obs/gmtk_master hold the values from the last loop iteration)
        accumulate_em_runs(args, gmtk_obs, gmtk_master)
        plot_params(args)
        plot_energy(args)

        # check convergence: AttributeError on the first iteration since
        # args.last_free_energy doesn't exist yet; finally always records it
        f = args.free_energy[-1]
        try:
            print 'free energy is', f, 'percent change ll:', abs(args.last_free_energy - f) / args.last_free_energy
        except AttributeError:
            print 'first iteration. free energy is', f
        else:
            if abs(abs(args.last_free_energy - f) / args.last_free_energy) < args.epsilon_e:
                print 'converged! free energy is', f
                break
        finally:
            args.last_free_energy = f

    # persist the learned parameters
    for p in args.params_to_save:
        numpy.save(os.path.join(args.out_dir, args.out_params.format(param=p, **args.__dict__)), args.__dict__[p])

    # viterbi-decode every input with the final parameters
    if args.run_local:
        try:
            pool = multiprocessing.Pool()
            pool.map_async(do_viterbi_from_args, em_args).get(99999999)
            #map(do_em_from_args, [(i, w, args) for i in range(len(args.observe_txt))])
        except KeyboardInterrupt:
            print "Caught KeyboardInterrupt, terminating workers"
            pool.terminate()
            pool.join()
        else:
            pool.close()
            pool.join()
    else:
        pool = sge.SGEPool()
        jobs_handle = pool.map_async(do_viterbi_from_args, em_args, chunksize=1)
        # wait for all jobs to finish
        for j in jobs_handle:
            j.wait()
    # a[2] is the observation txt filename; a[-1] its .observations file
    for a in em_args:
        numpy.save('viterbi_Q_' + os.path.split(a[2])[1], parse_viterbi_states_to_Q(args, a[-1]))
def run_gmtk_lineagehmm(args):
    """Train and decode a lineage HMM with GMTK.

    NOTE(review): this is a byte-for-byte duplicate of an earlier
    ``run_gmtk_lineagehmm`` definition in this file; this later def is
    the one that takes effect. One copy should probably be removed —
    confirm which is current before deleting.

    Converts .npy observations/parameters to GMTK text format, builds and
    triangulates the graphical model, runs EM iterations (locally via
    multiprocessing or on a cluster via SGE) until the relative change in
    free energy drops below ``args.epsilon_e`` or ``args.max_iterations``
    is reached, saves the learned parameters, and finally runs Viterbi
    decoding on every input file.
    """
    try:
        os.mkdir('gmtk_images')
    except:
        pass  # best-effort: directory probably exists already
    args.iteration = 'initial'
    args.observe = 'all'
    args.run_name = 'gmtk'
    args.out_params = 'gmtk_images/gmtk_{param}_{observe}'
    args.out_dir = '.'
    args.free_energy = []
    plot_params(args)

    # convert .npy parameters and data into gmtk format
    args.observe_txt = []
    for f in args.observe_matrix:
        obs, obs_txt = npy_observations_to_txt(f)
        args.observe_txt.append(obs_txt)

    # prepare and triangulate the graphical model.
    # NOTE(review): extraction garbled the line breaks inside this inline
    # master-file literal; reconstructed per GMTK master-file conventions
    # (one entry per line) — verify against the original source.
    strfile = 'lineagehmm.str'
    gmtk_master = 'dummy.master'
    with open(gmtk_master, 'w') as outfile:
        outfile.write('''
MEAN_IN_FILE inline 2
0 gauss_emit_1_mean 1 1.0
1 gauss_emit_2_mean 1 50.0

COVAR_IN_FILE inline 2
0 gauss_emit_1_covar 1 1.0
1 gauss_emit_2_covar 1 50.0

MC_IN_FILE inline 2
0 1 0 gauss_emit_probs_1 gauss_emit_1_mean gauss_emit_1_covar
1 1 0 gauss_emit_probs_2 gauss_emit_2_mean gauss_emit_2_covar

DT_IN_FILE inline 1
0 parent_val 1 -1 { p0 }
''')
    # `obs` is the last observation matrix loaded above; its dims size the model
    make_lineage_model(strfile, obs.shape[0], args.K, obs.shape[2],
                       vert_parent=args.vert_parent, mark_avail=mark_avail,
                       separate_theta=args.separate_theta)
    cmd = 'gmtkTriangulate -strFile %s -inputMasterFile %s -rePart T -findBest T -triangulation completed ' % (
        strfile, gmtk_master)
    #cmd = 'gmtkTriangulate -strFile %s' % strfile
    subprocess.check_call(cmd, shell=True)

    # populate theta parts if they don't exist: with separate_theta, each
    # species i >= 2 gets its own theta_<i> slice (shared when theta is 3-D)
    if args.separate_theta and any(not hasattr(args, 'theta_%s' % i) for i in range(2, args.I + 1)):
        for i in range(2, args.I + 1):
            if len(args.theta.shape) == 3:
                setattr(args, 'theta_%s' % i, args.theta)
            else:
                setattr(args, 'theta_%s' % i, args.theta[i - 2, :, :, :])
            args.params_to_save.append('theta_%s' % i)
        #del args.theta
        #args.params_to_save.remove('theta')

    # for each iteration...
    for args.iteration in range(1, args.max_iterations + 1):
        # save args to disk as a workspace, then write one master file per input
        w = npy_params_to_workspace(args)
        em_args = []
        for index in range(len(args.observe_txt)):
            gmtk_master = '%s.master' % args.observe_txt[index]
            gmtk_obs = '%s.observations' % args.observe_txt[index]
            write_workspace_simple_master(w, gmtk_master)
            em_args.append((index, args.iteration, args.observe_txt[index],
                            args.I, args.L, gmtk_master, gmtk_obs))

        # run a gmtk em iteration on each file input, accumulating results
        if args.run_local:
            try:
                pool = multiprocessing.Pool(maxtasksperchild=1)
                # .get with a huge timeout keeps the main process interruptible
                pool.map_async(do_em_from_args, em_args).get(99999999)
                #map(do_em_from_args, [(i, w, args) for i in range(len(args.observe_txt))])
            except KeyboardInterrupt:
                print "Caught KeyboardInterrupt, terminating workers"
                pool.terminate()
                pool.join()
            else:
                pool.close()
                pool.join()
        else:
            pool = sge.SGEPool()
            #jobs_handle = pool.map_async(do_em_from_args, [(i, w, args) for i in range(len(args.observe_txt))], chunksize=10)
            #jobs_handle = pool.map_async(do_em_from_args, [(i, i, i) for i in range(len(args.observe_txt))], chunksize=10)
            #jobs_handle = pool.imap_unordered(do_em_from_args, em_args, chunksize=1)
            jobs_handle = pool.map_async(do_em_from_args, em_args, chunksize=1)
            # wait for all jobs to finish
            for j in jobs_handle:
                #pass
                j.wait()

        # run one final accumulator to get params
        # (gmtk_obs/gmtk_master hold the values from the last loop iteration)
        accumulate_em_runs(args, gmtk_obs, gmtk_master)
        plot_params(args)
        plot_energy(args)

        # check convergence: AttributeError on the first iteration since
        # args.last_free_energy doesn't exist yet; finally always records it
        f = args.free_energy[-1]
        try:
            print 'free energy is', f, 'percent change ll:', abs(
                args.last_free_energy - f) / args.last_free_energy
        except AttributeError:
            print 'first iteration. free energy is', f
        else:
            if abs(abs(args.last_free_energy - f) / args.last_free_energy) < args.epsilon_e:
                print 'converged! free energy is', f
                break
        finally:
            args.last_free_energy = f

    # persist the learned parameters
    for p in args.params_to_save:
        numpy.save(
            os.path.join(args.out_dir, args.out_params.format(param=p, **args.__dict__)),
            args.__dict__[p])

    # viterbi-decode every input with the final parameters
    if args.run_local:
        try:
            pool = multiprocessing.Pool()
            pool.map_async(do_viterbi_from_args, em_args).get(99999999)
            #map(do_em_from_args, [(i, w, args) for i in range(len(args.observe_txt))])
        except KeyboardInterrupt:
            print "Caught KeyboardInterrupt, terminating workers"
            pool.terminate()
            pool.join()
        else:
            pool.close()
            pool.join()
    else:
        pool = sge.SGEPool()
        jobs_handle = pool.map_async(do_viterbi_from_args, em_args, chunksize=1)
        # wait for all jobs to finish
        for j in jobs_handle:
            j.wait()
    # a[2] is the observation txt filename; a[-1] its .observations file
    for a in em_args:
        numpy.save('viterbi_Q_' + os.path.split(a[2])[1], parse_viterbi_states_to_Q(args, a[-1]))