def plot_params(wkspacefile):
    """Load arguments from a workspace file and plot the parameters.

    The workspace file is converted with ``workspace_to_npy_args``, the
    output directory named by ``args.out_dir`` is created if it does not
    exist yet, and the resulting ``args`` object is handed on to
    ``plot_params``.

    Args:
        wkspacefile: workspace file accepted by ``workspace_to_npy_args``.

    Raises:
        OSError: if ``args.out_dir`` cannot be created and does not
            already exist as a directory.
    """
    args = workspace_to_npy_args(wkspacefile)
    try:
        os.mkdir(args.out_dir)
    except OSError:
        # An already existing directory is the expected, harmless case.
        # Anything else (permissions, missing parent, ...) is a real error
        # that would otherwise only surface later when output is written.
        if not os.path.isdir(args.out_dir):
            raise
    # NOTE(review): this call uses the module-level name ``plot_params``.
    # If that name resolves to this very function, the call recurses with
    # ``args`` as the "workspace file". Presumably a plotting routine that
    # takes ``args`` was intended here -- confirm against the original module.
    plot_params(args)
Beispiel #2
0
def plot_params(wkspacefile):
    """Load arguments from a workspace file and plot the parameters.

    The workspace file is converted with ``workspace_to_npy_args``, the
    output directory named by ``args.out_dir`` is created if it does not
    exist yet, and the resulting ``args`` object is handed on to
    ``plot_params``.

    Args:
        wkspacefile: workspace file accepted by ``workspace_to_npy_args``.

    Raises:
        OSError: if ``args.out_dir`` cannot be created and does not
            already exist as a directory.
    """
    args = workspace_to_npy_args(wkspacefile)
    try:
        os.mkdir(args.out_dir)
    except OSError:
        # An already existing directory is the expected, harmless case.
        # Anything else (permissions, missing parent, ...) is a real error
        # that would otherwise only surface later when output is written.
        if not os.path.isdir(args.out_dir):
            raise
    # NOTE(review): this call uses the module-level name ``plot_params``.
    # If that name resolves to this very function, the call recurses with
    # ``args`` as the "workspace file". Presumably a plotting routine that
    # takes ``args`` was intended here -- confirm against the original module.
    plot_params(args)
def run_gmtk_lineagehmm(args):
    """Fit the lineage HMM by driving GMTK, then decode every input.

    Steps, in order:

    1. Create ``gmtk_images/`` and plot the initial parameters.
    2. Convert every entry of ``args.observe_matrix`` to GMTK text format
       with ``npy_observations_to_txt``.
    3. Write a placeholder master file, build the structure file with
       ``make_lineage_model`` and triangulate it with ``gmtkTriangulate``.
    4. For ``args.iteration`` in ``1 .. args.max_iterations``: write the
       current parameters, run ``do_em_from_args`` once per input (local
       process pool or SGE pool), accumulate with ``accumulate_em_runs``,
       plot, test convergence on ``args.free_energy[-1]`` and save every
       parameter named in ``args.params_to_save``.
    5. Run ``do_viterbi_from_args`` once per input and save one
       ``viterbi_Q_<name>`` array per input in the working directory.

    Args:
        args: mutable namespace. Read here: ``observe_matrix``, ``K``,
            ``I``, ``L``, ``vert_parent``, ``separate_theta``, ``theta``,
            ``params_to_save``, ``max_iterations``, ``run_local`` and
            ``epsilon_e``. Overwritten here: ``iteration``, ``observe``,
            ``run_name``, ``out_params``, ``out_dir``, ``free_energy``
            (reset to ``[]``), ``observe_txt``, ``last_free_energy`` and,
            when ``separate_theta`` is set, ``theta_2 .. theta_I``.

    Raises:
        ValueError: if ``args.observe_matrix`` yields no inputs.
        OSError: if ``gmtk_images`` cannot be created and does not exist.
        subprocess.CalledProcessError: if ``gmtkTriangulate`` exits with a
            non-zero status.
    """
    try:
        os.mkdir('gmtk_images')
    except OSError:
        # Only "directory already there" is safe to ignore.
        if not os.path.isdir('gmtk_images'):
            raise

    args.iteration = 'initial'
    args.observe = 'all'
    args.run_name = 'gmtk'
    args.out_params = 'gmtk_images/gmtk_{param}_{observe}'
    args.out_dir = '.'
    args.free_energy = []
    plot_params(args)

    # convert .npy parameters and data into gmtk format
    args.observe_txt = []
    for observation_file in args.observe_matrix:
        obs, obs_txt = npy_observations_to_txt(observation_file)
        args.observe_txt.append(obs_txt)
    if not args.observe_txt:
        # Without at least one input ``obs`` is never bound and the model
        # dimensions below cannot be determined.
        raise ValueError('args.observe_matrix is empty; '
                         'at least one observation file is required')

    # prepare and triangulate the graphical model
    strfile = 'lineagehmm.str'
    gmtk_master = 'dummy.master'
    with open(gmtk_master, 'w') as outfile:
        outfile.write('''

MEAN_IN_FILE inline 2
0 gauss_emit_1_mean   1   1.0
1 gauss_emit_2_mean   1   50.0

COVAR_IN_FILE inline 2
0 gauss_emit_1_covar   1   1.0
1 gauss_emit_2_covar   1   50.0

MC_IN_FILE inline 2
0 1 0 gauss_emit_probs_1   gauss_emit_1_mean   gauss_emit_1_covar
1 1 0 gauss_emit_probs_2   gauss_emit_2_mean   gauss_emit_2_covar

DT_IN_FILE inline 1
0 parent_val 1
    -1 { p0 }

''')
    # The model is sized from the LAST converted observation array.
    # NOTE(review): ``mark_avail`` is not assigned anywhere in this function,
    # so it must resolve to a module-level name -- verify it exists.
    make_lineage_model(strfile,
                       obs.shape[0],
                       args.K,
                       obs.shape[2],
                       vert_parent=args.vert_parent,
                       mark_avail=mark_avail,
                       separate_theta=args.separate_theta)
    cmd = 'gmtkTriangulate -strFile %s -inputMasterFile %s -rePart T -findBest T  -triangulation completed ' % (strfile, gmtk_master)
    subprocess.check_call(cmd, shell=True)

    # populate theta parts if they don't exist
    theta_names = ['theta_%s' % i for i in range(2, args.I + 1)]
    if args.separate_theta and any(not hasattr(args, name)
                                   for name in theta_names):
        for i in range(2, args.I + 1):
            if len(args.theta.shape) == 3:
                # A 3-d theta is shared as-is by every theta_<i>.
                setattr(args, 'theta_%s' % i, args.theta)
            else:
                # Otherwise the leading axis selects the slice for theta_<i>.
                setattr(args, 'theta_%s' % i, args.theta[i - 2, :, :, :])
            args.params_to_save.append('theta_%s' % i)

    # for each iteration...
    for args.iteration in range(1, args.max_iterations + 1):
        # save args to disk
        w = npy_params_to_workspace(args)

        em_args = []
        for index, obs_txt in enumerate(args.observe_txt):
            gmtk_master = '%s.master' % obs_txt
            gmtk_obs = '%s.observations' % obs_txt
            write_workspace_simple_master(w, gmtk_master)
            em_args.append((index, args.iteration, obs_txt,
                            args.I, args.L, gmtk_master, gmtk_obs))

        # run a gmtk em iteration on each file input, accumulating results
        _dispatch_gmtk_jobs(do_em_from_args, em_args, args.run_local,
                            maxtasksperchild=1)

        # run one final accumulator to get params; it receives the file
        # names belonging to the last input of the loop above
        accumulate_em_runs(args, gmtk_obs, gmtk_master)

        plot_params(args)
        plot_energy(args)

        # check convergence
        free_energy = args.free_energy[-1]
        if hasattr(args, 'last_free_energy'):
            previous = args.last_free_energy
            # Recorded before the division so it is updated even if that
            # raises, as the original ``finally`` clause did.
            args.last_free_energy = free_energy
            change = abs(previous - free_energy) / previous
            print('free energy is %s percent change ll: %s'
                  % (free_energy, change))
            if abs(change) < args.epsilon_e:
                print('converged! free energy is %s' % (free_energy,))
                # Leaving here skips the parameter save below for this
                # iteration.
                break
        else:
            args.last_free_energy = free_energy
            print('first iteration. free energy is %s' % (free_energy,))

        for p in args.params_to_save:
            numpy.save(
                os.path.join(args.out_dir,
                             args.out_params.format(param=p, **args.__dict__)),
                args.__dict__[p])

    # Decode with the job tuples built in the last executed iteration.
    _dispatch_gmtk_jobs(do_viterbi_from_args, em_args, args.run_local)
    for job in em_args:
        obs_txt, gmtk_obs = job[2], job[-1]
        numpy.save('viterbi_Q_' + os.path.split(obs_txt)[1],
                   parse_viterbi_states_to_Q(args, gmtk_obs))


def _dispatch_gmtk_jobs(func, job_args, run_local, maxtasksperchild=None):
    """Run ``func`` over ``job_args`` and block until every job is done.

    Args:
        func: callable applied to each element of ``job_args``.
        job_args: list of per-job argument tuples.
        run_local: if true use a ``multiprocessing.Pool``, otherwise an
            ``sge.SGEPool``.
        maxtasksperchild: forwarded to ``multiprocessing.Pool``; ignored for
            the SGE pool.
    """
    if run_local:
        # Created outside the ``try`` so the interrupt handler below can
        # never run with ``pool`` unbound.
        pool = multiprocessing.Pool(maxtasksperchild=maxtasksperchild)
        try:
            # NOTE(review): the huge timeout is presumably the usual
            # Python 2 workaround that keeps Ctrl-C deliverable while
            # waiting on the pool -- confirm before removing it.
            pool.map_async(func, job_args).get(99999999)
        except KeyboardInterrupt:
            # NOTE(review): the interrupt is swallowed here, so the caller
            # carries on after the workers were killed -- confirm intended.
            print("Caught KeyboardInterrupt, terminating workers")
            pool.terminate()
            pool.join()
        else:
            pool.close()
            pool.join()
    else:
        pool = sge.SGEPool()
        jobs_handle = pool.map_async(func, job_args, chunksize=1)
        # wait for all jobs to finish
        for job in jobs_handle:
            job.wait()
Beispiel #4
0
def run_gmtk_lineagehmm(args):
    """Fit the lineage HMM by driving GMTK, then decode every input.

    Steps, in order:

    1. Create ``gmtk_images/`` and plot the initial parameters.
    2. Convert every entry of ``args.observe_matrix`` to GMTK text format
       with ``npy_observations_to_txt``.
    3. Write a placeholder master file, build the structure file with
       ``make_lineage_model`` and triangulate it with ``gmtkTriangulate``.
    4. For ``args.iteration`` in ``1 .. args.max_iterations``: write the
       current parameters, run ``do_em_from_args`` once per input (local
       process pool or SGE pool), accumulate with ``accumulate_em_runs``,
       plot, test convergence on ``args.free_energy[-1]`` and save every
       parameter named in ``args.params_to_save``.
    5. Run ``do_viterbi_from_args`` once per input and save one
       ``viterbi_Q_<name>`` array per input in the working directory.

    Args:
        args: mutable namespace. Read here: ``observe_matrix``, ``K``,
            ``I``, ``L``, ``vert_parent``, ``separate_theta``, ``theta``,
            ``params_to_save``, ``max_iterations``, ``run_local`` and
            ``epsilon_e``. Overwritten here: ``iteration``, ``observe``,
            ``run_name``, ``out_params``, ``out_dir``, ``free_energy``
            (reset to ``[]``), ``observe_txt``, ``last_free_energy`` and,
            when ``separate_theta`` is set, ``theta_2 .. theta_I``.

    Raises:
        ValueError: if ``args.observe_matrix`` yields no inputs.
        OSError: if ``gmtk_images`` cannot be created and does not exist.
        subprocess.CalledProcessError: if ``gmtkTriangulate`` exits with a
            non-zero status.
    """
    try:
        os.mkdir('gmtk_images')
    except OSError:
        # Only "directory already there" is safe to ignore.
        if not os.path.isdir('gmtk_images'):
            raise

    args.iteration = 'initial'
    args.observe = 'all'
    args.run_name = 'gmtk'
    args.out_params = 'gmtk_images/gmtk_{param}_{observe}'
    args.out_dir = '.'
    args.free_energy = []
    plot_params(args)

    # convert .npy parameters and data into gmtk format
    args.observe_txt = []
    for observation_file in args.observe_matrix:
        obs, obs_txt = npy_observations_to_txt(observation_file)
        args.observe_txt.append(obs_txt)
    if not args.observe_txt:
        # Without at least one input ``obs`` is never bound and the model
        # dimensions below cannot be determined.
        raise ValueError('args.observe_matrix is empty; '
                         'at least one observation file is required')

    # prepare and triangulate the graphical model
    strfile = 'lineagehmm.str'
    gmtk_master = 'dummy.master'
    with open(gmtk_master, 'w') as outfile:
        outfile.write('''

MEAN_IN_FILE inline 2
0 gauss_emit_1_mean   1   1.0
1 gauss_emit_2_mean   1   50.0

COVAR_IN_FILE inline 2
0 gauss_emit_1_covar   1   1.0
1 gauss_emit_2_covar   1   50.0

MC_IN_FILE inline 2
0 1 0 gauss_emit_probs_1   gauss_emit_1_mean   gauss_emit_1_covar
1 1 0 gauss_emit_probs_2   gauss_emit_2_mean   gauss_emit_2_covar

DT_IN_FILE inline 1
0 parent_val 1
    -1 { p0 }

''')
    # The model is sized from the LAST converted observation array.
    # NOTE(review): ``mark_avail`` is not assigned anywhere in this function,
    # so it must resolve to a module-level name -- verify it exists.
    make_lineage_model(strfile,
                       obs.shape[0],
                       args.K,
                       obs.shape[2],
                       vert_parent=args.vert_parent,
                       mark_avail=mark_avail,
                       separate_theta=args.separate_theta)
    cmd = 'gmtkTriangulate -strFile %s -inputMasterFile %s -rePart T -findBest T  -triangulation completed ' % (
        strfile, gmtk_master)
    subprocess.check_call(cmd, shell=True)

    # populate theta parts if they don't exist
    theta_names = ['theta_%s' % i for i in range(2, args.I + 1)]
    if args.separate_theta and any(not hasattr(args, name)
                                   for name in theta_names):
        for i in range(2, args.I + 1):
            if len(args.theta.shape) == 3:
                # A 3-d theta is shared as-is by every theta_<i>.
                setattr(args, 'theta_%s' % i, args.theta)
            else:
                # Otherwise the leading axis selects the slice for theta_<i>.
                setattr(args, 'theta_%s' % i, args.theta[i - 2, :, :, :])
            args.params_to_save.append('theta_%s' % i)

    # for each iteration...
    for args.iteration in range(1, args.max_iterations + 1):
        # save args to disk
        w = npy_params_to_workspace(args)

        em_args = []
        for index, obs_txt in enumerate(args.observe_txt):
            gmtk_master = '%s.master' % obs_txt
            gmtk_obs = '%s.observations' % obs_txt
            write_workspace_simple_master(w, gmtk_master)
            em_args.append((index, args.iteration, obs_txt,
                            args.I, args.L, gmtk_master, gmtk_obs))

        # run a gmtk em iteration on each file input, accumulating results
        _dispatch_gmtk_jobs(do_em_from_args, em_args, args.run_local,
                            maxtasksperchild=1)

        # run one final accumulator to get params; it receives the file
        # names belonging to the last input of the loop above
        accumulate_em_runs(args, gmtk_obs, gmtk_master)

        plot_params(args)
        plot_energy(args)

        # check convergence
        free_energy = args.free_energy[-1]
        if hasattr(args, 'last_free_energy'):
            previous = args.last_free_energy
            # Recorded before the division so it is updated even if that
            # raises, as the original ``finally`` clause did.
            args.last_free_energy = free_energy
            change = abs(previous - free_energy) / previous
            print('free energy is %s percent change ll: %s'
                  % (free_energy, change))
            if abs(change) < args.epsilon_e:
                print('converged! free energy is %s' % (free_energy,))
                # Leaving here skips the parameter save below for this
                # iteration.
                break
        else:
            args.last_free_energy = free_energy
            print('first iteration. free energy is %s' % (free_energy,))

        for p in args.params_to_save:
            numpy.save(
                os.path.join(args.out_dir,
                             args.out_params.format(param=p, **args.__dict__)),
                args.__dict__[p])

    # Decode with the job tuples built in the last executed iteration.
    _dispatch_gmtk_jobs(do_viterbi_from_args, em_args, args.run_local)
    for job in em_args:
        obs_txt, gmtk_obs = job[2], job[-1]
        numpy.save('viterbi_Q_' + os.path.split(obs_txt)[1],
                   parse_viterbi_states_to_Q(args, gmtk_obs))


def _dispatch_gmtk_jobs(func, job_args, run_local, maxtasksperchild=None):
    """Run ``func`` over ``job_args`` and block until every job is done.

    Args:
        func: callable applied to each element of ``job_args``.
        job_args: list of per-job argument tuples.
        run_local: if true use a ``multiprocessing.Pool``, otherwise an
            ``sge.SGEPool``.
        maxtasksperchild: forwarded to ``multiprocessing.Pool``; ignored for
            the SGE pool.
    """
    if run_local:
        # Created outside the ``try`` so the interrupt handler below can
        # never run with ``pool`` unbound.
        pool = multiprocessing.Pool(maxtasksperchild=maxtasksperchild)
        try:
            # NOTE(review): the huge timeout is presumably the usual
            # Python 2 workaround that keeps Ctrl-C deliverable while
            # waiting on the pool -- confirm before removing it.
            pool.map_async(func, job_args).get(99999999)
        except KeyboardInterrupt:
            # NOTE(review): the interrupt is swallowed here, so the caller
            # carries on after the workers were killed -- confirm intended.
            print("Caught KeyboardInterrupt, terminating workers")
            pool.terminate()
            pool.join()
        else:
            pool.close()
            pool.join()
    else:
        pool = sge.SGEPool()
        jobs_handle = pool.map_async(func, job_args, chunksize=1)
        # wait for all jobs to finish
        for job in jobs_handle:
            job.wait()