Example #1
0
def seed_sequence(com, traj_no, nseg=2, max_states=100, niter=10):
    """Build a seed state sequence by fitting an IHMM to each trajectory segment.

    The frames of trajectory ``traj_no`` are split into ``nseg`` consecutive
    segments, an independent ``hdphmm.InfiniteHMM`` is fit to each one, and
    the per-segment state sequences are stitched together. Labels from
    different segments are kept distinct (offset by ``max_states * segment``)
    and finally compacted to ``0..n_unique-1``.

    :param com: tuple ``(trajectory, second)``; ``trajectory`` is indexed as
        ``trajectory[frames, [traj_no], :]`` and ``second`` is passed through
        to every segment unchanged (presumably the timestep -- TODO confirm).
    :param traj_no: index of the trajectory (second axis) to segment.
    :param nseg: number of segments.
    :param max_states: ``max_states`` given to each segment's IHMM; also the
        label offset between consecutive segments.
    :param niter: number of inference iterations run on each segment.

    :return: integer array with a single row holding the relabelled,
        concatenated state sequence.
    """

    nframes = com[0].shape[0]
    pps = nframes // nseg  # points per segment

    segments = []
    for s in range(nseg):

        # Every segment after the first starts one frame early, overlapping
        # its predecessor by a single point (presumably so the order-1 AR
        # model assigns a state to every frame -- TODO confirm).
        start = 0 if s == 0 else s * pps - 1

        # The final segment absorbs the remainder when the frame count is not
        # a multiple of nseg; previously those trailing frames were dropped
        # and the seed came out shorter than the trajectory.
        stop = nframes if s == nseg - 1 else (s + 1) * pps

        seg = (com[0][start:stop, [traj_no], :], com[1])

        segments.append(
            hdphmm.InfiniteHMM(seg,
                               traj_no=0,
                               load_com=False,
                               difference=False,
                               observation_model='AR',
                               order=1,
                               max_states=max_states,
                               dim=[0, 1, 2],
                               prior='MNIW-N',
                               save_every=1,
                               hyperparams=None))

    z = np.zeros([1, 0], dtype=int)
    for s, segment in enumerate(segments):
        segment.inference(niter)
        # offset the labels so states from different segments never collide
        z = np.concatenate((z, segment.z + max_states * s), axis=1)

    # Compact the labels to 0..n_unique-1 (rank among the sorted unique
    # labels). Writing into z keeps its dtype unchanged.
    _, compact = np.unique(z[0], return_inverse=True)
    z[0, :] = compact

    return z
Example #2
0
def ihmm(res,
         niter=100,
         cluster=1,
         dt_A=1,
         dt_sigma=0.25,
         algorithm='agglomerative',
         final_parameters=None,
         nclusters_sigma=None,
         nclusters_A=None,
         tot_clusters=None,
         combine_clusters=False,
         nclusters_r=3,
         nclusters_T=5,
         order=1,
         seed=True):
    """Fit per-trajectory IHMMs, cluster their states and build one shared parameter set.

    The function has two modes, selected by ``final_parameters``:

    * ``final_parameters is None``: fit one (optionally seeded) IHMM per
      trajectory, re-fit each on radial coordinates with the state sequence
      fixed, save both lists of models to disk and then call ``exit()``.
    * otherwise: take the models from ``final_parameters['ihmm']`` and
      ``final_parameters['ihmmr']``, cluster the per-state parameters, re-fit
      a single model on the clustered state sequence, assemble the final
      parameter dictionary, save it and return it.

    :param res: identifier interpolated into every input/output file name and
        passed to ``hdphmm.InfiniteHMM`` as ``res``.
    :param niter: inference iterations for the per-trajectory IHMMs (the
        fixed-sequence fits use 10 iterations regardless).
    :param cluster: index into ``['diags', 'eigs']`` selecting which flag is
        switched on for the sigma / A ``Cluster`` objects.
    :param dt_A: ``distance_threshold`` for clustering A.
    :param dt_sigma: ``distance_threshold`` for clustering sigma.
    :param algorithm: ``algorithm`` argument forwarded to ``Cluster``.
    :param final_parameters: ``None`` or a dict with keys ``'ihmm'`` and
        ``'ihmmr'`` (see the two modes above).
    :param nclusters_sigma: ``nclusters`` for the sigma clustering.
    :param nclusters_A: ``nclusters`` for the A clustering; also selects which
        output file name pattern is used.
    :param tot_clusters: ``nclusters`` used when ``combine_clusters`` is True;
        also formatted with ``%d`` into the output file name in that case.
    :param combine_clusters: if True, cluster sigma, A, the first mean
        component and the transformed T together in one ``Cluster``; otherwise
        cluster each separately and combine the labels.
    :param nclusters_r: ``nclusters`` for clustering the first mean component.
    :param nclusters_T: ``nclusters`` for clustering the transformed T.
    :param order: autoregressive order passed to ``hdphmm.InfiniteHMM``.
    :param seed: if True, initialise each per-trajectory IHMM with a state
        sequence from ``seed_sequence``.

    :return: dict with keys 'A', 'sigma', 'mu', 'self_T', 'T', 'pi_init', 'z',
        'ihmmr', 'ihmm', 'all_state_params', 'ihmm_final', 'T_distribution'.
    """

    cluster_variables = ['diags', 'eigs']
    cluster_vars = cluster_variables[cluster]

    difference = False  # take first order difference of solute trajectories
    observation_model = 'AR'  # assume an autoregressive model (that's the only model implemented)
    order = order  # autoregressive order (no-op self-assignment)
    max_states = 200  # More is usually better
    traj_no = np.arange(
        24).tolist()  # indices of the trajectories to analyse (0..23)
    dim = [0, 1, 2]  # dimensions of trajectory to keep
    prior = 'MNIW-N'  # MNIW-N (includes means) or MNIW (forces means to zero)
    keep_xy = True
    save_every = 10

    # You can define a dictionary with some spline parameters
    spline_params = {
        'npts_spline': 10,
        'save': True,
        'savename': 'trajectories/spline_%s.pl' % res
    }

    com_savename = 'trajectories/com_xy_radial_%s.pl' % res

    com = 'trajectories/com_xy_radial_%s.pl' % res  # center of mass trajectories. If it exists, we can skip loading the MD trajectory and just load this
    # The loaded object is only used to build seed sequences; the IHMMs below
    # are given the file name `com` and load it themselves (load_com=True).
    com_raw = file_rw.load_object(com)
    if final_parameters is None:

        # We will be applying the IHMM to each trajectory independently
        ihmm = [[] for i in range(24)]

        # initialize the ihmm for each trajectory
        for t in traj_no:

            if seed:

                # Short, segment-wise IHMM fits provide the initial state sequence
                z = seed_sequence(com_raw,
                                  t,
                                  nseg=4,
                                  max_states=max_states,
                                  niter=3)
                print('Seeding with %d states' % np.unique(z).size)

            else:

                z = None

            ihmm[t] = hdphmm.InfiniteHMM(com,
                                         traj_no=t,
                                         load_com=True,
                                         difference=difference,
                                         observation_model=observation_model,
                                         order=order,
                                         max_states=max_states,
                                         dim=dim,
                                         spline_params=spline_params,
                                         prior=prior,
                                         hyperparams=None,
                                         keep_xy=keep_xy,
                                         com_savename=com_savename,
                                         radial=True,
                                         save_com=False,
                                         save_every=save_every,
                                         res=res,
                                         gro='berendsen.gro',
                                         seed_sequence=z)

        for i in traj_no:
            ihmm[i].inference(niter)

        for i in traj_no:
            ihmm[i]._get_params(quiet=True)

        ihmmr = [[] for i in traj_no]

        niter_fixed = 10  # state sequence is fixed so parameter inference converges quick
        # convert to radial
        for i in traj_no:

            # Column 0: norm of the first two COM components; column 1: the third component
            radial = np.zeros([ihmm[i].com.shape[0], 1, 2])
            radial[:, 0, 0] = np.linalg.norm(ihmm[i].com[:, 0, :2], axis=1)
            radial[:, 0, 1] = ihmm[i].com[:, 0, 2]

            # Re-fit on the radial data with the state sequence taken from the first fit
            ihmmr[i] = hdphmm.InfiniteHMM((radial, ihmm[i].dt),
                                          traj_no=[0],
                                          load_com=False,
                                          difference=False,
                                          order=order,
                                          max_states=max_states,
                                          dim=[0, 1],
                                          spline_params=spline_params,
                                          prior='MNIW-N',
                                          hyperparams=None,
                                          save_com=False,
                                          state_sequence=ihmm[i].z)

            ihmmr[i].inference(niter_fixed)

        for i in traj_no:
            ihmmr[i]._get_params(traj_no=0)

        file_rw.save_object({
            'ihmm': ihmm,
            'ihmmr': ihmmr
        }, 'ihmm_%s_%diter_max_states%d_seeded_fixed.pl' %
                            (res, niter, max_states))
        # NOTE(review): this ends the interpreter, so in this mode the
        # function never returns; the clustering below only runs when
        # final_parameters is supplied. Confirm this is intended.
        exit()

    else:

        ihmm = final_parameters['ihmm']
        ihmmr = final_parameters['ihmmr']

    # Cluster radial params

    # Per-state parameters of all trajectories, concatenated along the last axis
    A = None
    sigma = None
    mu = None
    T = None

    for t in range(24):

        estimated_states = ihmmr[t].z[0, :]
        found_states = list(np.unique(estimated_states))

        a = np.zeros([2, 2, len(found_states)
                      ])  # should probably include a dimension for AR order
        s = np.zeros([2, 2, len(found_states)])
        m = np.zeros([2, len(found_states)])
        # Diagonal of the axis-0 mean of 'T' (returned later as 'self_T');
        # presumably the self-transition probabilities -- TODO confirm
        st = np.diag(ihmm[t].converged_params['T'].mean(axis=0))

        for i, state in enumerate(found_states):

            # Parameters are looked up by position i, not by the label `state`
            Amean = ihmmr[t].converged_params['A'][:, 0, ..., i].mean(axis=0)
            sigmamean = ihmmr[t].converged_params['sigma'][:, ...,
                                                           i].mean(axis=0)

            # we want to cluster on unconditional mean
            mucond = ihmmr[t].converged_params['mu'][..., i].mean(
                axis=0)  # conditional mean
            mumean = np.linalg.inv(np.eye(2) -
                                   Amean) @ mucond  # unconditional mean

            a[..., i] = Amean
            s[..., i] = sigmamean
            m[:, i] = mumean

        if A is None:
            A = a
            sigma = s
            mu = m
            T = st
        else:
            A = np.concatenate((A, a), axis=-1)
            sigma = np.concatenate((sigma, s), axis=-1)
            mu = np.concatenate((mu, m), axis=-1)
            T = np.concatenate((T, st), axis=-1)

    # NOTE(review): mu_ is only referenced by the commented-out plotting code below
    mu_ = np.copy(mu)

    # default is diags
    eigs = False
    diags = True
    if cluster_vars == 'eigs':
        eigs = True
        diags = False

    if combine_clusters:

        # Cluster on all parameters at once; T is transformed as -log(1 - T)
        params = {'sigma': sigma, 'A': A, 'mu': mu[0, :], 'T': -np.log(1 - T)}
        sig_cluster = Cluster(params,
                              eigs=eigs,
                              diags=diags,
                              algorithm=algorithm,
                              distance_threshold=None,
                              nclusters=tot_clusters)
        sig_cluster.fit()

        new_labels = sig_cluster.labels

        print('Found %d clusters' % np.unique(sig_cluster.labels).size)

    else:

        # Cluster each parameter type separately, then combine the four labels
        sig_params = {'sigma': sigma}
        A_params = {'A': A}

        sig_cluster = Cluster(sig_params,
                              eigs=eigs,
                              diags=diags,
                              algorithm=algorithm,
                              distance_threshold=dt_sigma,
                              nclusters=nclusters_sigma)
        A_cluster = Cluster(A_params,
                            eigs=eigs,
                            diags=diags,
                            algorithm=algorithm,
                            distance_threshold=dt_A,
                            nclusters=nclusters_A)
        r_cluster = Cluster({'mu': mu[0, :]},
                            algorithm=algorithm,
                            nclusters=nclusters_r)
        T_cluster = Cluster({'T': -np.log(1 - T)},
                            algorithm=algorithm,
                            nclusters=nclusters_T)

        sig_cluster.fit()
        A_cluster.fit()
        r_cluster.fit()
        T_cluster.fit()

        nA_clusters = np.unique(A_cluster.labels).size
        nsig_clusters = np.unique(sig_cluster.labels).size
        print('Found %d sigma clusters' % nsig_clusters)
        print('Found %d A clusters' % nA_clusters)
        # These two report the requested cluster counts, not counts derived from the fit
        print('Found %d r clusters' % nclusters_r)
        print('Found %d T clusters' % nclusters_T)

        # NOTE(review): cluster_matrix is not read again in this function
        cluster_matrix = np.zeros([nA_clusters, nsig_clusters])

        # visualize r clusters
        # print(r_cluster.labels)
        # for i in range(nclusters_r):
        #    ndx = np.where(np.array(r_cluster.labels) == i)[0]
        #    plt.hist(mu_[0, ndx])
        #plt.show()
        #exit()

        new_clusters = np.zeros([A.shape[-1]])

        # Encode the (A, sigma, r, T) label tuple of every state as one
        # mixed-radix integer so each distinct combination gets its own id
        for state in range(A.shape[-1]):
            #new_clusters[state] = A_cluster.labels[state] * nsig_clusters + sig_cluster.labels[state]
            new_clusters[state] = A_cluster.labels[
                state] * nsig_clusters * nclusters_r * nclusters_T + sig_cluster.labels[
                    state] * nclusters_r * nclusters_T + r_cluster.labels[
                        state] * nclusters_T + T_cluster.labels[state]

        print('Found %d total clusters' % np.unique(new_clusters).size)

        all_labels = np.unique(new_clusters).astype(int)

        # Compact the combined ids that actually occur to 0..n-1
        new_label_dict = {l: i for i, l in enumerate(all_labels)}

        new_labels = [new_label_dict[int(i)] for i in new_clusters]

        # sig_cluster now carries the combined labels; it is the object
        # handed to reassign_state_sequence below
        sig_cluster.labels = new_labels

    all_state_params = {
        'A': A,
        'sigma': sigma,
        'mu': mu,
        'state_labels': new_labels,
        'T': T
    }

    # Hand each trajectory its own slice of the concatenated label list
    ndx = 0
    for i in traj_no:
        end = ndx + len(ihmmr[i].found_states)
        labels = new_labels[ndx:end]
        ndx = end
        ihmmr[i].reassign_state_sequence(sig_cluster, labels=labels)

    # Unconditional means of every pre-clustering state, concatenated over trajectories
    all_mu = None
    for t in traj_no:

        m = ihmmr[t].converged_params['mu'].mean(axis=0)
        phi = ihmmr[t].converged_params['A'][:, 0, ..., :].mean(axis=0)

        # convert to unconditional mean
        for i in range(m.shape[1]):
            m[:, i] = np.linalg.inv(np.eye(2) - phi[..., i]) @ m[:, i]

        if all_mu is None:
            all_mu = m
        else:
            all_mu = np.concatenate((all_mu, m), axis=1)

    # Mean of each cluster = plain average over the states assigned to it
    nclusters = np.unique(sig_cluster.labels).size
    mu = np.zeros([nclusters, 2])
    for i in range(nclusters):
        ndx = np.where(np.array(sig_cluster.labels) == i)[0]
        mu[i, :] = all_mu[:, ndx].mean(axis=1)

    mean_zero = []

    for t in traj_no:

        zeroed = ihmmr[t].subtract_mean(traj_no=0, simple_mean=True)
        mean_zero.append(zeroed)

    mean_zero = np.array(mean_zero)

    # Stack the clustered state sequences of all trajectories along axis 0
    z = None
    for t in traj_no:

        seq = ihmmr[t].clustered_state_sequence[:, :]
        if z is None:
            z = seq
        else:
            z = np.concatenate((z, seq), axis=0)

    # Single model over all mean-zeroed trajectories with the clustered state
    # sequence fixed (its first column is dropped). `t` here is the last value
    # left over from the loop above.
    ihmm_final = hdphmm.InfiniteHMM(
        (np.moveaxis(mean_zero, 0, 1), ihmmr[t].dt),
        traj_no=None,
        load_com=False,
        difference=False,
        order=order,
        max_states=mu.shape[0],
        dim=[0, 1],
        spline_params=spline_params,
        prior='MNIW',
        hyperparams=None,
        save_com=False,
        state_sequence=z[:, 1:])

    niter = 10  # state sequence is fixed therefore parameter inference is quick
    ihmm_final.inference(niter)

    nclusters = np.unique(z).size

    ntraj = len(traj_no)

    # Per-trajectory parameter estimates and the number of frames each
    # trajectory spends in each cluster
    A = np.zeros([ntraj, nclusters, 2, 2])
    sigma = np.zeros_like(A)
    weights = np.zeros([ntraj, nclusters])

    for t in range(len(traj_no)):
        ihmm_final._get_params(traj_no=t, quiet=True)
        for i, ndx in enumerate(ihmm_final.found_states):
            A[t, ndx, ...] = ihmm_final.converged_params['A'][:, 0, ...,
                                                              i].mean(axis=0)
            sigma[t, ndx,
                  ...] = ihmm_final.converged_params['sigma'][:, ...,
                                                              i].mean(axis=0)
            weights[t, ndx] = np.where(ihmm_final.z[t, :] == ndx)[0].size

    # Frame-count-weighted average over trajectories; clusters with zero
    # total weight keep all-zero parameters
    A_final = np.zeros([nclusters, 1, 2, 2])
    sigma_final = np.zeros([nclusters, 2, 2])
    for c in range(nclusters):
        if weights[:, c].sum() > 0:
            A_final[c, 0, ...] = np.average(A[:, c, ...],
                                            axis=0,
                                            weights=weights[:, c])
            sigma_final[c, ...] = np.average(sigma[:, c, ...],
                                             axis=0,
                                             weights=weights[:, c])

    # NOTE(review): m is computed here but not used afterwards; the returned
    # dict stores mu instead
    m = np.zeros_like(mu)
    for i in range(m.shape[0]):
        m[i, :] = (np.eye(2) - A_final[i, 0, ...]) @ mu[i, :]

    # Map state labels to consecutive row/column indices of the count matrix
    found_states = np.unique(ihmm_final.z)
    ndx_dict = {found_states[i]: i for i in range(len(found_states))}

    count_matrix = np.zeros([nclusters, nclusters])

    nT = ihmm_final.nT
    for frame in range(
            1, nT -
            1):  # start at frame 1. May need to truncate more as equilibration
        transitioned_from = [ndx_dict[i] for i in ihmm_final.z[:, frame - 1]]
        transitioned_to = [ndx_dict[i] for i in ihmm_final.z[:, frame]]
        for pair in zip(transitioned_from, transitioned_to):
            count_matrix[pair[0], pair[1]] += 1

    # Make sure there are no zero-rows. This can happen in the rare case where the last entry of
    # a sequence is its own unique state, so it doesn't ever transition out.
    for i, row in enumerate(count_matrix):
        if row.sum() == 0:
            count_matrix[i, :] = np.ones(
                row.size
            )  # give uniform probability to transitions out of this rarely accessed state.

    # The following is very similar to ihmm3.pi_z. The difference is due to the dirichlet process.
    # Row-normalise the counts
    transition_matrix = (count_matrix.T / count_matrix.sum(axis=1)).T

    # Initial state distribution: normalised counts of each state at the first frame
    init_state = ihmm_final.z[:, 0]
    pi_init = np.zeros([nclusters])
    for i, c in enumerate(ihmm_final.found_states):
        pi_init[i] = np.where(init_state == c)[0].size

    pi_init /= pi_init.sum()

    final_parameters = {
        'A': A_final,
        'sigma': sigma_final,
        'mu': mu,
        'self_T': T,
        'T': transition_matrix,
        'pi_init': pi_init,
        'z': ihmm_final.z,
        'ihmmr': ihmmr,
        'ihmm': ihmm,
        'all_state_params': all_state_params,
        'ihmm_final': ihmm_final,
        'T_distribution': ihmm_final.convergence['T']
    }

    # The output file name encodes the clustering settings that were used
    if combine_clusters:

        file_rw.save_object(
            final_parameters,
            'saved_parameters/final_parameters_agglomerative_%s_%s_combined_%d.pl'
            % (res, cluster_vars, tot_clusters))

    else:

        if nclusters_A is None:

            file_rw.save_object(
                final_parameters,
                'saved_parameters/final_parameters_agglomerative_%s_%s_dtsigma%.2f_dtA%.2f.pl'
                % (res, cluster_vars, dt_sigma, dt_A))

        else:

            file_rw.save_object(
                final_parameters,
                'saved_parameters/final_parameters_agglomerative_%s_%s_nsigma%d_nA%d_nr%d_nT%d.pl'
                % (res, cluster_vars, nclusters_sigma, nclusters_A,
                   nclusters_r, nclusters_T))

    return final_parameters
def ihmm(res, niter=100, cluster=1):
    """Fit per-trajectory IHMMs, cluster the mean-zeroed state parameters and save them.

    Steps, in order:

    1. fit one IHMM per trajectory on the centre-of-mass data;
    2. re-fit each on radial coordinates with the state sequence fixed;
    3. subtract the state means and re-fit each trajectory with the
       zero-mean ('MNIW') prior;
    4. cluster the per-state A / sigma estimates of all trajectories;
    5. re-fit one model over all trajectories on the clustered state
       sequence and average its parameters over trajectories;
    6. build the transition matrix and initial distribution, save and return.

    :param res: identifier interpolated into the input and output file names.
    :param niter: inference iterations for steps 1, 3 and 5 (step 2 uses 10).
    :param cluster: index into ``['sig_diags', 'sig_A_diags', 'sig_A_eigs']``:
        cluster on sigma only (diags), on A and sigma (diags), or on A and
        sigma (eigs).

    :return: dict with keys 'A', 'sigma', 'mu' (all zeros), 'T', 'pi_init'.
    """

    # Imported up front so a missing package fails before any inference is run.
    from hdphmm.cluster import Cluster
    from LLC_Membranes.llclib import file_rw

    cluster_variables = ['sig_diags', 'sig_A_diags', 'sig_A_eigs']
    cluster_vars = cluster_variables[cluster]

    difference = False  # take first order difference of solute trajectories
    observation_model = 'AR'  # assume an autoregressive model (that's the only model implemented)
    order = 1  # autoregressive order
    max_states = 100  # More is usually better
    traj_no = np.arange(24).tolist()  # indices of the trajectories to analyse
    first_frame = 7000  # frame after which simulation is equilibrated
    dim = [0, 1, 2]  # dimensions of trajectory to keep
    prior = 'MNIW-N'  # MNIW-N (includes means) or MNIW (forces means to zero)
    keep_xy = True
    save_every = 1

    # You can define a dictionary with some spline parameters
    spline_params = {'npts_spline': 10, 'save': True, 'savename': 'trajectories/spline_%s.pl' % res}

    # The template was previously passed on without substituting res, leaving
    # a literal '%s' in the file name.
    com_savename = 'com_xy_radial_%s.pl' % res

    com = 'trajectories/com_xy_radial_%s.pl' % res  # center of mass trajectories. If it exists, we can skip loading the MD trajectory and just load this
    gro = 'berendsen.gro'

    ntraj = len(traj_no)

    # We will be applying the IHMM to each trajectory independently
    ihmm = [[] for _ in range(ntraj)]

    for t in traj_no:

        ihmm[t] = hdphmm.InfiniteHMM(com, traj_no=t, load_com=True, difference=difference,
                                     observation_model=observation_model, order=order, max_states=max_states,
                                     first_frame=first_frame, dim=dim, spline_params=spline_params, prior=prior,
                                     hyperparams=None, keep_xy=keep_xy, com_savename=com_savename, gro=gro,
                                     radial=True, save_com=True, save_every=save_every)

    for t in traj_no:
        ihmm[t].inference(niter)

    for t in traj_no:
        ihmm[t]._get_params(quiet=True)

    ihmmr = [[] for _ in range(ntraj)]

    # don't need a lot of iterations because we aren't using these parameters and the state sequence is fixed. The means are simple means
    for t in traj_no:

        # Column 0: norm of the first two COM components; column 1: the third component
        radial = np.zeros([ihmm[t].com.shape[0], 1, 2])
        radial[:, 0, 0] = np.linalg.norm(ihmm[t].com[:, 0, :2], axis=1)
        radial[:, 0, 1] = ihmm[t].com[:, 0, 2]

        ihmmr[t] = hdphmm.InfiniteHMM((radial, ihmm[t].dt), traj_no=[0], load_com=False, difference=False,
                                      order=1, max_states=100,
                                      dim=[0, 1], spline_params=spline_params, prior='MNIW-N',
                                      hyperparams=None, save_com=False, state_sequence=ihmm[t].z)

        ihmmr[t].inference(10)

    for t in traj_no:
        ihmmr[t]._get_params(traj_no=0)

    mean_zero = np.array([ihmmr[t].subtract_mean(traj_no=0, simple_mean=True) for t in traj_no])

    ihmm_zeroed = [[] for _ in range(ntraj)]

    for t in traj_no:

        zeroed = mean_zero[t, :, np.newaxis, :]

        # the first 'order' terms (where 'order' represented the autoregressive order) do not get a state assigned because
        # they are used to predict the state of the first possible data point at index 'order' + 1. Therefore, the first
        # value in the clustered state sequence should be discarded and everything shifted by one index.
        ihmm_zeroed[t] = hdphmm.InfiniteHMM((zeroed, ihmmr[t].dt), traj_no=[0], load_com=False, difference=False,
                                            order=1, max_states=100,
                                            dim=[0, 1], spline_params=spline_params, prior='MNIW',
                                            hyperparams=None, save_com=False, state_sequence=ihmmr[t].z[:, 1:])

        ihmm_zeroed[t].inference(niter)

    for t in traj_no:
        ihmm_zeroed[t]._get_params(traj_no=0)

    # Collect the A / sigma estimates of every state of every trajectory,
    # concatenated along the last axis.
    A_blocks = []
    sigma_blocks = []

    for t in traj_no:

        nstates = np.unique(ihmm_zeroed[t].z[0, :]).size

        a = np.zeros([2, 2, nstates])  # should probably include a dimension for AR order
        s = np.zeros([2, 2, nstates])

        for i in range(nstates):
            a[..., i] = ihmm_zeroed[t].converged_params['A'][:, 0, ..., i].mean(axis=0)
            s[..., i] = ihmm_zeroed[t].converged_params['sigma'][:, ..., i].mean(axis=0)

        A_blocks.append(a)
        sigma_blocks.append(s)

    A = np.concatenate(A_blocks, axis=-1)
    sigma = np.concatenate(sigma_blocks, axis=-1)

    # Reduce number of parameters via clustering.

    # default for sig_A_diags
    eigs = False
    diags = True
    params = {'A': A, 'sigma': sigma}

    if cluster_vars == 'sig_diags':
        params = {'sigma': sigma}

    elif cluster_vars == 'sig_A_eigs':
        eigs = True
        diags = False

    # Forward the selected flags; they used to be hard-coded to
    # eigs=False, diags=True, which silently disabled the 'sig_A_eigs' mode.
    clusters = Cluster(params, eigs=eigs, diags=diags)
    clusters.fit()

    nclusters = np.unique(clusters.labels).size

    print('Found %d clusters' % nclusters)

    # Hand each trajectory its own slice of the concatenated label list
    ndx = 0
    for t in traj_no:
        end = ndx + len(ihmm_zeroed[t].found_states)
        labels = clusters.labels[ndx:end]
        ndx = end
        ihmm_zeroed[t].reassign_state_sequence(clusters, labels=labels)

    # Stack the clustered state sequences of all trajectories along axis 0
    z = np.concatenate([ihmm_zeroed[t].clustered_state_sequence for t in traj_no], axis=0)

    # dt is taken from the last trajectory's model, as before
    ihmm_clustered = hdphmm.InfiniteHMM((np.moveaxis(mean_zero, 0, 1), ihmm_zeroed[traj_no[-1]].dt),
                                        traj_no=None, load_com=False, difference=False,
                                        order=1, max_states=nclusters,
                                        dim=[0, 1], spline_params=spline_params, prior='MNIW',
                                        hyperparams=None, save_com=False, state_sequence=z)

    ihmm_clustered.inference(niter)

    # Per-trajectory parameter estimates and the number of frames each
    # trajectory spends in each cluster
    A = np.zeros([ntraj, nclusters, 2, 2])
    sigma = np.zeros_like(A)
    weights = np.zeros([ntraj, nclusters])

    for t in range(ntraj):
        ihmm_clustered._get_params(traj_no=t, quiet=True)
        for i, state in enumerate(ihmm_clustered.found_states):
            A[t, state, ...] = ihmm_clustered.converged_params['A'][:, 0, ..., i].mean(axis=0)
            sigma[t, state, ...] = ihmm_clustered.converged_params['sigma'][:, ..., i].mean(axis=0)
            weights[t, state] = np.where(ihmm_clustered.z[t, :] == state)[0].size

    A_final = np.zeros([nclusters, 1, 2, 2])
    sigma_final = np.zeros([nclusters, 2, 2])
    for c in range(nclusters):
        # np.average raises ZeroDivisionError when the weights sum to zero;
        # a cluster no trajectory visits keeps all-zero parameters instead.
        if weights[:, c].sum() > 0:
            A_final[c, 0, ...] = np.average(A[:, c, ...], axis=0, weights=weights[:, c])
            sigma_final[c, ...] = np.average(sigma[:, c, ...], axis=0, weights=weights[:, c])

    count_matrix = np.zeros([nclusters, nclusters])

    for frame in range(1, ihmm_clustered.nT - 1):  # start at frame 1. May need to truncate more as equilibration
        transitioned_from = ihmm_clustered.z[:, frame - 1]
        transitioned_to = ihmm_clustered.z[:, frame]
        for src, dst in zip(transitioned_from, transitioned_to):
            count_matrix[src, dst] += 1

    # A state that is never left has an all-zero row, which would normalise
    # to NaN. Give such rows uniform transition probabilities instead.
    for i, row in enumerate(count_matrix):
        if row.sum() == 0:
            count_matrix[i, :] = np.ones(row.size)

    # The following is very similar to ihmm3.pi_z. The difference is due to the dirichlet process.
    transition_matrix = (count_matrix.T / count_matrix.sum(axis=1)).T

    # Initial distribution of states
    # NOTE(review): entries are stored by position in found_states rather than
    # by state label; confirm found_states is always 0..nclusters-1.
    init_state = ihmm_clustered.z[:, 0]
    pi_init = np.zeros([nclusters])
    for i, c in enumerate(ihmm_clustered.found_states):
        pi_init[i] = np.where(init_state == c)[0].size

    pi_init /= pi_init.sum()

    # The data were mean-zeroed, so every cluster mean is zero
    m = np.zeros([nclusters, 2])

    final_parameters = {'A': A_final, 'sigma': sigma_final, 'mu': m, 'T': transition_matrix, 'pi_init': pi_init}
    file_rw.save_object(final_parameters, 'saved_parameters/final_parameters_zero_%s_%s.pl' % (res, cluster_vars))

    return final_parameters
Example #4
0
def ihmm(res, traj_no, ntraj, hyperparams, plot=False, niter=100):
    """Fit an IHMM to one trajectory and return a generator of synthetic trajectories.

    The trajectory is fit once on the centre-of-mass data and once more on
    radial coordinates with the state sequence fixed. The per-state AR
    parameters, the empirical transition matrix and the initial state
    distribution are then handed to ``GenARData``, which generates ``ntraj``
    trajectories.

    :param res: identifier interpolated into the input file names.
    :param traj_no: index of the trajectory to analyse (printed with ``%d``).
    :param ntraj: number of trajectories to generate.
    :param hyperparams: ``hyperparams`` argument for the first IHMM fit.
    :param plot: unused; kept for interface compatibility.
    :param niter: inference iterations for both fits.

    :return: the ``GenARData`` instance after ``gen_trajectory`` has been called.
    """

    print('Trajectory %d' % traj_no)
    difference = False  # take first order difference of solute trajectories
    observation_model = 'AR'  # assume an autoregressive model (that's the only model implemented)
    order = 1  # autoregressive order
    max_states = 100  # More is usually better
    dim = [0, 1, 2]  # dimensions of trajectory to keep
    prior = 'MNIW-N'  # MNIW-N (includes means) or MNIW (forces means to zero)
    keep_xy = True
    save_every = 1

    # You can define a dictionary with some spline parameters
    spline_params = {
        'npts_spline': 10,
        'save': True,
        'savename': 'spline_hdphmm.pl'
    }

    com_savename = 'trajectories/com_xy_radial_%s.pl' % res

    com = 'trajectories/com_xy_radial_%s.pl' % res  # center of mass trajectories. If it exists, we can skip loading the MD trajectory and just load this
    gro = 'berendsen.gro'

    ihmm = hdphmm.InfiniteHMM(com,
                              traj_no=traj_no,
                              load_com=True,
                              difference=difference,
                              observation_model=observation_model,
                              order=order,
                              max_states=max_states,
                              dim=dim,
                              spline_params=spline_params,
                              prior=prior,
                              hyperparams=hyperparams,
                              keep_xy=keep_xy,
                              com_savename=com_savename,
                              gro=gro,
                              radial=True,
                              save_com=True,
                              save_every=save_every)

    ihmm.inference(niter)

    ihmm._get_params(quiet=True)

    # Column 0: norm of the first two COM components; column 1: the third component
    radial = np.zeros([ihmm.com.shape[0], 1, 2])
    radial[:, 0, 0] = np.linalg.norm(ihmm.com[:, 0, :2], axis=1)
    radial[:, 0, 1] = ihmm.com[:, 0, 2]

    # Re-fit on the radial data with the state sequence taken from the first fit
    ihmmr = hdphmm.InfiniteHMM((radial, ihmm.dt),
                               traj_no=[0],
                               load_com=False,
                               difference=False,
                               order=1,
                               max_states=100,
                               dim=[0, 1],
                               spline_params=spline_params,
                               prior='MNIW-N',
                               hyperparams=None,
                               save_com=False,
                               state_sequence=ihmm.z)

    ihmmr.inference(niter)
    ihmmr._get_params(traj_no=0)

    estimated_states = ihmmr.z[0, :]

    # Drop states that occur exactly once, at the very end of the trajectory
    # (rare). A new list is built instead of deleting from the list being
    # iterated, which used to skip the element after each deletion. The
    # original position of every kept state is remembered for the parameter
    # lookup below.
    kept = []  # (position among all unique states, state label)
    for position, state in enumerate(np.unique(estimated_states)):

        occurrences = np.where(estimated_states == state)[0]

        if len(occurrences) == 1 and occurrences[0] >= ihmmr.nT - 2:
            continue

        kept.append((position, state))

    found_states = [state for _, state in kept]
    ihmmr.found_states = found_states

    nstates = len(found_states)

    A = np.zeros([nstates, 1, 2,
                  2])  # should probably include a dimension for AR order
    sigma = np.zeros([nstates, 2, 2])
    mu = np.zeros([nstates, 2])

    # NOTE(review): assumes the last axis of converged_params is ordered like
    # the sorted unique states of z (computed before any state was dropped),
    # so parameters are read at the state's original position -- confirm.
    for i, (position, _) in enumerate(kept):

        A[i, 0, ...] = ihmmr.converged_params['A'][:, 0, ..., position].mean(axis=0)
        sigma[i, ...] = ihmmr.converged_params['sigma'][:, ..., position].mean(axis=0)

        # we want the unconditional mean
        mucond = ihmmr.converged_params['mu'][..., position].mean(
            axis=0)  # conditional mean
        mu[i, :] = np.linalg.inv(np.eye(2) -
                                 A[i, 0, ...]) @ mucond  # unconditional mean

    # Map state labels to consecutive row/column indices of the count matrix
    ndx_dict = {state: i for i, state in enumerate(found_states)}

    count_matrix = np.zeros([nstates, nstates])

    for frame in range(
            1, ihmmr.nT -
            1):  # start at frame 1. May need to truncate more as equilibration
        previous = ihmmr.z[:, frame - 1]
        current = ihmmr.z[:, frame]

        # Frames that involve a dropped state contribute no transitions
        if any(s not in ndx_dict for s in previous) or any(
                s not in ndx_dict for s in current):
            continue

        for src, dst in zip(previous, current):
            count_matrix[ndx_dict[src], ndx_dict[dst]] += 1

    # The following is very similar to ihmm3.pi_z. The difference is due to the dirichlet process.
    # Row-normalise the counts
    transition_matrix = (count_matrix.T / count_matrix.sum(axis=1)).T

    # Initial distribution of states
    init_state = ihmmr.z[:, 0]
    pi_init = np.zeros([nstates])
    for i, c in enumerate(found_states):
        pi_init[i] = np.where(init_state == c)[0].size

    pi_init /= pi_init.sum()

    final_parameters = {
        'A': A,
        'sigma': sigma,
        'mu': mu,
        'T': transition_matrix,
        'pi_init': pi_init
    }

    # The saved MSD object is only used to set the length of the generated trajectories
    MD_MSD = file_rw.load_object('trajectories/%s_msd.pl' % res)
    nsteps = MD_MSD.MSD_average.shape[0]

    trajectory_generator = GenARData(params=final_parameters)
    trajectory_generator.gen_trajectory(nsteps, ntraj, bound_dimensions=[0])

    return trajectory_generator