def run(nstates, nsamples):
    """Fit a maximum-likelihood and a Bayesian HMM to RNase H trace 47.

    Reads the ``force`` variable from ``rnase-h-d10a-trace47.nc``, fits a
    ``bhmm.MLHMM`` estimator, samples models with ``bhmm.BHMM`` starting from
    the fitted model, writes three state-assignment PDFs (initial guess,
    MLHMM fit, final BHMM sample) and a LaTeX table of sample statistics.

    Parameters
    ----------
    nstates : int
        Number of states passed to ``bhmm.MLHMM`` and ``bhmm.BHMM``; also
        embedded in the PDF file names.
    nsamples : int
        Number of models requested from ``sampler.sample``.
    """
    from netCDF4 import Dataset

    tau = 0.001  # 1 kHz
    obs_label = 'force / pN'
    time_units = 's'  # seconds

    # Load force data.  Slicing the variable copies the values out of the
    # file, so the dataset can be closed before the analysis starts instead
    # of being left open for the lifetime of the process.
    ncfile = Dataset('rnase-h-d10a-trace47.nc', 'r')
    try:
        o_t = ncfile.variables['force'][:]
    finally:
        ncfile.close()
    O = [o_t]  # form list of traces

    # Initialize MLHMM.
    print("Initializing MLHMM with " + str(nstates) + " states.")
    estimator = bhmm.MLHMM(O, nstates)

    # Plot initial guess (no hidden-state trajectory is available yet).
    plots.plot_state_assignments(
        estimator.hmm,
        None,
        o_t,
        time_units=time_units,
        obs_label=obs_label,
        tau=tau,
        pdf_filename='RNAseH_trace47-guess-stateassignments-nstates' +
        str(nstates) + '.pdf')

    # Fit HMM.
    mle = estimator.fit()

    # Plot the maximum-likelihood state assignments.
    plots.plot_state_assignments(
        mle,
        mle.hidden_state_trajectories[0],
        o_t,
        time_units=time_units,
        obs_label=obs_label,
        tau=tau,
        pdf_filename='RNAseH_trace47-mlhmm-stateassignments-nstates' +
        str(nstates) + '.pdf')

    # Initialize BHMM, using MLHMM model as initial model.
    print("Initializing BHMM and running with " + str(nsamples) + " samples.")
    sampler = bhmm.BHMM(O, nstates, initial_model=mle)

    # Sample models.
    bhmm_models = sampler.sample(nsamples=nsamples,
                                 save_hidden_state_trajectory=False)

    # Generate one more sample, this time saving a hidden state trajectory
    # so that it can be plotted.
    final_models = sampler.sample(nsamples=1,
                                  save_hidden_state_trajectory=True)

    # Plot the final BHMM sample.
    model = final_models[0]
    plots.plot_state_assignments(
        model,
        model.hidden_state_trajectories[0],
        o_t,
        time_units=time_units,
        obs_label=obs_label,
        tau=tau,
        pdf_filename='RNAseH_trace47-bhmm-stateassignments-nstates' +
        str(nstates) + '.pdf')

    # Write latex table with sample statistics, using the same time unit as
    # the plots.
    conf = 0.95
    sampled_hmm = bhmm.SampledGaussianHMM(mle, bhmm_models)
    generate_latex_table(
        sampled_hmm,
        conf=conf,
        dt=tau,
        time_unit=time_units,
        caption='Bayesian HMM parameter estimates for RNAse-H data.',
        outfile='rnase-h-bhmm-statistics-table.tex')
# Example #2
# 0
def analyze_data(O, nstates, nsamples=1000, nobservations=None):
    """
    Analyze the data with the specified number of states.

    Fits a maximum-likelihood HMM, samples Bayesian HMM models starting from
    it, and writes three state-assignment PDFs (initial guess, MLHMM fit,
    final BHMM sample) under the ``figures`` directory.

    Parameters
    ----------
    O : list
        Observation trajectories.  Each element must support slicing; the
        first element is the one that is plotted and whose length is reported
        in the figure file names.
    nstates : int
        Number of states to use for analysis.
    nsamples : int, optional, default=1000
        Number of iterations to sample from the Bayesian posterior for the BHMM.
    nobservations : int, optional, default=None
        If specified, number of observations to use from each trajectory in O.
        Any falsy value (``None`` or ``0``) means "use everything".

    Returns
    -------
    list
        ``[mle, bhmm_models]``: the fitted maximum-likelihood model and the
        models returned by ``sampler.sample(nsamples=nsamples, ...)``.

    """

    # Time interval.
    tau = 0.001  # time interval (s) for plotting

    # Truncate O to number of observations.
    if nobservations:
        print("Using only %d observations" % nobservations)
        O = [o_t[0:nobservations] for o_t in O]
    else:
        nobservations = len(O[0])

    def figure_path(stage):
        # Build the output path for one plot; nstates/nobservations are
        # passed explicitly rather than pulled out of vars().
        return os.path.join(
            'figures',
            'synthetic-three-state-model-%s-nstates%d-nobs%d.pdf'
            % (stage, nstates, nobservations))

    # The first trajectory is the one shown in every plot.
    o_t = O[0]

    # Generate MLHMM.
    print("Generating MLHMM...")
    estimator = bhmm.MLHMM(O, nstates)

    print("Initial guess:")
    print(str(estimator.hmm.output_model))
    print(estimator.hmm.transition_matrix)
    print(estimator.hmm.stationary_distribution)

    # Plot initial guess (no hidden-state trajectory is available yet).
    plots.plot_state_assignments(estimator.hmm, None, o_t, time_units='s',
                                 obs_label='force / pN', tau=tau,
                                 pdf_filename=figure_path('guess'))

    print("Fitting HMM...")
    mle = estimator.fit()

    # Plot the maximum-likelihood state assignments.
    s_t = mle.hidden_state_trajectories[0]
    plots.plot_state_assignments(mle, s_t, o_t, time_units='s',
                                 obs_label='force / pN', tau=tau,
                                 pdf_filename=figure_path('mlhmm'))

    # Initialize BHMM with MLHMM model.
    print("Sampling models from BHMM...")
    sampler = bhmm.BHMM(O, nstates, initial_model=mle)
    bhmm_models = sampler.sample(nsamples=nsamples,
                                 save_hidden_state_trajectory=False)

    # Generate one more sample, this time saving a hidden state trajectory
    # so that it can be plotted.
    final_models = sampler.sample(nsamples=1,
                                  save_hidden_state_trajectory=True)

    # Plot final BHMM sample.
    model = final_models[0]
    s_t = model.hidden_state_trajectories[0]
    plots.plot_state_assignments(model, s_t, o_t, time_units='s',
                                 obs_label='force / pN', tau=tau,
                                 pdf_filename=figure_path('bhmm'))

    return [mle, bhmm_models]
# Example #3
# 0
def run(nstates, nsamples):
    """Run the MLHMM/BHMM analysis on synthetic force-spectroscopy data.

    Generates one synthetic trajectory of 50000 observations from
    ``testsystems.force_spectroscopy_model()``, fits a ``bhmm.MLHMM``,
    samples models with ``bhmm.BHMM`` starting from the fit, writes three
    state-assignment PDFs (initial guess, MLHMM fit, final BHMM sample) and
    a LaTeX table of sample statistics.

    Parameters
    ----------
    nstates : int
        Ignored: the value is immediately replaced by ``true_model.nstates``.
        NOTE(review): presumably kept so the signature matches the other
        example drivers -- confirm.
    nsamples : int
        Number of models requested from ``sampler.sample``.
    """
    # Create model.
    true_model = testsystems.force_spectroscopy_model()
    nstates = true_model.nstates  # overrides the caller-supplied value
    tau = 0.001  # time interval per observation

    # Generate synthetic data.
    print("Generating synthetic data...")
    [O, S] = true_model.generate_synthetic_observation_trajectories(
        ntrajectories=1, length=50000)

    # DEBUG
    print("synthetic observation trajectories:")
    print(O)
    print("Total state visits, min_state, max_state:")
    print(testsystems.total_state_visits(nstates, S))

    # The single generated trajectory is the one shown in every plot.
    o_t = O[0]

    # Generate MLHMM.
    print("Generating MLHMM...")
    estimator = bhmm.MLHMM(O, nstates)

    print("Initial guess:")
    print(str(estimator.hmm.output_model))
    print(estimator.hmm.transition_matrix)
    print(estimator.hmm.stationary_distribution)

    # Plot initial guess (no hidden-state trajectory is available yet).
    plots.plot_state_assignments(
        estimator.hmm,
        None,
        o_t,
        time_units='s',
        obs_label='force / pN',
        tau=tau,
        pdf_filename='synthetic-three-state-model-guess-nstates' +
        str(nstates) + '.pdf')

    print("Fitting HMM...")
    mle = estimator.fit()

    # Plot the maximum-likelihood state assignments.
    s_t = mle.hidden_state_trajectories[0]
    plots.plot_state_assignments(
        mle,
        s_t,
        o_t,
        time_units='s',
        obs_label='force / pN',
        tau=tau,
        pdf_filename='synthetic-three-state-model-mlhmm-nstates' +
        str(nstates) + '.pdf')

    # Initialize BHMM with MLHMM model.
    print("Sampling models from BHMM...")
    sampler = bhmm.BHMM(O, nstates, initial_model=mle)
    bhmm_models = sampler.sample(nsamples=nsamples,
                                 save_hidden_state_trajectory=False)

    # Generate one more sample, this time saving a hidden state trajectory
    # so that it can be plotted.
    final_models = sampler.sample(nsamples=1,
                                  save_hidden_state_trajectory=True)

    # Plot final BHMM sample.
    model = final_models[0]
    s_t = model.hidden_state_trajectories[0]
    plots.plot_state_assignments(
        model,
        s_t,
        o_t,
        time_units='s',
        obs_label='force / pN',
        tau=tau,
        pdf_filename='synthetic-three-state-model-bhmm-nstates' +
        str(nstates) + '.pdf')

    # Write latex table with sample statistics.  Unlike the plots, the table
    # is reported per step (dt=1) rather than in seconds.
    conf = 0.95
    sampled_hmm = bhmm.SampledGaussianHMM(mle, bhmm_models)
    generate_latex_table(
        sampled_hmm,
        conf=conf,
        dt=1,
        time_unit='step',
        caption=
        'Bayesian HMM parameter estimates for synthetic three-state model.',
        outfile='synthetic-three-state-model-bhmm-statistics.tex')
def run(nstates, nsamples):
    """Fit a maximum-likelihood and a Bayesian HMM to fiber3 trace 011.

    Reads the ``force`` variable from ``fiber3-trace011.nc``, keeps every
    50th observation, fits a ``bhmm.MLHMM`` estimator, samples models with
    ``bhmm.BHMM`` starting from the fitted model, writes three
    state-assignment PDFs (initial guess, MLHMM fit, final BHMM sample) and a
    LaTeX table of sample statistics.

    Parameters
    ----------
    nstates : int
        Number of states passed to ``bhmm.MLHMM`` and ``bhmm.BHMM``; also
        embedded in the output file names.
    nsamples : int
        Number of models requested from ``sampler.sample``.
    """
    from netCDF4 import Dataset

    tau = 0.001  # 1 kHz
    obs_label = 'force / pN'
    time_units = 's'  # seconds
    nsubsample = 50  # subsampling rate

    # Load force data.  The strided slice copies the selected values out of
    # the file, so the dataset can be closed before the analysis starts
    # instead of being left open for the lifetime of the process.
    ncfile = Dataset('fiber3-trace011.nc', 'r')
    try:
        o_t = ncfile.variables['force'][::nsubsample]
    finally:
        ncfile.close()

    # Each retained observation now spans nsubsample original intervals.
    tau *= nsubsample

    O = [o_t]  # form list of traces

    # Initialize MLHMM.
    print("Initializing MLHMM with " + str(nstates) + " states.")
    estimator = bhmm.MLHMM(O, nstates)

    # Plot initial guess (no hidden-state trajectory is available yet).
    plots.plot_state_assignments(
        estimator.hmm,
        None,
        o_t,
        time_units=time_units,
        obs_label=obs_label,
        tau=tau,
        pdf_filename='fiber3-trace11-guess-stateassignments-nstate' +
        str(nstates) + '.pdf')

    # Fit MLHMM
    mle = estimator.fit()

    # Plot the maximum-likelihood state assignments.
    plots.plot_state_assignments(
        mle,
        mle.hidden_state_trajectories[0],
        o_t,
        time_units=time_units,
        obs_label=obs_label,
        tau=tau,
        pdf_filename='fiber3-trace11-mlhmm-stateassignments-nstate' +
        str(nstates) + '.pdf')

    # Initialize BHMM, using MLHMM model as initial model.
    print("Initializing BHMM and running with " + str(nsamples) + " samples.")
    sampler = bhmm.BHMM(O, nstates, initial_model=mle)

    # Sample models.
    bhmm_models = sampler.sample(nsamples=nsamples,
                                 save_hidden_state_trajectory=False)

    # Generate one more sample, this time saving a hidden state trajectory
    # so that it can be plotted.
    final_models = sampler.sample(nsamples=1,
                                  save_hidden_state_trajectory=True)

    # Plot the final BHMM sample.
    model = final_models[0]
    plots.plot_state_assignments(
        model,
        model.hidden_state_trajectories[0],
        o_t,
        time_units=time_units,
        obs_label=obs_label,
        tau=tau,
        pdf_filename='fiber3-trace11-bhmm-stateassignments-nstate' +
        str(nstates) + '.pdf')

    # Write latex table with sample statistics, using the same time unit as
    # the plots.
    conf = 0.95
    sampled_hmm = bhmm.SampledGaussianHMM(mle, bhmm_models)
    generate_latex_table(sampled_hmm,
                         conf=conf,
                         dt=tau,
                         time_unit=time_units,
                         caption='BHMM model estimates for RNA hairpin data.',
                         outfile='p5ab-bhmm-statistics-table' + str(nstates) +
                         '.tex')