Example #1
def run_pcca_plus(num_macrostates,
                  assignments,
                  tProb,
                  output_dir,
                  flux_cutoff=0.0,
                  objective_function="crispness",
                  do_minimization=True):
    MacroAssignmentsFn = os.path.join(output_dir, "MacroAssignments.h5")
    MacroMapFn = os.path.join(output_dir, "MacroMapping.dat")
    ChiFn = os.path.join(output_dir, 'Chi.dat')
    AFn = os.path.join(output_dir, 'A.dat')
    arglib.die_if_path_exists([MacroAssignmentsFn, MacroMapFn, ChiFn, AFn])

    logger.info("Running PCCA+...")
    A, chi, vr, MAP = lumping.pcca_plus(tProb,
                                        num_macrostates,
                                        flux_cutoff=flux_cutoff,
                                        do_minimization=do_minimization,
                                        objective_function=objective_function)

    MSMLib.apply_mapping_to_assignments(assignments, MAP)

    np.savetxt(ChiFn, chi)
    np.savetxt(AFn, A)
    np.savetxt(MacroMapFn, MAP, "%d")
    msmbuilder.io.saveh(MacroAssignmentsFn, assignments)
    logger.info('Saved output to: %s, %s, %s, %s', ChiFn, AFn, MacroMapFn,
                MacroAssignmentsFn)
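A hedged usage sketch for run_pcca_plus above; the input paths, HDF5 key, and macrostate count are illustrative assumptions, not part of the original script.

import scipy.io
from msmbuilder import io

assignments = io.loadh("Data/Assignments.Fixed.h5", "arr_0")  # assumed key
tProb = scipy.io.mmread("Data/tProb.mtx")                     # assumed path
run_pcca_plus(num_macrostates=5, assignments=assignments, tProb=tProb,
              output_dir="Macro5/")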
Example #2
def entry_point():
    args = parser.parse_args()

    T = scipy.io.mmread(args.tProb)
    U = np.loadtxt(args.starting).astype(int)
    F = np.loadtxt(args.ending).astype(int)

    # deal with case where have single start or end state
    # TJL note: This should be done in the library now... but leaving it
    if U.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(U)
        U = tmp.copy()
    if F.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(F)
        F = tmp.copy()

    # Check output isn't taken
    output_list = ["committors.dat", "net_flux.mtx"]
    output_flist = [os.path.join(args.output_dir, f) for f in output_list]
    arglib.die_if_path_exists(output_flist)

    Fc, NFlux = run(T, U, F)

    np.savetxt(output_flist[0], Fc)
    scipy.io.mmwrite(output_flist[1], NFlux)
    logger.info("Saved output to %s", ', '.join(output_flist))
Example #3
def entry_point():
    args = parser.parse_args()

    T = scipy.io.mmread(args.tProb)
    U = np.loadtxt(args.starting).astype(int)
    F = np.loadtxt(args.ending).astype(int)

    # deal with case where have single start or end state
    # TJL note: This should be done in the library now... but leaving it
    if U.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(U)
        U = tmp.copy()
    if F.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(F)
        F = tmp.copy()

    # Check output isn't taken
    output_list = ["committors.dat", "net_flux.mtx"]
    output_flist = [os.path.join(args.output_dir, f) for f in output_list]
    arglib.die_if_path_exists(output_flist)

    Fc, NFlux = run(T, U, F)

    np.savetxt(output_flist[0], Fc)
    scipy.io.mmwrite(output_flist[1], NFlux)
    logger.info("Saved output to %s", ', '.join(output_flist))
Example #4
def run(MinLagtime, MaxLagtime, Interval, NumEigen, AssignmentsFn, symmetrize,
        nProc, output):

    arglib.die_if_path_exists(output)

    # Setup some model parameters
    try:
        Assignments = io.loadh(AssignmentsFn, 'arr_0')
    except KeyError:
        Assignments = io.loadh(AssignmentsFn, 'Data')

    NumStates = max(Assignments.flatten()) + 1
    if NumStates <= NumEigen - 1:
        NumEigen = NumStates - 2
        logger.warning(
            "Number of requested eigenvalues exceeds the rank of the transition matrix! Defaulting to the maximum possible number of eigenvalues."
        )
    del Assignments

    logger.info("Getting %d eigenvalues (timescales) for each lagtime...",
                NumEigen)
    lagTimes = range(MinLagtime, MaxLagtime + 1, Interval)
    logger.info("Building MSMs at the following lag times: %s", lagTimes)

    # Get the implied timescales (eigenvalues)
    impTimes = msm_analysis.get_implied_timescales(AssignmentsFn,
                                                   lagTimes,
                                                   n_implied_times=NumEigen,
                                                   sliding_window=True,
                                                   symmetrize=symmetrize,
                                                   n_procs=nProc)
    numpy.savetxt(output, impTimes)
    return
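A hedged usage sketch for the run() above; the lag-time range, eigenvalue count, process count, and file names are illustrative assumptions, not values from the original script.

# Scan lag times 1, 6, ..., 46 and extract 10 implied timescales at each
# (all arguments here are hypothetical).
run(MinLagtime=1, MaxLagtime=50, Interval=5, NumEigen=10,
    AssignmentsFn="Data/Assignments.h5", symmetrize="MLE",
    nProc=4, output="ImpliedTimescales.dat")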
Example #5
def run(MinLagtime, MaxLagtime, Interval, NumEigen, AssignmentsFn, symmetrize, nProc, output):

    arglib.die_if_path_exists(output)

    # Setup some model parameters
    try:
        Assignments = io.loadh(AssignmentsFn, "arr_0")
    except KeyError:
        Assignments = io.loadh(AssignmentsFn, "Data")

    NumStates = max(Assignments.flatten()) + 1
    if NumStates <= NumEigen - 1:
        NumEigen = NumStates - 2
        logger.warning(
            "Number of requested eigenvalues exceeds the rank of the transition matrix! Defaulting to the maximum possible number of eigenvalues."
        )
    del Assignments

    logger.info("Getting %d eigenvalues (timescales) for each lagtime...", NumEigen)
    lagTimes = range(MinLagtime, MaxLagtime + 1, Interval)
    logger.info("Building MSMs at the following lag times: %s", lagTimes)

    # Get the implied timescales (eigenvalues)
    impTimes = msm_analysis.get_implied_timescales(
        AssignmentsFn, lagTimes, n_implied_times=NumEigen, sliding_window=True, symmetrize=symmetrize, n_procs=nProc
    )
    numpy.savetxt(output, impTimes)
    return
Example #6
def main():
    parser = arglib.ArgumentParser(
        description='Assign data using a hierarchical clustering')
    parser.add_argument('hierarchical_clustering_zmatrix',
                        default='./Data/Zmatrix.h5',
                        help='Path to hierarchical clustering zmatrix')
    parser.add_argument('num_states', help='Number of States', default='none')
    parser.add_argument('cutoff_distance',
                        help='Maximum cophenetic distance',
                        default='none')
    parser.add_argument('assignments', type=str)
    args = parser.parse_args()

    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    if k is None and d is None:
        logger.error(
            'You need to supply either a number of states or a cutoff distance'
        )
        sys.exit(1)

    arglib.die_if_path_exists(args.assignments)

    # load the zmatrix from disk, then cut the tree (Hierarchical is
    # msmbuilder.clustering.Hierarchical, as in Example #48)
    hierarchical = Hierarchical.load_from_disk(args.hierarchical_clustering_zmatrix)
    assignments = hierarchical.get_assignments(k=k, cutoff_distance=d)

    msmbuilder.io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Example #7
def run(lagtime,
        assignments,
        symmetrize='MLE',
        input_mapping="None",
        trim=True,
        out_dir="./Data/"):

    # set the filenames for output
    FnTProb = os.path.join(out_dir, "tProb.mtx")
    FnTCounts = os.path.join(out_dir, "tCounts.mtx")
    FnMap = os.path.join(out_dir, "Mapping.dat")
    FnAss = os.path.join(out_dir, "Assignments.Fixed.h5")
    FnPops = os.path.join(out_dir, "Populations.dat")

    # make sure none are taken
    outputlist = [FnTProb, FnTCounts, FnMap, FnAss, FnPops]
    arglib.die_if_path_exists(outputlist)

    # Check for valid lag time
    assert lagtime > 0, 'Please specify a positive lag time.'

    # if given, apply mapping to assignments
    if input_mapping != "None":
        MSMLib.apply_mapping_to_assignments(assignments, input_mapping)

    n_assigns_before_trim = len(np.where(assignments.flatten() != -1)[0])

    counts = MSMLib.get_count_matrix_from_assignments(assignments,
                                                      lag_time=lagtime,
                                                      sliding_window=True)

    rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
        counts, symmetrize=symmetrize, ergodic_trimming=trim)

    if trim:
        MSMLib.apply_mapping_to_assignments(assignments, mapping)
        n_assigns_after_trim = len(np.where(assignments.flatten() != -1)[0])
        # if had input mapping, then update it
        if input_mapping != "None":
            mapping = mapping[input_mapping]
        # Print a statement showing how much data was discarded in trimming
        percent = (1.0 - float(n_assigns_after_trim) /
                   float(n_assigns_before_trim)) * 100.0
        logger.warning("Ergodic trimming discarded: %f percent of your data",
                       percent)
    else:
        logger.warning("No ergodic trimming applied")

    # Save all output
    np.savetxt(FnPops, populations)
    np.savetxt(FnMap, mapping, "%d")
    scipy.io.mmwrite(str(FnTProb), t_matrix)
    scipy.io.mmwrite(str(FnTCounts), rev_counts)
    io.saveh(FnAss, assignments)

    for output in outputlist:
        logger.info("Wrote: %s", output)

    return
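A hedged usage sketch for the run() above, reusing the 'arr_0'/'Data' key fallback seen in the other examples; the lag time and input path are illustrative assumptions.

from msmbuilder import io

try:
    assignments = io.loadh("Data/Assignments.h5", "arr_0")
except KeyError:
    assignments = io.loadh("Data/Assignments.h5", "Data")

run(lagtime=10, assignments=assignments, symmetrize="MLE", out_dir="./Data/")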
Example #8
def entry_point():
    args = parser.parse_args()

    # load args
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    tProb = scipy.io.mmread(args.tProb)

    # workaround for arglib funniness?
    if args.do_minimization in ["False", "0"]:
        args.do_minimization = False
    else:
        args.do_minimization = True

    if args.algorithm == 'PCCA':
        MacroAssignmentsFn = os.path.join(args.output_dir,
                                          "MacroAssignments.h5")
        MacroMapFn = os.path.join(args.output_dir, "MacroMapping.dat")
        arglib.die_if_path_exists([MacroAssignmentsFn, MacroMapFn])

        MAP, assignments = run_pcca(args.num_macrostates, assignments, tProb)

        np.savetxt(MacroMapFn, MAP, "%d")
        io.saveh(MacroAssignmentsFn, assignments)
        logger.info("Saved output to: %s, %s", MacroAssignmentsFn, MacroMapFn)

    elif args.algorithm == 'PCCA+':
        MacroAssignmentsFn = os.path.join(args.output_dir,
                                          "MacroAssignments.h5")
        MacroMapFn = os.path.join(args.output_dir, "MacroMapping.dat")
        ChiFn = os.path.join(args.output_dir, 'Chi.dat')
        AFn = os.path.join(args.output_dir, 'A.dat')

        arglib.die_if_path_exists([MacroAssignmentsFn, MacroMapFn, ChiFn, AFn])

        chi, A, MAP, assignments = run_pcca_plus(
            args.num_macrostates,
            assignments,
            tProb,
            args.flux_cutoff,
            objective_function=args.objective_function,
            do_minimization=args.do_minimization)

        np.savetxt(ChiFn, chi)
        np.savetxt(AFn, A)
        np.savetxt(MacroMapFn, MAP, "%d")
        io.saveh(MacroAssignmentsFn, assignments)
        logger.info('Saved output to: %s, %s, %s, %s', ChiFn, AFn, MacroMapFn,
                    MacroAssignmentsFn)
    else:
        raise ValueError('Unknown algorithm: %s' % args.algorithm)
Example #9
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    SASA = run(project, atom_indices, args.traj_fn)
    io.saveh(args.output, SASA)
Example #10
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    SASA = run(project, atom_indices, args.traj_fn)
    io.saveh(args.output, SASA)
Example #11
def entry_point():
    args = parser.parse_args()

    # load args
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    tProb = scipy.io.mmread(args.tProb)

    # workaround for arglib funniness?
    if args.do_minimization in ["False", "0"]:
        args.do_minimization = False
    else:
        args.do_minimization = True

    if args.algorithm == 'PCCA':
        MacroAssignmentsFn = os.path.join(
            args.output_dir, "MacroAssignments.h5")
        MacroMapFn = os.path.join(args.output_dir, "MacroMapping.dat")
        arglib.die_if_path_exists([MacroAssignmentsFn, MacroMapFn])

        MAP, assignments = run_pcca(args.num_macrostates, assignments, tProb)

        np.savetxt(MacroMapFn, MAP, "%d")
        io.saveh(MacroAssignmentsFn, assignments)
        logger.info("Saved output to: %s, %s", MacroAssignmentsFn, MacroMapFn)

    elif args.algorithm == 'PCCA+':
        MacroAssignmentsFn = os.path.join(
            args.output_dir, "MacroAssignments.h5")
        MacroMapFn = os.path.join(args.output_dir, "MacroMapping.dat")
        ChiFn = os.path.join(args.output_dir, 'Chi.dat')
        AFn = os.path.join(args.output_dir, 'A.dat')

        arglib.die_if_path_exists([MacroAssignmentsFn, MacroMapFn, ChiFn, AFn])

        chi, A, MAP, assignments = run_pcca_plus(args.num_macrostates,
                                                 assignments, tProb, args.flux_cutoff, objective_function=args.objective_function,
                                                 do_minimization=args.do_minimization)

        np.savetxt(ChiFn, chi)
        np.savetxt(AFn, A)
        np.savetxt(MacroMapFn, MAP, "%d")
        io.saveh(MacroAssignmentsFn, assignments)
        logger.info('Saved output to: %s, %s, %s, %s',
                    ChiFn, AFn, MacroMapFn, MacroAssignmentsFn)
    else:
        raise ValueError('Unknown algorithm: %s' % args.algorithm)
Example #12
def run(lagtime, assignments, symmetrize='MLE', input_mapping="None", trim=True, out_dir="./Data/"):

    # set the filenames for output
    FnTProb = os.path.join(out_dir, "tProb.mtx")
    FnTCounts = os.path.join(out_dir, "tCounts.mtx")
    FnMap = os.path.join(out_dir, "Mapping.dat")
    FnAss = os.path.join(out_dir, "Assignments.Fixed.h5")
    FnPops = os.path.join(out_dir, "Populations.dat")

    # make sure none are taken
    outputlist = [FnTProb, FnTCounts, FnMap, FnAss, FnPops]
    arglib.die_if_path_exists(outputlist)

    # Check for valid lag time
    assert lagtime > 0, 'Please specify a positive lag time.'

    # if given, apply mapping to assignments
    if input_mapping != "None":
        MSMLib.apply_mapping_to_assignments(assignments, input_mapping)

    n_assigns_before_trim = len(np.where(assignments.flatten() != -1)[0])

    counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=lagtime, sliding_window=True)

    rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(counts, symmetrize=symmetrize, ergodic_trimming=trim)

    if trim:
        MSMLib.apply_mapping_to_assignments(assignments, mapping)
        n_assigns_after_trim = len(np.where(assignments.flatten() != -1)[0])
        # if had input mapping, then update it
        if input_mapping != "None":
            mapping = mapping[input_mapping]
        # Print a statement showing how much data was discarded in trimming
        percent = (1.0 - float(n_assigns_after_trim) / float(n_assigns_before_trim)) * 100.0
        logger.warning("Ergodic trimming discarded: %f percent of your data", percent)
    else:
        logger.warning("No ergodic trimming applied")

    # Save all output
    np.savetxt(FnPops, populations)
    np.savetxt(FnMap, mapping, "%d")
    scipy.io.mmwrite(str(FnTProb), t_matrix)
    scipy.io.mmwrite(str(FnTCounts), rev_counts)
    io.saveh(FnAss, assignments)

    for output in outputlist:
        logger.info("Wrote: %s", output)

    return
Example #13
def check_paths(args):
    if args.alg == 'hierarchical':
        die_if_path_exists(args.hierarchical_save_zmatrix)
    else:
        die_if_path_exists(args.generators)
        if args.stride == 1:
            die_if_path_exists(args.assignments)
            die_if_path_exists(args.distances)
Example #14
def check_paths(args):
    if args.alg == 'hierarchical':
        die_if_path_exists(args.hierarchical_save_zmatrix)
    else:
        die_if_path_exists(args.generators)
        if args.stride == 1:
            die_if_path_exists(args.assignments)
            die_if_path_exists(args.distances)
Example #15
def entry_point():
    args = parser.parse_args()
    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    arglib.die_if_path_exists(args.assignments)
    if k is None and d is None:
        logger.error(
            'You need to supply either a number of states or a cutoff distance')
        sys.exit(1)

    project = Project.load_from(args.project)
    assignments = main(
        k, d, args.hierarchical_clustering_zmatrix, args.stride, project)
    io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Example #16
def entry_point():
    args = parser.parse_args()
    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    arglib.die_if_path_exists(args.assignments)
    if k is None and d is None:
        logger.error(
            'You need to supply either a number of states or a cutoff distance'
        )
        sys.exit(1)

    project = Project.load_from(args.project)
    assignments = main(k, d, args.hierarchical_clustering_zmatrix, args.stride,
                       project)
    io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Example #17
def entry_point():
    args, metric = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    project = Project.load_from(args.project)
    pdb = md.load(args.pdb)
    if args.traj_fn.lower() == 'all':
        traj_fn = None
    else:
        traj_fn = args.traj_fn

    distances = run(project, pdb, metric, traj_fn)

    io.saveh(args.output, distances)
    logger.info('Saved to %s', args.output)
Example #18
def entry_point():
    args, metric = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    project = Project.load_from(args.project)
    pdb = md.load(args.pdb)
    if args.traj_fn.lower() == 'all':
        traj_fn = None
    else:
        traj_fn = args.traj_fn

    distances = run(project, pdb, metric, traj_fn)

    io.saveh(args.output, distances)
    logger.info('Saved to %s', args.output)
Example #19
def run_pcca(num_macrostates, assignments, tProb, output_dir):
    MacroAssignmentsFn = os.path.join(output_dir, "MacroAssignments.h5")
    MacroMapFn = os.path.join(output_dir, "MacroMapping.dat")
    arglib.die_if_path_exists([MacroAssignmentsFn, MacroMapFn])

    logger.info("Running PCCA...")
    MAP = lumping.PCCA(tProb, num_macrostates)

    # MAP the new assignments and save; make sure we don't
    # mess up negative ones (i.e. where we don't have data)
    MSMLib.apply_mapping_to_assignments(assignments, MAP)

    np.savetxt(MacroMapFn, MAP, "%d")
    msmbuilder.io.saveh(MacroAssignmentsFn, assignments)

    logger.info("Saved output to: %s, %s", MacroAssignmentsFn, MacroMapFn)
Example #20
def run_pcca(num_macrostates, assignments, tProb, output_dir):
    MacroAssignmentsFn = os.path.join(output_dir, "MacroAssignments.h5")
    MacroMapFn = os.path.join(output_dir, "MacroMapping.dat")
    arglib.die_if_path_exists([MacroAssignmentsFn, MacroMapFn])

    logger.info("Running PCCA...")
    MAP = lumping.PCCA(tProb, num_macrostates)

    # MAP the new assignments and save; make sure we don't
    # mess up negative ones (i.e. where we don't have data)
    MSMLib.apply_mapping_to_assignments(assignments, MAP)

    np.savetxt(MacroMapFn, MAP, "%d")
    msmbuilder.io.saveh(MacroAssignmentsFn, assignments)
    
    logger.info("Saved output to: %s, %s", MacroAssignmentsFn, MacroMapFn)
Example #21
def entry_point():
    args = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    try:
        assignments = io.loadh(args.assignments, 'arr_0')
        distances = io.loadh(args.distances, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
        distances = io.loadh(args.distances, 'Data')

    trimmed = run(assignments, distances, args.rmsd_cutoff)

    io.saveh(args.output, trimmed)
    logger.info('Saved output to %s', args.output)
Example #22
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    LagTimes = args.lagtime.split(',')
    MinLagtime = int(LagTimes[0])
    MaxLagtime = int(LagTimes[1])

    # Pass the symmetric flag
    if args.symmetrize in ["None", "none", None]:
        args.symmetrize = None

    impTimes = run(MinLagtime, MaxLagtime, args.interval, args.eigvals,
                   args.assignments, (not args.notrim), args.symmetrize,
                   args.procs)
    np.savetxt(args.output, impTimes)
    logger.info("Saved output to %s", args.output)
Example #23
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    LagTimes = args.lagtime.split(',')
    MinLagtime = int(LagTimes[0])
    MaxLagtime = int(LagTimes[1])

    # Pass the symmetric flag
    if args.symmetrize in ["None", "none", None]:
        args.symmetrize = None

    impTimes = run(
        MinLagtime, MaxLagtime, args.interval, args.eigvals, args.assignments,
        (not args.notrim), args.symmetrize, args.procs)
    np.savetxt(args.output, impTimes)
    logger.info("Saved output to %s", args.output)
Example #24
def entry_point():
    args, prep_metric = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    min_length = int(float(args.min_length))
    # need to convert to float first because int can't
    # convert a string that is '1E3' for example...weird.

    tica_obj = run(
        prep_metric, project, args.delta_time, atom_indices=atom_indices,
        output=args.output, min_length=min_length, stride=args.stride)
Example #25
def run(LagTime, assignments, Symmetrize='MLE', input_mapping="None", Prior=0.0, OutDir="./Data/"):

    # set the filenames for output
    FnTProb = os.path.join(OutDir, "tProb.mtx")
    FnTCounts = os.path.join(OutDir, "tCounts.mtx")
    FnMap = os.path.join(OutDir, "Mapping.dat")
    FnAss = os.path.join(OutDir, "Assignments.Fixed.h5")
    FnPops = os.path.join(OutDir, "Populations.dat")
    
    # make sure none are taken
    outputlist = [FnTProb, FnTCounts, FnMap, FnAss, FnPops]
    arglib.die_if_path_exists(outputlist)

    # if given, apply mapping to assignments
    if input_mapping != "None":
        MSMLib.apply_mapping_to_assignments(assignments, input_mapping)

    n_states = np.max(assignments.flatten()) + 1
    n_assigns_before_trim = len(np.where(assignments.flatten() != -1)[0])

    rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
        assignments, lag_time=LagTime, symmetrize=Symmetrize,
        sliding_window=True, trim=True)

    MSMLib.apply_mapping_to_assignments(assignments, mapping)
    n_assigns_after_trim = len(np.where(assignments.flatten() != -1)[0])

    # if had input mapping, then update it
    if input_mapping != "None":
        mapping = mapping[input_mapping]

    # Print a statement showing how much data was discarded in trimming
    percent = (1.0 - float(n_assigns_after_trim) /
               float(n_assigns_before_trim)) * 100.0
    logger.warning("Ergodic trimming discarded: %f percent of your data",
                   percent)

    # Save all output
    np.savetxt(FnPops, populations)
    np.savetxt(FnMap, mapping, "%d")
    scipy.io.mmwrite(str(FnTProb), t_matrix)
    scipy.io.mmwrite(str(FnTCounts), rev_counts)
    msmbuilder.io.saveh(FnAss, assignments)

    for output in outputlist:
        logger.info("Wrote: %s", output)

    return
Example #26
def run_pcca_plus(num_macrostates, assignments, tProb, output_dir,
                  flux_cutoff=0.0, objective_function="crispness",
                  do_minimization=True):
    MacroAssignmentsFn = os.path.join(output_dir, "MacroAssignments.h5")
    MacroMapFn = os.path.join(output_dir, "MacroMapping.dat")
    ChiFn = os.path.join(output_dir, 'Chi.dat')
    AFn = os.path.join(output_dir, 'A.dat')
    arglib.die_if_path_exists([MacroAssignmentsFn, MacroMapFn, ChiFn, AFn])

    logger.info("Running PCCA+...")
    A, chi, vr, MAP = lumping.pcca_plus(
        tProb, num_macrostates, flux_cutoff=flux_cutoff,
        do_minimization=do_minimization, objective_function=objective_function)

    MSMLib.apply_mapping_to_assignments(assignments, MAP)

    np.savetxt(ChiFn, chi)
    np.savetxt(AFn, A)
    np.savetxt(MacroMapFn, MAP, "%d")
    msmbuilder.io.saveh(MacroAssignmentsFn, assignments)
    logger.info('Saved output to: %s, %s, %s, %s', ChiFn, AFn, MacroMapFn,
                MacroAssignmentsFn)
Example #27
def entry_point():
    args = parser.parse_args()

    T = scipy.io.mmread(args.tProb)
    state = int(args.state)
    print(args.state, state)

    # Check output isn't taken
    if state == -1:
        base_filename = "PairwiseMFPTs.dat"
    else:
        base_filename = "MFPTs_%d.dat" % state

    output_filename = os.path.join(args.output_dir, base_filename)
    arglib.die_if_path_exists(output_filename)

    MFPTs = run(T, state)

    np.savetxt(output_filename, MFPTs)
    logger.info("Saved output to %s" % output_filename)
Example #28
def entry_point():
    args, prep_metric = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    min_length = int(float(args.min_length))
    # need to convert to float first because int can't
    # convert a string that is '1E3' for example...weird.

    tica_obj = run(prep_metric,
                   project,
                   args.delta_time,
                   atom_indices=atom_indices,
                   output=args.output,
                   min_length=min_length,
                   stride=args.stride)
Example #29
def main():
    parser = arglib.ArgumentParser(description='Assign data using a hierarchical clustering')
    parser.add_argument('hierarchical_clustering_zmatrix', default='./Data/Zmatrix.h5',
                        help='Path to hierarchical clustering zmatrix')
    parser.add_argument('num_states', help='Number of States', default='none')
    parser.add_argument('cutoff_distance', help='Maximum cophenetic distance', default='none')
    parser.add_argument('assignments', type=str)
    args = parser.parse_args()
    
    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    if k is None and d is None:
        logger.error('You need to supply either a number of states or a cutoff distance')
        sys.exit(1)
    
    arglib.die_if_path_exists(args.assignments)
    
    # load the zmatrix from disk before cutting the tree (as in Example #48)
    hierarchical = Hierarchical.load_from_disk(args.hierarchical_clustering_zmatrix)
    assignments = hierarchical.get_assignments(k=k, cutoff_distance=d)
    
    msmbuilder.io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Example #30
def save(confs_by_state, states, style, format, outdir):
    "Save the results to disk"

    if style == 'sep':
        for i, trj in enumerate(confs_by_state):
            for j in xrange(len(trj)):

                fn = os.path.join(outdir, 'State%d-%d.%s' % (states[i], j,
                                                             format))
                arglib.die_if_path_exists(fn)

                logger.info("Saving file: %s" % fn)
                trj[j].save(fn)

    elif style == 'tps':
        for i, trj in enumerate(confs_by_state):
            fn = os.path.join(outdir, 'State%d.%s' % (states[i], format))
            arglib.die_if_path_exists(fn)

            logger.info("Saving file: %s" % fn)
            trj.save(fn)

    elif style == 'one':
        fn = os.path.join(outdir, 'Confs.%s' % format)
        arglib.die_if_path_exists(fn)

        logger.info("Saving file: %s" % fn)
        concatenate_trajectories(confs_by_state).save(fn)

    else:
        raise ValueError('Invalid style: %s' % style)
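A hedged usage sketch for save() above; confs_by_state comes from an upstream sampling step, so the helper named here is hypothetical and only marks where those trajectories would originate.

states = [0, 1, 2]
confs_by_state = sample_conformations(project, states)  # hypothetical helper
# 'tps' style: one file per state holding all of that state's conformations
save(confs_by_state, states, style='tps', format='pdb', outdir='./PDBs')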
Example #31
def save(confs_by_state, states, style, format, outdir):
    "Save the results to disk"

    if style == 'sep':
        for i, trj in enumerate(confs_by_state):
            for j in xrange(len(trj)):

                fn = os.path.join(outdir,
                                  'State%d-%d.%s' % (states[i], j, format))
                arglib.die_if_path_exists(fn)

                logger.info("Saving file: %s" % fn)
                trj[j].save(fn)

    elif style == 'tps':
        for i, trj in enumerate(confs_by_state):
            fn = os.path.join(outdir, 'State%d.%s' % (states[i], format))
            arglib.die_if_path_exists(fn)

            logger.info("Saving file: %s" % fn)
            trj.save(fn)

    elif style == 'one':
        fn = os.path.join(outdir, 'Confs.%s' % format)
        arglib.die_if_path_exists(fn)

        logger.info("Saving file: %s" % fn)
        concatenate_trajectories(confs_by_state).save(fn)

    else:
        raise ValueError('Invalid style: %s' % style)
Example #32
def entry_point():
    args = parser.parse_args()

    F = np.loadtxt(args.ending).astype(int)
    U = np.loadtxt(args.starting).astype(int)
    tprob = scipy.io.mmread(args.tprob)

    # deal with case where have single start or end state
    # TJL note: this should be taken care of in library now... keeping it just
    # in case
    if F.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(F)
        F = tmp.copy()
    if U.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(U)
        U = tmp.copy()

    arglib.die_if_path_exists(args.output)
    paths, bottlenecks, fluxes = run(tprob, U, F, args.number)

    io.saveh(args.output, Paths=paths, Bottlenecks=bottlenecks, fluxes=fluxes)
    logger.info('Saved output to %s', args.output)
Example #33
def main_extract(args):
    "main method for the extract subcommand"
    project = Project.load_from(args.project_info)
    close = int(args.close)
    stride = int(args.stride)
    if args.far < 0:
        far = None
    else:
        far = args.far

    die_if_path_exists(args.output)

    if args.extract_method == 'rmsd':
        atomindices = np.loadtxt(args.atomindices, dtype=int)
        AtoB, AtoC = triplets.extract_rmsd(project, close, stride, atomindices, far)

    elif args.extract_method == 'dihedral':
        if 'types' in args:
            AtoB, AtoC = triplets.extract_dihedral(project, close, stride, types=args.types, far=far)
        else:
            indices = np.loadtxt(args.indices, dtype=int)
            AtoB, AtoC = triplets.extract_dihedral(project, close, stride, indices=indices, far=far)

    elif args.extract_method == 'recipcontact':
        AtoB, AtoC = triplets.extract_recipcontact(project, close, stride, far=far)

    elif args.extract_method == 'drmsd':
        indices = np.loadtxt(args.indices, dtype=int)
        AtoB, AtoC, atom_pairs = triplets.extract_drmsd(project, close, stride, indices=indices, far=far)
        io.saveh(args.output, atom_pairs=atom_pairs)
    else:
        raise NotImplementedError("Sorry, we don't have that metric")

    #Serializer({'AtoB': AtoB, 'AtoC': AtoC, 'metric': args.extract_method}).SaveToHDF(args.output)
    io.saveh(args.output, AtoB=AtoB, AtoC=AtoC, metric=np.array(list(args.extract_method)))
    print 'Saved triplets to {}'.format(args.output)
Example #34
def entry_point():
    args = parser.parse_args()

    F = np.loadtxt(args.ending).astype(int)
    U = np.loadtxt(args.starting).astype(int)
    tprob = scipy.io.mmread(args.tprob)

    # deal with case where have single start or end state
    # TJL note: this should be taken care of in library now... keeping it just
    # in case
    if F.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(F)
        F = tmp.copy()
    if U.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(U)
        U = tmp.copy()

    arglib.die_if_path_exists(args.output)
    paths, bottlenecks, fluxes = run(tprob, U, F, args.number)

    io.saveh(args.output, Paths=paths, Bottlenecks=bottlenecks, fluxes=fluxes)
    logger.info('Saved output to %s', args.output)
Example #35
def main(args, metric):

    if args.alg == "sclarans" and args.stride != 1:
        logger.error(
            """You don't want to use a stride with sclarans. The whole point of
sclarans is to use a shrink multiple to accomplish the same purpose, but in parallel with
stochastic subsampling. If you cant fit all your frames into  memory at the same time, maybe you
could stride a little at the begining, but its not recommended."""
        )
        sys.exit(1)

    # if we have a metric that explicitly operates on a subset of indices,
    # then we provide the option to only load those indices into memory
    # WARNING: I also do something a bit dirty, and inject `None` for the
    # RMSD.atomindices to get the metric to not splice
    if isinstance(metric, metrics.RMSD):
        atom_indices = metric.atomindices
        metric.atomindices = None  # probably bad...
        logger.info("RMSD metric - loading only the atom indices required")
    else:
        atom_indices = None

    # In case the clustering / algorithm needs extra arguments, use
    # this dictionary
    extra_kwargs = {}

    # Check to be sure we won't overwrite any data
    if args.alg == "hierarchical":
        zmatrix_fn = os.path.join(args.output_dir, "ZMatrix.h5")
        die_if_path_exists(zmatrix_fn)
        extra_kwargs["zmatrix_fn"] = zmatrix_fn
    else:
        generators_fn = os.path.join(args.output_dir, "Gens.lh5")
        die_if_path_exists(generators_fn)
        if args.stride == 1:
            assignments_fn = os.path.join(args.output_dir, "Assignments.h5")
            distances_fn = os.path.join(args.output_dir, "Assignments.h5.distances")
            die_if_path_exists([assignments_fn, distances_fn])

    trajs = load_trajectories(args.project, args.stride, atom_indices)
    logger.info("Loaded %d trajs", len(trajs))

    clusterer = cluster(metric, trajs, args, **extra_kwargs)

    if not isinstance(clusterer, clustering.Hierarchical):
        generators = clusterer.get_generators_as_traj()
        logger.info("Saving %s", generators_fn)
        generators.save_to_lhdf(generators_fn)
        if args.stride == 1:
            assignments = clusterer.get_assignments()
            distances = clusterer.get_distances()

            logger.info("Since stride=1, Saving %s", assignments_fn)
            logger.info("Since stride=1, Saving %s", distances_fn)
            io.saveh(assignments_fn, assignments)
            io.saveh(distances_fn, distances)
Example #36
    parser.add_argument('symmetrize', choices=['none', 'transpose', 'mle'])
    parser.add_argument('outdir')
    args = parser.parse_args()
    assignments = Serializer.LoadData(args.assignments)

    ratemtx_fn = pjoin(args.outdir, 'K.mtx')
    tcounts_fn = pjoin(args.outdir, 'tCounts.mtx')
    unsym_fn = pjoin(args.outdir, 'tCounts.UnSym.mtx')
    mapping_fn = pjoin(args.outdir, 'Mapping.dat')
    fixed_fn = pjoin(args.outdir, 'Assignments.Fixed.h5')
    pops_fn = pjoin(args.outdir, 'Populations.dat')
    if not os.path.exists(args.outdir):
        os.mkdir(args.outdir)
    outlist = [ratemtx_fn, tcounts_fn, unsym_fn, fixed_fn, pops_fn]
    for e in outlist:
        arglib.die_if_path_exists(e)

    # if lag time is not one, there's going to be a unit mismatch between
    # what you get and what you're expecting.
    lag_time = 1
    counts, rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
        assignments, lag_time=lag_time, symmetrize=args.symmetrize)
    K = MSMLib.estimate_rate_matrix(rev_counts, assignments)

    np.savetxt(pops_fn, populations)
    np.savetxt(mapping_fn, mapping, "%d")
    scipy.io.mmwrite(ratemtx_fn, K)
    scipy.io.mmwrite(tcounts_fn, rev_counts)
    scipy.io.mmwrite(unsym_fn, counts)
    Serializer.SaveData(fixed_fn, assignments)
Example #37
def run(T, state):
    # opening reconstructed here (truncated in the original snippet):
    # the single-state branch comes before the all-states branch below
    if state != -1:
        logger.info("Calculating MFPTs to state %d" % state)
        m = tpt.calculate_mfpt([state], T)
        logger.info("Finished calculating MFPTs to state %d" % state)
    else:
        logger.info("Calculating MFPTs to all states")
        m = tpt.calculate_all_to_all_mfpt(T)
        logger.info("Finished calculating MFPTs to all states")

    return m


if __name__ == "__main__":
    args = parser.parse_args()

    T = scipy.io.mmread(args.tProb)
    state = int(args.state)
    print(args.state, state)

    # Check output isn't taken
    if state == -1:
        base_filename = "PairwiseMFPTs.dat"
    else:
        base_filename = "MFPTs_%d.dat" % state

    output_filename = os.path.join(args.output_dir, base_filename)
    arglib.die_if_path_exists(output_filename)

    MFPTs = run(T, state)

    np.savetxt(output_filename, MFPTs)
    logger.info("Saved output to %s" % output_filename)
Example #38
    of all atoms in a given trajectory, or for all trajectories in the project. The
    output is an hdf5 file which contains the SASA for each atom in each frame
    in each trajectory (or the single trajectory you passed in).""")
    parser.add_argument('project')
    parser.add_argument('atom_indices',
                        help='Indices of atoms to calculate SASA',
                        default='all')
    parser.add_argument('output',
                        help='''hdf5 file for output. Note this will
        be THREE dimensional: ( trajectory, frame, atom ), unless you just ask for
        one trajectory, in which case it will be shape (frame, atom).''',
                        default='SASA.h5')
    parser.add_argument('traj_fn',
                        help='''Pass a trajectory file if you only
        want to calculate the SASA for a single trajectory''',
                        default='all')
    args = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)

    SASA = run(project, atom_indices, args.traj_fn)

    io.saveh(args.output, SASA)
Example #39
    parser.add_argument('ending', help='''Vector of states in the
        ending/products/folded ensemble.''', default='F_states.dat')
    parser.add_argument('output_dir', default='.')
    args = parser.parse_args()
    
    T = scipy.io.mmread(args.tProb)
    U = np.loadtxt(args.starting).astype(int)
    F = np.loadtxt(args.ending).astype(int)

    # deal with case where have single start or end state
    # TJL note: This should be done in the library now... but leaving it
    if U.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(U)
        U = tmp.copy()
    if F.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(F)
        F = tmp.copy()

    # Check output isn't taken
    output_list = ["committors.dat", "net_flux.mtx"]
    output_flist = [os.path.join(args.output_dir, f) for f in output_list]
    arglib.die_if_path_exists(output_flist)
    
    Fc, NFlux = run(T, U, F)
    
    np.savetxt(output_flist[0], Fc)
    scipy.io.mmwrite(output_flist[1], NFlux)
    logger.info("Saved output to %s", ', '.join(output_flist))
Example #40
def main(args, metric):
    
    if args.alg == 'sclarans' and args.stride != 1:
        logger.error("""You don't want to use a stride with sclarans. The whole point of
sclarans is to use a shrink multiple to accomplish the same purpose, but in parallel with
stochastic subsampling. If you can't fit all your frames into memory at the same time, maybe you
could stride a little at the beginning, but it's not recommended.""")
        sys.exit(1)
        
    # if we have a metric that explicitly operates on a subset of indices,
    # then we provide the option to only load those indices into memory
    # WARNING: I also do something a bit dirty, and inject `None` for the
    # RMSD.atomindices to get the metric to not splice
    if isinstance(metric, metrics.RMSD):
        atom_indices = metric.atomindices
        metric.atomindices = None # probably bad...
        logger.info('RMSD metric - loading only the atom indices required')
    else:
        atom_indices = None

    # In case the clustering / algorithm needs extra arguments, use
    # this dictionary
    extra_kwargs = {}

    # Check to be sure we won't overwrite any data 
    if args.alg == 'hierarchical':
        zmatrix_fn = os.path.join(args.output_dir, 'ZMatrix.h5')
        die_if_path_exists(zmatrix_fn)
        extra_kwargs['zmatrix_fn'] = zmatrix_fn
    else:
        generators_fn = os.path.join(args.output_dir, 'Gens.h5') 
        die_if_path_exists(generators_fn)
        if args.stride == 1:
            assignments_fn = os.path.join(args.output_dir, 'Assignments.h5') 
            distances_fn = os.path.join(args.output_dir, 'Assignments.h5.distances')
            die_if_path_exists([assignments_fn, distances_fn])
        
    project = Project.load_from(args.project)

    if isinstance(metric, metrics.Vectorized) and not args.alg == 'hierarchical': 
        # if the metric is vectorized then
        # we can load prepared trajectories 
        # which may allow for better memory
        # efficiency
        ptrajs, which = load_prep_trajectories(project, args.stride, atom_indices, metric)
        trajectories = None
        n_trajs = len(ptrajs)

        num_frames = np.sum([len(p) for p in ptrajs])
        if num_frames != len(which):
            raise Exception("something went wrong in loading step (%d v %d)" % (num_frames, len(which)))
    else:
        trajectories = load_trajectories(project, args.stride, atom_indices)       
        ptrajs = None
        which = None
        n_trajs = len(trajectories)

    logger.info('Loaded %d trajs', n_trajs)

    clusterer = cluster(metric, trajectories, ptrajs, args, **extra_kwargs)

    if not isinstance(clusterer, clustering.Hierarchical):

        if isinstance(metric, metrics.Vectorized):
            gen_inds = clusterer.get_generator_indices()
            generators = project.load_frame(which[gen_inds,0], which[gen_inds,1])
        else:
            generators = clusterer.get_generators_as_traj()
        
        logger.info('Saving %s', generators_fn)
        generators.save(generators_fn)

        if args.stride == 1:
            assignments = clusterer.get_assignments()
            distances = clusterer.get_distances()
            
            logger.info('Since stride=1, Saving %s', assignments_fn)
            logger.info('Since stride=1, Saving %s', distances_fn)
            io.saveh(assignments_fn, assignments)
            io.saveh(distances_fn, distances)
Example #41
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)
    indices = run(args.pdb, args.atom_type)
    np.savetxt(args.output, indices, '%d')
    logger.info('Saved output to %s', args.output)
Example #42
import os, sys
from toy_systems.propagators import EDWProp
from msmbuilder import arglib, Project

parser = arglib.ArgumentParser(description='Create toy data: EDWProp')
parser.add_argument('k', description='Steepness f=0.5*k*x^2 in the harmonic directions',
    default=1, type=float)
parser.add_argument('dims', description='number of dimensions', default=2, type=int)
parser.add_argument('timesteps', description='number of timesteps',
    default=10000, type=int)
parser.add_argument('num_trajectories', description='number of trajectories',
    default=1, type=int)
parser.add_argument('outdir')
args = parser.parse_args()
print args
arglib.die_if_path_exists(args.outdir)
os.mkdir(args.outdir)

trj_dir = os.path.abspath(os.path.join(args.outdir, 'Trajectories'))
os.mkdir(trj_dir)

for i in range(args.num_trajectories):
    prop = EDWProp(args.dims, args.k)
    prop.run(args.timesteps)
    traj = prop.trajectory
    traj.SaveToHDF(os.path.join(trj_dir, 'trj{0}.h5'.format(i)))

pdbfn = os.path.abspath(os.path.join(args.outdir, 'conf.pdb'))
with open(pdbfn, 'w') as f:
    print >> f, "ATOM      1 1HH3 ACE     1       0.000  0.0   0.0\n"
Example #43
        ending/products/folded ensemble.''',
                        default='F_states.dat')
    parser.add_argument('output_dir', default='.')
    args = parser.parse_args()

    T = scipy.io.mmread(args.tProb)
    U = np.loadtxt(args.starting).astype(int)
    F = np.loadtxt(args.ending).astype(int)

    # deal with case where have single start or end state
    # TJL note: This should be done in the library now... but leaving it
    if U.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(U)
        U = tmp.copy()
    if F.shape == ():
        tmp = np.zeros(1, dtype=int)
        tmp[0] = int(F)
        F = tmp.copy()

    # Check output isn't taken
    output_list = ["committors.dat", "net_flux.mtx"]
    output_flist = [os.path.join(args.output_dir, f) for f in output_list]
    arglib.die_if_path_exists(output_flist)

    Fc, NFlux = run(T, U, F)

    np.savetxt(output_flist[0], Fc)
    scipy.io.mmwrite(output_flist[1], NFlux)
    logger.info("Saved output to %s", ', '.join(output_flist))
Example #44
    # (the opening of this snippet is truncated; it mirrors Example #8)
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    tProb = scipy.io.mmread(args.tProb)

    # workaround for arglib funniness?
    if args.do_minimization in ["False", "0"]:
        args.do_minimization = False
    else:
        args.do_minimization = True

    if args.algorithm == 'PCCA':
        MacroAssignmentsFn = os.path.join(
            args.output_dir, "MacroAssignments.h5")
        MacroMapFn = os.path.join(args.output_dir, "MacroMapping.dat")
        arglib.die_if_path_exists([MacroAssignmentsFn, MacroMapFn])

        MAP, assignments = run_pcca(args.num_macrostates, assignments, tProb)

        np.savetxt(MacroMapFn, MAP, "%d")
        io.saveh(MacroAssignmentsFn, assignments)
        logger.info("Saved output to: %s, %s", MacroAssignmentsFn, MacroMapFn)

    elif args.algorithm == 'PCCA+':
        MacroAssignmentsFn = os.path.join(
            args.output_dir, "MacroAssignments.h5")
        MacroMapFn = os.path.join(args.output_dir, "MacroMapping.dat")
        ChiFn = os.path.join(args.output_dir, 'Chi.dat')
        AFn = os.path.join(args.output_dir, 'A.dat')

        arglib.die_if_path_exists([MacroAssignmentsFn, MacroMapFn, ChiFn, AFn])
Example #45
    parser.add_argument("symmetrize", choices=["none", "transpose", "mle"])
    parser.add_argument("outdir")
    args = parser.parse_args()
    assignments = Serializer.LoadData(args.assignments)

    ratemtx_fn = pjoin(args.outdir, "K.mtx")
    tcounts_fn = pjoin(args.outdir, "tCounts.mtx")
    unsym_fn = pjoin(args.outdir, "tCounts.UnSym.mtx")
    mapping_fn = pjoin(args.outdir, "Mapping.dat")
    fixed_fn = pjoin(args.outdir, "Assignments.Fixed.h5")
    pops_fn = pjoin(args.outdir, "Populations.dat")
    if not os.path.exists(args.outdir):
        os.mkdir(args.outdir)
    outlist = [ratemtx_fn, tcounts_fn, unsym_fn, fixed_fn, pops_fn]
    for e in outlist:
        arglib.die_if_path_exists(e)

    # if lag time is not one, there's going to be a unit mismatch between
    # what you get and what you're expecting.
    lag_time = 1
    counts, rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
        assignments, lag_time=lag_time, symmetrize=args.symmetrize
    )
    K = MSMLib.estimate_rate_matrix(rev_counts, assignments)

    np.savetxt(pops_fn, populations)
    np.savetxt(mapping_fn, mapping, "%d")
    scipy.io.mmwrite(ratemtx_fn, K)
    scipy.io.mmwrite(tcounts_fn, rev_counts)
    scipy.io.mmwrite(unsym_fn, counts)
    Serializer.SaveData(fixed_fn, assignments)
Example #46
def main(args, metric):

    if args.alg == 'sclarans' and args.stride != 1:
        logger.error(
            """You don't want to use a stride with sclarans. The whole point of
sclarans is to use a shrink multiple to accomplish the same purpose, but in parallel with
stochastic subsampling. If you can't fit all your frames into memory at the same time, maybe you
could stride a little at the beginning, but it's not recommended.""")
        sys.exit(1)

    # if we have a metric that explicitly operates on a subset of indices,
    # then we provide the option to only load those indices into memory
    # WARNING: I also do something a bit dirty, and inject `None` for the
    # RMSD.atomindices to get the metric to not splice
    if isinstance(metric, metrics.RMSD):
        atom_indices = metric.atomindices
        metric.atomindices = None  # probably bad...
        logger.info('RMSD metric - loading only the atom indices required')
    else:
        atom_indices = None

    # In case the clustering / algorithm needs extra arguments, use
    # this dictionary
    extra_kwargs = {}

    # Check to be sure we won't overwrite any data
    if args.alg == 'hierarchical':
        zmatrix_fn = os.path.join(args.output_dir, 'ZMatrix.h5')
        die_if_path_exists(zmatrix_fn)
        extra_kwargs['zmatrix_fn'] = zmatrix_fn
    else:
        generators_fn = os.path.join(args.output_dir, 'Gens.h5')
        die_if_path_exists(generators_fn)
        if args.stride == 1:
            assignments_fn = os.path.join(args.output_dir, 'Assignments.h5')
            distances_fn = os.path.join(args.output_dir,
                                        'Assignments.h5.distances')
            die_if_path_exists([assignments_fn, distances_fn])

    project = Project.load_from(args.project)

    if isinstance(metric,
                  metrics.Vectorized) and not args.alg == 'hierarchical':
        # if the metric is vectorized then
        # we can load prepared trajectories
        # which may allow for better memory
        # efficiency
        ptrajs, which = load_prep_trajectories(project, args.stride,
                                               atom_indices, metric)
        trajectories = None
        n_trajs = len(ptrajs)

        num_frames = np.sum([len(p) for p in ptrajs])
        if num_frames != len(which):
            raise Exception("something went wrong in loading step (%d v %d)" %
                            (num_frames, len(which)))
    else:
        trajectories = load_trajectories(project, args.stride, atom_indices)
        ptrajs = None
        which = None
        n_trajs = len(trajectories)

    logger.info('Loaded %d trajs', n_trajs)

    clusterer = cluster(metric, trajectories, ptrajs, args, **extra_kwargs)

    if not isinstance(clusterer, clustering.Hierarchical):

        if isinstance(metric, metrics.Vectorized):
            gen_inds = clusterer.get_generator_indices()
            generators = project.load_frame(which[gen_inds, 0], which[gen_inds,
                                                                      1])
        else:
            generators = clusterer.get_generators_as_traj()

        logger.info('Saving %s', generators_fn)
        generators.save(generators_fn)

        if args.stride == 1:
            assignments = clusterer.get_assignments()
            distances = clusterer.get_distances()

            logger.info('Since stride=1, Saving %s', assignments_fn)
            logger.info('Since stride=1, Saving %s', distances_fn)
            io.saveh(assignments_fn, assignments)
            io.saveh(distances_fn, distances)
Example #47
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)
    indices = run(args.pdb, args.atom_type)
    np.savetxt(args.output, indices, '%d')
    logger.info('Saved output to %s', args.output)
Example #48
from msmbuilder import io
from msmbuilder.clustering import Hierarchical
from msmbuilder import arglib
import sys
import logging
logger = logging.getLogger('msmbuilder.scripts.AssignHierarchical')

parser = arglib.ArgumentParser(description='Assign data using a hierarchical clustering')
parser.add_argument('hierarchical_clustering_zmatrix', default='./Data/ZMatrix.h5',
    help='Path to hierarchical clustering zmatrix')
parser.add_argument('num_states', help='Number of States', default='none')
parser.add_argument('cutoff_distance', help='Maximum cophenetic distance', default='none')
parser.add_argument('assignments', type=str)

def main(k, d, zmatrix_fn):
    hierarchical = Hierarchical.load_from_disk(zmatrix_fn)
    assignments = hierarchical.get_assignments(k=k, cutoff_distance=d)
    return assignments
    
if __name__ == "__main__":
    args = parser.parse_args()
    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    arglib.die_if_path_exists(args.assignments)
    if k is None and d is None:
        logger.error('You need to supply either a number of states or a cutoff distance')
        sys.exit(1)
    
    assignments = main(k, d, args.hierarchical_clustering_zmatrix)
    io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Example #49
radius is simply the average distance of all conformations in a cluster to its
generator. It does this by averaging the distance of each assigned state to
its generator.

Output: A flat txt file, 'ClusterRadii.dat', the average RMSD distance to the
generator, measured by whatever distance metric was used in assigning."""
    )

    parser.add_argument("assignments", type=str, default="Data/Assignments.Fixed.h5")
    parser.add_argument(
        "distances",
        help="""Path to assignment
        distances file.""",
        default="Data/Assignments.h5.distances",
    )
    parser.add_argument("output", default="ClusterRadii.dat")
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    try:
        assignments = msmbuilder.io.loadh(args.assignments, "arr_0")
        distances = msmbuilder.io.loadh(args.distances, "arr_0")
    except KeyError:
        assignments = msmbuilder.io.loadh(args.assignments, "Data")
        distances = msmbuilder.io.loadh(args.distances, "Data")

    radii = main(assignments, distances)

    np.savetxt(args.output, radii)
    logger.info("Wrote: %s", args.output)
Example #50
def run(lag_time, assignments_list, symmetrize='MLE', input_mapping="None", 
        out_dir="./Data/"):

    # set the filenames for output
    tProb_fn = os.path.join(out_dir, "tProb.mtx")
    tCounts_fn = os.path.join(out_dir, "tCounts.mtx")
    map_fn = os.path.join(out_dir, "Mapping.dat")
    pops_fn = os.path.join(out_dir, "Populations.dat")
    if len(assignments_list) == 1:
        assignments_fn_list = [os.path.join(out_dir, "Assignments.Fixed.h5")]
    else:
        assignments_fn_list = [os.path.join(out_dir, 
                                            "Assignments.Fixed.%d.h5" % i)
                               for i in xrange(len(assignments_list))]


    # make sure none are taken
    output_list = [tProb_fn, tCounts_fn, map_fn, pops_fn] + assignments_fn_list
    arglib.die_if_path_exists(output_list)

    # if given, apply mapping to assignments
    for i in xrange(len(assignments_list)):
        if input_mapping != "None":
            MSMLib.apply_mapping_to_assignments(assignments_list[i], 
                                                input_mapping)

    n_assigns_before_trim = get_num_assignments(assignments_list)

    #num_states = np.unique(np.concatenate([ np.unique(ass[np.where(ass != -1)]) 
    #                                       for ass in assignments_list])).shape[0]

    num_states = np.max([np.max(ass) for ass in assignments_list]) + 1
    counts = MSMLib.get_count_matrix_from_assignments(assignments_list[0], 
                                                      n_states=None,
                                                      lag_time=lag_time, 
                                                      sliding_window=False)

    for i in xrange(1, len(assignments_list)):
        print i
        counts = counts + \
                 MSMLib.get_count_matrix_from_assignments(assignments_list[i],
                                                          n_states=num_states,
                                                          lag_time=lag_time,
                                                          sliding_window=False)

    rev_counts, t_matrix, populations, mapping = \
        MSMLib.build_msm(counts, symmetrize=symmetrize, ergodic_trimming=True)

    for i in xrange(len(assignments_list)):
        MSMLib.apply_mapping_to_assignments(assignments_list[i], mapping)

    n_assigns_after_trim = get_num_assignments(assignments_list)

    # if had input mapping, then update it
    if input_mapping != "None":
        mapping = mapping[input_mapping]

    # Print a statement showing how much data was discarded in trimming
    percent = (1.0 - float(n_assigns_after_trim) / 
                     float(n_assigns_before_trim)) * 100.0
    logger.warning("Ergodic trimming discarded: "
                   "%f percent of your data", percent)

    # Save all output
    scipy.io.mmwrite(tProb_fn, t_matrix)
    scipy.io.mmwrite(tCounts_fn, rev_counts)
    np.savetxt(map_fn, mapping, "%d")
    np.savetxt(pops_fn, populations)
    for i in xrange(len(assignments_fn_list)):
        assignments_fn = assignments_fn_list[i]
        assignments = assignments_list[i]
        msmbuilder.io.saveh(assignments_fn, assignments)

    for output in output_list:
        logger.info("Wrote: %s", output)

    return
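get_num_assignments() is not defined in this snippet; judging by how the single-array examples above count assigned frames (entries != -1), a plausible sketch:

import numpy as np

def get_num_assignments(assignments_list):
    # total assigned frames across all assignment arrays
    # (inferred from context, not the original implementation)
    return sum(int(np.count_nonzero(ass != -1)) for ass in assignments_list)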