def load_trjs_or_features(args):
    """Load the data to cluster, as precomputed features or raw trajectories.

    If ``args.features`` is given, features are loaded directly from disk.
    Otherwise, trajectories are loaded (restricted to ``args.atoms``) and
    wrapped in an ``md.Trajectory`` so downstream code (e.g. ``md.rmsd``)
    can consume them.

    Returns
    -------
    lengths : list
        Per-trajectory frame counts.
    data : features or md.Trajectory
        The loaded data, ready for clustering.
    """
    if args.features:
        # Precomputed features: nothing to do but read them back.
        lengths, data = load_features(args.features, stride=args.subsample)
        return lengths, data

    assert args.trajectories
    assert len(args.trajectories) == len(args.topologies)

    # Summarize how many trajectory files map to each topology, for the log.
    targets = {}
    for topf, trjfs in zip(args.topologies, args.trajectories):
        targets[os.path.basename(topf)] = "%s files" % len(trjfs)
    logger.info("Beginning clustering; targets:\n%s",
                json.dumps(targets, indent=4))

    with timed("Loading trajectories took %.1f s.", logger.info):
        lengths, xyz, select_top = load_trajectories(
            args.topologies, args.trajectories, selections=args.atoms,
            stride=args.subsample, processes=auto_nprocs())

    logger.info("Clustering using %s atoms matching '%s'.",
                xyz.shape[1], args.atoms)

    # md.rmsd requires an md.Trajectory object, so wrap `xyz` in
    # the topology.
    data = md.Trajectory(xyz=xyz, topology=select_top)
    return lengths, data
def load_trajectories(topologies, trajectories, selections, stride, processes):
    """Load many trajectories in parallel, restricted to selected atoms.

    Parameters
    ----------
    topologies : list of str
        Topology file paths, one per trajectory set.
    trajectories : list of list of str
        Trajectory file paths, grouped by topology.
    selections : list of str
        An MDTraj atom-selection string per topology. Each selection must
        match the same number of atoms across all topologies.
    stride : int
        Subsampling interval, in frames.
    processes : int
        Number of worker processes to use for loading.

    Returns
    -------
    lengths : list
        Per-trajectory frame counts.
    xyz : np.ndarray
        Concatenated coordinates of the selected atoms.
    top : md.Topology
        Subset topology matching the selection (taken from the last
        topology file; assumes all topologies are equivalent under the
        selection — the atom-count check below enforces this).
    """
    # Fast-fail on a bad selection string before any expensive I/O.
    for top, selection in zip(topologies, selections):
        sentinel_trj = md.load(top)
        try:
            # noop, but causes fast-fail w/bad args.atoms
            sentinel_trj.top.select(selection)
        # BUGFIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; narrow to Exception.
        except Exception:
            raise exception.ImproperlyConfigured(
                ("The provided selection '{s}' didn't match the topology "
                 "file, {t}").format(s=selection, t=top))

    flat_trjs = []
    configs = []
    n_inds = None

    for topfile, trjset, selection in zip(topologies, trajectories,
                                          selections):
        top = md.load(topfile).top
        indices = top.select(selection)

        # Every topology's selection must pick the same number of atoms,
        # or the coordinate arrays cannot be concatenated.
        if n_inds is not None:
            if n_inds != len(indices):
                raise exception.ImproperlyConfigured(
                    ("Selection on topology %s selected %s atoms, but "
                     "other selections selected %s atoms.") %
                    (topfile, len(indices), n_inds))
        n_inds = len(indices)

        for trj in trjset:
            flat_trjs.append(trj)
            configs.append({
                'top': top,
                'stride': stride,
                'atom_indices': indices,
            })

    logger.info(
        "Loading %s trajectories with %s atoms using %s processes "
        "(subsampling %s)",
        len(flat_trjs), len(indices), processes, stride)
    assert len(indices) > 0, "No atoms selected for clustering"

    with timed("Loading took %.1f sec", logger.info):
        # BUGFIX: the `processes` parameter was accepted and logged above
        # but ignored here (the call hard-coded auto_nprocs()); honor it.
        lengths, xyz = mpi.io.load_trajectory_as_striped(
            flat_trjs, args=configs, processes=processes)

    with timed("Turned over array in %.2f min", logger.info):
        # Copy-and-rebind to compact the array (presumably releasing any
        # backing buffers from the parallel load — NOTE(review): confirm
        # intent against mpi.io internals).
        tmp_xyz = xyz.copy()
        del xyz
        xyz = tmp_xyz
    logger.info("Loaded %s frames.", len(xyz))

    # `top` and `indices` are the last loop's values; all topologies were
    # verified above to select the same number of atoms.
    return lengths, xyz, top.subset(indices)
def process_command_line(argv):
    '''Parse the command line and do a first-pass on processing them into a
    format appropriate for the rest of the script.'''

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        "--assignments", required=True,
        help="File containing assignments to states.")
    parser.add_argument(
        "--n-eigenvalues", default=5, type=int,
        help="Number of eigenvalues to compute for each lag time.")
    parser.add_argument(
        "--lag-times", default="5:100:2",
        help="List of lagtimes (in frames) to compute eigenspectra for. "
             "Format is min:max:step.")
    parser.add_argument(
        "--symmetrization", default="transpose",
        choices=['transpose', 'row_normalize', 'prior_counts'],
        help="The method to use to fit transition probabilities from "
             "the transition counts matrix.")
    parser.add_argument(
        "--trj-ids", default=None,
        help="Compute the implied timescales for only the given "
             "trajectory ids. This is useful for handling assignments "
             "for shared state space clusterings.")
    parser.add_argument(
        # BUGFIX: was auto_nprocs()/4 — true division yields a float, and
        # argparse does NOT apply type=int to defaults, so args.processes
        # could be e.g. 2.0. Floor division keeps the default an int.
        "--processes", default=max(1, auto_nprocs() // 4), type=int,
        help="Number of processes to use. Because eigenvector "
             "decompositions are thread-parallelized, this should "
             "usually be several times smaller than the number of "
             "cores available on your machine.")
    parser.add_argument(
        "--trim", default=False, action="store_true",
        help="Turn ergodic trimming on.")
    parser.add_argument(
        "--timestep", default=None, type=float,
        help='A conversion between frames and nanoseconds (i.e. frames '
             'per nanosecond) to scale the axes to physical units '
             '(rather than frames).')
    parser.add_argument(
        "--infer-timestep", default=None,
        help="An example trajectory from which to infer the conversion "
             "from frame to nanoseconds.")
    parser.add_argument(
        "--plot", default=None,
        help="Path for the implied timescales plot.")
    parser.add_argument(
        "--logscale", action='store_true',
        help="Flag to output y-axis log scale plot.")

    args = parser.parse_args(argv[1:])

    # "min:max:step" -> range of lag times, in frames.
    args.lag_times = range(*map(int, args.lag_times.split(':')))

    # "start:stop[:step]" -> slice over trajectory ids.
    if args.trj_ids is not None:
        args.trj_ids = slice(*map(int, args.trj_ids.split(':')))

    # Resolve the symmetrization name into the callable that implements it.
    if args.symmetrization == 'prior_counts':
        args.symmetrization = prior_counts
    else:
        args.symmetrization = getattr(builders, args.symmetrization)

    return args
def process_command_line(argv):
    '''Parse the command line and do a first-pass on processing them into a
    format appropriate for the rest of the script.'''

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Compute CARDS matrices for a set of trajectories "
                    "and save all matrices and dihedral mappings.\n \n"
                    "Please cite the following papers if you use CARDS with enspara:\n"
                    "[1] Singh, S. and Bowman, G.R.\n"
                    "    Journal of Chemical Theory and Computation\n"
                    "    2017 13 (4), 1509-1517\n"
                    "    DOI: 10.1021/acs.jctc.6b01181\n"
                    "\n"
                    "[2] Porter,J.R., Zimmerman, M.I., and Bowman G.R.\n"
                    "    bioRxiv 431072; doi: https://doi.org/10.1101/431072\n")

    # INPUTS
    input_args = parser.add_argument_group("Input Settings")
    #input_data_group = parser.add_mutually_exclusive_group(required=True)
    input_args.add_argument(
        '--trajectories', required=True, nargs="+", action='append',
        help="List of paths to aligned trajectory files to cluster. "
             "All file types that MDTraj supports are supported here.")
    input_args.add_argument(
        '--topology', required=True, action='append',
        help="The topology file for the trajectories.")

    # PARAMETERS
    cards_args = parser.add_argument_group("CARDS Settings")
    cards_args.add_argument(
        '--buffer-size', default=15, type=int,
        help="Size of buffer zone between rotameric states, in degrees.")
    cards_args.add_argument(
        # BUGFIX: was auto_nprocs()/4 — true division yields a float, and
        # argparse does NOT apply type=int to defaults, so args.processes
        # could be e.g. 2.0. Floor division keeps the default an int.
        "--processes", default=max(1, auto_nprocs() // 4), type=int,
        help="Number of processes to use.")

    # OUTPUT
    output_args = parser.add_argument_group("Output Settings")
    output_args.add_argument(
        '--matrices', required=True, action=readable_dir,
        help="The folder location to write the four CARDS matrices (as pickle).")
    output_args.add_argument(
        '--indices', required=True, action=readable_dir,
        help="The location to write the dihedral indices file (as CSV).")

    args = parser.parse_args(argv[1:])

    # CARDS FEATURES
    # The buffer is an angular width in degrees, so it must fit strictly
    # inside a full rotation.
    if not (0 < args.buffer_size < 360):
        raise exception.ImproperlyConfigured(
            "The given buffer size (%s) is not possible." %
            args.buffer_size)

    return args