def get_trajectory_lengths(self, keeplast=False, pool=None):
    """
    Map *_get_traj_lengths* over the trajectories of this project.

    Every trajectory returned by *self.get_trajectories()* is handed to
    *_get_traj_lengths* together with the project's output frequency and
    the *keeplast* flag, using the *map* method of the pool.

    @param keeplast=False (boolean): keep the frame between generations
    @param pool=DEFAULT_POOL (Pool): resolved through *ezpool.get_pool*
    @return the result of *pool.map* (one entry per trajectory)
    @raise ValueError: if *self.outputfreq* is not a positive float
    """

    _logger.debug('Project.get_trajectory_lengths: self.outputfreq = %s' % self.outputfreq)

    # Validate before asking for a pool so that nothing is acquired when the
    # call cannot succeed.  isinstance (instead of an exact type comparison)
    # also accepts subclasses of float; integers are still rejected.
    if not isinstance(self.outputfreq, float) or self.outputfreq <= 0:
        raise ValueError('I need to know the output frequency')

    pool = ezpool.get_pool(pool)
    myfn = functools.partial(_get_traj_lengths, self.outputfreq, keeplast=keeplast)
    return pool.map(myfn, self.get_trajectories())
def process_trajectories(proj, fn, pool=None):
    """
    Run a function over every *Trajectory* of a *Project* using a pool.

    *fn* is bound as the first argument of *_process_trajectories_processor*
    and the resulting callable is mapped over *proj.get_trajectories()*.

    @param proj (Project): the project whose trajectories are processed
    @param fn (Trajectory -> r: a function accepting a single argument of type *Trajectory*)
    @param pool=DEFAULT_POOL (Pool): resolved through *ezpool.get_pool*

    @return (sequence of r)
    """

    worker_pool = ezpool.get_pool(pool)
    apply_fn = functools.partial(_process_trajectories_processor, fn)

    _logger.info('Processing trajectories')
    return worker_pool.map(apply_fn, proj.get_trajectories())
def map(self, fn, pool=None):
    """
    Build a new project by mapping a function over this project's trajectories.

    The new *Project* is created with this project's output frequency,
    description and extra files.  Each trajectory is transformed with
    *_trajectory_map* (called with *fn* as a keyword argument) and the
    result is registered under its own *run* and *clone* attributes.

    @param fn (a -> b)
    @param pool=DEFAULT_POOL (Pool): resolved through *ezpool.get_pool*

    @return a new transformed project
    """

    pool = ezpool.get_pool(pool)
    _logger.info('Applying function %s to project' % fn)

    transformed = Project(outputfreq=self.outputfreq,
                          description=self.description,
                          extrafiles=self.extrafiles)

    source_trajs = self.get_trajectories()
    apply_fn = functools.partial(_trajectory_map, fn=fn)

    for new_traj in pool.map(apply_fn, source_trajs):
        transformed.add_trajectory(new_traj.run, new_traj.clone, new_traj)

    return transformed
def load_project(root, runs=None, clones=None, gens=None, pool=None, coalesce=False, chunksize=None, **initprojkws):
    """
    Reads the data into a Project object.

    The data files are located with the glob <root>/RUN*/CLONE*/GEN*.dat,
    optionally restricted to the requested runs/clones/gens, loaded through
    the pool and merged into a single *Project*.  The run/clone/gen counts,
    the metadata, the description and the list of extra files are then read
    from *root* when present; a warning is logged for each one that is missing.

    @param root (string): The root to the analysis directory. For example, given a file analysis/rmsd/C-alpha/RUN1234/CLONE4567/GEN4242.dat, root would be 'analysis/rmsd/C-alpha'
    @param runs=None (list of ints): list of runs to load (all when empty or None)
    @param clones=None (list of ints): a list of clones to load (all when empty or None)
    @param gens=None (list of ints): a list of generations to load (all when empty or None)
    @param pool=DEFAULT_POOL (Pool): The pool of processors to use, resolved through *ezpool.get_pool*
    @param coalesce=False (boolean): Coalesce the project trajectories.
    @param chunksize=None: accepted for interface compatibility; not used by this function
    @param **initprojkws: parameters forwarded to *_merge_projects_seq* (intended for the Project constructor)

    @return (Project)
    @raise ValueError: if the run/clone/gen file does not start with three integers
    """

    _logger.debug('load_project: initprojkws=%s' % initprojkws)

    pool = ezpool.get_pool(pool)

    def filter_rcg(paths, runs, clones, gens):
        # One list of path fragments per level.  They are real lists (not lazy
        # iterators) because every path re-scans them.
        patterns = [
            [rcg_path_name('RUN', v) for v in (runs or [])],
            [rcg_path_name('CLONE', v) for v in (clones or [])],
            [rcg_path_name('GEN', v) for v in (gens or [])],
        ]

        for p in paths:
            # A level with no requested values accepts every path; otherwise
            # at least one of its fragments must occur in the path.
            if all(not pats or any(pat in p for pat in pats) for pats in patterns):
                yield p

    _logger.info('Searching for data in %s' % root)
    myglob = os.path.join(root, 'RUN*', 'CLONE*', 'GEN*.dat')
    data_itr = glob.iglob(myglob)
    data_itr = filter_rcg(data_itr, runs, clones, gens)

    ## load the project data
    _logger.info('Loading data')
    myfn = functools.partial(_load_project_processor)
    _logger.debug('load_project: loadfn: %s' % myfn)
    projects = pool.map(myfn, data_itr)

    ## reduce to a single Project instance
    _logger.info('Accumulating project data')
    project = _merge_projects_seq(projects, **initprojkws)

    ## load the number of runs/clones/gens
    _logger.info('Reading the number of runs/clones/gens')
    rcgpath = os.path.join(root, project._rcg_file)
    if os.path.exists(rcgpath):
        with open(rcgpath) as fd:
            rcgs = fd.readline().split()
        if len(rcgs) != 3:
            # fail with a message naming the file instead of a bare unpacking error
            raise ValueError('Expected 3 values (runs clones gens) in %s, found %d' % (rcgpath, len(rcgs)))
        rs, cs, gs = [int(s) for s in rcgs]
        project.runs = rs
        project.clones = cs
        project.gens = gs
    else:
        _logger.warning('Cannot find number of runs/clones/gens at %s' % rcgpath)

    ## load the metadata
    _logger.info('Loading metadata')
    mdpath = os.path.join(root, project._metadatafile)
    if os.path.exists(mdpath):
        with open(mdpath) as fd:
            for line in fd:
                line = line.strip()
                if not line:
                    # a blank line has no key: skip it rather than record an empty entry
                    continue

                # split on the first '=' only: the values may themselves contain '='
                k, _, v = line.partition('=')
                project.add_metadata(k.strip(), v.strip())
    else:
        _logger.warning('Cannot find metadata file %s' % mdpath)

    _logger.debug('_load_project: loaded metadata: %s' % project.metadata)

    ## load the description
    descfile = os.path.join(root, project._descfile)
    _logger.info('Loading description')
    if os.path.exists(descfile):
        with open(descfile) as fd:
            desc = fd.read()
        project.set_description(desc)
    else:
        _logger.warning('Cannot find description file %s' % descfile)

    ## load the extra files
    extrasdir = os.path.join(root, project._extradir)
    _logger.info('Loading extra files')
    if os.path.exists(extrasdir):
        # NOTE(review): os.listdir yields bare file names, not paths under
        # extrasdir -- confirm this is what set_extrafiles expects.
        files = os.listdir(extrasdir)
        project.set_extrafiles(files)
    else:
        _logger.warning('Cannot find extrafiles directory %s' % extrasdir)

    if coalesce:
        _logger.info('Coalescing project')
        project.coalesce()

    return project
def write(self, root, pool=None):
    """
    Write the project out to a root directory.

    This creates the root/RUNXXXX/CLONEYYYY/GENZZZZ.dat files and, when the
    corresponding information is available, the run/clone/gen counts file,
    the metadata file, the description file and a copy of the extra files.
    A warning is logged for each piece of information that is not written.

    @param root (string): the root under which the RUN/CLONE/GEN files will be created (created if missing)
    @param pool=DEFAULT_POOL (Pool): The *Pool* to use, resolved through *ezpool.get_pool*

    Example:
      root = '/tmp/testroot'
      myproject.write(root)
      # results in /tmp/testroot/RUN1234/CLONE5678/GEN9012.dat, etc
    """

    _logger.info('Saving project under %s' % root)

    pool = ezpool.get_pool(pool)

    # The auxiliary files below are opened directly under root; without this
    # the directory would exist only if at least one trajectory was written.
    if not os.path.exists(root):
        os.makedirs(root)

    ## write the data
    for run, rundata in self.projdata.iteritems():
        for clone, traj in rundata.iteritems():
            dirname = os.path.join(root, rcg_path_name('RUN', run), rcg_path_name('CLONE', clone))
            if not os.path.exists(dirname):
                os.makedirs(dirname)

            # force evaluation
            list(pool.map(functools.partial(_save_gen, dirname, traj), traj.get_generations()))

    ## write the number of runs, clones, and generations
    if self.runs > 0 and self.clones > 0 and self.gens > 0:
        rcgpath = os.path.join(root, self._rcg_file)
        with open(rcgpath, 'w') as fd:
            fd.write('%d %d %d' % (self.runs, self.clones, self.gens))
        _logger.info('Wrote the number of runs (%s), clones (%d), and gens (%d) to %s' % (self.runs, self.clones, self.gens, rcgpath))
    else:
        # %s rather than %d: this branch can be reached with values that are
        # not numbers, for which %d would raise TypeError inside the warning.
        _logger.warning('Project: (RUNs, CLONEs, GENs) are (%s,%s,%s)' % (self.runs, self.clones, self.gens))

    ## write the metadata
    if self.metadata:
        mdpath = os.path.join(root, self._metadatafile)
        with open(mdpath, 'w') as fd:
            for k, v in self.metadata.iteritems():
                fd.write('%s = %s\n' % (k, v))
        _logger.info('Wrote metadata to %s' % mdpath)
    else:
        _logger.warning('Project: no metadata provided')

    ## write the description
    if self.description:
        with open(os.path.join(root, self._descfile), 'w') as fd:
            fd.write(self.description)
        _logger.info('Wrote description')
    else:
        _logger.warning('Project: no description provided')

    ## copy the extra files
    extrafiles = self.extrafiles
    if extrafiles and len(set(extrafiles)) == len(extrafiles):
        outdir = os.path.join(root, self._extradir)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
            _logger.info('Created %s' % outdir)

        for p in extrafiles:
            target = os.path.join(outdir, os.path.basename(p))

            # never overwrite a file that is already in place
            if os.path.exists(target):
                _logger.warning('Extrafile %s already exists: skipping' % target)
                continue

            shutil.copy(p, target)
            _logger.info('Copied %s to %s' % (p, target))
    elif extrafiles:
        # duplicates disable the copy entirely: say so instead of skipping silently
        _logger.warning('Project: extrafiles contains duplicate entries: not copying extra files')