def scale_frames(frames, frame_files, iparams):
    """Scale every frame against a common reference mean intensity.

    Unless ``iparams.flag_apply_b_by_frame`` is set (reference is then 0),
    the reference is the median of the per-frame means reported by
    ``determine_mean_I_mproc``.  Returns the list of non-None results
    reported by ``scale_frame_by_mean_I_mproc``.
    """
    avg_mode = 'average'

    if iparams.flag_apply_b_by_frame:
        mean_of_mean_I = 0
    else:
        # First pass: collect <I> of each frame, skipping failed frames.
        mean_jobs = [(path, iparams, avg_mode) for path in frame_files]
        mean_outcomes = pool_map(
            iterable=mean_jobs,
            func=determine_mean_I_mproc,
            processes=iparams.n_processors)
        frames_mean_I = flex.double()
        for outcome in mean_outcomes:
            if outcome is None:
                continue
            mean_I, _txt = outcome
            if mean_I is not None:
                frames_mean_I.append(mean_I)
        mean_of_mean_I = np.median(frames_mean_I)

    # Second pass: scale each frame using the reference computed above.
    scale_jobs = [
        (number, path, iparams, mean_of_mean_I, avg_mode)
        for number, path in zip(frames, frame_files)
    ]
    scale_outcomes = pool_map(
        iterable=scale_jobs,
        func=scale_frame_by_mean_I_mproc,
        processes=iparams.n_processors)

    observations_merge_mean_set = []
    for outcome in scale_outcomes:
        if outcome is None:
            continue
        pres, _txt = outcome
        if pres is not None:
            observations_merge_mean_set.append(pres)
    return observations_merge_mean_set
def eval_parallel(data, func_wrapper="simple", index_args=True, log=None,
                  exercise_out_of_range=False, exercise_fail=False):
    """Run ``data`` through ``easy_mp.pool_map`` and assert on the results.

    One job is submitted per index of ``data.array``; with
    ``exercise_out_of_range`` an extra, out-of-range index is appended and
    the last result is expected to carry a captured ``IndexError``.
    With ``exercise_fail`` the callable is passed as ``func`` instead of
    ``fixed_func``.
    """
    size = len(data.array)
    # A real list is required: Python 3 range objects have no append().
    args = list(range(size))
    if exercise_out_of_range:
        args.append(size)
    from libtbx import easy_mp
    if exercise_fail:
        mp_results = easy_mp.pool_map(func=data, args=args)
    else:
        if func_wrapper == "simple" and exercise_out_of_range:
            # The out-of-range case inspects captured output of the workers.
            func_wrapper = "buffer_stdout_stderr"
        mp_results = easy_mp.pool_map(
            fixed_func=data,
            args=args,
            func_wrapper=func_wrapper,
            index_args=index_args,
            log=log)
    # Compare against lists; range/zip objects never equal a list on Python 3.
    expected = list(range(3, size + 3))
    if not exercise_out_of_range:
        assert mp_results == expected
    else:
        assert mp_results[:size] == list(zip([""] * size, expected))
        assert mp_results[size][0].startswith("CAUGHT EXCEPTION:")
        assert mp_results[size][0].find("IndexError: ") > 0
        assert mp_results[size][1] is None
def run(opts, files):
    """Write a geometry file for the first input, then process all inputs.

    A single argument ending in ".lst" is replaced by the paths that
    ``read_path_list`` returns for it.
    """
    is_list_file = len(files) == 1 and files[0].endswith(".lst")
    if is_list_file:
        files = read_path_list(files[0])

    first = files[0]
    make_geom(first, os.path.basename(first) + ".geom")

    easy_mp.pool_map(fixed_func=run_each, args=files, processes=opts.nproc)
def eval_parallel(
      data,
      func_wrapper="simple",
      index_args=True,
      log=None,
      exercise_out_of_range=False,
      exercise_fail=False):
    """Exercise ``easy_mp.pool_map`` with ``data`` and check what comes back.

    Jobs are the indices of ``data.array``.  ``exercise_out_of_range`` adds
    one index past the end, for which a captured ``IndexError`` report is
    expected; ``exercise_fail`` passes ``data`` as ``func`` rather than
    ``fixed_func``.
    """
    size = len(data.array)
    # list(...) keeps this working on Python 3, where range() is immutable.
    args = list(range(size))
    if exercise_out_of_range:
        args.append(size)
    from libtbx import easy_mp
    if exercise_fail:
        mp_results = easy_mp.pool_map(func=data, args=args)
    else:
        if func_wrapper == "simple" and exercise_out_of_range:
            func_wrapper = "buffer_stdout_stderr"
        mp_results = easy_mp.pool_map(
            fixed_func=data,
            args=args,
            func_wrapper=func_wrapper,
            index_args=index_args,
            log=log)
    if exercise_out_of_range:
        # Lists on both sides: a zip object never compares equal to a list.
        in_range = list(zip([""] * size, range(3, size + 3)))
        assert mp_results[:size] == in_range
        assert mp_results[size][0].startswith("CAUGHT EXCEPTION:")
        assert mp_results[size][0].find("IndexError: ") > 0
        assert mp_results[size][1] is None
    else:
        assert mp_results == list(range(3, size + 3))
def run(opts, files):
    """Create "<basename>.geom" from the first file and run all files.

    If the only argument ends in ".lst", the file list is taken from
    ``read_path_list`` instead.
    """
    if len(files) == 1:
        only = files[0]
        if only.endswith(".lst"):
            files = read_path_list(only)

    geom_out = os.path.basename(files[0]) + ".geom"
    make_geom(files[0], geom_out)

    easy_mp.pool_map(
        fixed_func=run_each,
        args=files,
        processes=opts.nproc)
def calc_cchalf_by_removing(wdir, inp_head, inpfiles, with_sigma=False,
                            stat_bin="total", nproc=1, nproc_each=None,
                            batchjobs=None):
    """Run one XSCALE job per input file and tabulate the resulting CC1/2.

    One working directory is prepared per index of ``inpfiles`` by
    ``_calc_cchalf_by_removing_worker_1``; the jobs run either through
    ``batchjobs`` or through ``easy_mp.pool_map``.  A table is written to
    "cchalf.dat" in ``wdir`` and a sorted copy is printed.

    Returns the list of ``(index, cc_half, n_unique)`` tuples sorted by
    decreasing CC1/2, with NaN entries removed.
    """
    assert not with_sigma  # Not supported now
    assert stat_bin in ("total", "outer")

    if not os.path.exists(wdir):
        os.makedirs(wdir)

    # The context manager guarantees the table is flushed and closed even
    # when a worker raises.
    with open(os.path.join(wdir, "cchalf.dat"), "w") as datout:
        datout.write("idx exfile cc1/2(%s) Nuniq\n" % stat_bin)

        # Prep runs
        tmpdirs = [
            _calc_cchalf_by_removing_worker_1(
                wdir, inp_head, inpfiles, i, nproc_each)
            for i in range(len(inpfiles))
        ]

        # Run XSCALE
        if batchjobs is not None:
            jobs = []
            for tmpdir in tmpdirs:
                job = batchjob.Job(tmpdir, "xscale.sh", nproc=nproc_each)
                job.write_script(xscale_comm)
                batchjobs.submit(job)
                jobs.append(job)
            batchjobs.wait_all(jobs)
        else:
            easy_mp.pool_map(
                fixed_func=lambda x: util.call(xscale_comm, wdir=x),
                args=tmpdirs,
                processes=nproc)

        # Finish runs; a real list is needed because it is sorted below.
        cchalf_list = [  # (i_ex, CC1/2, Nuniq)
            _calc_cchalf_by_removing_worker_2(wdir, tmpdir, i, stat_bin)
            for i, tmpdir in enumerate(tmpdirs)
        ]

        for iex, cchalf_exi, nuniq in cchalf_list:
            datout.write("%3d %s %.4f %d\n"
                         % (iex, inpfiles[iex], cchalf_exi, nuniq))

    cchalf_list.sort(key=lambda x: -x[1])
    print
    print("# Sorted table")
    for idx, cch, nuniq in cchalf_list:
        print("%3d %-.4f %4d %s" % (idx, cch, nuniq, inpfiles[idx]))

    # Remove unuseful (failed) data: NaN is the only value unequal to itself.
    return [entry for entry in cchalf_list if entry[1] == entry[1]]
def run(params):
    """Run ``xds_sequence`` on every listed file, grouped into split dirs.

    File number i is assigned the directory "split_NNNN" under
    ``params.topdir`` with NNNN = i // params.split_num + 1.
    """
    input_files = get_file_list(params.lstin)

    top_dirs = [
        os.path.join(params.topdir,
                     "split_%.4d" % (idx // params.split_num + 1))
        for idx in xrange(len(input_files))
    ]

    data_root_dir = os.path.dirname(os.path.commonprefix(input_files))

    def process_pair(pair):
        return xds_sequence(pair[0], pair[1], data_root_dir, params)

    easy_mp.pool_map(
        fixed_func=process_pair,
        args=zip(input_files, top_dirs),
        processes=params.nproc)
def run(params):
    """Dispatch ``xds_sequence`` over (input file, split directory) pairs.

    Consecutive groups of ``params.split_num`` files share one
    "split_NNNN" directory (numbered from 1) below ``params.topdir``.
    """
    input_files = get_file_list(params.lstin)

    top_dirs = []
    for file_no in xrange(len(input_files)):
        split_no = file_no // params.split_num + 1
        top_dirs.append(os.path.join(params.topdir, "split_%.4d" % split_no))

    common = os.path.commonprefix(input_files)
    data_root_dir = os.path.dirname(common)

    def fun_local(job):
        return xds_sequence(job[0], job[1], data_root_dir, params)

    jobs = zip(input_files, top_dirs)
    easy_mp.pool_map(fixed_func=fun_local, args=jobs, processes=params.nproc)
def run(h5_files):
    """Convert the given HDF5 files.

    Several files are converted in parallel with ``convert``.  A single
    file is converted serially, one ``convert_single`` call per top-level
    key, writing "<basename>_<key>.cbf" for each.
    """
    if len(h5_files) == 0:
        return

    if len(h5_files) > 1:
        easy_mp.pool_map(fixed_func=convert, args=h5_files, processes=nproc)
        return

    h5in = h5_files[0]
    # Read the keys inside a context manager so the handle is released
    # before convert_single is called for the same path.
    with h5py.File(h5in, "r") as h5:
        tags = list(h5.keys())

    # Kept serial on purpose: parallel reading of a single file seems buggy.
    for tag in tags:
        convert_single(h5in,
                       root="/%s" % tag,
                       cbfout="%s_%s.cbf" % (os.path.basename(h5in), tag))
def compute_functional_and_gradients(O):
    """Return the refinement target and its finite-difference gradient.

    The gradient component for each image scale is (f(x + eps) - f(x)) / eps,
    computed serially or, when multiprocessing is enabled and there are at
    least two images, via ``easy_mp.pool_map``.
    """
    O.image_mdls.reset_scales(all_scales=O.x)
    f = O.image_mdls.refinement_target(
        O.work_params.usable_partiality_threshold)
    if O.initial_functional is None:
        O.initial_functional = f
    O.number_of_function_evaluations += 1

    n_mdls = O.x.size()
    from scitbx.array_family import flex
    g = flex.double()
    g.reserve(n_mdls)
    eps = O.work_params.refine_scales.finite_difference_eps

    if O.work_params.multiprocessing and n_mdls >= 2:
        from libtbx import easy_mp
        target_eps = refinement_target_eps(
            O.image_mdls, O.work_params.usable_partiality_threshold, eps)
        mp_results = easy_mp.pool_map(
            fixed_func=target_eps,
            args=range(n_mdls),
            chunksize=1,
            log=sys.stdout)
        g.resize(n_mdls)
        for i_mdl, shifted in mp_results:
            g[i_mdl] = (shifted - f) / eps
    else:
        for image, scale in zip(O.image_mdls.array, O.x):
            # Perturb one scale, evaluate, then restore it.
            image.scale = scale + eps
            shifted = O.image_mdls.refinement_target(
                O.work_params.usable_partiality_threshold)
            image.scale = scale
            g.append((shifted - f) / eps)

    print("refine scale f, |g|: %.6g, %.6g" % (f, g.norm()))
    sys.stdout.flush()
    return f, g
def mp():
    """Process all image pairs (i < j) in parallel, chunk by chunk.

    Every chunk of ``chunk_size`` pairs is handed to ``process_cp`` in a
    worker; each worker returns its ``cluster_pairs``, which are merged
    into the ``cluster_pairs`` of this process.
    """
    ij_list = [
        (i_rem, j_rem)
        for i_rem in xrange(n_imgs)
        for j_rem in xrange(i_rem + 1, n_imgs)
    ]
    # Round up: floor division would silently skip the trailing partial
    # chunk whenever len(ij_list) is not a multiple of chunk_size.
    n_chunks = (len(ij_list) + chunk_size - 1) // chunk_size
    print("%s %s" % ("Number of chunks for computing cluster pairs:",
                     n_chunks))
    print

    def process_chunk(i_chunk):
        first = i_chunk * chunk_size
        # Slicing clips the last chunk at the end of the list.
        for i_rem, j_rem in ij_list[first:first + chunk_size]:
            process_cp(i_rem, j_rem)
        return cluster_pairs

    from libtbx import easy_mp
    mp_results = easy_mp.pool_map(
        fixed_func=process_chunk,
        args=range(n_chunks),
        chunksize=1,
        log=sys.stdout)
    for cps in mp_results:
        for main, sub in zip(cluster_pairs, cps):
            main.update(sub)
def run_parallel(
        method='multiprocessing',  # multiprocessing, only choice for now
        qsub_command='qsub',       # queue command, not supported yet
        nproc=1,                   # number of processors to use
        target_function=None,      # the method to run
        kw_list=None):             # list of kw dictionaries for target_function
    """Call ``target_function`` once per entry of ``kw_list``.

    Jobs run serially when ``nproc`` is 1 or there is at most one job;
    otherwise they go through ``libtbx.easy_mp.parallel_map``.  Returns the
    results of whichever path was taken.
    """
    n = len(kw_list)  # number of jobs to run, one per kw dict

    if nproc == 1 or n <= 1:
        # No multiprocessing: call the wrapper once per job index.
        runner = run_anything(kw_list=kw_list,
                              target_function=target_function)
        results = [runner(job_no) for job_no in xrange(n)]
    elif 0:  # (method == "multiprocessing") and (sys.platform != "win32")
        # XXX Can crash 2015-10-13 TT so don't use it
        from libtbx.easy_mp import pool_map
        results = pool_map(
            func=run_anything(target_function=target_function,
                              kw_list=kw_list),
            iterable=xrange(n),
            processes=nproc)
    else:
        from libtbx.easy_mp import parallel_map
        runner = run_anything(target_function=target_function,
                              kw_list=kw_list)
        results = parallel_map(
            func=runner,
            iterable=xrange(n),
            method=method,
            processes=nproc,
            callback=None,
            preserve_exception_message=True,  # 2016-08-17
            qsub_command=qsub_command,
            use_manager=True)  # Always use manager 2015-10-13 TT
    return results
def run(params):
    """Compute per-file statistics and write them as a text table.

    The files listed in ``params.lstin`` are passed to ``calc_stats`` in
    parallel.  Output goes to ``params.datout``, which defaults to
    "<basename of lstin>.dat".  Statistics missing from a result are
    written as NaN.
    """
    if params.datout is None:
        params.datout = os.path.basename(params.lstin) + ".dat"

    xac_files = read_path_list(params.lstin)

    # "with" closes the table even if a worker or a write raises.
    with open(params.datout, "w") as ofs_dat:
        ref_v6cell = None
        if params.ref_cell is not None:
            ref_v6cell = v6cell(
                uctbx.unit_cell(params.ref_cell).niggli_cell())
            ofs_dat.write("# ref_cell= %s\n" % params.ref_cell)

        if params.n_residues is not None:
            ofs_dat.write("# n_residues= %d\n" % params.n_residues)

        ofs_dat.write(
            "file ioversigma resnatsnr1 wilsonb abdist a b c al be ga\n")

        ret = easy_mp.pool_map(
            fixed_func=lambda x: calc_stats(
                x, params.stats, params.n_residues, ref_v6cell,
                params.min_peak, params.min_peak_percentile,
                params.correct_peak),
            args=xac_files,
            processes=params.nproc)

        nan = float("nan")
        for stat in ret:
            ofs_dat.write("%s %.3f %.3f %.3f %.3e" % (
                stat["filename"],
                stat.get("ioversigma", nan),
                stat.get("resnatsnr1", nan),
                stat.get("wilsonb", nan),
                stat.get("abdist", nan)))
            ofs_dat.write(" %.3f %.3f %.3f %.2f %.2f %.2f\n" % stat["cell"])
def __init__(
        self,
        pdb_hierarchy,
        ensemble_xrs,
        restraints_manager,
        target_bond_rmsd,
        target_angle_rmsd,
        map_data,
        weight,
        nproc):
    """Run ``self.run`` on every ensemble member and collect the sites.

    All arguments are stored on the instance; the resulting coordinates
    are added to ``self.states``.
    """
    # Must stay first: locals() may only hold the constructor arguments.
    adopt_init_args(self, locals())
    self.crystal_symmetry = self.ensemble_xrs[0].crystal_symmetry()

    # initialize states collector
    hierarchy_copy = self.pdb_hierarchy.deep_copy()
    self.states = mmtbx.utils.states(pdb_hierarchy=hierarchy_copy)

    # run minimization
    if self.nproc > 1:
        from libtbx import easy_mp
        stdout_and_results = easy_mp.pool_map(
            processes=self.nproc,
            fixed_func=self.run,
            args=self.ensemble_xrs,
            func_wrapper="buffer_stdout_stderr")
        # Each entry is (captured output, result); only the result is used.
        for _captured, sites_cart in stdout_and_results:
            self.states.add(sites_cart=sites_cart)
    else:
        for member in self.ensemble_xrs:
            self.states.add(sites_cart=self.run(xray_structure=member))
def run_one(self, selection=None):
    """Run ``run_one_one`` on the (optionally selected) model.

    With ``nproc == 1`` the whole model is processed in one call and that
    call's result is returned.  Otherwise each chain is processed in a
    separate job, the per-chain values are merged into the model's B-iso
    array, the model is updated, and the merged array is returned.
    """
    model = self.mmm.model()
    if selection is not None:
        model = model.select(selection)
    values = model.get_b_iso()
    model.get_hierarchy().atoms().reset_i_seq()

    if self.nproc == 1:
        return self.run_one_one(args=[model])

    selections = []
    argss = []
    for chain in model.get_hierarchy().chains():
        chain_sel = chain.atoms().extract_i_seq()
        argss.append([model.select(chain_sel)])
        selections.append(chain_sel)  # XXX CAN BE BIG

    stdout_and_results = easy_mp.pool_map(
        processes=self.nproc,
        fixed_func=self.run_one_one,
        args=argss,
        func_wrapper="buffer_stdout_stderr")

    # Entry [1] of each item is the job result; [0] is its captured output.
    for job_no, outcome in enumerate(stdout_and_results):
        values = values.set_selected(selections[job_no], outcome[1])
    model.set_b_iso(values=values)
    return values
def run(params):
    """Find directories containing XDS.INP and evaluate each in parallel.

    ``params.topdir`` is walked (following links); the process count is
    ``params.nproc`` or, when that is None,
    ``util.get_number_of_processors()``.
    """
    print("Found xds directories:")
    xds_dirs = []
    for root, dirnames, filenames in os.walk(params.topdir,
                                             followlinks=True):
        if "XDS.INP" not in filenames:
            continue
        print("%s %s" % ("", os.path.relpath(root, params.topdir)))
        xds_dirs.append(root)

    print
    print("Start running..")

    if params.nproc is None:
        npar = util.get_number_of_processors()
    else:
        npar = params.nproc

    def fun_local(xds_dir):
        return evaluate_run(xds_dir)

    easy_mp.pool_map(fixed_func=fun_local, args=xds_dirs, processes=npar)
def postrefine_frames(i_iter, frames, frame_files, iparams, pres_set,
                      miller_array_ref, avg_mode):
    """Post-refine the given frames against a reference set.

    Returns a tuple of (non-None results, one result-or-None per frame,
    header text for this cycle).
    """
    miller_array_ref = miller_array_ref.generate_bijvoet_mates()

    header_parts = [
        'Post-refinement cycle ', str(i_iter + 1), ' (', avg_mode, ')\n',
        ' * R and CC show percent change.\n',
    ]
    txt_merge_postref = ''.join(header_parts)
    print(txt_merge_postref)

    frame_args = []
    for frame_no, frame_file, pres_in in zip(frames, frame_files, pres_set):
        frame_args.append(
            (frame_no, frame_file, iparams, miller_array_ref, pres_in,
             avg_mode))

    outcomes = pool_map(
        iterable=frame_args,
        func=postrefine_by_frame_mproc,
        processes=iparams.n_processors)

    postrefine_by_frame_good = []
    postrefine_by_frame_pres_list = []
    for outcome in outcomes:
        # A missing outcome is recorded as None to keep frame alignment.
        pres = None
        if outcome is not None:
            pres, _txt = outcome
        postrefine_by_frame_pres_list.append(pres)
        if pres is not None:
            postrefine_by_frame_good.append(pres)

    return (postrefine_by_frame_good, postrefine_by_frame_pres_list,
            txt_merge_postref)
def run(params):
    """Write a table of statistics for the files listed in ``params.lstin``.

    ``calc_stats`` is run for every listed file via ``easy_mp.pool_map``.
    The table goes to ``params.datout`` (default: basename of the list
    file plus ".dat"); absent statistics are written as NaN.
    """
    if params.datout is None:
        params.datout = os.path.basename(params.lstin) + ".dat"

    xac_files = read_path_list(params.lstin)

    ofs_dat = open(params.datout, "w")
    try:
        ref_v6cell = None
        if params.ref_cell is not None:
            ref_v6cell = v6cell(
                uctbx.unit_cell(params.ref_cell).niggli_cell())
            ofs_dat.write("# ref_cell= %s\n" % params.ref_cell)

        if params.n_residues is not None:
            ofs_dat.write("# n_residues= %d\n" % params.n_residues)

        ofs_dat.write(
            "file ioversigma resnatsnr1 wilsonb abdist a b c al be ga\n")

        def stats_for(path):
            return calc_stats(path, params.stats, params.n_residues,
                              ref_v6cell, params.min_peak,
                              params.min_peak_percentile,
                              params.correct_peak)

        ret = easy_mp.pool_map(fixed_func=stats_for,
                               args=xac_files,
                               processes=params.nproc)

        optional_keys = ("ioversigma", "resnatsnr1", "wilsonb", "abdist")
        for stat in ret:
            numbers = tuple(stat.get(key, float("nan"))
                            for key in optional_keys)
            ofs_dat.write("%s %.3f %.3f %.3f %.3e"
                          % ((stat["filename"],) + numbers))
            ofs_dat.write(" %.3f %.3f %.3f %.2f %.2f %.2f\n" % stat["cell"])
    finally:
        # Close the table even when a worker or a write fails.
        ofs_dat.close()
def run(cbf_files, params):
    """Convert the cbf files in parallel, then write geometry and beam files.

    The geometry is built from the header of the first file; in
    ``params.decompose`` mode the value returned for the last converted
    file is passed along as well.
    """
    print("Attention - assuming cbf files given belong to a single dataset")
    print
    print("%d cbf files were given." % len(cbf_files))
    print

    if params.byteoffset:
        # Imported for their side effects only.
        import yamtbx_byteoffset_h5_ext
        import pyublas

    def convert_one(path):
        return convert(path, params)

    shapes = easy_mp.pool_map(fixed_func=convert_one,
                              args=cbf_files,
                              processes=params.nproc)
    last_shape = shapes[-1]

    header = XIO.Image(cbf_files[0]).header
    if params.decompose:
        make_geom_decomposed(header, last_shape, params.geom_out)
    else:
        make_geom(header, params.geom_out)

    make_beam(params.beam_out)

    print("Done.")
    print
    print("Check %s and %s!" % (params.geom_out, params.beam_out))
def run_one(self, selection=None):
    """Run ``run_one_one`` on the (optionally selected) model.

    With ``nproc == 1`` the whole model is handled in a single call whose
    result is returned.  Otherwise one job is created per selection from
    ``macromolecule_plus_hetatms_by_chain_selections()`` (single-atom
    sub-models are skipped and logged), and the per-job values are merged
    into the model's B-iso array, which is set on the model and returned.
    """
    model = self.mmm.model()
    if selection is not None:
        model = model.select(selection)
    values = model.get_b_iso()
    model.get_hierarchy().atoms().reset_i_seq()

    if self.nproc == 1:
        return self.run_one_one(args=[model])

    argss = []
    selections = []
    for sel in model.macromolecule_plus_hetatms_by_chain_selections():
        sub_model = model.select(sel)
        if sub_model.size() == 1:
            ids = [c.id for c in sub_model.get_hierarchy().chains()]
            chain_ids = " ".join(ids)
            print("Skip one atom model, chains: (%s)" % chain_ids,
                  file=self.log)
            continue
        argss.append([sub_model])
        selections.append(sel)  # XXX CAN BE BIG

    stdout_and_results = easy_mp.pool_map(
        processes=self.nproc,
        fixed_func=self.run_one_one,
        args=argss,
        func_wrapper="buffer_stdout_stderr")

    # Entry [1] of each item is the job result; [0] is its captured output.
    for job_no, outcome in enumerate(stdout_and_results):
        values = values.set_selected(selections[job_no], outcome[1])
    model.set_b_iso(values=values)
    return values
def run(cmdargs):
    """Run ``run_one`` for a single structure or for the whole file set.

    One argument: the algorithm name; every (pdb, mtz, code) triple from
    ``get_files_sorted`` that lacks its .log/.pkl/_mc.mtz outputs is
    processed.  Three arguments: pdb file, mtz file, algorithm name.
    Usage: python example.py 4qnn.pdb 4qnn.mtz alg4
    """
    if len(cmdargs) != 1:
        assert len(cmdargs) == 3
        pdb, mtz, alg = cmdargs
        assert alg in ["alg0", "alg2", "alg4", "alg4a"]
        # NOTE(review): "None" cannot pass the assert above unless asserts
        # are disabled.
        if alg == "None":
            alg = None
        assert os.path.isfile(pdb)
        assert os.path.isfile(mtz)
        code = os.path.abspath(pdb)[:-4]
        run_one([pdb, mtz, code, alg])
        return

    alg = cmdargs[0]
    assert alg in ["alg0", "alg2", "alg4", "alg4a"]
    if alg == "None":
        alg = None
    NPROC = 70
    pdbs, mtzs, codes, sizes = get_files_sorted(pdb_files, hkl_files)

    argss = []
    for pdb, mtz, code in zip(pdbs, mtzs, codes):
        already_done = (os.path.isfile("%s.log" % code) and
                        os.path.isfile("%s.pkl" % code) and
                        os.path.isfile("%s_mc.mtz" % code))
        if not already_done:
            argss.append([pdb, mtz, code, alg])

    if NPROC > 1:
        stdout_and_results = easy_mp.pool_map(
            processes=NPROC,
            fixed_func=run_one,
            args=argss,
            func_wrapper="buffer_stdout_stderr")
    else:
        for args in argss:
            run_one(args)
def __init__(
        self,
        pdb_hierarchies,  # XXX these must be single-conformer
        reference_hierarchy=None,
        nproc=Auto,
        log=sys.stdout):
    """Validate every model and gather per-residue ensemble results.

    Residue ids are taken from the protein chains of
    ``reference_hierarchy`` (default: the first hierarchy).  Each model is
    validated through ``self.validate_single_model`` in parallel, and the
    Ramachandran/rotamer results are combined per residue into
    ``self.residue_data``.
    """
    self.pdb_hierarchies = pdb_hierarchies
    self.selection_caches = [
        h.atom_selection_cache() for h in pdb_hierarchies
    ]
    # Identity test: "== None" would dispatch to a custom __eq__ if the
    # hierarchy type defines one.
    if reference_hierarchy is None:
        reference_hierarchy = self.pdb_hierarchies[0]

    self.residue_ids = []
    self.residue_id_dict = {}
    for chain in reference_hierarchy.only_model().chains():
        if not chain.is_protein():  # TODO
            continue
        for residue_group in chain.residue_groups():
            id_str = residue_group.only_atom_group().id_str()
            self.residue_ids.append(id_str)
            self.residue_id_dict[id_str] = len(self.residue_ids) - 1

    # One independent list per reference residue.
    self.residue_ensembles = [[] for _ in self.residue_ids]
    for hierarchy in pdb_hierarchies:
        for chain in hierarchy.only_model().chains():
            if not chain.is_protein():
                continue
            for residue_group in chain.residue_groups():
                residue = residue_group.only_atom_group()
                i_res = self.residue_id_dict.get(residue.id_str())
                # Every residue must also exist in the reference.
                assert (i_res is not None)
                self.residue_ensembles[i_res].append(residue)

    self.validations = easy_mp.pool_map(
        fixed_func=self.validate_single_model,
        iterable=range(len(pdb_hierarchies)),
        processes=nproc)

    # Each validation is a (ramachandran, rotamer) pair.
    rama_by_residue = combine_model_validation_results(
        validation_objects=[rama for rama, rota in self.validations],
        ensemble_result_class=ramalyze.ramachandran_ensemble,
        residue_ids=self.residue_ids,
        ignore_unexpected_residues=False,
        log=log)
    rota_by_residue = combine_model_validation_results(
        validation_objects=[rota for rama, rota in self.validations],
        ensemble_result_class=rotalyze.rotamer_ensemble,
        residue_ids=self.residue_ids,
        ignore_unexpected_residues=False,
        log=log)
    assert len(rama_by_residue) == len(rota_by_residue) == len(
        self.residue_ids)

    self.residue_data = [
        residue_analysis(
            id_str=id_str,
            residues=self.residue_ensembles[i_res],
            rama=rama_by_residue[i_res],
            rota=rota_by_residue[i_res])
        for i_res, id_str in enumerate(self.residue_ids)
    ]
def run(params):
    """Evaluate, in parallel, every directory below topdir with an XDS.INP.

    Uses ``params.nproc`` processes, falling back to
    ``util.get_number_of_processors()`` when it is None.
    """
    xds_dirs = []
    print("Found xds directories:")
    walker = os.walk(params.topdir, followlinks=True)
    for root, dirnames, filenames in walker:
        if "XDS.INP" in filenames:
            relative = os.path.relpath(root, params.topdir)
            print("%s %s" % ("", relative))
            xds_dirs.append(root)
    print

    print("Start running..")

    npar = params.nproc
    if params.nproc is None:
        npar = util.get_number_of_processors()

    easy_mp.pool_map(
        fixed_func=lambda xds_dir: evaluate_run(xds_dir),
        args=xds_dirs,
        processes=npar)
def run(h5_files):
    """Convert HDF5 input to cbf.

    More than one file: ``convert`` is applied to each file in parallel.
    Exactly one file: each top-level key is converted serially with
    ``convert_single`` into "<basename>_<key>.cbf".
    """
    if len(h5_files) == 0:
        return

    if len(h5_files) == 1:
        h5in = h5_files[0]
        # Close the handle as soon as the keys are known instead of
        # leaving it open for the rest of the run.
        h5 = h5py.File(h5in, "r")
        try:
            tags = list(h5.keys())
        finally:
            h5.close()

        base = os.path.basename(h5in)
        # Deliberately serial: parallel reading of single file seems buggy.
        for tag in tags:
            convert_single(h5in,
                           root="/%s" % tag,
                           cbfout="%s_%s.cbf" % (base, tag))
        return

    easy_mp.pool_map(fixed_func=convert, args=h5_files, processes=nproc)
def rescale_with_specified_symm(topdir, dirs, symms, out, sgnum=None,
                                reference_symm=None, nproc=1,
                                prep_dials_files=False, copyto_root=None):
    """Re-scale all datasets using one specified symmetry.

    Exactly one of ``sgnum`` and ``reference_symm`` must be given.  With
    ``sgnum``, the reference cell is the per-parameter median over the
    entries of ``symms`` whose Laue group matches.  Returns
    ``(cells, reference_symm)``, where ``cells`` is a dict built from the
    worker results whose second item is not None.
    """
    assert (sgnum, reference_symm).count(None) == 1

    if sgnum is None:
        sgnum = reference_symm.space_group_info().type().number()
        laue_group = reference_symm.space_group(
        ).build_derived_reflection_intensity_group(False)
        sgnum_laue = laue_group.type().number()
    else:
        laue_group = sgtbx.space_group_info(sgnum).group(
        ).build_derived_reflection_intensity_group(False)
        sgnum_laue = laue_group.type().number()

        def laue_number(symm):
            info = symm.reflection_intensity_symmetry(False).space_group_info()
            return info.type().number()

        matches = [s for s in symms if laue_number(s) == sgnum_laue]
        matched_cells = numpy.array(
            [s.unit_cell().parameters() for s in matches])
        median_cell = [numpy.median(matched_cells[:, col])
                       for col in xrange(6)]
        reference_symm = crystal.symmetry(median_cell, sgnum)

    print >>out
    print >>out, "Re-scaling with specified symmetry:", reference_symm.space_group_info().symbol_and_number()
    print >>out, " reference cell:", reference_symm.unit_cell()
    print >>out
    print >>out
    out.flush()

    st_time = time.time()

    if copyto_root:
        # Mirror each directory's position under topdir below copyto_root.
        wd_ret = []
        for wd in dirs:
            assert wd.startswith(os.path.join(topdir, ""))
            mirrored = os.path.join(copyto_root, os.path.relpath(wd, topdir))
            if not os.path.exists(mirrored):
                os.makedirs(mirrored)
            wd_ret.append(mirrored)
    else:
        wd_ret = dirs

    def rescale_one(job):
        return rescale_with_specified_symm_worker(
            job, topdir, out, reference_symm, sgnum, sgnum_laue,
            prep_dials_files)

    ret = easy_mp.pool_map(fixed_func=rescale_one,
                           args=zip(symms, dirs, wd_ret),
                           processes=nproc)

    cells = dict([item for item in ret if item[1] is not None])  # cell and file

    print >>out, "\nTotal wall-clock time for reindexing: %.2f sec (using %d cores)." % (time.time()-st_time, nproc)
    return cells, reference_symm
def calc_cchalf_by_removing(wdir, inp_head, inpfiles, with_sigma=False,
                            stat_bin="total", nproc=1, nproc_each=None,
                            batchjobs=None):
    """Tabulate the CC1/2 obtained from one XSCALE run per input file.

    A run directory per index of ``inpfiles`` is set up by
    ``_calc_cchalf_by_removing_worker_1``; runs are submitted through
    ``batchjobs`` when given, else executed via ``easy_mp.pool_map``.
    Results are written to "cchalf.dat" under ``wdir`` and printed sorted.

    Returns ``(index, cc_half, n_unique)`` tuples in order of decreasing
    CC1/2, excluding entries whose CC1/2 is NaN.
    """
    assert not with_sigma  # Not supported now
    assert stat_bin in ("total", "outer")

    if not os.path.exists(wdir):
        os.makedirs(wdir)

    datout = open(os.path.join(wdir, "cchalf.dat"), "w")
    try:
        datout.write("idx exfile cc1/2(%s) Nuniq\n" % stat_bin)

        # Prep runs
        tmpdirs = []
        for i_ex in range(len(inpfiles)):
            tmpdirs.append(_calc_cchalf_by_removing_worker_1(
                wdir, inp_head, inpfiles, i_ex, nproc_each))

        # Run XSCALE
        if batchjobs is None:
            easy_mp.pool_map(
                fixed_func=lambda x: util.call(xscale_comm, wdir=x),
                args=tmpdirs,
                processes=nproc)
        else:
            jobs = []
            for tmpdir in tmpdirs:
                job = batchjob.Job(tmpdir, "xscale.sh", nproc=nproc_each)
                job.write_script(xscale_comm)
                batchjobs.submit(job)
                jobs.append(job)
            batchjobs.wait_all(jobs)

        # Finish runs; built as a list so that .sort() below is valid.
        cchalf_list = []  # (i_ex, CC1/2, Nuniq)
        for i_ex, tmpdir in enumerate(tmpdirs):
            cchalf_list.append(_calc_cchalf_by_removing_worker_2(
                wdir, tmpdir, i_ex, stat_bin))

        for iex, cchalf_exi, nuniq in cchalf_list:
            datout.write("%3d %s %.4f %d\n"
                         % (iex, inpfiles[iex], cchalf_exi, nuniq))
    finally:
        # Never leave the table open, even if a run fails.
        datout.close()

    cchalf_list.sort(key=lambda x: -x[1])
    print
    print("# Sorted table")
    for idx, cch, nuniq in cchalf_list:
        print("%3d %-.4f %4d %s" % (idx, cch, nuniq, inpfiles[idx]))

    # Remove unuseful (failed) data: x == x is False only for NaN.
    cchalf_list = [x for x in cchalf_list if x[1] == x[1]]
    return cchalf_list
def __init__(self,
             pdb_hierarchies,  # XXX these must be single-conformer
             reference_hierarchy=None,
             nproc=Auto,
             log=sys.stdout):
    """Collect per-residue validation results over an ensemble of models.

    Protein residues of ``reference_hierarchy`` (the first hierarchy when
    omitted) define the residue ids.  Models are validated in parallel by
    ``self.validate_single_model``; the combined Ramachandran and rotamer
    results are stored, one ``residue_analysis`` per residue, in
    ``self.residue_data``.
    """
    self.pdb_hierarchies = pdb_hierarchies
    self.selection_caches = [h.atom_selection_cache()
                             for h in pdb_hierarchies]
    # "is None" avoids going through any __eq__ the hierarchy may define.
    if reference_hierarchy is None:
        reference_hierarchy = self.pdb_hierarchies[0]

    self.residue_ids = []
    self.residue_id_dict = {}
    for chain in reference_hierarchy.only_model().chains():
        if not chain.is_protein():  # TODO
            continue
        for residue_group in chain.residue_groups():
            residue = residue_group.only_atom_group()
            id_str = residue.id_str()
            self.residue_id_dict[id_str] = len(self.residue_ids)
            self.residue_ids.append(id_str)

    self.residue_ensembles = [[] for _ in self.residue_ids]
    for hierarchy in pdb_hierarchies:
        for chain in hierarchy.only_model().chains():
            if not chain.is_protein():
                continue
            for residue_group in chain.residue_groups():
                residue = residue_group.only_atom_group()
                id_str = residue.id_str()
                i_res = self.residue_id_dict.get(id_str)
                # Residues absent from the reference are not supported.
                assert (i_res is not None)
                self.residue_ensembles[i_res].append(residue)

    self.validations = easy_mp.pool_map(
        fixed_func=self.validate_single_model,
        iterable=range(len(pdb_hierarchies)),
        processes=nproc)

    # Split the (ramachandran, rotamer) pairs returned per model.
    rama_objects = [rama for rama, rota in self.validations]
    rota_objects = [rota for rama, rota in self.validations]
    rama_by_residue = combine_model_validation_results(
        validation_objects=rama_objects,
        ensemble_result_class=ramalyze.ramachandran_ensemble,
        residue_ids=self.residue_ids,
        ignore_unexpected_residues=False,
        log=log)
    rota_by_residue = combine_model_validation_results(
        validation_objects=rota_objects,
        ensemble_result_class=rotalyze.rotamer_ensemble,
        residue_ids=self.residue_ids,
        ignore_unexpected_residues=False,
        log=log)
    assert len(rama_by_residue) == len(rota_by_residue) == len(self.residue_ids)

    self.residue_data = []
    for i_res, id_str in enumerate(self.residue_ids):
        self.residue_data.append(
            residue_analysis(
                id_str=id_str,
                residues=self.residue_ensembles[i_res],
                rama=rama_by_residue[i_res],
                rota=rota_by_residue[i_res]))
def run_multiprocessing_chunks_if_applicable(self, command_call):
    """Spawn one chunked sub-command per process when applicable.

    Returns True when the sub-commands were launched from this process
    (``max_proc`` > 1 and this is not already a chunk run), else False.
    """
    assert isinstance(command_call, list)
    n = self.options.max_proc
    if n is None or not (n > 1):
        return False

    if self.chunk.n != 1:
        # NOTE(review): nesting reconstructed from whitespace-collapsed
        # source; the redirect is assumed to belong to the max_proc > 1 case.
        self.chunk.redirect_chunk_stdout_and_stderr(have_array=True)
        return False

    from libtbx.utils import escape_sh_double_quoted
    cmds = []
    for i_chunk in xrange(n):
        words = (command_call
                 + self.options_and_args
                 + ["--chunk=%d,%d" % (n, i_chunk)])
        quoted = ['"' + escape_sh_double_quoted(s=word) + '"'
                  for word in words]
        cmds.append(" ".join(quoted))

    from libtbx import easy_mp
    easy_mp.pool_map(processes=n, func=run_multi, args=cmds, chunksize=1)
    return True
def __init__(self, pdb_hierarchy, n_models, nproc=Auto):
    """Validate each model in parallel and store the results per slot.

    For every name in ``__slots__`` the instance attribute becomes the
    list of that attribute taken from each model's summary, in model order.
    """
    assert (len(pdb_hierarchy.models()) == n_models)
    validate = parallel_driver(pdb_hierarchy)
    summaries = easy_mp.pool_map(
        processes=nproc,
        fixed_func=validate,
        args=range(n_models))
    for name in self.__slots__:
        per_model = [getattr(summary, name) for summary in summaries]
        setattr(self, name, per_model)
def run(NPROC=10):
    """Queue ``run_one`` for every data folder lacking a loadable result.

    A folder is skipped when its "<prefix>.pkl" exists and can be loaded.
    Jobs are ordered by the ``n_atoms`` value stored in each folder's
    "source_info.pkl" and run in parallel when ``NPROC`` > 1.  Always
    returns True.
    """
    path = "/net/cci/share/cryoem/maps_and_models/"
    bug_path = "/net/cci/share/cryoem/bugs/"
    args = []
    size = flex.double()
    for folder in os.listdir(path):
        prefix = folder
        folder = path + folder + "/"
        pkl_result = "%s%s.pkl" % (folder, prefix)
        bug_log = "%s%s.log" % (bug_path, prefix)
        if not os.path.isdir(folder):
            continue
        #
        if os.path.isfile(pkl_result):
            # An unreadable result is treated as missing and recomputed.
            # "except Exception" lets Ctrl-C and SystemExit propagate,
            # which the previous bare "except:" swallowed.
            try:
                easy_pickle.load(pkl_result)
            except Exception:
                can_load = False
            else:
                can_load = True
            if can_load:
                continue
        #
        pdb_file = folder + prefix + ".pdb"
        map_file = folder + prefix + ".map"
        map_file_1 = folder + prefix + "_1.map"
        map_file_2 = folder + prefix + "_2.map"
        info_file = folder + "source_info.pkl"
        assert os.path.isfile(pdb_file)
        assert os.path.isfile(map_file)
        assert os.path.isfile(info_file)
        # The two extra maps are optional.
        if not os.path.isfile(map_file_1):
            map_file_1 = None
        if not os.path.isfile(map_file_2):
            map_file_2 = None
        args.append([bug_log, pkl_result, pdb_file, map_file, map_file_1,
                     map_file_2, info_file])
        size.append(easy_pickle.load(info_file).n_atoms)

    # Reorder the jobs following the sort permutation of their sizes.
    args = [args[i] for i in flex.sort_permutation(size)]
    print("%s %s" % ("Total jobs:", len(args)))
    sys.stdout.flush()
    #
    if NPROC > 1:
        stdout_and_results = easy_mp.pool_map(
            processes=NPROC,
            fixed_func=run_one,
            args=args,
            func_wrapper="buffer_stdout_stderr")
    else:
        for arg in args:
            run_one(arg)
    return True
def index_and_integrate(work_params, image_mdls):
    """Index and integrate every image model, serially or in parallel.

    Each image's spot model is reset from the result of
    ``index_and_integrate_one``.  In the parallel path the captured worker
    output is printed for failed images and, when
    ``work_params.show_refine_uc_cr`` is set, for all images.
    """
    n_mdls = image_mdls.size()

    if not work_params.multiprocessing or n_mdls < 2:
        for image in image_mdls.array:
            n_spots, updated_im = index_and_integrate_one(
                work_params, image_mdls.miller_indices, image.pixels)
            image.reset_spot_model(other=updated_im)
        return

    # import all before fork
    from rstbx.simage import \
      run_spotfinder, \
      run_labelit_index, \
      refine_uc_cr, \
      integrate_crude

    def mp_func(i_img):
        pixels = image_mdls.array[i_img].pixels
        return index_and_integrate_one(
            work_params, image_mdls.miller_indices, pixels)

    from libtbx import easy_mp
    mp_results = easy_mp.pool_map(
        fixed_func=mp_func,
        args=list(range(n_mdls)),
        chunksize=1,
        log=sys.stdout,
        func_wrapper="buffer_stdout_stderr")
    print()
    sys.stdout.flush()

    rule = "-" * 80
    for i_img, (log, mp_result) in enumerate(mp_results):
        if mp_result is not None:
            n_spots, updated_im = mp_result
            uc = None if updated_im is None else updated_im.unit_cell
            print("Refined unit cell %d (%d spots):" % (i_img, n_spots), uc)
            image_mdls.array[i_img].reset_spot_model(other=updated_im)
        else:
            # The worker failed: show what it printed.
            print("ERROR index_and_integrate_one:")
            print(rule)
            sys.stdout.write(log)
            print(rule)
            print()
        sys.stdout.flush()
    print()

    if work_params.show_refine_uc_cr:
        for log, _unused in mp_results:
            print("v" * 80)
            sys.stdout.write(log)
            print("^" * 80)
            print()
        sys.stdout.flush()
def __init__(self, pdb_hierarchy, n_models, nproc=Auto):
    """Run the per-model validation in parallel and transpose the results.

    Every name listed in ``__slots__`` is set to a list holding that
    attribute of each model summary, ordered by model index.
    """
    assert (len(pdb_hierarchy.models()) == n_models)
    validate = parallel_driver(pdb_hierarchy)
    model_indices = range(n_models)
    summaries = easy_mp.pool_map(processes=nproc,
                                 fixed_func=validate,
                                 args=model_indices)
    for name in self.__slots__:
        collected = []
        for summary in summaries:
            collected.append(getattr(summary, name))
        setattr(self, name, collected)
def index_and_integrate(work_params, image_mdls):
    """Run ``index_and_integrate_one`` for all image models.

    Runs serially when multiprocessing is off or there are fewer than two
    images, otherwise via ``easy_mp.pool_map`` with buffered worker output.
    Each image's spot model is reset from its result; failures are
    reported with the worker's captured output.
    """
    n_mdls = image_mdls.size()
    use_mp = work_params.multiprocessing and n_mdls >= 2

    if not use_mp:
        for im in image_mdls.array:
            n_spots, updated_im = index_and_integrate_one(
                work_params, image_mdls.miller_indices, im.pixels)
            im.reset_spot_model(other=updated_im)
    else:
        # import all before fork
        from rstbx.simage import \
          run_spotfinder, \
          run_labelit_index, \
          refine_uc_cr, \
          integrate_crude

        def mp_func(i_img):
            return index_and_integrate_one(
                work_params,
                image_mdls.miller_indices,
                image_mdls.array[i_img].pixels)

        from libtbx import easy_mp
        mp_results = easy_mp.pool_map(
            fixed_func=mp_func,
            args=range(n_mdls),
            chunksize=1,
            log=sys.stdout,
            func_wrapper="buffer_stdout_stderr")
        print
        sys.stdout.flush()

        for i_img, (log, mp_result) in enumerate(mp_results):
            if mp_result is None:
                # No result: dump the captured output of that worker.
                print("ERROR index_and_integrate_one:")
                print("-"*80)
                sys.stdout.write(log)
                print("-"*80)
                print
            else:
                n_spots, updated_im = mp_result
                uc = None
                if updated_im is not None:
                    uc = updated_im.unit_cell
                print("%s %s" % (
                    "Refined unit cell %d (%d spots):" % (i_img, n_spots),
                    uc))
                image_mdls.array[i_img].reset_spot_model(other=updated_im)
            sys.stdout.flush()
        print

        if work_params.show_refine_uc_cr:
            for log, _unused in mp_results:
                print("v"*80)
                sys.stdout.write(log)
                print("^"*80)
                print
            sys.stdout.flush()
def run(params):
    """Run ``work`` for a grid of detector origins around the XDS.INP one.

    The grid spans -nx..nx by -ny..ny steps of (dx, dy) around
    ORGX/ORGY.  When ``params.unit`` is "mm" the steps are divided by the
    QX/QY values from XDS.INP.
    """
    xdsinp = "XDS.INP"
    kwds = dict(get_xdsinp_keyword(xdsinp))
    orgx_org, orgy_org = map(float, (kwds["ORGX"], kwds["ORGY"]))

    dx, dy = params.dx, params.dy
    if params.unit == "mm":
        assert "QX" in kwds
        assert "QY" in kwds
        dx /= float(kwds["QX"])
        dy /= float(kwds["QY"])

    orgxy_list = [
        (orgx_org + i * dx, orgy_org + j * dy)
        for i in xrange(-params.nx, params.nx + 1)
        for j in xrange(-params.ny, params.ny + 1)
    ]

    def work_at(orgxy):
        return work(os.path.abspath(params.workdir),
                    os.path.abspath(xdsinp),
                    orgxy)

    easy_mp.pool_map(fixed_func=work_at,
                     args=orgxy_list,
                     processes=params.nproc)
def __init__(
        self,
        xray_structure,  # XXX redundant
        pdb_hierarchy,
        restraints_manager,
        map_data,
        number_of_trials,
        nproc,
        weight):
    """Run ``number_of_trials`` simulated-annealing trials.

    One random seed is drawn per trial up front; the resulting structures
    are stored in ``self.results`` and their coordinates are added to
    ``self.states``.
    """
    # Must stay first: locals() may only hold the constructor arguments.
    adopt_init_args(self, locals())

    # Initialize states collector
    self.states = mmtbx.utils.states(
        xray_structure=self.xray_structure.deep_copy_scatterers(),
        pdb_hierarchy=self.pdb_hierarchy.deep_copy())

    # SA params
    sa_params = sa.master_params().extract()
    sa_params.start_temperature = 50000
    sa_params.final_temperature = 0
    sa_params.cool_rate = 25000
    sa_params.number_of_steps = 50
    self.params = sa_params

    # minimizer params
    self.grf = geometry_restraints.flags.flags(default=True)
    self.lbfgs_exception_handling_params = \
        scitbx.lbfgs.exception_handling_parameters(
            ignore_line_search_failed_step_at_lower_bound=True,
            ignore_line_search_failed_step_at_upper_bound=True,
            ignore_line_search_failed_maxfev=True)

    # pre-compute random seeds
    random_seeds = [random.randint(0, 10000000)
                    for _trial in xrange(self.number_of_trials)]

    # run SA
    self.results = []
    if self.nproc > 1:
        from libtbx import easy_mp
        stdout_and_results = easy_mp.pool_map(
            processes=self.nproc,
            fixed_func=self.run,
            args=random_seeds,
            func_wrapper="buffer_stdout_stderr")
        for _captured, xrs in stdout_and_results:
            self.results.append(xrs)
            self.states.add(sites_cart=xrs.sites_cart())
    else:
        for seed in random_seeds:
            xrs = self.run(random_seed=seed).deep_copy_scatterers()
            self.results.append(xrs)
            self.states.add(sites_cart=xrs.sites_cart())
    assert len(self.results) == self.number_of_trials
def __init__(self,
             xray_structure,  # XXX redundant
             pdb_hierarchy,
             restraints_manager,
             map_data,
             number_of_trials,
             nproc,
             weight):
    """Perform the requested number of simulated-annealing trials.

    Seeds are generated before any trial runs.  Every trial's structure
    is appended to ``self.results`` and its sites to ``self.states``.
    """
    # Keep as the first statement so locals() holds just the arguments.
    adopt_init_args(self, locals())

    # Initialize states collector
    structure_copy = self.xray_structure.deep_copy_scatterers()
    hierarchy_copy = self.pdb_hierarchy.deep_copy()
    self.states = mmtbx.utils.states(
        xray_structure=structure_copy,
        pdb_hierarchy=hierarchy_copy)

    # SA params
    self.params = sa.master_params().extract()
    self.params.start_temperature = 50000
    self.params.final_temperature = 0
    self.params.cool_rate = 25000
    self.params.number_of_steps = 50

    # minimizer params
    self.grf = geometry_restraints.flags.flags(default=True)
    self.lbfgs_exception_handling_params = \
        scitbx.lbfgs.exception_handling_parameters(
            ignore_line_search_failed_step_at_lower_bound=True,
            ignore_line_search_failed_step_at_upper_bound=True,
            ignore_line_search_failed_maxfev=True)

    # pre-compute random seeds
    random_seeds = []
    for _trial in xrange(self.number_of_trials):
        random_seeds.append(random.randint(0, 10000000))

    def record(xrs):
        # Store one trial's structure and its coordinates.
        self.results.append(xrs)
        self.states.add(sites_cart=xrs.sites_cart())

    # run SA
    self.results = []
    if self.nproc > 1:
        from libtbx import easy_mp
        stdout_and_results = easy_mp.pool_map(
            processes=self.nproc,
            fixed_func=self.run,
            args=random_seeds,
            func_wrapper="buffer_stdout_stderr")
        for so, xrs in stdout_and_results:
            record(xrs)
    else:
        for random_seed in random_seeds:
            record(self.run(random_seed=random_seed).deep_copy_scatterers())
    assert len(self.results) == self.number_of_trials
def run(params):
    """Call ``work`` for every point of an ORGX/ORGY grid around XDS.INP's values.

    ORGX and ORGY are read from ``XDS.INP``. The grid spans
    ``-params.nx..params.nx`` by ``-params.ny..params.ny`` steps of
    ``params.dx``/``params.dy``; when ``params.unit`` is ``"mm"`` the steps
    are first divided by the QX/QY values from ``XDS.INP``. The jobs are
    distributed over ``params.nproc`` processes.
    """
    xdsinp = "XDS.INP"
    kwds = dict(get_xdsinp_keyword(xdsinp))
    raw_orgx, raw_orgy = kwds["ORGX"], kwds["ORGY"]
    orgx_org, orgy_org = float(raw_orgx), float(raw_orgy)

    dx = params.dx
    dy = params.dy
    if params.unit == "mm":
        assert "QX" in kwds
        assert "QY" in kwds
        dx = dx / float(kwds["QX"])
        dy = dy / float(kwds["QY"])

    #backup_needed = files.generated_by_IDXREF + ("XDS.INP",)
    #bk_prefix = make_backup(backup_needed)

    orgxy_list = [(orgx_org + i * dx, orgy_org + j * dy)
                  for i in range(-params.nx, params.nx + 1)
                  for j in range(-params.ny, params.ny + 1)]

    def _work_at(orgxy):
        # Paths are resolved at call time, exactly as the original lambda did.
        return work(os.path.abspath(params.workdir),
                    os.path.abspath(xdsinp),
                    orgxy)

    easy_mp.pool_map(fixed_func=_work_at,
                     args=orgxy_list,
                     processes=params.nproc)
def __call__(
        self,
        funcs,
):
    """Map ``wrap_call`` over the values of ``funcs`` and key the results alike.

    The values of ``funcs`` are passed to ``easy_mp.pool_map`` using
    ``int(self.cpus)`` processes; the returned dict pairs each key of
    ``funcs`` with the result at the matching position.
    """
    keys = funcs.keys()
    payloads = [funcs[name] for name in keys]
    outputs = easy_mp.pool_map(
        fixed_func=wrap_call,
        args=payloads,
        processes=int(self.cpus),
    )
    results_dict = {}
    for position, name in enumerate(keys):
        results_dict[name] = outputs[position]
    return results_dict
def __init__(self,
             sources_and_models,
             sequence,
             first_chain_only=False,
             reset_chain_id=None,
             min_identity=0.95,
             nproc=Auto,
             log=null_out()):
    """Run ``self.examine_model`` on every entry of ``sources_and_models``.

    Arguments are stored on ``self`` by ``adopt_init_args``. The per-model
    results are kept in ``self._results``; for each model whose result is
    empty a "no matches" line naming its source is printed to ``log``.
    """
    adopt_init_args(self, locals())
    model_indices = range(len(self.sources_and_models))
    self._results = easy_mp.pool_map(
        fixed_func=self.examine_model,
        iterable=model_indices,
        processes=self.nproc)
    for k, results in enumerate(self._results):
        if len(results) != 0:
            continue
        print(" no matches for %s" % self.sources_and_models[k][0], file=log)
def __init__(self,
             sources_and_models,
             sequence,
             first_chain_only=False,
             reset_chain_id=None,
             min_identity=0.95,
             nproc=Auto,
             log=null_out()):
    """Run ``self.examine_model`` on every entry of ``sources_and_models``.

    Arguments are stored on ``self`` by ``adopt_init_args``. The per-model
    results are kept in ``self._results``; for each model whose result is
    empty a "no matches" line naming its source is printed to ``log``.
    """
    adopt_init_args(self, locals())
    model_indices = range(len(self.sources_and_models))
    self._results = easy_mp.pool_map(
        fixed_func=self.examine_model,
        iterable=model_indices,
        processes=self.nproc)
    for k, results in enumerate(self._results):
        if len(results) != 0:
            continue
        print >> log, " no matches for %s" % self.sources_and_models[k][0]
def run(hklin, output_dir=None, nproc=1):
    """Evaluate CC against the merged data for every input file of ``hklin``.

    ``hklin`` is read with ``xds_ascii.XDS_ASCII`` and merged; ``eval_cc``
    is then run (over ``nproc`` processes) on each file listed in its
    ``input_files``, resolving relative names against the directory of
    ``hklin``. Per-file rows go to ``cc_files.dat`` and per-frame rows to
    ``cc_frames.dat`` in ``output_dir`` (current directory when None).

    Returns:
        collections.OrderedDict mapping each file name to a list of
        ``[frame, n_all, n_common, cc]`` entries.
    """
    if output_dir is None:
        output_dir = os.getcwd()

    merged = xds_ascii.XDS_ASCII(hklin)
    merged_iobs = merged.i_obs().merge_equivalents(
        use_internal_variance=False).array()

    file_names = [entry[0] for entry in merged.input_files.values()]
    fwidth = max(len(fn) for fn in file_names)
    formatf = "%" + str(fwidth) + "s"

    # Lengths of the prefix/suffix shared by all names; used to shorten them.
    cutforname1 = len(os.path.commonprefix(file_names))
    cutforname2 = len(os.path.commonprefix([fn[::-1] for fn in file_names]))
    formatn = "%" + str(fwidth - cutforname1 - cutforname2) + "s"

    hklin_dir = os.path.dirname(hklin)
    eval_paths = [fn if os.path.isabs(fn) else os.path.join(hklin_dir, fn)
                  for fn in file_names]

    ret = collections.OrderedDict()
    # Context managers guarantee both files are flushed and closed, also
    # when eval_cc or the formatting below raises.
    with open(os.path.join(output_dir, "cc_files.dat"), "w") as out_files, \
            open(os.path.join(output_dir, "cc_frames.dat"), "w") as out_frames:
        print >> out_files, "file name n.all n.common cc"
        print >> out_frames, "file name frame n.all n.common cc"

        results = easy_mp.pool_map(
            fixed_func=lambda x: eval_cc(x, merged_iobs),
            args=eval_paths,
            processes=nproc)

        for (f, wavelen), (ret1, ret2) in zip(merged.input_files.values(),
                                              results):
            # Explicit end index: a plain "-cutforname2" would yield an
            # empty name whenever there is no common suffix (-0 == 0).
            name = f[cutforname1 + 1:len(f) - cutforname2]
            n_all, n_common, cc = ret1
            print >> out_files, formatf % f, formatn % name, \
                "%5d %5d %.4f" % (n_all, n_common, cc)
            ret[f] = []
            for frame, n_all, n_common, cc in ret2:
                print >> out_frames, formatf % f, formatn % name, \
                    "%6d %5d %5d %.4f" % (frame, n_all, n_common, cc)
                ret[f].append([frame, n_all, n_common, cc])
    return ret
def partition(self, mask=None, cpus=1):
    """Find the nearest neighbour for each grid point (or the subset defined
    by mask.outer_mask() if mask is not None).

    The query points are searched against ``self.sites_grid`` with a
    ``spatial.KDTree``; with ``cpus > 1`` they are split into at most
    ``cpus`` chunks that are processed via ``easy_mp.pool_map``. The
    resulting indices are stored in ``self.nn_groups``; when a mask is
    given they are scattered into a full-size array pre-filled with -1.

    Returns:
        self
    """

    def find_sites(sites_tuple):
        # Index of the nearest reference site for every query site.
        ref_sites, query_sites = sites_tuple
        tree = spatial.KDTree(data=ref_sites)
        nn_dists, nn_groups = tree.query(query_sites)
        return nn_groups

    assert isinstance(cpus, int) and (cpus > 0)

    # Sites that we are partitioning
    if mask:
        query_sites = flex.vec3_double(mask.outer_mask())
    else:
        query_sites = flex.vec3_double(self.parent.grid_points())
    n_query = len(query_sites)

    # Find the nearest grid_site for each query_site (returns index of the grid site)
    if cpus == 1 or n_query == 0:
        # An empty query cannot be chunked (step 0), so it takes the
        # serial path as well.
        output = [find_sites((self.sites_grid, query_sites))]
    else:
        # Chunk the points into groups
        chunk_size = iceil(1.0 * n_query / cpus)
        chunked_points = [query_sites[i:i + chunk_size]
                          for i in range(0, n_query, chunk_size)]
        assert sum(map(len, chunked_points)) == n_query
        # Ceil-sized chunks may cover all points with fewer than `cpus`
        # chunks (e.g. 9 points on 4 cpus -> 3 chunks), so only an upper
        # bound can be asserted.
        assert 0 < len(chunked_points) <= cpus
        # Map to cpus
        arg_list = [(self.sites_grid, chunk) for chunk in chunked_points]
        output = easy_mp.pool_map(fixed_func=find_sites,
                                  args=arg_list,
                                  processes=cpus)
        assert len(output) == len(chunked_points), '{!s} != {!s}'.format(
            len(output), len(chunked_points))

    # Extract the indices of the mapped points
    nn_groups = numpy.array([idx for chunk in output for idx in chunk])
    assert n_query == len(nn_groups)

    # Reformat into full grid size
    if mask:
        self.nn_groups = -1 * numpy.ones(self.parent.grid_size_1d(), dtype=int)
        self.nn_groups.put(mask.outer_mask_indices(), nn_groups)
    else:
        self.nn_groups = nn_groups
    return self
def realign(self, atom_selection_string):
    """Re-superpose the related chains on the reference using a selection.

    The effective selection string is ``atom_selection_string`` when given,
    otherwise it is derived from ``self.calpha_only`` /
    ``self.backbone_only`` and falls back to ``"all"``. Selected reference
    atoms are recorded in ``self.atoms_ref`` and their coordinates in
    ``self.reference_sites``. ``self.align_model`` is then run for every
    related chain; each returned fit is applied to that chain's atoms and
    its index recorded in ``self.selection_moved``. Chains without a fit
    are reported to ``self.log`` and left untouched.
    """
    self.atom_selection_string = atom_selection_string
    ref_atoms = self.reference_hierarchy.atoms()
    ref_atoms.reset_i_seq()
    if (self.atom_selection_string is not None):
        assert (not self.calpha_only) and (self.backbone_only != True)
    elif (self.calpha_only):
        self.atom_selection_string = "name CA"
    elif (self.backbone_only):
        self.atom_selection_string = \
            "name CA or name CB or name C or name N or name O"
    else:
        self.atom_selection_string = "all"

    from scitbx.array_family import flex
    sel_cache = self.reference_hierarchy.atom_selection_cache()
    self.atom_selection = sel_cache.selection(self.atom_selection_string)
    assert (self.atom_selection.count(True) > 0)

    # Collect labels and i_seqs of the selected reference atoms.
    self.atoms_ref = []
    ref_sel = flex.size_t()
    ref_chain = self.reference_hierarchy.only_model().only_chain()
    for residue_group in ref_chain.residue_groups():
        for atom in residue_group.only_atom_group().atoms():
            if self.atom_selection[atom.i_seq]:
                resid = residue_group.resid()
                self.atoms_ref.append("%s %s" % (resid, atom.name.strip()))
                ref_sel.append(atom.i_seq)
    assert (len(ref_sel) > 0)
    self.reference_sites = ref_atoms.extract_xyz().select(ref_sel)

    chain_indices = range(len(self.related_chains))
    sites_moved = easy_mp.pool_map(
        iterable=chain_indices,
        fixed_func=self.align_model,
        processes=self.nproc)

    self.selection_moved = []
    for k, lsq_fit in enumerate(sites_moved):
        related = self.related_chains[k]
        hierarchy = related.pdb_hierarchy
        if (lsq_fit is None):
            print >> self.log, "No LSQ fit for model %s:%s" % \
                (related.source_info, related.chain_id)
            continue
        self.selection_moved.append(k)
        pdb_atoms = hierarchy.atoms()
        # Apply the fitted rotation and translation to every atom.
        rotated = lsq_fit.r.elems * pdb_atoms.extract_xyz()
        pdb_atoms.set_xyz(rotated + lsq_fit.t.elems)
def realign(self, atom_selection_string):
    """Re-superpose the related chains on the reference using a selection.

    The effective selection string is ``atom_selection_string`` when given,
    otherwise it is derived from ``self.calpha_only`` /
    ``self.backbone_only`` and falls back to ``"all"``. Selected reference
    atoms are recorded in ``self.atoms_ref`` and their coordinates in
    ``self.reference_sites``. ``self.align_model`` is then run for every
    related chain; each returned fit is applied to that chain's atoms and
    its index recorded in ``self.selection_moved``. Chains without a fit
    are reported to ``self.log`` and left untouched.
    """
    self.atom_selection_string = atom_selection_string
    ref_atoms = self.reference_hierarchy.atoms()
    ref_atoms.reset_i_seq()
    if (self.atom_selection_string is not None):
        assert (not self.calpha_only) and (self.backbone_only != True)
    elif (self.calpha_only):
        self.atom_selection_string = "name CA"
    elif (self.backbone_only):
        self.atom_selection_string = \
            "name CA or name CB or name C or name N or name O"
    else:
        self.atom_selection_string = "all"

    from scitbx.array_family import flex
    sel_cache = self.reference_hierarchy.atom_selection_cache()
    self.atom_selection = sel_cache.selection(self.atom_selection_string)
    assert (self.atom_selection.count(True) > 0)

    # Collect labels and i_seqs of the selected reference atoms.
    self.atoms_ref = []
    ref_sel = flex.size_t()
    ref_chain = self.reference_hierarchy.only_model().only_chain()
    for residue_group in ref_chain.residue_groups():
        for atom in residue_group.only_atom_group().atoms():
            if self.atom_selection[atom.i_seq]:
                resid = residue_group.resid()
                self.atoms_ref.append("%s %s" % (resid, atom.name.strip()))
                ref_sel.append(atom.i_seq)
    assert (len(ref_sel) > 0)
    self.reference_sites = ref_atoms.extract_xyz().select(ref_sel)

    chain_indices = range(len(self.related_chains))
    sites_moved = easy_mp.pool_map(
        iterable=chain_indices,
        fixed_func=self.align_model,
        processes=self.nproc)

    self.selection_moved = []
    for k, lsq_fit in enumerate(sites_moved):
        related = self.related_chains[k]
        hierarchy = related.pdb_hierarchy
        if (lsq_fit is None):
            message = "No LSQ fit for model %s:%s" % \
                (related.source_info, related.chain_id)
            print(message, file=self.log)
            continue
        self.selection_moved.append(k)
        pdb_atoms = hierarchy.atoms()
        # Apply the fitted rotation and translation to every atom.
        rotated = lsq_fit.r.elems * pdb_atoms.extract_xyz()
        pdb_atoms.set_xyz(rotated + lsq_fit.t.elems)
def refine(self):
    """Refine B values per chain selection and store them on the model.

    Starting from the current B values of ``self.xray_structure``,
    ``self.refine_box_with_selected`` is run for every entry of
    ``self.chain_selections`` (serially when ``self.nproc == 1``, otherwise
    through ``easy_mp.pool_map``) and its result written back for that
    selection. In the parallel case the output captured from each worker is
    printed to ``self.log``. Finally the structure and
    ``self.pdb_hierarchy`` are updated with the new values.

    Raises:
        RuntimeError: a parallel job returned no result; the message
            carries the output captured from that worker.
    """
    b_isos = self.xray_structure.extract_u_iso_or_u_equiv() * adptbx.u_as_b(1.)
    if self.nproc == 1:
        for sel in self.chain_selections:
            b_isos_refined = self.refine_box_with_selected(selection=sel)
            b_isos = b_isos.set_selected(sel, b_isos_refined)
    else:
        stdout_and_results = easy_mp.pool_map(
            processes=self.nproc,
            fixed_func=self.refine_box_with_selected,
            args=self.chain_selections,
            func_wrapper="buffer_stdout_stderr")
        for i, (so, b_isos_refined) in enumerate(stdout_and_results):
            # With the buffering wrapper a crashed worker comes back as
            # (captured output, None); report it instead of handing None
            # to set_selected.
            if b_isos_refined is None:
                raise RuntimeError(
                    "B-factor refinement failed in a worker process:\n%s" % so)
            b_isos = b_isos.set_selected(self.chain_selections[i],
                                         b_isos_refined)
            print >> self.log, so
    self.xray_structure = self.xray_structure.set_b_iso(values=b_isos)
    self.pdb_hierarchy.adopt_xray_structure(self.xray_structure)
def __init__(self, pdb_hierarchy, map_coeffs, params, log=None):
    """Sample density for every residue group of a single-model hierarchy.

    ``log`` defaults to ``sys.stdout``; the arguments are stored on
    ``self`` by ``adopt_init_args``. Depending on ``params.sampling_method``
    and ``params.scaling`` the map coefficients are expanded and/or an FFT
    map is computed and scaled. ``self.sample_density`` is then called for
    each residue group index (via ``easy_mp.pool_map`` unless
    ``params.nproc`` is 1 or less) and the per-residue lists are
    concatenated into ``self.results``.

    Raises:
        Sorry: the hierarchy contains more than one model.
    """
    if (log is None):
        log = sys.stdout
    adopt_init_args(self, locals())
    models = pdb_hierarchy.models()
    if (len(models) > 1):
        raise Sorry("Multi-model PDB files not supported.")

    if (params.sampling_method == "direct"):
        self.map_coeffs = self.map_coeffs.expand_to_p1()
        # NOTE(review): nesting of this check under the "direct" branch was
        # reconstructed from a whitespace-collapsed source - confirm.
        if (not map_coeffs.anomalous_flag()):
            self.map_coeffs = self.map_coeffs.generate_bijvoet_mates()

    self.sigma = self.real_map = None
    if (params.sampling_method != "direct") or (params.scaling == "sigma"):
        fft_map = self.map_coeffs.fft_map(
            resolution_factor=params.grid_spacing)
        if (params.scaling == "sigma"):
            self.sigma = fft_map.statistics().sigma()
            fft_map.apply_sigma_scaling()
        else:
            fft_map.apply_volume_scaling()
        self.real_map = fft_map.real_map_unpadded()

    from mmtbx.rotamer import sidechain_angles
    self.angle_lookup = sidechain_angles.SidechainAngles(False)
    self.sites_cart = pdb_hierarchy.atoms().extract_xyz()
    self.residue_groups = []
    for chain in models[0].chains():
        self.residue_groups.extend(chain.residue_groups())

    run_parallel = (params.nproc in [None, Auto]) or (params.nproc > 1)
    if run_parallel:
        # this will be a list of lists
        nested_results = easy_mp.pool_map(
            processes=params.nproc,
            fixed_func=self.sample_density,
            args=range(len(self.residue_groups)))
        # now flatten it out
        self.results = []
        for per_residue in nested_results:
            self.results.extend(per_residue)
    else:
        self.results = []
        for i_res in range(len(self.residue_groups)):
            self.results.extend(self.sample_density(i_res, verbose=True))
def __init__(self,
             fmodel,
             pdb_hierarchy,
             processed_pdb_file,
             params,
             selection,
             selection_score=None,
             nproc=Auto,
             out=None):
    """Run ``self.run_trial`` for every requested occupancy and trial.

    Arguments are stored on ``self`` by ``adopt_init_args``; ``out``
    defaults to ``sys.stdout``. ``selection`` may be a boolean selection or
    an iselection; both ``self.selection`` and ``self.iselection`` are
    populated. A single trial at a single occupancy runs in-process;
    otherwise output is redirected to ``null_out()`` and
    ``params.n_trials`` trials per occupancy are run through
    ``easy_mp.pool_map``. The outcomes are stored in ``self._trials``.
    """
    adopt_init_args(self, locals())
    from scitbx.array_family import flex
    if (self.out is None):
        self.out = sys.stdout
    self.sites_start = fmodel.xray_structure.sites_cart().deep_copy()

    if (type(selection).__name__ == 'bool'):
        assert (self.selection.count(True) > 0)
        self.iselection = self.selection.iselection()
    else:  # actually an iselection
        assert (len(self.selection) > 0)
        self.iselection = self.selection
        all_false = flex.bool(self.sites_start.size(), False)
        self.selection = all_false.set_selected(self.iselection, True)
    if (self.selection_score is None):
        self.selection_score = self.selection

    use_mp = (self.params.n_trials > 1)
    assert (params.partial_occupancy is not None)
    use_mp = use_mp or (len(params.partial_occupancy) > 1)

    if use_mp:
        self.out = null_out()
        # One job per (occupancy, trial) pair, grouped by occupancy.
        args = []
        for occ in params.partial_occupancy:
            assert 0 < occ <= 1
            args.extend([occ] * self.params.n_trials)
        self._trials = easy_mp.pool_map(
            fixed_func=self.run_trial,
            args=args,
            processes=self.nproc)
    else:
        self._trials = [self.run_trial(params.partial_occupancy[0])]
def run () : from mmtbx.wwpdb import rcsb_web_services assert ("PDB_MIRROR_PDB" in os.environ) f = open("pdb_with_alt_confs.txt", "w") f.write("# d_max <= 2.5, protein, xray, exp. data\n") protein_xray_structures_at_high_resolution = rcsb_web_services.post_query( query_xml=None, xray_only=True, d_max=2.51, protein_only=True, data_only=True) # XXX why trust anything else? all_results = pool_map( processes=8, chunksize=16, args=protein_xray_structures_at_high_resolution, func=get_nconfs) n_alts = 0 for (pdb_id, n_confs) in all_results : if (n_confs > 1) : n_alts += 1 f.write(pdb_id + "\n") print "n_alts:", n_alts f.close()
def run(hklin, output_dir=None, nproc=1):
    """Evaluate CC against the merged data for every input file of ``hklin``.

    ``hklin`` is read with ``xds_ascii.XDS_ASCII`` and merged; ``eval_cc``
    is then run (over ``nproc`` processes) on each file listed in its
    ``input_files``, resolving relative names against the directory of
    ``hklin``. Per-file rows go to ``cc_files.dat`` and per-frame rows to
    ``cc_frames.dat`` in ``output_dir`` (current directory when None).

    Returns:
        collections.OrderedDict mapping each file name to a list of
        ``[frame, n_all, n_common, cc]`` entries.
    """
    if output_dir is None:
        output_dir = os.getcwd()

    merged = xds_ascii.XDS_ASCII(hklin)
    merged_iobs = merged.i_obs().merge_equivalents(
        use_internal_variance=False).array()

    file_names = [entry[0] for entry in merged.input_files.values()]
    fwidth = max(len(fn) for fn in file_names)
    formatf = "%" + str(fwidth) + "s"

    # Lengths of the prefix/suffix shared by all names; used to shorten them.
    cutforname1 = len(os.path.commonprefix(file_names))
    cutforname2 = len(os.path.commonprefix([fn[::-1] for fn in file_names]))
    formatn = "%" + str(fwidth - cutforname1 - cutforname2) + "s"

    hklin_dir = os.path.dirname(hklin)
    eval_paths = [fn if os.path.isabs(fn) else os.path.join(hklin_dir, fn)
                  for fn in file_names]

    ret = collections.OrderedDict()
    # Context managers guarantee both files are flushed and closed, also
    # when eval_cc or the formatting below raises.
    with open(os.path.join(output_dir, "cc_files.dat"), "w") as out_files, \
            open(os.path.join(output_dir, "cc_frames.dat"), "w") as out_frames:
        print >>out_files, "file name n.all n.common cc"
        print >>out_frames, "file name frame n.all n.common cc"

        results = easy_mp.pool_map(
            fixed_func=lambda x: eval_cc(x, merged_iobs),
            args=eval_paths,
            processes=nproc)

        for (f, wavelen), (ret1, ret2) in zip(merged.input_files.values(),
                                              results):
            # Explicit end index: a plain "-cutforname2" would yield an
            # empty name whenever there is no common suffix (-0 == 0).
            name = f[cutforname1+1:len(f)-cutforname2]
            n_all, n_common, cc = ret1
            print >>out_files, formatf%f, formatn%name, \
                "%5d %5d %.4f" % (n_all, n_common, cc)
            ret[f] = []
            for frame, n_all, n_common, cc in ret2:
                print >>out_frames, formatf%f, formatn%name, \
                    "%6d %5d %5d %.4f" % (frame, n_all, n_common, cc)
                ret[f].append([frame, n_all, n_common, cc])
    return ret
def __init__(self,
             fmodel,
             selection,
             occupancy,
             map_type="mFo-DFc",
             omit_fraction=0.02,
             selection_delete=None,
             fill_missing_f_obs=False,
             exclude_free_r_reflections=False,
             optimize_binning=True,
             box_cushion_radius=2.5,
             nproc=Auto,
             out=sys.stdout):
    """Build omit groups for ``selection`` and map ``self`` over them.

    Arguments are stored on ``self`` by ``adopt_init_args``. With
    ``omit_fraction == 1.0`` a single omit region covering the whole
    selection is used; otherwise regions come from
    ``create_omit_regions``. The instance itself is the worker passed to
    ``easy_mp.pool_map``; the results are stored in
    ``self.omit_map_coeffs``. The occupancies saved at entry are restored
    afterwards and ``fmodel`` is updated.
    """
    adopt_init_args(self, locals())
    import mmtbx.maps.composite_omit_map
    occ_saved = fmodel.xray_structure.scatterers().extract_occupancies()

    if (omit_fraction != 1.0):
        self.omit_groups = mmtbx.maps.composite_omit_map.create_omit_regions(
            xray_structure=fmodel.xray_structure,
            selection=selection,
            fraction_omit=self.omit_fraction,
            optimize_binning=optimize_binning,
            box_cushion_radius=box_cushion_radius,
            log=out)
        # NOTE(review): this loop was placed in the multi-region branch when
        # reconstructing a whitespace-collapsed source - confirm nesting.
        for group in self.omit_groups:
            group.show(out=out)
    else:
        whole_selection = mmtbx.maps.composite_omit_map.omit_regions(
            serial=1,
            selection=selection)
        self.omit_groups = [whole_selection]

    self.omit_map_coeffs = easy_mp.pool_map(
        fixed_func=self,
        iterable=self.omit_groups,
        processes=nproc)

    # Put the original occupancies back and resynchronise fmodel.
    fmodel.xray_structure.scatterers().set_occupancies(occ_saved)
    fmodel.update_xray_structure(update_f_calc=True)
def __init__(self,
             residues,
             pdb_hierarchy,
             fmodel,
             restraints_manager,
             params,
             nproc=Auto,
             verbose=False,
             debug=None,
             log=sys.stdout):
    """Call this object once per residue index and store the results.

    Arguments are stored on ``self`` by ``adopt_init_args``. The process
    count is resolved with ``easy_mp.get_processes``. With one process the
    residues are handled in order in this process (passing ``log``);
    otherwise the instance is mapped over the residue indices with
    ``easy_mp.pool_map``. Results end up in ``self.results``.
    """
    adopt_init_args(self, locals())
    nproc = easy_mp.get_processes(nproc)
    n_residues = len(residues)
    print >> log, ""
    if (nproc != 1):
        print >> log, " will use %d processes" % nproc
        self.results = easy_mp.pool_map(
            fixed_func=self,
            iterable=range(n_residues),
            processes=nproc)
    else:
        print >> log, " running all residues serially"
        self.results = []
        for i_res in range(n_residues):
            outcome = self.__call__(i_res, log=log)
            self.results.append(outcome)
def run(cbf_files, params): print "Attention - assuming cbf files given belong to a single dataset" print print "%d cbf files were given." % len(cbf_files) print if params.byteoffset: import yamtbx_byteoffset_h5_ext import pyublas last_shape = easy_mp.pool_map(fixed_func=lambda x: convert(x, params), args=cbf_files, processes=params.nproc)[-1] if params.decompose: make_geom_decomposed(XIO.Image(cbf_files[0]).header, last_shape, params.geom_out) else: make_geom(XIO.Image(cbf_files[0]).header, params.geom_out) make_beam(params.beam_out) print "Done." print print "Check %s and %s!" % (params.geom_out, params.beam_out)
Description : prime._genref_determine_mean_I is an internal command to suppport queuing system and mproc. """ from libtbx.easy_mp import pool_map import sys import cPickle as pickle from prime.postrefine import postref_handler def determine_mean_I_mproc(args): frame_no, pickle_filename, iparams = args prh = postref_handler() mean_I, txt_out = prh.calc_mean_intensity(pickle_filename, iparams) print frame_no, pickle_filename, mean_I if mean_I is not None: pickle.dump(mean_I, open(iparams.run_no+'/pickles/'+str(frame_no)+".o","wb"),pickle.HIGHEST_PROTOCOL) if (__name__ == "__main__"): if len(sys.argv)==1: print 'Not allowed. This is an internal command for queuing system.' exit() #load input inp_pickle = pickle.load(open(sys.argv[1], "rb")) iparams = inp_pickle['iparams'] frames = inp_pickle['frames'] #calculate mean of mean I mm_I = 0 determine_mean_I_result = pool_map( iterable=frames, func=determine_mean_I_mproc, processes=iparams.n_processors)
def __init__(
        self,
        fmodels,
        model,
        params,
        target_weights,
        macro_cycle,
        ncs_manager=None,
        log=None):
    """Run DEN refinement and keep the trial with the lowest R-free.

    After building the DEN restraints (when the manager has no proxies yet)
    and a coordinate minimization, ``try_den_weight_torsion`` or
    ``try_den_weight_cartesian`` is evaluated for every (gamma, weight)
    pair: the optimization grid when ``params.den.optimize`` is set,
    otherwise the single pair from ``params.den``. With optimization on and
    more than one process the pairs are distributed through
    ``easy_mp.pool_map``. The result with the lowest R-free (below 1.0) is
    copied into ``fmodels``, and its equilibrium distances are imported
    into the DEN manager.

    ``log`` defaults to ``sys.stdout``; an ``Auto`` process count is
    treated as 1.

    Raises:
        RuntimeError: a parallel trial returned no result, or
            ``params.den.annealing_type`` names neither "torsion" nor
            "cartesian".
    """
    if log is None:
        log = sys.stdout
    # self.ncs_manager = ncs_manager
    self.nproc = params.main.nproc
    if self.nproc is Auto:
        self.nproc = 1
    self.verbose = params.den.verbose
    self.log = log
    self.fmodels = fmodels
    self.model = model
    self.params = params
    self.target_weights = target_weights
    self.adp_refinement_manager = None
    self.macro_cycle = macro_cycle
    self.tan_b_iso_max = 0
    self.random_seed = params.main.random_seed
    den_manager = model.restraints_manager. \
        geometry.den_manager
    print_statistics.make_header("DEN refinement", out=self.log)
    pdb_hierarchy = self.model.pdb_hierarchy(sync_with_xray_structure=True)
    if den_manager.get_n_proxies() == 0:
        print_statistics.make_sub_header(
            "DEN restraint nework", out=self.log)
        den_manager.build_den_proxies(pdb_hierarchy=pdb_hierarchy)
        den_manager.build_den_restraints()
        # NOTE(review): placement of the summary inside this branch was
        # reconstructed from a whitespace-collapsed source - confirm.
        den_manager.show_den_summary(
            sites_cart=self.model.xray_structure.sites_cart())
    if den_manager.params.output_kinemage:
        den_manager.output_kinemage(
            self.model.xray_structure.sites_cart())
    print_statistics.make_sub_header(
        "coordinate minimization before annealing", out=self.log)
    self.minimize(ca_only=self.params.den.minimize_c_alpha_only)
    self.save_scatterers_local = fmodels.fmodel_xray().\
        xray_structure.deep_copy_scatterers().scatterers()
    #DEN refinement start, turn on
    if params.den.optimize:
        grid = den_manager.get_optimization_grid()
        # Use the normalised count: params.main.nproc may be Auto, which
        # "%d" cannot format.
        print >> log, \
            "Running DEN torsion optimization on %d processors..." % \
            self.nproc
    else:
        grid = [(params.den.gamma, params.den.weight)]
    grid_results = []
    grid_so = []

    if "torsion" in params.den.annealing_type:
        print >> self.log, "Running torsion simulated annealing"
        if ((params.den.optimize) and
                ((self.nproc is Auto) or (self.nproc > 1))):
            stdout_and_results = easy_mp.pool_map(
                processes=params.main.nproc,
                fixed_func=self.try_den_weight_torsion,
                args=grid,
                func_wrapper="buffer_stdout_stderr")
            for so, r in stdout_and_results:
                if (r is None):
                    raise RuntimeError(("DEN weight optimization failed:"+
                        "\n%s\nThis is a "+
                        "serious error; please contact [email protected].") % so)
                grid_so.append(so)
                grid_results.append(r)
        else:
            for grid_pair in grid:
                result = self.try_den_weight_torsion(
                    grid_pair=grid_pair)
                grid_results.append(result)
        self.show_den_opt_summary_torsion(grid_results)
    elif "cartesian" in params.den.annealing_type:
        print >> self.log, "Running Cartesian simulated annealing"
        if ((params.den.optimize) and
                ((self.nproc is Auto) or (self.nproc > 1))):
            stdout_and_results = easy_mp.pool_map(
                processes=params.main.nproc,
                fixed_func=self.try_den_weight_cartesian,
                args=grid,
                func_wrapper="buffer_stdout_stderr")
            for so, r in stdout_and_results:
                if (r is None):
                    raise RuntimeError(("DEN weight optimization failed:"+
                        "\n%s\nThis is a "+
                        "serious error; please contact [email protected].") % so)
                grid_so.append(so)
                grid_results.append(r)
        else:
            for grid_pair in grid:
                result = self.try_den_weight_cartesian(
                    grid_pair=grid_pair)
                grid_results.append(result)
        self.show_den_opt_summary_cartesian(grid_results)
    else:
        # Raising a bare string is itself a TypeError; raise a real
        # exception that carries the intended message.
        raise RuntimeError("error in DEN annealing type")

    # Pick the trial with the lowest R-free (must be below 1.0).
    low_r_free = 1.0
    best_xray_structure = None
    best_eq_distances = None
    best_gamma = None
    best_weight = None
    best_so_i = None
    for i, result in enumerate(grid_results):
        cur_r_free = result[2]
        if cur_r_free < low_r_free:
            low_r_free = cur_r_free
            best_gamma = result[0]
            best_weight = result[1]
            best_xray_structure = result[3]
            best_eq_distances = result[4]
            best_so_i = i
    assert best_xray_structure is not None
    if params.den.optimize:
        print >> self.log, "\nbest gamma: %.1f" % best_gamma
        print >> self.log, "best weight: %.1f\n" % best_weight
        # grid_so is only filled by the parallel optimization path, so this
        # never fires without params.den.optimize.
        if params.den.verbose:
            if len(grid_so) >= (best_so_i+1):
                print >> self.log, "\nBest annealing results:\n"
                print >> self.log, grid_so[best_so_i]
    fmodels.fmodel_xray().xray_structure.replace_scatterers(
        best_xray_structure.deep_copy())
    fmodels.update_xray_structure(
        xray_structure=fmodels.fmodel_xray().xray_structure,
        update_f_calc=True)
    utils.assert_xray_structures_equal(
        x1=fmodels.fmodel_xray().xray_structure,
        x2=model.xray_structure)
    model.restraints_manager.geometry.\
        den_manager.import_eq_distances(eq_distances=best_eq_distances)
    self.model.restraints_manager.geometry.update_dihedral_ncs_restraints(
        sites_cart=self.model.xray_structure.sites_cart(),
        pdb_hierarchy=self.model.pdb_hierarchy(sync_with_xray_structure=True),
        log=self.log)
def build_images(work_params, i_calc, reindexing_assistant):
    """Simulate ``work_params.number_of_shots`` images and wrap them.

    Each image gets its own ``mersenne_twister`` seeded with
    ``work_params.noise.random_seed + i_img``, from which a signal scale, a
    crystal rotation and (when random reindexing is enabled) a permutation
    of the intensities are drawn. Noise is added to every image. When
    ``work_params.multiprocessing`` is set and more than one image is
    requested, images are built via ``easy_mp.pool_map`` and their arrays
    travel as byte strings, which are converted back afterwards.

    Returns:
        ``image_models`` built from ``i_calc.indices()`` and the images.

    Raises:
        RuntimeError: a parallel worker returned no image.
    """
    from create import add_noise
    from rstbx.simage import image_simple
    from cctbx.array_family import flex

    if work_params.apply_random_reindexing:
        i_calc_data_perms = [i_calc.data().select(perm)
                             for perm in reindexing_assistant.inv_perms]
    else:
        i_calc_data_perms = [i_calc.data()]
    n_mdls = work_params.number_of_shots
    use_mp = (work_params.multiprocessing and n_mdls > 1)

    def build_one_image(i_img):
        # The order of the three random draws below must not change.
        mt = flex.mersenne_twister(seed=work_params.noise.random_seed+i_img)
        scale = int(work_params.signal_max*(0.1+0.9*mt.random_double()))
        crystal_rotation = mt.random_double_r3_rotation_matrix_arvo_1992()
        i_perm = mt.random_size_t() % len(i_calc_data_perms)
        simulator = image_simple(
            store_miller_index_i_seqs=True,
            store_spots=True,
            store_signals=True,
            set_pixels=True)
        image = simulator.compute(
            unit_cell=i_calc.unit_cell(),
            miller_indices=i_calc.indices(),
            spot_intensity_factors=i_calc_data_perms[i_perm],
            crystal_rotation_matrix=crystal_rotation,
            ewald_radius=1/work_params.wavelength,
            ewald_proximity=work_params.ewald_proximity,
            signal_max=scale,
            detector_distance=work_params.detector.distance,
            detector_size=work_params.detector.size,
            detector_pixels=work_params.detector.pixels,
            point_spread=work_params.point_spread,
            gaussian_falloff_scale=work_params.gaussian_falloff_scale)
        add_noise(work_params, pixels=image.pixels)
        pixels = image.pixels if work_params.index_and_integrate else None
        miller_index_i_seqs = image.miller_index_i_seqs
        if use_mp:
            # to by-pass portable but slower pickling
            if pixels is not None:
                assert pixels.is_0_based()
                assert not pixels.is_padded()
                assert pixels.all() == tuple(work_params.detector.pixels)
                pixels = pixels.copy_to_byte_str()
            miller_index_i_seqs = miller_index_i_seqs.copy_to_byte_str()
        return image_model(
            pixels=pixels,
            spot_positions=image.spots,
            spot_intensities=image.signals,
            unit_cell=i_calc.unit_cell(),
            crystal_rotation=crystal_rotation,
            miller_index_i_seqs=miller_index_i_seqs,
            scale=scale,
            i_perm=i_perm)

    if use_mp:
        from libtbx import easy_mp
        result = easy_mp.pool_map(
            fixed_func=build_one_image,
            args=range(n_mdls),
            chunksize=1,
            log=sys.stdout)
        # Turn the byte strings back into flex arrays.
        for im in result:
            if im is None:
                raise RuntimeError("Failure building image.")
            if im.pixels is not None:
                im.pixels = flex.int_from_byte_str(im.pixels)
                im.pixels.reshape(flex.grid(work_params.detector.pixels))
            im.miller_index_i_seqs = flex.size_t_from_byte_str(
                byte_str=im.miller_index_i_seqs)
    else:
        result = [build_one_image(i_img) for i_img in range(n_mdls)]

    for im in result:
        im.make_backup()
    return image_models(miller_indices=i_calc.indices(), array=result)