def sa_simple(model, map_data, log):
  """
  Run real-space simulated annealing for a model and store the result in it.

  A deep copy of the model's xray structure is handed to the annealing run
  (start_temperature=5000, cool_rate=500) together with map_data as target
  map, a fixed map weight of 50 and a restraints weight of 1. The coordinates
  of that copy are then written back with model.set_sites_cart().

  Parameters:
    model: object providing get_xray_structure(), get_restraints_manager()
      and set_sites_cart().
    map_data: passed to the annealing run as target_map.
    log: passed to the annealing run as log.

  Returns:
    The model object that was passed in.
  """
  from mmtbx.dynamics import simulated_annealing as sa
  # The map weight is a fixed constant. An earlier revision obtained it from a
  # preliminary mmtbx.refinement.real_space.individual_sites.easy() run
  # (weight = ro.w); that run is disabled, so no extra copy of the xray
  # structure is made for it any more.
  weight = 50
  annealed_xrs = model.get_xray_structure().deep_copy_scatterers()
  params = sa.master_params().extract()
  params.start_temperature = 5000
  params.cool_rate = 500
  # presumably sa.run moves the scatterers of annealed_xrs in place -- the
  # coordinates are read back from it right after the call.
  sa.run(
    params=params,
    xray_structure=annealed_xrs,
    real_space=True,
    target_map=map_data,
    restraints_manager=model.get_restraints_manager(),
    wx=weight,
    wc=1.,
    verbose=False,
    log=log)
  model.set_sites_cart(sites_cart=annealed_xrs.sites_cart())
  return model
def sa_simple(rm, xrs, ph, map_data, log):
  """
  Anneal a copy of xrs in real space and return the resulting coordinates.

  A preliminary individual_sites.easy() refinement is run on throw-away copies
  of the structure and hierarchy; only its weight (attribute w) is used, as
  the map weight of the simulated annealing run that follows
  (start_temperature=5000, cool_rate=500, restraints weight 1).

  Parameters:
    rm: restraints manager, used by both the refinement and the annealing.
    xrs: xray structure; never modified, only deep copies are worked on.
    ph: PDB hierarchy; a deep copy is given to the preliminary refinement.
    map_data: target map for both steps.
    log: output stream for both steps.

  Returns:
    sites_cart() of the annealed copy of xrs.
  """
  from mmtbx.dynamics import simulated_annealing as sa
  # Step 1: preliminary refinement, needed only for the map weight.
  weight_probe = mmtbx.refinement.real_space.individual_sites.easy(
    map_data=map_data,
    xray_structure=xrs.deep_copy_scatterers(),
    pdb_hierarchy=ph.deep_copy(),
    geometry_restraints_manager=rm,
    rms_bonds_limit=0.01,
    rms_angles_limit=1.0,
    selection=None,  # TODO
    log=log)
  map_weight = weight_probe.w
  # Step 2: simulated annealing on a fresh copy of the input structure.
  annealed_xrs = xrs.deep_copy_scatterers()
  sa_params = sa.master_params().extract()
  sa_params.start_temperature = 5000
  sa_params.cool_rate = 500
  sa.run(
    params=sa_params,
    xray_structure=annealed_xrs,
    real_space=True,
    target_map=map_data,
    restraints_manager=rm,
    wx=map_weight,
    wc=1.,
    verbose=False,
    log=log)
  return annealed_xrs.sites_cart()
def exercise_1():
  """
  Simulated annealing against structure factors: recover a shaken model.

  Amplitudes computed from the unperturbed model at d_min=2.5 act as observed
  data. The coordinates are displaced with shake_sites (shift=1.5) and then
  annealed against those data. The test asserts that the final r_work is
  below 0.03 and that the mean distance between refined and unperturbed
  coordinates is below 0.25.
  """
  random.seed(0)
  flex.set_random_seed(0)
  dump_pdb_files = False  # set to True to write start/poor/refined PDB files
  inp = get_pdb_inputs(pdb_str=pdb_str_1)
  amplitudes = abs(inp.xrs.structure_factors(d_min=2.5).f_calc())
  free_flags = amplitudes.generate_r_free_flags(use_lattice_symmetry=False)
  if dump_pdb_files:
    inp.ph.adopt_xray_structure(inp.xrs)
    inp.ph.write_pdb_file(
      file_name="start.pdb",
      crystal_symmetry=inp.xrs.crystal_symmetry())
  shaken_xrs = shake_sites(
    xrs=inp.xrs.deep_copy_scatterers(),
    random=False,
    shift=1.5,
    grm=inp.grm)
  if dump_pdb_files:
    inp.ph.adopt_xray_structure(shaken_xrs)
    inp.ph.write_pdb_file(
      file_name="poor.pdb",
      crystal_symmetry=shaken_xrs.crystal_symmetry())
  fmodel = mmtbx.f_model.manager(
    f_obs=amplitudes,
    r_free_flags=free_flags,
    xray_structure=shaken_xrs)
  print("start r_work:", fmodel.r_work())
  # Annealing schedule
  sa_params = sa.master_params().extract()
  sa_params.start_temperature = 3000
  sa_params.final_temperature = 0
  sa_params.cool_rate = 100
  sa_params.number_of_steps = 100
  sa_params.update_grads_shift = 0.
  sa.run(
    params=sa_params,
    fmodel=fmodel,
    restraints_manager=inp.grm,
    wx=20,
    wc=1,
    verbose=True)
  # Check the fit to the data ...
  r = fmodel.r_work()
  print("final r_work:", r)
  assert r < 0.03, r
  # ... and the mean per-atom distance to the unperturbed coordinates.
  shifts = inp.xrs.sites_cart() - fmodel.xray_structure.sites_cart()
  dist = flex.mean(flex.sqrt(shifts.dot()))
  print("Distance(refined, answer): %6.4f" % dist)
  assert dist < 0.25, dist
  if dump_pdb_files:
    inp.ph.adopt_xray_structure(fmodel.xray_structure)
    inp.ph.write_pdb_file(
      file_name="refined.pdb",
      crystal_symmetry=fmodel.xray_structure.crystal_symmetry())
def __init__(
      self,
      xray_structure, # XXX redundant
      pdb_hierarchy,
      restraints_manager,
      map_data,
      number_of_trials,
      nproc,
      weight):
  """
  Run number_of_trials simulated annealing trials and collect the results.

  All arguments are stored as attributes of the same name. One random seed is
  drawn per trial and self.run(random_seed) is called for each of them, either
  serially or through libtbx.easy_mp.pool_map when nproc > 1. Every returned
  structure is appended to self.results and its coordinates are added to the
  states collector self.states.

  Parameters:
    xray_structure: starting structure; a deep copy seeds the states collector.
    pdb_hierarchy: hierarchy; a deep copy seeds the states collector.
    restraints_manager, map_data, weight: only stored here; presumably consumed
      by self.run -- confirm there.
    number_of_trials: number of annealing trials (and of random seeds).
    nproc: number of processes; values > 1 enable the multiprocessing path.
  """
  # Must stay the first statement so that locals() holds only the arguments.
  adopt_init_args(self, locals())
  # Initialize states collector
  self.states = mmtbx.utils.states(
    xray_structure=self.xray_structure.deep_copy_scatterers(),
    pdb_hierarchy=self.pdb_hierarchy.deep_copy())
  # SA params
  self.params = sa.master_params().extract()
  self.params.start_temperature = 50000
  self.params.final_temperature = 0
  self.params.cool_rate = 25000
  self.params.number_of_steps = 50
  # minimizer params
  self.grf = geometry_restraints.flags.flags(default=True)
  self.lbfgs_exception_handling_params = \
    scitbx.lbfgs.exception_handling_parameters(
      ignore_line_search_failed_step_at_lower_bound=True,
      ignore_line_search_failed_step_at_upper_bound=True,
      ignore_line_search_failed_maxfev=True)
  # Pre-compute random seeds. range() instead of xrange() keeps this working
  # on Python 3 as well as Python 2.
  random_seeds = [
    random.randint(0, 10000000) for _ in range(self.number_of_trials)]
  # run SA
  self.results = []
  if (self.nproc > 1):
    from libtbx import easy_mp
    stdout_and_results = easy_mp.pool_map(
      processes=self.nproc,
      fixed_func=self.run,
      args=random_seeds,
      func_wrapper="buffer_stdout_stderr")
    # Each item is a (captured output, result) pair; the output is discarded.
    for so, xrs in stdout_and_results:
      self.results.append(xrs)
      self.states.add(sites_cart=xrs.sites_cart())
  else:
    for random_seed in random_seeds:
      # Deep copy so that a later trial cannot alter a stored result.
      xrs = self.run(random_seed=random_seed).deep_copy_scatterers()
      self.results.append(xrs)
      self.states.add(sites_cart=xrs.sites_cart())
  assert len(self.results) == self.number_of_trials
def __init__(
      self,
      xray_structure, # XXX redundant
      pdb_hierarchy,
      restraints_manager,
      map_data,
      number_of_trials,
      nproc,
      weight):
  """
  Set up and immediately execute a batch of simulated annealing trials.

  The constructor arguments become attributes of the same name. For each of
  number_of_trials pre-computed random seeds self.run(random_seed) is invoked
  (through libtbx.easy_mp.pool_map if nproc > 1, otherwise in a plain loop).
  The structures returned by the trials end up in self.results and their
  coordinates in the states collector self.states.

  Parameters:
    xray_structure: starting structure; deep-copied for the states collector.
    pdb_hierarchy: hierarchy; deep-copied for the states collector.
    restraints_manager, map_data, weight: stored only; presumably read by
      self.run -- confirm there.
    number_of_trials: how many trials to run.
    nproc: number of processes; > 1 selects the multiprocessing path.
  """
  # Keep this first: locals() must contain nothing but the arguments.
  adopt_init_args(self, locals())
  # Initialize states collector
  self.states = mmtbx.utils.states(
    xray_structure = self.xray_structure.deep_copy_scatterers(),
    pdb_hierarchy  = self.pdb_hierarchy.deep_copy())
  # SA params
  self.params = sa.master_params().extract()
  self.params.start_temperature = 50000
  self.params.final_temperature = 0
  self.params.cool_rate = 25000
  self.params.number_of_steps = 50
  # minimizer params
  self.grf = geometry_restraints.flags.flags(default=True)
  self.lbfgs_exception_handling_params = \
    scitbx.lbfgs.exception_handling_parameters(
      ignore_line_search_failed_step_at_lower_bound = True,
      ignore_line_search_failed_step_at_upper_bound = True,
      ignore_line_search_failed_maxfev              = True)
  # pre-compute random seeds (range, not xrange: xrange does not exist on
  # Python 3, range exists on both major versions)
  random_seeds = []
  for _ in range(self.number_of_trials):
    random_seeds.append(random.randint(0, 10000000))
  # run SA
  self.results = []
  if(self.nproc > 1):
    from libtbx import easy_mp
    stdout_and_results = easy_mp.pool_map(
      processes    = self.nproc,
      fixed_func   = self.run,
      args         = random_seeds,
      func_wrapper = "buffer_stdout_stderr")
    # pool_map yields (captured output, result) pairs; output is not used.
    for so, xrs in stdout_and_results:
      self.results.append(xrs)
      self.states.add(sites_cart = xrs.sites_cart())
  else:
    for random_seed in random_seeds:
      # Store a deep copy so later trials cannot change this result.
      xrs = self.run(random_seed=random_seed).deep_copy_scatterers()
      self.results.append(xrs)
      self.states.add(sites_cart = xrs.sites_cart())
  assert len(self.results) == self.number_of_trials
def exercise_1():
  """
  Simulated annealing against structure factors: recover a shaken model.

  Amplitudes computed from the unperturbed model at d_min=2.5 are used as
  observed data. The coordinates are displaced with shake_sites (shift=1.5)
  and annealed against those data. The test asserts that the final r_work is
  below 0.03 and that the mean distance between refined and unperturbed
  coordinates is below 0.25.

  Progress lines are printed with single-argument print(...) calls so the
  output is the same whether print is a statement (Python 2) or a function
  (Python 3).
  """
  random.seed(0)
  flex.set_random_seed(0)
  pi = get_pdb_inputs(pdb_str=pdb_str_1)
  f_obs = abs(pi.xrs.structure_factors(d_min = 2.5).f_calc())
  r_free_flags = f_obs.generate_r_free_flags(use_lattice_symmetry=False)
  if(0):  # debugging aid: write the starting model
    pi.ph.adopt_xray_structure(pi.xrs)
    pi.ph.write_pdb_file(file_name="start.pdb",
      crystal_symmetry = pi.xrs.crystal_symmetry())
  xrs_poor = shake_sites(xrs = pi.xrs.deep_copy_scatterers(), random=False,
    shift = 1.5, grm=pi.grm)
  if(0):  # debugging aid: write the shaken model
    pi.ph.adopt_xray_structure(xrs_poor)
    pi.ph.write_pdb_file(file_name="poor.pdb",
      crystal_symmetry = xrs_poor.crystal_symmetry())
  fmodel = mmtbx.f_model.manager(
    f_obs          = f_obs,
    r_free_flags   = r_free_flags,
    xray_structure = xrs_poor)
  print("start r_work: %s" % fmodel.r_work())
  # Annealing schedule
  params = sa.master_params().extract()
  params.start_temperature = 3000
  params.final_temperature = 0
  params.cool_rate = 100
  params.number_of_steps = 100
  params.update_grads_shift = 0.
  #
  sa.run(
    params             = params,
    fmodel             = fmodel,
    restraints_manager = pi.grm,
    wx                 = 20,
    wc                 = 1,
    verbose            = True)
  #
  r = fmodel.r_work()
  print("final r_work: %s" % r)
  assert r < 0.03, r
  # Mean per-atom distance between refined and unperturbed coordinates.
  dist = flex.mean(flex.sqrt((pi.xrs.sites_cart() -
    fmodel.xray_structure.sites_cart()).dot()))
  print("Distance(refined, answer): %6.4f" % dist)
  assert dist < 0.25, dist
  if(0):  # debugging aid: write the refined model
    pi.ph.adopt_xray_structure(fmodel.xray_structure)
    pi.ph.write_pdb_file(file_name="refined.pdb",
      crystal_symmetry = fmodel.xray_structure.crystal_symmetry())
def anneal(self,
    simulated_annealing_params=None,
    start_temperature=None,
    cool_rate=None,
    number_of_steps=50):
  """
  Real-space simulated annealing of the boxed structure against the target
  map (which is not the RSR map when the two differ).

  The map weight comes from a preceding self.real_space_refine() call over
  the whole box. Atoms of the box that are not part of the selection should
  in practice nearly always be restrained to their current positions;
  arranging that is the responsibility of the caller.

  Parameters:
    simulated_annealing_params: extracted annealing parameters; the module
      defaults are used when None. The object is modified in place by the
      overrides below.
    start_temperature, cool_rate, number_of_steps: when not None, override
      the attribute of the same name in the parameters.
  """
  from mmtbx.dynamics import simulated_annealing
  import mmtbx.utils
  wx = self.real_space_refine(selection=self.selection_all_box)
  # Debug mode: dump the starting box and record a trajectory.
  trajectory = None
  if self.debug:
    self.box.write_pdb_file("box_start.pdb")
    trajectory = mmtbx.utils.states(
      xray_structure=self.box.xray_structure_box,
      pdb_hierarchy=self.box.pdb_hierarchy_box)
  sa_params = simulated_annealing_params
  if sa_params is None:
    sa_params = simulated_annealing.master_params().extract()
  overrides = (
    ("start_temperature", start_temperature),
    ("cool_rate", cool_rate),
    ("number_of_steps", number_of_steps))
  for attr_name, value in overrides:
    if value is not None:
      setattr(sa_params, attr_name, value)
  simulated_annealing.run(
    params=sa_params,
    fmodel=None,
    xray_structure=self.box.xray_structure_box,
    real_space=True,
    target_map=self.target_map_box,
    restraints_manager=self.box_restraints_manager,
    wx=wx,
    wc=1.0,
    log=self.out,
    verbose=True,
    states_collector=trajectory)
  if trajectory is not None:
    trajectory.write("box_traj.pdb")
  self.update_coordinates(self.box.xray_structure_box.sites_cart())
def anneal(self,
    simulated_annealing_params=None,
    start_temperature=None,
    cool_rate=None,
    number_of_steps=50):
  """
  Anneal the boxed structure in real space, using the target map rather than
  the RSR map (in case they are not the same).

  self.real_space_refine() is run over the whole box first and its return
  value is used as the map weight. Restraining the non-selected atoms of the
  box to their current positions is almost always advisable, but has to be
  set up by the calling code.

  Parameters:
    simulated_annealing_params: extracted annealing parameters, or None to
      start from the module defaults; modified in place by the overrides.
    start_temperature, cool_rate, number_of_steps: if not None, replace the
      corresponding attribute of the parameters.
  """
  from mmtbx.dynamics import simulated_annealing
  import mmtbx.utils
  map_weight = self.real_space_refine(selection=self.selection_all_box)
  if self.debug:
    self.box.write_pdb_file("box_start.pdb")
    collector = mmtbx.utils.states(
      xray_structure=self.box.xray_structure_box,
      pdb_hierarchy=self.box.pdb_hierarchy_box)
  else:
    collector = None
  if simulated_annealing_params is None:
    run_params = simulated_annealing.master_params().extract()
  else:
    run_params = simulated_annealing_params
  # Explicit keyword values take precedence over what the parameters hold.
  if start_temperature is not None:
    run_params.start_temperature = start_temperature
  if cool_rate is not None:
    run_params.cool_rate = cool_rate
  if number_of_steps is not None:
    run_params.number_of_steps = number_of_steps
  box = self.box
  simulated_annealing.run(
    params=run_params,
    fmodel=None,
    xray_structure=box.xray_structure_box,
    real_space=True,
    target_map=self.target_map_box,
    restraints_manager=self.box_restraints_manager,
    wx=map_weight,
    wc=1.0,
    log=self.out,
    verbose=True,
    states_collector=collector)
  # The trajectory exists only in debug mode.
  if collector is not None:
    collector.write("box_traj.pdb")
  self.update_coordinates(self.box.xray_structure_box.sites_cart())
def exercise_3():
  """
  Run simulated annealing with interleaved minimization and save all states.

  The annealing run gets a copy of the input structure and the restraints
  manager only (no fmodel or target map is passed). Every state handed to the
  collector is written to all.pdb. There are no assertions; the exercise
  passes as long as the run completes.
  """
  inputs = get_pdb_inputs(pdb_str=pdb_str_1)
  working_xrs = inputs.xrs.deep_copy_scatterers()
  sites_cart_start = working_xrs.sites_cart()  # not used further down
  trajectory = mmtbx.utils.states(pdb_hierarchy=inputs.ph)
  # Annealing schedule
  settings = sa.master_params().extract()
  schedule = (
    ("start_temperature", 5000),
    ("final_temperature", 0),
    ("cool_rate", 100),
    ("number_of_steps", 100),
    ("update_grads_shift", 0.),
    ("time_step", 0.0005),
    ("interleave_minimization", True))
  for attr_name, value in schedule:
    setattr(settings, attr_name, value)
  sa.run(
    params=settings,
    xray_structure=working_xrs,
    restraints_manager=inputs.grm,
    states_collector=trajectory)
  trajectory.write(file_name="all.pdb")
def exercise_3():
  """
  Run simulated annealing with interleaved minimization and save all states.

  Only a copy of the input structure and the restraints manager are given to
  the annealing run (no fmodel or target map is passed). The states collector
  is created from the hierarchy and that same structure copy, and its content
  is written to all.pdb. There are no assertions; the exercise passes as long
  as the run completes.
  """
  inputs = get_pdb_inputs(pdb_str=pdb_str_1)
  working_xrs = inputs.xrs.deep_copy_scatterers()
  sites_cart_start = working_xrs.sites_cart()  # not used further down
  collector = mmtbx.utils.states(
    pdb_hierarchy=inputs.ph,
    xray_structure=working_xrs)
  # Annealing schedule
  sa_params = sa.master_params().extract()
  sa_params.start_temperature = 5000
  sa_params.final_temperature = 0
  sa_params.cool_rate = 100
  sa_params.number_of_steps = 100
  sa_params.update_grads_shift = 0.
  sa_params.time_step = 0.0005
  sa_params.interleave_minimization = True
  run_kwargs = dict(
    params=sa_params,
    xray_structure=working_xrs,
    restraints_manager=inputs.grm,
    states_collector=collector)
  sa.run(**run_kwargs)
  collector.write(file_name="all.pdb")
def exercise_2(d_min = 1.5):
  """
  Real-space simulated annealing test, with and without shaking the model.

  For shake in (True, False): a target map is computed from the unperturbed
  model at d_min, a map weight is determined with a trial real-space
  refinement, and the (optionally shaken, shift=2.0) model is annealed against
  the map. The test asserts r_work < 0.07 in both cases and a mean distance to
  the unperturbed coordinates below 0.35 (shaken) or 0.06 (not shaken).

  Parameters:
    d_min: resolution limit used for the structure factors and the map; the
      real-space gradient delta is d_min/4.
  """
  random.seed(2679941)
  flex.set_random_seed(2679941)
  for shake in [True, False]:
    pi = get_pdb_inputs(pdb_str=pdb_str_1)
    f_obs = abs(pi.xrs.structure_factors(d_min = d_min).f_calc())
    r_free_flags = f_obs.generate_r_free_flags(use_lattice_symmetry=False)
    xrs_poor = pi.xrs.deep_copy_scatterers()
    if(shake):
      xrs_poor = shake_sites(xrs = pi.xrs.deep_copy_scatterers(), random=False,
        shift = 2.0, grm=pi.grm)
    fmodel = mmtbx.f_model.manager(
      f_obs          = f_obs,
      r_free_flags   = r_free_flags,
      xray_structure = xrs_poor)
    print("start r_work:", fmodel.r_work())
    # Target map computed from the unperturbed model, sigma-scaled.
    f_calc = pi.xrs.structure_factors(d_min = d_min).f_calc()
    fft_map = f_calc.fft_map(resolution_factor=0.25)
    fft_map.apply_sigma_scaling()
    target_map = fft_map.real_map_unpadded()
    # find optimal weight
    rsr_simple_refiner = mmtbx.refinement.real_space.individual_sites.simple(
      target_map                  = target_map,
      selection                   = flex.bool(pi.xrs.scatterers().size(), True),
      real_space_gradients_delta  = d_min/4,
      max_iterations              = 150,
      geometry_restraints_manager = pi.grm.geometry)
    # Works on a copy, so xrs_poor itself is left for the annealing run.
    refined = mmtbx.refinement.real_space.individual_sites.refinery(
      refiner                  = rsr_simple_refiner,
      xray_structure           = xrs_poor.deep_copy_scatterers(),
      start_trial_weight_value = 1,
      rms_bonds_limit          = 0.02,
      rms_angles_limit         = 2)
    print(refined.weight_final, refined.rms_bonds_final,
      refined.rms_angles_final)
    # Annealing schedule
    params = sa.master_params().extract()
    params.start_temperature = 5000
    params.final_temperature = 0
    params.cool_rate = 100
    params.number_of_steps = 100
    params.update_grads_shift = 0. # does not change runtime visibly
    #
    sa.run(
      params             = params,
      fmodel             = fmodel,
      real_space         = True,
      target_map         = target_map,
      restraints_manager = pi.grm,
      wx                 = refined.weight_final,
      wc                 = 1.,
      verbose            = True)
    #
    r = fmodel.r_work()
    print("final r_work:", r)
    # The same R-factor limit applies to the shaken and the unshaken case.
    assert r < 0.07, r
    dist = flex.mean(flex.sqrt((pi.xrs.sites_cart() -
      fmodel.xray_structure.sites_cart()).dot()))
    print("Distance(refined, answer): %6.4f"%dist)
    # Report the distance (not r) when a distance check fails.
    if(shake):
      assert dist < 0.35, dist
    else:
      assert dist < 0.06, dist
    if(0):  # debugging aid: write the refined model
      pi.ph.adopt_xray_structure(fmodel.xray_structure)
      pi.ph.write_pdb_file(file_name="refined.pdb",
        crystal_symmetry = fmodel.xray_structure.crystal_symmetry())
def run(self):
  """
  Fit the model into the map by repeated simulated annealing cycles.

  Outline:
    1. Map/model sanity check through map_and_model.input (when a map is
       present) and set-up of the annealing parameters from self.params.
    2. self.params.map_weight is multiplied by self.map_weight_multiply.
    3. sa.run is called in a loop. In explore mode the loop ends once
       max_steps_for_exploration steps were run. Otherwise it ends when
       max_steps_for_final_MD (if given) is reached or when the map-model
       correlation is judged saturated: the per-cycle CC values of the first
       and second half of a checking window are collected and their means
       compared against cc_improvement_threshold.
    4. The fitted model is written to "<output_dir>_cc_<CC>", the original
       map origin is optionally restored, and the directory is renamed to
       include base-pair / stacking-pair / helix / strand counts.

  Returns:
    self.output_dir if an annealing cycle raised;
    the "<output_dir>_cc_<CC>" path if counting secondary-structure elements
    in the fitted file failed (that directory is then moved into
    parameters_exploration/bp_H_E_not_calculated);
    otherwise the final, renamed output directory.
  """
  hierarchy = self.model.get_hierarchy()
  map_data, grid_unit_cell = None, None

  #### <Begin> sanity check for map and model
  if self.map_inp is not None:
    base = map_and_model.input(
      map_data=self.map_inp.map_data(),
      model=self.model,
      crystal_symmetry=self.cs_consensus,
      box=False)
    hierarchy = base.model().get_hierarchy()
    map_data = base.map_data()
    grid_unit_cell = self.map_inp.grid_unit_cell()
  hierarchy.atoms().reset_i_seq()
  #### <End> sanity check for map and model

  # Initialize states accumulator # Pavel's original
  states = mmtbx.utils.states(
    pdb_hierarchy=self.model.get_hierarchy(),
    xray_structure=self.model.get_xray_structure())
  states.add(sites_cart=self.model.get_xray_structure().sites_cart())

  # Because params is a fresh extract of the sa defaults, the core parameters
  # need to be redefined from self.params.
  params = sa.master_params().extract()
  params.start_temperature = self.params.start_temperature
  params.final_temperature = self.params.final_temperature
  params.cool_rate = self.params.cool_rate
  # params.MD_in_each_cycle cannot be set:
  # "AttributeError: Assignment to non-existing attribute "MD_in_each_cycle"
  params.number_of_steps = self.params.number_of_steps

  # '' means "no limit was specified".
  max_steps_for_final_MD = ''
  if (self.params.max_steps_for_final_MD != None):
    max_steps_for_final_MD = self.params.max_steps_for_final_MD

  params.update_grads_shift = 0.
  params.interleave_minimization = False #Pavel will fix the error that occur when params.interleave_minimization=True

  # map_inp, user_map_weight and the cycle counter below are referenced only
  # by the disabled map-weight re-optimization code further down.
  map_inp = self.map_inp
  user_map_weight = self.user_map_weight
  map_weight_multiply = self.map_weight_multiply

  if (self.params.record_states == False): # default choice to avoid > 160 GB memory issue with recording all states for L1 stalk
    states = None

  if (self.params.reoptimize_map_weight_after_each_cycle_during_final_MD == True):
    cycle_so_far_for_map_weight_reoptimization = 0

  splited_model_name = self.model_name[:-4].split("/")
  model_file_name_only = splited_model_name[len(splited_model_name) - 1]

  # Number of atoms seen so far: tRNA 1,563; L1 stalk 3,289; Mg channel 14,940.
  # The atom count seems irrelevant to the choice of check_cc_after_these_steps,
  # but Mg channel with a 10k check took 10 days!

  #### <begin> prepare/initialize for iteration
  check_cc_after_these_steps = '' # use '# of steps' not '# of iterations'
  if (("tst_cryo_fit2" in model_file_name_only) == True):
    # If this is too small (like 100), it may run forever. 500 is definitely
    # too small to explore properly (a helix), but this is just for test.
    check_cc_after_these_steps = 700
  else:
    check_cc_after_these_steps = 10000
    # Even if this value is small (e.g. 2000), the empty 1st/2nd array error
    # is avoided by the fail-proof re-assignment below.

  number_of_MD_in_each_cycle = 1 + (
    (params.start_temperature - params.final_temperature) / params.cool_rate) # same value as MD_in_each_cycle

  # Regardless of above assignment, re-assign check_cc_after_these_steps to
  # avoid empty 1st_2nd_array situation
  check_cc_after_these_steps = check_cc_after_these_steps + params.number_of_steps * number_of_MD_in_each_cycle * 2

  # (Disabled: reoptimize_map_weight_after_these_steps = 5 for tests, 100
  # otherwise; after 123~171 cycles the full tRNA crashed when map_weight was
  # multiplied too much.)

  if (("tst_cryo_fit2_" in self.model_name) == True):
    self.params.max_steps_for_exploration = 100

  map_weight_before_multiplication = self.params.map_weight

  #### This is the only place where map_weight_multiply is applied (other than
  #### reoptimize_map_weight_if_not_specified for final MD)
  self.params.map_weight = self.params.map_weight * map_weight_multiply

  best_cc_so_far = -999 # tRNA has a negative value of initial cc
  cc_1st_array = []
  cc_2nd_array = []
  result = ''
  total_steps_so_far_for_exploration_and_final_MD = 0
  total_steps_so_far_for_cc_check = 0 # initialization
  #### <end> prepare/initialize for iteration

  grm = self.model.get_restraints_manager()
  pdb_hierarchy = self.model.get_hierarchy()

  # (Disabled: nucleic_acids.get_stacking_proxies(...) -- it takes exactly 5
  # arguments and this class has no mon_lib_srv attribute.)

  write_this = "\nself.params.map_weight after multiplication (" + str(
    map_weight_multiply) + ") = " + str(
    round(self.params.map_weight, 1)) + "\n"
  print(write_this)
  self.logfile.write(str(write_this))

  ########################### <begin> iterate until cryo_fit2 derived cc saturates
  # range(100000000) runs well with cryo_fit2.run_tests; range(1000000000)
  # failed there with too much memory (bigger than 30 GB).
  for i in range(100000000):
    write_this = "\n" + str(i + 1) + "th iteration: \n"
    print(write_this)
    self.logfile.write(str(write_this))

    try:
      if (self.params.progress_on_screen == True): # default choice
        result = sa.run(
          params=params,
          xray_structure=self.model.get_xray_structure(),
          restraints_manager=grm,
          target_map=map_data,
          real_space=True,
          wx=self.params.map_weight,
          wc=1, # weight for geometry conformation
          states_collector=states)
      else: # (self.params.progress_on_screen = False):
        # With log given, "temp= xx dist_moved= xx angles= xx bonds= xx" goes
        # to cryo_fit2.log rather than to the screen.
        result = sa.run(
          params=params,
          xray_structure=self.model.get_xray_structure(),
          restraints_manager=grm,
          target_map=map_data,
          real_space=True,
          wx=self.params.map_weight,
          wc=1, # weight for geometry conformation
          states_collector=states,
          log=self.logfile)
    except Exception as ex:
      # Deliberately broad: any failure of the dynamics run is logged and
      # reported to the caller through the return value.
      write_this = "exception message:" + str(ex)
      print(write_this)
      self.logfile.write(str(write_this))
      write_this = "Failed during core map weight multiplied phenix.dynamics run.\n"
      print(write_this)
      self.logfile.write(str(write_this))
      return self.output_dir

    total_steps_so_far_for_exploration_and_final_MD = total_steps_so_far_for_exploration_and_final_MD \
      + int(params.number_of_steps*number_of_MD_in_each_cycle)

    cc_after_small_MD = calculate_overall_cc(
      map_data=map_data,
      model=self.model,
      resolution=self.params.resolution)
    write_this = "CC after this cycle (a small MD iteration): " + str(
      round(cc_after_small_MD, 7)) + "\n"
    self.logfile.write(str(write_this))

    if (self.params.explore == True):
      # Exploration runs a fixed number of steps; no saturation check.
      if (total_steps_so_far_for_exploration_and_final_MD < self.params.max_steps_for_exploration):
        write_this = "\ntotal_steps_so_far_for_exploration_and_final_MD (" + str(total_steps_so_far_for_exploration_and_final_MD) + \
          ") < max_steps_for_exploration (" + str(self.params.max_steps_for_exploration) + ")\n"
        print('%s' % (write_this))
        self.logfile.write(str(write_this))
        continue
      else:
        write_this = "\ntotal_steps_so_far_for_exploration_and_final_MD (" + str(total_steps_so_far_for_exploration_and_final_MD) + \
          ") >= max_steps_for_exploration (" + str(self.params.max_steps_for_exploration) + ")\n"
        print('%s' % (write_this))
        self.logfile.write(str(write_this))
        break

    ############# All below is for final MD
    total_steps_so_far_for_cc_check = total_steps_so_far_for_cc_check + int(
      params.number_of_steps * number_of_MD_in_each_cycle)

    cc_improvement_threshold = ''
    if (("tst_cryo_fit2" in model_file_name_only) == True):
      cc_improvement_threshold = 0.05 # to finish regression quickly # took 2 min for tst_2?
    else:
      cc_improvement_threshold = 0.00001 # even a 0.0001 improved cc further eventually significantly

    if (max_steps_for_final_MD != ''):
      if (total_steps_so_far_for_exploration_and_final_MD >= max_steps_for_final_MD):
        write_this = ''
        if (self.params.explore == True):
          write_this = "\ntotal_steps_so_far_for_exploration_and_final_MD (" + str(total_steps_so_far_for_exploration_and_final_MD) + \
            ") >= A specified max_steps_for_final_MD (" + str(max_steps_for_final_MD) + ")\n"
        else:
          write_this = "\ntotal steps final_MD (" + str(total_steps_so_far_for_exploration_and_final_MD) + \
            ") >= A specified max_steps_for_final_MD (" + str(max_steps_for_final_MD) + ")\n"
        print('%s' % (write_this))
        self.logfile.write(str(write_this))
        break

    # First half of the checking window goes to cc_1st_array, the rest to
    # cc_2nd_array.
    if (float(total_steps_so_far_for_cc_check) < float(
        check_cc_after_these_steps / 2)):
      cc_1st_array.append(cc_after_small_MD)
    else:
      cc_2nd_array.append(cc_after_small_MD)

    # (Disabled: re-optimizing map_weight after each cycle with
    # reoptimize_map_weight_if_not_specified(self, user_map_weight, map_inp)
    # followed by multiplication with map_weight_multiply. It was confirmed
    # that this changed results (cc, SS stat) significantly.)

    # total_steps_so_far_for_cc_check is thought to be re-initialized in all
    # circumstances. However, it seems not.
    if (total_steps_so_far_for_cc_check >= check_cc_after_these_steps):

      if (cc_after_small_MD > best_cc_so_far):
        write_this = "current_cc (" + str(
          cc_after_small_MD) + ") > best_cc_so_far (" + str(
          best_cc_so_far) + "). \nTherefore, cryo_fit2 will run longer MD.\n"
        print('%s' % (write_this))
        self.logfile.write(str(write_this))

        write_this = "cc_after_small_MD - best_cc_so_far = " + str(
          float_to_str(cc_after_small_MD - best_cc_so_far)
        ) + "\n" # this "\n" is essential for screen print
        print('%s' % (write_this))
        self.logfile.write(str(write_this))

        # Without this if clause, later MD cycles that improve just tiny
        # fractions of cc take too long time.
        if (float(cc_after_small_MD - best_cc_so_far) > cc_improvement_threshold):
          write_this = "cc_after_small_MD - best_cc_so_far > cc_improvement_threshold (" + str(
            float_to_str(cc_improvement_threshold)
          ) + "). Iterates longer.\n"
          print('%s' % (write_this))
          self.logfile.write(str(write_this))

          best_cc_so_far = cc_after_small_MD
          cc_1st_array = [] # reset
          cc_2nd_array = [] # reset
          total_steps_so_far_for_cc_check = 0 # reset
          continue
        else:
          write_this = "cc_after_small_MD - best_cc_so_far <= " + str(
            float_to_str(cc_improvement_threshold)
          ) + ". Goes to mean_array_comparison.\n"
          print('%s' % (write_this))
          self.logfile.write(str(write_this))
      else:
        write_this = "current_cc (" + str(
          cc_after_small_MD) + ") <= best_cc_so_far (" + str(
          best_cc_so_far) + ")\n"
        print('%s' % (write_this))
        self.logfile.write(str(write_this))

      # mean_array_comparison
      if (np.mean(cc_2nd_array) > np.mean(cc_1st_array)):
        write_this = "mean of cc_2nd_array (" + str(
          np.mean(cc_2nd_array)
        ) + ") > mean of cc_1st_array (" + str(
          np.mean(cc_1st_array)) + ")\n"
        print('%s' % (write_this))
        self.logfile.write(str(write_this))

        write_this = "(mean of cc_2nd_array) - (mean of cc_1st_array): " + str(
          np.mean(cc_2nd_array) - np.mean(cc_1st_array)) + "\n"
        print('%s' % (write_this))
        self.logfile.write(str(write_this))

        # Without this if clause, later MD cycles that improve just tiny
        # fractions of cc take too long time.
        if ((np.mean(cc_2nd_array) - np.mean(cc_1st_array)) > cc_improvement_threshold):
          cc_1st_array = [] # reset
          cc_2nd_array = [] # reset
          total_steps_so_far_for_cc_check = 0 # reset
        else:
          write_this = "cc values are saturated\n"
          print('%s' % (write_this))
          self.logfile.write(str(write_this))

          if (self.params.explore == True): # no need to report cc after explore
            write_this = "total_steps_so_far_for_exploration_and_final_MD: " + str(
              total_steps_so_far_for_exploration_and_final_MD) + "\n"
          else: # (self.params.explore = False): # no need to report cc after explore
            write_this = "total_steps for final_MD: " + str(
              total_steps_so_far_for_exploration_and_final_MD) + "\n"
          print('%s' % (write_this))
          self.logfile.write(str(write_this))
          break

      else: #(np.mean(cc_2nd_array) <= np.mean(cc_1st_array)):
        write_this = "mean of cc_2nd_array (" + str(
          np.mean(cc_2nd_array)
        ) + ") <= mean of cc_1st_array (" + str(
          np.mean(cc_1st_array)) + ")\n"
        print('%s' % (write_this))
        self.logfile.write(str(write_this))

        write_this = "cc values are saturated\n"
        print('%s' % (write_this))
        self.logfile.write(str(write_this))

        if (self.params.explore == True): # no need to report cc after explore
          write_this = "total_steps_so_far_for_exploration_and_final_MD: " + str(
            total_steps_so_far_for_exploration_and_final_MD) + "\n"
        else: # (self.params.explore = False): # no need to report cc after explore
          write_this = "total_steps for final_MD: " + str(
            total_steps_so_far_for_exploration_and_final_MD) + "\n"
        print('%s' % (write_this))
        self.logfile.write(str(write_this))
        break
  ######################### <end> iterate until cryo_fit2 derived cc saturates

  overall_cc_after_cryo_fit2 = calculate_overall_cc(
    map_data=map_data,
    model=self.model,
    resolution=self.params.resolution)

  write_this = "\nFinal MD of cryo_fit2 is done.\n"
  print('%s' % (write_this))
  self.logfile.write(str(write_this))

  # CC_overall after cryo_fit2 is deliberately not reported here because it
  # differs from CC_box (the report used to be emitted for non-explore runs).

  output_dir_w_CC = str(self.output_dir) + "_cc_" + str(
    round(overall_cc_after_cryo_fit2, 3))
  if os.path.exists(output_dir_w_CC):
    shutil.rmtree(output_dir_w_CC)
  os.mkdir(output_dir_w_CC)

  if (self.params.record_states == True):
    all_state_file = os.path.join(output_dir_w_CC, "all_states.pdb")
    states.write(file_name=all_state_file)

  self.model.set_xray_structure(result.xray_structure)

  fitted_file_name = model_file_name_only + "_cryo_fit2_fitted.pdb"
  fitted_file_name_w_path = os.path.join(output_dir_w_CC, fitted_file_name)

  ##### this is essential to spit cryo_fitted2 file
  # The with statement closes the file; no explicit close() is needed.
  with open(fitted_file_name_w_path, "w") as f:
    f.write(self.model.model_as_pdb())

  ######## How to fix map origin problem in cryo_fit2 #######
  # With 0,0,0 origin map, cryo_fit2 has no problem.
  # However, with non-0,0,0 origin cryo-EM map, cryo_fit2 results cryo_fitted
  # pdb model at "wrong" origin. This is because probably dynamics part uses
  # map at 0,0,0 origin. Therefore, cryo_fit2 identifies how much the map
  # origin was moved, then update all xyz coordinates of output pdb file.
  # In user's perspective, there is nothing to bother. All kinds of mrc files
  # (e.g. "Regular", emdb downloaded, went through phenix.map_box, gaussian
  # filtered by UCSF Chimera and went through relion_image_handler) work fine.
  #############################################################

  try:
    bp_num_in_fitted_file, sp_num_in_fitted_file, H_num_in_fitted_file, E_num_in_fitted_file = \
      count_bp_sp_H_E_in_fitted_file(fitted_file_name_w_path, output_dir_w_CC, self.logfile)
  except Exception as ex:
    write_this = "exception message:" + str(ex)
    print(write_this)
    self.logfile.write(str(write_this))

    write_this = "(in task_obj loop) An exception occurred in cryo_fit2_run. \n" + \
      " Maybe cryo_fit2 failed to run (\"nan\" or secondary_structure_restraint file generataion failure) for this condition:" + \
      " cool_rate (" + str(round(params.cool_rate, 1)) + ")\n" + \
      " number_of_steps (" + str(params.number_of_steps) + ")\n" + \
      " start_temperature (" + str(params.start_temperature) + ")\n" + \
      " map_weight_multiply (" + str(map_weight_multiply) + ")\n" + \
      " final_temperature (" + str(params.final_temperature) + ")\n" + \
      " map_weight (" + str(round(self.params.map_weight,2)) + ")\n" + \
      " max_steps_for_final_MD (" + str(max_steps_for_final_MD) + ")"
    print(write_this)
    self.logfile.write(str(write_this))

    if (os.path.isdir("parameters_exploration/bp_H_E_not_calculated") == False):
      os.mkdir("parameters_exploration/bp_H_E_not_calculated")
    command_string = "mv " + str(
      output_dir_w_CC) + " parameters_exploration/bp_H_E_not_calculated"
    # self.logfile, not a bare (undefined) logfile: a NameError here would
    # hide the failure that is being handled.
    self.logfile.write(str(command_string))
    libtbx.easy_run.fully_buffered(
      command=command_string).raise_if_errors().stdout_lines
    return output_dir_w_CC

  returned = know_how_much_map_origin_moved(str(self.map_name))
  if (returned != "origin_is_all_zero" and self.params.keep_origin == True):
    write_this = "Restoring original xyz position for a cryo_fit2 fitted atomistic model\n"
    print(write_this)
    self.logfile.write(str(write_this))
    return_to_origin_of_pdb_file(fitted_file_name_w_path, returned[0],
                                 returned[1], returned[2], returned[3])

  if (("tst_cryo_fit2" in fitted_file_name_w_path) == False):
    calculate_RMSD(self, fitted_file_name_w_path)

  output_dir_final = output_dir_w_CC + "_bp_" + str(bp_num_in_fitted_file) + "_sp_" + str(sp_num_in_fitted_file) \
    + "_H_" + str(H_num_in_fitted_file) + "_E_" + str(E_num_in_fitted_file)
  if os.path.exists(output_dir_final):
    shutil.rmtree(output_dir_final)

  mv_command_string = "mv " + output_dir_w_CC + " " + output_dir_final
  libtbx.easy_run.fully_buffered(mv_command_string)

  ############################
  # Record the map weight (before multiplication) inside the final directory.
  current_dir = os.getcwd()
  os.chdir(output_dir_final)
  command_string = "echo " + str(
    map_weight_before_multiplication
  ) + " >> used_map_weight_before_multiplication.txt"
  libtbx.easy_run.fully_buffered(
    command=command_string).raise_if_errors().stdout_lines
  os.chdir(current_dir)
  ############################

  return output_dir_final
def run(args, prefix="tst_00", validated=False):
    """Fit an atomic model into a map with real-space simulated annealing.

    The model and map files are picked out of ``args`` by file extension,
    the remaining arguments are interpreted as PHIL parameters, and
    ``sa.run`` is called in real-space mode against the map.

    Parameters
    ----------
    args : sequence of str
        Command-line style arguments.  An argument ending in ``.cif``,
        ``.ent`` or ``.pdb`` is taken as the model, one ending in ``.ccp4``
        or ``.map`` as the map.  Bare file names are turned into
        ``model=<file>`` / ``map=<file>`` before PHIL processing.  The
        sequence passed in is not modified.
    prefix : str
        Not used by this function; kept so existing callers keep working.
    validated : bool
        When false, the extracted parameters are passed to
        ``validate_params``.

    Side effects
    ------------
    Prints the working PHIL and the starting CC, and writes
    ``all_states.pdb`` and ``refined.pdb`` in the current directory.
    """
    # EMD-3981 has 6exv.ent instead of .pdb
    model_suffixes = ('.cif', '.ent', '.pdb')
    map_suffixes = ('.ccp4', '.map')

    # Work on a private copy: the loop below rewrites entries, and the
    # caller's list should not change under its feet.
    args = list(args)

    user_input_pdb = ''
    user_input_map = ''
    # very simple parsing of model and map
    for i, arg in enumerate(args):
        if arg.endswith(model_suffixes):
            key = 'model'
        elif arg.endswith(map_suffixes):
            key = 'map'
        else:
            continue
        if '=' in arg:
            # Already "key=value": only the value part is a usable file path.
            file_name = arg.partition('=')[2]
        else:
            file_name = arg
            args[i] = '%s=%s' % (key, arg)
        if key == 'model':
            user_input_pdb = file_name
        else:
            user_input_map = file_name

    argument_interpreter = libtbx.phil.command_line.argument_interpreter(
        master_phil=master_phil,
        home_scope="cryo_fit2",
    )

    user_input_pdb = clean_pdb_for_phenix(user_input_pdb)

    phils = []
    for arg in args:
        if not os.path.isfile(arg):
            # Not a file on disk: treat it as a PHIL assignment.
            phils.append(argument_interpreter.process(arg))
            continue
        # Model and map files are handled separately (extension check is
        # not the smartest way to recognise a map).
        if iotbx.pdb.is_pdb_file(arg) or arg.endswith(map_suffixes):
            continue
        try:
            file_phil = phil.parse(file_name=arg)
        except RuntimeError:
            # Best effort: a file that does not parse as PHIL is ignored.
            pass
        else:
            phils.append(file_phil)

    working_phil = master_phil.fetch(sources=phils)
    working_phil.show()
    working_params = working_phil.extract()

    if not validated:
        validate_params(working_params)

    # Compute a target map.  The reader gets its own name so that the
    # imported ccp4_map module is not shadowed.
    from iotbx import ccp4_map
    map_inp = ccp4_map.map_reader(user_input_map)
    print('Map read from %s' % (user_input_map))
    target_map_data = map_inp.map_data()

    # initial atomic model that we want to fit to an EM-map
    pdb_inp = iotbx.pdb.input(file_name=user_input_pdb)
    model = mmtbx.model.manager(model_input=pdb_inp)

    # Initialize states accumulator, seeded with the starting coordinates
    states = mmtbx.utils.states(
        pdb_hierarchy=model.get_hierarchy(),
        xray_structure=model.get_xray_structure())
    states.add(sites_cart=model.get_xray_structure().sites_cart())

    params = sa.master_params().extract()
    params.start_temperature = 2000
    params.final_temperature = 0
    params.cool_rate = 100
    params.number_of_steps = 1000
    params.update_grads_shift = 0.
    # Pavel will fix the error that occurs when interleave_minimization=True
    params.interleave_minimization = False

    print('CC: %s' % (calculate_cc(map_data=target_map_data,
                                   model=model,
                                   resolution=3.)))

    result = sa.run(
        params=params,
        xray_structure=model.get_xray_structure(),
        restraints_manager=model.get_restraints_manager(),
        target_map=target_map_data,
        real_space=True,
        # wx=5 broke helix conformation of tst_00_poor.pdb,
        # wx=100 kept helix well
        wx=100,
        wc=1,
        states_collector=states)

    states.write(file_name="all_states.pdb")
    model.set_xray_structure(result.xray_structure)
    with open("refined.pdb", "w") as f:
        f.write(model.model_as_pdb())
def exercise_2(d_min=1.5):
    """Exercise simulated annealing in real-space mode against a model map.

    Runs twice on the ``pdb_str_1`` model: once starting from coordinates
    shaken by ``shake_sites`` (shift 2.0) and once from the unmodified
    coordinates.  For each run the restraint weight is taken from a trial
    real-space refinement, ``sa.run`` is called with ``real_space=True``,
    and the final r_work and the mean distance to the answer are asserted
    to be below fixed thresholds.

    Parameters
    ----------
    d_min : float
        Resolution used for the calculated structure factors; the
        real-space gradient delta is ``d_min/4``.

    Raises
    ------
    AssertionError
        If the final r_work is not below 0.07, or the mean distance to the
        answer is not below 0.35 (shaken start) / 0.06 (unshaken start).
    """
    random.seed(2679941)
    flex.set_random_seed(2679941)
    for shake in [True, False]:
        pi = get_pdb_inputs(pdb_str=pdb_str_1)
        f_obs = abs(pi.xrs.structure_factors(d_min=d_min).f_calc())
        r_free_flags = f_obs.generate_r_free_flags(use_lattice_symmetry=False)
        xrs_poor = pi.xrs.deep_copy_scatterers()
        if shake:
            xrs_poor = shake_sites(
                xrs=pi.xrs.deep_copy_scatterers(),
                random=False,
                shift=2.0,
                grm=pi.grm)
        fmodel = mmtbx.f_model.manager(
            f_obs=f_obs,
            r_free_flags=r_free_flags,
            xray_structure=xrs_poor)
        # Single-argument print(...) produces the same text whether or not
        # print is a function in the running interpreter.
        print("start r_work: %s" % fmodel.r_work())

        # Target map computed from the answer structure
        f_calc = pi.xrs.structure_factors(d_min=d_min).f_calc()
        fft_map = f_calc.fft_map(resolution_factor=0.25)
        fft_map.apply_sigma_scaling()
        target_map = fft_map.real_map_unpadded()

        # find optimal weight
        rsr_simple_refiner = mmtbx.refinement.real_space.individual_sites.simple(
            target_map=target_map,
            selection=flex.bool(pi.xrs.scatterers().size(), True),
            real_space_gradients_delta=d_min/4,
            max_iterations=150,
            geometry_restraints_manager=pi.grm.geometry)
        refined = mmtbx.refinement.real_space.individual_sites.refinery(
            refiner=rsr_simple_refiner,
            xray_structure=xrs_poor.deep_copy_scatterers(),
            start_trial_weight_value=1,
            rms_bonds_limit=0.02,
            rms_angles_limit=2)
        print("%s %s %s" % (refined.weight_final,
                            refined.rms_bonds_final,
                            refined.rms_angles_final))

        params = sa.master_params().extract()
        params.start_temperature = 5000
        params.final_temperature = 0
        params.cool_rate = 100
        params.number_of_steps = 100
        params.update_grads_shift = 0.  # does not change runtime visibly

        sa.run(
            params=params,
            fmodel=fmodel,
            real_space=True,
            target_map=target_map,
            restraints_manager=pi.grm,
            wx=refined.weight_final,
            wc=1.,
            verbose=True)

        r = fmodel.r_work()
        print("final r_work: %s" % r)
        # Same r_work limit for the shaken and the unshaken start.
        assert r < 0.07, r

        dist = flex.mean(flex.sqrt(
            (pi.xrs.sites_cart() - fmodel.xray_structure.sites_cart()).dot()))
        print("Distance(refined, answer): %6.4f" % dist)
        # Report the distance itself when a distance check fails.
        if shake:
            assert dist < 0.35, dist
        else:
            assert dist < 0.06, dist

        # Debugging aid: change 0 to 1 to write out the refined model.
        if 0:
            pi.ph.adopt_xray_structure(fmodel.xray_structure)
            pi.ph.write_pdb_file(
                file_name="refined.pdb",
                crystal_symmetry=fmodel.xray_structure.crystal_symmetry())