def run(
    params=None,  # params for running from command line
    map_data=None,  # map_data, as_double()
    pdb_inp=None,
    pdb_hierarchy=None,
    crystal_symmetry=None,
    resolution=None,
    scattering_table='n_gaussian',
    smoothing_window=5,
    crossover_atom='CA',
    minimum_matching_atoms=3,
    minimum_length=2,
    dist_max=1.0,
    minimum_improvement=0.01,
    max_regions_to_test=10,
    max_ends_per_region=5,
    maximum_fraction=0.5,
    max_keep=10,
    map_coeffs_file=None, map_coeffs_labels=None,
    pdb_in_file=None,
    pdb_out=None,
    verbose=None,
    out=sys.stdout):
    """Build a composite model from a multi-model hierarchy using a map.

    For every chain / residue range found in the first model, per-residue
    map correlations are obtained for all models (get_cc_dict), smoothed
    (smooth_cc_values) and the allowed crossover positions between models
    are determined (get_crossover_dict). One working model per source model
    is then optimized against the others until the best score stops
    improving, and the composite is written residue by residue, taking
    residue i from the model named in best_model.source_list[i].

    Parameters:
      params: optional parameters object. If supplied it overrides verbose,
        map_coeffs_file, map_coeffs_labels, pdb_in_file, resolution,
        scattering_table, all crossover settings and pdb_out.
      map_data: map. If not supplied it is calculated from map_coeffs_file
        (sigma-scaled FFT map, resolution_factor=0.25).
      pdb_inp, pdb_hierarchy, pdb_in_file: alternative sources of the
        models. If pdb_hierarchy is given, pdb_in_file and pdb_inp must be
        None and crystal_symmetry must be supplied (asserted).
      crystal_symmetry, resolution: taken from the map coefficients (when
        those are read here) or the model input if not supplied; resolution
        is otherwise estimated from map and model.
      smoothing_window: passed to smooth_cc_values.
      crossover_atom, dist_max, minimum_matching_atoms: passed to
        get_crossover_dict.
      minimum_length, minimum_improvement, max_regions_to_test,
        max_ends_per_region, maximum_fraction: passed to model_object.
      max_keep: maximum number of working models kept per cycle.
      pdb_out: optional file name for the final model.
      out: stream for log messages (None means sys.stdout).

    Returns:
      The input hierarchy unchanged if it contains a single model,
      otherwise a new hierarchy constructed from the composite model.

    Raises:
      Sorry: if the map coefficients file or PDB file cannot be found, no
        model is available, or no resolution can be determined.
    """
    if out is None:
        out = sys.stdout  # explode and refine calls it this way

    # get info from params if present
    if params:
        verbose = params.control.verbose
        map_coeffs_file = params.input_files.map_coeffs_file
        map_coeffs_labels = params.input_files.map_coeffs_labels
        pdb_in_file = params.input_files.pdb_in_file
        resolution = params.crystal_info.resolution
        scattering_table = params.crystal_info.scattering_table
        smoothing_window = params.crossover.smoothing_window
        crossover_atom = params.crossover.crossover_atom
        minimum_matching_atoms = params.crossover.minimum_matching_atoms
        minimum_length = params.crossover.minimum_length
        dist_max = params.crossover.dist_max
        minimum_improvement = params.crossover.minimum_improvement
        max_regions_to_test = params.crossover.max_regions_to_test
        max_ends_per_region = params.crossover.max_ends_per_region
        maximum_fraction = params.crossover.maximum_fraction
        max_keep = params.crossover.max_keep
        pdb_out = params.output_files.pdb_out

    # Consistency checks
    if pdb_hierarchy is not None:
        assert pdb_in_file is None
        assert pdb_inp is None
        assert crystal_symmetry is not None
        # XXX more checks here!

    # Get map_data if not present
    if not map_data:
        if not map_coeffs_file or not os.path.isfile(map_coeffs_file):
            raise Sorry("Cannot find the map_coeffs_file '%s'" % (
                str(map_coeffs_file)))
        from mmtbx.building.minimize_chain import get_map_coeffs
        map_coeffs = get_map_coeffs(
            map_coeffs_file, map_coeffs_labels=map_coeffs_labels)

        fft_map = map_coeffs.fft_map(resolution_factor=0.25)
        fft_map.apply_sigma_scaling()
        map_data = fft_map.real_map_unpadded()
        map_data = map_data.as_double()
        # map_coeffs only exists on this branch, so the fall-backs that
        # depend on it have to stay inside it.
        if map_coeffs and not crystal_symmetry:
            crystal_symmetry = map_coeffs.crystal_symmetry()
        if map_coeffs and not resolution:
            resolution = map_coeffs.d_min()

    # Get the starting model
    if pdb_hierarchy is None:
        if pdb_inp is None:
            if not pdb_in_file or not os.path.isfile(pdb_in_file):
                raise Sorry("Cannot read input PDB file '%s'" % (
                    str(pdb_in_file)))
            print("Taking models from %s" % (pdb_in_file), file=out)
            # Context manager so the handle is closed promptly.
            with open(pdb_in_file) as pdb_file:
                pdb_string = pdb_file.read()
            pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_string)
            if pdb_inp is None:
                raise Sorry("Need a model or models")
        if not crystal_symmetry:
            crystal_symmetry = pdb_inp.crystal_symmetry()
        assert crystal_symmetry is not None
        hierarchy = pdb_inp.construct_hierarchy()
    else:
        hierarchy = pdb_hierarchy  # XXX FIXME

    n_models = sum(1 for _ in hierarchy.models())
    if n_models == 1:  # nothing to do
        return hierarchy

    xrs = hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry)
    xrs.scattering_type_registry(table=scattering_table)
    if not resolution:
        from cctbx import maptbx
        resolution = maptbx.resolution_from_map_and_model.run(
            map_data=map_data, xray_structure=xrs).d_min
    if resolution is None:
        raise Sorry("Resolution is required")
    print("\nResolution limit: %7.2f" % (resolution), file=out)
    print("\nSummary of input models", file=out)
    xrs.show_summary(f=out, prefix="  ")

    print("\nReady with %d models and map" % (n_models), file=out)

    # Set up chain_id and resseq (range) for each segment of the first model
    chain_id_and_resseq_list = []
    from mmtbx.secondary_structure.find_ss_from_ca import \
        split_model, get_first_resno, get_last_resno, get_chain_id
    model_list = split_model(hierarchy=hierarchy, only_first_model=True)
    for m in model_list:
        h = m.hierarchy
        chain_id_and_resseq = [
            get_chain_id(h), [get_first_resno(h), get_last_resno(h)]]
        if chain_id_and_resseq not in chain_id_and_resseq_list:
            chain_id_and_resseq_list.append(chain_id_and_resseq)

    # Run through chains separately
    # NOTE: All models of each chain must match exactly

    # Save composite model, chain by chain
    composite_model_stream = StringIO()

    for chain_id_and_resseq in chain_id_and_resseq_list:
        chain_id, [start_resno, end_resno] = chain_id_and_resseq
        atom_selection = get_atom_selection(
            chain_id=chain_id, start_resno=start_resno, end_resno=end_resno)
        asc = hierarchy.atom_selection_cache()
        sel = asc.selection(string=atom_selection)
        sel_hierarchy = hierarchy.select(sel)
        pdb_inp = sel_hierarchy.as_pdb_input(crystal_symmetry=crystal_symmetry)
        ph = pdb_inp.construct_hierarchy()

        print("\nWorking on chain_id='%s' resseq %d:%d\n" % (
            chain_id, start_resno, end_resno), file=out)

        # get CC values for all residues
        cc_dict = get_cc_dict(
            hierarchy=ph, map_data=map_data, d_min=resolution,
            crystal_symmetry=crystal_symmetry,
            table=scattering_table, out=out)

        # smooth CC values with window of smoothing_window
        smoothed_cc_dict = smooth_cc_values(
            cc_dict=cc_dict,
            smoothing_window=smoothing_window,
            verbose=verbose, out=out)

        # figure out all the places where crossover can occur.
        # All models must have the same number of residues, so any entry of
        # cc_dict gives the residue count.
        n_residues = cc_dict[list(cc_dict.keys())[0]].size()

        crossover_dict = get_crossover_dict(
            n_residues=n_residues,
            hierarchy=ph,
            crossover_atom=crossover_atom,
            dist_max=dist_max,
            minimum_matching_atoms=minimum_matching_atoms,
            verbose=verbose, out=out)

        # Now we are ready to identify the best composite model...
        # A composite has residue 0 from model x, residue 1 from model y etc.
        # Each change from model a to model b between residues i and i+1
        # must have a crossover between a and b at either residue i or i+1
        sorted_working_model_list = []
        for key in sorted(cc_dict.keys()):
            working_model = model_object(
                source_id=key,
                cc_dict=cc_dict,
                smoothed_cc_dict=smoothed_cc_dict,
                crossover_dict=crossover_dict,
                minimum_length=minimum_length,
                minimum_improvement=minimum_improvement,
                max_regions_to_test=max_regions_to_test,
                max_ends_per_region=max_ends_per_region,
                maximum_fraction=maximum_fraction)
            if verbose:
                working_model.show_summary(out=out)
            sorted_working_model_list.append(
                [working_model.get_score(), working_model])
        # NOTE(review): ties in score fall through to comparing the model
        # objects themselves; confirm model_object supports ordering.
        sorted_working_model_list.sort()
        sorted_working_model_list.reverse()
        sorted_working_model_list = sorted_working_model_list[:max_keep]
        working_model_list = [m for _, m in sorted_working_model_list]

        # Go through all the working models and cross them with other
        # models to optimize...Then take all the best and cross...
        best_model = sorted_working_model_list[0][1]
        found = True
        cycle = 0
        while found:
            cycle += 1
            print("\nCYCLE %d current best is %7.3f\n" % (
                cycle, best_model.get_score()), file=out)
            found = False
            sorted_working_model_list = []
            for working_model in working_model_list:
                others = [
                    m for m in working_model_list if not working_model == m]
                new_working_model = working_model.optimize_with_others(
                    others=others)
                if not new_working_model:
                    # blank separator goes to the log stream, not stdout
                    print(file=out)
                    continue
                aa = [new_working_model.get_score(), new_working_model]
                if aa not in sorted_working_model_list:
                    sorted_working_model_list.append(aa)
            if not sorted_working_model_list:
                break  # nothing to do

            sorted_working_model_list.sort()
            sorted_working_model_list.reverse()
            sorted_working_model_list = sorted_working_model_list[:max_keep]

            new_working_score, new_working_model = sorted_working_model_list[0]
            if new_working_score > best_model.get_score():
                best_model = new_working_model
                found = True
                if verbose:
                    print("NEW BEST SCORE: %7.2f" % (
                        best_model.get_score()), file=out)
                    best_model.show_summary(out=out)

        print("\nDONE... best is %7.3f\n" % (
            best_model.get_score()), file=out)

        # Create composite of this chain

        # Note residue values. We are going to pick each residue from one of
        # the models. All models list the same residues, so the list from
        # the last matching chain is used.
        residue_list = []
        for model in ph.models():
            for chain in model.chains():
                if chain.id != chain_id:
                    continue
                residue_list = [rg.resseq for rg in chain.residue_groups()]
        residue_list.sort()
        assert len(best_model.source_list) == len(residue_list)

        # ph does not change below, so one selection cache serves all
        # residues.
        asc = ph.atom_selection_cache()
        for source_id, resseq in zip(best_model.source_list, residue_list):
            atom_selection = get_atom_selection(
                model_id=source_id, resseq_sel=resseq)
            sel = asc.selection(string=atom_selection)
            sel_hierarchy = ph.select(sel)
            print(remove_ter(sel_hierarchy.as_pdb_string()),
                  file=composite_model_stream)

    # All done, make a new pdb_hierarchy
    pdb_string = composite_model_stream.getvalue()
    pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_string)
    pdb_hierarchy = pdb_inp.construct_hierarchy()

    if pdb_out:
        # Context manager so the file is closed even if writing fails.
        with open(pdb_out, 'w') as f:
            print(pdb_hierarchy.as_pdb_string(
                crystal_symmetry=crystal_symmetry), file=f)
        # Reported on the log stream like every other message here.
        print("Final model is in: %s\n" % (pdb_out), file=out)
    return pdb_hierarchy
def select_segments_that_match(
    params=None,
    chain_hierarchy=None,
    target_hierarchy=None,
    out=sys.stdout,
    ncs_obj=None,
    target_file=None,  # model
    chain_file=None,  # query
    crystal_symmetry=None,
    max_dist=None,
    quiet=None,
    verbose=None,
    use_crystal_symmetry=None,
    chain_type=None,
    target_length_from_matching_chains=None,
    distance_per_site=None,
    min_similarity=None):
    """Identify the segments of chain_hierarchy that match target_hierarchy
    and write them out.

    chain_hierarchy is split into segments (distance cutoff 5 if
    params.crystal_info.chain_type is "PROTEIN", otherwise 15). Each segment
    is compared with the target through run(); a segment is kept when its
    percentage of 'close' residues lies between
    params.comparison.minimum_percent_match_to_select and
    params.comparison.maximum_percent_match_to_select. The kept segments
    are merged and written to params.output_files.match_pdb_file.

    Parameters:
      params: parameters object (required in practice: it is dereferenced
        unconditionally).
      chain_hierarchy: hierarchy to split into segments (query).
      target_hierarchy: hierarchy the segments are compared with.
      out: stream for log messages.
      target_file, chain_file, quiet: accepted but not used here.
      Remaining keywords are passed through to run() unchanged.

    Returns:
      The merged model returned by merge_hierarchies_from_models.
    """
    from mmtbx.secondary_structure.find_ss_from_ca import \
        split_model, model_info, merge_hierarchies_from_models

    chain_model = model_info(hierarchy=chain_hierarchy)
    if params.crystal_info.chain_type == "PROTEIN":
        distance_cutoff = 5.
    else:
        distance_cutoff = 15.
    chain_models = split_model(
        model=chain_model, distance_cutoff=distance_cutoff)

    # print() with file= replaces the Python 2 "print >>out" form, which
    # fails at run time when print is a function.
    print(
        "Analyzing %s segments and identifying " % (len(chain_models)) +
        " those with " +
        "chain_type=%s and match percentage between %.1f %% and %.1f %% " % (
            params.crystal_info.chain_type,
            params.comparison.minimum_percent_match_to_select,
            params.comparison.maximum_percent_match_to_select),
        file=out)

    # The per-segment comparison must not write its own match file
    local_params = deepcopy(params)
    local_params.output_files.match_pdb_file = None  # required

    models_to_keep = []
    write_header = True
    for cm in chain_models:  # one segment
        # NOTE(review): the run() definitions visible in this file do not
        # accept these keywords (ncs_obj, target_hierarchy, ...); presumably
        # a chain-comparison run() is intended -- confirm which run is in
        # scope here.
        rv = run(
            params=local_params,
            ncs_obj=ncs_obj,
            target_hierarchy=target_hierarchy,
            quiet=True,
            chain_hierarchy=cm.hierarchy, out=null_out(),
            crystal_symmetry=crystal_symmetry,
            max_dist=max_dist,
            verbose=verbose,
            use_crystal_symmetry=use_crystal_symmetry,
            chain_type=chain_type,
            target_length_from_matching_chains=
                target_length_from_matching_chains,
            distance_per_site=distance_per_site,
            min_similarity=min_similarity,
        )
        rv_list = [rv]
        file_list = [params.crystal_info.chain_type]

        close_rmsd, close_n = rv.get_values('close')
        far_away_rmsd, far_away_n = rv.get_values('far_away')
        if close_n + far_away_n < 1:
            continue  # wrong chain type or other failure

        percent_matched = 100. * close_n / max(1, close_n + far_away_n)
        if percent_matched < \
                params.comparison.minimum_percent_match_to_select:
            continue
        if percent_matched > \
                params.comparison.maximum_percent_match_to_select:
            continue

        write_summary(params=params, file_list=file_list, rv_list=rv_list,
                      write_header=write_header, out=out)
        write_header = False
        models_to_keep.append(cm)

    new_model = merge_hierarchies_from_models(
        models=models_to_keep, resid_offset=5)
    # Context manager so the file is closed even if writing fails.
    with open(params.output_files.match_pdb_file, 'w') as ff:
        print(new_model.hierarchy.as_pdb_string(), file=ff)
    print("Wrote %s %s chains with %s residues to %s" % (
        len(models_to_keep), params.crystal_info.chain_type,
        new_model.hierarchy.overall_counts().n_residues,
        params.output_files.match_pdb_file), file=out)
    return new_model
def process_predicted_model(model, params, pae_matrix=None, log=sys.stdout):
    """
    process_predicted_model:
    Purpose:  Convert values in B-value field to pseudo-B-values, remove
      low_confidence residues, optionally split into compact regions.
    Rationale: predicted models may have regions of low and high confidence.
      This routine uses values in the B-value field to identify confidence,
      removes low-confidence regions, and then examines the remaining model
      to find regions that are compact (residues have high contact with
      neighbors) and that are separate from other regions (low contact with
      neighbors).

    Inputs (supplied as model and a params object):
      model:  mmtbx.model.manager object containing model information
             (asserted). Normally contains a single chain.  If multiple
             chains, process each separately.

      The following are read from params.process_predicted_model:

      b_value_field_is:  'lddt' or 'rmsd' or 'b_value'.  For AlphaFold
                          models the b-value field is a value of LDDT
                          (confidence) on scale of 0-1 or 0-100.
                          For RoseTTAFold, the B-value field is rmsd (A).
                          If b_value... it is left as is.

      input_lddt_is_fractional: if True, input lddt is scale of 0 to 1,
          otherwise 0 - 100.
          If None, set to True if all lddt are from 0 to 1.
      remove_low_confidence_residues: remove residues with low confidence
          (lddt or rmsd as set below)
      minimum_lddt: minimum lddt to keep residues (on same scale as
         b_value_field, if not set, calculated from maximum_rmsd).
      maximum_rmsd: alternative specification of minimum confidence based
          on rmsd.  If not set, calculated from minimum_lddt.
      default_maximum_rmsd: used as default if nothing specified for
          maximum_rmsd or minimum_lddt.  Default is 1.5 A.
      minimum_remainder_sequence_length: removed fragments with at least
          this many residues are reported in remainder_sequence_str.

      split_model_by_compact_regions: split resulting model into compact
        regions
      pae_matrix: matrix of predicted aligned errors (e.g., from
         AlphaFold2), NxN matrix of RMSD values, N = number of residues in
         model.  Alternative to splitting by compact regions. Split to
         minimize predicted aligned errors in each grouping.
      pae_power (default=1): each edge in the graph will be weighted
         proportional to (1/pae**pae_power)
      pae_cutoff (optional, default=5): graph edges will only be created
         for residue pairs with pae<pae_cutoff
      pae_graph_resolution: passed to split_model_with_pae.
      domain_size: typical size of domains (resolution used for filtering
         is the domain size)
      minimum_domain_length: minimum length (residues) of a domain to keep
      maximum_domains: if more than this many domains, merge close ones to
         reduce number
      chain_id: if model contains more than one chain, split this chain
         only.  NOTE: only one chain can be processed at a time.

      if subtract_minimum_b is set, subtract minimum(B values) from all
         B values after applying any B value cutoffs

    NOTE: params is modified in place: input_lddt_is_fractional is filled in
      when it is None, and minimum_lddt is rescaled to a fraction.

    Output:
      group_args object containing:
        model:  single processed model (regions identified in chainid field
          when splitting was requested and succeeded)
        model_list:  one model per region (empty if not split)
        chainid_list:  chain IDs of the regions (empty if not split)
        remainder_sequence_str:  sequences of the removed fragments, or
          None if remove_low_confidence_residues is not set

    Raises:
      AssertionError: if model is not an mmtbx.model.manager or
        b_value_field_is is not one of the supported values.
      Sorry: if no residues remain after filtering.

    How to get the parameters object set up:

      You can set up a parameters object like this (see example at end of
      this file as well):

      master_phil = iotbx.phil.parse(master_phil_str)
      params = master_phil.extract()

      The default values are set in the master_phil_str string above.
      You can then set values of params:

      params.process_predicted_model.split_model_by_compact_regions = True
    """

    # Make sure we have what we expect:
    import mmtbx.model
    assert isinstance(model, mmtbx.model.manager)

    # Decide what to do
    p = params.process_predicted_model

    # Determine if input lddt is fractional and get b values
    b_value_field = model.get_hierarchy().atoms().extract_b()
    if p.b_value_field_is == 'lddt':
        if p.input_lddt_is_fractional is None:
            sel = (b_value_field < 0) | (b_value_field > 1)
            p.input_lddt_is_fractional = (sel.count(True) == 0)

        b_values = get_b_values_from_lddt(
            b_value_field,
            input_lddt_is_fractional=p.input_lddt_is_fractional)

        if p.input_lddt_is_fractional:
            print("B-value field interpreted as LDDT %s" % ("(0 - 1)"),
                  file=log)
        else:
            print("B-value field interpreted as LDDT %s" % ("(0 - 100)"),
                  file=log)

    elif p.b_value_field_is == 'rmsd':
        b_values = get_b_values_rmsd(b_value_field)
        print("B-value field interpreted as rmsd %s" % ("(0 - 1)"), file=log)

    elif p.b_value_field_is == 'b_value':
        b_values = b_value_field
        print("B-value field interpreted as b_values", file=log)
    else:
        raise AssertionError(
            "Please set b_value_field_is to lddt, rmsd or b_value")

    if not p.input_lddt_is_fractional:
        if p.minimum_lddt is not None:  # convert to fractional
            p.minimum_lddt = p.minimum_lddt * 0.01
            print("Minimum LDDT converted to %.2f" % (p.minimum_lddt),
                  file=log)

    # From here on we work only with fractional lddt

    # Get confidence cutoff if needed
    if p.remove_low_confidence_residues:
        maximum_b_value = get_cutoff_b_value(
            p.maximum_rmsd,
            p.minimum_lddt,
            default_maximum_rmsd=p.default_maximum_rmsd,
            log=log)
    else:
        maximum_b_value = None

    # Offset b-values and cutoff if requested
    if p.subtract_minimum_b:
        minimum_b = b_values.min_max_mean().min
        b_values -= minimum_b
        assert b_values.min_max_mean().min == 0
        if maximum_b_value is not None:
            maximum_b_value -= minimum_b  # offset this too
        print("Subtracting minimum B of " +
              "%.2f from values and from cutoff (now %s)" % (
                  minimum_b,
                  " %.2f" % maximum_b_value
                  if maximum_b_value is not None else "None"),
              file=log)

    # Make a new model with new B-values
    ph = model.get_hierarchy().deep_copy()
    ph.atoms().set_b(b_values)

    # Remove low_confidence regions if desired
    if p.remove_low_confidence_residues:
        n_before = ph.overall_counts().n_residues
        selection_string = " (bfactor < %s)" % maximum_b_value
        asc1 = ph.atom_selection_cache()
        sel = asc1.selection(selection_string)
        new_ph = ph.select(sel)
        n_after = new_ph.overall_counts().n_residues
        print("Total of %s of %s residues kept after B-factor filtering" % (
            n_after, n_before), file=log)
        if n_after == 0:
            raise Sorry(
                "No residues remaining after filtering...please check if " +
                "B-value field is really '%s'" % (p.b_value_field_is))

        # Report the sequences of the fragments that were removed
        removed_ph = ph.select(~sel)
        from mmtbx.secondary_structure.find_ss_from_ca import model_info, \
            split_model
        from iotbx.bioinformatics import get_sequence_from_hierarchy
        remainder_sequence_str = ""
        for m in split_model(model_info(removed_ph)):
            seq = get_sequence_from_hierarchy(m.hierarchy)
            if len(seq) >= p.minimum_remainder_sequence_length:
                remainder_sequence_str += "\n> fragment sequence "
                remainder_sequence_str += "\n%s\n" % (seq)

        # Continue with the filtered hierarchy; without this the
        # low-confidence residues would still be in the returned model.
        ph = new_ph
    else:
        remainder_sequence_str = None

    # Get a new model
    new_model = model.as_map_model_manager().model_from_hierarchy(
        ph, return_as_model=True)

    # Get high-confidence regions as domains if desired:
    if p.split_model_by_compact_regions:

        # Make sure we have just 1 chain or a chain ID supplied
        # NOTE(review): the result is not used below and None is passed
        # rather than p.chain_id; presumably called for its check only --
        # confirm.
        chain_id = get_chain_id(model, None, log=log)

        if pae_matrix is not None:  # use pae matrix method
            info = split_model_with_pae(
                model, new_model, pae_matrix,
                maximum_domains=p.maximum_domains,
                pae_power=p.pae_power,
                pae_cutoff=p.pae_cutoff,
                pae_graph_resolution=p.pae_graph_resolution,
                minimum_domain_length=p.minimum_domain_length,
                log=log)
        else:  # usual
            info = split_model_into_compact_units(
                new_model,
                d_min=p.domain_size,
                maximum_domains=p.maximum_domains,
                minimum_domain_length=p.minimum_domain_length,
                log=log)
        if info is None:
            print("No compact regions identified", file=log)
            chainid_list = []
            model_list = []
        else:
            new_model = info.model
            chainid_list = info.chainid_list
            print("Total of %s regions identified" % (
                len(chainid_list)), file=log)
            model_list = split_model_by_chainid(new_model, chainid_list)
    else:
        model_list = []
        chainid_list = []

    return group_args(
        group_args_type='processed predicted model',
        model=new_model,
        model_list=model_list,
        chainid_list=chainid_list,
        remainder_sequence_str=remainder_sequence_str,
    )
def run(
    params=None,  # params for running from command line
    map_data=None,  # map_data, as_double()
    pdb_inp=None,
    pdb_hierarchy=None,
    crystal_symmetry=None,
    resolution=None,
    scattering_table='n_gaussian',
    smoothing_window=5,
    crossover_atom='CA',
    minimum_matching_atoms=3,
    minimum_length=2,
    dist_max=1.0,
    minimum_improvement=0.01,
    max_regions_to_test=10,
    max_ends_per_region=5,
    maximum_fraction=0.5,
    max_keep=10,
    map_coeffs_file=None, map_coeffs_labels=None,
    pdb_in_file=None,
    pdb_out=None,
    verbose=None,
    out=sys.stdout):
    """Build a composite model from a multi-model hierarchy using a map.

    For every chain / residue range found in the first model, per-residue
    map correlations are obtained for all models (get_cc_dict), smoothed
    (smooth_cc_values) and the allowed crossover positions between models
    are determined (get_crossover_dict). One working model per source model
    is then optimized against the others until the best score stops
    improving, and the composite is written residue by residue, taking
    residue i from the model named in best_model.source_list[i].

    Parameters:
      params: optional parameters object. If supplied it overrides verbose,
        map_coeffs_file, map_coeffs_labels, pdb_in_file, resolution,
        scattering_table, all crossover settings and pdb_out.
      map_data: map. If not supplied it is calculated from map_coeffs_file
        (sigma-scaled FFT map, resolution_factor=0.25).
      pdb_inp, pdb_hierarchy, pdb_in_file: alternative sources of the
        models. If pdb_hierarchy is given, pdb_in_file and pdb_inp must be
        None and crystal_symmetry must be supplied (asserted).
      crystal_symmetry, resolution: taken from the map coefficients (when
        those are read here) or the model input if not supplied; resolution
        is otherwise estimated from map and model.
      smoothing_window: passed to smooth_cc_values.
      crossover_atom, dist_max, minimum_matching_atoms: passed to
        get_crossover_dict.
      minimum_length, minimum_improvement, max_regions_to_test,
        max_ends_per_region, maximum_fraction: passed to model_object.
      max_keep: maximum number of working models kept per cycle.
      pdb_out: optional file name for the final model.
      out: stream for log messages (None means sys.stdout).

    Returns:
      The input hierarchy unchanged if it contains a single model,
      otherwise a new hierarchy constructed from the composite model.

    Raises:
      Sorry: if the map coefficients file or PDB file cannot be found, no
        model is available, or no resolution can be determined.
    """
    # Text buffer for the composite model (replaces Python 2 cStringIO)
    from io import StringIO

    if out is None:
        out = sys.stdout  # explode and refine calls it this way

    # get info from params if present
    if params:
        verbose = params.control.verbose
        map_coeffs_file = params.input_files.map_coeffs_file
        map_coeffs_labels = params.input_files.map_coeffs_labels
        pdb_in_file = params.input_files.pdb_in_file
        resolution = params.crystal_info.resolution
        scattering_table = params.crystal_info.scattering_table
        smoothing_window = params.crossover.smoothing_window
        crossover_atom = params.crossover.crossover_atom
        minimum_matching_atoms = params.crossover.minimum_matching_atoms
        minimum_length = params.crossover.minimum_length
        dist_max = params.crossover.dist_max
        minimum_improvement = params.crossover.minimum_improvement
        max_regions_to_test = params.crossover.max_regions_to_test
        max_ends_per_region = params.crossover.max_ends_per_region
        maximum_fraction = params.crossover.maximum_fraction
        max_keep = params.crossover.max_keep
        pdb_out = params.output_files.pdb_out

    # Consistency checks
    if pdb_hierarchy is not None:
        assert pdb_in_file is None
        assert pdb_inp is None
        assert crystal_symmetry is not None
        # XXX more checks here!

    # Get map_data if not present
    if not map_data:
        if not map_coeffs_file or not os.path.isfile(map_coeffs_file):
            raise Sorry("Cannot find the map_coeffs_file '%s'" % (
                str(map_coeffs_file)))
        from mmtbx.building.minimize_chain import get_map_coeffs
        map_coeffs = get_map_coeffs(
            map_coeffs_file, map_coeffs_labels=map_coeffs_labels)

        fft_map = map_coeffs.fft_map(resolution_factor=0.25)
        fft_map.apply_sigma_scaling()
        map_data = fft_map.real_map_unpadded()
        map_data = map_data.as_double()
        # map_coeffs only exists on this branch, so the fall-backs that
        # depend on it have to stay inside it.
        if map_coeffs and not crystal_symmetry:
            crystal_symmetry = map_coeffs.crystal_symmetry()
        if map_coeffs and not resolution:
            resolution = map_coeffs.d_min()

    # Get the starting model
    if pdb_hierarchy is None:
        if pdb_inp is None:
            if not pdb_in_file or not os.path.isfile(pdb_in_file):
                raise Sorry("Cannot read input PDB file '%s'" % (
                    str(pdb_in_file)))
            print("Taking models from %s" % (pdb_in_file), file=out)
            # Context manager so the handle is closed promptly.
            with open(pdb_in_file) as pdb_file:
                pdb_string = pdb_file.read()
            pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_string)
            if pdb_inp is None:
                raise Sorry("Need a model or models")
        if not crystal_symmetry:
            crystal_symmetry = pdb_inp.crystal_symmetry()
        assert crystal_symmetry is not None
        hierarchy = pdb_inp.construct_hierarchy()
    else:
        hierarchy = pdb_hierarchy  # XXX FIXME

    n_models = sum(1 for _ in hierarchy.models())
    if n_models == 1:  # nothing to do
        return hierarchy

    xrs = hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry)
    xrs.scattering_type_registry(table=scattering_table)
    if not resolution:
        from cctbx import maptbx
        # NOTE(review): the run(...).d_min form matches the other run() in
        # this file; confirm against the installed cctbx version.
        resolution = maptbx.resolution_from_map_and_model.run(
            map_data=map_data, xray_structure=xrs).d_min
    if resolution is None:
        raise Sorry("Resolution is required")
    print("\nResolution limit: %7.2f" % (resolution), file=out)
    print("\nSummary of input models", file=out)
    xrs.show_summary(f=out, prefix="  ")

    print("\nReady with %d models and map" % (n_models), file=out)

    # Set up chain_id and resseq (range) for each segment of the first model
    chain_id_and_resseq_list = []
    from mmtbx.secondary_structure.find_ss_from_ca import \
        split_model, get_first_resno, get_last_resno, get_chain_id
    model_list = split_model(hierarchy=hierarchy, only_first_model=True)
    for m in model_list:
        h = m.hierarchy
        chain_id_and_resseq = [
            get_chain_id(h), [get_first_resno(h), get_last_resno(h)]]
        if chain_id_and_resseq not in chain_id_and_resseq_list:
            chain_id_and_resseq_list.append(chain_id_and_resseq)

    # Run through chains separately
    # NOTE: All models of each chain must match exactly

    # Save composite model, chain by chain
    composite_model_stream = StringIO()

    for chain_id_and_resseq in chain_id_and_resseq_list:
        chain_id, [start_resno, end_resno] = chain_id_and_resseq
        atom_selection = get_atom_selection(
            chain_id=chain_id, start_resno=start_resno, end_resno=end_resno)
        asc = hierarchy.atom_selection_cache()
        sel = asc.selection(string=atom_selection)
        sel_hierarchy = hierarchy.select(sel)
        pdb_inp = sel_hierarchy.as_pdb_input(crystal_symmetry=crystal_symmetry)
        ph = pdb_inp.construct_hierarchy()

        print("\nWorking on chain_id='%s' resseq %d:%d\n" % (
            chain_id, start_resno, end_resno), file=out)

        # get CC values for all residues
        cc_dict = get_cc_dict(
            hierarchy=ph, map_data=map_data, d_min=resolution,
            crystal_symmetry=crystal_symmetry,
            table=scattering_table, out=out)

        # smooth CC values with window of smoothing_window
        smoothed_cc_dict = smooth_cc_values(
            cc_dict=cc_dict,
            smoothing_window=smoothing_window,
            verbose=verbose, out=out)

        # figure out all the places where crossover can occur.
        # dict views are not indexable, so make a list first. All models
        # must have the same number of residues, so any entry will do.
        n_residues = cc_dict[list(cc_dict.keys())[0]].size()

        crossover_dict = get_crossover_dict(
            n_residues=n_residues,
            hierarchy=ph,
            crossover_atom=crossover_atom,
            dist_max=dist_max,
            minimum_matching_atoms=minimum_matching_atoms,
            verbose=verbose, out=out)

        # Now we are ready to identify the best composite model...
        # A composite has residue 0 from model x, residue 1 from model y etc.
        # Each change from model a to model b between residues i and i+1
        # must have a crossover between a and b at either residue i or i+1
        sorted_working_model_list = []
        for key in sorted(cc_dict.keys()):
            working_model = model_object(
                source_id=key,
                cc_dict=cc_dict,
                smoothed_cc_dict=smoothed_cc_dict,
                crossover_dict=crossover_dict,
                minimum_length=minimum_length,
                minimum_improvement=minimum_improvement,
                max_regions_to_test=max_regions_to_test,
                max_ends_per_region=max_ends_per_region,
                maximum_fraction=maximum_fraction)
            if verbose:
                working_model.show_summary(out=out)
            sorted_working_model_list.append(
                [working_model.get_score(), working_model])
        # NOTE(review): ties in score fall through to comparing the model
        # objects themselves; confirm model_object supports ordering.
        sorted_working_model_list.sort()
        sorted_working_model_list.reverse()
        sorted_working_model_list = sorted_working_model_list[:max_keep]
        working_model_list = [m for _, m in sorted_working_model_list]

        # Go through all the working models and cross them with other
        # models to optimize...Then take all the best and cross...
        best_model = sorted_working_model_list[0][1]
        found = True
        cycle = 0
        while found:
            cycle += 1
            print("\nCYCLE %d current best is %7.3f\n" % (
                cycle, best_model.get_score()), file=out)
            found = False
            sorted_working_model_list = []
            for working_model in working_model_list:
                others = [
                    m for m in working_model_list if not working_model == m]
                new_working_model = working_model.optimize_with_others(
                    others=others)
                if not new_working_model:
                    # blank separator goes to the log stream, not stdout
                    print(file=out)
                    continue
                aa = [new_working_model.get_score(), new_working_model]
                if aa not in sorted_working_model_list:
                    sorted_working_model_list.append(aa)
            if not sorted_working_model_list:
                break  # nothing to do

            sorted_working_model_list.sort()
            sorted_working_model_list.reverse()
            sorted_working_model_list = sorted_working_model_list[:max_keep]

            new_working_score, new_working_model = sorted_working_model_list[0]
            if new_working_score > best_model.get_score():
                best_model = new_working_model
                found = True
                if verbose:
                    print("NEW BEST SCORE: %7.2f" % (
                        best_model.get_score()), file=out)
                    best_model.show_summary(out=out)

        print("\nDONE... best is %7.3f\n" % (
            best_model.get_score()), file=out)

        # Create composite of this chain

        # Note residue values. We are going to pick each residue from one of
        # the models. All models list the same residues, so the list from
        # the last matching chain is used.
        residue_list = []
        for model in ph.models():
            for chain in model.chains():
                if chain.id != chain_id:
                    continue
                residue_list = [rg.resseq for rg in chain.residue_groups()]
        residue_list.sort()
        assert len(best_model.source_list) == len(residue_list)

        # ph does not change below, so one selection cache serves all
        # residues.
        asc = ph.atom_selection_cache()
        for source_id, resseq in zip(best_model.source_list, residue_list):
            atom_selection = get_atom_selection(
                model_id=source_id, resseq_sel=resseq)
            sel = asc.selection(string=atom_selection)
            sel_hierarchy = ph.select(sel)
            print(remove_ter(sel_hierarchy.as_pdb_string()),
                  file=composite_model_stream)

    # All done, make a new pdb_hierarchy
    pdb_string = composite_model_stream.getvalue()
    pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_string)
    pdb_hierarchy = pdb_inp.construct_hierarchy()

    if pdb_out:
        # Context manager so the file is closed even if writing fails.
        with open(pdb_out, 'w') as f:
            print(pdb_hierarchy.as_pdb_string(
                crystal_symmetry=crystal_symmetry), file=f)
        # Reported on the log stream like every other message here.
        print("Final model is in: %s\n" % (pdb_out), file=out)
    return pdb_hierarchy