def run(args, log=sys.stdout):
  """
  Print map-model correlation coefficients for one model and one CCP4 map.

  The command-line arguments are handed to
  mmtbx.utils.process_command_line_args. The report written to log contains
  the map resolution (params.resolution, or an estimate from
  maptbx.resolution_from_map_and_model when that is None), followed by the
  CC for the whole box, around all atoms, per chain and per residue group.

  Parameters:
    args: command-line arguments (model file, map file, parameters).
    log: writable stream that receives all output.

  Raises:
    Sorry: if the number of PDB files is not exactly one, or if no CCP4 map
      was supplied.
  """
  print >> log, "-"*79
  print >> log, legend
  print >> log, "-"*79
  inputs = mmtbx.utils.process_command_line_args(args = args,
    master_params = master_params())
  params = inputs.params.extract()
  # model
  broadcast(m="Input PDB:", log=log)
  file_names = inputs.pdb_file_names
  if(len(file_names) != 1): raise Sorry("PDB file has to be given.")
  pi = iotbx.pdb.input(file_name = file_names[0])
  # The hierarchy is only used further down to obtain per-chain and
  # per-residue atom index selections.
  h = pi.construct_hierarchy()
  xrs = pi.xray_structure_simple(crystal_symmetry=inputs.crystal_symmetry)
  xrs.scattering_type_registry(table = params.scattering_table)
  xrs.show_summary(f=log, prefix=" ")
  # map
  broadcast(m="Input map:", log=log)
  if(inputs.ccp4_map is None): raise Sorry("Map file has to be given.")
  # Send the map summary to the same stream as the rest of the report.
  inputs.ccp4_map.show_summary(out=log, prefix=" ")
  map_data = inputs.ccp4_map.map_data()
  # estimate resolution
  d_min = params.resolution
  broadcast(m="Map resolution:", log=log)
  if(d_min is None):
    d_min = maptbx.resolution_from_map_and_model(
      map_data=map_data, xray_structure=xrs)
  print >> log, " d_min: %6.4f"%d_min
  # various CC
  cc_calculator = mmtbx.maps.correlation.from_map_and_xray_structure_or_fmodel(
    xray_structure = xrs,
    map_data       = map_data,
    d_min          = d_min)
  broadcast(m="Map-model CC:", log=log)
  print >> log, "Overall:"
  # entire box
  print >> log, " box: %6.4f"%cc_calculator.cc()
  # all atoms
  print >> log, "around atoms: %6.4f"%cc_calculator.cc(
    selection=flex.bool(xrs.scatterers().size(),True))
  # per chain
  print >> log, "Per chain:"
  for chain in h.chains():
    print >> log, " chain %s: %6.4f"%(chain.id, cc_calculator.cc(
      selection=chain.atoms().extract_i_seq()))
  # per residue
  print >> log, "Per residue:"
  for rg in h.residue_groups():
    cc = cc_calculator.cc(selection=rg.atoms().extract_i_seq())
    print >> log, " chain id: %s resid %s: %6.4f"%(
      rg.parent().id, rg.resid(), cc)
def run(
    params=None, # params for running from command line
    map_data=None,  # map_data, as_double()
    pdb_inp=None,
    pdb_hierarchy=None,
    crystal_symmetry=None,
    resolution=None,
    scattering_table='n_gaussian',
    smoothing_window=5,
    crossover_atom='CA',
    minimum_matching_atoms=3,
    minimum_length=2,
    dist_max=1.0,
    minimum_improvement=0.01,
    max_regions_to_test=10,
    max_ends_per_region=5,
    maximum_fraction=0.5,
    max_keep=10,
    map_coeffs_file=None,map_coeffs_labels=None,
    pdb_in_file=None,
    pdb_out=None,
    verbose=None,
    out=sys.stdout):
  """
  Build a composite model from a multi-model hierarchy, chain by chain.

  For every chain segment found by split_model, per-residue CC values are
  obtained for each model (get_cc_dict), smoothed (smooth_cc_values) and
  possible crossover points located (get_crossover_dict). One model_object
  per source model is then repeatedly optimized against the others until the
  best score stops improving. Each residue of the segment is finally copied
  from the model named in best_model.source_list.

  Parameters:
    params: optional parameter object. When truthy, its values replace the
      keyword arguments verbose, map_coeffs_file, map_coeffs_labels,
      pdb_in_file, resolution, scattering_table, all crossover settings and
      pdb_out.
    map_data: map to score against. When falsy, a sigma-scaled map is
      computed from map_coeffs_file.
    pdb_inp, pdb_hierarchy, pdb_in_file: alternative sources of the input
      models. pdb_hierarchy excludes the other two and requires
      crystal_symmetry.
    crystal_symmetry, resolution: taken from the map coefficients (or, for
      the symmetry, from pdb_inp) when not given; resolution is otherwise
      estimated from map and model.
    pdb_out: optional file name for the composite model.
    out: stream for log output; None means sys.stdout.

  Returns:
    The input hierarchy unchanged if it contains exactly one model,
    otherwise a new hierarchy built from the composite PDB text.

  Raises:
    Sorry: if the map coefficients file or the input PDB file is needed but
      cannot be found.
    AssertionError: if the consistency checks on the inputs fail.
  """

  if out is None: out=sys.stdout # explode and refine calls it this way

  # get info from params if present
  if params:
    verbose=params.control.verbose
    map_coeffs_file=params.input_files.map_coeffs_file
    map_coeffs_labels=params.input_files.map_coeffs_labels
    pdb_in_file=params.input_files.pdb_in_file
    resolution=params.crystal_info.resolution
    scattering_table=params.crystal_info.scattering_table
    smoothing_window=params.crossover.smoothing_window
    crossover_atom=params.crossover.crossover_atom
    minimum_matching_atoms=params.crossover.minimum_matching_atoms
    minimum_length=params.crossover.minimum_length
    dist_max=params.crossover.dist_max
    minimum_improvement=params.crossover.minimum_improvement
    max_regions_to_test=params.crossover.max_regions_to_test
    max_ends_per_region=params.crossover.max_ends_per_region
    maximum_fraction=params.crossover.maximum_fraction
    max_keep=params.crossover.max_keep
    pdb_out=params.output_files.pdb_out

  # Consistency checks
  if(pdb_hierarchy is not None):
    assert pdb_in_file is None
    assert pdb_inp is None
    assert crystal_symmetry is not None
    # XXX more checks here!

  # Get map_data if not present
  if not map_data:
    if not map_coeffs_file or not os.path.isfile(map_coeffs_file):
      raise Sorry("Cannot find the map_coeffs_file '%s'" %(
        str(map_coeffs_file)))
    from mmtbx.building.minimize_chain import get_map_coeffs
    map_coeffs=get_map_coeffs(map_coeffs_file,
      map_coeffs_labels=map_coeffs_labels)

    fft_map = map_coeffs.fft_map(resolution_factor = 0.25)
    fft_map.apply_sigma_scaling()
    map_data = fft_map.real_map_unpadded()
    map_data=map_data.as_double()
    # Fall back on the map coefficients for anything the caller left out.
    if map_coeffs and not crystal_symmetry:
      crystal_symmetry=map_coeffs.crystal_symmetry()
    if map_coeffs and not resolution:
      resolution=map_coeffs.d_min()

  # Get the starting model
  if(pdb_hierarchy is None):
    if pdb_inp is None:
      if not pdb_in_file or not os.path.isfile(pdb_in_file):
        raise Sorry("Cannot read input PDB file '%s'" %(
          str(pdb_in_file)))
      print >>out,"Taking models from %s" %(pdb_in_file)
      # Close the input file as soon as its text has been read.
      with open(pdb_in_file) as pdb_file:
        pdb_string=pdb_file.read()
      pdb_inp=iotbx.pdb.input(source_info=None, lines = pdb_string)
      if pdb_inp is None:
        raise Sorry("Need a model or models")
    if not crystal_symmetry:
      crystal_symmetry=pdb_inp.crystal_symmetry()
    assert crystal_symmetry is not None
    hierarchy = pdb_inp.construct_hierarchy()
  else:
    hierarchy = pdb_hierarchy # XXX FIXME
  n_models=sum(1 for model in hierarchy.models())

  if n_models==1:  # nothing to do
    return hierarchy

  #xrs = pdb_inp.xray_structure_simple(crystal_symmetry=crystal_symmetry)
  xrs = hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry)
  xrs.scattering_type_registry(table=scattering_table)

  if not resolution:
    from cctbx import maptbx
    resolution=maptbx.resolution_from_map_and_model(
      map_data=map_data, xray_structure=xrs)

  print >>out,"\nResolution limit: %7.2f" %(resolution)
  print >>out,"\nSummary of input models"
  xrs.show_summary(f=out, prefix=" ")

  print >>out, "\nReady with %d models and map" %(n_models)
  # Get CC by residue for each model and map

  chain_id_and_resseq_list=[] # Instead set up chain_id and resseq (range)
  from mmtbx.secondary_structure.find_ss_from_ca import \
      split_model,get_first_resno, get_last_resno,get_chain_id
  model_list=split_model(hierarchy=hierarchy,only_first_model=True)
  for m in model_list:
    h=m.hierarchy
    chain_id_and_resseq=[get_chain_id(h),
      [get_first_resno(h),get_last_resno(h)]]
    # Keep each (chain, residue range) only once, in order of appearance.
    if not chain_id_and_resseq in chain_id_and_resseq_list:
      chain_id_and_resseq_list.append(chain_id_and_resseq)

  # Run through chains separately
  # NOTE: All models of each chain must match exactly

  # Save composite model, chain by chain
  from cStringIO import StringIO
  composite_model_stream=StringIO()

  for chain_id_and_resseq in chain_id_and_resseq_list:
    chain_id,[start_resno,end_resno]=chain_id_and_resseq
    atom_selection=get_atom_selection(chain_id=chain_id,
      start_resno=start_resno,end_resno=end_resno)
    asc=hierarchy.atom_selection_cache()
    sel=asc.selection(string = atom_selection)
    sel_hierarchy=hierarchy.select(sel)
    pdb_inp=sel_hierarchy.as_pdb_input(crystal_symmetry=crystal_symmetry)
    ph=pdb_inp.construct_hierarchy()

    print >>out,"\nWorking on chain_id='%s' resseq %d:%d\n" %(
      chain_id,start_resno,end_resno)

    # get CC values for all residues
    cc_dict=get_cc_dict(hierarchy=ph,map_data=map_data,d_min=resolution,
      crystal_symmetry=crystal_symmetry,
      table=scattering_table,out=out)

    # smooth CC values with window of smoothing_window
    smoothed_cc_dict=smooth_cc_values(cc_dict=cc_dict,
      smoothing_window=smoothing_window,
      verbose=verbose,out=out)

    # figure out all the places where crossover can occur.
    # The residue count is read from an arbitrary entry of cc_dict.
    n_residues=cc_dict[next(iter(cc_dict))].size()

    crossover_dict=get_crossover_dict(
      n_residues=n_residues,
      hierarchy=ph,
      crossover_atom=crossover_atom,
      dist_max=dist_max,
      minimum_matching_atoms=minimum_matching_atoms,
      verbose=verbose,out=out)

    # Now we are ready to identify the best composite model...
    # A composite has reside 0 from model x, residue 1 from model y etc.
    # Each change from model a to model b between residues i and i+1 must have
    #  a crossover between a and b at either residue i or i+1

    sorted_working_model_list=[]
    for key in sorted(cc_dict.keys()):
      working_model=model_object(source_id=key,
        cc_dict=cc_dict,
        smoothed_cc_dict=smoothed_cc_dict,
        crossover_dict=crossover_dict,
        minimum_length=minimum_length,
        minimum_improvement=minimum_improvement,
        max_regions_to_test=max_regions_to_test,
        max_ends_per_region=max_ends_per_region,
        maximum_fraction=maximum_fraction)
      if verbose:
        working_model.show_summary(out=out)
      sorted_working_model_list.append(
        [working_model.get_score(),working_model])
    # Highest score first; keep at most max_keep candidates.
    sorted_working_model_list.sort()
    sorted_working_model_list.reverse()
    sorted_working_model_list=sorted_working_model_list[:max_keep]
    working_model_list=[m for s,m in sorted_working_model_list]

    # Go through all the working models and cross them with other models to
    #  optimize...Then take all the best and cross...

    best_model=sorted_working_model_list[0][1]
    found=True
    cycle=0
    while found:
      cycle+=1
      print >>out, "\nCYCLE %d current best is %7.3f\n" %(
        cycle,best_model.get_score())
      found=False
      sorted_working_model_list=[]
      for working_model in working_model_list:
        others=[m for m in working_model_list if not working_model==m]
        new_working_model=working_model.optimize_with_others(others=others)
        if not new_working_model:
          # Blank line goes to the same stream as the rest of the log.
          print >>out
          continue
        aa=[new_working_model.get_score(),new_working_model]
        if not aa in sorted_working_model_list:
          sorted_working_model_list.append(aa)
      if not sorted_working_model_list:
        break # nothing to do

      sorted_working_model_list.sort()
      sorted_working_model_list.reverse()
      sorted_working_model_list=sorted_working_model_list[:max_keep]

      new_working_score,new_working_model=sorted_working_model_list[0]
      # Another cycle is run only when the best score improved.
      if new_working_score>best_model.get_score():
        best_model=new_working_model
        found=True
        if verbose:
          print >>out,"NEW BEST SCORE: %7.2f" %(best_model.get_score())
          best_model.show_summary(out=out)

    print >>out, "\nDONE... best is %7.3f\n" %(
      best_model.get_score())

    # Create composite of this chain

    # Note residue values. We are going to pick each residue from one of
    # the models

    for model in ph.models():
      for chain in model.chains():
        if chain.id != chain_id: continue
        residue_list=[rg.resseq for rg in chain.residue_groups()]
    residue_list.sort()
    assert len(best_model.source_list)==len(residue_list)

    # ph does not change below, so one selection cache serves all residues.
    asc=ph.atom_selection_cache()
    for source_id,resseq in zip(best_model.source_list,residue_list):
      atom_selection=get_atom_selection(model_id=source_id,
        resseq_sel=resseq)
      sel=asc.selection(string = atom_selection)
      sel_hierarchy=ph.select(sel)
      print >>composite_model_stream,remove_ter(sel_hierarchy.as_pdb_string())

  #  All done, make a new pdb_hierarchy
  pdb_string=composite_model_stream.getvalue()
  pdb_inp=iotbx.pdb.input(source_info=None, lines = pdb_string)
  pdb_hierarchy=pdb_inp.construct_hierarchy()

  if pdb_out:
    # The with-block closes the file even if writing fails.
    with open(pdb_out,'w') as f:
      print >>f,pdb_hierarchy.as_pdb_string(crystal_symmetry=crystal_symmetry)
      print >>out,"Final model is in: %s\n" %(f.name)
  return pdb_hierarchy
def run(args, log=sys.stdout):
  """
  Print model-map FSC and map-model correlation coefficients.

  The command-line arguments are handed to
  mmtbx.utils.process_command_line_args. If the map is not a 0-based 3D
  grid, its origin is shifted and the model coordinates are shifted by the
  corresponding amount. The report written to log contains the map
  resolution (params.resolution, or an estimate from
  maptbx.resolution_from_map_and_model when that is None), the output of
  mmtbx.maps.correlation.fsc_model_map, and the CC for the whole box,
  around all atoms, per chain and per residue group.

  Parameters:
    args: command-line arguments (model file, map file, parameters).
    log: writable stream that receives all output.

  Raises:
    Sorry: if the number of PDB files is not exactly one, or if no CCP4 map
      was supplied.
  """
  print >> log, "-"*79
  print >> log, legend
  print >> log, "-"*79
  inputs = mmtbx.utils.process_command_line_args(args = args,
    master_params = master_params())
  params = inputs.params.extract()
  # model
  broadcast(m="Input PDB:", log=log)
  file_names = inputs.pdb_file_names
  if(len(file_names) != 1): raise Sorry("PDB file has to be given.")
  pi = iotbx.pdb.input(file_name = file_names[0])
  # The hierarchy is only used further down to obtain per-chain and
  # per-residue atom index selections.
  h = pi.construct_hierarchy()
  xrs = pi.xray_structure_simple(crystal_symmetry=inputs.crystal_symmetry)
  xrs.scattering_type_registry(table = params.scattering_table)
  xrs.show_summary(f=log, prefix=" ")
  # map
  broadcast(m="Input map:", log=log)
  if(inputs.ccp4_map is None): raise Sorry("Map file has to be given.")
  # Send the map summary to the same stream as the rest of the report.
  inputs.ccp4_map.show_summary(out=log, prefix=" ")
  map_data = inputs.ccp4_map.map_data()
  # shift origin if needed
  shift_needed = not \
    (map_data.focus_size_1d() > 0 and map_data.nd() == 3 and
     map_data.is_0_based())
  if(shift_needed):
    N = map_data.all()
    O = map_data.origin()
    map_data = map_data.shift_origin()
    # apply same shift to the model
    # NOTE(review): the shift is built from the cell edge lengths a, b, c
    # only; presumably this assumes an orthogonal cell and a grid that spans
    # the whole unit cell - confirm for other cases.
    a,b,c = xrs.crystal_symmetry().unit_cell().parameters()[:3]
    sites_cart = xrs.sites_cart()
    sx,sy,sz = a/N[0]*O[0], b/N[1]*O[1], c/N[2]*O[2]
    sites_cart_shifted = sites_cart-\
      flex.vec3_double(sites_cart.size(), [sx,sy,sz])
    xrs.set_sites_cart(sites_cart_shifted)
  # estimate resolution
  d_min = params.resolution
  broadcast(m="Map resolution:", log=log)
  if(d_min is None):
    d_min = maptbx.resolution_from_map_and_model(
      map_data=map_data, xray_structure=xrs)
  print >> log, " d_min: %6.4f"%d_min
  # Compute FSC(map, model)
  broadcast(m="Model-map FSC:", log=log)
  mmtbx.maps.correlation.fsc_model_map(
    xray_structure=xrs, map=map_data, d_min=d_min, log=log)
  #
  # various CC
  cc_calculator = mmtbx.maps.correlation.from_map_and_xray_structure_or_fmodel(
    xray_structure = xrs,
    map_data       = map_data,
    d_min          = d_min)
  broadcast(m="Map-model CC:", log=log)
  print >> log, "Overall:"
  # entire box
  print >> log, " box: %6.4f"%cc_calculator.cc()
  # all atoms
  print >> log, "around atoms: %6.4f"%cc_calculator.cc(
    selection=flex.bool(xrs.scatterers().size(),True))
  # per chain
  print >> log, "Per chain:"
  for chain in h.chains():
    print >> log, " chain %s: %6.4f"%(chain.id, cc_calculator.cc(
      selection=chain.atoms().extract_i_seq()))
  # per residue
  print >> log, "Per residue:"
  for rg in h.residue_groups():
    cc = cc_calculator.cc(selection=rg.atoms().extract_i_seq())
    print >> log, " chain id: %s resid %s: %6.4f"%(
      rg.parent().id, rg.resid(), cc)
def run(
    params=None,  # params for running from command line
    map_data=None,  # map_data, as_double()
    pdb_inp=None,
    pdb_hierarchy=None,
    crystal_symmetry=None,
    resolution=None,
    scattering_table='n_gaussian',
    smoothing_window=5,
    crossover_atom='CA',
    minimum_matching_atoms=3,
    minimum_length=2,
    dist_max=1.0,
    minimum_improvement=0.01,
    max_regions_to_test=10,
    max_ends_per_region=5,
    maximum_fraction=0.5,
    max_keep=10,
    map_coeffs_file=None,
    map_coeffs_labels=None,
    pdb_in_file=None,
    pdb_out=None,
    verbose=None,
    out=sys.stdout):
  """
  Merge the models of a multi-model hierarchy into one composite model.

  Work is done separately for every chain segment reported by split_model.
  For a segment, get_cc_dict supplies per-residue CC values for each model,
  smooth_cc_values smooths them and get_crossover_dict lists where a switch
  between models is allowed. A model_object is created per source model;
  these are crossed with each other in cycles for as long as the best score
  keeps increasing. The composite is assembled residue by residue from the
  models listed in best_model.source_list.

  Parameters:
    params: optional parameter object. When truthy, its values override the
      keyword arguments verbose, map_coeffs_file, map_coeffs_labels,
      pdb_in_file, resolution, scattering_table, the crossover settings and
      pdb_out.
    map_data: map used for scoring. When falsy, a sigma-scaled map is
      calculated from map_coeffs_file.
    pdb_inp, pdb_hierarchy, pdb_in_file: alternative ways to supply the
      models. Passing pdb_hierarchy requires crystal_symmetry and forbids
      the other two.
    crystal_symmetry, resolution: filled in from the map coefficients (or,
      for the symmetry, from pdb_inp) when missing; a still-missing
      resolution is estimated from map and model.
    pdb_out: optional output file name for the composite model.
    out: log stream; None selects sys.stdout.

  Returns:
    The unchanged input hierarchy when it holds exactly one model, otherwise
    a new hierarchy constructed from the composite PDB text.

  Raises:
    Sorry: when a required map coefficients file or input PDB file cannot
      be found.
    AssertionError: when the input consistency checks fail.
  """

  if out is None:
    out = sys.stdout  # explode and refine calls it this way

  # get info from params if present
  if params:
    verbose = params.control.verbose
    map_coeffs_file = params.input_files.map_coeffs_file
    map_coeffs_labels = params.input_files.map_coeffs_labels
    pdb_in_file = params.input_files.pdb_in_file
    resolution = params.crystal_info.resolution
    scattering_table = params.crystal_info.scattering_table
    smoothing_window = params.crossover.smoothing_window
    crossover_atom = params.crossover.crossover_atom
    minimum_matching_atoms = params.crossover.minimum_matching_atoms
    minimum_length = params.crossover.minimum_length
    dist_max = params.crossover.dist_max
    minimum_improvement = params.crossover.minimum_improvement
    max_regions_to_test = params.crossover.max_regions_to_test
    max_ends_per_region = params.crossover.max_ends_per_region
    maximum_fraction = params.crossover.maximum_fraction
    max_keep = params.crossover.max_keep
    pdb_out = params.output_files.pdb_out

  # Consistency checks
  if (pdb_hierarchy is not None):
    assert pdb_in_file is None
    assert pdb_inp is None
    assert crystal_symmetry is not None
    # XXX more checks here!

  # Get map_data if not present
  if not map_data:
    if not map_coeffs_file or not os.path.isfile(map_coeffs_file):
      raise Sorry("Cannot find the map_coeffs_file '%s'" %
                  (str(map_coeffs_file)))
    from mmtbx.building.minimize_chain import get_map_coeffs
    map_coeffs = get_map_coeffs(map_coeffs_file,
                                map_coeffs_labels=map_coeffs_labels)

    fft_map = map_coeffs.fft_map(resolution_factor=0.25)
    fft_map.apply_sigma_scaling()
    map_data = fft_map.real_map_unpadded()
    map_data = map_data.as_double()
    # Use the map coefficients for whatever the caller did not provide.
    if map_coeffs and not crystal_symmetry:
      crystal_symmetry = map_coeffs.crystal_symmetry()
    if map_coeffs and not resolution:
      resolution = map_coeffs.d_min()

  # Get the starting model
  if (pdb_hierarchy is None):
    if pdb_inp is None:
      if not pdb_in_file or not os.path.isfile(pdb_in_file):
        raise Sorry("Cannot read input PDB file '%s'" %
                    (str(pdb_in_file)))
      print >> out, "Taking models from %s" % (pdb_in_file)
      # Read the whole file and release the handle right away.
      with open(pdb_in_file) as pdb_file:
        pdb_string = pdb_file.read()
      pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_string)
      if pdb_inp is None:
        raise Sorry("Need a model or models")
    if not crystal_symmetry:
      crystal_symmetry = pdb_inp.crystal_symmetry()
    assert crystal_symmetry is not None
    hierarchy = pdb_inp.construct_hierarchy()
  else:
    hierarchy = pdb_hierarchy  # XXX FIXME
  n_models = sum(1 for model in hierarchy.models())

  if n_models == 1:  # nothing to do
    return hierarchy

  #xrs = pdb_inp.xray_structure_simple(crystal_symmetry=crystal_symmetry)
  xrs = hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry)
  xrs.scattering_type_registry(table=scattering_table)

  if not resolution:
    from cctbx import maptbx
    resolution = maptbx.resolution_from_map_and_model(map_data=map_data,
                                                      xray_structure=xrs)

  print >> out, "\nResolution limit: %7.2f" % (resolution)
  print >> out, "\nSummary of input models"
  xrs.show_summary(f=out, prefix=" ")

  print >> out, "\nReady with %d models and map" % (n_models)
  # Get CC by residue for each model and map

  chain_id_and_resseq_list = []  # Instead set up chain_id and resseq (range)
  from mmtbx.secondary_structure.find_ss_from_ca import \
      split_model,get_first_resno, get_last_resno,get_chain_id
  model_list = split_model(hierarchy=hierarchy, only_first_model=True)
  for m in model_list:
    h = m.hierarchy
    chain_id_and_resseq = [get_chain_id(h),
                           [get_first_resno(h), get_last_resno(h)]]
    # Record every distinct (chain, residue range) once, in input order.
    if not chain_id_and_resseq in chain_id_and_resseq_list:
      chain_id_and_resseq_list.append(chain_id_and_resseq)

  # Run through chains separately
  # NOTE: All models of each chain must match exactly

  # Save composite model, chain by chain
  from cStringIO import StringIO
  composite_model_stream = StringIO()

  for chain_id_and_resseq in chain_id_and_resseq_list:
    chain_id, [start_resno, end_resno] = chain_id_and_resseq
    atom_selection = get_atom_selection(chain_id=chain_id,
                                        start_resno=start_resno,
                                        end_resno=end_resno)
    asc = hierarchy.atom_selection_cache()
    sel = asc.selection(string=atom_selection)
    sel_hierarchy = hierarchy.select(sel)
    pdb_inp = sel_hierarchy.as_pdb_input(crystal_symmetry=crystal_symmetry)
    ph = pdb_inp.construct_hierarchy()

    print >> out, "\nWorking on chain_id='%s' resseq %d:%d\n" % (
        chain_id, start_resno, end_resno)

    # get CC values for all residues
    cc_dict = get_cc_dict(hierarchy=ph,
                          map_data=map_data,
                          d_min=resolution,
                          crystal_symmetry=crystal_symmetry,
                          table=scattering_table,
                          out=out)

    # smooth CC values with window of smoothing_window
    smoothed_cc_dict = smooth_cc_values(cc_dict=cc_dict,
                                        smoothing_window=smoothing_window,
                                        verbose=verbose,
                                        out=out)

    # figure out all the places where crossover can occur.
    # The residue count is taken from an arbitrary entry of cc_dict.
    n_residues = cc_dict[next(iter(cc_dict))].size()

    crossover_dict = get_crossover_dict(
        n_residues=n_residues,
        hierarchy=ph,
        crossover_atom=crossover_atom,
        dist_max=dist_max,
        minimum_matching_atoms=minimum_matching_atoms,
        verbose=verbose,
        out=out)

    # Now we are ready to identify the best composite model...
    # A composite has reside 0 from model x, residue 1 from model y etc.
    # Each change from model a to model b between residues i and i+1 must have
    #  a crossover between a and b at either residue i or i+1

    sorted_working_model_list = []
    for key in sorted(cc_dict.keys()):
      working_model = model_object(
          source_id=key,
          cc_dict=cc_dict,
          smoothed_cc_dict=smoothed_cc_dict,
          crossover_dict=crossover_dict,
          minimum_length=minimum_length,
          minimum_improvement=minimum_improvement,
          max_regions_to_test=max_regions_to_test,
          max_ends_per_region=max_ends_per_region,
          maximum_fraction=maximum_fraction)
      if verbose:
        working_model.show_summary(out=out)
      sorted_working_model_list.append(
          [working_model.get_score(), working_model])
    # Best score first, truncated to max_keep entries.
    sorted_working_model_list.sort()
    sorted_working_model_list.reverse()
    sorted_working_model_list = sorted_working_model_list[:max_keep]
    working_model_list = [m for s, m in sorted_working_model_list]

    # Go through all the working models and cross them with other models to
    #  optimize...Then take all the best and cross...

    best_model = sorted_working_model_list[0][1]
    found = True
    cycle = 0
    while found:
      cycle += 1
      print >> out, "\nCYCLE %d current best is %7.3f\n" % (
          cycle, best_model.get_score())
      found = False
      sorted_working_model_list = []
      for working_model in working_model_list:
        others = [m for m in working_model_list if not working_model == m]
        new_working_model = working_model.optimize_with_others(
            others=others)
        if not new_working_model:
          # Keep the blank line on the log stream like all other output.
          print >> out
          continue
        aa = [new_working_model.get_score(), new_working_model]
        if not aa in sorted_working_model_list:
          sorted_working_model_list.append(aa)
      if not sorted_working_model_list:
        break  # nothing to do

      sorted_working_model_list.sort()
      sorted_working_model_list.reverse()
      sorted_working_model_list = sorted_working_model_list[:max_keep]

      new_working_score, new_working_model = sorted_working_model_list[0]
      # Only an improved best score triggers another cycle.
      if new_working_score > best_model.get_score():
        best_model = new_working_model
        found = True
        if verbose:
          print >> out, "NEW BEST SCORE: %7.2f" % (
              best_model.get_score())
          best_model.show_summary(out=out)

    print >> out, "\nDONE... best is %7.3f\n" % (best_model.get_score())

    # Create composite of this chain

    # Note residue values. We are going to pick each residue from one of
    # the models

    for model in ph.models():
      for chain in model.chains():
        if chain.id != chain_id:
          continue
        residue_list = [rg.resseq for rg in chain.residue_groups()]
    residue_list.sort()
    assert len(best_model.source_list) == len(residue_list)

    # ph is not modified below, so build its selection cache only once.
    asc = ph.atom_selection_cache()
    for source_id, resseq in zip(best_model.source_list, residue_list):
      atom_selection = get_atom_selection(model_id=source_id,
                                          resseq_sel=resseq)
      sel = asc.selection(string=atom_selection)
      sel_hierarchy = ph.select(sel)
      print >> composite_model_stream, remove_ter(
          sel_hierarchy.as_pdb_string())

  #  All done, make a new pdb_hierarchy
  pdb_string = composite_model_stream.getvalue()
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_string)
  pdb_hierarchy = pdb_inp.construct_hierarchy()

  if pdb_out:
    # The with-block closes the output file even when writing raises.
    with open(pdb_out, 'w') as f:
      print >> f, pdb_hierarchy.as_pdb_string(
          crystal_symmetry=crystal_symmetry)
      print >> out, "Final model is in: %s\n" % (f.name)
  return pdb_hierarchy