def get_inputs(args):
  data = args[0]
  d_max = float(args[1])
  if (len(args) > 2):
    rg = float(args[2])
  else:
    rg = 0
  prior = None
  data = saxs_read_write.read_standard_ascii_qis(data)
  if (rg == 0):
    msga = guinier_analyses.multi_step_rg_engine(data)
    rg = msga.median_rg
  m = 1.0 / data.i[0]
  data.multiply_add(m, 0.0)  # normalize so that I(0) = 1
  n_params = 10
  n_fst_pass = 4
  # fitter = fixed_dmax_fitter(prior, data, d_max, n_params, n_fst_pass,
  #                            n_trial=4, n_simplex=10)
  # fitter.best_fit.show_pr( open("best.pr", 'w') )
  # fitter.best_fit.show_obs_vs_calc( open("best.qii", 'w') )
  delta = rg
  step = 2
  d_max_scan = dmax_scan(prior, data, d_max, delta, step, rg, n_params,
                         n_fst_pass, n_trial=1, n_simplex=10)
  d_max_scan.get_best_dmax()
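# Hypothetical invocation of get_inputs(); the file name and numeric values
# are placeholders, not values shipped with this code. args[0] is the
# scattering data file (q, I, sigma columns), args[1] the d_max estimate in
# Angstrom, and the optional args[2] an Rg estimate; when Rg is omitted it is
# taken from the Guinier analysis inside get_inputs().
get_inputs(["experimental.qis", "80"])        # Rg estimated internally
get_inputs(["experimental.qis", "80", "26"])  # Rg supplied explicitly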
def run(args):
  params = get_input(args, master_params, "zrefine", banner, help)
  if params is None:
    return
  np = 30
  nmax = params.zrefine.nmax
  start_file = params.zrefine.start
  target_file = params.zrefine.target
  rmax = params.zrefine.rmax
  qmax = params.zrefine.qmax
  prefix = params.zrefine.prefix
  splat_range = params.zrefine.splat_range
  pdb = params.zrefine.pdb
  n_trial = params.zrefine.n_trial
  if pdb is not None:
    pdb_obj = model_interface.container(pdbfile=pdb, rmax=rmax, nmax=nmax)
    pdb_nlm = pdb_obj.nlm_array
  else:
    pdb_nlm = None
  data = saxs_read_write.read_standard_ascii_qis(target_file)
  refine_obj = zm_xplor_refine(data, start_file, rmax, qmax=qmax, nmax=nmax,
                               np_on_grid=np, prefix=prefix,
                               splat_range=splat_range, pdb_nlm=pdb_nlm,
                               n_trial=n_trial)
  if pdb is not None:
    out_pdb_filename = pdb.split('.')[0] + '_shift.pdb'
    pdb_obj.write_pdb(rmax=refine_obj.rmax, filename=out_pdb_filename)
def __init__(self, pdb_file, target, nstruct=500, np=50, max_np=100,
             prefix='prefix'):
  self.pdb_file = pdb_file
  self.obs = saxs_read_write.read_standard_ascii_qis(target)
  if (self.obs.q.size() > max_np):
    self.obs = self.reduction(self.obs)  # reduce number of points in q-array
  self.she_obj = she.she(pdb_file, self.obs.q)
def run(args):
  params = get_input(args, master_params, "zrefine", banner, help)
  if params is None:
    return
  nmax = params.zrefine.nmax
  start_file = params.zrefine.start
  target_file = params.zrefine.target
  rmax = params.zrefine.rmax
  qmax = params.zrefine.qmax
  np_on_grid = params.zrefine.np_on_grid  # number of grids covering [0,1]
  nbr_dist = params.zrefine.nbr_dist
  prefix = params.zrefine.prefix
  splat_range = params.zrefine.splat_range
  pdb = params.zrefine.pdb
  n_trial = params.zrefine.n_trial
  if pdb is not None:
    pdb_nlm = model_interface.container(pdbfile=pdb, rmax=rmax,
                                        nmax=nmax).nlm_array
  else:
    pdb_nlm = None
  data = saxs_read_write.read_standard_ascii_qis(target_file)
  zm_xplor_refine(data, start_file, rmax, qmax=qmax, nmax=nmax,
                  np_on_grid=np_on_grid, prefix=prefix,
                  splat_range=splat_range, pdb_nlm=pdb_nlm,
                  n_trial=n_trial, nbr_dist=nbr_dist)
def test(args):
  rbs = []
  file = args[0]
  expt_data = saxs_read_write.read_standard_ascii_qis(file)
  dmax = expt_data.q[-1] + 1
  pdb = None
  max_num_fibonacci = 17
  count = 0
  # group_size determines the size of each atom group: the larger the group,
  # the faster the calculation
  group_size = 2
  main_body = False
  for arg in args[1:]:
    pdb = rbe.PDB(arg)
    pdb.CA_indx = flex.int(range(0, pdb.xyz.size(), group_size))
    main_body = (count == 0)  # only the first body is the fixed main body
    rbs.append(rb(pdb.xyz, pdb.CA_indx, dmax, max_num_fibonacci, main_body))
    count += 1
  target_xyz = rbs[1].get_crd().deep_copy()
  rb_eng = rbe.rb_engine(rbs, int(dmax))
  top_n = 100
  sample = grid_sample(rb_eng, expt_data, top_n=top_n)
  for ii in range(top_n):
    rbs[1].translate_after_rotation(sample.top_solutions[ii])
    new_xyz = rbs[1].get_crd()
    outname = "refined" + str(ii) + ".pdb"
    pdb.writePDB(new_xyz, outname)
    RMSD = target_xyz.rms_difference(new_xyz)
    for x in sample.top_solutions[ii]:
      print x,
    print RMSD
def run_withoutrmax(start_file, iq_file=None, rmax_center=None):
  global iq_path
  log_distance = []
  start_file = start_file.reshape((-1))
  if iq_file is None:
    iq_file = iq_path
  if rmax_center is None:
    rmax_center = start_file[-1]  # last element carries the rmax estimate
  data = saxs_read_write.read_standard_ascii_qis(iq_file)
  this_map = start_file[:-1]
  if rmax_center > 10:
    choosermax = rmax_center
    bestdistance = 200
    # scan rmax values around the estimate in steps of 3
    for ii in range(int(rmax_center - 6), int(rmax_center + 7), 3):
      ed_map_obj = ED_map(data, ii)
      distance = ed_map_obj.target(this_map)
      if distance < bestdistance:
        bestdistance = distance
        choosermax = ii
    return bestdistance, choosermax
  else:
    ed_map_obj = ED_map(data, rmax_center)
    distance = ed_map_obj.target(this_map)
    return distance, rmax_center
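# A minimal sketch of how run_withoutrmax() might be driven, assuming numpy is
# available (the commented-out np.load() calls elsewhere in this module suggest
# it is) and that the module-level iq_path default has been set. The map file
# name and the 50 A rmax guess are placeholders. The function expects a flat
# array whose last element is the rmax estimate and whose remaining elements
# are the voxelized map.
import numpy
voxels = numpy.load("voxel_map.npy").reshape((-1))        # hypothetical map file
start = numpy.concatenate([voxels, numpy.array([50.0])])  # append an rmax guess
best_distance, best_rmax = run_withoutrmax(start)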
def run(filename, dmax0):
  t1 = time.time()
  flex.set_random_seed(0)
  data = saxs_read_write.read_standard_ascii_qis(filename)
  data.multiply_add(1.0 / data.i[0], 0.0)  # normalize so that I(0) = 1
  dmax = dmax0
  for ii in xrange(1):
    fitters = random_start_fixed_dmax(data, dmax, 6, 10, 0)
    fitters.collect_scores()
def run(filename, dmax0):
  t1 = time.time()
  flex.set_random_seed(0)
  data = saxs_read_write.read_standard_ascii_qis(filename)
  data.multiply_add(1.0 / data.i[0], 0.0)  # normalize so that I(0) = 1
  # fitter = rcs_fitter(6, dmax, data, alpha=1e1)
  for ii in xrange(1):
    dmax = dmax0 + ii * 10
    fitters = random_start_fixed_dmax(data, dmax, 6, 15, 0.2)
    fitters.collect_scores()
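# Hypothetical driver for the two run() variants above; the file name and
# starting d_max are placeholders. Both variants normalize the data so that
# I(0) = 1 before fitting; the second additionally offsets d_max by 10 A per
# iteration when the xrange(1) loop is widened.
if __name__ == "__main__":
  run("sample.qis", 45.0)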
def go(params, log=None):
  if log is None:
    log = sys.stdout
  log2 = sys.stdout
  data_array = []
  multies = []
  d_max = int(params.pregxs.d_max + 0.5)
  rg = params.pregxs.fitting.rg
  increment = math.pi / d_max  # width of one Shannon channel in q
  n_params = params.pregxs.fitting.n_coeff
  n_fst_pass = params.pregxs.fitting.n_fst_pass
  n_trial = params.pregxs.fitting.n_trials
  n_simplex = params.pregxs.fitting.n_trials_simplex
  prior = None
  targetfile = os.path.join(os.path.split(sys.path[0])[0], "pregxs.txt")
  for item in params.pregxs.data:
    data = saxs_read_write.read_standard_ascii_qis(item)
    qmax = data.q[-1]
    bandwidth = 0.01  # can be changed if needed
    data = reduce_raw_data(data, qmax, bandwidth, level=0.0001,
                           outfile=targetfile)
    m = 1.0 / data.i[0]
    data.multiply_add(m, 0.0)
    data_array.append(data)
    multies.append(m)
    q_min = data.q[0]
    q_max0 = data.q[data.q.size() - 1]
    nparams_max = int((q_max0 - q_min) / increment)
    # print "n_params: ", n_params
    # print "nparams_max: ", nparams_max
    if (nparams_max < n_params):
      msg = ("WARNING: number of parameters is larger than the maximum "
             "number of Shannon channels covered by the experimental data")
      with open(targetfile, "a") as f:
        f.write(msg + "\n")
      print msg
    if params.pregxs.scan:
      delta = int(params.pregxs.fitting.delta)
      step = params.pregxs.fitting.step
      scanner = pretls.dmax_scan(prior, data, d_max, delta, step, rg,
                                 n_params, n_fst_pass, n_trial=n_trial,
                                 n_simplex=n_simplex, entropy_thresh=1.24,
                                 outfile=targetfile)
      scanner.print_pr(open(params.pregxs.output + "average.pr", 'w'))
      scanner.get_best_dmax()
      fitter = scanner.fitter
      fitter.best_fit.show_pr(open(params.pregxs.output + "best.pr", 'w'))
      fitter.best_fit.show_obs_vs_calc(
          open(params.pregxs.output + "best.qii", 'w'))
      write_pr_json(params.pregxs.output + "data.json", scanner.r,
                    scanner.average_pr)
      write_json(params.pregxs.output + "qii.json", data.q, scanner.calc_i,
                 data.i)
    else:
      fitter = pretls.fixed_dmax_fitter(prior, data, d_max, n_params,
                                        n_fst_pass, n_trial=n_trial,
                                        n_simplex=n_simplex)
      fitter.best_fit.show_pr(open(params.pregxs.output + "best.pr", 'w'))
      fitter.best_fit.show_obs_vs_calc(
          open(params.pregxs.output + "best.qii", 'w'))
      write_pr_json(params.pregxs.output + "data.json", fitter.r, fitter.pr)
      write_json(params.pregxs.output + "qii.json", data.q, fitter.calc_i,
                 data.i)
def run(start_file, iq_file=None, rmax_center=None):
  '''
  params = get_input(args, master_params, "zrefine", banner, help)
  if params is None:
    return
  # np = 30
  nmax = params.zrefine.nmax
  start_file = params.zrefine.start
  print start_file[0], type(start_file[0])
  target_file = params.zrefine.target
  rmax = params.zrefine.rmax
  qmax = params.zrefine.qmax
  prefix = params.zrefine.prefix
  splat_range = params.zrefine.splat_range
  pdb = params.zrefine.pdb
  n_trial = params.zrefine.n_trial
  if pdb is not None:
    pdb_nlm = model_interface.container(pdbfile=pdb, rmax=rmax,
                                        nmax=nmax).nlm_array
  else:
    pdb_nlm = None
  '''
  # start_file = np.load(start_file[0])
  global iq_path
  global rmax
  global first_time
  log_distance = []
  if iq_file is None:
    iq_file = iq_path
  if rmax_center is None:
    rmax_center = rmax
  data = saxs_read_write.read_standard_ascii_qis(iq_file)
  this_map = start_file.reshape((-1))
  '''
  if first_time:
    print 'first time map2iq'
    first_time = False
    rmax_start = rmax_center - rrange if rmax_center - rrange > 0 else 0
    rmax_end = rmax_center + rrange
    for rmax_i in range(int(rmax_start), int(rmax_end), 5):
      ed_map_obj = ED_map(data, rmax_i)
      distance = ed_map_obj.target(this_map)
      log_distance.append(distance)
  else:
    ed_map_obj = ED_map(data, rmax_center)
    distance = ed_map_obj.target(this_map)
    log_distance.append(distance)
  '''
  ed_map_obj = ED_map(data, rmax_center)
  distance = ed_map_obj.target(this_map)
  return distance
def test(args):
  rbs = []
  file = args[0]
  expt_data = saxs_read_write.read_standard_ascii_qis(file)
  dmax = expt_data.q[-1] + 1
  pdb = None
  for arg in args[1:]:
    pdb = rbe.PDB(arg)
    rbs.append(rb(pdb.xyz, pdb.CA_indx, dmax))
  rb_eng = rbe.rb_engine(rbs, int(dmax))
  refine = refine_rb(rb_eng, expt_data)
  pdb.writePDB(refine.rbe.rbs[1].get_crd(), 'refined.pdb')
def run_withrmax(start_file, iq_file=None, rmax_center=None):
  global iq_path
  global rmax
  log_distance = []
  if iq_file is None:
    iq_file = iq_path
  if rmax_center is None:
    rmax_center = rmax
  data = saxs_read_write.read_standard_ascii_qis(iq_file)
  this_map = start_file.reshape((-1))
  ed_map_obj = ED_map(data, rmax_center)
  distance = ed_map_obj.target(this_map)
  return distance
def run(params, log):
  print >> log, "Changing from input scale : %s" % (
      angular_names[params.change_scale.input.scale])
  print >> log, "to output scale           : %s" % (
      angular_names[params.change_scale.output.scale])
  # read in the SAXS data
  mydata = saxs_read_write.read_standard_ascii_qis(
      params.change_scale.input.data)
  # first change the scale to 2stol_A
  mydata.q = mydata.q / angular_scales[params.change_scale.input.scale]
  # now convert to the requested output scale
  mydata.q = mydata.q * angular_scales[params.change_scale.output.scale]
  new_file_name = (params.change_scale.input.data + "." +
                   params.change_scale.output.postfix)
  print >> log, "Writing output file: ", new_file_name
  saxs_read_write.write_standard_ascii_qis(mydata, new_file_name)
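# Illustration of the rescaling arithmetic used above, with explicit numbers:
# converting q = 4*pi*sin(theta)/lambda to 2*sin(theta)/lambda divides by
# 2*pi (a standard crystallographic identity). The concrete factors used by
# run() come from the angular_scales table, which is not shown in this snippet.
import math
q_momentum_transfer = 0.25                        # 1/Angstrom, placeholder value
two_stol = q_momentum_transfer / (2.0 * math.pi)  # ~0.0398 1/Angstrom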
def run_get_voxel_iq(start_file, iq_file=None, r=None):
  global iq_path
  global rmax
  if iq_file is None:
    iq_file = iq_path
  if r is None:
    r = rmax
  data = saxs_read_write.read_standard_ascii_qis(iq_file)
  sourcedata = copy.copy(data)
  ed_map_obj = ED_map(data, r)
  this_map = start_file.reshape((-1))
  calc_i = ed_map_obj.compute_saxs_profile(this_map)
  scale, offset = linear_fit(calc_i, data.i, data.s)
  calc_i = scale * calc_i + offset
  # calc_i = calc_i / calc_i[0]
  return calc_i, sourcedata
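# A sketch of a follow-up to run_get_voxel_iq(): a mean chi^2 comparison of
# the scaled calculated profile against the observed one. It assumes the
# module-level iq_path and rmax globals are set and numpy is available; the
# map file name is a placeholder.
import numpy
voxel_map = numpy.load("voxel_map.npy")
calc_i, obs = run_get_voxel_iq(voxel_map)
chi2 = 0.0
for ic, io_obs, sig in zip(calc_i, obs.i, obs.s):
  chi2 += ((ic - io_obs) / sig) ** 2
chi2 /= obs.i.size()
print "mean chi^2:", chi2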
def run(start_file, iq_file=None, r=None):
  '''
  params = get_input(args, master_params, "zrefine", banner, help)
  if params is None:
    return
  # np = 30
  nmax = params.zrefine.nmax
  start_file = params.zrefine.start
  print start_file[0], type(start_file[0])
  target_file = params.zrefine.target
  rmax = params.zrefine.rmax
  qmax = params.zrefine.qmax
  prefix = params.zrefine.prefix
  splat_range = params.zrefine.splat_range
  pdb = params.zrefine.pdb
  n_trial = params.zrefine.n_trial
  if pdb is not None:
    pdb_nlm = model_interface.container(pdbfile=pdb, rmax=rmax,
                                        nmax=nmax).nlm_array
  else:
    pdb_nlm = None
  '''
  # start_file = np.load(start_file[0])
  global iq_path
  global rmax
  if iq_file is None:
    iq_file = iq_path
  if r is None:
    r = rmax
  data = saxs_read_write.read_standard_ascii_qis(iq_file)
  ed_map_obj = ED_map(data, r)
  # this_map = ed_map_obj.raw_map
  this_map = start_file.reshape((-1))
  calc_i = ed_map_obj.compute_saxs_profile(this_map)
  '''
  for q, d1, d2 in zip(data.q, data.i, calc_i):
    print q, d1, d2
  '''
  distance = ed_map_obj.target(this_map)
  return distance
def __init__(self, pdb_file, target, nstruct=500, np=50, max_np=100,
             prefix='prefix'):
  self.pdb_file = pdb_file
  self.obs = saxs_read_write.read_standard_ascii_qis(target)
  if (self.obs.q.size() > max_np):
    self.obs = self.reduction(self.obs)  # reduce number of points in q-array
  # more options are available; see the she class definition
  self.she_obj = she.she(pdb_file, self.obs.q)
  self.run_concoord(nstruct, prefix=prefix)
  self.files, self.scores = self.compute_score(nstruct, prefix=prefix)
  self.min_indx = flex.min_index(self.scores)
  self.min_file = self.files[self.min_indx]
  self.min_score = self.scores[self.min_indx]
def __init__(self, data_def, out=None):
  self.out = out
  if self.out is None:
    self.out = sys.stdout  # default to standard output when no stream is given
  self.data_def = data_def
  self.data_id = self.data_def.id
  self.file_names = []
  self.concentration = []
  self.ssd = []
  for data in self.data_def.data:
    file_name = data.file_name
    conc = data.concentration
    tmp_sd = saxs_read_write.read_standard_ascii_qis(file_name)
    tmp_sd.show_summary()
    self.ssd.append(tmp_sd)
    self.file_names.append(file_name)
    self.concentration.append(conc)
    msga = guinier_analyses.multi_step_rg_engine(tmp_sd, out=self.out)
    kratky_tools.kratky_analyses(tmp_sd)
def go(params, out=None):
  if out is None:
    out = sys.stdout
  data_array = []
  multies = []
  dmax = params.pregxs.d_max
  nparam = params.pregxs.fitting.n_coeff
  nfst = params.pregxs.fitting.n_fst_pass
  ntrials = params.pregxs.fitting.n_trials
  strials = params.pregxs.fitting.n_trials_simplex
  for item in params.pregxs.data:
    data = saxs_read_write.read_standard_ascii_qis(item)
    # m = 1.0 / data.i[0]
    # data.multiply_add(m, 0.0)
    data_array.append(data)
    # multies.append(m)
    if params.pregxs.scan:
      d_max_start = dmax - params.pregxs.fitting.delta
      d_max_stop = dmax + params.pregxs.fitting.delta
      n_step = params.pregxs.fitting.n_step
      scanner = d_max_scan(data, nparam, nfst, ntrials, d_max_start,
                           d_max_stop, n_step, strials)
    else:
      fitters = random_start_fixed_dmax(data, dmax, nparam, nfst, ntrials,
                                        n_simplex=strials)
      coefs = fitters.trials[fitters.chi_index].solution
      for cc in coefs:
        print cc,
      print item, "COEF"
      pr_fit = fitters.trials[fitters.chi_index].get_best_pofr().f(data.q)
      print flex.mean(
          flex.pow2((data.i - pr_fit) / (data.i + pr_fit))) * 4.0, "CHI2"
def run(args):
  global f
  f = os.path.join(os.path.split(sys.path[0])[0], "she.txt")
  with open(f, "w") as tempf:
    tempf.truncate()
  # check if we have experimental data
  t1 = time.time()
  exp_data = None
  q_values = None
  var = None
  with open(f, "a") as tempf:
    params = get_input(args, master_params, "sas_I", banner, print_help,
                       tempf)
  if (params is None):
    exit()
  if params.sas_I.experimental_data is not None:
    exp_data = saxs_read_write.read_standard_ascii_qis(
        params.sas_I.experimental_data)
    # exp_data.s = flex.sqrt(exp_data.i)
    if params.sas_I.data_reduct:
      qmax = exp_data.q[-1]
      bandwidth = 0.5 / (params.sas_I.n_step - 1.0)
      exp_data = reduce_raw_data(exp_data, qmax, bandwidth, outfile=f)
    q_values = exp_data.q
    var = flex.pow(exp_data.s, 2.0)
  if q_values is None:
    q_values = params.sas_I.q_start + \
        (params.sas_I.q_stop - params.sas_I.q_start) * \
        flex.double(range(params.sas_I.n_step)) / (params.sas_I.n_step - 1)

  # read in pdb file
  pdbi = pdb.hierarchy.input(file_name=params.sas_I.structure)
  # atoms = pdbi.hierarchy.atoms()
  atoms = pdbi.hierarchy.models()[0].atoms()

  # predefine some arrays we will need
  dummy_atom_types = flex.std_string()
  radius = flex.double()
  b_values = flex.double()
  occs = flex.double()
  xyz = flex.vec3_double()
  # keep track of the atom types we have encountered
  dummy_at_collection = []
  for atom in atoms:
    # if (not atom.hetero):  #### temporarily added
    b_values.append(atom.b)
    occs.append(atom.occ)
    xyz.append(atom.xyz)

  # Hydrogen controls whether H is treated explicitly or implicitly
  Hydrogen = not params.sas_I.internals.implicit_hydrogens

  ### Using Zernike Expansion to Calculate Intensity ###
  if (params.sas_I.method == 'zernike'):
    znk_nmax = params.sas_I.znk_nmax
    absolute_Io = znk_model.calc_abs_Io(atoms, Hydrogen)
    if (absolute_Io == 0.0):
      # in case pdb hierarchy parsing did not work out correctly
      absolute_Io = sas_library.calc_abs_Io_from_pdb(params.sas_I.structure,
                                                     Hydrogen)
    if (Hydrogen):
      # number of electrons as density
      density = znk_model.get_density(atoms)
    else:
      # add one H-atom to each heavy atom as a correction
      density = znk_model.get_density(atoms) + 1
    znk_engine = znk_model.xyz2znk(xyz, absolute_Io, znk_nmax,
                                   density=density)
    calc_i, calc_i_vac, calc_i_sol, calc_i_layer = \
        znk_engine.calc_intensity(q_values)
    if (params.sas_I.experimental_data is not None):
      if params.sas_I.internals.solvent_scale:
        znk_engine.optimize_solvent(exp_data)
        calc_i = znk_engine.best_i_calc
      else:  # quick scaling
        scale, offset = linear_fit(calc_i, exp_data.i, exp_data.s)
        calc_i = calc_i * scale + offset
      CHI2 = flex.mean(flex.pow((calc_i - exp_data.i) / exp_data.s, 2.0))
      CHI = math.sqrt(CHI2)
      with open(f, "a") as log:
        print >> log, "fitting to experimental curve, chi = %5.4e" % CHI
      print "fitting to experimental curve, chi = %5.4e" % CHI
      write_debye_data(q_values, calc_i, params.sas_I.output + ".fit")
      write_json(params.sas_I.output + "data.json", q_values, calc_i,
                 y2=exp_data.i)
    else:
      # scaled to the absolute I(0)
      write_she_data(q_values, calc_i, calc_i_vac, calc_i_layer, calc_i_sol,
                     params.sas_I.output)
      write_json(params.sas_I.output + "data.json", q_values, calc_i)
    with open(f, "a") as log:
      print >> log, znk_engine.summary()
      print >> log, "Done! total time used: %5.4e (seconds)" % (
          time.time() - t1)
    print znk_engine.summary()
    print "Done! total time used: %5.4e (seconds)" % (time.time() - t1)
    return
  ### End of Zernike Model ###

  dummy_ats = sas_library.read_dummy_type(file_name=params.sas_I.structure)
  for at in dummy_ats:
    if at not in dummy_at_collection:
      dummy_at_collection.append(at)

  radius_dict = {}
  ener_lib = server.ener_lib()
  for dummy in dummy_at_collection:
    if (Hydrogen):
      radius_dict[dummy] = ener_lib.lib_atom[dummy].vdw_radius
    else:
      if ener_lib.lib_atom[dummy].vdwh_radius is not None:
        radius_dict[dummy] = ener_lib.lib_atom[dummy].vdwh_radius
      else:
        radius_dict[dummy] = ener_lib.lib_atom[dummy].vdw_radius
    if (radius_dict[dummy] is None):
      with open(f, "a") as log:
        print >> log, "****************** WARNING WARNING *******************"
        print >> log, "Did not find atom type: ", dummy, \
            "default value 1.58 A was used"
        print >> log, "*******************************************************"
      print "****************** WARNING WARNING *******************"
      print "Did not find atom type: ", dummy, "default value 1.58 A was used"
      print "*******************************************************"
      radius_dict[dummy] = 1.58

  for at in dummy_ats:
    dummy_atom_types.append(at)
    radius.append(radius_dict[at])

  Scaling_factors = sas_library.load_scaling_factor()

  # ------------------
  B_factor_on = params.sas_I.internals.use_adp
  max_i = params.sas_I.internals.max_i
  max_L = params.sas_I.internals.max_L
  f_step = params.sas_I.internals.f_step
  q_step = params.sas_I.internals.integration_q_step
  solvent_radius_scale = params.sas_I.internals.solvent_radius_scale
  protein_radius_scale = params.sas_I.internals.protein_radius_scale
  rho = params.sas_I.internals.rho
  drho = params.sas_I.internals.drho
  delta = params.sas_I.internals.delta
  # ------------------

  scat_lib_dummy = sas_library.build_scattering_library(
      dummy_at_collection, q_values, radius_dict, solvent_radius_scale,
      Hydrogen, Scaling_factors)

  new_indx = flex.int()
  new_coord = flex.vec3_double()

  model = intensity.model(xyz, radius * protein_radius_scale, b_values, occs,
                          dummy_ats, scat_lib_dummy, B_factor_on)
  t2 = time.time()

  if (params.sas_I.method == 'she'):
    max_z_eps = 0.02
    max_z = model.get_max_radius() * (q_values[-1] + max_z_eps) + max_z_eps
    engine = intensity.she_engine(model, scat_lib_dummy, max_i, max_L,
                                  f_step, q_step, max_z, delta, rho, drho)
    engine.update_solvent_params(rho, drho)
    i = engine.I()
    a = engine.get_IA()
    b = engine.get_IB()
    c = engine.get_IC()
    attri = engine.Area_Volume()
    with open(f, "a") as log:
      print >> log, "Inner surface area of the envelope is (A^2.0): ", attri[0]
      print >> log, "Inner volume of the envelope is (A^3.0): ", attri[1]
      print >> log, "Volume of the envelope shell is (A^3.0): ", attri[2]
    print "Inner surface area of the envelope is (A^2.0): ", attri[0]
    print "Inner volume of the envelope is (A^3.0): ", attri[1]
    print "Volume of the envelope shell is (A^3.0): ", attri[2]
    if params.sas_I.output is not None:
      write_she_data(q_values, i, a, b, c, params.sas_I.output)
      write_json(params.sas_I.output + "data.json", q_values, i)

    if params.sas_I.pdblist is not None:
      pdblist = params.sas_I.pdblist
      if (os.path.isfile(pdblist)):
        list = open(pdblist, 'r')
        for line in list:
          filename = line.split('\n')[0]
          pdbi = pdb.hierarchy.input(file_name=filename)
          t21 = time.time()
          atoms = pdbi.hierarchy.atoms()
          new_coord.clear()
          new_indx.clear()
          i = 0
          for atom in atoms:
            new_coord.append(atom.xyz)
            new_indx.append(i)
            i = i + 1
          engine.update_coord(new_coord, new_indx)
          i = engine.I()
          a = engine.get_IA()
          b = engine.get_IB()
          c = engine.get_IC()
          attri = engine.Area_Volume()
          with open(f, "a") as log:
            print >> log, "Inner surface area of the envelope is (A^2.0): ", attri[0]
            print >> log, "Inner volume of the envelope is (A^3.0): ", attri[1]
            print >> log, "Volume of the envelope shell is (A^3.0): ", attri[2]
          print "Inner surface area of the envelope is (A^2.0): ", attri[0]
          print "Inner volume of the envelope is (A^3.0): ", attri[1]
          print "Volume of the envelope shell is (A^3.0): ", attri[2]
          write_she_data(q_values, i, a, b, c, filename + '.int')
          with open(f, "a") as log:
            print >> log, '\nfinished pdb ', filename, 'at: ', time.ctime(t21), '\n'
          print '\nfinished pdb ', filename, 'at: ', time.ctime(t21), '\n'
    # attri = engine.Area_Volume2()
    # print "Inner surface area of the envelope is (A^2.0): ", attri[0]

  elif (params.sas_I.method == 'debye'):
    engine = intensity.debye_engine(model, scat_lib_dummy)
    i = engine.I()
    if params.sas_I.output is not None:
      write_debye_data(q_values, i, params.sas_I.output)
      write_json(params.sas_I.output + "data.json", q_values, i)

  if (params.sas_I.experimental_data is not None):
    if params.sas_I.internals.solvent_scale:
      # more thorough scaling
      solvent_optim = solvent_parameter_optimisation(she_object=engine,
                                                     observed_data=exp_data)
      scale, offset, drho, a = solvent_optim.get_scales()
      i = solvent_optim.get_scaled_data()
    else:  # quick scaling
      scale, offset = linear_fit(i, exp_data.i, exp_data.s)
      i = scale * i + offset
    with open(f, "a") as log:
      print >> log, "Scaled calculated data against experimental data"
      print >> log, "Scale factor : %5.4e" % scale
      print >> log, "Offset       : %5.4e" % offset
    print "Scaled calculated data against experimental data"
    print "Scale factor : %5.4e" % scale
    print "Offset       : %5.4e" % offset
    if params.sas_I.internals.solvent_scale:
      with open(f, "a") as log:
        print >> log, " Solvent average R ra    : ", a
        print >> log, " Solvation contrast drho : ", drho
      print " Solvent average R ra    : ", a
      print " Solvation contrast drho : ", drho
      print
    write_debye_data(q_values, i, params.sas_I.output + ".fit")
    write_json(params.sas_I.output + "data.json", q_values, i,
               y2=exp_data.i)
    CHI2 = flex.mean(flex.pow((i - exp_data.i) / exp_data.s, 2.0))
    CHI = math.sqrt(CHI2)
    with open(f, "a") as log:
      print >> log, "fitting to experimental curve, chi = %5.4e" % CHI
    print "fitting to experimental curve, chi = %5.4e" % CHI

  t3 = time.time()
  with open(f, "a") as log:
    print >> log, "Done! total time used: %5.4e (seconds)" % (t3 - t1)
    print >> log, 'start running at:                ', time.ctime(t1)
    print >> log, 'finished PDB file processing at: ', time.ctime(t2)
    print >> log, 'got all desired I(q) at:         ', time.ctime(t3)
  print "Done! total time used: %5.4e (seconds)" % (t3 - t1)
  print 'start running at:                ', time.ctime(t1)
  print 'finished PDB file processing at: ', time.ctime(t2)
  print 'got all desired I(q) at:         ', time.ctime(t3)
  with open(f, "a") as log:
    log.write("__END__")
def construct(target_file, rmax):
  data = saxs_read_write.read_standard_ascii_qis(target_file)
  ed_map_obj = ED_map(data, rmax)
  return ed_map_obj
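# Typical use of construct(): build the ED_map scorer once and reuse it for
# many candidate maps. The file name and rmax are placeholders; target() and
# compute_saxs_profile() are the methods exercised by the run_with* helpers
# above.
ed_map_obj = construct("target.qis", 50.0)
# distance = ed_map_obj.target(candidate_map.reshape((-1)))
# calc_i = ed_map_obj.compute_saxs_profile(candidate_map.reshape((-1)))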
def run_single_pdb(params, log):
  group_size = params.refine.group_size
  max_num_fibonacci = 12
  target_data = saxs_read_write.read_standard_ascii_qis(params.refine.target)
  if (params.refine.data_type == 'pr'):
    dmax = int(target_data.q[-1] + 0.5)
    new_r = flex.double(range(dmax))
    target_data = flex.linear_interpolation(target_data.q, target_data.i,
                                            new_r)
  rbs = []
  center = []
  pdb_objects = []
  main_body = True
  pdb_inp = pdb.hierarchy.input(params.refine.model[0])
  cache = pdb_inp.hierarchy.atom_selection_cache()
  max_size = 0
  for item in params.refine.rigid_body:
    fix_location = item.fixed_position
    fix_orientation = item.fixed_orientation
    cache_selected = cache.selection(string=item.selection)
    pdb_obj = pdb_inp.hierarchy.atoms().select(cache_selected)
    size = pdb_obj.size()
    atom_indx = flex.int(range(0, size, group_size))
    xyz = flex.vec3_double()
    atoms = pdb_obj
    for a in atoms:
      xyz.append(a.xyz)
    if (size > max_size):
      # the largest body goes first and serves as the reference
      max_size = size
      rbs.insert(0, rb(xyz, atom_indx, dmax, max_num_fibonacci, fix_location,
                       fix_orientation))
      pdb_objects.insert(0, pdb_obj)
    else:
      rbs.append(rb(xyz, atom_indx, dmax, max_num_fibonacci, fix_location,
                    fix_orientation))
      pdb_objects.append(pdb_obj)
  num_body = len(pdb_objects)
  shift = [(0, 0, 0)]
  if (params.refine.data_type == 'pr'):
    rb_eng = rbe.rb_engine(rbs, int(dmax))
  else:
    rb_eng = rbe.rb_engine(rbs, int(dmax), q_array=target_data.q)
  for ii in range(1, num_body):
    shift.append(flex.double(rbs[ii].center()) -
                 flex.double(rbs[0].center()))
    # rb_eng.rbs[ii].rotate_only(list(flex.random_double(3)), 10.0/180.0*pi)
    rb_eng.rbs[ii].translate_after_rotation(list(shift[ii]))
  filename = "initial_model.pdb"
  write_pdb_single(filename, num_body, rb_eng, pdb_inp, pdb_objects)
  refine = refine_rb(rb_eng, target_data, data_type=params.refine.data_type,
                     shift=shift, both=True)
  solution = refine.solution
  refine.target(solution)
  filename = params.refine.output + ".pdb"
  write_pdb_single(filename, num_body, rb_eng, pdb_inp, pdb_objects)
def run(args):
  global stdfile
  global outfilelog
  targetpath_fromGUI = ''
  targetpath_fromGUI_file = os.path.join(base_path, "targetpath_GUI.txt")
  if os.path.isfile(targetpath_fromGUI_file) and (
      os.stat(targetpath_fromGUI_file).st_size > 0):
    with open(targetpath_fromGUI_file, "r") as f:
      targetpath_fromGUI = f.read().strip()
  if targetpath_fromGUI == '':
    stddir = "maps"
  else:
    tempfile = os.path.join(targetpath_fromGUI, "Shape_Search_Engine")
    stddir = os.path.join(tempfile, "maps")
    # stdfile = os.path.join(tempfile, "temp.txt")
  stdfile = os.path.join(os.path.split(sys.path[0])[0], "shapeup.txt")
  with open(stdfile, "w") as f:
    f.truncate()
  outfilelog = os.path.join(os.path.split(sys.path[0])[0],
                            "outfilelog_shapeup.txt")
  with open(outfilelog, "w") as f:
    f.truncate()
  t1 = time.time()
  with open(stdfile, "a") as outfile:
    params = get_input(args, master_params, "query", banner, help, outfile)
  if (params is None):
    exit()
  target_file = params.query.target
  rmax = params.query.rmax
  nmax = params.query.nmax
  smear = params.query.smear
  dbpath = params.query.dbpath
  pdb_files = params.query.pdb_files
  db_choice = params.query.db_choice
  weight = params.query.weight
  delta_q = params.query.delta_q
  if (db_choice == "user"):
    dbprefix = params.query.db_user_prefix
  else:
    dbprefix = db_choice
  if (dbpath is None):
    dbpath = set_default_db_path()
  ntop = params.query.ntop
  scan = params.query.scan
  fraction = params.query.fraction
  scale_power = params.query.scale_power
  q_step = 1.0 / 200.0
  data = saxs_read_write.read_standard_ascii_qis(target_file)
  try:
    rg, io = get_rg(data)
  except Exception:
    with open(stdfile, "a") as log:
      print >> log, "Guinier analysis failed, R_max is required"
      print >> log, "ATTENTION: dummy values for Rg and Io set"
    print "Guinier analysis failed, R_max is required"
    print "ATTENTION: dummy values for Rg and Io set"
    rg = 50
    io = 1
  qmax = params.query.qmax
  q_background = params.query.q_background
  # qmax = 0.44*smath.exp(-0.00023*rmax*rmax)

  ######### Interpolation ##########
  if (rmax is None):
    rmax = 50
  bandwidth = min(q_step, smath.pi / 2.0 / rmax, data.q[1] - data.q[0])
  data = reduce_raw_data(data, qmax, bandwidth, q_background=q_background,
                         level=params.query.q_level)
  ###### END of Interpolation ######

  with open(stdfile, "a") as log:
    print >> log, " ==== Reading in shape database ==== "
  print " ==== Reading in shape database ==== "
  begin_time = time.time()
  nn_coefs, codes, rmaxs = read_pickle(dbpath, dbprefix)
  ready_time = time.time()
  delta_time = ready_time - begin_time
  print
  with open(stdfile, "a") as log:
    print >> log, " Done reading database with %i entries in %5.4e seconds" % (
        len(codes), delta_time)
    print >> log, " ==== Shape retrieval ==== "
    print >> log, " Constructing shape retrieval object"
  print " Done reading database with %i entries in %5.4e seconds" % (
      len(codes), delta_time)
  print " ==== Shape retrieval ==== "
  print " Constructing shape retrieval object"
  shapes = intoshape(data, rg=rg, io=io, nmax=nmax, rmax=rmax, scan=scan,
                     fraction=fraction, smear=smear,
                     prefix=params.query.prefix, weight=weight,
                     delta_q=delta_q, scale_power=scale_power)
  with open(stdfile, "a") as log:
    print >> log, " Shape search .... "
  print " Shape search .... "
  shapes.lookup(nn_coefs, codes, ntop)
  nlm_coefs = read_nlm(dbpath, dbprefix)
  shapes.pair_align(nlm_coefs, params.query.calc_cc)
  pdb_models = None
  if (len(pdb_files) > 0):
    pdb_models = process(pdb_files, nmax, rmax=shapes.best_rmax,
                         fraction=fraction)
  if (params.query.buildmap):
    # need to use rmax/fraction to get the right size of box
    (top_cc, top_ids, map_files, levels, cluster_ids, ave_maps, ave_levels,
     ave_cc) = build_map(nmax, shapes.best_rmax, nlm_coefs, codes,
                         shapes.best_models, pdb_models,
                         clusters=shapes.clusters, fract=fraction,
                         prefix=params.query.prefix)
    # build_pymol_script.write_pymol_scripts(maps=map_files, levels=levels,
    #                                        root_name=stddir)
    build_pymol_script.write_pymol_shapeup(maps=map_files, root_name=stddir)
    pdb_out_name = None
    if (pdb_models is not None):
      pdb_out_name = pdb_files[0].split('.')[0] + '_sa.pdb'
    # generate_html.generate_jmol_html(ave_maps, ave_cc, ave_levels,
    #     map_files, top_cc, levels, cluster_ids, 'models.html',
    #     pdb=pdb_out_name)
    if (len(pdb_files) > 0):
      with open(params.query.prefix + "_cc2pdb.dat", 'w') as out:
        print >> out, "Correlation coefficients of retrieved shapes vs input model"
        for cc, id in zip(top_cc, top_ids):
          print >> out, "Code: %5s CC: %5.1f " % (id, 100 * cc)
        print >> out, "mean: %8.5f" % flex.mean(top_cc)
      with open(stdfile, "a") as log:
        print >> log, "Compared to the PDB model (%s)" % pdb_models[0].filename
        print >> log, "mean cc: %8.5f" % flex.mean(top_cc)
        print >> log, "first cc: %8.5f" % top_cc[0]
        print >> log, "best cc: %8.5f" % flex.max(top_cc)
        print >> log, "worst cc: %8.5f" % flex.min(top_cc)
      print "Compared to the PDB model (%s)" % pdb_models[0].filename
      print "mean cc: %8.5f" % flex.mean(top_cc)
      print "first cc: %8.5f" % top_cc[0]
      print "best cc: %8.5f" % flex.max(top_cc)
      print "worst cc: %8.5f" % flex.min(top_cc)
      with open(stdfile, "a") as log:
        print >> log, "Rmax: estimated vs PDB", shapes.best_rmax, \
            pdb_models[0].rmax
      print "Rmax: estimated vs PDB", shapes.best_rmax, pdb_models[0].rmax
  t2 = time.time()
  with open(stdfile, "a") as log:
    print >> log, "total time used: ", t2 - t1, "(seconds)"
  print "total time used: ", t2 - t1, "(seconds)"
  with open(stdfile, "a") as log:
    log.write("__END__")
def run(args):
  t1 = time.time()
  params = get_input(args, master_params, "query", banner, help)
  if (params is None):
    exit()
  target_file = params.query.target
  rmax = params.query.rmax
  nmax = params.query.nmax
  smear = params.query.smear
  dbpath = params.query.dbpath
  pdb_files = params.query.pdb_files
  db_choice = params.query.db_choice
  weight = params.query.weight
  delta_q = params.query.delta_q
  if (db_choice == "user"):
    dbprefix = params.query.db_user_prefix
  else:
    dbprefix = db_choice
  if (dbpath is None):
    dbpath = set_default_db_path()
  ntop = params.query.ntop
  scan = params.query.scan
  fraction = params.query.fraction
  q_step = 1.0 / 100.0
  data = saxs_read_write.read_standard_ascii_qis(target_file)
  if (rmax is None):
    rmax = get_rg(data) * 3.0 / 2.0
  qmax = params.query.qmax
  q_background = params.query.q_background
  # qmax = 0.44*smath.exp(-0.00023*rmax*rmax)

  ######### Interpolation ##########
  bandwidth = min(q_step, data.q[2] / 2.0)  # smath.pi/2.0/rmax
  data = reduce_raw_data(data, qmax, bandwidth, q_background=q_background,
                         level=params.query.q_level)
  # saxs_read_write.write_standard_ascii_qis(data, 'reduced' + target_file)
  ###### END of Interpolation ######

  nn_coefs, codes, rmaxs = read_pickle(dbpath, dbprefix)
  shapes = intoshape(data, nmax=nmax, rmax=rmax, scan=scan,
                     fraction=fraction, smear=smear,
                     prefix=params.query.prefix, weight=weight,
                     delta_q=delta_q)
  shapes.lookup(nn_coefs, codes, ntop)
  pdb_models = None
  if (len(pdb_files) > 0):
    pdb_models = process(pdb_files, nmax, shapes.best_rmax[0] / fraction)
  nlm_coefs = None
  if (params.query.buildmap):
    nlm_coefs = read_nlm(dbpath, dbprefix)
    # need to use rmax/fraction to get the right size of box
    top_cc = build_map(nmax, shapes, nlm_coefs, codes, pdb_models)
    if (len(pdb_files) > 0):
      out = open(params.query.prefix + "_cc2pdb.dat", 'w')
      for cc in top_cc:
        print >> out, cc
      print >> out, "mean: %8.5f" % flex.mean(top_cc)
      print "mean cc: %8.5f" % flex.mean(top_cc)
      print "first cc: %8.5f" % top_cc[0]
      print "best cc: %8.5f" % flex.max(top_cc)
      print "worst cc: %8.5f" % flex.min(top_cc)
      out.close()
      print "Rmax: estimated vs PDB", shapes.best_rmax[0], pdb_models[0].rmax
  shapes.pair_align(nlm_coefs, params.query.calc_cc)
  t2 = time.time()
  print "total time used: ", t2 - t1
def __init__(self, start_pdb, target_I, ntotal, nmodes, max_rmsd,
             backbone_scale, prefix, weight='i', method='rtb',
             log='tmp.log'):
  self.counter = 0
  self.nmode_init = ntotal
  self.method = method
  self.nmodes = nmodes
  self.topn = 10
  self.Niter = 0
  # mode indices start at 7: the first six normal modes are rigid-body motions
  self.modes = flex.int(range(self.nmode_init)) + 7
  self.cutoff = 8
  self.weighted = True
  self.log = open(log, 'w')
  self.chi = open(prefix + '.chi', 'w')
  pdb_inp = pdb.input(file_name=start_pdb)
  crystal_symmetry = pdb_inp.xray_structure_simple().\
      cubic_unit_cell_around_centered_scatterers(
          buffer_size=10).crystal_symmetry()
  self.pdb_processor = process_pdb_file_srv(
      crystal_symmetry=crystal_symmetry)
  self.expt = saxs_read_write.read_standard_ascii_qis(target_I)
  self.q = self.expt.q
  self.expt_I = self.expt.i
  self.expt_s = self.expt.s
  if (self.q.size() > 100):
    self.q = self.interpolation(self.q, n_pts=30)
    self.expt_I = flex.linear_interpolation(self.expt.q, self.expt.i, self.q)
    self.expt_s = flex.linear_interpolation(self.expt.q, self.expt.s, self.q)
  # if (weight == 'i'): self.expt_s = self.expt_I
  for aa, bb, cc in zip(self.q, self.expt_I, self.expt_s):
    print aa, bb, cc
  start_name = start_pdb
  self.pdb = PDB(start_name, method=self.method)
  self.she_engine = she.she(start_name, self.q)
  self.natom = self.pdb.natm
  self.scale_factor = backbone_scale
  self.pdb.Hessian(self.cutoff, self.nmode_init, self.scale_factor)
  self.root = prefix
  self.scale = 0
  self.drmsd = max_rmsd
  if (self.method == 'rtb'):
    self.drmsd = self.drmsd * 2
  self.Rmax2 = self.natom * (self.drmsd)**2.0
  self.step_size = sqrt(self.Rmax2 / self.nmodes) * 6.0
  self.new_indx = flex.int(range(self.natom))
  self.stop = False
  self.minscore = 1e20
  # minimum deviation of refined structures, compared to the refined
  # structure from the previous step
  self.minDev = 0
  # number of iterations between geometry optimizations
  self.optNum = 1
  ### set running env for pulchra ###
  import libtbx.env_config
  env = libtbx.env_config.unpickle()
  self.pulchra = env.build_path + '/pulchra/exe/pulchra'
  self.iterate()
  self.log.close()
  self.chi.close()
def run(args):
  t1 = time.time()
  params = get_input(args, master_params, "query", banner, help)
  if (params is None):
    exit()
  target_file = params.query.target
  rmax = params.query.rmax
  nmax = params.query.nmax
  smear = params.query.smear
  dbpath = params.query.dbpath
  pdb_files = params.query.pdb_files
  db_choice = params.query.db_choice
  weight = params.query.weight
  delta_q = params.query.delta_q
  if (db_choice == "user"):
    dbprefix = params.query.db_user_prefix
  else:
    dbprefix = db_choice
  if (dbpath is None):
    dbpath = set_default_db_path()
  ntop = params.query.ntop
  scan = params.query.scan
  fraction = params.query.fraction
  scale_power = params.query.scale_power
  q_step = 1.0 / 100.0
  data = saxs_read_write.read_standard_ascii_qis(target_file)
  rg, io = get_rg(data)
  qmax = params.query.qmax
  q_background = params.query.q_background
  # qmax = 0.44*smath.exp(-0.00023*rmax*rmax)

  ######### Interpolation ##########
  bandwidth = min(q_step, data.q[2] / 2.0)  # smath.pi/2.0/rmax
  data = reduce_raw_data(data, qmax, bandwidth, q_background=q_background,
                         level=params.query.q_level)
  ###### END of Interpolation ######

  print " ==== Reading in shape database ==== "
  begin_time = time.time()
  nn_coefs, codes, rmaxs = read_pickle(dbpath, dbprefix)
  ready_time = time.time()
  delta_time = ready_time - begin_time
  print
  print " Done reading database with %i entries in %5.4e seconds" % (
      len(codes), delta_time)
  print
  print " ==== Shape retrieval ==== "
  print " Constructing shape retrieval object"
  shapes = intoshape(data, rg=rg, io=io, nmax=nmax, rmax=rmax, scan=scan,
                     fraction=fraction, smear=smear,
                     prefix=params.query.prefix, weight=weight,
                     delta_q=delta_q, scale_power=scale_power)
  print " Shape search .... "
  shapes.lookup(nn_coefs, codes, ntop)
  nlm_coefs = read_nlm(dbpath, dbprefix)
  shapes.pair_align(nlm_coefs, params.query.calc_cc)
  pdb_models = None
  if (len(pdb_files) > 0):
    pdb_models = process(pdb_files, nmax, rmax=shapes.best_rmax,
                         fraction=fraction)
  if (params.query.buildmap):
    # need to use rmax/fraction to get the right size of box
    (top_cc, top_ids, map_files, levels, cluster_ids, ave_maps, ave_levels,
     ave_cc) = build_map(nmax, shapes.best_rmax, nlm_coefs, codes,
                         shapes.best_models, pdb_models,
                         clusters=shapes.clusters, fract=fraction)
    build_pymol_script.write_pymol_scripts(map_files, levels)
    pdb_out_name = None
    if (pdb_models is not None):
      pdb_out_name = pdb_files[0].split('.')[0] + '_sa.pdb'
    generate_html.generate_jmol_html(ave_maps, ave_cc, ave_levels, map_files,
                                     top_cc, levels, cluster_ids,
                                     'models.html', pdb=pdb_out_name)
    if (len(pdb_files) > 0):
      out = open(params.query.prefix + "_cc2pdb.dat", 'w')
      print >> out, "Correlation coefficients of retrieved shapes vs input model"
      for cc, id in zip(top_cc, top_ids):
        print >> out, "Code: %5s CC: %5.1f " % (id, 100 * cc)
      print >> out, "mean: %8.5f" % flex.mean(top_cc)
      print "Compared to the PDB model (%s)" % pdb_models[0].filename
      print "mean cc: %8.5f" % flex.mean(top_cc)
      print "first cc: %8.5f" % top_cc[0]
      print "best cc: %8.5f" % flex.max(top_cc)
      print "worst cc: %8.5f" % flex.min(top_cc)
      out.close()
      print "Rmax: estimated vs PDB", shapes.best_rmax, pdb_models[0].rmax
  t2 = time.time()
  print "total time used: ", t2 - t1, "(seconds)"
def __init__(self, start_pdb, target_I, max_rmsd, backbone_scale, prefix,
             nstep_per_cycle=100, method='ca', weight='i', log='tmp.log'):
  self.counter = 0
  self.topn = 3
  self.Niter = 0
  self.method = method
  self.cutoff = 12
  self.log = open(log, 'w')
  self.nstep_per_cycle = nstep_per_cycle
  self.pdb_obj = PDB(start_pdb, method=self.method)
  crystal_symmetry = self.pdb_obj.pdbi.xray_structure_simple().\
      cubic_unit_cell_around_centered_scatterers(
          buffer_size=10).crystal_symmetry()
  self.pdb_processor = process_pdb_file_srv(
      crystal_symmetry=crystal_symmetry)
  self.expt = saxs_read_write.read_standard_ascii_qis(target_I)
  self.q = self.expt.q
  self.expt_I = self.expt.i
  self.expt_s = self.expt.s
  if (self.q.size() > 20):
    self.q = self.interpolation(self.q, n_pts=20)
    self.expt_I = flex.linear_interpolation(self.expt.q, self.expt.i, self.q)
    self.expt_s = flex.linear_interpolation(self.expt.q, self.expt.s, self.q)
  if (weight == 'i'):
    self.expt_s = flex.sqrt(self.expt_I)
  self.time_nm = 0
  self.time_she = 0
  self.she_engine = she.she(start_pdb, self.q)
  self.natom = self.pdb_obj.natm
  self.nbeads = self.pdb_obj.n_block
  self.scale_factor = backbone_scale
  time1 = time.time()
  self.time_nm += (time.time() - time1)
  self.root = prefix
  self.drmsd = max_rmsd
  self.step_size = self.drmsd * 3
  self.threshold = self.drmsd**2.0
  self.new_indx = flex.int(range(self.natom))
  self.stop = False
  self.minscore = 1e20
  # minimum deviation of refined structures, compared to the refined
  # structure from the previous step
  self.minDev = 0
  # number of iterations between geometry optimizations
  self.optNum = 10
  # self.estimate_init_weight()
  # self.restraint_weight *= 8  ## contribute 8x of chi initially
  self.iterate()
  self.log.close()
  print "time used for NM : %d" % self.time_nm
  print "time used for she: %d" % self.time_she
def __init__(self, start_pdb, target_I, ntotal, nmodes, max_rmsd,
             backbone_scale, prefix, weight='i', method='rtb',
             log='tmp.log'):
  self.counter = 0
  self.nmode_init = ntotal
  self.nmodes = 3  # nmodes
  self.method = method
  self.topn = 3
  self.Niter = 0
  # mode indices start at 7: the first six normal modes are rigid-body motions
  self.modes = flex.int(range(self.nmode_init)) + 7
  self.cutoff = 12
  self.weighted = True
  self.log = open(log, 'w')
  pdb_inp = pdb.input(file_name=start_pdb)
  crystal_symmetry = pdb_inp.xray_structure_simple().\
      cubic_unit_cell_around_centered_scatterers(
          buffer_size=10).crystal_symmetry()
  # uc = cctbx.uctbx.unit_cell("300,300,300,90,90,90")
  # crystal_symmetry = cctbx.crystal.symmetry(uc, 'P1')
  self.pdb_processor = process_pdb_file_srv(
      crystal_symmetry=crystal_symmetry)
  self.expt = saxs_read_write.read_standard_ascii_qis(target_I)
  self.q = self.expt.q
  self.expt_I = self.expt.i
  self.expt_s = self.expt.s
  if (self.q.size() > 50):
    self.q = self.interpolation(self.q, n_pts=50)
    self.expt_I = flex.linear_interpolation(self.expt.q, self.expt.i, self.q)
    self.expt_s = flex.linear_interpolation(self.expt.q, self.expt.s, self.q)
  if (weight == 'i'):
    self.expt_s = self.expt_I
  self.time_nm = 0
  self.time_she = 0
  start_name = start_pdb
  self.pdb = PDB(start_name, method=self.method)
  self.she_engine = she.she(start_name, self.q)
  self.natom = self.pdb.natm
  self.scale_factor = backbone_scale
  time1 = time.time()
  self.nmode = self.pdb.Hessian(self.cutoff, self.nmode_init,
                                self.scale_factor)
  self.time_nm += (time.time() - time1)
  self.root = prefix
  self.drmsd = max_rmsd
  self.Rmax2 = self.natom * (self.drmsd)**2.0
  self.step_size = sqrt(self.Rmax2 / self.nmodes) * 5.0
  self.new_indx = flex.int(range(self.natom))
  self.stop = False
  self.minscore = 1e20
  # minimum deviation of refined structures, compared to the refined
  # structure from the previous step
  self.minDev = 0
  # number of iterations between geometry optimizations
  self.optNum = 10
  self.iterate()
  self.log.close()
  print "time used for NM : %d" % self.time_nm
  print "time used for she: %d" % self.time_she
def run(file_name):
  ks = kratky_scaler()
  data = saxs_read_write.read_standard_ascii_qis(file_name)
  kratky_analyses(data)
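# Hypothetical command-line wrapper for the Kratky run() above; the argument
# is any three-column ascii q/I/sigma file readable by read_standard_ascii_qis.
if __name__ == "__main__":
  import sys
  run(sys.argv[1])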