def run(args, log=sys.stdout):
    """Write blq data (and optionally spatial correlations) for a PDB model.

    Settings come from the ``fxs_she`` parameter scope.  The blq data are
    written to ``<prefix>.blq``; when ``print_c2`` is set, the spatial
    correlation is also written to ``<prefix>.cor``.

    Args:
        args: Command-line arguments forwarded to ``get_input``.
        log: Not used by this function; kept for interface compatibility.
    """
    params = get_input(args, master_params, "fxs_she", banner, print_help)
    if params is None:
        exit()
    settings = params.fxs_she
    lmax = settings.lmax
    pdb_file = settings.pdb
    prefix = settings.prefix

    # n_step evenly spaced q values running from q_start to q_stop.
    q_array = settings.q_start + \
        (settings.q_stop - settings.q_start) \
        * flex.double(range(settings.n_step)) / (settings.n_step - 1)

    she_obj = she.she(pdb_file, q_array=q_array, max_L=lmax)

    # 51 angles in steps of 3.1416/25, i.e. from 0 to 2*3.1416.
    N_phi = 51
    phi_array = flex.double(range(N_phi)) / 25.0 * 3.1416
    # expansion coefficient calculation is built in this call
    she_obj.engine.calc_spatial_correlation(phi_array)

    if settings.print_c2:
        correlation = she_obj.engine.get_spatial_correlation()
        correlation = unpack(correlation, N_phi, q_array.size())
        # Context manager guarantees the file is closed, even on error.
        with open(prefix + '.cor', 'w') as out_correlation:
            print_correlation(out_correlation, correlation, q_array, N_phi)

    this_blq = she_obj.get_all_blq()
    blq_data = fxs_tools.blq_data(q_array, this_blq, lmax)
    with open(prefix + '.blq', 'w') as out:
        blq_data.print_out(out=out)
def run(args): params = get_input(args, master_params, "db", banner, help) if params is None: exit() path = params.db.path+"/" nmax=params.db.nmax np = params.db.np fix_dx = params.db.fix_dx prefix = params.db.prefix files = read(path) nlm_coefs = [] nn_coefs = [] codes = [] rmax = [] for file in files: code = file.split('\n')[0].split('.')[0] file = path+file mom_obj, vox_obj, pdb = pdb2zernike.zernike_moments( file, nmax=nmax, np=np, fix_dx=fix_dx, coef_out=False, calc_intensity=False ) if(mom_obj is None): print code, "NOT processed, please check the file" continue codes.append( code ) rmax.append( vox_obj.rmax() ) nlm_coefs.append( mom_obj.moments().coefs().deep_copy() ) nn_coefs.append( mom_obj.fnn().coefs().deep_copy() ) print code, "processed." easy_pickle.dump(prefix+".nlm", nlm_coefs) easy_pickle.dump(prefix+".nn", nn_coefs) easy_pickle.dump(prefix+".rmax", rmax) easy_pickle.dump(prefix+".codes", codes)
def run(args, log=sys.stdout): params = get_input(args, master_params, "pr_ref", banner, print_help) if (params is None): exit() t1 = time.time() flex.set_random_seed(0) start_pdb = params.pr_ref.start_pdb target_i = params.pr_ref.target_I method = params.pr_ref.method n_modes = params.pr_ref.n_modes total_modes = n_modes + 4 max_rmsd = params.pr_ref.max_rmsd backbone_scale = params.pr_ref.backbone_force weight = params.pr_ref.weight prefix = params.pr_ref.prefix nmref(start_pdb, target_i, total_modes, n_modes, max_rmsd, backbone_scale, prefix, weight=weight, method=method) t2 = time.time() print "\n start at: ", time.ctime(t1), "\n finished at: ", time.ctime(t2)
def run(args, log=sys.stdout):
    """Run ``get_concoord`` in parallel and summarise the best models.

    ``n_process`` workers are started, each given its own output prefix
    ``<prefix>_<i>_``.  One line per worker (its best model and score) is
    written to ``<prefix>.summary``, followed by the overall minimum.

    Each worker result is indexed as ``r[0]`` (model names), ``r[1]``
    (scores) and ``r[2]`` (index of that worker's best entry).

    Args:
        args: Command-line arguments forwarded to ``get_input``.
        log: Not used by this function; kept for interface compatibility.
    """
    params = get_input(args, master_params, "concoord", banner, print_help)
    if params is None:
        exit()
    pdb_file = params.concoord.start_pdb
    target = params.concoord.target_I
    n_struct = params.concoord.n_struct
    prefix = params.concoord.prefix
    nprocess = params.concoord.n_process

    p = Pool(nprocess)
    inputs = [
        [pdb_file, target, n_struct, prefix + '_' + str(ii) + '_']
        for ii in range(nprocess)
    ]
    results = p.map(get_concoord, inputs)
    p.close()
    p.join()

    # Track the best (lowest) score across workers.  Start from "nothing
    # seen yet" so the first worker always seeds both score and file name.
    min_score = None
    min_file = None
    with open(prefix + '.summary', 'w') as output:
        for r in results:
            min_indx = r[2]
            print >> output, r[0][min_indx], r[1][min_indx]
            if min_score is None or min_score > r[1][min_indx]:
                min_score = r[1][min_indx]
                min_file = r[0][min_indx]
        if min_file is not None:
            print >> output, "min chi2=%f, with model: %s" % (min_score, min_file)
def run(args):
    """Refine a starting model against a SAXS profile via ``zm_xplor_refine``.

    Settings come from the ``zrefine`` scope.  When a reference PDB is
    given, its nlm array is built and passed along to the refinement.

    Args:
        args: Command-line arguments forwarded to ``get_input``.
    """
    params = get_input(args, master_params, "zrefine", banner, help)
    if params is None:
        return
    opts = params.zrefine
    nmax = opts.nmax
    rmax = opts.rmax
    pdb = opts.pdb

    # Optional reference model.
    pdb_nlm = None
    if pdb is not None:
        reference = model_interface.container(pdbfile=pdb, rmax=rmax, nmax=nmax)
        pdb_nlm = reference.nlm_array

    data = saxs_read_write.read_standard_ascii_qis(opts.target)
    zm_xplor_refine(
        data,
        opts.start,
        rmax,
        qmax=opts.qmax,
        nmax=nmax,
        np_on_grid=opts.np_on_grid,  # number of grids covering [0,1]
        prefix=opts.prefix,
        splat_range=opts.splat_range,
        pdb_nlm=pdb_nlm,
        n_trial=opts.n_trial,
        nbr_dist=opts.nbr_dist,
    )
def run(args, log=sys.stdout):
    """Write blq data for a PDB model computed from its Zernike moments.

    Settings come from the ``fxs_znk`` scope; ``lmax`` falls back to
    ``nmax`` when it is not given.  The result is written to the file
    named by ``output``.

    Args:
        args: Command-line arguments forwarded to ``get_input``.
        log: Not used by this function; kept for interface compatibility.
    """
    params = get_input(args, master_params, "fxs_znk", banner, print_help)
    if params is None:
        exit()
    znk = params.fxs_znk
    nmax = znk.nmax
    lmax = znk.lmax
    if lmax is None:
        lmax = nmax
    np_on_grid = znk.np_on_grid
    filename = znk.pdb
    output = znk.output
    fix_dx = znk.fix_dx

    # n_step evenly spaced q values running from q_start to q_stop.
    q_array = znk.q_start + \
        (znk.q_stop - znk.q_start) \
        * flex.double(range(znk.n_step)) / (znk.n_step - 1)

    mom_obj, vox_obj, pdb = pdb2zernike.zernike_moments(
        filename, nmax=nmax, np=np_on_grid, fix_dx=fix_dx,
        coef_out=False, calc_intensity=True)
    c_nlm = mom_obj.moments()
    # The voxel object's rmax is divided by 0.9 before use.
    rmax = vox_obj.rmax() / 0.9
    znk_mom_variants = zernike_moment_variants(c_nlm, q_array, rmax, nmax, lmax)

    out = open(output, 'w')
    this_blq = znk_mom_variants.get_all_blq2()
    blq_data = fxs_tools.blq_data(q_array, this_blq, lmax)
    blq_data.print_out(out=out)
    out.close()
def run(args):
    """Refine a starting model against blq target data.

    Settings come from the ``zrefine`` scope; ``lmax`` falls back to
    ``nmax`` when it is not given.  Before refinement, the target data
    divided by its first entry are written to ``<target_file>_n``.

    Args:
        args: Command-line arguments forwarded to ``get_input``.
    """
    params = get_input(args, master_params, "zrefine", banner, help)
    if params is None:
        return
    nmax = params.zrefine.nmax
    lmax = params.zrefine.lmax
    if lmax is None:
        lmax = nmax
    start_file = params.zrefine.start
    target_file = params.zrefine.target
    rmax = params.zrefine.rmax
    qmax = params.zrefine.qmax
    prefix = params.zrefine.prefix
    splat_range = params.zrefine.splat_range
    pdb = params.zrefine.pdb
    np_on_grid = params.zrefine.np_on_grid
    n_trial = params.zrefine.n_trial
    nbr_dist = params.zrefine.nbr_dist

    if pdb is not None:
        pdb_nlm = model_interface.container(
            pdbfile=pdb, rmax=rmax, nmax=nmax).nlm_array
    else:
        pdb_nlm = None

    data = fxs_tools.read_blq(target_file, lmax=lmax)
    # Close the normalised-data file explicitly instead of leaving the
    # handle to the garbage collector.
    with open(target_file + '_n', 'w') as normalized_out:
        data.print_out(data=data.blq / data.blq[0], out=normalized_out)

    zm_xplor_refine(
        data, start_file, rmax, qmax=qmax, nmax=nmax, lmax=lmax,
        np_on_grid=np_on_grid, prefix=prefix, splat_range=splat_range,
        pdb_nlm=pdb_nlm, n_trial=n_trial, nbr_dist=nbr_dist)
def run(args):
    """Refine a starting model against SAXS data and write a shifted PDB.

    Settings come from the ``zrefine`` scope; the grid size is fixed at
    30.  When a reference PDB is given, it is re-written after refinement
    as ``<pdb stem>_shift.pdb`` using the rmax of the refinement object.

    Args:
        args: Command-line arguments forwarded to ``get_input``.
    """
    import os  # local import: only needed to build the output file name

    params = get_input(args, master_params, "zrefine", banner, help)
    if params is None:
        return
    np = 30
    nmax = params.zrefine.nmax
    start_file = params.zrefine.start
    target_file = params.zrefine.target
    rmax = params.zrefine.rmax
    qmax = params.zrefine.qmax
    prefix = params.zrefine.prefix
    splat_range = params.zrefine.splat_range
    pdb = params.zrefine.pdb
    n_trial = params.zrefine.n_trial

    pdb_obj = None
    pdb_nlm = None
    if pdb is not None:
        pdb_obj = model_interface.container(pdbfile=pdb, rmax=rmax, nmax=nmax)
        pdb_nlm = pdb_obj.nlm_array

    data = saxs_read_write.read_standard_ascii_qis(target_file)
    refine_obj = zm_xplor_refine(
        data, start_file, rmax, qmax=qmax, nmax=nmax, np_on_grid=np,
        prefix=prefix, splat_range=splat_range, pdb_nlm=pdb_nlm,
        n_trial=n_trial)

    if pdb_obj is not None:
        # Strip only the final extension.  Splitting on the first '.' would
        # turn a path such as './model.pdb' into an empty stem.
        out_pdb_filename = os.path.splitext(pdb)[0] + '_shift.pdb'
        pdb_obj.write_pdb(rmax=refine_obj.rmax, filename=out_pdb_filename)
def _image_names(settings, label):
    """Return the file names for one image group, or [] if it has no base.

    Raises:
        Sorry: if a base is given but the range is empty.
    """
    if settings.base is None:
        return []
    if settings.range is None:
        raise Sorry("%s range cannot be empty" % label)
    naming = settings.file_name_settings
    return get_file_names(
        settings.base, naming.wildcard, naming.has_leading_zeros,
        settings.range)


def run(args):
    """List the background, dark and sample images selected for processing.

    Settings come from the ``tdfXS.images`` scope.  For each image group
    that has a ``base``, the file names are expanded with
    ``get_file_names`` and then printed to standard output.

    Args:
        args: Command-line arguments forwarded to ``interface.get_input``.

    Raises:
        Sorry: if a group has a base but no range.
    """
    out = sys.stdout
    params = interface.get_input(args, master_params, "tdfXS", banner, helpf)
    if params is None:
        # Nothing to process; same guard as the other entry points.
        return

    images = params.tdfXS.images
    # Validation order (background, dark, sample) decides which error is
    # reported first when several groups are misconfigured.
    bg_list = _image_names(images.background, "Background")
    dk_list = _image_names(images.dark, "Dark")
    sm_list = _image_names(images.sample, "Sample")

    print >> out
    print >> out, "----- Processing images -----"
    print >> out
    groups = (
        (" Sample Images : ", sm_list),
        (" Dark Images : ", dk_list),
        (" Background Images : ", bg_list),
    )
    for title, names in groups:
        print >> out, title
        for name in names:
            print >> out, " ", name
        print >> out
def run(args): params = get_input(args, master_params, "db", banner, help) if params is None: exit() path = params.db.path+"/" nmax=params.db.nmax np = params.db.np fix_dx = params.db.fix_dx prefix = params.db.prefix NPROCESS = params.db.nprocess files = read(path) nlm_coefs = [] nn_coefs = [] codes = [] rmaxs = [] inputs = [] for file in files: inputs.append( (path, file, nmax, np, fix_dx) ) # print 'NPROCESS', NPROCESS # p = Pool( NPROCESS ) # CHUNKSIZE = len(inputs)/2/NPROCESS # print "CHUNKSIZE", CHUNKSIZE # results = p.map( get_results, inputs ) results = [] count =0 for i in inputs: count = count+1 print "count",count print i temp = get_results(i) results.append(temp) for result in results: if( result is None ): continue nlm = result[0] nn = result[1] code = result[2] rmax = result[3] codes.append( code ) rmaxs.append( rmax ) nlm_coefs.append( nlm.deep_copy() ) nn_coefs.append( nn.deep_copy() ) easy_pickle.dump(prefix+".nlm", nlm_coefs) easy_pickle.dump(prefix+".nn", nn_coefs) easy_pickle.dump(prefix+".rmax", rmaxs) easy_pickle.dump(prefix+".codes", codes)
def run(args):
    """Compute Zernike moments for a PDB file and return the result.

    Settings come from the ``zernike`` scope.

    Args:
        args: Command-line arguments forwarded to ``get_input``.

    Returns:
        Whatever ``zernike_moments`` returns, or ``None`` when no
        parameters were obtained.
    """
    params = get_input(args, master_params, "zernike", banner, help)
    if params is None:
        return
    zernike = params.zernike
    pdbfile = zernike.pdbfile
    nmax = zernike.nmax
    np = zernike.np  # read but not forwarded
    fix_dx = zernike.fix_dx
    shift = zernike.shift
    buildmap = zernike.buildmap  # read but not forwarded
    return zernike_moments(pdbfile, nmax, fix_dx=fix_dx, shift=shift)
def run(args):
    """Compute Zernike moments (with intensity) for a mol2 file.

    Settings come from the ``zernike`` scope and are forwarded to
    ``zernike_moments`` with ``calc_intensity=True``.

    Args:
        args: Command-line arguments forwarded to ``get_input``.
    """
    params = get_input(args, master_params, "zernike", banner, help)
    if params is None:
        return
    cfg = params.zernike
    zernike_moments(
        cfg.mol2_file,
        cfg.nmax,
        cfg.np,
        uniform=cfg.uniform,
        fix_dx=cfg.fix_dx,
        shift=cfg.shift,
        buildmap=cfg.buildmap,
        coef_out=cfg.coef_out,
        calc_intensity=True,
    )
def run(args):
    """Run ``go`` with ``pregxs`` parameters, logging to ``pregxs.txt``.

    The log file sits in the parent directory of ``sys.path[0]``.  It is
    emptied first, handed to ``get_input`` for its own output, and on
    completion receives the elapsed time and a final ``__END__`` marker.

    Args:
        args: Command-line arguments forwarded to ``get_input``.
    """
    t1 = time.time()
    targetfile = os.path.join(os.path.split(sys.path[0])[0], "pregxs.txt")

    # Start from an empty log file.
    with open(targetfile, "w") as f:
        f.truncate()

    # Context manager so the handle is released even if get_input raises.
    with open(targetfile, 'a') as tempf:
        params = get_input(args, master_params, "pregxs", banner, print_help, tempf)
    if params is None:
        return

    go(params)

    with open(targetfile, "a") as f:
        f.write(str(time.time() - t1) + " time use\n")
        f.write("__END__")
    return
def run(args):
    """Build a model container and write it as an xplor map and bead PDB.

    Settings come from the ``model`` scope.  When ``nlm_file`` is given,
    its pickled coefficients are loaded and passed to the container.
    Output goes to ``<output>.xplor`` and ``<output>.pdb``.

    Args:
        args: Command-line arguments forwarded to ``get_input``.
    """
    params = get_input(args, master_params, "model", banner, help)
    if params is None:
        return
    cfg = params.model
    nlm_file = cfg.nlm_file
    output = cfg.output

    nlm_coefs = easy_pickle.load(nlm_file) if nlm_file is not None else None

    model = container(
        pdbfile=cfg.pdb_file,
        mapfile=cfg.map_file,
        nlm_coefs=nlm_coefs,
        rmax=cfg.rmax,
        nmax=cfg.nmax,
    )
    model.write_map(output + '.xplor')
    model.write_bead(output + '.pdb')
def run(args, log=sys.stdout): params = get_input(args, master_params, "mc", banner, print_help) if (params is None): exit() t1 = time.time() flex.set_random_seed(0) start_pdb = params.mc.start_pdb target_i = params.mc.target_I max_rmsd = params.mc.max_rmsd backbone_scale = params.mc.backbone_force weight = params.mc.weight prefix = params.mc.prefix mcref = mc_refine(start_pdb, target_i, max_rmsd, backbone_scale, prefix, weight=weight) t2 = time.time() print "\n start at: ", time.ctime(t1), "\n finished at: ", time.ctime(t2)
def run(args, log=sys.stdout): params = get_input(args, master_params, "concoord", banner, print_help) if (params is None): exit() pdb_file = params.concoord.start_pdb target = params.concoord.target_I n_struct = params.concoord.n_struct prefix = params.concoord.prefix nprocess = params.concoord.n_process p = Pool(nprocess) inputs = [] for ii in range(nprocess): sub_prefix = prefix + '_' + str(ii) + '_' inputs.append([pdb_file, target, n_struct, sub_prefix]) results = p.map(get_concoord, inputs) p.close() p.join() for r in results: min_indx = r[1] print r[0][0][min_indx], r[0][1][min_indx]
def run(args):
    """Compute the profile of one database entry identified by its code.

    Settings come from the ``query`` scope.  The database prefix is the
    user-supplied one when ``db_choice`` is ``"user"``, otherwise
    ``db_choice`` itself; the database path falls back to
    ``set_default_db_path()``.

    Args:
        args: Command-line arguments forwarded to ``get_input``.
    """
    t1 = time.time()
    params = get_input(args, master_params, "query", banner, help)
    if params is None:
        exit()
    query = params.query
    rmax = query.rmax
    nmax = query.nmax
    code = query.code
    dbpath = query.dbpath
    db_choice = query.db_choice
    delta_q = query.delta_q

    dbprefix = query.db_user_prefix if db_choice == "user" else db_choice
    if dbpath is None:
        dbpath = set_default_db_path()

    # NOTE(review): ``fraction`` is read here but the call below passes a
    # literal 0.9 -- confirm whether the parameter should be forwarded.
    fraction = query.fraction

    nn_coefs, codes, rmaxs = read_pickle(dbpath, dbprefix)
    nn_array = get_nn_array_for_code(code, nn_coefs, codes, rmaxs, nmax=nmax)
    intensity = get_profile(nn_array, rmax, fraction=0.9, nmax=nmax)
def run(args): t1 = time.time() params = get_input(args, master_params, "query", banner, help) if (params is None): exit() target_file = params.query.target rmax = params.query.rmax nmax = params.query.nmax smear = params.query.smear dbpath = params.query.dbpath pdb_files = params.query.pdb_files db_choice = params.query.db_choice weight = params.query.weight delta_q = params.query.delta_q if (db_choice == "user"): dbprefix = params.query.db_user_prefix else: dbprefix = db_choice if (dbpath is None): dbpath = set_default_db_path() ntop = params.query.ntop scan = params.query.scan fraction = params.query.fraction q_step = 1.0 / 100.0 data = saxs_read_write.read_standard_ascii_qis(target_file) if (rmax is None): rmax = get_rg(data) * 3.0 / 2.0 qmax = params.query.qmax q_background = params.query.q_background #qmax = 0.44*smath.exp( -0.00023*rmax*rmax ) ######### Interpolation ########## bandwidth = min(q_step, data.q[2] / 2.0) # smath.pi/2.0/rmax ) data = reduce_raw_data(data, qmax, bandwidth, q_background=q_background, level=params.query.q_level) #saxs_read_write.write_standard_ascii_qis(data, 'reduced'+target_file ) ###### END of Interpolation ########## nn_coefs, codes, rmaxs = read_pickle(dbpath, dbprefix) shapes = intoshape(data, nmax=nmax, rmax=rmax, scan=scan, fraction=fraction, smear=smear, prefix=params.query.prefix, weight=weight, delta_q=delta_q) shapes.lookup(nn_coefs, codes, ntop) pdb_models = None if (len(pdb_files) > 0): pdb_models = process(pdb_files, nmax, shapes.best_rmax[0] / fraction) nlm_coefs = None if (params.query.buildmap): nlm_coefs = read_nlm(dbpath, dbprefix) top_cc = build_map(nmax, shapes, nlm_coefs, codes, pdb_models) # need to use rmax/fraction to get right size of box if (len(pdb_files) > 0): out = open(params.query.prefix + "_cc2pdb.dat", 'w') for cc in top_cc: print >> out, cc print >> out, "mean: %8.5f" % flex.mean(top_cc) print "mean cc: %8.5f" % flex.mean(top_cc) print "first cc: %8.5f" % top_cc[0] print "best cc: %8.5f" % 
flex.max(top_cc) print "worst cc: %8.5f" % flex.min(top_cc) out.close() print "Rmax: estimated vs PDB", shapes.best_rmax[0], pdb_models[ 0].rmax shapes.pair_align(nlm_coefs, params.query.calc_cc) t2 = time.time() print "total time used: ", t2 - t1
def run(args): params = get_input(args, master_params, "align", banner, help) if params is None: return fix = params.align.fix typef = params.align.typef mov = params.align.mov typem = params.align.typem num_grid = params.align.num_grid nmax = params.align.nmax rmax = params.align.rmax topn = params.align.topn write_map = params.align.write_map fix_model=model_interface.build_model( fix, typef, nmax, rmax ) mov_model=model_interface.build_model( mov, typem, nmax, rmax ) fix_nlm_array = fix_model.nlm_array mov_nlm_array = mov_model.nlm_array print "doing alignment" align_obj = fft_align.align( fix_nlm_array, mov_nlm_array, nmax=nmax, topn=topn ,refine=True) cc = align_obj.get_cc() mov_model.nlm_array = align_obj.moving_nlm rmax = update_rmax( rmax, fix_model, mov_model) fix_model.rmax = rmax mov_model.rmax = rmax shift=(rmax, rmax, rmax) print "############# SUMMARY of ALIGNMENT #############" print "Correlation Coefficient Between two models is: ", cc print "Rmax is : ", rmax print "Center of Mass is shifted to : ", list(shift) print "OUTPUT files are : " current_is_mov = False for model in (fix_model, mov_model): #base=model.id ##################20170520########################### #################change the output dir ################### base = str(model.id.split("/")[-1]) dirlist = sys.argv[0].split("sastbx") tmpdir = str(dirlist[0])+"sastbx/gui/sasqt/tmp.txt" with open(tmpdir,"r") as f: targetdir = str(f.read().strip()) base = os.path.join(targetdir,"superpose",base) ############################################################### easy_pickle.dump(base+"_za.nlm", model.nlm_array.coefs() ) print " "+base+"_za.nlm" if(write_map): model.write_map(filename=base+"_za.xplor") print " "+base+"_za.xplor" if( model.vox_obj is not None): ### Write aligned PDB file #### out_pdb_name=base+"_za.pdb" if(current_is_mov): ea = align_obj.best_ea aligned_xyz = model.vox_obj.rotate((-ea[0],ea[1],-ea[2]), False) else: aligned_xyz = model.vox_obj.xyz() aligned_xyz = aligned_xyz + 
shift ### Add the shift, such that the EDM center is the same as PDB ###################20170511##################################### ################debug for size error############################ #model.pdb_inp.hierarchy.atoms().set_xyz(aligned_xyz) sel_cache = model.pdb_inp.hierarchy.atom_selection_cache() hetero = model.pdb_inp.hierarchy.atoms().extract_hetero() position = list(hetero) no_hetero = sel_cache.selection("all") for i in position: no_hetero[i]=False no_hetero_atoms = model.pdb_inp.hierarchy.atoms().select(no_hetero) no_hetero_atoms.set_xyz(aligned_xyz) model.pdb_inp.hierarchy.write_pdb_file( file_name=out_pdb_name, open_append=False) print " "+out_pdb_name current_is_mov = True print "############# END of SUMMARY #############"
def run(args): global stdfile global outfilelog targetpath_fromGUI = '' targetpath_fromGUI_file = os.path.join(base_path, "targetpath_GUI.txt") if os.path.isfile(targetpath_fromGUI_file) and ( os.stat(targetpath_fromGUI_file).st_size > 0): with open(targetpath_fromGUI_file, "r") as f: targetpath_fromGUI = f.read().strip() if targetpath_fromGUI == '': stddir = "maps" else: tempfile = os.path.join(targetpath_fromGUI, "Shape_Search_Engine") stddir = os.path.join(tempfile, "maps") #stdfile = os.path.join(tempfile,"temp.txt") stdfile = os.path.join(os.path.split(sys.path[0])[0], "shapeup.txt") with open(stdfile, "w") as f: f.truncate() outfilelog = os.path.join( os.path.split(sys.path[0])[0], "outfilelog_shapeup.txt") with open(outfilelog, "w") as f: f.truncate() t1 = time.time() with open(stdfile, "a") as outfile: params = get_input(args, master_params, "query", banner, help, outfile) if (params is None): exit() target_file = params.query.target rmax = params.query.rmax nmax = params.query.nmax smear = params.query.smear dbpath = params.query.dbpath pdb_files = params.query.pdb_files db_choice = params.query.db_choice weight = params.query.weight delta_q = params.query.delta_q if (db_choice == "user"): dbprefix = params.query.db_user_prefix else: dbprefix = db_choice if (dbpath is None): dbpath = set_default_db_path() ntop = params.query.ntop scan = params.query.scan fraction = params.query.fraction scale_power = params.query.scale_power q_step = 1.0 / 200.0 data = saxs_read_write.read_standard_ascii_qis(target_file) try: rg, io = get_rg(data) except: with open(stdfile, "a") as log: print >> log, "Guinier analysis failed, R_max is required" print >> log, "ATTENTION: dummy values for Rg and Io set" print "Guinier analysis failed, R_max is required" print "ATTENTION: dummy values for Rg and Io set" rg = 50 io = 1 qmax = params.query.qmax q_background = params.query.q_background #qmax = 0.44*smath.exp( -0.00023*rmax*rmax ) ######### Interpolation ########## if (rmax is 
None): rmax = 50 bandwidth = min(q_step, smath.pi / 2.0 / rmax, data.q[1] - data.q[0]) data = reduce_raw_data(data, qmax, bandwidth, q_background=q_background, level=params.query.q_level) ###### END of Interpolation ########## with open(stdfile, "a") as log: print >> log, " ==== Reading in shape database ==== " print " ==== Reading in shape database ==== " begin_time = time.time() nn_coefs, codes, rmaxs = read_pickle(dbpath, dbprefix) ready_time = time.time() delta_time = ready_time - begin_time print with open(stdfile, "a") as log: print >> log, " Done reading database with %i entries in %5.4e seconds" % ( len(codes), delta_time) print >> log, " ==== Shape retrieval ==== " print >> log, " Constructing shape retrieval object" print " Done reading database with %i entries in %5.4e seconds" % ( len(codes), delta_time) print " ==== Shape retrieval ==== " print " Constructing shape retrieval object" shapes = intoshape(data, rg=rg, io=io, nmax=nmax, rmax=rmax, scan=scan, fraction=fraction, smear=smear, prefix=params.query.prefix, weight=weight, delta_q=delta_q, scale_power=scale_power) with open(stdfile, "a") as log: print >> log, " Shape search .... " print " Shape search .... 
" shapes.lookup(nn_coefs, codes, ntop) nlm_coefs = read_nlm(dbpath, dbprefix) shapes.pair_align(nlm_coefs, params.query.calc_cc) pdb_models = None if (len(pdb_files) > 0): pdb_models = process(pdb_files, nmax, rmax=shapes.best_rmax, fraction=fraction) if (params.query.buildmap): top_cc, top_ids, map_files, levels, cluster_ids, ave_maps, ave_levels, ave_cc = build_map( nmax, shapes.best_rmax, nlm_coefs, codes, shapes.best_models, pdb_models, clusters=shapes.clusters, fract=fraction, prefix=params.query.prefix) # need to use rmax/fraction to get right size of box #build_pymol_script.write_pymol_scripts(maps=map_files,levels=levels,root_name=stddir) build_pymol_script.write_pymol_shapeup(maps=map_files, root_name=stddir) pdb_out_name = None if (pdb_models is not None): pdb_out_name = pdb_files[0].split('.')[0] + '_sa.pdb' #generate_html.generate_jmol_html(ave_maps, ave_cc, ave_levels, map_files, top_cc, levels, cluster_ids, 'models.html', pdb=pdb_out_name) if (len(pdb_files) > 0): with open(params.query.prefix + "_cc2pdb.dat", 'w') as out: print >> out, "Correlation coefficients of retrieved shapes vs input model" for cc, id in zip(top_cc, top_ids): print >> out, "Code: %5s CC: %5.1f " % (id, 100 * cc) print >> out, "mean: %8.5f" % flex.mean(top_cc) with open(stdfile, "a") as log: print >> log, "Compared to the PDB model (%s)" % pdb_models[ 0].filename print >> log, "mean cc: %8.5f" % flex.mean(top_cc) print >> log, "first cc: %8.5f" % top_cc[0] print >> log, "best cc: %8.5f" % flex.max(top_cc) print >> log, "worst cc: %8.5f" % flex.min(top_cc) print "Compared to the PDB model (%s)" % pdb_models[0].filename print "mean cc: %8.5f" % flex.mean(top_cc) print "first cc: %8.5f" % top_cc[0] print "best cc: %8.5f" % flex.max(top_cc) print "worst cc: %8.5f" % flex.min(top_cc) with open(stdfile, "a") as log: print >> log, "Rmax: estimated vs PDB", shapes.best_rmax, pdb_models[ 0].rmax print "Rmax: estimated vs PDB", shapes.best_rmax, pdb_models[ 0].rmax t2 = time.time() with 
open(stdfile, "a") as log: print >> log, "total time used: ", t2 - t1, "(seconds)" print "total time used", t2 - t1, "(seconds)" with open(stdfile, "a") as log: log.write("__END__")
.type=path .help = "Output base. expect a .pr and a .qii file." """) args = sys.argv[1:] prefix="/home/dongxq/zalign/build/myDB" codes=easy_pickle.load(prefix+".codes") nlm_coefs=easy_pickle.load(prefix+".nlm") nl_coefs=easy_pickle.load(prefix+".nl") rmaxs=easy_pickle.load(prefix+".rmax") targetfile = os.path.join(os.path.split(sys.path[0])[0],"res") with open(targetfile,"w") as f: f.truncate() tempf = open(targetfile,'w') params = get_input(args, master_params, "aligndb", banner, help,tempf) tempf.close() mov = params.align.mov typem = params.align.typem nmax = params.align.nmax rmax = params.align.rmax topn = params.align.topn write_map = params.align.write_map nlNum = params.align.nlnum nlmNum = params.align.nlmnum threadNum = params.align.threadnum mov_model=model_interface.build_model( mov, typem, nmax, rmax ) totalSize = len(nl_coefs) perSizeChi = int(xmath.ceil( totalSize / threadNum)) perSizeCc = int(xmath.ceil(nlNum / threadNum))
def run(args): #targetfile = $SASTBXPATH/modules/cctbx_project/sastbx targetfile = os.path.join(os.path.split(sys.path[0])[0],"superpose.txt") with open(targetfile,"w") as f: f.truncate() tempf = open(targetfile,'w') params = get_input(args, master_params, "align", banner, help,tempf) tempf.close() if params is None: return fix = params.align.fix typef = params.align.typef mov = params.align.mov typem = params.align.typem num_grid = params.align.num_grid nmax = params.align.nmax rmax = params.align.rmax topn = params.align.topn write_map = params.align.write_map fix_model=model_interface.build_model( fix, typef, nmax, rmax ) mov_model=model_interface.build_model( mov, typem, nmax, rmax ) fix_nlm_array = fix_model.nlm_array mov_nlm_array = mov_model.nlm_array with open(targetfile,"a") as f: f.write("doing alignment\n") print "doing alignment" align_obj = fft_align.align( fix_nlm_array, mov_nlm_array, nmax=nmax, topn=topn ,refine=True) cc = align_obj.get_cc() print cc mov_model.nlm_array = align_obj.moving_nlm rmax = update_rmax( rmax, fix_model, mov_model) fix_model.rmax = rmax mov_model.rmax = rmax shift=(rmax, rmax, rmax) with open(targetfile,"a") as f: f.write( "############# SUMMARY of ALIGNMENT #############\n") f.write( "Correlation Coefficient Between two models is: "+str(cc)+"\n") f.write("Rmax is : "+str(rmax)+"\n") f.write("Center of Mass is shifted to : "+str(list(shift))+"\n") f.write("OUTPUT files are : "+"\n") print "############# SUMMARY of ALIGNMENT #############" print "Correlation Coefficient Between two models is: ", cc print "Rmax is : ", rmax print "Center of Mass is shifted to : ", list(shift) print "OUTPUT files are : " current_is_mov = False pdblist = [] xplorlist = [] targetpath_fromGUI = '' targetpath_fromGUI_file = os.path.join(base_path,"targetpath_GUI.txt") if os.path.isfile(targetpath_fromGUI_file) and (os.stat(targetpath_fromGUI_file).st_size>0): with open(targetpath_fromGUI_file,"r") as f: targetpath_fromGUI = f.read().strip() for 
model in (fix_model, mov_model): if targetpath_fromGUI == '': base=model.id else: base = str(model.id.split("/")[-1]) print "base: ",base targetdir = os.path.join(targetpath_fromGUI,"Model_Superposition") base = os.path.join(targetdir,base) ##################20170520########################### #################change the output dir ################### # base = str(model.id.split("/")[-1]) # dirlist = sys.argv[0].split("sastbx") # tmpdir = str(dirlist[0])+"sastbx/gui/sasqt/tmp.txt" # with open(tmpdir,"r") as f: # targetdir = str(f.read().strip()) # base = os.path.join(targetdir,"superpose",base) ############################################################### # easy_pickle.dump(base+"_za.nlm", model.nlm_array.coefs() ) # with open(targetfile,"a") as f: # f.write(" "+base+"_za.nlm\n") # if(write_map): # model.write_map(filename=base+"_za.xplor") # xplorlist.append(base+"_za.xplor") # with open(targetfile,"a") as f: # f.write(" "+base+"_za.xplor\n") # if( model.vox_obj is not None): ### Write aligned PDB file #### # out_pdb_name=base+"_za.pdb" # pdblist.append(out_pdb_name) # if(current_is_mov): # ea = align_obj.best_ea # aligned_xyz = model.vox_obj.rotate((-ea[0],ea[1],-ea[2]), False) # else: # aligned_xyz = model.vox_obj.xyz() # aligned_xyz = aligned_xyz + shift ### Add the shift, such that the EDM center is the same as PDB ###################20170511##################################### ################debug for size error############################ #model.pdb_inp.hierarchy.atoms().set_xyz(aligned_xyz) # sel_cache = model.pdb_inp.hierarchy.atom_selection_cache() # hetero = model.pdb_inp.hierarchy.atoms().extract_hetero() # position = list(hetero) # no_hetero = sel_cache.selection("all") # for i in position: # no_hetero[i]=False # no_hetero_atoms = model.pdb_inp.hierarchy.atoms().select(no_hetero) # no_hetero_atoms.set_xyz(aligned_xyz) # model.pdb_inp.hierarchy.write_pdb_file( file_name=out_pdb_name, open_append=False) # with open(targetfile,"a") as f: # f.write(" 
"+out_pdb_name+'\n') # print out_pdb_name # current_is_mov = True # print "pdblist: ",pdblist # print "xplorlist: ", xplorlist ############targetpath_fromGUI=='' for commmand line ############else for GUI if targetpath_fromGUI != '': targetdir = os.path.join(targetpath_fromGUI,"Model_Superposition") build_pymol_script.write_pymol_superpose(pdblist,targetdir) with open(targetfile,"a") as f: f.write("############# END of SUMMARY #############\n") with open(targetfile,"a") as f: f.write("__END__") print "############# END of SUMMARY #############\n" print "__END__"
def run(args): targetfile = os.path.join(os.path.split(sys.path[0])[0], "retrieval.txt") with open(targetfile, "w") as f: f.truncate() time1 = time.time() global nmax global nlm_array_ref global coefs global nlm_total global codes global pdbfile params = get_input(args, master_params, "retrieval", banner, help) if (params is None): exit() pdbfile = params.retrieval.pdbfile dbpath = params.retrieval.dbpath nmax = params.retrieval.nmax dbprefix = params.retrieval.db_prefix prefix = params.retrieval.prefix print "=============process the protein model==============" with open(targetfile, "a") as f: print >> f, "=============process the protein model==============" zernike_moments(pdbfile, nmax=nmax) queryCoefFile = pdbfile.split(".")[0] + ".nlm.pickle" #queryCoefFile=pdbfile.replace("pdb", "nlm.pickle") queryCoef = easy_pickle.load(queryCoefFile) with open(targetfile, "a") as f: print >> f, "=============load database===============" print "=============load database===============" if (dbpath is None): dbpath = set_default_db_path() codes = easy_pickle.load(os.path.join(dbpath, dbprefix + ".codes")) coefs = easy_pickle.load(os.path.join(dbpath, dbprefix + ".nlm")) else: codes = easy_pickle.load(os.path.join(dbpath, dbprefix + ".codes")) coefs = easy_pickle.load(os.path.join(dbpath, dbprefix + ".nlm")) with open(targetfile, "a") as f: print >> f, "=============database=============" print >> f, os.path.join(dbpath, dbprefix + ".codes") print >> f, os.path.join(dbpath, dbprefix + ".nlm") print >> f, "==================================" print "=============database=============" print os.path.join(dbpath, dbprefix + ".codes") print os.path.join(dbpath, dbprefix + ".nlm") print "==================================" nmodels = len(coefs) nlm_array_ref = math.nlm_array(nmax) nlm = nlm_array_ref.nlm() nlm_total = nlm_array_ref.coefs().size() nlm_array_ref.load_coefs(nlm, queryCoef[0:nlm_total]) p = Pool(8) cclist = p.map(calcc, range(nmodels)) distlist = [1 - cc for cc in 
cclist] rankedlist = sorted(range(nmodels), key=lambda k: distlist[k]) rankedcodes = [codes[rank] for rank in rankedlist] sortedcclist = sorted(cclist, reverse=True) with open(targetfile, "a") as f: print >> f, "=========Tope 10 models matching the input protein model============" print "=========Tope 10 models matching the input protein model============" with open(targetfile, "a") as f: for i in range(10): print "top ", (i + 1), " ", rankedcodes[i], "c.c.", sortedcclist[i] print >> f, "top ", ( i + 1), " ", rankedcodes[i], "c.c.", sortedcclist[i] time2 = time.time() print "time used:", time2 - time1 with open(targetfile, "a") as f: print >> f, "time used: ", time2 - time1
def run(args):
    """Retrieve database shapes that match a SAXS profile.

    Reads the target profile, reduces it, looks up the ``ntop`` best shapes
    in the pickled database and aligns them pairwise. When
    ``params.query.buildmap`` is set, maps, PyMOL scripts and ``models.html``
    are produced; if PDB files were supplied as well, the correlation of the
    retrieved shapes with the first PDB model is written to
    ``<prefix>_cc2pdb.dat`` and summarised on stdout.

    Args:
        args: command-line arguments, handed to ``get_input``.

    The interpreter exits when ``get_input`` returns ``None``.
    """
    t1 = time.time()
    params = get_input(args, master_params, "query", banner, help)
    if params is None:
        exit()
    query = params.query
    target_file = query.target
    rmax = query.rmax
    nmax = query.nmax
    smear = query.smear
    dbpath = query.dbpath
    pdb_files = query.pdb_files
    db_choice = query.db_choice
    weight = query.weight
    delta_q = query.delta_q
    if db_choice == "user":
        dbprefix = query.db_user_prefix
    else:
        dbprefix = db_choice
    if dbpath is None:
        dbpath = set_default_db_path()
    ntop = query.ntop
    scan = query.scan
    fraction = query.fraction
    scale_power = query.scale_power
    q_step = 1.0 / 100.0

    data = saxs_read_write.read_standard_ascii_qis(target_file)
    rg, io = get_rg(data)
    qmax = query.qmax
    q_background = query.q_background

    # ---- interpolation of the raw data ----
    # NOTE(review): data.q[2] requires at least three data points.
    bandwidth = min(q_step, data.q[2] / 2.0)
    data = reduce_raw_data(data, qmax, bandwidth,
                           q_background=q_background,
                           level=query.q_level)

    print(" ==== Reading in shape database ==== ")
    begin_time = time.time()
    nn_coefs, codes, rmaxs = read_pickle(dbpath, dbprefix)
    delta_time = time.time() - begin_time
    print("")
    print(" Done reading database with %i entries in %5.4e seconds" % (
        len(codes), delta_time))
    print("")

    print(" ==== Shape retrieval ==== ")
    print(" Constructing shape retrieval object")
    shapes = intoshape(data, rg=rg, io=io, nmax=nmax, rmax=rmax, scan=scan,
                       fraction=fraction, smear=smear, prefix=query.prefix,
                       weight=weight, delta_q=delta_q,
                       scale_power=scale_power)
    print(" Shape search .... ")
    shapes.lookup(nn_coefs, codes, ntop)
    nlm_coefs = read_nlm(dbpath, dbprefix)
    shapes.pair_align(nlm_coefs, query.calc_cc)

    pdb_models = None
    if len(pdb_files) > 0:
        pdb_models = process(pdb_files, nmax, rmax=shapes.best_rmax,
                             fraction=fraction)

    if query.buildmap:
        # need to use rmax/fraction to get right size of box
        (top_cc, top_ids, map_files, levels, cluster_ids,
         ave_maps, ave_levels, ave_cc) = build_map(
            nmax, shapes.best_rmax, nlm_coefs, codes, shapes.best_models,
            pdb_models, clusters=shapes.clusters, fract=fraction)
        build_pymol_script.write_pymol_scripts(map_files, levels)
        pdb_out_name = None
        if pdb_models is not None:
            pdb_out_name = pdb_files[0].split('.')[0] + '_sa.pdb'
        generate_html.generate_jmol_html(ave_maps, ave_cc, ave_levels,
                                         map_files, top_cc, levels,
                                         cluster_ids, 'models.html',
                                         pdb=pdb_out_name)
        # top_cc / top_ids only exist when maps were built, so the
        # comparison report stays inside this branch.
        if len(pdb_files) > 0:
            # "with" closes the report even if a formatting call raises.
            with open(query.prefix + "_cc2pdb.dat", 'w') as out:
                out.write("Correlation coefficients of retrieved shapes vs input model" + "\n")
                for cc, code in zip(top_cc, top_ids):
                    out.write("Code: %5s CC: %5.1f " % (code, 100 * cc) + "\n")
                out.write("mean: %8.5f" % flex.mean(top_cc) + "\n")
            print("Compared to the PDB model (%s)" % pdb_models[0].filename)
            print("mean cc: %8.5f" % flex.mean(top_cc))
            print("first cc: %8.5f" % top_cc[0])
            print("best cc: %8.5f" % flex.max(top_cc))
            print("worst cc: %8.5f" % flex.min(top_cc))
            print("Rmax: estimated vs PDB %s %s" % (shapes.best_rmax,
                                                    pdb_models[0].rmax))

    t2 = time.time()
    # The doubled space reproduces the separator the print statement
    # inserted after the label's own trailing space.
    print("total time used:  %s (seconds)" % (t2 - t1,))
def run(args, outpath=None):
    """Align a moving model onto a fixed model and write the results.

    Both models are built through ``model_interface.build_model``, aligned
    with ``fft_align.align`` and, for models that carry a voxel object,
    written out as ``<base>_za.pdb`` (plus ``<base>_za.xplor`` when
    ``params.align.write_map`` is set).

    Args:
        args: command-line arguments, handed to ``get_input``.
        outpath: directory that receives the output files. ``None`` now
            means the current directory; previously it raised ``TypeError``
            while the output names were being built.

    Returns:
        None. Returns early when ``get_input`` yields no parameters.
    """
    params = get_input(args, master_params, "align", banner, help)
    if params is None:
        return
    fix = params.align.fix
    typef = params.align.typef
    mov = params.align.mov
    typem = params.align.typem
    nmax = params.align.nmax
    rmax = params.align.rmax
    topn = params.align.topn
    write_map = params.align.write_map

    if outpath is None:
        outpath = "."

    fix_model = model_interface.build_model(fix, typef, nmax, rmax)
    mov_model = model_interface.build_model(mov, typem, nmax, rmax)
    fix_nlm_array = fix_model.nlm_array
    mov_nlm_array = mov_model.nlm_array

    print("doing alignment")
    align_obj = fft_align.align(fix_nlm_array, mov_nlm_array, nmax=nmax,
                                topn=topn, refine=True)
    cc = align_obj.get_cc()
    mov_model.nlm_array = align_obj.moving_nlm
    rmax = update_rmax(rmax, fix_model, mov_model)
    fix_model.rmax = rmax
    mov_model.rmax = rmax
    shift = (rmax, rmax, rmax)

    # The doubled space after each label reproduces the separator the print
    # statement inserted after the label's own trailing space.
    print("############# SUMMARY of ALIGNMENT #############")
    print("Correlation Coefficient Between two models is:  %s" % (cc,))
    print("Rmax is :  %s" % (rmax,))
    print("Euler angles for the moving object is :  %s"
          % (list(align_obj.best_ea),))
    print("Center of Mass is shifted to :  %s" % (list(shift),))
    print("OUTPUT files are : ")

    # The output base name does not depend on the model, so build it once.
    # NOTE(review): it is derived from `mov` for BOTH models (and assumes a
    # 4-character extension), so files written for the fixed model are
    # overwritten by the moving model's -- confirm this is intended.
    base = outpath + '/' + mov.split("/")[-1][:-4]

    current_is_mov = False
    for model in (fix_model, mov_model):
        if current_is_mov:
            # Drop the map of the moving model before it is written;
            # presumably forces a rebuild from the aligned coefficients.
            model.map = None
        if write_map:
            model.write_map(filename=base + "_za.xplor")
            print(" " + base + "_za.xplor")
        if model.vox_obj is not None:
            # ---- write aligned PDB file ----
            out_pdb_name = base + "_za.pdb"
            if current_is_mov:
                ea = align_obj.best_ea
                aligned_xyz = model.vox_obj.rotate((-ea[0], ea[1], -ea[2]),
                                                   False)
            else:
                aligned_xyz = model.vox_obj.xyz()
            # Add the shift, such that the EDM center is the same as PDB
            aligned_xyz = aligned_xyz + shift
            model.pdb_inp.hierarchy.atoms().set_xyz(aligned_xyz)
            model.pdb_inp.hierarchy.write_pdb_file(file_name=out_pdb_name,
                                                   open_append=False)
            print(" " + out_pdb_name)
        current_is_mov = True
    print("############# END of SUMMARY #############")
def run(args):
    """Compute a SAXS intensity profile for a PDB structure.

    ``params.sas_I.method`` selects the engine: 'zernike', 'she' or 'debye'.
    When experimental data are supplied the calculated curve is scaled to
    them and the resulting chi is reported. Every message goes to stdout and
    is mirrored in ``she.txt`` (parent directory of ``sys.path[0]``); the
    path of that file is published as the module-level name ``f``.

    Args:
        args: command-line arguments, handed to ``get_input``.

    Returns:
        None. The interpreter exits when ``get_input`` returns ``None``.
    """
    global f
    f = os.path.join(os.path.split(sys.path[0])[0], "she.txt")
    log_path = f
    # Opening in "w" mode empties any log left over from an earlier run.
    with open(log_path, "w"):
        pass

    def report(*items):
        # Emits what the print statement would: str() of each item joined by
        # single spaces, on stdout and appended to the log file.
        line = " ".join([str(item) for item in items])
        print(line)
        with open(log_path, "a") as log:
            log.write(line + "\n")

    def report_envelope(attri):
        # attri is the sequence returned by engine.Area_Volume().
        report("Inner surface Area of the Envelop is (A^2.0): ", attri[0])
        report("Inner Volume of the Envelop is (A^3.0): ", attri[1])
        report("Volume of the Envelop shell is (A^3.0): ", attri[2])

    t1 = time.time()
    exp_data = None
    q_values = None

    with open(log_path, "a") as tempf:
        params = get_input(args, master_params, "sas_I", banner, print_help,
                           tempf)
    if params is None:
        exit()
    sas = params.sas_I
    internals = sas.internals

    # check if we have experimental data
    if sas.experimental_data is not None:
        exp_data = saxs_read_write.read_standard_ascii_qis(
            sas.experimental_data)
        if sas.data_reduct:
            qmax = exp_data.q[-1]
            bandwidth = 0.5 / (sas.n_step - 1.0)
            exp_data = reduce_raw_data(exp_data, qmax, bandwidth,
                                       outfile=log_path)
        q_values = exp_data.q

    if q_values is None:
        q_values = sas.q_start + \
            (sas.q_stop - sas.q_start) * flex.double(range(sas.n_step)) / (
                sas.n_step - 1)

    # read in pdb file; only the first model is used
    pdbi = pdb.hierarchy.input(file_name=sas.structure)
    atoms = pdbi.hierarchy.models()[0].atoms()

    # predefine some arrays we will need
    radius = flex.double()
    b_values = flex.double()
    occs = flex.double()
    xyz = flex.vec3_double()
    for atom in atoms:
        b_values.append(atom.b)
        occs.append(atom.occ)
        xyz.append(atom.xyz)

    # Hydrogen controls whether H is treated explicitly or implicitly
    Hydrogen = not internals.implicit_hydrogens

    # ---- Using Zernike Expansion to Calculate Intensity ----
    if sas.method == 'zernike':
        znk_nmax = sas.znk_nmax
        absolute_Io = znk_model.calc_abs_Io(atoms, Hydrogen)
        if absolute_Io == 0.0:
            # in case pdb hierarchy parse did not work out correctly
            absolute_Io = sas_library.calc_abs_Io_from_pdb(sas.structure,
                                                           Hydrogen)
        # Get number of electrons as density
        density = znk_model.get_density(atoms)
        if not Hydrogen:
            # add one H-atom to each heavy atom as a correction
            density = density + 1
        znk_engine = znk_model.xyz2znk(xyz, absolute_Io, znk_nmax,
                                       density=density)
        calc_i, calc_i_vac, calc_i_sol, calc_i_layer = \
            znk_engine.calc_intensity(q_values)
        if sas.experimental_data is not None:
            if internals.solvent_scale:
                znk_engine.optimize_solvent(exp_data)
                calc_i = znk_engine.best_i_calc
            else:
                # quick scaling
                scale, offset = linear_fit(calc_i, exp_data.i, exp_data.s)
                calc_i = calc_i * scale + offset
            CHI2 = flex.mean(flex.pow((calc_i - exp_data.i) / exp_data.s, 2.0))
            CHI = math.sqrt(CHI2)
            report("fitting to experimental curve, chi = %5.4e" % CHI)
            write_debye_data(q_values, calc_i, sas.output + ".fit")
            write_json(sas.output + "data.json", q_values, calc_i,
                       y2=exp_data.i)
        else:
            # scaled to the absolute I(0)
            write_she_data(q_values, calc_i, calc_i_vac, calc_i_layer,
                           calc_i_sol, sas.output)
            write_json(sas.output + "data.json", q_values, calc_i)
        report(znk_engine.summary())
        report("Done! total time used: %5.4e (seconds)" % (time.time() - t1))
        # NOTE(review): unlike the she/debye path below, this early return
        # never appends the "__END__" marker to the log -- confirm that no
        # reader of she.txt waits for it.
        return
    # ---- End of Zernike Model ----

    dummy_ats = sas_library.read_dummy_type(file_name=sas.structure)
    # distinct atom types, in order of first appearance
    dummy_at_collection = []
    for at in dummy_ats:
        if at not in dummy_at_collection:
            dummy_at_collection.append(at)

    radius_dict = {}
    ener_lib = server.ener_lib()
    for dummy in dummy_at_collection:
        lib_entry = ener_lib.lib_atom[dummy]
        if Hydrogen or lib_entry.vdwh_radius is None:
            radius_dict[dummy] = lib_entry.vdw_radius
        else:
            radius_dict[dummy] = lib_entry.vdwh_radius
        if radius_dict[dummy] is None:
            report("****************** WARNING WARNING *******************")
            report("Did not find atom type: ", dummy,
                   "default value 1.58 A was used")
            report("*******************************************************")
            radius_dict[dummy] = 1.58

    for at in dummy_ats:
        radius.append(radius_dict[at])

    Scaling_factors = sas_library.load_scaling_factor()

    B_factor_on = internals.use_adp
    max_i = internals.max_i
    max_L = internals.max_L
    f_step = internals.f_step
    q_step = internals.integration_q_step
    solvent_radius_scale = internals.solvent_radius_scale
    protein_radius_scale = internals.protein_radius_scale
    rho = internals.rho
    drho = internals.drho
    delta = internals.delta

    scat_lib_dummy = sas_library.build_scattering_library(
        dummy_at_collection, q_values, radius_dict, solvent_radius_scale,
        Hydrogen, Scaling_factors)

    new_indx = flex.int()
    new_coord = flex.vec3_double()

    model = intensity.model(xyz, radius * protein_radius_scale, b_values,
                            occs, dummy_ats, scat_lib_dummy, B_factor_on)
    t2 = time.time()

    if sas.method == 'she':
        max_z_eps = 0.02
        max_z = model.get_max_radius() * (q_values[-1] + max_z_eps) + max_z_eps
        engine = intensity.she_engine(model, scat_lib_dummy, max_i, max_L,
                                      f_step, q_step, max_z, delta, rho, drho)
        engine.update_solvent_params(rho, drho)
        i = engine.I()
        a = engine.get_IA()
        b = engine.get_IB()
        c = engine.get_IC()
        report_envelope(engine.Area_Volume())

        if sas.output is not None:
            write_she_data(q_values, i, a, b, c, sas.output)
            write_json(sas.output + "data.json", q_values, i)

        if sas.pdblist is not None:
            pdblist = sas.pdblist
            if os.path.isfile(pdblist):
                # "with" closes the list file, which used to stay open.
                with open(pdblist, 'r') as listing:
                    for line in listing:
                        filename = line.rstrip("\r\n")
                        if not filename.strip():
                            # a blank line is not a file name
                            continue
                        pdbi = pdb.hierarchy.input(file_name=filename)
                        t21 = time.time()
                        atoms = pdbi.hierarchy.atoms()
                        new_coord.clear()
                        new_indx.clear()
                        for position, atom in enumerate(atoms):
                            new_coord.append(atom.xyz)
                            new_indx.append(position)
                        engine.update_coord(new_coord, new_indx)
                        # i is rebound on purpose: the scaling step below
                        # works on the most recently computed curve.
                        i = engine.I()
                        a = engine.get_IA()
                        b = engine.get_IB()
                        c = engine.get_IC()
                        report_envelope(engine.Area_Volume())
                        write_she_data(q_values, i, a, b, c,
                                       filename + '.int')
                        report('\nfininshed pdb ', filename, 'at: ',
                               time.ctime(t21), '\n')
    elif sas.method == 'debye':
        engine = intensity.debye_engine(model, scat_lib_dummy)
        i = engine.I()
        if sas.output is not None:
            write_debye_data(q_values, i, sas.output)
            write_json(sas.output + "data.json", q_values, i)

    if sas.experimental_data is not None:
        if internals.solvent_scale:
            # more thorough scaling
            solvent_optim = solvent_parameter_optimisation(
                she_object=engine, observed_data=exp_data)
            scale, offset, drho, a = solvent_optim.get_scales()
            i = solvent_optim.get_scaled_data()
        else:
            # quick scaling
            scale, offset = linear_fit(i, exp_data.i, exp_data.s)
            i = scale * i + offset

        report("Scaled calculated data against experimental data")
        report("Scale factor : %5.4e" % scale)
        report("Offset : %5.4e" % offset)
        if internals.solvent_scale:
            report(" Solvent average R ra : ", a)
            report(" Solvation Contrast drho: ", drho)
        # blank separator line on stdout only, as before
        print("")
        write_debye_data(q_values, i, sas.output + ".fit")
        write_json(sas.output + "data.json", q_values, i, y2=exp_data.i)
        CHI2 = flex.mean(flex.pow((i - exp_data.i) / exp_data.s, 2.0))
        CHI = math.sqrt(CHI2)
        report("fitting to experimental curve, chi = %5.4e" % CHI)

    t3 = time.time()
    report("Done! total time used: %5.4e (seconds)" % (t3 - t1))
    report('start running at: ', time.ctime(t1))
    report('finished PDB file processing at: ', time.ctime(t2))
    report('got all desired I(q) at : ', time.ctime(t3))
    # End-of-run marker, written to the log only and without a newline.
    with open(log_path, "a") as log:
        log.write("__END__")
def run(args, db_prefix="/home/dongxq/zalign/build/myDB"):
    """Rank a pickled shape database against one moving model.

    Stage 1 scores every database entry against the moving model using the
    rotation-invariant ``nl`` coefficients. Stage 2 aligns the
    ``params.align.nlnum`` best entries with ``fft_align.align`` and keeps
    the ``params.align.nlmnum`` highest correlation coefficients. The
    ranking and the timings are written to the file ``c5`` in the parent
    directory of ``sys.path[0]``.

    Args:
        args: command-line arguments, handed to ``get_input``.
        db_prefix: path prefix of the pickled database; ``.codes``, ``.nlm``
            and ``.nl`` are appended to it. Defaults to the location that
            used to be hard-coded.

    Returns:
        None. Returns early when ``get_input`` yields no parameters.
    """
    # Start of the "total time used" measurement; this name used to be
    # undefined, so the final report line raised NameError.
    t1 = time.time()
    targetfile = os.path.join(os.path.split(sys.path[0])[0], "c5")
    print(args)
    # get_input() logs into the result file; "with" closes it on any outcome.
    with open(targetfile, "w") as tempf:
        params = get_input(args, master_params, "aligndb", banner, help,
                           tempf)
    if params is None:
        return
    # NOTE(review): the scope name passed above is "aligndb" while the values
    # are read from params.align -- confirm against master_params.
    mov = params.align.mov
    typem = params.align.typem
    nmax = params.align.nmax
    rmax = params.align.rmax
    nlNum = params.align.nlnum
    nlmNum = params.align.nlmnum

    mov_model = model_interface.build_model(mov, typem, nmax, rmax)

    # The ".rmax" table is not needed for the ranking, so it is not loaded.
    codes = easy_pickle.load(db_prefix + ".codes")
    nlm_coefs = easy_pickle.load(db_prefix + ".nlm")
    nl_coefs = easy_pickle.load(db_prefix + ".nl")

    # ---- stage 1: rotation-invariant distance ----
    mov_nl_coefs = mov_model.nl_array.coefs()
    tnl1 = time.time()
    nl_res = []
    for indx, db_nl in enumerate(nl_coefs):
        # compute Chi-square distance
        mf_coef = numpy.true_divide(db_nl, mov_nl_coefs)
        dist = numpy.sum(numpy.square(mov_nl_coefs - mf_coef * db_nl))
        nl_res.append((indx, dist, codes[indx]))
    sorted_nl_res = sorted(nl_res, key=operator.itemgetter(1))
    tnl2 = time.time()

    # ---- stage 2: alignment of the best stage-1 candidates ----
    mov_nlm_array = mov_model.nlm_array
    fix_nlm_array = math.nlm_array(nmax)
    nlm = fix_nlm_array.nlm()
    nlm_total = fix_nlm_array.nlm().size()
    nlm_res = []
    tnlm1 = time.time()
    # Never ask for more candidates than the database holds.
    for rank in range(min(nlNum, len(sorted_nl_res))):
        indx = sorted_nl_res[rank][0]
        fix_nlm_array.load_coefs(nlm, nlm_coefs[indx][0:nlm_total])
        align_obj = fft_align.align(fix_nlm_array, mov_nlm_array, nmax=nmax,
                                    refine=True)
        nlm_res.append((indx, codes[indx], align_obj.get_cc()))
    sorted_nlm_res = sorted(nlm_res, key=operator.itemgetter(2),
                            reverse=True)
    sorted_nlm_res = sorted_nlm_res[:nlmNum]
    tnlm2 = time.time()

    # ---- merge the stage-1 distance into the final entries ----
    tmerge1 = time.time()
    chi_by_indx = dict((indx, dist) for indx, dist, _code in nl_res)
    sorted_nlm_res = [entry + (chi_by_indx[entry[0]],)
                      for entry in sorted_nlm_res]
    tmerge2 = time.time()
    # The doubled space reproduces the separator the print statement
    # inserted after the label's own trailing space.
    print("merge time used:  %s" % (tmerge2 - tmerge1,))

    # ---- output (replaces whatever get_input logged into the file) ----
    with open(targetfile, "w") as out:
        out.write("############# SUMMARY of ALIGNMENT #############\n")
        out.write("rank indx name cc chi-square\n")
        for rank, entry in enumerate(sorted_nlm_res):
            out.write(str((rank + 1,) + entry) + "\n")
        t3 = time.time()
        out.write("rotation invariant computing time used: "
                  + str(tnl2 - tnl1) + "\n")
        out.write("alignment computing time used: "
                  + str(tnlm2 - tnlm1) + "\n")
        out.write("total time used: : " + str(t3 - t1))
def run(args):
    """Retrieve database shapes that match FXS ``blq`` data.

    Reads the ``blq`` target file, derives a SAXS-like curve from it for the
    Guinier analysis, looks up the ``ntop`` best shapes in the pickled
    database and aligns them pairwise. When ``params.query.buildmap`` is
    set, maps, PyMOL scripts and ``models.html`` are produced; if PDB files
    were supplied as well, the correlation of the retrieved shapes with the
    first PDB model is written to ``<prefix>_cc2pdb.dat`` and summarised on
    stdout.

    Args:
        args: command-line arguments, handed to ``get_input``.

    The interpreter exits when ``get_input`` returns ``None``.
    """
    t1 = time.time()
    params = get_input(args, master_params, "query", banner, help)
    if params is None:
        exit()
    query = params.query
    target_file = query.target
    rmax = query.rmax
    nmax = query.nmax
    lmax = query.lmax
    if lmax is None:
        lmax = nmax
    smear = query.smear
    dbpath = query.dbpath
    pdb_files = query.pdb_files
    db_choice = query.db_choice
    weight = query.weight
    delta_q = query.delta_q
    if db_choice == "user":
        dbprefix = query.db_user_prefix
    else:
        dbprefix = db_choice
    if dbpath is None:
        dbpath = set_default_db_path()
    ntop = query.ntop
    scan = query.scan
    fraction = query.fraction
    scale_power = query.scale_power

    data = fxs_tools.read_blq(target_file, lmax=lmax)
    # Every (lmax/2 + 1)-th blq value, square-rooted, is used as the
    # SAXS-like curve (and as its own sigma) for the Guinier analysis.
    saxs_i = data.blq[0:-1:int(lmax / 2) + 1]
    saxs_i = flex.sqrt(flex.abs(saxs_i))
    saxs_data = curves.simple_saxs_data(data.q, saxs_i, saxs_i)
    try:
        rg, io = get_rg(saxs_data)
    except Exception:
        # Best-effort fallback, but Ctrl-C and SystemExit are no longer
        # swallowed the way the previous bare "except:" did.
        print("Guinier analysis failed, R_max is required")
        print("ATTENTION: dummy values for Rg and Io set")
        rg = 50
        io = 1

    print(" ==== Reading in shape database ==== ")
    begin_time = time.time()
    nlm_coefs = read_nlm(dbpath, dbprefix)
    codes = read_codes(dbpath, dbprefix)
    delta_time = time.time() - begin_time
    print("")
    print(" Done reading database with %i entries in %5.4e seconds" % (
        len(codes), delta_time))
    print("")

    print(" ==== Shape retrieval ==== ")
    print(" Constructing shape retrieval object")
    shapes = intoshape(data, rg=rg, io=io, nmax=nmax, lmax=lmax, rmax=rmax,
                       scan=scan, fraction=fraction, smear=smear,
                       prefix=query.prefix, weight=weight, delta_q=delta_q,
                       scale_power=scale_power)
    print(" Shape search .... ")
    shapes.lookup(nlm_coefs, codes, ntop)
    shapes.pair_align(nlm_coefs, query.calc_cc)

    pdb_models = None
    if len(pdb_files) > 0:
        pdb_models = process(pdb_files, nmax, rmax=shapes.best_rmax,
                             fraction=fraction)

    if query.buildmap:
        # need to use rmax/fraction to get right size of box
        (top_cc, top_ids, map_files, levels, cluster_ids,
         ave_maps, ave_levels, ave_cc) = build_map(
            nmax, shapes.best_rmax, nlm_coefs, codes, shapes.best_models,
            pdb_models, clusters=shapes.clusters, fract=fraction)
        build_pymol_script.write_pymol_scripts(map_files, levels)
        pdb_out_name = None
        if pdb_models is not None:
            pdb_out_name = pdb_files[0].split('.')[0] + '_sa.pdb'
        generate_html.generate_jmol_html(ave_maps, ave_cc, ave_levels,
                                         map_files, top_cc, levels,
                                         cluster_ids, 'models.html',
                                         pdb=pdb_out_name)
        # top_cc / top_ids only exist when maps were built, so the
        # comparison report stays inside this branch.
        if len(pdb_files) > 0:
            # "with" closes the report even if a formatting call raises.
            with open(query.prefix + "_cc2pdb.dat", 'w') as out:
                out.write("Correlation coefficients of retrieved shapes vs input model" + "\n")
                for cc, code in zip(top_cc, top_ids):
                    out.write("Code: %5s CC: %5.1f " % (code, 100 * cc) + "\n")
                out.write("mean: %8.5f" % flex.mean(top_cc) + "\n")
            print("Compared to the PDB model (%s)" % pdb_models[0].filename)
            print("mean cc: %8.5f" % flex.mean(top_cc))
            print("first cc: %8.5f" % top_cc[0])
            print("best cc: %8.5f" % flex.max(top_cc))
            print("worst cc: %8.5f" % flex.min(top_cc))
            print("Rmax: estimated vs PDB %s %s" % (shapes.best_rmax,
                                                    pdb_models[0].rmax))

    t2 = time.time()
    # The doubled space reproduces the separator the print statement
    # inserted after the label's own trailing space.
    print("total time used:  %s (seconds)" % (t2 - t1,))