def get_qfac(protein_file, npc, res_num, q_fac_dict):
    """Fit an individual tensor to each model of the bundle and record the Q-factors.

    For every model in the structure, a tensor is fitted (SVD grid search
    followed by non-linear regression) and the resulting Q-factor is stored
    in ``q_fac_dict``, which is shared between calls.

    Args:
        protein_file: Path handed to ``protein.load_pdb``.
        npc: Path handed to ``dataparse.read_pcs``.
        res_num: Residue key in chain ``'A'`` of model 0; the position of its
            ``CA`` atom is used as the starting position of the search.
        q_fac_dict: Dictionary keyed by model id and mutated in place. The
            Q-factor of each model is appended to the list stored under its
            id; a new one-element list is stored when the key is missing or
            does not currently hold a list.

    Returns:
        A ``(model_id, q_factor)`` tuple for the model with the lowest
        Q-factor found in this call.
    """
    # Load the protein, load the npc
    prot = protein.load_pdb(protein_file)
    rawData = dataparse.read_pcs(npc)
    qfactor_sep = {}

    # Initialize metal instance for search and set the initial position
    mStart = metal.Metal()
    mStart.position = prot[0]['A'][res_num]['CA'].position

    # Loop: for every model fit an individual tensor and store its Q-factor
    for model in prot:
        parsedData = prot.parse(rawData, models=model.id)
        [mGuess], _ = fit.svd_gridsearch_fit_metal_from_pcs(
            [mStart], [parsedData], radius=10, points=10)
        [mFit], [data] = fit.nlr_fit_metal_from_pcs([mGuess], [parsedData])

        # Evaluate the Q-factor once and reuse it for both dictionaries
        qfactor = fit.qfactor(data)
        qfactor_sep[model.id] = qfactor

        # Save in universal dictionary; .get() tolerates a model id that has
        # not been seen by an earlier call.
        previous = q_fac_dict.get(model.id)
        if isinstance(previous, list):
            previous.append(qfactor)
        else:
            q_fac_dict[model.id] = [qfactor]

    # Lowest Q-factor wins; on ties the first model encountered is kept
    minModel, minQfac = min(qfactor_sep.items(), key=lambda item: item[1])
    return minModel, minQfac
def get_tensor(protein_file, model, npc, res_num, tag_number):
    """Fit a tensor to one model of the bundle and append the results to files.

    A tensor is fitted (SVD grid search followed by non-linear regression)
    against the selected model only. Three files, named after
    ``protein_file`` without its extension, are then appended to:

    * ``<base>_metal_centers.pcs``: one line with the tag number, the axial
      component and the rhombicity ratio ``rh / ax``.
    * ``<base>_ORI_UPL.upl`` and ``<base>_ORI_LOL.lol``: one line per residue
      listed in ``Ori_dictionary[tag_number]`` with the CA-to-metal distance
      plus / minus 0.5.

    Args:
        protein_file: Path handed to ``protein.load_pdb``; also the base name
            of the output files.
        model: Key of the model inside the loaded structure.
        npc: Path handed to ``dataparse.read_pcs``.
        res_num: Residue key in chain ``'A'`` whose ``CA`` position seeds the
            search.
        tag_number: Key into the module-level ``Tag_dictionary`` and
            ``Ori_dictionary``.

    Raises:
        KeyError: If ``model``, ``res_num`` or ``tag_number`` cannot be looked
            up (raised by the underlying containers).
    """
    # Load the protein, load the npc
    prot = protein.load_pdb(protein_file)
    rawData = dataparse.read_pcs(npc)

    # Resolve the requested model once; every later lookup goes through it
    target_model = prot[model]
    chain = target_model['A']

    # Initialize metal instance for search and set the initial position
    mStart = metal.Metal()
    mStart.position = chain[res_num]['CA'].position

    # Get tensor on single structure. The model's own id is passed on, exactly
    # as the former search loop over all models did.
    parsedData = prot.parse(rawData, models=target_model.id)
    [mGuess], _ = fit.svd_gridsearch_fit_metal_from_pcs(
        [mStart], [parsedData], radius=10, points=10)
    [mFit], _ = fit.nlr_fit_metal_from_pcs([mGuess], [parsedData])

    axial = round(mFit.ax * 1E32, 3)
    rhombicity = round((mFit.rh / mFit.ax), 3)

    # Look the tag up before touching any file so a bad tag_number leaves the
    # output files untouched.
    tag_label = str(Tag_dictionary[tag_number])
    ori_residues = Ori_dictionary[tag_number]
    base = os.path.splitext(protein_file)[0]

    # Generate .pcs metal center file (axial value right-aligned in 14 columns)
    with open(base + "_metal_centers.pcs", 'a+') as centers:
        centers.write(5 * " " + str(tag_number) + str(axial).rjust(14)
                      + "E+04 " + str(rhombicity) + 5 * " " + tag_label + "\n")

    # Extract information about the metal center position relative to the
    # protein backbone and write the upper / lower distance limits
    ori = mFit.position
    with open(base + "_ORI_UPL.upl", 'a+') as upl, \
            open(base + "_ORI_LOL.lol", 'a+') as lol:
        for ori_res_num in ori_residues:
            residue = chain[ori_res_num]
            res = residue['CA'].position
            res_type = residue.get_resname()
            d = math.sqrt(((ori[0] - res[0])**2)
                          + ((ori[1] - res[1])**2)
                          + ((ori[2] - res[2])**2))
            # NOTE(review): the 1E10 factor presumably converts metres to
            # Angstrom -- confirm against the units of `.position`.
            d_upl = round((d * 1E10) + 0.5, 2)
            d_lol = round((d * 1E10) - 0.5, 2)
            prefix = (str(ori_res_num).rjust(3) + " " + res_type + " CA "
                      + tag_label + " ORI A0")
            upl.write(prefix + str(d_upl).rjust(10) + "\n")
            lol.write(prefix + str(d_lol).rjust(10) + "\n")
# Associate the raw PCS values with the atoms of the loaded structure
parsedData = prot.parse(rawData)

# Define an initial tensor
mStart = metal.Metal()

# Set the starting position to an atom close to the metal
mStart.position = prot[0]['A'][56]['CA'].position

# Calculate an initial tensor from an SVD gridsearch.
# Each fit call is given one-element lists, so its results are unpacked
# with the `[x], [y]` pattern.
[mGuess], [data] = fit.svd_gridsearch_fit_metal_from_pcs(
    [mStart], [parsedData], radius=10, points=10)

# Refine the tensor using non-linear regression
[mFit], [data] = fit.nlr_fit_metal_from_pcs([mGuess], [parsedData])

# Estimate uncertainty sourcing noise from the models of the PDB
[mod_all], [mod_std] = fit.fit_error_models(
    fit.nlr_fit_metal_from_pcs, initMetals=[mFit], dataArrays=[parsedData])
mod_std.save('error_tensor_models.txt')

# Estimate uncertainty sourcing noise from experimental uncertainties
[mc_all], [mc_std] = fit.fit_error_monte_carlo(
    fit.nlr_fit_metal_from_pcs, 50, initMetals=[mFit], dataArrays=[parsedData])
# Save the Monte-Carlo result itself; previously `mod_std` was written a
# second time under this file name and `mc_std` was never stored.
mc_std.save('error_tensor_monte_carlo.txt')
bindingSite = int(re.search("\\d+", site).group()) # Get residue number mStart = metal.Metal() mStart.position = prot[0]['A'][bindingSite][ 'CA'].position # Set strating position hnpcss = [] # Assemble exp. PCS data for both ions for ion in ions: hnpcs_raw = dataparse.read_pcs( "../data_files/IMP1_HN_{}_{}_FREE.npc".format(site, ion)) hnpcs = prot.parse(hnpcs_raw) hnpcss.append(hnpcs) # Fit the tensor by SVD, then NLR mGuess, _ = fit.svd_gridsearch_fit_metal_from_pcs([mStart, mStart], hnpcss) mFit, _ = fit.nlr_fit_metal_from_pcs(mGuess, hnpcss) # Sample purturbed tensors by bootstrap mSamples, mStd = fit.fit_error_bootstrap(fit.nlr_fit_metal_from_pcs, BOOTSTRAP_ITER, 0.8, initMetals=mFit, dataArrays=hnpcss) mdata.append(mSamples) for ion, mSamples in zip(ions, zip(*mdata)): trpdata = [] mdata = [] # Loop sites with fitted tensors for site, mSample in zip(sites, mSamples):
# Make a list of starting tensors mStart = [metal.Metal(), metal.Metal(), metal.Metal()] # Set the starting position to an atom close to the metal mStart[0].position = prot[0]['A'][56]['CA'].position # Calculate initial tensors from an SVD gridsearch mGuess = fit.svd_gridsearch_fit_metal_from_pcs(mStart, parsedData, radius=10, points=10) # Refine the tensors using non-linear regression fitParameters = ['x', 'y', 'z', 'ax', 'rh', 'a', 'b', 'g'] mFit = fit.nlr_fit_metal_from_pcs(mGuess, parsedData, fitParameters) # Save the fitted tensors to files for name, metal in zip(['Tb', 'Er', 'Yb'], mFit): metal.save("tensor_{}.txt".format(name)) # Make experimental and calculated PCS lists exp = [] cal = [] for metal, data in zip(mFit, parsedData): ex = [] ca = [] for atom, exp_pcs, error in data: ex.append(exp_pcs) ca.append(metal.atom_pcs(atom)) exp.append(ex)
mStart = metal.Metal() # Set the starting position to Calcium ion heteroatom in PDB mStart.position = prot[0]['A'][('H_ CA', 77, ' ')]['CA'].position # Calculate tensor by SVD mFit, calc, qfac = fit.svd_gridsearch_fit_metal_from_pcs([mStart], [parsedData], radius=0, points=1) mFit[0].save('calbindin_Er_HN_PCS_tensor_position_constrained.txt') # Calculate axially symmetric tensor by NRL mFitAx, calcAx, qfacAx = fit.nlr_fit_metal_from_pcs([mStart], [parsedData], params=('ax', 'b', 'g', 'x', 'y', 'z')) mFitAx[0].save('calbindin_Er_HN_PCS_tensor_axially_symmetric.txt') #### Plot the correlation #### from matplotlib import pyplot as plt fig, ax = plt.subplots(figsize=(5, 5)) # Unpack the experimental values atoms, experiment, errors = zip(*parsedData) # Plot the data ax.plot(experiment, calc[0], marker='o',
parsedData = prot.parse(rawData) # Define an initial tensor mStart = metal.Metal() # Set the starting position to an atom close to the metal mStart.position = prot[0]['A'][56]['CA'].position # Calculate an initial tensor from an SVD gridsearch mGuess, calc, qfac = fit.svd_gridsearch_fit_metal_from_pcs([mStart], [parsedData], radius=10, points=10) # Refine the tensor using non-linear regression mFit, calc, qfac = fit.nlr_fit_metal_from_pcs(mGuess, [parsedData]) # mets, stdm = fit.pcs_fit_error_bootstrap(mFit, [parsedData], 10, 0.95) mets, stdm = fit.pcs_fit_error_monte_carlo(mFit, [parsedData], 50) # self.errorTensor.set_params(devs.items()) # def transform(vector): # x, y, z = vector # theta = np.arctan2(y, x) # phi = -np.arccos(z) + np.pi/2. # return theta, phi # spcoords = [] # for eulers in zip(stds['a'], stds['b'], stds['g']): # rotationMatrix = metal.euler_to_matrix(np.array(eulers))
# Define an initial tensor mStart = metal.Metal() # Set the starting position to an atom close to the metal mStart.position = prot[0]['A'][START_ATOM[0]][START_ATOM[1]].position # Calculate an initial tensor from an SVD gridsearch mGuess, calc, qfac = fit.svd_gridsearch_fit_metal_from_pcs( [mStart], [parsedData], radius=SVD_RADIUS, points=int(SVD_RADIUS / SVD_DENSITY)) # Refine the tensor using non-linear regression mFit, calc, qfac = fit.nlr_fit_metal_from_pcs(mGuess, [parsedData], useracs=USE_RACS, userads=USE_RADS) # Save the fitted tensor to file mFit[0].save(FITTED_TENSOR_FILE_NAME) # Save calculated PCS values back_calc = [] for atom in prot.get_atoms(): value = mFit[0].atom_pcs(atom, racs=USE_RACS, rads=USE_RADS) _, mdl, chn, (_, seq, _), (atm, _) = atom.get_full_id() back_calc.append((seq, atm, value)) with open(BACK_CALCULATED_PCS_FILE_NAME, 'w') as o: for seq, atm, value in back_calc: line = "{0:<3d} {1:3s} {2:8.3f} 0.0\n".format(seq, atm, value)
from paramagpy import protein, fit, dataparse, metal

# Load data
prot = protein.load_pdb('../data_files/2bcb.pdb')
rawData = dataparse.read_pcs('../data_files/calbindin_Er_HN_PCS.npc')

# Starting tensor, positioned on an atom of the structure
mStart = metal.Metal()
mStart.position = prot[0]['A'][56]['CA'].position

#### Ensemble average fitting ####
parsedData = prot.parse(rawData)
mGuess, _, _ = fit.svd_gridsearch_fit_metal_from_pcs(
    [mStart], [parsedData], radius=10, points=10)
mFit, calc, qfac = fit.nlr_fit_metal_from_pcs(mGuess, [parsedData])
mFit[0].save('calbindin_Er_HN_PCS_tensor_ensemble.txt')

#### Single model fitting ####
# Loop over models, fit tensor and keep one with best Q-factor.
# `minQfacMod` is a scalar throughout: it starts as a large sentinel and is
# compared against the first Q-factor of each fit.
minQfacMod = 1E50
for model in prot:
    parsedDataMod = prot.parse(rawData, models=model.id)
    mFitMod, calcMod, qfacMod = fit.nlr_fit_metal_from_pcs(
        mGuess, [parsedDataMod])
    if qfacMod[0] < minQfacMod:
        minMod = model.id
        minParsedDataMod = parsedDataMod
        minmFitMod = mFitMod
        mincalcMod = calcMod
        # Store the scalar, not the whole sequence, so the comparison on the
        # next iteration is still number-against-number.
        minQfacMod = qfacMod[0]

# #### Plot the correlation ####
# Load data prot = protein.load_pdb('../data_files/2bcb.pdb') rawData = dataparse.read_pcs('../data_files/calbindin_Er_HN_PCS.npc') parsedData = prot.parse(rawData) # Set metal starting position mStart = metal.Metal() mStart.position = prot[0]['A'][56]['CA'].position #### Averaged fit to all models #### [mGuess], [data] = fit.svd_gridsearch_fit_metal_from_pcs([mStart], [parsedData], radius=10, points=10, ensembleAverage=False) [mFit], [data] = fit.nlr_fit_metal_from_pcs([mGuess], [parsedData], ensembleAverage=False) qfac = fit.qfactor(data, ensembleAverage=False) avg = qfac, data, mFit #### Ensembled averaged fit to all models #### [mGuess], [data] = fit.svd_gridsearch_fit_metal_from_pcs([mStart], [parsedData], radius=10, points=10, ensembleAverage=True) [mFit], [data] = fit.nlr_fit_metal_from_pcs([mGuess], [parsedData], ensembleAverage=True) qfac = fit.qfactor(data, ensembleAverage=True) e_avg = qfac, data, mFit #### Seperate fit for each model ####