def test_opt(self):
    """Minimize the least-squares residual with BFGS for every rotation
    parameterization and verify each converges to the SVD solution."""
    print(make_title('test optimization of least-squares residual'))
    msg = '{0:>14}: #steps={1:3d}, RMSD: {5:.2f}->{2:.2f}, ' + \
          'accuracy: {3:.3e} (rot), {4:.3e} (trans)'
    # one random starting transformation shared by all parameterizations
    start = spin.random_rotation(), np.random.standard_normal(3) * 10
    R, t = fit(*self.coords)
    n_atoms = len(self.coords[0])
    rot, trans, rmsds = [], [], []
    for trafo in self.trafos[1:]:
        trafo.matrix_vector = start
        residual = spin.LeastSquares(*self.coords, trafo=trafo)
        dofs_start = trafo.dofs.copy()
        dofs_min = opt.fmin_bfgs(residual, dofs_start, residual.gradient,
                                 disp=False)
        rot.append(spin.distance(R, trafo.rotation))
        trans.append(np.linalg.norm(t - trafo.translation.vector))
        rmsds.append(np.sqrt(2 * residual(dofs_min) / n_atoms))
        print(msg.format(trafo.rotation.name, len(residual.values),
                         rmsds[-1], rot[-1], trans[-1],
                         np.sqrt(2 * residual(dofs_start) / n_atoms)))
    self.assertAlmostEqual(rot[-1], 0., delta=1e-5)
    self.assertAlmostEqual(trans[-1], 0., delta=1e-5)
    self.assertAlmostEqual(np.std(rmsds), 0., delta=1e-5)
def test_analytical(self):
    """
    Test analytical calculation of the upper bound using SVD and
    eigenvalue decomposition.
    """
    tol = 1e-10
    n = int(1e3)
    X, Y = load_coords(['1ake', '4ake'])
    A = np.dot(X.T, Y)
    R = fit(X, Y)[0]

    nearest_rot = spin.NearestRotation(A)
    nearest_quat = spin.NearestUnitQuaternion(A)

    # analytical optima must match the SVD-based fit
    self.assertTrue(spin.distance(nearest_rot.optimum(), R) < tol)
    self.assertTrue(np.linalg.norm(
        nearest_quat.optimum().dofs - spin.Quaternion(R).dofs) < tol)

    rotations = spin.random_rotation(n)
    vals = np.array([nearest_rot(rotation) for rotation in rotations])
    # the objective is linear in the rotation matrix entries
    vals2 = np.dot(rotations.reshape(n, -1), A.flatten())
    self.assertTrue(np.fabs(vals - vals2).max() < tol)

    # no random rotation may beat the analytical optimum
    self.assertTrue(np.all(vals <= nearest_rot(nearest_rot.optimum().matrix)))
    self.assertTrue(np.all(vals <= nearest_quat(nearest_quat.optimum())))
def test_lsq(self):
    """LSQ and RMSD values evaluated at the SVD-optimal rotation must
    agree across all rotation parameterizations."""
    rotation, score = self.lsq['svd'].optimum()
    n_atoms = len(self.coords[0])

    rmsd_vals = [np.sqrt(score / n_atoms),
                 self.lsq['svd'].rmsd(rotation.matrix),
                 rmsd(*self.coords)]
    lsq_vals = [0.5 * score, self.lsq['svd'](rotation.matrix)]

    # NOTE(review): 'axisangle' appears twice in this tuple; possibly one
    # entry was meant to be a different parameterization -- confirm.
    for name in ('euler', 'axisangle', 'expmap', 'axisangle'):
        dofs = self.lsq[name].trafo.from_rotation(rotation).dofs
        lsq_vals.append(self.lsq[name](dofs))

    rmsd_vals = np.round(rmsd_vals, 5)
    lsq_vals = np.round(lsq_vals, 2)

    print(make_title('checking LSQ optimization using SVD'))
    print('RMSD: {0}'.format(rmsd_vals))
    print(' LSQ: {0}'.format(lsq_vals))

    tol = 1e-10
    self.assertTrue(np.all(np.fabs(rmsd_vals - rmsd_vals[0]) < tol))
    self.assertTrue(np.all(np.fabs(lsq_vals - lsq_vals[0]) < tol))
    self.assertAlmostEqual(
        spin.distance(fit(*self.coords)[0], rotation.matrix), 0., delta=tol)
def testFit(self):
    """Superpose X2 onto X1; the fit must reproduce the reference rotation
    RZ, a zero translation, and the expected coordinates X3."""
    rotation, translation = cbu.fit(X1, X2)
    moved = numpy.dot(X2, rotation.T) + translation
    self.assertArrayEqual(rotation, RZ)
    self.assertArrayEqual(translation, [0., 0., 0.])
    self.assertArrayEqual(moved, X3)
def testEnsemble(self):
    """
    The posterior of a gaussian scale mixture with gamma prior is a
    Student's t distribution with parameters alpha and beta. Given enough
    samples, we should be able to estimate these parameters.
    """
    from csb.bio.utils import fit, wfit

    pdbfile = self.config.getTestFile('ake-xray-ensemble-ca.pdb')
    ensemble = LegacyStructureParser(pdbfile).parse_models()
    X = numpy.array([model.get_coordinates(['CA'], True)
                     for model in ensemble])
    x_mu = average_structure(X)
    n_atoms = X.shape[1]
    n_models = X.shape[0]

    R = numpy.zeros((n_models, 3, 3))
    t = numpy.ones((n_models, 3))

    prior = GammaPrior()
    mixture = ScaleMixture(scales=n_atoms, prior=prior, d=3)

    for i in range(n_models):
        R[i, :, :], t[i, :] = fit(x_mu, X[i])

    # Gibbs sampling cycle
    for _ in range(200):
        # apply current transformations, collect per-atom distances
        data = numpy.array(
            [numpy.sum((x_mu - numpy.dot(X[i], numpy.transpose(R[i]))
                        - t[i]) ** 2, -1) ** 0.5
             for i in range(n_models)]).T
        # sample scales
        mixture.estimate(data)
        # sample rotations
        for i in range(n_models):
            R[i, :, :], t[i, :] = wfit(x_mu, X[i], mixture.scales)

    self.assertEqual(mixture.scales.shape, (211,))

    R_opt = numpy.eye(3)
    t_opt = numpy.zeros((3,))
    for k in range(n_models):
        for i in range(3):
            self.assertAlmostEqual(t[k, i], t_opt[i], delta=2.)
            for j in range(3):
                self.assertAlmostEqual(abs(R[k, i, j]), R_opt[i, j],
                                       delta=0.15)
def main(self):
    """Superpose all models of a multi-model PDB file onto their average
    structure using ML-estimated weights and write the aligned ensemble."""
    try:
        parser = LegacyStructureParser(self.args.pdb)
        models = parser.models()
    except IOError as e:
        self.exit('PDB file parsing failed\n' + str(e.value),
                  ExitCodes.IO_ERROR)

    if len(models) < 2:
        self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)

    ensemble = parser.parse_models(models)
    X = numpy.array([model[self.args.chain].get_coordinates(['CA'], True)
                     for model in ensemble])
    x_mu = average_structure(X)
    n_models = X.shape[0]

    R = numpy.zeros((n_models, 3, 3))
    t = numpy.ones((n_models, 3))

    prior = GammaPrior()
    mixture = ScaleMixture(scales=X.shape[1], prior=prior, d=3)

    for i in range(n_models):
        R[i, :, :], t[i, :] = fit(x_mu, X[i])

    # Gibbs sampling cycle
    for _ in range(self.args.niter):
        # apply rotation: per-atom distances between average and each model
        data = numpy.array(
            [numpy.sum((x_mu - numpy.dot(X[i], numpy.transpose(R[i]))
                        - t[i]) ** 2, -1) ** 0.5
             for i in range(n_models)]).T
        # sample scales
        mixture.estimate(data)
        # sample rotations
        for i in range(n_models):
            R[i, :, :], t[i, :] = wfit(x_mu, X[i], mixture.scales)

    out_ensemble = csb.bio.structure.Ensemble()
    for i, model in enumerate(ensemble):
        model.transform(R[i], t[i])
        out_ensemble.models.append(model)

    out_ensemble.to_pdb(self.args.outfile)
def testInvGammaMAP(self):
    """
    The posterior of a gaussian scale mixture with (inverse) gamma prior
    is a Student's t distribution with parameters alpha and beta. Given
    enough samples, we should be able to estimate these parameters.
    """
    from csb.bio.utils import fit, wfit

    pdbfile = self.config.getTestFile('ake-xray-ensemble-ca.pdb')
    ensemble = LegacyStructureParser(pdbfile).parse_models()
    X = numpy.array(ensemble[0].get_coordinates(['CA'], True))
    Y = numpy.array(ensemble[13].get_coordinates(['CA'], True))

    prior = InvGammaPrior()
    prior.estimator = InvGammaPosteriorMAP()
    mixture = ScaleMixture(scales=X.shape[0], prior=prior, d=3)

    R, t = fit(X, Y)

    # Gibbs sampling cycle
    for _ in range(200):
        # per-atom distances after applying the current transformation
        data = numpy.sum((X - numpy.dot(Y, numpy.transpose(R)) - t) ** 2,
                         axis=-1) ** 0.5
        # sample scales
        mixture.estimate(data)
        # sample rotations
        R, t = wfit(X, Y, mixture.scales)

    self.assertEqual(mixture.scales.shape, (211,))

    R_opt = numpy.eye(3)
    t_opt = numpy.zeros((3,))
    for i in range(3):
        self.assertAlmostEqual(t[i], t_opt[i], delta=2.)
        for j in range(3):
            self.assertAlmostEqual(R_opt[i, j], R[i, j], delta=1e-1)
def test_rmsd(self):
    """RMSD computed via each rotation parameterization must agree with
    the SVD-based value."""
    R, t = fit(*self.coords)
    fmt = '{0:.2f} ({1})'
    print(make_title('RMSD'))
    print(fmt.format(rmsd(*self.coords), 'SVD'))
    values = []
    for trafo in self.trafos[1:]:
        trafo.matrix_vector = R, t
        residual = self.coords[0] - trafo(self.coords[1])
        value = np.sqrt(np.mean(np.sum(residual ** 2, 1)))
        print(fmt.format(value, trafo.rotation.name))
        values.append(value)
    # all parameterizations must reproduce the same RMSD
    self.assertAlmostEqual(np.std(values), 0., delta=1e-10)
def test_opt(self):
    """
    Constrained optimization to determine the best unit quaternion.
    """
    coords = load_coords(['1ake', '4ake'])
    A = np.dot(coords[0].T, coords[1])
    R = fit(*coords)[0]

    func = spin.NearestUnitQuaternion(A)
    q_opt = func.optimum().dofs
    q_opt2 = spin.NearestRotation(A, spin.Quaternion()).optimum().dofs

    # numerical optimization constrained to the unit sphere |q| = 1
    constraint = [{'type': 'eq', 'fun': lambda q: np.dot(q, q) - 1}]
    best = -1e308, None
    for _ in range(10):
        q_start = spin.Quaternion.random()
        result = opt.minimize(lambda q: -func(q), q_start,
                              constraints=constraint)
        # canonicalize sign (q and -q represent the same rotation)
        candidate = result['x'] * np.sign(result['x'][0])
        if abs(constraint[0]['fun'](candidate)) < 1e-10 \
                and func(candidate) > best[0]:
            best = func(candidate), candidate
    _, q_best = best

    print(make_title('finding nearest rotation matrix / unit quaternion'))
    print(np.round(q_opt, 5))
    print(np.round(q_best, 5))
    print(np.round(q_opt2, 5))

    tol = 1e-5
    self.assertTrue(np.linalg.norm(q_opt - q_best) < tol)
    self.assertTrue(np.linalg.norm(q_opt - q_opt2) < tol)
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of an object to the intermediate
    structure over all states. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

NOTE

    Assumes all states to have identical number of CA-atoms.

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords, get_object_name

    cycles, quiet = int(cycles), int(quiet)
    mobile_obj = get_object_name(selection, 1)
    n_models = cmd.count_states(mobile_obj)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    # coordinates of all states: shape (n_models, n_atoms, 3)
    X = asarray(get_ensemble_coords(selection))
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite
        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)
        mixture = ScaleMixture(scales=X.shape[1],
                prior=_bfit_get_prior(distribution), d=3)

        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            data = asarray([distance(average, dot(X[i] - t[i], R[i]))
                for i in range(n_models)])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales
    else:
        if int(seed):
            ensemble = X
        else:
            # seed the iteration by fitting everything onto the first state
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            # weights from per-atom variance across states
            data = ensemble.var(0).sum(1)
            scales = 1.0 / data.clip(1e-3)

            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3, 0:3] = R[0]
    back[0:3, 3] = t[0]

    for i in range(n_models):
        m[0:3, 0:3] = R[i].T
        m[3, 0:3] = -t[i]
        cmd.transform_object(mobile_obj, list(m.flat), state=i + 1)

    # fit back to first state
    cmd.transform_object(mobile_obj, list(back.flat), state=0)

    if int(load_b):
        b_iter = iter(-log(scales))
        # BUG FIX: the original used "mm.mobile" here, but no "mm" exists in
        # this function (MatchMaker is only created in xfit) -> NameError
        # whenever load_b=1. Use the current selection instead.
        cmd.alter(selection, 'b = next(b_iter)',
                space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))
def xfit(mobile, target, mobile_state=-1, target_state=-1, load_b=0,
        cycles=10, match='align', guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of the model in the first selection on to the
    model in the second selection. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    mobile = string: atom selection

    target = string: atom selection

    mobile_state = int: object state of mobile selection {default: current}

    target_state = int: object state of target selection {default: current}

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

SEE ALSO

    intra_xfit, align, super, fit, cealign, theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import distance_sq, wfit, fit
    from . import querying

    cycles, quiet = int(cycles), int(quiet)
    mobile_state, target_state = int(mobile_state), int(target_state)
    mobile_obj = querying.get_object_name(mobile, 1)

    # resolve "current state" placeholders
    if mobile_state < 1:
        mobile_state = querying.get_object_state(mobile_obj)
    if target_state < 1:
        target_state = querying.get_selection_state(target)

    if int(guide):
        mobile = '(%s) and guide' % (mobile)
        target = '(%s) and guide' % (target)

    mm = MatchMaker(mobile, target, match)

    Y = asarray(querying.get_coords(mm.mobile, mobile_state))
    X = asarray(querying.get_coords(mm.target, target_state))

    if int(seed):
        R, t = identity(3), zeros(3)
    else:
        R, t = fit(X, Y)

    if int(bfit):
        # adapted from csb.apps.bfit
        from csb.bio.utils import distance, probabilistic_fit
        from csb.statistics.scalemixture import ScaleMixture

        mixture = ScaleMixture(scales=X.shape[0],
                prior=_bfit_get_prior(distribution), d=3)

        for _ in range(cycles):
            data = distance(Y, dot(X - t, R))
            mixture.estimate(data)
            R, t = probabilistic_fit(X, Y, mixture.scales)

        scales = mixture.scales
    else:
        # iteratively re-weighted least-squares fitting
        for _ in range(cycles):
            data = distance_sq(Y, dot(X - t, R))
            scales = 1.0 / data.clip(1e-3)
            R, t = wfit(X, Y, scales)

    m = identity(4)
    m[0:3, 0:3] = R
    m[0:3, 3] = t
    cmd.transform_object(mobile_obj, list(m.flat))

    if int(load_b):
        b_iter = iter(-log(scales))
        cmd.alter(mm.mobile, 'b = next(b_iter)',
                space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' xfit: %d atoms aligned' % (len(X)))
Compare least-squares fitting with unit quaternions (Kearsley's algorithm) with fitting using singular value decomposition (Kabsch's algorithm). """ from __future__ import print_function import spin import time import numpy as np from littlehelpers import load_coords from csb.bio.utils import fit X, Y = load_coords(['1ake', '4ake']) n = 1000 X = np.repeat(X, n, axis=0) Y = np.repeat(Y, n, axis=0) t = time.clock() A = spin.qfit(X, Y) t = time.clock() - t t2 = time.clock() B = fit(X, Y) t2 = time.clock() - t2 print('dist(R_quat,R_svd) = {0:.3f}'.format(spin.distance(A[0], B[0]))) print('dist(t_quat,t_svd) = {0:.3f}'.format(np.linalg.norm(A[1] - B[1]))) print('times: {0:.3f} vs {1:.3f} (quat vs svd)'.format(t, t2))
def mcsalign(mobile, target, mobile_state=-1, target_state=-1, cycles=5,
        timeout=10, method='', exact=0, quiet=1, object=None, _self=cmd):
    '''
DESCRIPTION

    Align two (ligand) selections based on Maximum-Common-Substructure.

    Requires: (rdkit | indigo), csb

ARGUMENTS

    mobile = str: atom selection of mobile object

    target = str: atom selection of target object

    mobile_state = int: object state of mobile selection
    {default: -1 = current state}

    target_state = int: object state of target selection
    {default: -1 = current state}

    cycles = int: number of weight-refinement iterations for
    weighted RMS fitting {default: 5}

    timeout = int: MCS search timeout {default: 10}

    method = indigo or rdkit {default: check availability}

    exact = 0/1: match elements and bond orders {default: 0}

    object = str: create an aligment object (requires PyMOL 2.3)

EXAMPLE

    fetch 3zcf 4n8t, async=0
    mcsalign /3zcf//A/HEC, /4n8t//A/HEM
    zoom /4n8t//A/HEM, animate=2, buffer=3
    '''
    from numpy import identity, dot, take
    from csb.bio.utils import distance_sq, wfit, fit

    # moving object
    m_objects = cmd.get_object_list(mobile)
    if len(m_objects) != 1:
        # If selection covers multiple objects, call "mcsalign" for every object.
        # BUG FIX: the original forwarded "quiet" positionally into the "exact"
        # parameter slot and silently dropped "exact"; forward by keyword.
        # "object" is deliberately not forwarded: a single alignment object
        # cannot represent several independent superpositions.
        for m_object in m_objects:
            mcsalign('(%s) & model %s' % (mobile, m_object), target,
                    mobile_state, target_state, cycles, timeout, method,
                    exact=exact, quiet=quiet, _self=_self)
        return

    # get molecules from selections
    m_sdf = get_molstr(mobile, mobile_state)
    t_sdf = get_molstr(target, target_state)

    # find maximum common substructure
    m_indices, t_indices = get_mcs_indices(method, quiet, m_sdf, t_sdf,
            timeout, int(exact))

    if len(m_indices) < 3:
        raise CmdException('not enough atoms in MCS')

    if not int(quiet):
        print(' MCS-Align: found MCS with %d atoms (%s)' % (len(m_indices),
            m_objects[0]))

    # coordinates
    Y = take(cmd.get_coords(mobile, mobile_state), m_indices, 0)
    X = take(cmd.get_coords(target, target_state), t_indices, 0)

    # weighted RMS fitting (iteratively re-weighted least squares)
    R, t = fit(X, Y)
    for _ in range(int(cycles)):
        data = distance_sq(Y, dot(X - t, R))
        scales = 1.0 / data.clip(1e-3)
        R, t = wfit(X, Y, scales)

    # superpose
    m = identity(4)
    m[0:3, 0:3] = R
    m[0:3, 3] = t
    cmd.transform_object(m_objects[0], list(m.flat), mobile_state)

    if object:
        t_idx_list = iterate_state_to_list(target_state, target, 'model, index')
        m_idx_list = iterate_state_to_list(mobile_state, mobile, 'model, index')
        raw = [[t_idx_list[i], m_idx_list[j]]
                for (i, j) in zip(t_indices, m_indices)]
        try:
            _self.set_raw_alignment(object, raw, guide=t_idx_list[0][0])
        except AttributeError:
            raise CmdException(
                'Creating an alignment object requires PyMOL 2.3')
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of an object to the intermediate
    structure over all states. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

NOTE

    Assumes all states to have identical number of CA-atoms.

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords, get_object_name

    cycles, quiet = int(cycles), int(quiet)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    mobile_objs = _self.get_object_list(selection)

    # collect all states of all objects; every object must contribute
    # states with the same atom count
    n_states_objs = []
    X = []
    for obj in mobile_objs:
        X_obj = get_ensemble_coords('({}) & {}'.format(selection, obj),
                _self=_self)
        if X and len(X_obj) and len(X[0]) != len(X_obj[0]):
            raise CmdException('objects have different number of atoms')
        X.extend(X_obj)
        n_states_objs.append(len(X_obj))

    n_models = len(X)
    X = asarray(X)
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite
        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)
        mixture = ScaleMixture(scales=X.shape[1],
                prior=_bfit_get_prior(distribution), d=3)

        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            data = asarray([distance(average, dot(X[i] - t[i], R[i]))
                for i in range(n_models)])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales
    else:
        if int(seed):
            ensemble = X
        else:
            # seed by fitting every state onto the first one
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            # weights from per-atom variance across states
            data = ensemble.var(0).sum(1)
            scales = 1.0 / data.clip(1e-3)

            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3, 0:3] = R[0]
    back[0:3, 3] = t[0]

    transformation_i = 0
    for mobile_obj, n_states in zip(mobile_objs, n_states_objs):
        for state_i in range(n_states):
            m[0:3, 0:3] = R[transformation_i].T
            m[3, 0:3] = -t[transformation_i]
            _self.transform_object(mobile_obj, list(m.flat),
                    state=state_i + 1)
            transformation_i += 1

        # fit back to first state
        _self.transform_object(mobile_obj, list(back.flat), state=0)

        if int(load_b):
            b_iter = iter(-log(scales))
            _self.alter('({}) & {} & state 1'.format(selection, mobile_obj),
                    'b = next(b_iter)',
                    space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]),
            n_models))
def xfit(mobile, target, mobile_state=-1, target_state=-1, load_b=0,
        cycles=10, match='align', guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of the model in the first selection on to the
    model in the second selection. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    mobile = string: atom selection

    target = string: atom selection

    mobile_state = int: object state of mobile selection {default: current}

    target_state = int: object state of target selection {default: current}

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

SEE ALSO

    intra_xfit, align, super, fit, cealign, theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import distance_sq, wfit, fit
    from . import querying

    cycles, quiet = int(cycles), int(quiet)
    mobile_state, target_state = int(mobile_state), int(target_state)
    mobile_obj = querying.get_object_name(mobile, 1, _self=_self)

    # resolve "current state" placeholders
    if mobile_state < 1:
        mobile_state = querying.get_object_state(mobile_obj, _self=_self)
    if target_state < 1:
        target_state = querying.get_selection_state(target, _self=_self)

    if int(guide):
        mobile = '(%s) and guide' % (mobile)
        target = '(%s) and guide' % (target)

    mm = MatchMaker(mobile, target, match, _self=_self)

    Y = asarray(_self.get_coords(mm.mobile, mobile_state))
    X = asarray(_self.get_coords(mm.target, target_state))

    if int(seed):
        R, t = identity(3), zeros(3)
    else:
        R, t = fit(X, Y)

    if int(bfit):
        # adapted from csb.apps.bfit
        from csb.bio.utils import distance, probabilistic_fit
        from csb.statistics.scalemixture import ScaleMixture

        mixture = ScaleMixture(scales=X.shape[0],
                prior=_bfit_get_prior(distribution), d=3)

        for _ in range(cycles):
            data = distance(Y, dot(X - t, R))
            mixture.estimate(data)
            R, t = probabilistic_fit(X, Y, mixture.scales)

        scales = mixture.scales
    else:
        # iteratively re-weighted least-squares fitting
        for _ in range(cycles):
            data = distance_sq(Y, dot(X - t, R))
            scales = 1.0 / data.clip(1e-3)
            R, t = wfit(X, Y, scales)

    m = identity(4)
    m[0:3, 0:3] = R
    m[0:3, 3] = t
    _self.transform_object(mobile_obj, list(m.flat))

    if int(load_b):
        b_iter = iter(-log(scales))
        _self.alter(mm.mobile, 'b = next(b_iter)',
                space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' xfit: %d atoms aligned' % (len(X)))
def mcsalign(mobile, target, mobile_state=-1, target_state=-1, cycles=5,
        timeout=10, method="", quiet=1):
    """
DESCRIPTION

    Align two (ligand) selections based on Maximum-Common-Substructure.

    Requires: (rdkit | indigo), csb

ARGUMENTS

    mobile = str: atom selection of mobile object

    target = str: atom selection of target object

    mobile_state = int: object state of mobile selection
    {default: -1 = current state}

    target_state = int: object state of target selection
    {default: -1 = current state}

    cycles = int: number of weight-refinement iterations for
    weighted RMS fitting {default: 5}

    timeout = int: MCS search timeout {default: 10}

    method = indigo or rdkit {default: check availability}

EXAMPLE

    fetch 3zcf 4n8t, async=0
    mcsalign /3zcf//A/HEC, /4n8t//A/HEM
    zoom /4n8t//A/HEM, animate=2, buffer=3
    """
    from numpy import identity, dot, take
    from csb.bio.utils import distance_sq, wfit, fit

    # moving object
    m_objects = cmd.get_object_list(mobile)
    if len(m_objects) != 1:
        # If selection covers multiple objects, call "mcsalign" for every object
        for m_object in m_objects:
            mcsalign("(%s) & model %s" % (mobile, m_object), target,
                    mobile_state, target_state, cycles, timeout, method,
                    quiet)
        return

    # get molecules from selections
    m_sdf = get_molstr(mobile, mobile_state)
    t_sdf = get_molstr(target, target_state)

    # find maximum common substructure
    m_indices, t_indices = get_mcs_indices(method, quiet, m_sdf, t_sdf,
            timeout)

    if len(m_indices) < 3:
        raise CmdException("not enough atoms in MCS")

    if not int(quiet):
        print(" MCS-Align: found MCS with %d atoms (%s)" % (len(m_indices),
            m_objects[0]))

    # coordinates
    Y = take(cmd.get_coords(mobile, mobile_state), m_indices, 0)
    X = take(cmd.get_coords(target, target_state), t_indices, 0)

    # weighted RMS fitting (iteratively re-weighted least squares)
    R, t = fit(X, Y)
    for _ in range(int(cycles)):
        data = distance_sq(Y, dot(X - t, R))
        scales = 1.0 / data.clip(1e-3)
        R, t = wfit(X, Y, scales)

    # superpose
    m = identity(4)
    m[0:3, 0:3] = R
    m[0:3, 3] = t
    cmd.transform_object(m_objects[0], list(m.flat), mobile_state)
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of an object to the intermediate
    structure over all states. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, http://csb.codeplex.com

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

NOTE

    Assumes all states to have identical number of CA-atoms.

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords, get_object_name

    cycles, quiet = int(cycles), int(quiet)
    mobile_obj = get_object_name(selection, 1)
    n_models = cmd.count_states(mobile_obj)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    # coordinates of all states: shape (n_models, n_atoms, 3)
    X = asarray(get_ensemble_coords(selection))
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite
        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)
        mixture = ScaleMixture(scales=X.shape[1],
                prior=_bfit_get_prior(distribution), d=3)

        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            data = asarray([
                distance(average, dot(X[i] - t[i], R[i]))
                for i in range(n_models)
            ])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales
    else:
        if int(seed):
            ensemble = X
        else:
            # seed the iteration by fitting everything onto the first state
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            # weights from per-atom variance across states
            data = ensemble.var(0).sum(1)
            scales = 1.0 / data.clip(1e-3)

            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3, 0:3] = R[0]
    back[0:3, 3] = t[0]

    for i in range(n_models):
        m[0:3, 0:3] = R[i].T
        m[3, 0:3] = -t[i]
        cmd.transform_object(mobile_obj, list(m.flat), state=i + 1)

    # fit back to first state
    cmd.transform_object(mobile_obj, list(back.flat), state=0)

    if int(load_b):
        b_iter = iter(-log(scales))
        # BUG FIX: the original used the Python-2-only 'b_iter.next()' idiom
        # and space=locals(); use next(b_iter) with an explicit space dict,
        # which works on Python 3 and matches the newer versions of this
        # function.
        cmd.alter(selection, 'b = next(b_iter)',
                space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))
def estimate_T(self):
    """Re-estimate the rigid transformations: for every data set m and
    every component k, superpose the component mean onto the data and
    store the resulting rotation/translation in self._R / self._t."""
    from csb.bio.utils import fit

    for data_idx in range(self.M):
        for comp_idx in range(self.K):
            self._R[data_idx, comp_idx], self._t[data_idx, comp_idx] = \
                fit(self._X[data_idx], self.means[comp_idx])