def setup(self, bottom, top):
    if len(bottom) != 2:
        raise Exception("need two inputs to compute loss")
    x, y = T.dvectors('x', 'y')
    mux, muy = T.dvectors('mux', 'muy')
    sx, sy = T.dvectors('sx', 'sy')
    rho = T.dvector('rho')

    z_sx = T.exp(sx)
    z_sy = T.exp(sy)
    z_corr = T.tanh(rho)

    z = T.square((x - mux) / z_sx) + T.square((y - muy) / z_sy) \
        - 2.0 * z_corr * (x - mux) * (y - muy) / (z_sx * z_sy)
    prob = T.exp(-z / (2.0 * (1.0 - T.square(z_corr)))) / (
        2.0 * np.pi * z_sx * z_sy * T.sqrt(1.0 - T.square(z_corr)))
    result = T.sum(-T.log(T.maximum(prob, 1e-10)))

    # dmux, dmuy, dsx, dsy, drho = T.grad(result, [mux, muy, sx, sy, rho])
    dmux = T.grad(result, mux)
    dmuy = T.grad(result, muy)
    dsx = T.grad(result, sx)
    dsy = T.grad(result, sy)
    drho = T.grad(result, rho)

    self.f = theano.function([mux, muy, sx, sy, rho, x, y], result)
    self.dfmux = theano.function([mux, muy, sx, sy, rho, x, y], dmux)
    self.dfmuy = theano.function([mux, muy, sx, sy, rho, x, y], dmuy)
    self.dfsx = theano.function([mux, muy, sx, sy, rho, x, y], dsx)
    self.dfsy = theano.function([mux, muy, sx, sy, rho, x, y], dsy)
    self.dfrho = theano.function([mux, muy, sx, sy, rho, x, y], drho)
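A minimal usage sketch of the compiled functions above, assuming `layer` is a hypothetical instance of the class whose `setup()` has already run; the parameter and data arrays are illustrative only.

import numpy as np

# Hypothetical usage after setup() has compiled the loss and gradient functions.
n = 4
mux, muy = np.zeros(n), np.zeros(n)
sx, sy = np.zeros(n), np.zeros(n)   # log-std parameters
rho = np.zeros(n)                   # pre-tanh correlation
x, y = np.random.randn(n), np.random.randn(n)

loss = layer.f(mux, muy, sx, sy, rho, x, y)             # scalar negative log-likelihood
dloss_dmux = layer.dfmux(mux, muy, sx, sy, rho, x, y)   # gradient wrt mux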
def test_assoccomm():
    x, a, b, c = tt.dvectors("xabc")
    test_expr = x + 1
    q = var()

    res = run(1, q, applyo(tt.add, etuple(*test_expr.owner.inputs), test_expr))
    assert q == res[0]

    res = run(1, q, applyo(q, etuple(*test_expr.owner.inputs), test_expr))
    assert tt.add == res[0].reify()

    res = run(1, q, applyo(tt.add, q, test_expr))
    assert mt(tuple(test_expr.owner.inputs)) == res[0]

    x = var()
    res = run(0, x, eq_comm(mt.mul(a, b), mt.mul(b, x)))
    assert (mt(a),) == res

    res = run(0, x, eq_comm(mt.add(a, b), mt.add(b, x)))
    assert (mt(a),) == res

    (res,) = run(0, x, eq_assoc(mt.add(a, b, c), mt.add(a, x)))
    assert res == mt(b + c)

    (res,) = run(0, x, eq_assoc(mt.mul(a, b, c), mt.mul(a, x)))
    assert res == mt(b * c)
def test_assoccomm():
    from symbolic_pymc.relations import buildo

    x, a, b, c = tt.dvectors('xabc')
    test_expr = x + 1
    q = var('q')

    res = run(1, q, buildo(tt.add, test_expr.owner.inputs, test_expr))
    assert q == res[0]

    res = run(1, q, buildo(q, test_expr.owner.inputs, test_expr))
    assert tt.add == res[0].reify()

    res = run(1, q, buildo(tt.add, q, test_expr))
    assert mt(tuple(test_expr.owner.inputs)) == res[0]

    res = run(0, var('x'), eq_comm(mt.mul(a, b), mt.mul(b, var('x'))))
    assert (mt(a),) == res

    res = run(0, var('x'), eq_comm(mt.add(a, b), mt.add(b, var('x'))))
    assert (mt(a),) == res

    res = run(0, var('x'), (eq_assoc, mt.add(a, b, c), mt.add(a, var('x'))))
    # TODO: `res[0]` should return `etuple`s.  Since `eq_assoc` effectively
    # picks apart the results of `arguments(...)`, I don't know if we can
    # keep the `etuple`s around.  We might be able to convert the results
    # to `etuple`s automatically by wrapping `eq_assoc`, though.
    res_obj = etuple(*res[0]).eval_obj
    assert res_obj == mt(b + c)

    res = run(0, var('x'), (eq_assoc, mt.mul(a, b, c), mt.mul(a, var('x'))))
    res_obj = etuple(*res[0]).eval_obj
    assert res_obj == mt(b * c)
def setUp(self):
    name = self._testMethodName.split("_", 1)[1]
    X_var = T.dvectors("X")
    function_var = getattr(vs.separators.neural.network.activations, name)(X_var)
    self.function = theano.function([X_var], function_var)
def test_terms():
    x, a, b = tt.dvectors('xab')
    test_expr = x + a * b

    assert mt(test_expr.owner.op) == operator(test_expr)
    assert mt(tuple(test_expr.owner.inputs)) == arguments(test_expr)

    assert graph_equal(test_expr, term(operator(test_expr), arguments(test_expr)))
def sample_default_value():
    print "default values"
    x, y = T.dvectors("x", "y")
    z = x + y
    f = theano.function([x, Param(y, default=[1, 1])], z)
    print f([0.0, 1.0])
    print f([0.0, 1.0], [2.0, 3.0])
    print
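A small sketch of the same default-argument pattern written against `theano.In`, which replaced `Param` in later Theano releases (assumes a reasonably recent Theano install):

import theano
import theano.tensor as T

# Default input values via theano.In instead of the older Param wrapper.
x, y = T.dvectors("x", "y")
f = theano.function([x, theano.In(y, value=[1.0, 1.0])], x + y)

print(f([0.0, 1.0]))               # uses the default value for y
print(f([0.0, 1.0], [2.0, 3.0]))   # overrides it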
def __init__(self, Q, D, layers, order, D_cum_sum, N, M, non_rec):
    try:
        print('Trying to load model...')
        with open('model_SV1.save', 'rb') as file_handle:
            self.f, self.g = pickle.load(file_handle)
        print('Loaded!')
        return
    except:
        print('Failed. Creating a new model...')

    print('Setting up variables...')
    hyp, SIGMA_S, U, b, MU_S = T.dmatrices('hyp', 'SIGMA_S', 'U', 'b', 'MU_S')
    y, MEAN_MAP, sn, sf = T.dvectors('y', 'MEAN_MAP', 'sn', 'sf')
    w = T.dscalars('w')

    if Q > 1:
        X = T.dmatrix('X')
    else:
        X = T.dvector('X')

    if layers > 1:
        MU, SIGMA = T.dmatrices('MU', 'SIGMA')
    else:
        MU, SIGMA = T.dvectors('MU', 'SIGMA')

    SIGMA_trf, SIGMA_S_trf = T.log(1 + T.exp(SIGMA))**2, T.log(1 + T.exp(SIGMA_S))**2
    sf_trf, sn_trf, lengthscale_trf, lengthscale_p_trf = (
        T.log(1 + T.exp(sf))**2, T.log(1 + T.exp(sn))**2,
        T.log(1 + T.exp(hyp[:, 0])), T.log(1 + T.exp(hyp[:, 1])))

    print('Setting up model...')
    LL, KL = self.get_model(w, lengthscale_trf, lengthscale_p_trf, sn_trf, sf_trf,
                            MU_S, SIGMA_S_trf, MU, SIGMA_trf, U, b, X, y, MEAN_MAP,
                            Q, D, D_cum_sum, layers, order, non_rec, N, M)

    print('Compiling model...')
    inputs = {'X': X, 'MU': MU, 'SIGMA': SIGMA, 'MU_S': MU_S, 'SIGMA_S': SIGMA_S,
              'U': U, 'b': b, 'hyp': hyp, 'y': y, 'MEAN_MAP': MEAN_MAP,
              'sn': sn, 'sf': sf, 'w': w}
    # solve a bug with derivative wrt inputs not in the graph
    z = 0.0 * sum([T.sum(v) for v in inputs.values()])
    f = {'LL': LL, 'KL': KL}
    self.f = {fn: theano.function(list(inputs.values()), fv + z, name=fn,
                                  on_unused_input='ignore')
              for fn, fv in f.items()}
    g = {'LL': LL, 'KL': KL}
    wrt = {'MU': MU, 'SIGMA': SIGMA, 'MU_S': MU_S, 'SIGMA_S': SIGMA_S, 'U': U,
           'b': b, 'hyp': hyp, 'MEAN_MAP': MEAN_MAP, 'sn': sn, 'sf': sf, 'w': w}
    self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv + z, vv),
                                       name='d' + gn + '_d' + vn,
                                       on_unused_input='ignore')
                   for gn, gv in g.items()}
              for vn, vv in wrt.items()}

    with open('model_SV1.save', 'wb') as file_handle:
        print('Saving model...')
        sys.setrecursionlimit(100000)
        pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
def softmax_cost_func():
    U = T.dmatrix('U')
    v, tv = T.dvectors('v', 'tv')
    yp = T.nnet.softmax(T.dot(U, v))
    J = T.sum(T.log(yp) * tv)
    dJ = T.grad(J, [U, v])
    fJ = theano.function([U, v, tv], J)
    dfJ = theano.function([U, v, tv], dJ)
    return fJ, dfJ
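A minimal usage sketch of the two compiled functions returned above; the shapes and the one-hot target are illustrative assumptions.

import numpy as np

# Hypothetical usage of softmax_cost_func().
fJ, dfJ = softmax_cost_func()

U = np.random.randn(3, 5)          # weights: 3 classes, 5 features
v = np.random.randn(5)             # input vector
tv = np.array([0.0, 1.0, 0.0])     # one-hot target

cost = fJ(U, v, tv)                # scalar log-likelihood term
dU, dv = dfJ(U, v, tv)             # gradients wrt U and v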
def test_vectors(self):
    try:
        import theano.tensor as T
        from theano import function
    except:
        return

    for MT in [False, True]:
        # Set up variables and function
        vals = [np.random.randn(20) for i in range(5)]
        f = lambda a, b, c, d, e: a + (b * c) - d**e

        # Set up our objects
        Cs = [ch.Ch(v) for v in vals]
        C_result = f(*Cs)
        C_result.MT = MT

        # Set up Theano equivalents
        Ts = T.dvectors('T1', 'T2', 'T3', 'T4', 'T5')
        TF = f(*Ts)
        T_result = function(Ts, TF)

        if False:
            import theano.gradient
            which = 1
            theano_sse = (TF**2.).sum()
            theano_grad = theano.gradient.grad(theano_sse, Ts[which])
            theano_fn = function(Ts, theano_grad)
            print(theano_fn(*vals))
            C_result_grad = ch.SumOfSquares(C_result).dr_wrt(Cs[which])
            print(C_result_grad)
            # if True:
            #     aaa = np.linalg.solve(C_result_grad.T.dot(C_result_grad), C_result_grad.dot(np.zeros(C_result_grad.shape[1])))
            #     theano_hes = theano.R_obbb = theano.R_op()
            import pdb
            pdb.set_trace()

        # Make sure values and derivatives are equal
        np.testing.assert_array_equal(C_result.r, T_result(*vals))
        for k in range(len(vals)):
            theano_derivative = function(Ts, T.jacobian(TF, Ts[k]))(*vals)
            our_derivative = np.array(C_result.dr_wrt(Cs[k]).todense())
            # print(theano_derivative, our_derivative)

            # Theano produces more NaNs than we do during exponentiation,
            # so we test only on entries where Theano is without NaNs.
            without_nans = np.nonzero(
                np.logical_not(np.isnan(theano_derivative.flatten())))[0]
            np.testing.assert_array_equal(
                theano_derivative.flatten()[without_nans],
                our_derivative.flatten()[without_nans])
def get_variational_scores(result, config, model, inference, true_pop_size):
    approx_params = list(inference.approx.shared_params.values())
    distance = abs(model.pop_size - true_pop_size) / true_pop_size
    input_vars = tt.dvectors(len(approx_params))
    distance_sample = inference.approx.sample_node(
        distance,
        size=config['n_eval_samples'],
        more_replacements={shared: var for shared, var in zip(approx_params, input_vars)})
    distance_mean = tt.mean(distance_sample)
    distance_function = theano.function(input_vars, distance_mean)
    distances = [distance_function(*[result[var][i]
                                     for var in inference.approx.shared_params.keys()])
                 for i in range(len(result['i']))]
    return pd.DataFrame({
        'date_time': result['date_time'],
        'error': np.stack(distances)
    })
def test_vectors(self):
    try:
        import theano.tensor as T
        from theano import function
    except:
        return

    for MT in [False, True]:
        # Set up variables and function
        vals = [np.random.randn(20) for i in range(5)]
        f = lambda a, b, c, d, e: a + (b * c) - d ** e

        # Set up our objects
        Cs = [ch.Ch(v) for v in vals]
        C_result = f(*Cs)
        C_result.MT = MT

        # Set up Theano equivalents
        Ts = T.dvectors('T1', 'T2', 'T3', 'T4', 'T5')
        TF = f(*Ts)
        T_result = function(Ts, TF)

        if False:
            import theano.gradient
            which = 1
            theano_sse = (TF**2.).sum()
            theano_grad = theano.gradient.grad(theano_sse, Ts[which])
            theano_fn = function(Ts, theano_grad)
            print theano_fn(*vals)
            C_result_grad = ch.SumOfSquares(C_result).dr_wrt(Cs[which])
            print C_result_grad
            # if True:
            #     aaa = np.linalg.solve(C_result_grad.T.dot(C_result_grad), C_result_grad.dot(np.zeros(C_result_grad.shape[1])))
            #     theano_hes = theano.R_obbb = theano.R_op()
            import pdb; pdb.set_trace()

        # Make sure values and derivatives are equal
        np.testing.assert_array_equal(C_result.r, T_result(*vals))
        for k in range(len(vals)):
            theano_derivative = function(Ts, T.jacobian(TF, Ts[k]))(*vals)
            our_derivative = np.array(C_result.dr_wrt(Cs[k]).todense())
            #print theano_derivative, our_derivative

            # Theano produces more NaNs than we do during exponentiation,
            # so we test only on entries where Theano is without NaNs.
            without_nans = np.nonzero(np.logical_not(np.isnan(theano_derivative.flatten())))[0]
            np.testing.assert_array_equal(theano_derivative.flatten()[without_nans],
                                          our_derivative.flatten()[without_nans])
def createObjectiveFunction(self):
    '''
    @description: initialize objective function and minimization function
    @X,y data matrix/vector
    @u random noise for simulator
    @v standard normal for reparametrization trick
    '''
    y = T.dvector("y")
    W, U = T.dvectors("W", "U")
    V = T.dscalar("V")

    mu = self.params[0]
    # logSigma = self.params[1]
    logSigma = sharedX(0.6)
    logLambda = sharedX(0)  # self.params[2]

    negKL = 0.5 * self.dimTheta + 0.5 * T.sum(2 * logSigma - mu**2 - T.exp(logSigma)**2)
    results, updates = th.map(fn=self.alpha_stable, sequences=[W, U], non_sequences=[V])
    f = results
    results2, updates2 = th.map(fn=self.alpha_perfect, sequences=[W, U])
    f2 = results2
    # SSE = T.sum((y-f)**2)
    logLike = -self.m * (0.5 * np.log(2 * np.pi) + logLambda) \
        - 0.5 * T.sum((T.flatten(y) - T.flatten(f))**2) / (T.exp(logLambda)**2)
    # logLike2 = -self.m*(0.5 * np.log(2 * np.pi) + logLambda)-0.5*T.sum((y-f2)**2)/(T.exp(logLambda)**2)

    elbo = (negKL + logLike)
    # elbo2 = (negKL + logLike2)
    obj = -elbo
    # obj = SSE
    self.f = th.function([y, W, U, V], f, updates=updates, on_unused_input='ignore')
    self.lowerboundfunction = th.function([y, W, U, V], obj, updates=updates,
                                          on_unused_input='ignore')
    derivatives = T.grad(obj, self.params)
    self.gradientfunction = th.function([y, W, U, V], derivatives, updates=updates,
                                        on_unused_input='ignore')
def _get_theano_interval_result(self, lower, upper, function, *args, **kwargs):
    dim = len(lower.shape)
    if dim == 1:
        t_lower, t_upper = T.dvectors('inpl', 'inpu')
    elif dim == 3:
        t_lower, t_upper = T.tensor3s('inpl', 'inpu')
    else:
        raise NotImplementedError
    iinp = TheanoInterval(t_lower, t_upper)
    res = function(iinp, *args, **kwargs)
    d = {t_lower: lower, t_upper: upper}
    return res.eval(d)
def test_terms():
    x, a, b = tt.dvectors("xab")
    test_expr = x + a * b

    assert mt(test_expr.owner.op) == operator(test_expr)

    assert mt(tuple(test_expr.owner.inputs)) == tuple(arguments(test_expr))
    assert tuple(arguments(test_expr)) == mt(tuple(test_expr.owner.inputs))

    # Implicit `etuple` conversion should retain the original object
    # (within the implicitly introduced meta object, of course).
    assert test_expr == arguments(test_expr)._parent._eval_obj.obj

    assert graph_equal(test_expr, term(operator(test_expr), arguments(test_expr)))
    assert mt(test_expr) == term(operator(test_expr), arguments(test_expr))

    # Same here: should retain the original object.
    assert test_expr == term(operator(test_expr), arguments(test_expr)).reify()
def test_unification():
    x, y, a, b = tt.dvectors("xyab")
    x_s = tt.scalar("x_s")
    y_s = tt.scalar("y_s")
    c_tt = tt.constant(1, "c")
    d_tt = tt.constant(2, "d")

    x_l = var("x_l")
    y_l = var("y_l")

    assert a == reify(x_l, {x_l: a}).reify()
    test_expr = mt.add(1, mt.mul(2, x_l))
    test_reify_res = reify(test_expr, {x_l: a})
    assert graph_equal(test_reify_res.reify(), 1 + 2 * a)

    z = tt.add(b, a)
    assert {x_l: z} == unify(x_l, z)
    assert b == unify(mt.add(x_l, a), mt.add(b, a))[x_l].reify()

    res = unify(mt.inv(mt.add(x_l, a)), mt.inv(mt.add(b, y_l)))
    assert res[x_l].reify() == b
    assert res[y_l].reify() == a

    mt_expr_add = mt.add(x_l, y_l)

    # The parameters are vectors
    tt_expr_add_1 = tt.add(x, y)
    assert graph_equal(
        tt_expr_add_1,
        reify(mt_expr_add, unify(mt_expr_add, tt_expr_add_1)).reify())

    # The parameters are scalars
    tt_expr_add_2 = tt.add(x_s, y_s)
    assert graph_equal(
        tt_expr_add_2,
        reify(mt_expr_add, unify(mt_expr_add, tt_expr_add_2)).reify())

    # The parameters are constants
    tt_expr_add_3 = tt.add(c_tt, d_tt)
    assert graph_equal(
        tt_expr_add_3,
        reify(mt_expr_add, unify(mt_expr_add, tt_expr_add_3)).reify())
def _getModel():
    s1, s2 = T.dvectors('s1', 's2')
    t1, t2 = T.dmatrices('t1', 't2')
    gw = T.dvector('gw')
    prank = T.dvector('prank')

    r1 = T.dot(t1, prank)
    r2 = T.dot(t2, prank)
    erd = T.exp(r2 - r1)
    p = erd / (erd + 1)
    loglterms = gw * ((s1 * T.log(1 - p)) + (s2 * T.log(p)))

    logl = -T.sum(loglterms)
    gradf = T.grad(logl, prank)
    hessf = theano.gradient.hessian(logl, prank)

    return s1, s2, t1, t2, gw, prank, loglterms, logl, gradf, hessf
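A hypothetical sketch of how the symbolic graph returned by _getModel could be compiled into callable functions; the variable names below simply mirror the return values above.

import theano

# Compile the negative log-likelihood, its gradient, and its Hessian
# from the symbolic variables returned by _getModel.
s1, s2, t1, t2, gw, prank, loglterms, logl, gradf, hessf = _getModel()

negloglik = theano.function([s1, s2, t1, t2, gw, prank], logl)
gradient = theano.function([s1, s2, t1, t2, gw, prank], gradf)
hessian = theano.function([s1, s2, t1, t2, gw, prank], hessf)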
def test_assoccomm():
    from kanren.assoccomm import buildo

    x, a, b, c = tt.dvectors('xabc')
    test_expr = x + 1
    q = var('q')

    assert q == run(1, q, buildo(tt.add, test_expr.owner.inputs, test_expr))[0]
    assert tt.add == run(1, q, buildo(q, test_expr.owner.inputs, test_expr))[0].reify()
    assert graph_equal(tuple(test_expr.owner.inputs),
                       run(1, q, buildo(tt.add, q, test_expr))[0])

    assert (mt(a),) == run(0, var('x'), (eq_comm, mt.mul(a, b), mt.mul(b, var('x'))))
    assert (mt(a),) == run(0, var('x'), (eq_comm, mt.add(a, b), mt.add(b, var('x'))))

    res = run(0, var('x'), (eq_assoc, mt.add(a, b, c), mt.add(a, var('x'))))
    assert graph_equal(res[0], b + c)

    res = run(0, var('x'), (eq_assoc, mt.mul(a, b, c), mt.mul(a, var('x'))))
    assert graph_equal(res[0], b * c)
def createObjectiveFunction(self):
    '''
    @description: initialize objective function and minimization function
    @X,y data matrix/vector
    @u random noise for simulator
    @v standard normal for reparametrization trick
    '''
    y = T.dvector("y")
    W, U = T.dvectors("W","U")
    V = T.dscalar("V")

    mu = self.params[0]
    #logSigma = self.params[1]
    logSigma = sharedX(0.6)
    logLambda = sharedX(0) #self.params[2]

    negKL = 0.5*self.dimTheta+0.5*T.sum(2*logSigma - mu ** 2 - T.exp(logSigma) ** 2)
    results,updates = th.map(fn=self.alpha_stable,sequences=[W,U],non_sequences=[V])
    f = results
    results2,updates2 = th.map(fn=self.alpha_perfect,sequences=[W,U])
    f2 = results2
    #SSE = T.sum((y-f)**2)
    logLike = -self.m*(0.5 * np.log(2 * np.pi) + logLambda)-0.5*T.sum((T.flatten(y)-T.flatten(f))**2)/(T.exp(logLambda)**2)
    #logLike2 = -self.m*(0.5 * np.log(2 * np.pi) + logLambda)-0.5*T.sum((y-f2)**2)/(T.exp(logLambda)**2)

    elbo = (negKL + logLike)
    #elbo2 = (negKL + logLike2)
    obj = -elbo
    #obj = SSE
    self.f = th.function([y,W,U,V], f, updates=updates, on_unused_input='ignore')
    self.lowerboundfunction = th.function([y,W,U,V], obj, updates=updates, on_unused_input='ignore')
    derivatives = T.grad(obj, self.params)
    self.gradientfunction = th.function([y,W,U,V], derivatives, updates=updates, on_unused_input='ignore')
def compile_theano(): """ This function generates theano compiled kernels for energy and force learning ker_jkmn_withcutoff = ker_jkmn #* cutoff_ikmn The position of the atoms relative to the centrla one, and their chemical species are defined by a matrix of dimension Mx5 Returns: km_ee (func): energy-energy kernel km_ef (func): energy-force kernel km_ff (func): force-force kernel """ if not (os.path.exists(Mffpath / 'k3_ee_m.pickle') and os.path.exists(Mffpath / 'k3_ef_m.pickle') and os.path.exists(Mffpath / 'k3_ff_m.pickle')): print("Building Kernels") import theano.tensor as T from theano import function, scan logger.info("Started compilation of theano three body kernels") # -------------------------------------------------- # INITIAL DEFINITIONS # -------------------------------------------------- # positions of central atoms r1, r2 = T.dvectors('r1d', 'r2d') # positions of neighbours rho1, rho2 = T.dmatrices('rho1', 'rho2') # hyperparameter sig = T.dscalar('sig') # cutoff hyperparameters theta = T.dscalar('theta') rc = T.dscalar('rc') # positions of neighbours without chemical species rho1s = rho1[:, 0:3] rho2s = rho2[:, 0:3] alpha_1 = rho1[:, 3].flatten() alpha_2 = rho2[:, 3].flatten() alpha_j = rho1[:, 4].flatten() alpha_m = rho2[:, 4].flatten() alpha_k = rho1[:, 4].flatten() alpha_n = rho2[:, 4].flatten() # -------------------------------------------------- # RELATIVE DISTANCES TO CENTRAL VECTOR AND BETWEEN NEIGHBOURS # -------------------------------------------------- # first and second configuration r1j = T.sqrt(T.sum((rho1s[:, :] - r1[None, :])**2, axis=1)) r2m = T.sqrt(T.sum((rho2s[:, :] - r2[None, :])**2, axis=1)) rjk = T.sqrt( T.sum((rho1s[None, :, :] - rho1s[:, None, :])**2, axis=2)) rmn = T.sqrt( T.sum((rho2s[None, :, :] - rho2s[:, None, :])**2, axis=2)) # -------------------------------------------------- # CHEMICAL SPECIES MASK # -------------------------------------------------- # numerical kronecker def delta_alpha2(a1j, a2m): d = np.exp(-(a1j - a2m)**2 / (2 * 0.00001**2)) return d # permutation 1 delta_alphas12 = delta_alpha2(alpha_1[0], alpha_2[0]) delta_alphasjm = delta_alpha2(alpha_j[:, None], alpha_m[None, :]) delta_alphas_jmkn = delta_alphasjm[:, None, :, None] * delta_alphasjm[None, :, None, :] delta_perm1 = delta_alphas12 * delta_alphas_jmkn # permutation 3 delta_alphas1m = delta_alpha2(alpha_1[0, None], alpha_m[None, :]).flatten() delta_alphasjn = delta_alpha2(alpha_j[:, None], alpha_n[None, :]) delta_alphask2 = delta_alpha2(alpha_k[:, None], alpha_2[None, 0]).flatten() delta_perm3 = delta_alphas1m[None, None, :, None] * delta_alphasjn[:, None, None, :] * \ delta_alphask2[None, :, None, None] # permutation 5 delta_alphas1n = delta_alpha2(alpha_1[0, None], alpha_n[None, :]).flatten() delta_alphasj2 = delta_alpha2(alpha_j[:, None], alpha_2[None, 0]).flatten() delta_alphaskm = delta_alpha2(alpha_k[:, None], alpha_m[None, :]) delta_perm5 = delta_alphas1n[None, None, None, :] * delta_alphaskm[None, :, :, None] * \ delta_alphasj2[:, None, None, None] # -------------------------------------------------- # BUILD THE KERNEL # -------------------------------------------------- # Squared exp of differences se_1j2m = T.exp(-(r1j[:, None] - r2m[None, :])**2 / (2 * sig**2)) se_jkmn = T.exp( -(rjk[:, :, None, None] - rmn[None, None, :, :])**2 / (2 * sig**2)) se_jk2m = T.exp(-(rjk[:, :, None] - r2m[None, None, :])**2 / (2 * sig**2)) se_1jmn = T.exp(-(r1j[:, None, None] - rmn[None, :, :])**2 / (2 * sig**2)) # Kernel not summed (cyclic permutations) k1n = (se_1j2m[:, None, :, 
None] * se_1j2m[None, :, None, :] * se_jkmn) k2n = (se_1jmn[:, None, :, :] * se_jk2m[:, :, None, :] * se_1j2m[None, :, :, None]) k3n = (se_1j2m[:, None, None, :] * se_jk2m[:, :, :, None] * se_1jmn[None, :, :, :]) # final shape is M1 M1 M2 M2 ker_loc = k1n * delta_perm1 + k2n * delta_perm3 + k3n * delta_perm5 # Faster version of cutoff (less calculations) cut_j = 0.5 * (1 + T.cos(np.pi * r1j / rc)) cut_m = 0.5 * (1 + T.cos(np.pi * r2m / rc)) cut_jk = cut_j[:, None] * cut_j[None, :] * 0.5 * ( 1 + T.cos(np.pi * rjk / rc)) cut_mn = cut_m[:, None] * cut_m[None, :] * 0.5 * ( 1 + T.cos(np.pi * rmn / rc)) # -------------------------------------------------- # REMOVE DIAGONAL ELEMENTS # -------------------------------------------------- # remove diagonal elements AND lower triangular ones from first configuration mask_jk = T.triu(T.ones_like(rjk)) - T.identity_like(rjk) # remove diagonal elements from second configuration mask_mn = T.ones_like(rmn) - T.identity_like(rmn) # Combine masks mask_jkmn = mask_jk[:, :, None, None] * mask_mn[None, None, :, :] # Apply mask and then apply cutoff functions ker_loc = ker_loc * mask_jkmn ker_loc = T.sum(ker_loc * cut_jk[:, :, None, None] * cut_mn[None, None, :, :]) ker_loc = T.exp(ker_loc / 20) # -------------------------------------------------- # FINAL FUNCTIONS # -------------------------------------------------- # energy energy kernel k_ee_fun = function([r1, r2, rho1, rho2, sig, theta, rc], ker_loc, on_unused_input='ignore') # energy force kernel k_ef_cut = T.grad(ker_loc, r2) k_ef_fun = function([r1, r2, rho1, rho2, sig, theta, rc], k_ef_cut, on_unused_input='ignore') # force force kernel k_ff_cut = T.grad(ker_loc, r1) k_ff_cut_der, updates = scan( lambda j, k_ff_cut, r2: T.grad(k_ff_cut[j], r2), sequences=T.arange(k_ff_cut.shape[0]), non_sequences=[k_ff_cut, r2]) k_ff_fun = function([r1, r2, rho1, rho2, sig, theta, rc], k_ff_cut_der, on_unused_input='ignore') # Save the function that we want to use for multiprocessing # This is necessary because theano is a crybaby and does not want to access the # Automaticallly stored compiled object from different processes with open(Mffpath / 'k3_ee_m.pickle', 'wb') as f: pickle.dump(k_ee_fun, f) with open(Mffpath / 'k3_ef_m.pickle', 'wb') as f: pickle.dump(k_ef_fun, f) with open(Mffpath / 'k3_ff_m.pickle', 'wb') as f: pickle.dump(k_ff_fun, f) else: print("Loading Kernels") with open(Mffpath / "k3_ee_m.pickle", 'rb') as f: k_ee_fun = pickle.load(f) with open(Mffpath / "k3_ef_m.pickle", 'rb') as f: k_ef_fun = pickle.load(f) with open(Mffpath / "k3_ff_m.pickle", 'rb') as f: k_ff_fun = pickle.load(f) # WRAPPERS (we don't want to plug the position of the central element every time) def km_ee(conf1, conf2, sig, theta, rc): """ Many body kernel for energy-energy correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (float): scalar valued energy-energy many-body kernel """ return k_ee_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) def km_ef(conf1, conf2, sig, theta, rc): """ Many body kernel for energy-force correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. 
sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (array): 3x1 energy-force many-body kernel """ return -k_ef_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) def km_ff(conf1, conf2, sig, theta, rc): """ Many body kernel for force-force correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (matrix): 3x3 force-force many-body kernel """ return k_ff_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) logger.info("Ended compilation of theano many body kernels") return km_ee, km_ef, km_ff
    cxb = thcross(r3 - r2, r2 - r1)
    cxb /= cxb.norm(2)
    angle = th.arccos(th.dot(bxa, cxb))
    return angle + shift

def th_dihedral_angle_rec(r0, r1, r2, r3, shift=0.):
    # reciprocal of the angle
    bxa = thcross(r2 - r1, r1 - r0)
    bxa /= bxa.norm(2)
    cxb = thcross(r3 - r2, r2 - r1)
    cxb /= cxb.norm(2)
    angle = 2 * sp.pi - th.arccos(th.dot(bxa, cxb))
    return angle + shift

thr0, thr1, thr2, thr3 = th.dvectors('thr0', 'thr1', 'thr2', 'thr3')
angle = th_dihedral_angle(thr0, thr1, thr2, thr3)
angle_rec = th_dihedral_angle_rec(thr0, thr1, thr2, thr3)
grad_angle = th.grad(angle, [thr0, thr1, thr2, thr3])
grad_angle_rec = th.grad(angle_rec, [thr0, thr1, thr2, thr3])

# fun = theano.function([thr0, thr1, thr2, thr3], angle)
d_angle = theano.function([thr0, thr1, thr2, thr3], grad_angle,
                          allow_input_downcast=True)
d_angle_rec = theano.function([thr0, thr1, thr2, thr3], grad_angle_rec,
                              allow_input_downcast=True)

angle_s = th_dihedral_angle(thr0, thr1, thr2, thr3, shift=PSI_SHIFT)
angle_rec_s = th_dihedral_angle_rec(thr0, thr1, thr2, thr3, shift=PSI_SHIFT)
grad_angle_s = th.grad(angle_s, [thr0, thr1, thr2, thr3])
grad_angle_rec_s = th.grad(angle_rec_s, [thr0, thr1, thr2, thr3])
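A hypothetical usage sketch of the compiled gradient function `d_angle` above; the four atom positions are arbitrary illustrative coordinates.

import numpy as np

# Gradient of the dihedral angle with respect to the four atom positions.
r0 = np.array([0.0, 0.0, 0.0])
r1 = np.array([1.0, 0.0, 0.0])
r2 = np.array([1.0, 1.0, 0.0])
r3 = np.array([1.0, 1.0, 1.0])

g0, g1, g2, g3 = d_angle(r0, r1, r2, r3)   # one 3-vector per atom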
        return -10.0

class Pxf(tt.Op):
    __props__ = ()
    itypes = [tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar,
              tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar]
    otypes = [tt.dscalar]

    def perform(self, node, inputs, outputs):
        bs, c, g1, kxmax, p50, T, I, D, s = inputs
        px = pxf(bs, c, g1, kxmax, p50, T, I, D, s)
        outputs[0][0] = np.array(px)

''' Simulation '''
# Soil moisture simulation
Estt, Rstt = tt.dvectors('Estt', 'Rstt')
sstt = tt.dscalar('sstt')
smd, updates = theano.scan(fn=lambda E, R, s: tt.minimum(s - E + R, 1),
                           sequences=[Estt, Rstt],
                           outputs_info=[sstt])
sf = theano.function(inputs=[Estt, Rstt, sstt], outputs=smd, updates=updates)
ss = sf(vnod * 0.01, Rfod / 1000 / n / 3 * intercept, 0.8)

# Sap flow
Tvntt, Ivntt, Dvntt, svntt = tt.dvectors('Tvntt', 'Ivntt', 'Dvntt', 'svntt')

def step(T, I, D, s, alpha, bs, c, g1, kxmax, p50, Z):
    ps = pe * s ** (-beta)                           # Soil water potential
    px = Pxf()(bs, c, g1, kxmax, p50, T, I, D, s)    # Xylem water potential
    slope = 16 + tt.exp(p50) * 1092                  # Slope - xylem vulnerability
def compile_theano(): """ This function generates theano compiled kernels for energy and force learning ker_jkmn_withcutoff = ker_jkmn #* cutoff_ikmn The position of the atoms relative to the centrla one, and their chemical species are defined by a matrix of dimension Mx5 Returns: k3_ee (func): energy-energy kernel k3_ef (func): energy-force kernel k3_ff (func): force-force kernel """ if not (os.path.exists(Mffpath / 'k3_ee_s.pickle') and os.path.exists(Mffpath / 'k3_ef_s.pickle') and os.path.exists(Mffpath / 'k3_ff_s.pickle')): print("Building Kernels") import theano.tensor as T from theano import function, scan logger.info("Started compilation of theano three body kernels") # -------------------------------------------------- # INITIAL DEFINITIONS # -------------------------------------------------- # positions of central atoms r1, r2 = T.dvectors('r1d', 'r2d') # positions of neighbours rho1, rho2 = T.dmatrices('rho1', 'rho2') # hyperparameter sig = T.dscalar('sig') # cutoff hyperparameters theta = T.dscalar('theta') rc = T.dscalar('rc') # positions of neighbours without chemical species rho1s = rho1[:, 0:3] rho2s = rho2[:, 0:3] # -------------------------------------------------- # RELATIVE DISTANCES TO CENTRAL VECTOR AND BETWEEN NEIGHBOURS # -------------------------------------------------- # first and second configuration r1j = T.sqrt(T.sum((rho1s[:, :] - r1[None, :])**2, axis=1)) r2m = T.sqrt(T.sum((rho2s[:, :] - r2[None, :])**2, axis=1)) rjk = T.sqrt( T.sum((rho1s[None, :, :] - rho1s[:, None, :])**2, axis=2)) rmn = T.sqrt( T.sum((rho2s[None, :, :] - rho2s[:, None, :])**2, axis=2)) # -------------------------------------------------- # BUILD THE KERNEL # -------------------------------------------------- # Squared exp of differences se_1j2m = T.exp(-(r1j[:, None] - r2m[None, :])**2 / (2 * sig**2)) se_jkmn = T.exp( -(rjk[:, :, None, None] - rmn[None, None, :, :])**2 / (2 * sig**2)) se_jk2m = T.exp(-(rjk[:, :, None] - r2m[None, None, :])**2 / (2 * sig**2)) se_1jmn = T.exp(-(r1j[:, None, None] - rmn[None, :, :])**2 / (2 * sig**2)) # Kernel not summed (cyclic permutations) k1n = (se_1j2m[:, None, :, None] * se_1j2m[None, :, None, :] * se_jkmn) k2n = (se_1jmn[:, None, :, :] * se_jk2m[:, :, None, :] * se_1j2m[None, :, :, None]) k3n = (se_1j2m[:, None, None, :] * se_jk2m[:, :, :, None] * se_1jmn[None, :, :, :]) # final shape is M1 M1 M2 M2 ker = k1n + k2n + k3n cut_j = 0.5 * (1 + T.cos(np.pi * r1j / rc)) * ( (T.sgn(rc - r1j) + 1) / 2) cut_m = 0.5 * (1 + T.cos(np.pi * r2m / rc)) * ( (T.sgn(rc - r2m) + 1) / 2) cut_jk = cut_j[:, None] * cut_j[None, :] * 0.5 * ( 1 + T.cos(np.pi * rjk / rc)) * ((T.sgn(rc - rjk) + 1) / 2) cut_mn = cut_m[:, None] * cut_m[None, :] * 0.5 * ( 1 + T.cos(np.pi * rmn / rc)) * ((T.sgn(rc - rmn) + 1) / 2) # -------------------------------------------------- # REMOVE DIAGONAL ELEMENTS AND ADD CUTOFF # -------------------------------------------------- # remove diagonal elements AND lower triangular ones from first configuration mask_jk = T.triu(T.ones_like(rjk)) - T.identity_like(rjk) # remove diagonal elements from second configuration mask_mn = T.ones_like(rmn) - T.identity_like(rmn) # Combine masks mask_jkmn = mask_jk[:, :, None, None] * mask_mn[None, None, :, :] # Apply mask and then apply cutoff functions ker = ker * mask_jkmn ker = T.sum(ker * cut_jk[:, :, None, None] * cut_mn[None, None, :, :]) # -------------------------------------------------- # FINAL FUNCTIONS # -------------------------------------------------- # global energy energy kernel k_ee_fun = 
function([r1, r2, rho1, rho2, sig, theta, rc], ker, on_unused_input='ignore') # global energy force kernel k_ef = T.grad(ker, r2) k_ef_fun = function([r1, r2, rho1, rho2, sig, theta, rc], k_ef, on_unused_input='ignore') # local force force kernel k_ff = T.grad(ker, r1) k_ff_der, updates = scan(lambda j, k_ff, r2: T.grad(k_ff[j], r2), sequences=T.arange(k_ff.shape[0]), non_sequences=[k_ff, r2]) k_ff_fun = function([r1, r2, rho1, rho2, sig, theta, rc], k_ff_der, on_unused_input='ignore') # Save the function that we want to use for multiprocessing # This is necessary because theano is a crybaby and does not want to access the # Automaticallly stored compiled object from different processes with open(Mffpath / 'k3_ee_s.pickle', 'wb') as f: pickle.dump(k_ee_fun, f) with open(Mffpath / 'k3_ef_s.pickle', 'wb') as f: pickle.dump(k_ef_fun, f) with open(Mffpath / 'k3_ff_s.pickle', 'wb') as f: pickle.dump(k_ff_fun, f) else: print("Loading Kernels") with open(Mffpath / "k3_ee_s.pickle", 'rb') as f: k_ee_fun = pickle.load(f) with open(Mffpath / "k3_ef_s.pickle", 'rb') as f: k_ef_fun = pickle.load(f) with open(Mffpath / "k3_ff_s.pickle", 'rb') as f: k_ff_fun = pickle.load(f) # WRAPPERS (we don't want to plug the position of the central element every time) def k3_ee(conf1, conf2, sig, theta, rc): """ Three body kernel for global energy-energy correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (float): scalar valued energy-energy 3-body kernel """ return k_ee_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) def k3_ef(conf1, conf2, sig, theta, rc): """ Three body kernel for global energy-force correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (array): 3x1 energy-force 3-body kernel """ return -k_ef_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) def k3_ff(conf1, conf2, sig, theta, rc): """ Three body kernel for local force-force correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (matrix): 3x3 force-force 3-body kernel """ return k_ff_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) logger.info("Ended compilation of theano three body kernels") return k3_ee, k3_ef, k3_ff
def __init__(self, params,correct, samples = 500,batch_size=None): ker = kernel() self.samples = samples self.params = params self.batch_size=batch_size #データの保存ファイル model_file_name = 'model2' + '.save' #もしこれまでに作ったのがあるならロードする try: print ('Trying to load model...') with open(model_file_name, 'rb') as file_handle: obj = pickle.load(file_handle) self.f, self.g= obj print ('Loaded!') return except: print ('Failed. Creating a new model...') X,Y,X_test,mu,Sigma_b,Z,eps_NQ,eps_M =\ T.dmatrices('X','Y','X_test','mu','Sigma_b','Z','eps_NQ','eps_M') Wx, Ws, Wu=\ T.dmatrices('Wx', 'Ws', 'Wu') bx, bs, bu=\ T.dvectors('bx', 'bs', 'bu') gamma_x,beta_x,gamma_u,beta_u,gamma_s,beta_s=\ T.dvectors("gamma_x","beta_x","gamma_u","beta_u","gamma_s","beta_s") lhyp = T.dvector('lhyp') ls=T.dvector('ls') (M, D), N, Q = Z.shape, X.shape[0], X.shape[1] #変数の正の値への制約条件 beta = T.exp(ls[0]) #beta=T.exp(lhyp[0]) sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q]) #Sigma=T.exp(self.Sigma_b) #xについてはルートを取らなくても対角行列なので問題なし #uについては対角でないのでコレスキー分解するとかして三角行列を作る必要がある Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b)))) #スケール変換 mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma #隠れ層の生成 out1=self.neural_net_predict(Wx,bx,gamma_x,beta_x,X) m=self.neural_net_predict(Wu,bu,gamma_u,beta_u,out1) S=self.neural_net_predict(Ws,bs,gamma_s,beta_s,out1) #outputs1 = T.dot(X,Wx) + bx #m = T.dot(out1,Wu) + bu #S=T.dot(out1,Ws) + bs S=T.exp(S) S=T.sqrt(S) Xtilda = m+S*eps_NQ U = mu_scaled+Sigma_scaled.dot(eps_M) print ('Setting up cache...') Kmm = ker.RBF(sf2, l, Z) KmmInv = sT.matrix_inverse(Kmm) #KmmDet=theano.sandbox.linalg.det(Kmm) #KmmInv_cache = sT.matrix_inverse(Kmm) #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm') #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache') #復習:これは員数をZ,lhypとした関数kmmInv_cacheをコンパイルしている。つまり逆行列はzとハイパーパラメタの関数になった #self.update_KmmInv_cache()#実際に数値を入れてkinnvを計算させている #逆行列の微分関数を作っている #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'), # 'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')} print ('Modeling...') Kmn = ker.RBF(sf2,l,Z,Xtilda) Knn = ker.RBF(sf2,l,Xtilda,Xtilda) Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn)) Kinterval=T.dot(KmmInv,Kmn) mean_U=T.dot(Kinterval.T,U) Covariance = beta LL = (self.log_mvn(X, mean_U, Covariance) - 0.5*beta*T.sum((T.eye(N)*Ktilda)))*correct KL_X = -self.KLD_X(m,S)*correct KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv) print ('Compiling model ...') inputs = {'X': X, 'Z': Z,'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, 'eps_M': eps_M, 'eps_NQ': eps_NQ,\ "Wx":Wx, "bx":bx, "Wu":Wu,"bu":bu, "Ws":Ws, "bs":bs,\ "gamma_x":gamma_x,"beta_x":beta_x,"gamma_u":gamma_u,"beta_u":beta_u,"gamma_s":gamma_s,"beta_s":beta_s} z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\ for n,f in zip(['Xtilda','U', 'LL', 'KL_U', 'KL_X'], [Xtilda,U, LL, KL_U, KL_X])} wrt = {'Z': Z,'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, "Wx":Wx, "bx":bx, "Wu":Wu,"bu":bu, "Ws":Ws, "bs":bs,\ "gamma_x":gamma_x,"beta_x":beta_x,"gamma_u":gamma_u,"beta_u":beta_u,"gamma_s":gamma_s,"beta_s":beta_s} self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, on_unused_input='ignore') for gn,gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])} for vn, vv in wrt.items()} with open(model_file_name, 
'wb') as file_handle: print ('Saving model...') sys.setrecursionlimit(2000) pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
def __theano_build__(self): E, V, U, W, b, c, ML = self.E, self.V, self.U, self.W, self.b, self.c, self.ML batch_size = self.batch_size # mx = T.imatrix('mx') # my = T.imatrix('my') start = T.iscalar('start') batch_len = T.iscalar('batch_len') # x = T.ivector('x') # y = T.ivector('y') bx = T.ivectors(batch_size) by = T.ivectors(batch_size) for i in np.arange(batch_size): bx[i] = T.cast(self.gx[start+i*batch_len:start+(i+1)*batch_len], dtype='int32') by[i] = T.cast(self.gy[start+i*batch_len:start+(i+1)*batch_len], dtype='int32') prediction = T.ivectors(batch_size) bce = T.dvectors(batch_size) bout = T.dvectors(batch_size) def forward_prop_step(x_t, s_t1_prev, s_t2_prev): # This is how we calculated the hidden state in a simple RNN. No longer! # s_t = T.tanh(U[:,x_t] + W.dot(s_t1_prev)) # print "are we here?" # Word embedding layer # print type(x_t) x_e = E[:,x_t] # print "are we here?" # weight for MLE weight = ML[:,x_t] # GRU Layer 1 z_t1 = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_t1_prev) + b[0]) r_t1 = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_t1_prev) + b[1]) c_t1 = T.tanh(U[2].dot(x_e) + W[2].dot(s_t1_prev * r_t1) + b[2]) s_t1 = (T.ones_like(z_t1) - z_t1) * c_t1 + z_t1 * s_t1_prev # GRU Layer 2 z_t2 = T.nnet.hard_sigmoid(U[3].dot(s_t1) + W[3].dot(s_t2_prev) + b[3]) r_t2 = T.nnet.hard_sigmoid(U[4].dot(s_t1) + W[4].dot(s_t2_prev) + b[4]) c_t2 = T.tanh(U[5].dot(s_t1) + W[5].dot(s_t2_prev * r_t2) + b[5]) s_t2 = (T.ones_like(z_t2) - z_t2) * c_t2 + z_t2 * s_t2_prev # Final output calculation # Theano's softmax returns a matrix with one row, we only need the row o_t = T.nnet.softmax(V.dot(s_t2) + c + weight)[0] return [o_t, s_t1, s_t2] for bs in np.arange(batch_size): # o will be the output vector for each word in vocabulary [bout[bs], s, s2], updates = theano.scan( forward_prop_step, sequences=bx[bs], truncate_gradient=self.bptt_truncate, outputs_info=[None, dict(initial=T.zeros(self.hidden_dim)), dict(initial=T.zeros(self.hidden_dim))]) #index prediction prediction[bs] = T.argmax(bout[bs], axis=1) bce[bs] = T.sum(T.nnet.categorical_crossentropy(bout[bs], by[bs])) cost = T.mean(bce) + 0.01*(T.sum(E**2) + T.sum(V**2) + T.sum(U**2) + T.sum(W**2) + T.sum(b**2) + T.sum(c**2)) # Gradients dE = T.grad(cost, E) dU = T.grad(cost, U) dW = T.grad(cost, W) db = T.grad(cost, b) dV = T.grad(cost, V) dc = T.grad(cost, c) # for minibatch, it goes like this: # loop through all samples in batch and get sample derivative # accumulative all sample derivative to get batch derivative # update all parameters using batch derivative # Assign functions self.predict_prob = theano.function([start,batch_len], bout) self.predict_class = theano.function([start,batch_len], prediction) self.optimization_error = theano.function([start,batch_len],cost) self.cross_entropy_loss = theano.function([start,batch_len], T.mean(bce)) self.bptt = theano.function([start,batch_len], [dE, dU, dW, db, dV, dc]) # SGD parameters learning_rate = T.scalar('learning_rate') decay = T.scalar('decay') # rmsprop cache updates mE = decay * self.mE + (1 - decay) * dE ** 2 mU = decay * self.mU + (1 - decay) * dU ** 2 mW = decay * self.mW + (1 - decay) * dW ** 2 mV = decay * self.mV + (1 - decay) * dV ** 2 mb = decay * self.mb + (1 - decay) * db ** 2 mc = decay * self.mc + (1 - decay) * dc ** 2 #rmsprop self.batch_step = theano.function( [start,batch_len,learning_rate, theano.In(decay, value=0.9)], [], updates=[(E, E - learning_rate * dE / T.sqrt(mE + 1e-6)), (U, U - learning_rate * dU / T.sqrt(mU + 1e-6)), (W, W - learning_rate * dW / 
T.sqrt(mW + 1e-6)), (V, V - learning_rate * dV / T.sqrt(mV + 1e-6)), (b, b - learning_rate * db / T.sqrt(mb + 1e-6)), (c, c - learning_rate * dc / T.sqrt(mc + 1e-6)), (self.mE, mE), (self.mU, mU), (self.mW, mW), (self.mV, mV), (self.mb, mb), (self.mc, mc) ]) tx = T.ivector() ty = T.ivector() [tout, _, _], _ = theano.scan(forward_prop_step, sequences=tx, truncate_gradient=self.bptt_truncate, outputs_info=[None, dict(initial=T.zeros(self.hidden_dim)), dict(initial=T.zeros(self.hidden_dim)) ]) sce = T.sum(T.nnet.categorical_crossentropy(tout, ty)) self.example_loss = theano.function([tx,ty], sce, on_unused_input='warn') self.example_prediction = theano.function([tx,ty],[tout, T.argmax(tout, axis=1), sce])
    dpxdT = -dfdT/dfdpx
    dpxdI = -dfdI/dfdpx
    dpxdD = -dfdD/dfdpx
    dpxds = -dfds/dfdpx
    return [g[0]*dpxdbs, g[0]*dpxdc, g[0]*dpxdg1, g[0]*dpxdkxmax, g[0]*dpxdp50,
            g[0]*dpxdT, g[0]*dpxdI, g[0]*dpxdD, g[0]*dpxds]

''' Sap flow simulation '''
def step(T, I, D, sp, alpha, bs, c, g1, kxmax, p50, Z):
    ps = pe * sp ** (-beta)                           # Soil water potential
    px = Pxf()(bs, c, g1, kxmax, p50, T, I, D, sp)    # Xylem water potential
    slope = 16 + tt.exp(p50) * 1092                   # Slope - xylem vulnerability
    PLC = (1/(1+tt.exp(slope/25*(px-p50))) - 1/(1+tt.exp(slope/25*(-p50)))) \
        / (1 - 1/(1+tt.exp(slope/25*(-p50))))         # PLC
    kx = kxmax * (1 - PLC)                            # Xylem conductance
    vn = (kx * l * L * (ps - px) * u) / (1000 * n * Z) / alpha  # Sap flow
    return vn

Ttt, Itt, Dtt, stt = tt.dvectors('Ttt', 'Itt', 'Dtt', 'stt')
outputs, updates = theano.scan(fn=step,
                               sequences=[Ttt, Itt, Dtt, stt],
                               non_sequences=[alphaed, bsed, ced, g1ed, kxmaxed, p50ed, Zed])
simulation = theano.function(inputs=[Ttt, Itt, Dtt, stt, alphaed, bsed, ced,
                                     g1ed, kxmaxed, p50ed, Zed],
                             outputs=outputs, updates=updates)

''' Sampling '''
obs = pm.Normal('obs', mu=outputs, sd=sigmaed, observed=vnod)
#start = pm.find_MAP()
#print(start)
#step = pm.NUTS(profile = True)  # scaling = start
#db = pm.backends.Text(species)
#trace = pm.sample(1e3, step = step, chains = 1)  #, trace = db, start = start
#theano.printing.pydotprint(outputs, outfile="1.png", var_with_name_simple=True)
def compile_theano(): """ This function generates theano compiled kernels for global energy and force learning The position of the atoms relative to the central one, and their chemical species are defined by a matrix of dimension Mx5 here called r1 and r2. Returns: k2_ee (func): energy-energy kernel k2_ef (func): energy-force kernel k2_ff (func): force-force kernel """ if not (os.path.exists(Mffpath / 'k2_ee_m.pickle') and os.path.exists(Mffpath / 'k2_ef_m.pickle') and os.path.exists(Mffpath / 'k2_ff_m.pickle')): print("Building Kernels") import theano.tensor as T from theano import function, scan logger.info("Started compilation of theano two body kernels") # -------------------------------------------------- # INITIAL DEFINITIONS # -------------------------------------------------- # positions of central atoms r1, r2 = T.dvectors('r1d', 'r2d') # positions of neighbours rho1, rho2 = T.dmatrices('rho1', 'rho2') # lengthscale hyperparameter sig = T.dscalar('sig') # cutoff hyperparameters theta = T.dscalar('theta') rc = T.dscalar('rc') # positions of neighbours without chemical species (3D space assumed) rho1s = rho1[:, 0:3] rho2s = rho2[:, 0:3] alpha_1 = rho1[:, 3] # .flatten() alpha_2 = rho2[:, 3] # .flatten() alpha_j = rho1[:, 4] # .flatten() alpha_m = rho2[:, 4] # .flatten() # numerical kronecker def delta_alpha2(a1j, a2m): d = T.exp(-(a1j - a2m) ** 2 / (2 * 1e-5 ** 2)) return d # matrices determining whether couples of atoms have the same atomic number delta_alphas12 = delta_alpha2(alpha_1[:, None], alpha_2[None, :]) delta_alphasjm = delta_alpha2(alpha_j[:, None], alpha_m[None, :]) delta_alphas1m = delta_alpha2(alpha_1[:, None], alpha_m[None, :]) delta_alphasj2 = delta_alpha2(alpha_j[:, None], alpha_2[None, :]) # distances of atoms wrt to the central one and wrt each other in 1 and 2 r1j = T.sqrt(T.sum((rho1s[:, :] - r1[None, :]) ** 2, axis=1)) r2m = T.sqrt(T.sum((rho2s[:, :] - r2[None, :]) ** 2, axis=1)) # Get the squared exponential kernels se_jm = T.exp(-(r1j[:, None] - r2m[None, :]) ** 2 / (2 * sig ** 2)) # Define cutoff function cut_jm = 0.5*(1+T.cos(np.pi*r1j[:, None]/rc))*0.5*(1+T.cos(np.pi*r2m[None, :]/rc))*( (T.sgn(rc-r1j) + 1) / 2)*((T.sgn(rc-r2m) + 1) / 2) # Apply cutoffs and chemical species masks se_jm = se_jm*cut_jm * \ (delta_alphas12 * delta_alphasjm + delta_alphas1m * delta_alphasj2) ker = T.sum(se_jm) # -------------------------------------------------- # FINAL FUNCTIONS # -------------------------------------------------- # global energy energy kernel k_ee_fun = function([r1, r2, rho1, rho2, sig, theta, rc], ker, allow_input_downcast=False, on_unused_input='warn') # energy force kernel - Used to predict energies from forces k_ef = T.grad(ker, r2) k_ef_fun = function([r1, r2, rho1, rho2, sig, theta, rc], k_ef, allow_input_downcast=False, on_unused_input='warn') # force force kernel - it uses only local atom pairs to avoid useless computation k_ff = T.grad(ker, r1) k_ff_der, updates = scan(lambda j, k_ff, r2: T.grad(k_ff[j], r2), sequences=T.arange(k_ff.shape[0]), non_sequences=[k_ff, r2]) k_ff_fun = function([r1, r2, rho1, rho2, sig, theta, rc], k_ff_der, allow_input_downcast=False, on_unused_input='warn') # Save the function that we want to use for multiprocessing # This is necessary because theano is a crybaby and does not want to access the # Automaticallly stored compiled object from different processes with open(Mffpath / 'k2_ee_m.pickle', 'wb') as f: pickle.dump(k_ee_fun, f) with open(Mffpath / 'k2_ef_m.pickle', 'wb') as f: pickle.dump(k_ef_fun, f) with 
open(Mffpath / 'k2_ff_m.pickle', 'wb') as f: pickle.dump(k_ff_fun, f) else: print("Loading Kernels") with open(Mffpath / "k2_ee_m.pickle", 'rb') as f: k_ee_fun = pickle.load(f) with open(Mffpath / "k2_ef_m.pickle", 'rb') as f: k_ef_fun = pickle.load(f) with open(Mffpath / "k2_ff_m.pickle", 'rb') as f: k_ff_fun = pickle.load(f) # # -------------------------------------------------- # # WRAPPERS (we don't want to plug the position of the central element every time) # # -------------------------------------------------- def k2_ee(conf1, conf2, sig, theta, rc): """ Two body kernel for global energy-energy correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (float): scalar valued energy-energy 2-body kernel """ return k_ee_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) def k2_ef(conf1, conf2, sig, theta, rc): """ Two body kernel for global energy-force correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (array): 3x1 energy-force 2-body kernel """ return -k_ef_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) def k2_ff(conf1, conf2, sig, theta, rc): """ Two body kernel for energy-energy correlation Args: conf1 (array): first configuration. conf2 (array): second configuration. sig (float): lengthscale hyperparameter theta[0] theta (float): cutoff decay rate hyperparameter theta[1] rc (float): cutoff distance hyperparameter theta[2] Returns: kernel (matrix): 3x3 force-force 2-body kernel """ return k_ff_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta, rc) logger.info("Ended compilation of theano two body kernels") return k2_ee, k2_ef, k2_ff
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 07 14:55:14 2017

@author: B907-LGH
"""
import theano.tensor as T
import theano

a, b = T.dmatrices('a', 'b')
x, y = T.dvectors('x', 'y')

z = T.switch(T.lt(a, b), x, y)
test_switch = theano.function([a, b, x, y], z)

import numpy as np
X, Y = np.random.randn(4), np.random.randn(4)
A, B = np.random.randn(3, 4), np.random.randn(3, 4)

print 'X:'
print X
print
print 'Y:'
print Y
print
print 'A:'
    Q12 = matrix[y0, x1]
    Q22 = matrix[y1, x1]

    den12 = (x0-x1) / (y0-y1)
    den21 = (x0-x1) / (y1-y0)

    a0 = Q11 * x1*y1 / den12 + Q12 * x1*y0 / den21 + Q21 * x0*y1 / den21 + Q22 * x0*y0 / den12
    a1 = Q11 * y1 / den21 + Q12 * y0 / den12 + Q21 * y1 / den12 + Q22 * y0 / den21
    a2 = Q11 * x1 / den21 + Q12 * x1 / den12 + Q21 * x0 / den12 + Q22 * x0 / den21
    a3 = Q11 / den12 + Q12 / den21 + Q21 / den21 + Q22 / den12

    return a0, a1, a2, a3

# Theano operations
temp_vector, den_vector = tt.dvectors('temp_vector', 'den_vector')
temp_scalar, den_scalar = tt.dscalars('temp_scalar', 'den_scalar')
indexing_Qxx_ttfunction = function(
    inputs=[temp_vector, temp_scalar, den_vector, den_scalar],
    outputs=indexing_Qxx_tt(temp_vector, temp_scalar, den_vector, den_scalar))

x, y = tt.dscalars('x', 'y')
x0_limit, y0_limit, x1_limit, y1_limit = tt.dscalars('x0_limit', 'y0_limit',
                                                     'x1_limit', 'y1_limit')
matrixGrid = tt.dmatrix('matrixGrid')
bilinearInterpolationttfunction = function(
    inputs=[x, y, matrixGrid, x0_limit, y0_limit, x1_limit, y1_limit],
    outputs=biLinearInterpolation_v2_tt(x, y, matrixGrid, x0_limit, y0_limit,
                                        x1_limit, y1_limit))
biLinearInterpolation_CoeffsFunction_tt = function(
    inputs=[x, y, matrixGrid, x0_limit, y0_limit, x1_limit, y1_limit],
    outputs=biLinearInterpolation_Coeffs_tt(x, y, matrixGrid, x0_limit, y0_limit,
                                            x1_limit, y1_limit))

# Numpy steps
import numpy as np
import theano
import theano.tensor as T

# good reference: http://www.marekrei.com/blog/theano-tutorial/

# defining
r, u = T.dscalars('r', 'u')
m, H = T.dvectors('m', 'H')

# Landau model
helmoltz_energy = 0.5 * r * T.dot(m, m) + 0.25 * u * T.dot(m, m) * T.dot(m, m) - T.dot(H, m)

# numerical variables
grad_m = T.grad(helmoltz_energy, m)
alpha = T.dscalar('alpha')
init_m = T.sqrt(T.abs_(r) / u)

# Helmholtz free energy
energy_density = theano.function(inputs=[r, u, H, m], outputs=helmoltz_energy)
grad_energy_wrt_m = theano.function(inputs=[r, u, H, m], outputs=grad_m)
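A minimal usage sketch of the two compiled Landau-model functions above; the parameter values and the two-component order parameter are illustrative assumptions.

import numpy as np

# Evaluate the free energy and its gradient for a sample state.
r_val, u_val = -1.0, 1.0
H_val = np.array([0.0, 0.1])   # small external field
m_val = np.array([0.5, 0.0])   # order parameter

print(energy_density(r_val, u_val, H_val, m_val))     # scalar free energy
print(grad_energy_wrt_m(r_val, u_val, H_val, m_val))  # gradient wrt m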
import numpy as np
import theano
from theano import tensor as T
from theano import function
from theano.ifelse import ifelse

# tensor.gt (greater than), .ge (greater than or equal to)
# Similarly there are lt, le, eq, ne
# The evaluation of the above are all element-wise

# time.clock() can be used to get the time at any point in the operation.
#   tic_1 = time.clock()
#   # Operation
#   tic_2 = time.clock()
# tic_2 - tic_1 gives the time taken to run Operation.

a, b = T.dscalars(2)
x, y = T.dvectors(2)

z_switch = T.switch(T.le(a, b), T.mean(x), T.max(y))
z_ifelse = ifelse(T.gt(a, b), T.max(x), T.mean(y))

f_switch = function([a, b, x, y], z_switch, mode=theano.Mode(linker='vm'))
f_ifelse = function([a, b, x, y], z_ifelse, mode=theano.Mode(linker='vm'))

value1 = 2.3
value2 = 3.44
vector_1 = np.ones((4000, ))
vector_2 = [2.3, 4.5, 5.6, 7.8, 9, 10, 11, 12, 13, 14, 576, 456, 32467, 43598]

print f_switch(value1, value2, vector_1, vector_2)
print f_ifelse(value1, value2, vector_1, vector_2)
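A small sketch of the timing idea mentioned in the comments above, comparing the two compiled variants on the same inputs; the loop count is an arbitrary assumption.

import time

# Time the switch and ifelse versions over repeated calls.
tic_1 = time.time()
for _ in range(1000):
    f_switch(value1, value2, vector_1, vector_2)
tic_2 = time.time()
for _ in range(1000):
    f_ifelse(value1, value2, vector_1, vector_2)
tic_3 = time.time()

print("switch: %.4fs  ifelse: %.4fs" % (tic_2 - tic_1, tic_3 - tic_2))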
def traintutorial(inputmat, outputmat, w, b):
    inp, out = T.dvectors('inp', 'out')
# View more python tutorials on my Youtube and Youku channel!!!
# Youtube video tutorial: https://www.youtube.com/channel/UCdyjiB5H8Pu7aDTNVXTTpcg
# Youku video tutorial: http://i.youku.com/pythontutorial

# 7 - Activation function
"""
The available activation functions in theano can be found in this link:
http://deeplearning.net/software/theano/library/tensor/nnet/nnet.html

The activation functions include, but are not limited to, softplus, sigmoid,
relu, softmax, elu, tanh...

For the hidden layer, we could use relu, tanh, softplus...
For classification problems, we could use sigmoid or softmax for the output layer.
For regression problems, we could use a linear function for the output layer.
"""
theano.tensor.nnet.nnet.sigmoid(x)
theano.tensor.nnet.nnet.softplus(x)
theano.tensor.nnet.nnet.softmax(x)
theano.tensor.nnet.relu(x, alpha=0)

import theano.tensor as T

x, y, b = T.dvectors('x', 'y', 'b')
W = T.dmatrix('W')
y = T.nnet.sigmoid(T.dot(W, x) + b)
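A minimal sketch of compiling the sigmoid layer defined above and applying it to data; the shapes and random values are illustrative assumptions.

import numpy as np
import theano
import theano.tensor as T

# Compile a single sigmoid layer y = sigmoid(W x + b) and run it once.
x, b = T.dvectors('x', 'b')
W = T.dmatrix('W')
y = T.nnet.sigmoid(T.dot(W, x) + b)
layer = theano.function([W, x, b], y)

W_val = np.random.randn(3, 4)
x_val = np.random.randn(4)
b_val = np.zeros(3)
print(layer(W_val, x_val, b_val))   # three activations in (0, 1)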
def test_unification():
    x, y, a, b = tt.dvectors('xyab')
    x_s = tt.scalar('x_s')
    y_s = tt.scalar('y_s')
    c_tt = tt.constant(1, 'c')
    d_tt = tt.constant(2, 'd')
    # x_l = tt.vector('x_l')
    # y_l = tt.vector('y_l')
    # z_l = tt.vector('z_l')

    x_l = var('x_l')
    y_l = var('y_l')
    z_l = var('z_l')

    assert a == reify(x_l, {x_l: a}).reify()
    test_expr = mt.add(1, mt.mul(2, x_l))
    test_reify_res = reify(test_expr, {x_l: a})
    assert graph_equal(test_reify_res.reify(), 1 + 2 * a)

    z = tt.add(b, a)
    assert {x_l: z} == unify(x_l, z)
    assert b == unify(mt.add(x_l, a), mt.add(b, a))[x_l].reify()

    res = unify(mt.inv(mt.add(x_l, a)), mt.inv(mt.add(b, y_l)))
    assert res[x_l].reify() == b
    assert res[y_l].reify() == a

    # TODO: This produces a `DimShuffle` so that the scalar constant `1`
    # will match the dimensions of the vector `b`.  That `DimShuffle` isn't
    # handled by the logic variable form.
    # assert unify(mt.add(x_l, 1), mt.add(b_l, 1))[x] == b

    with variables(x):
        assert unify(x + 1, b + 1)[x].reify() == b

    assert unify(mt.add(x_l, a), mt.add(b, a))[x_l].reify() == b

    with variables(x):
        assert unify(x, b)[x] == b
        assert unify([x], [b])[x] == b
        assert unify((x, ), (b, ))[x] == b
        assert unify(x + 1, b + 1)[x].reify() == b
        assert unify(x + a, b + a)[x].reify() == b

    with variables(x):
        assert unify(a + b, a + x)[x].reify() == b

    mt_expr_add = mt.add(x_l, y_l)

    # The parameters are vectors
    tt_expr_add_1 = tt.add(x, y)
    assert graph_equal(
        tt_expr_add_1,
        reify(mt_expr_add, unify(mt_expr_add, tt_expr_add_1)).reify())

    # The parameters are scalars
    tt_expr_add_2 = tt.add(x_s, y_s)
    assert graph_equal(
        tt_expr_add_2,
        reify(mt_expr_add, unify(mt_expr_add, tt_expr_add_2)).reify())

    # The parameters are constants
    tt_expr_add_3 = tt.add(c_tt, d_tt)
    assert graph_equal(
        tt_expr_add_3,
        reify(mt_expr_add, unify(mt_expr_add, tt_expr_add_3)).reify())
def compile_theano():
    """
    This function generates theano compiled kernels for global energy and force learning.
    The positions of the atoms relative to the central one, and their chemical species,
    are defined by matrices of dimension Mx5, here called rho1 and rho2.

    Returns:
        k2_ee (func): energy-energy kernel
        k2_ef (func): energy-force kernel
        k2_ff (func): force-force kernel
        k2_ee_d (func): energy-energy kernel that takes a descriptor as one argument
        k2_ef_d (func): energy-force kernel that takes a descriptor as one argument
    """

    if not (os.path.exists(Mffpath / 'keam_ee_m.pickle') and
            os.path.exists(Mffpath / 'keam_ef_m.pickle') and
            os.path.exists(Mffpath / 'keam_ff_m.pickle') and
            os.path.exists(Mffpath / 'keam_eed_m.pickle') and
            os.path.exists(Mffpath / 'keam_efd_m.pickle')):
        print("Building Kernels")

        import theano.tensor as T
        from theano import function, scan
        logger.info("Started compilation of theano eam multi species kernels")

        # --------------------------------------------------
        # INITIAL DEFINITIONS
        # --------------------------------------------------

        # positions of central atoms
        r1, r2 = T.dvectors('r1d', 'r2d')
        # positions of neighbours
        rho1, rho2 = T.dmatrices('rho1', 'rho2')
        # lengthscale hyperparameter
        sig = T.dscalar('sig')
        # cutoff hyperparameter
        rc = T.dscalar('rc')
        # descriptor given as an input, used to map
        q1_descr = T.dscalar('q1_descr')
        # element of the central atom if the descriptor is given
        alpha_1_descr = T.dscalar('alpha_1_descr')
        # radius used at the denominator of the descriptor
        r0 = T.dscalar('r0')

        # positions of neighbours without chemical species (3D space assumed)
        rho1s = rho1[:, 0:3]
        rho2s = rho2[:, 0:3]

        alpha_1 = rho1[0, 3]
        alpha_2 = rho2[0, 3]

        # numerical kronecker delta
        def delta_alpha(a1j, a2m):
            d = T.exp(-(a1j - a2m)**2 / (2 * 1e-5**2))
            return d

        # flags determining whether couples of atoms have the same atomic number
        delta_alpha_12 = delta_alpha(alpha_1, alpha_2)
        delta_alpha_12_descr = delta_alpha(alpha_1_descr, alpha_2)

        # distances of the atoms w.r.t. the central one in configurations 1 and 2
        r1j = T.sqrt(T.sum((rho1s[:, :] - r1[None, :])**2, axis=1))
        r2m = T.sqrt(T.sum((rho2s[:, :] - r2[None, :])**2, axis=1))

        esp_term_1 = (r1j / r0 - 1)
        esp_term_2 = (r2m / r0 - 1)

        cut_1 = 0.5 * (1 + T.cos(np.pi * r1j / rc)) * ((T.sgn(rc - r1j) + 1) / 2)
        cut_2 = 0.5 * (1 + T.cos(np.pi * r2m / rc)) * ((T.sgn(rc - r2m) + 1) / 2)

        q1 = T.sum(T.exp(-esp_term_1) * cut_1)
        q2 = T.sum(T.exp(-esp_term_2) * cut_2)

        k = T.exp(-(q1 - q2)**2 / (2 * sig**2)) * delta_alpha_12
        k_descr = T.exp(-(q1_descr - q2)**2 / (2 * sig**2)) * delta_alpha_12_descr

        # energy-energy kernel
        k_ee_fun = function([r1, r2, rho1, rho2, sig, rc, r0], k,
                            allow_input_downcast=False, on_unused_input='warn')

        # energy-force kernel - used to predict energies from forces
        k_ef = T.grad(k, r2)
        k_ef_fun = function([r1, r2, rho1, rho2, sig, rc, r0], k_ef,
                            allow_input_downcast=False, on_unused_input='warn')

        # force-force kernel - it uses only local atom pairs to avoid useless computation
        k_ff = T.grad(k, r1)
        k_ff_der, updates = scan(lambda j, k_ff, r2: T.grad(k_ff[j], r2),
                                 sequences=T.arange(k_ff.shape[0]),
                                 non_sequences=[k_ff, r2])
        k_ff_fun = function([r1, r2, rho1, rho2, sig, rc, r0], k_ff_der,
                            allow_input_downcast=False, on_unused_input='warn')

        # energy-energy descriptor kernel
        k_ee_fun_d = function([r2, q1_descr, rho2, sig, rc, r0, alpha_1_descr], k_descr,
                              allow_input_downcast=False, on_unused_input='warn')

        # energy-force descriptor kernel
        k_ef_descr = T.grad(k_descr, r2)
        k_ef_fun_d = function([r2, q1_descr, rho2, sig, rc, r0, alpha_1_descr], k_ef_descr,
                              allow_input_downcast=False, on_unused_input='warn')

        # Save the compiled functions so they can be used with multiprocessing.
        # This is necessary because theano's automatically stored compiled objects
        # cannot be accessed from different processes, so we pickle them explicitly.
        with open(Mffpath / 'keam_ee_m.pickle', 'wb') as f:
            pickle.dump(k_ee_fun, f)
        with open(Mffpath / 'keam_ef_m.pickle', 'wb') as f:
            pickle.dump(k_ef_fun, f)
        with open(Mffpath / 'keam_ff_m.pickle', 'wb') as f:
            pickle.dump(k_ff_fun, f)
        with open(Mffpath / 'keam_eed_m.pickle', 'wb') as f:
            pickle.dump(k_ee_fun_d, f)
        with open(Mffpath / 'keam_efd_m.pickle', 'wb') as f:
            pickle.dump(k_ef_fun_d, f)

    else:
        print("Loading Kernels")
        with open(Mffpath / "keam_ee_m.pickle", 'rb') as f:
            k_ee_fun = pickle.load(f)
        with open(Mffpath / "keam_ef_m.pickle", 'rb') as f:
            k_ef_fun = pickle.load(f)
        with open(Mffpath / "keam_ff_m.pickle", 'rb') as f:
            k_ff_fun = pickle.load(f)
        with open(Mffpath / 'keam_eed_m.pickle', 'rb') as f:
            k_ee_fun_d = pickle.load(f)
        with open(Mffpath / 'keam_efd_m.pickle', 'rb') as f:
            k_ef_fun_d = pickle.load(f)

    # --------------------------------------------------
    # WRAPPERS (we don't want to plug in the position of the central element every time)
    # --------------------------------------------------

    def k2_ee(conf1, conf2, sig, rc, r0):
        """
        Eam kernel for global energy-energy correlation

        Args:
            conf1 (array): first configuration.
            conf2 (array): second configuration.
            sig (float): lengthscale hyperparameter theta[0]
            rc (float): cutoff distance hyperparameter theta[1]
            r0 (float): radius used at the denominator of the descriptor

        Returns:
            kernel (float): scalar valued energy-energy Eam kernel
        """
        return k_ee_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, rc, r0)

    def k2_ef(conf1, conf2, sig, rc, r0):
        """
        Eam kernel for global energy-force correlation

        Args:
            conf1 (array): first configuration.
            conf2 (array): second configuration.
            sig (float): lengthscale hyperparameter theta[0]
            rc (float): cutoff distance hyperparameter theta[1]
            r0 (float): radius used at the denominator of the descriptor

        Returns:
            kernel (array): 3x1 energy-force Eam kernel
        """
        return -k_ef_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, rc, r0)

    def k2_ff(conf1, conf2, sig, rc, r0):
        """
        Eam kernel for force-force correlation

        Args:
            conf1 (array): first configuration.
            conf2 (array): second configuration.
            sig (float): lengthscale hyperparameter theta[0]
            rc (float): cutoff distance hyperparameter theta[1]
            r0 (float): radius used at the denominator of the descriptor

        Returns:
            kernel (matrix): 3x3 force-force Eam kernel
        """
        return k_ff_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, rc, r0)

    def k2_ee_d(descr1, conf2, sig, rc, r0, alpha_1_descr):
        """
        Eam kernel for global energy-energy correlation, using a precomputed descriptor

        Args:
            descr1 (float): descriptor calculated for the first configuration.
            conf2 (array): second configuration.
            sig (float): lengthscale hyperparameter theta[0]
            rc (float): cutoff distance hyperparameter theta[1]
            r0 (float): radius used at the denominator of the descriptor
            alpha_1_descr (int): element of the central atom

        Returns:
            kernel (float): scalar valued energy-energy Eam kernel
        """
        return k_ee_fun_d(np.zeros(3), descr1, conf2, sig, rc, r0, alpha_1_descr)

    def k2_ef_d(descr1, conf2, sig, rc, r0, alpha_1_descr):
        """
        Eam kernel for global energy-force correlation, using a precomputed descriptor

        Args:
            descr1 (float): descriptor calculated for the first configuration.
            conf2 (array): second configuration.
            sig (float): lengthscale hyperparameter theta[0]
            rc (float): cutoff distance hyperparameter theta[1]
            r0 (float): radius used at the denominator of the descriptor
            alpha_1_descr (int): element of the central atom

        Returns:
            kernel (array): 3x1 energy-force Eam kernel
        """
        return -k_ef_fun_d(np.zeros(3), descr1, conf2, sig, rc, r0, alpha_1_descr)

    logger.info("Ended compilation of theano eam multi species kernels")

    return k2_ee, k2_ef, k2_ff, k2_ee_d, k2_ef_d
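A minimal usage sketch of the returned wrappers follows. The numeric values, the toy Mx5 configurations, and the assumption that `Mffpath`, `os`, `pickle`, `logger` and `np` are already defined at module level (as in the original module) are illustrative choices, not part of the code above; only the call signatures mirror the wrappers.

# Hypothetical usage sketch: values and column layout are made up; only the
# signatures follow the wrappers returned by compile_theano().
import numpy as np

k2_ee, k2_ef, k2_ff, k2_ee_d, k2_ef_d = compile_theano()

# Toy M x 5 configurations: three position columns followed by two species columns.
conf1 = np.array([[1.0, 0.0, 0.0, 26.0, 26.0],
                  [0.0, 1.2, 0.0, 26.0, 26.0]])
conf2 = np.array([[0.0, 0.0, 1.1, 26.0, 26.0]])

sig, rc, r0 = 1.0, 4.5, 2.0                 # made-up lengthscale, cutoff, descriptor radius

print(k2_ee(conf1, conf2, sig, rc, r0))     # scalar energy-energy kernel
print(k2_ef(conf1, conf2, sig, rc, r0))     # 3-component energy-force kernel
print(k2_ff(conf1, conf2, sig, rc, r0))     # 3x3 force-force kernel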
import theano
import theano.tensor as T
import numpy

# affine transform followed by softmax
x, y, b = T.dvectors('x', 'y', 'b')
W = T.dmatrix('W')
y = T.nnet.softmax(T.dot(W, x) + b)
out = theano.function([W, x, b], y)

# softmax function
x = T.dmatrix('x')
y = T.nnet.softmax(x)
softmax = theano.function([x], y)

# concat function
x = T.vector('x')
y = T.vector('y')
i = T.iscalar('i')
y1 = T.concatenate([x, y], axis=i)
concat = theano.function([x, y, i], y1)

# cosine similarity
x = T.matrix('x')
y = T.matrix('y')
sim = T.sum(x * y, axis=-1) / (T.sqrt(T.sum(x * x, axis=-1) * T.sum(y * y, axis=-1)) + 1e-13)
cos = theano.function([x, y], sim)

# test inputs
x = [1, 2]
b = [1, 2]
W = [[1, 1], [1, 2]]
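The compiled functions above can then be called directly. The sketch below uses the test values defined at the end of the snippet; the outputs given in the comments are only approximate.

# Small usage sketch of the compiled functions above; expected outputs are approximate.
print(out(W, x, b))                     # softmax of W.x + b = softmax([4, 7]), roughly [[0.05, 0.95]]
print(softmax([[1., 2.], [3., 4.]]))    # row-wise softmax of a matrix
print(concat([1., 2.], [3., 4.], 0))    # -> [1. 2. 3. 4.]
print(cos([[1., 0.]], [[0., 1.]]))      # cosine similarity per row -> roughly [0.]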
import theano
import theano.tensor as T
from theano import function, pp

# computing gradients
x = T.dscalar('x')
y = x ** 2
gy = T.grad(y, x)
print pp(gy)
f = function([x], gy)
print f(4)

# gradient of the logistic function
x = T.matrix('x')
s = T.sum(1 / (1 + T.exp(-x)))
gs = T.grad(s, x)
dlogistic = function([x], gs)
print dlogistic([[0, 1], [-1, -2]])

# computing the Jacobian
x = T.dvectors('x')
y = x ** 2
J, updates = theano.scan(lambda i, y, x: T.grad(y[i], x),
                         sequences=T.arange(y.shape[0]),
                         non_sequences=[y, x])
f = function([x], J, updates=updates)
print f([4, 4])
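Since the snippet above builds a Jacobian with `theano.scan`, a Hessian can be obtained the same way by differentiating each entry of the gradient of a scalar cost. The cost below is a made-up example added only to illustrate the pattern; it is not part of the original script.

# Follow-on sketch: Hessian of a made-up scalar cost, built with the same scan pattern.
x = T.dvector('x')
cost = T.sum(x ** 2)
gx = T.grad(cost, x)
H, updates = theano.scan(lambda i, gx, x: T.grad(gx[i], x),
                         sequences=T.arange(gx.shape[0]),
                         non_sequences=[gx, x])
hess = function([x], H, updates=updates)
print hess([4, 4])   # -> 2 * identity matrix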
def __init__(self, params, correct, samples=500, batch_size=None):
    ker = kernel()
    self.samples = samples
    self.params = params
    self.batch_size = batch_size

    # file used to save the model
    model_file_name = 'model2' + '.save'

    # if a previously built model exists, load it
    try:
        print('Trying to load model...')
        with open(model_file_name, 'rb') as file_handle:
            obj = pickle.load(file_handle)
            self.f, self.g = obj
            print('Loaded!')
        return
    except:
        print('Failed. Creating a new model...')

    X, Y, X_test, mu, Sigma_b, Z, eps_NQ, eps_M = \
        T.dmatrices('X', 'Y', 'X_test', 'mu', 'Sigma_b', 'Z', 'eps_NQ', 'eps_M')

    Wx, Ws, Wu = \
        T.dmatrices('Wx', 'Ws', 'Wu')

    bx, bs, bu = \
        T.dvectors('bx', 'bs', 'bu')

    gamma_x, beta_x, gamma_u, beta_u, gamma_s, beta_s = \
        T.dvectors("gamma_x", "beta_x", "gamma_u", "beta_u", "gamma_s", "beta_s")

    lhyp = T.dvector('lhyp')
    ls = T.dvector('ls')

    (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

    # constrain the variables to positive values
    beta = T.exp(ls[0])
    #beta=T.exp(lhyp[0])
    sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1 + Q])

    #Sigma=T.exp(self.Sigma_b)
    # for x no square root is needed, since the matrix is diagonal
    # for u the matrix is not diagonal, so a triangular matrix has to be built,
    # e.g. via a Cholesky decomposition
    Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b))))

    # rescaling
    mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma

    # build the hidden layers
    out1 = self.neural_net_predict(Wx, bx, gamma_x, beta_x, X)
    m = self.neural_net_predict(Wu, bu, gamma_u, beta_u, out1)
    S = self.neural_net_predict(Ws, bs, gamma_s, beta_s, out1)
    #outputs1 = T.dot(X,Wx) + bx
    #m = T.dot(out1,Wu) + bu
    #S=T.dot(out1,Ws) + bs

    S = T.exp(S)
    S = T.sqrt(S)

    Xtilda = m + S * eps_NQ
    U = mu_scaled + Sigma_scaled.dot(eps_M)

    print('Setting up cache...')

    Kmm = ker.RBF(sf2, l, Z)
    KmmInv = sT.matrix_inverse(Kmm)
    #KmmDet=theano.sandbox.linalg.det(Kmm)

    #KmmInv_cache = sT.matrix_inverse(Kmm)
    #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
    #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
    # note: the line above compiles KmmInv_cache as a function of Z and lhyp,
    # i.e. the inverse matrix becomes a function of Z and the hyperparameters
    #self.update_KmmInv_cache()  # actually evaluates KmmInv with numerical values
    # builds the derivative functions of the inverse matrix
    #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
    #               'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

    print('Modeling...')

    Kmn = ker.RBF(sf2, l, Z, Xtilda)
    Knn = ker.RBF(sf2, l, Xtilda, Xtilda)
    Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

    Kinterval = T.dot(KmmInv, Kmn)

    mean_U = T.dot(Kinterval.T, U)
    Covariance = beta

    LL = (self.log_mvn(X, mean_U, Covariance)
          - 0.5 * beta * T.sum((T.eye(N) * Ktilda))) * correct
    KL_X = -self.KLD_X(m, S) * correct
    KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)

    print('Compiling model ...')

    inputs = {'X': X, 'Z': Z, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls,
              'eps_M': eps_M, 'eps_NQ': eps_NQ,
              'Wx': Wx, 'bx': bx, 'Wu': Wu, 'bu': bu, 'Ws': Ws, 'bs': bs,
              'gamma_x': gamma_x, 'beta_x': beta_x, 'gamma_u': gamma_u, 'beta_u': beta_u,
              'gamma_s': gamma_s, 'beta_s': beta_s}

    # works around an error when taking derivatives w.r.t. inputs not in the graph
    z = 0.0 * sum([T.sum(v) for v in inputs.values()])

    self.f = {n: theano.function(list(inputs.values()), f + z, name=n, on_unused_input='ignore')
              for n, f in zip(['Xtilda', 'U', 'LL', 'KL_U', 'KL_X'],
                              [Xtilda, U, LL, KL_U, KL_X])}

    wrt = {'Z': Z, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls,
           'Wx': Wx, 'bx': bx, 'Wu': Wu, 'bu': bu, 'Ws': Ws, 'bs': bs,
           'gamma_x': gamma_x, 'beta_x': beta_x, 'gamma_u': gamma_u, 'beta_u': beta_u,
           'gamma_s': gamma_s, 'beta_s': beta_s}

    self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv + z, vv),
                                       name='d' + gn + '_d' + vn, on_unused_input='ignore')
                   for gn, gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])}
              for vn, vv in wrt.items()}

    with open(model_file_name, 'wb') as file_handle:
        print('Saving model...')
        sys.setrecursionlimit(2000)
        pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
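The pattern used above, one compiled Theano function per objective/parameter pair stored in dicts keyed by name, can be seen in isolation in the following toy sketch. The quadratic loss and the parameter names 'w' and 'b' are made up for illustration; only the compile-per-gradient structure mirrors the code above.

# Toy, self-contained sketch of the same pattern: a dict of compiled gradient functions,
# one per named parameter. The quadratic loss and the names 'w' and 'b' are hypothetical.
import theano
import theano.tensor as T

w, b = T.dvectors('w', 'b')
loss = T.sum(w ** 2) + T.sum(b ** 2)

inputs = [w, b]
wrt = {'w': w, 'b': b}

f_loss = theano.function(inputs, loss, name='loss')
g = {name: theano.function(inputs, T.grad(loss, var),
                           name='dloss_d' + name, on_unused_input='ignore')
     for name, var in wrt.items()}

print(f_loss([1., 2.], [0., 0.]))   # -> 5.0
print(g['w']([1., 2.], [0., 0.]))   # -> [2. 4.]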