def adam(params, cost_or_grads, alpha=3e-4, hps=None, epsilon=1e-8):
    """Build an Adam-style update op with all-reduced gradients and Polyak averaging.

    Args:
        params: variables to update.
        cost_or_grads: a list of gradients aligned with ``params``; anything
            that is not exactly a ``list`` is treated as a cost and
            differentiated with ``tf.gradients``.
        alpha: base step size.
        hps: hyper-parameter object.  ``train_its``, ``polyak_epochs``,
            ``beta1`` and ``weight_decay`` are read from it unconditionally,
            so the ``None`` default cannot actually be used.
        epsilon: constant added to the square root of the second moment.

    Returns:
        ``(train_op, polyak_swap_op, ema)``; the last two are passed through
        from ``polyak(params, beta2)``.
    """
    if type(cost_or_grads) is list:
        raw_grads = cost_or_grads
    else:
        raw_grads = tf.gradients(cost_or_grads, params)

    # The second-moment decay is derived from the schedule length and is also
    # the decay handed to polyak() below.
    beta2 = 1 - 1. / (hps.train_its * hps.polyak_epochs)

    # all-reduce
    reduced_grads = [Z.allreduce_mean(grad) for grad in raw_grads]

    # NOTE(review): in the TF1 tf.Variable signature the second positional
    # parameter is `trainable`, so these strings are probably not applied as
    # variable names -- confirm before relying on the names.
    step = tf.Variable(1., 'adam_t')
    # Step size with both bias-correction factors folded in.
    alpha_t = alpha * tf.sqrt((1. - tf.pow(beta2, step))) / \
        (1. - tf.pow(hps.beta1, step))
    ops = [step.assign_add(1)]

    for weight, grad in zip(params, reduced_grads):
        second = tf.Variable(tf.zeros(weight.get_shape()), weight.name + '_adam_m2')
        if hps.beta1 > 0:
            first = tf.Variable(tf.zeros(weight.get_shape()), weight.name + '_adam_m1')
            first_new = hps.beta1 * first + (1. - hps.beta1) * grad
            ops.append(first.assign(first_new))
        else:
            # Without momentum the raw gradient is used as the first moment.
            first_new = grad
        second_new = beta2 * second + (1. - beta2) * tf.square(grad)
        direction = first_new / (tf.sqrt(second_new) + epsilon)
        # The weight is multiplied by hps.weight_decay before the step is subtracted.
        weight_new = hps.weight_decay * weight - alpha_t * direction
        ops.append(second.assign(second_new))
        ops.append(weight.assign(weight_new))

    # Polyak averaging
    polyak_avg_op, polyak_swap_op, ema = polyak(params, beta2)
    train_op = tf.group(polyak_avg_op, *ops)
    return train_op, polyak_swap_op, ema
def to_tf(self, vecs):
    """Return ``x**a / (k**a + x**a)`` for the entry of ``vecs`` keyed by this node.

    The key is ``join_name(self.name_prefix, self.name)``.  ``a`` and ``k``
    are fetched with ``tf.get_variable`` under that key and initialised from
    ``self.a`` and ``self.k``.
    """
    scope = join_name(self.name_prefix, self.name)
    exponent = tf.get_variable(join_name(scope, 'a'), initializer=self.a)
    half_point = tf.get_variable(join_name(scope, 'k'), initializer=self.k)
    x_to_a = tf.pow(vecs[scope], exponent)
    k_to_a = tf.pow(half_point, exponent)
    return x_to_a / (k_to_a + x_to_a)
def _logloss(self):
    """Build the Poisson loss, add the optional penalty, and store it in ``self.loss``.

    With ``fx = design_ @ weights - offset`` and
    ``lam_ = scale * non_lin(fx) + eps`` the data term is
    ``sum(lam_ - obs_ * log(lam_))``.

    ``self.reg == 'l2'`` adds ``alpha`` times the sum of the entries of
    ``weights^T weights`` plus the squared ``scale`` and ``offset``.
    ``self.reg == 'l1'`` adds ``alpha`` times the summed absolute values of
    ``weights``, ``offset`` and ``scale``.

    Returns:
        The loss tensor (also kept in ``self.loss``).
    """
    alpha = self.alpha
    fx = tf.matmul(self.design_, self.weights) - self.offset
    lam = self.non_lin(fx)
    lam_ = tf.mul(self.scale, lam) + self.eps
    coef = tf.mul(self.obs_, tf.log(lam_))
    self.loss = tf.reduce_sum(lam_ - coef)
    if self.reg == 'l2':
        self.loss += alpha * tf.reduce_sum(
            tf.matmul(self.weights, self.weights, transpose_a=True))
        self.loss += alpha * tf.reduce_sum(tf.pow(self.scale, 2))
        self.loss += alpha * tf.reduce_sum(tf.pow(self.offset, 2))
    if self.reg == 'l1':
        # An L1 penalty is a sum of absolute values.  Each parameter group is
        # reduced separately so broadcasting between them cannot re-count
        # offset/scale entries, and negative values cannot lower the loss.
        self.loss += alpha * (tf.reduce_sum(tf.abs(self.weights))
                              + tf.reduce_sum(tf.abs(self.offset))
                              + tf.reduce_sum(tf.abs(self.scale)))
    return self.loss
def _logloss(self):
    """Build the Gaussian log loss, add the optional penalty, and store it in ``self.loss``.

    With ``fx = design_ @ weights - offset`` and
    ``lam_ = scale * non_lin(fx) + eps`` the data term is
    ``sum((log(obs_) - lam_) ** 2)`` reduced over axis 0 only, so a separate
    value is kept for every remaining column (one per neuron, according to
    the original comment).

    ``self.reg == 'l2'`` adds ``alpha`` times the sum of the entries of
    ``weights^T weights`` plus the squared ``scale`` and ``offset``.
    ``self.reg == 'l1'`` adds ``alpha`` times the summed absolute values of
    ``weights``, ``offset`` and ``scale``.  The penalties are scalars and are
    added to every element of the loss.

    Returns:
        The loss tensor (also kept in ``self.loss``).
    """
    alpha = self.alpha
    fx = tf.matmul(self.design_, self.weights) - self.offset
    lam = self.non_lin(fx)
    lam_ = tf.mul(self.scale, lam) + self.eps
    # returns a separate loss for each neuron
    self.loss = tf.reduce_sum(tf.pow(tf.log(self.obs_) - lam_, 2),
                              reduction_indices=[0])
    if self.reg == 'l2':
        self.loss += alpha * tf.reduce_sum(
            tf.matmul(self.weights, self.weights, transpose_a=True))
        self.loss += alpha * tf.reduce_sum(tf.pow(self.scale, 2))
        self.loss += alpha * tf.reduce_sum(tf.pow(self.offset, 2))
    if self.reg == 'l1':
        # An L1 penalty is a sum of absolute values.  Each parameter group is
        # reduced separately so broadcasting between them cannot re-count
        # offset/scale entries, and negative values cannot lower the loss.
        self.loss += alpha * (tf.reduce_sum(tf.abs(self.weights))
                              + tf.reduce_sum(tf.abs(self.offset))
                              + tf.reduce_sum(tf.abs(self.scale)))
    return self.loss
def lppool(inpOp, pnorm, kH, kW, dH, dW, padding):
    """Lp pooling: ``(sum over the window of x**pnorm) ** (1 / pnorm)``.

    The window sum is obtained as ``avg_pool(x**pnorm) * (kH * kW)``.

    Args:
        inpOp: input tensor handed to ``tf.nn.avg_pool``.
        pnorm: the norm order; 2 takes the ``square``/``sqrt`` shortcut.
        kH, kW: pooling window height and width.
        dH, dW: vertical and horizontal stride.
        padding: padding mode forwarded to ``tf.nn.avg_pool``.

    Returns:
        The pooled tensor.

    Side effects:
        Increments the module-level ``pool_counter`` used to name the op.
    """
    global pool_counter
    name = 'pool' + str(pool_counter)
    pool_counter += 1
    with tf.name_scope('lppool'):
        if pnorm == 2:
            pwr = tf.square(inpOp)
        else:
            pwr = tf.pow(inpOp, pnorm)
        subsamp = tf.nn.avg_pool(pwr,
                                 ksize=[1, kH, kW, 1],
                                 strides=[1, dH, dW, 1],
                                 padding=padding,
                                 name=name)
        subsamp_sum = tf.mul(subsamp, kH * kW)
        if pnorm == 2:
            out = tf.sqrt(subsamp_sum)
        else:
            # Float literal: with an integer pnorm, `1 / pnorm` is floor
            # division under Python 2 and would turn the exponent into 0.
            out = tf.pow(subsamp_sum, 1.0 / pnorm)
    return out
def init_main_block(self):
    """Assemble ``self.outputs`` from the bias, the linear term and the higher-order terms.

    Resets ``self.x_pow_cache`` and ``self.matmul_cache``, starts the output
    at ``self.b``, adds the order-1 product of ``self.train_x`` with
    ``self.w[0]``, and then for every order ``i`` in ``2..self.order`` adds

        sum_over_axis_1( dot**i - sum(coef * prod(pow_matmul(i, p)**q)) ) / i!

    where the ``(p, q, coef)`` combinations come from
    ``utils.powers_and_coefs(i)``.  Finally the loss and regularisation are
    initialised inside their own name scopes.
    """
    self.x_pow_cache = {}
    self.matmul_cache = {}
    self.outputs = self.b

    with tf.name_scope('linear_part'):
        self.outputs += matmul_wrapper(self.train_x, self.w[0], self.input_type)

    for order in range(2, self.order + 1):
        with tf.name_scope('order_{}'.format(order)):
            raw_dot = matmul_wrapper(
                self.train_x, self.w[order - 1], self.input_type)
            dot = tf.pow(raw_dot, order)
            ones_shape = tf.shape(dot)
            for in_pows, out_pows, coef in utils.powers_and_coefs(order):
                # Running product of the powered matmuls for this combination.
                term = tf.ones(ones_shape)
                for idx, in_pow in enumerate(in_pows):
                    term *= tf.pow(self.pow_matmul(order, in_pow), out_pows[idx])
                dot -= coef * term
            contribution = tf.reshape(tf.reduce_sum(dot, [1]), [-1, 1])
            contribution /= float(math.factorial(order))
            self.outputs += contribution

    with tf.name_scope('loss'):
        self.init_loss()

    with tf.name_scope('regularization'):
        self.init_regularization()
def normalized_loss(self, expected, predicted):
    """Cross-entropy ``-sum(expected * log(predicted))`` plus a weight penalty.

    Args:
        expected: target values, multiplied element-wise with the log of the
            predictions.
        predicted: predictions; clipped to ``[1e-15, 1 - 1e-15]`` so the
            logarithm stays finite.

    Returns:
        A scalar tensor: the cross-entropy plus
        ``params.normalization * (sum(w2**2) * sum(w3**2)) / params.hidden_units``.
    """
    tiny = 10 ** -15
    # Clip inside the graph.  NumPy's minimum/maximum cannot operate on a
    # symbolic tensor, and for array inputs tf.clip_by_value gives the same
    # values while accepting tensors as well.
    predicted = tf.clip_by_value(predicted, tiny, 1 - tiny)
    w2 = tf.reduce_sum(tf.pow(self.w2, 2))
    w3 = tf.reduce_sum(tf.pow(self.w3, 2))
    # NOTE(review): the two squared norms are multiplied, not added -- kept
    # as in the original; confirm this is the intended penalty.
    l2 = self.params.normalization * (w2 * w3) / self.params.hidden_units
    return -tf.reduce_sum(expected * tf.log(predicted)) + l2
def run_tf_simulation(self, c_in, h_in, timesteps=100, dt=0.005):
    """Integrate the excitatory/inhibitory rate equations with forward-Euler steps.

    Each step applies ``r <- r + dt * (-r + k * max(0, I)**n) / tau`` to both
    populations, where the input ``I`` is ``c_in * h_in`` plus the
    excitatory recurrent drive minus the inhibitory one.

    Args:
        c_in, h_in: constants multiplied together to form the external input.
        timesteps: number of Euler updates to run.
        dt: integration step.

    Returns:
        ``(rE, rI)``: the final rates, each as the one-element list returned
        by ``sess.run([...])``.
    """
    # The rate grids are N_pairs x N_pairs.  The recurrent drive is reshaped
    # to the same grid (previously a hard-coded 75 x 75) so it can be
    # assigned back to the rate variables for any N_pairs.
    grid_shape = [self.N_pairs, self.N_pairs]
    r_e = tf.Variable(tf.zeros(grid_shape))
    r_i = tf.Variable(tf.zeros(grid_shape))

    W_EE = tf.placeholder(tf.float32)
    W_EI = tf.placeholder(tf.float32)
    W_IE = tf.placeholder(tf.float32)
    W_II = tf.placeholder(tf.float32)
    k = tf.placeholder(tf.float32)
    n_E = tf.placeholder(tf.float32)
    n_I = tf.placeholder(tf.float32)
    tau_E = tf.placeholder(tf.float32)
    tau_I = tf.placeholder(tf.float32)

    c0 = tf.constant(c_in)
    h0 = tf.constant(h_in)

    def recurrent_drive(weights, rates):
        # Weighted rates summed over the last two axes, laid back out on the grid.
        summed = tf.reduce_sum(weights * rates, [1, 2])
        return tf.transpose(tf.reshape(summed, grid_shape))

    I_E = c0 * h0 + recurrent_drive(W_EE, r_e) - recurrent_drive(W_EI, r_i)
    I_I = c0 * h0 + recurrent_drive(W_IE, r_e) - recurrent_drive(W_II, r_i)

    # Rectified power-law steady-state rates.
    r_SS_E = k * tf.pow(tf.maximum(0., I_E), n_E)
    r_SS_I = k * tf.pow(tf.maximum(0., I_I), n_I)

    update_rE = tf.assign(r_e, r_e + dt * (-r_e + r_SS_E) / tau_E)
    update_rI = tf.assign(r_i, r_i + dt * (-r_i + r_SS_I) / tau_I)

    init = tf.initialize_all_variables()

    fd = {W_EE: self.W_EE.astype(np.float32),
          W_EI: self.W_EI.astype(np.float32),
          W_IE: self.W_IE.astype(np.float32),
          W_II: self.W_II.astype(np.float32),
          k: self.k.astype(np.float32),
          n_E: self.n_E.astype(np.float32),
          n_I: self.n_I.astype(np.float32),
          tau_E: self.tau_E.astype(np.float32),
          tau_I: self.tau_I.astype(np.float32)}

    with tf.Session() as sess:
        sess.run(init, feed_dict=fd)
        for _ in range(timesteps):
            # run the simulation
            sess.run([update_rE, update_rI], feed_dict=fd)
        # fetch the rates
        rE = sess.run([r_e], feed_dict=fd)
        rI = sess.run([r_i], feed_dict=fd)

    return rE, rI
def _meshgrid(height, width, fp):
    """Build a sampling grid augmented with radial terms around the points in ``fp``.

    A regular grid over ``[-1, 1] x [-1, 1]`` is flattened to ``h*w`` columns.
    For every point ``(px, py)`` taken from ``fp[:, :, 0]`` / ``fp[:, :, 1]``
    the distance ``d`` to each grid location is turned into
    ``d**2 * log(d + 1e-6)``.

    Returns:
        Concatenation along axis 1 of a row of ones, the x row, the y row and
        the radial rows -- ``[n, nx*ny+3, h*w]`` according to the original
        shape comments.
    """
    # x varies along the columns, y along the rows.
    ones_col = tf.ones(shape=tf.stack([height, 1]))
    x_row = tf.transpose(
        tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])
    grid_x = tf.matmul(ones_col, x_row)

    y_col = tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1)
    ones_row = tf.ones(shape=tf.stack([1, width]))
    grid_y = tf.matmul(y_col, ones_row)

    flat_x = tf.reshape(grid_x, (1, -1))
    flat_y = tf.reshape(grid_y, (1, -1))

    batched_x = tf.expand_dims(flat_x, 0)  # [1, 1, h*w]
    batched_y = tf.expand_dims(flat_y, 0)  # [1, 1, h*w]

    num_batch = tf.shape(fp)[0]
    px = tf.expand_dims(fp[:, :, 0], 2)  # [n, nx*ny, 1]
    py = tf.expand_dims(fp[:, :, 1], 2)  # [n, nx*ny, 1]

    # Distance from every point to every grid location; the small constant
    # keeps the logarithm finite where the distance is zero.
    dist = tf.sqrt(tf.pow(batched_x - px, 2.) + tf.pow(batched_y - py, 2.))
    radial = tf.pow(dist, 2) * tf.log(dist + 1e-6)  # [n, nx*ny, h*w]

    tiled_x = tf.tile(batched_x, tf.stack([num_batch, 1, 1]))  # [n, 1, h*w]
    tiled_y = tf.tile(batched_y, tf.stack([num_batch, 1, 1]))  # [n, 1, h*w]
    ones = tf.ones_like(tiled_x)  # [n, 1, h*w]

    return tf.concat([ones, tiled_x, tiled_y, radial], 1)  # [n, nx*ny+3, h*w]
def step_loss(self, state, action, time):
    """Discounted per-step costs: acceleration, progress and proximity.

    Every cost is multiplied by ``self.gamma ** time``.

    Args:
        state: tensor from which the host position (column
            ``self.x_h_field[0]``) and ``self.n_t`` target positions
            (starting at column ``self.x_t_field[0]``) are sliced.
        action: the action; its square is the acceleration cost.
        time: exponent of the discount factor.

    Returns:
        Transpose of the packed ``[acceleration, progress, proximity]`` costs.
    """
    discount_base = self.gamma
    host_x = tf.slice(state, [0, self.x_h_field[0]], [-1, 1])
    target_x = tf.slice(state, [0, self.x_t_field[0]], [-1, self.n_t])

    # 0. smooth acceleration policy
    accel_cost = tf.mul(tf.pow(discount_base, time), tf.square(action))

    # 1. forcing the host to move forward (until the right point of the roundabout)
    progress = tf.square(self.x_goal - host_x)
    progress_cost = tf.mul(tf.pow(discount_base, time), progress)
    progress_cost = tf.squeeze(progress_cost, squeeze_dims=[1])

    # 2. keeping distance from vehicles ahead
    gap = tf.abs(host_x - target_x)
    # punish only vehicles closer than "require distance"
    too_close = tf.nn.relu(self.require_distance - gap)
    # punish only w.r.t vehicles ahead
    ahead_only = tf.mul(too_close, tf.to_float(host_x < target_x))
    # NOTE(review): no axis is given, so this sums over every element, not
    # just over the vehicles of one row -- confirm this is intended.
    summed = tf.reduce_sum(ahead_only)
    # punish only when host is inside the roundabout (or very close to enter)
    inside = tf.to_float(host_x > -0.5 * self.host_length)
    proximity = tf.mul(summed, inside)
    proximity_cost = tf.mul(tf.pow(discount_base, time), proximity)
    proximity_cost = tf.squeeze(proximity_cost, squeeze_dims=[1])

    packed = tf.pack(values=[accel_cost, progress_cost, proximity_cost],
                     name='scan_return')
    return tf.transpose(packed)
def update_op(self, has_nan, amax):
    """Update the running statistics of ``log2(amax)`` and derive a new scale.

    Args:
        has_nan: boolean tensor; combined with ``is_inf(amax)`` to detect a
            non-finite step.
        amax: tensor whose base-2 logarithm is tracked.

    Returns:
        A tensor carrying the incremented ``self.iteration``; evaluating it
        also runs the assignment to ``self.scale``.
    """
    is_nonfinite = tf.logical_or(has_nan, tf.is_inf(amax))
    # On a non-finite step the sample is 2**log_max, otherwise log2(amax).
    x = tf.cond(is_nonfinite,
                lambda: tf.pow(2., self.log_max),
                lambda: tf.log(amax) / tf.log(tf.constant(2.)))

    # Fast moving average of x (decay beta1) and its bias-correction product.
    x_hat_assn = tf.assign(self.x_hat, self.beta1 * self.x_hat +
                           (1 - self.beta1) * x)
    b1_corr_assn = tf.assign(self.b1_correction,
                             self.b1_correction * self.beta1)
    # Read the variables only after both assignments have run.
    with tf.control_dependencies([x_hat_assn, b1_corr_assn]):
        mu = self.x_hat.read_value() / (1 - self.b1_correction.read_value())

    # Slow moving averages of x and x*x (decay beta2) and their correction.
    slow_x_hat_assn = tf.assign(self.slow_x_hat, self.beta2 * self.slow_x_hat +
                                (1 - self.beta2) * x)
    xsquared_hat_assn = tf.assign(self.xsquared_hat,
                                  self.beta2 * self.xsquared_hat +
                                  (1 - self.beta2) * (x * x))
    b2_corr_assn = tf.assign(self.b2_correction,
                             self.b2_correction * self.beta2)
    with tf.control_dependencies([slow_x_hat_assn, xsquared_hat_assn,
                                  b2_corr_assn]):
        e_xsquared = self.xsquared_hat.read_value() / \
            (1 - self.b2_correction.read_value())
        slow_mu = self.slow_x_hat.read_value() / \
            (1 - self.b2_correction.read_value())

    # Variance as E[x^2] - E[x]^2, clamped at zero before the square root.
    sigma2 = e_xsquared - (slow_mu * slow_mu)
    sigma = tf.sqrt(tf.maximum(sigma2, tf.constant(0.)))

    # Cut-off: the mean plus overflow_std_dev standard deviations.
    log_cutoff = sigma * self.overflow_std_dev + mu
    # NOTE(review): 16 is presumably the exponent budget of the target
    # number format -- confirm.
    log_difference = 16 - log_cutoff
    proposed_scale = tf.pow(2., log_difference)
    scale_update = tf.assign(self.scale,
                             tf.clip_by_value(proposed_scale, self.scale_min,
                                              self.scale_max))
    iter_update = tf.assign_add(self.iteration, 1)

    # Force the scale assignment to run whenever the result is evaluated.
    with tf.control_dependencies([scale_update]):
        return tf.identity(iter_update)
def adam2_old(params, cost_or_grads, lr=3e-4, mom1=0.9, mom2=0.999, epsilon=1e-8):
    """Adam-style update op using all-reduced gradients and squared gradients.

    Args:
        params: variables to update.
        cost_or_grads: a list of gradients aligned with ``params``; anything
            that is not exactly a ``list`` is treated as a cost and
            differentiated with ``tf.gradients``.
        lr: base step size.
        mom1: first-moment decay; ``<= 0`` disables momentum.
        mom2: second-moment decay before rescaling by ``hvd.size()``.
        epsilon: constant added to the square root of the second moment.

    Returns:
        A single op grouping every variable update.
    """
    if type(cost_or_grads) is list:
        raw_grads = cost_or_grads
    else:
        raw_grads = tf.gradients(cost_or_grads, params)

    # all-reduce
    mean_grads = [Z.allreduce_mean(grad) for grad in raw_grads]
    mean_sq_grads = [Z.allreduce_mean(tf.square(grad)) for grad in raw_grads]

    # Scale the second-moment forgetting rate by hvd.size(), floored at 0.
    mom2 = tf.maximum(0., 1. - (hvd.size() * (1 - mom2)))

    # NOTE(review): in the TF1 tf.Variable signature the second positional
    # parameter is `trainable`, so these strings are probably not applied as
    # variable names -- confirm before relying on the names.
    step = tf.Variable(1., 'adam_t')
    lr_t = lr * tf.sqrt((1. - tf.pow(mom2, step))) / (1. - tf.pow(mom1, step))
    ops = [step.assign_add(1)]

    for param, grad, sq_grad in zip(params, mean_grads, mean_sq_grads):
        sq_avg = tf.Variable(tf.zeros(param.get_shape()), param.name + '_adam_mg')
        if mom1 > 0:
            velocity = tf.Variable(tf.zeros(param.get_shape()), param.name + '_adam_v')
            velocity_new = mom1 * velocity + (1. - mom1) * grad
            ops.append(velocity.assign(velocity_new))
        else:
            velocity_new = grad
        sq_avg_new = mom2 * sq_avg + (1. - mom2) * sq_grad
        direction = velocity_new / (tf.sqrt(sq_avg_new) + epsilon)
        param_new = param - lr_t * direction
        ops.append(sq_avg.assign(sq_avg_new))
        ops.append(param.assign(param_new))

    return tf.group(*ops)
def rbf(x, y=0.0, sigma=1.0, l=1.0):
    """
    Element-wise squared-exponential kernel

    k(x, y) = sigma^2 exp{ -1/(2l^2) (x_i - y_i)^2 }

    Nothing is summed; the result has the broadcast shape of ``x - y``.
    """
    variance = tf.pow(sigma, 2.0)
    neg_inv_two_l2 = -1.0 / (2.0 * tf.pow(l, 2.0))
    squared_diff = tf.pow(x - y, 2.0)
    return variance * tf.exp(neg_inv_two_l2 * squared_diff)
def _build_fm(self):
    """Construct the second-order factorization machine part of the model.

    Computes ``0.5 * sum_k((x E)_k^2 - (x^2 E^2)_k)`` where ``x`` is the
    sparse feature tensor supplied by ``self.iterator`` and ``E`` is
    ``self.embedding``.

    Returns:
        obj: prediction score made by the factorization machine; the reduced
        axis 1 is kept with size 1.
    """
    with tf.variable_scope("fm_part"):
        feats = self.iterator
        sparse_x = tf.SparseTensor(
            feats.fm_feat_indices,
            feats.fm_feat_values,
            feats.fm_feat_shape,
        )
        # Same sparsity pattern, every value squared.
        sparse_x_sq = tf.SparseTensor(
            feats.fm_feat_indices,
            tf.pow(feats.fm_feat_values, 2),
            feats.fm_feat_shape,
        )
        square_of_sum = tf.pow(
            tf.sparse_tensor_dense_matmul(sparse_x, self.embedding), 2)
        sum_of_squares = tf.sparse_tensor_dense_matmul(
            sparse_x_sq, tf.pow(self.embedding, 2))
        fm_output = 0.5 * tf.reduce_sum(
            square_of_sum - sum_of_squares, 1, keep_dims=True)
    return fm_output
def loglik_discrete(a, b, y_, u_, output_collection=(), name=None):
    """Returns element-wise Weibull censored discrete log-likelihood.

    Unit-discretized weibull log-likelihood. loss=-loglikelihood.

    .. note::
        All input values must be of same type and shape.

    :param a:alpha. Positive nonzero `Tensor`.
    :type a: `float32` or `float64`.
    :param b:beta. Positive nonzero `Tensor`.
    :type b: `float32` or `float64`.
    :param y_: time to event. Positive nonzero `Tensor`
    :type y_: `float32` or `float64`.
    :param u_: indicator. 0.0 if right censored, 1.0 if uncensored `Tensor`
    :type u_: `float32` or `float64`.
    :param output_collection:name of the collection to collect result of this op.
    :type output_collection: Tuple of Strings.
    :param String name: name of the operation.
    :return: A `Tensor` of log-likelihoods of same shape as a, b, y_, u_.
    """
    with tf.name_scope(name, "weibull_loglik_discrete", [a, b, y_, u_]):
        # (y/a)^b at the start and at the end of the unit interval; the tiny
        # offset keeps the base non-zero (1e-9 safe, really).
        hazard_start = tf.pow(tf.div(y_ + 1e-35, a), b)
        hazard_end = tf.pow(tf.div(y_ + 1.0, a), b)

        event_term = tf.log(tf.exp(hazard_end - hazard_start) - 1.0)
        loglik = tf.multiply(u_, event_term) - hazard_end

        # NOTE(review): the value is passed to add_to_collection as a single
        # key even though the docstring describes a tuple of names --
        # confirm whether add_to_collections was intended.
        tf.add_to_collection(output_collection, loglik)
    return loglik
def disjunction_of_literals(literals, label="no_label"):
    # Truth value of the disjunction of `literals`, aggregated to one value.
    #
    # The tensors of the literals are concatenated along axis 1, combined per
    # row according to the module-level `default_tnorm`, and the rows are
    # then aggregated according to the module-level `default_aggregator`.
    # If `default_tnorm` matches none of the branches, `result` is never
    # bound; if `default_aggregator` matches none, the function falls
    # through and returns None.
    list_of_literal_tensors = [lit.tensor for lit in literals]
    literals_tensor = tf.concat(1,list_of_literal_tensors)
    # --- per-row disjunction ---
    if default_tnorm == "product":
        # 1 - prod(1 - x)
        result = 1.0-tf.reduce_prod(1.0-literals_tensor, 1, keep_dims=True)
    if default_tnorm == "yager2":
        # min(1, sqrt(sum(x^2)))
        result = tf.minimum(1.0, tf.sqrt(tf.reduce_sum(tf.square(literals_tensor), 1, keep_dims=True)))
    if default_tnorm == "luk":
        print "data aggregator is lukas"
        # min(1, sum(x))
        result = tf.minimum(1.0, tf.reduce_sum(literals_tensor, 1, keep_dims=True))
        PR(result)
    if default_tnorm == "goedel":
        # max(x)
        result = tf.reduce_max(literals_tensor, 1, keep_dims=True, name=label)
    # --- aggregation over all rows ---
    if default_aggregator == "product":
        return tf.reduce_prod(result, keep_dims=True)
    if default_aggregator == "mean":
        print "data aggregator is mean"
        return tf.reduce_mean(result, keep_dims=True, name=label)
    if default_aggregator == "gmean":
        # geometric mean: exp(sum(log x) / size)
        return tf.exp(tf.mul(tf.reduce_sum(tf.log(result), keep_dims=True),
                             tf.inv(tf.to_float(tf.size(result)))), name=label)
    if default_aggregator == "hmean":
        print "data aggregator is hmean"
        # harmonic mean: size / sum(1 / x)
        return tf.div(tf.to_float(tf.size(result)), tf.reduce_sum(tf.inv(result), keep_dims=True))
    if default_aggregator == "min":
        print "data aggregator is min"
        return tf.reduce_min(result, keep_dims=True, name=label)
    if default_aggregator == "qmean":
        print "data aggregator is qmean"
        # quadratic mean: sqrt(mean(x^2))
        return tf.sqrt(tf.reduce_mean(tf.square(result), keep_dims=True), name=label)
    if default_aggregator == "cmean":
        print "data aggregator is cmean"
        # cubic mean: mean(x^3)^(1/3)
        return tf.pow(tf.reduce_mean(tf.pow(result, 3), keep_dims=True), tf.inv(tf.to_float(3)), name=label)
def adam_updates(params, cost_or_grads, lr=0.001, mom1=0.9, mom2=0.999):
    """Adam optimizer.

    Args:
        params: variables to update.
        cost_or_grads: a list of gradients aligned with ``params``; anything
            that is not exactly a ``list`` is treated as a cost and
            differentiated with ``tf.gradients``.
        lr: step size.
        mom1: first-moment decay; ``<= 0`` disables momentum and uses the
            raw gradient.
        mom2: second-moment decay.

    Returns:
        A single op grouping every variable update and the step increment.
    """
    if type(cost_or_grads) is list:
        grads = cost_or_grads
    else:
        grads = tf.gradients(cost_or_grads, params)

    # NOTE(review): in the TF1 tf.Variable signature the second positional
    # parameter is `trainable`, so these strings are probably not applied as
    # variable names -- confirm before relying on the names.
    step = tf.Variable(1., 'adam_t')
    ops = []
    for param, grad in zip(params, grads):
        sq_avg = tf.Variable(tf.zeros(param.get_shape()), param.name + '_adam_mg')
        if mom1 > 0:
            velocity = tf.Variable(tf.zeros(param.get_shape()), param.name + '_adam_v')
            velocity_new = mom1 * velocity + (1. - mom1) * grad
            # Bias-corrected first moment.
            velocity_hat = velocity_new / (1. - tf.pow(mom1, step))
            ops.append(velocity.assign(velocity_new))
        else:
            velocity_hat = grad
        sq_avg_new = mom2 * sq_avg + (1. - mom2) * tf.square(grad)
        # Bias-corrected second moment; the 1e-8 sits inside the square root.
        sq_avg_hat = sq_avg_new / (1. - tf.pow(mom2, step))
        direction = velocity_hat / tf.sqrt(sq_avg_hat + 1e-8)
        param_new = param - lr * direction
        ops.append(sq_avg.assign(sq_avg_new))
        ops.append(param.assign(param_new))
    ops.append(step.assign_add(1))
    return tf.group(*ops)
def hnet_loss(gt_pts, transformation_coeffcient, name):
    """
    Project the label points, fit a polynomial there, and score the fit in
    the original space.

    :param gt_pts: original label points, one ``[x, y, 1]`` entry per point
    :param transformation_coeffcient: parameters of the mapping matrix
        (6-parameter matrix) ``[[a, b, c], [0, d, e], [0, f, 1]]``
    :param name: name of the variable scope
    :return: mean squared difference between the original x coordinates and
        the fitted x coordinates mapped back through the inverse matrix
    """
    with tf.variable_scope(name):
        # First project the original label points.  A constant 1.0 is
        # appended and the seven values are scattered into a flat 3x3 matrix
        # whose positions 3 and 6 stay zero.
        coeffs = tf.concat([transformation_coeffcient, [1.0]], axis=-1)
        flat_positions = tf.constant([[0], [1], [2], [4], [5], [7], [8]])
        flat_shape = tf.constant([9])
        H = tf.reshape(tf.scatter_nd(flat_positions, coeffs, flat_shape),
                       shape=[3, 3])

        pts = tf.transpose(gt_pts)
        projected = tf.matmul(H, pts)

        # Least-squares polynomial fit of x against y.  The design matrix
        # holds y^3, y^2, y and 1, i.e. a cubic (the original comment calls
        # it second order).
        Y = tf.transpose(projected[1, :])
        X = tf.transpose(projected[0, :])
        Y_One = tf.add(tf.subtract(Y, Y), tf.constant(1.0, tf.float32))
        design = tf.stack([tf.pow(Y, 3), tf.pow(Y, 2), Y, Y_One], axis=1)
        gram = tf.matmul(tf.transpose(design), design)
        pseudo_inverse = tf.matmul(tf.matrix_inverse(gram), tf.transpose(design))
        w = tf.matmul(pseudo_inverse, tf.expand_dims(X, -1))

        # Evaluate the fit, map it back to the original space and compare
        # the x coordinates.
        x_fit = tf.matmul(design, w)
        fitted = tf.transpose(
            tf.stack([tf.squeeze(x_fit, -1), Y, Y_One], axis=1))
        back_projected = tf.matmul(tf.matrix_inverse(H), fitted)

        loss = tf.reduce_mean(tf.pow(pts[0, :] - back_projected[0, :], 2))

    return loss
def hnet_transformation(gt_pts, transformation_coeffcient, name):
    """
    Project the label points, fit a polynomial there, and map the fitted
    points back to the original space.

    :param gt_pts: label points; transposed before being multiplied by the
        3x3 mapping matrix
    :param transformation_coeffcient: the six values placed at flat
        positions 0, 1, 2, 4, 5 and 7 of the mapping matrix (position 8 is
        fixed to 1.0, positions 3 and 6 stay zero)
    :param name: name of the variable scope
    :return: the fitted points multiplied by the inverse mapping matrix
    """
    with tf.variable_scope(name):
        # First project the original label points.
        transformation_coeffcient = tf.concat(
            [transformation_coeffcient, [1.0]], axis=-1)
        H_indices = tf.constant([[0], [1], [2], [4], [5], [7], [8]])
        H_shape = tf.constant([9])
        H = tf.scatter_nd(H_indices, transformation_coeffcient, H_shape)
        H = tf.reshape(H, shape=[3, 3])

        gt_pts = tf.transpose(gt_pts)
        pts_projects = tf.matmul(H, gt_pts)

        # Least-squares polynomial fit of x against y.  The design matrix
        # holds y^3, y^2, y and 1, i.e. a cubic.
        Y = tf.transpose(pts_projects[1, :])
        X = tf.transpose(pts_projects[0, :])
        Y_One = tf.add(tf.subtract(Y, Y), tf.constant(1.0, tf.float32))
        Y_stack = tf.stack([tf.pow(Y, 3), tf.pow(Y, 2), Y, Y_One], axis=1)
        w = tf.matmul(
            tf.matmul(
                tf.matrix_inverse(tf.matmul(tf.transpose(Y_stack), Y_stack)),
                tf.transpose(Y_stack)),
            tf.expand_dims(X, -1))

        # Evaluate the fit and map it back through the inverse matrix.  (The
        # unused `preds_fit` tensor of the previous version was dropped; it
        # only added dead ops to the graph.)
        x_preds = tf.matmul(Y_stack, w)
        preds = tf.transpose(
            tf.stack([tf.squeeze(x_preds, -1), Y, Y_One], axis=1))
        x_transformation_back = tf.matmul(tf.matrix_inverse(H), preds)

    return x_transformation_back
def test_0d():
    """Run ``_test`` on a quadratic in two one-element variables, one variable at a time.

    ``y = x1^2 + 2*x1*x2 + 3*x2^2 + 4*x1 + 5*x2 + 6``; the expected values 2
    and 6 equal the second derivatives of ``y`` with respect to ``x1`` and
    ``x2`` (presumably ``_test`` checks a Hessian -- see its definition).
    """
    x1 = tf.Variable(tf.random_normal([1], dtype=tf.float32))
    x2 = tf.Variable(tf.random_normal([1], dtype=tf.float32))

    x1_squared = tf.pow(x1, tf.constant(2.0))
    cross_term = tf.constant(2.0) * x1 * x2
    x2_squared = tf.constant(3.0) * tf.pow(x2, tf.constant(2.0))
    x1_linear = tf.constant(4.0) * x1
    x2_linear = tf.constant(5.0) * x2
    y = x1_squared + cross_term + x2_squared + x1_linear + x2_linear + tf.constant(6.0)

    _test(y, [x1], val_true=np.array([[2.0]]))
    _test(y, [x2], val_true=np.array([[6.0]]))
def evolve(self, x, n, k, gamma):
    """
    Time derivative at the current state.

    Model:
        dx/dt = x^n / (x^n + K^n) - gamma*x

    According to the original note this leads to single-species bistability.
    """
    production = tf.pow(x, n) / (tf.pow(x, n) + tf.pow(k, n))
    degradation = gamma * x
    return production - degradation
def gabor(n_values=32, sigma=1.0, mean=0.0):
    """Build an ``n_values x n_values`` kernel: a sine wave times a 2-D Gaussian.

    Args:
        n_values: number of samples taken on ``[-3, 3]`` along each axis.
        sigma: standard deviation of the Gaussian.
        mean: mean of the Gaussian.

    Returns:
        The outer product of the 1-D Gaussian with itself, multiplied
        element-wise by ``sin`` of the sample positions varying along axis 0.
    """
    import math

    x = tf.linspace(-3.0, 3.0, n_values)
    # Normalised 1-D Gaussian density.  The normaliser uses pi itself; the
    # previous literal 3.145 was a typo.
    z = (tf.exp(tf.negative(tf.pow(x - mean, 2.0) /
                            (2.0 * tf.pow(sigma, 2.0)))) *
         (1.0 / (sigma * tf.sqrt(2.0 * math.pi))))
    # Outer product of the 1-D Gaussian with itself.
    gauss_kernel = tf.matmul(tf.reshape(z, [n_values, 1]),
                             tf.reshape(z, [1, n_values]))
    # Sine wave as a column, repeated across the columns by the row of ones.
    x = tf.reshape(tf.sin(tf.linspace(-3.0, 3.0, n_values)), [n_values, 1])
    y = tf.reshape(tf.ones_like(x), [1, n_values])
    gabor_kernel = tf.multiply(tf.matmul(x, y), gauss_kernel)
    return gabor_kernel
def multivariate_rbf(x, y=0.0, sigma=1.0, l=1.0):
    """
    Squared-exponential kernel

    k(x, y) = sigma^2 exp{ -1/(2l^2) sum_i (x_i - y_i)^2 }

    The squared differences are summed over every element, so the result is
    a scalar.
    """
    variance = tf.pow(sigma, 2.0)
    neg_inv_two_l2 = -1.0 / (2.0 * tf.pow(l, 2.0))
    squared_distance = tf.reduce_sum(tf.pow(x - y, 2.0))
    return variance * tf.exp(neg_inv_two_l2 * squared_distance)
def getreg(self, winp, hinp):
    """
    Regularization term for the given index variables (here: l2 norm).

    Looks up the rows of ``self.W`` selected by ``winp`` and the rows of
    ``self.H`` transposed selected by ``hinp``, and returns half of the sum
    of their squared entries, weighted by ``self.Wreg`` and ``self.Hreg``.
    """
    w_rows = tf.nn.embedding_lookup(self.W, winp)
    w_penalty = tf.reduce_sum(tf.pow(w_rows, 2)) * self.Wreg
    h_rows = tf.nn.embedding_lookup(tf.transpose(self.H), hinp)
    h_penalty = tf.reduce_sum(tf.pow(h_rows, 2)) * self.Hreg
    return (1. / 2.) * (w_penalty + h_penalty)
def _get_lr_from_schedule(self):
    """Build the learning-rate tensor for ``self._lr_sched_params['type']``.

    Supported types:
        'stable': the initial learning rate, unchanged.
        'poly':   ``init_lr * (1 - x/x2) ** 0.9`` inside the configured
                  epoch window, constant outside it.
        'expon':  exponential decay from the initial rate towards
                  ``lr_to_reach_at_last_ep`` over the configured window.
        'predef': piecewise-constant; the initial rate is divided by
                  ``div_lr_by`` once more at each listed epoch.
        'auto':   a non-trainable variable plus the state variables and
                  assign ops used to change it from outside this function.

    Returns:
        The tensor or variable holding the current learning rate.  Any other
        type leaves ``curr_lr`` unbound, so the final ``return`` raises.
    """
    TINY = 1e-8  # keeps the ratio in the 'expon' branch strictly positive

    if self._lr_sched_params['type'] == 'stable' :
        curr_lr = self._init_lr_tfv

    elif self._lr_sched_params['type'] == 'poly' :
        first_it_for_sch = self._lr_sched_params['poly']['epochs_wait_before_decr']
        final_it_for_sch = self._lr_sched_params['poly']['final_ep_for_sch'] # * subepochs_per_ep
        assert first_it_for_sch < final_it_for_sch
        curr_it = tf.cast(self._num_epochs_trained_tfv, dtype='float32') # * subepochs_per_ep + curr_subepoch

        #curr_lr = init_lr * ( 1 - x/x2) ^ power. Power = 0.9 in parsenet, which we validated to behave ok.
        x2 = final_it_for_sch - first_it_for_sch
        x = tf.maximum( tf.constant(0, dtype="float32"), curr_it - first_it_for_sch ) # to make schedule happen within the window (first, final) epoch, stable outside.
        x = tf.minimum( x, x2 ) # in case the current iteration is after max, so that I keep schedule stable afterwards.
        y1 = self._init_lr_tfv
        y2 = 0.9
        curr_lr = y1 * tf.pow( 1.0 - x/x2, y2 )

    elif self._lr_sched_params['type'] == 'expon' :
        first_it_for_sch = self._lr_sched_params['expon']['epochs_wait_before_decr']
        final_it_for_sch = self._lr_sched_params['expon']['final_ep_for_sch'] # * subepochs_per_ep
        assert first_it_for_sch < final_it_for_sch
        curr_it = tf.cast(self._num_epochs_trained_tfv, dtype='float32')

        # y = y1 * gamma^x. gamma = (y2 / y1)^(1/x2)
        x2 = final_it_for_sch - first_it_for_sch
        # Clamp x to [0, x2] so the rate is constant outside the window.
        x = tf.maximum( tf.constant(0, dtype="float32"), curr_it-first_it_for_sch )
        x = tf.minimum( x, x2 )
        y1 = self._init_lr_tfv
        y2 = self._lr_sched_params['expon']['lr_to_reach_at_last_ep']
        gamma = tf.pow( (y2+TINY)/y1, 1.0/x2 )
        curr_lr = y1 * tf.pow( gamma, x )

    elif self._lr_sched_params['type'] == 'predef' :
        #Predefined Schedule.
        div_lr_by = self._lr_sched_params['predef']['div_lr_by']
        epochs_boundaries = [ tf.cast(e, tf.int32) for e in self._lr_sched_params['predef']['epochs'] ]
        # One value per interval: init_lr / div_lr_by**i for the i-th interval.
        lr_values = [ ( self._init_lr_tfv / pow(div_lr_by, i) ) for i in range( 1+len(epochs_boundaries) ) ]
        curr_lr = tf.train.piecewise_constant(self._num_epochs_trained_tfv, boundaries = epochs_boundaries, values = lr_values)

    elif self._lr_sched_params['type'] == 'auto' :
        # State variables for the externally driven schedule.
        self._learning_rate_tfv = tf.Variable( self._init_lr_tfv, dtype="float32", trainable=False, name="curr_lr_tfv")
        self._top_mean_val_acc_tfv = tf.Variable(0, dtype="float32", trainable=False, name="top_mean_val_acc")
        self._epoch_with_top_mean_val_acc_tvf = tf.Variable(0, dtype=self._num_epochs_trained_tfv.dtype.as_numpy_dtype, trainable=False, name="ep_top_mean_val_acc")
        self._last_epoch_lr_got_lowered_tvf = tf.Variable(0, dtype="float32", trainable=False, name="last_ep_lr_lowered")

        # Assign ops fed through the float32 / int32 placeholders.
        self._op_assign_new_lr = tf.assign(self._learning_rate_tfv, self._tf_plchld_float32)
        self._op_assign_top_mean_val_acc_tfv = tf.assign(self._top_mean_val_acc_tfv, self._tf_plchld_float32)
        self._op_assign_epoch_with_top_mean_val_acc_tvf = tf.assign(self._epoch_with_top_mean_val_acc_tvf, self._tf_plchld_int32)
        self._op_assign_last_epoch_lr_lowered = tf.assign(self._last_epoch_lr_got_lowered_tvf, self._tf_plchld_float32)

        # The LR will be changed during the routine.training, by a call to function self.run_lr_sched_updates( sessionTf )
        curr_lr = self._learning_rate_tfv

    return curr_lr
def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Generate Angular Symmetry Function

    Inputs are read from ``in_layers`` (default ``self.in_layers``) in this
    order: cut-off distances, distances, atom coordinates and -- only when
    ``self.atomic_number_differentiated`` is set -- atom numbers.

    The result is stored in ``self.out_tensor``; nothing is returned.
    ``set_tensors`` and ``kwargs`` are not used in this method body.
    """
    if in_layers is None:
        in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)

    self.build()
    max_atoms = self.max_atoms
    d_cutoff = in_layers[0].out_tensor
    d = in_layers[1].out_tensor
    atom_coordinates = in_layers[2].out_tensor
    if self.atomic_number_differentiated:
        atom_numbers = in_layers[3].out_tensor
        atom_number_embedded = tf.nn.embedding_lookup(self.atom_number_embedding,
                                                      atom_numbers)

    # Pairwise coordinate differences, built by tiling the coordinates
    # along axis 2 and along axis 1.
    vector_distances = tf.tile(tf.expand_dims(atom_coordinates, axis=2), (1, 1, max_atoms, 1)) - \
        tf.tile(tf.expand_dims(atom_coordinates, axis=1), (1, max_atoms, 1, 1))
    # Distances and cut-off values broadcast over a third atom axis:
    # the *_ij tensors repeat along axis 3, the *_ik tensors along axis 2.
    R_ij = tf.tile(tf.expand_dims(d, axis=3), (1, 1, 1, max_atoms))
    R_ik = tf.tile(tf.expand_dims(d, axis=2), (1, 1, max_atoms, 1))
    f_R_ij = tf.tile(tf.expand_dims(d_cutoff, axis=3), (1, 1, 1, max_atoms))
    f_R_ik = tf.tile(tf.expand_dims(d_cutoff, axis=2), (1, 1, max_atoms, 1))

    # Define angle theta = R_ij(Vector) dot R_ik(Vector)/R_ij(distance)/R_ik(distance)
    theta = tf.reduce_sum(tf.tile(tf.expand_dims(vector_distances, axis=3), (1, 1, 1, max_atoms, 1)) * \
        tf.tile(tf.expand_dims(vector_distances, axis=2), (1, 1, max_atoms, 1, 1)), axis=4)

    # The 1e-5 keeps the division finite when a distance is zero.
    theta = tf.div(theta, R_ij * R_ik + 1e-5)

    # Repeat every quantity self.length times along a new last axis so it
    # lines up with the parameter vectors reshaped below.
    R_ij = tf.stack([R_ij] * self.length, axis=4)
    R_ik = tf.stack([R_ik] * self.length, axis=4)
    f_R_ij = tf.stack([f_R_ij] * self.length, axis=4)
    f_R_ik = tf.stack([f_R_ik] * self.length, axis=4)

    theta = tf.stack([theta] * self.length, axis=4)
    lambd = tf.reshape(self.lambd, (1, 1, 1, 1, -1))
    zeta = tf.reshape(self.zeta, (1, 1, 1, 1, -1))
    ita = tf.reshape(self.ita, (1, 1, 1, 1, -1))
    Rs = tf.reshape(self.Rs, (1, 1, 1, 1, -1))
    thetas = tf.reshape(self.thetas, (1, 1, 1, 1, -1))

    # 2^(1-zeta) * (1 + lambd*cos(theta - thetas))^zeta
    #   * exp(-ita * ((R_ij + R_ik)/2 - Rs)^2) * f(R_ij) * f(R_ik)
    out_tensor = tf.pow(1 + lambd * tf.cos(theta - thetas), zeta) * \
        tf.exp(-ita * tf.square((R_ij + R_ik) / 2 - Rs)) * \
        f_R_ij * f_R_ik * tf.pow(tf.constant(2.), 1 - zeta)

    if self.atomic_number_differentiated:
        out_tensors = []
        # One masked sum per ordered pair of atom-number cases.
        for atom_type_j in self.atom_number_cases:
            for atom_type_k in self.atom_number_cases:
                selected_atoms = tf.stack([atom_number_embedded[:, :, atom_type_j]] * max_atoms, axis=2) * \
                    tf.stack([atom_number_embedded[:, :, atom_type_k]] * max_atoms, axis=1)
                selected_atoms = tf.expand_dims(
                    tf.expand_dims(selected_atoms, axis=1), axis=4)
                out_tensors.append(
                    tf.reduce_sum(out_tensor * selected_atoms, axis=[2, 3]))
        self.out_tensor = tf.concat(out_tensors, axis=2)
    else:
        # Sum over both neighbour axes.
        self.out_tensor = tf.reduce_sum(out_tensor, axis=[2, 3])
def cosine_similarity(U, v, eps=1e-7):
    """
    Cosine similarity u.v/(l2(u)*l2(v)) between all of the vectors in two
    3-dimensional tensors U and v.

    The product of the squared norms is floored at ``eps`` before the square
    root is taken, so the denominator never reaches zero.
    """
    dot_products = tf.batch_matmul(U, v)[:, :, 0]
    U_sq_norm = tf.reduce_sum(tf.pow(U, 2), 2)
    v_sq_norm = tf.reduce_sum(tf.pow(v, 2), 1)
    norm_product = tf.sqrt(tf.maximum(tf.mul(U_sq_norm, v_sq_norm), eps))
    return tf.div(dot_products, norm_product)
def test_1d():
    """Run ``_test`` on several variables at once.

    First ``y = x1^2 + 2*x1*x2 + 3*x2^2 + 4*x1 + 5*x2 + 6`` against
    ``[[2, 2], [2, 6]]``, then ``y = x2^2 + sum(x3)`` against an all-zero
    3x3 matrix for ``x3`` alone and ``diag(2, 0, 0, 0)`` for ``[x2, x3]``.
    The expected matrices equal the second derivatives of ``y`` (presumably
    ``_test`` checks a Hessian -- see its definition).
    """
    x1 = tf.Variable(tf.random_normal([1], dtype=tf.float32))
    x2 = tf.Variable(tf.random_normal([1], dtype=tf.float32))

    x1_squared = tf.pow(x1, tf.constant(2.0))
    cross_term = tf.constant(2.0) * x1 * x2
    x2_squared = tf.constant(3.0) * tf.pow(x2, tf.constant(2.0))
    x1_linear = tf.constant(4.0) * x1
    x2_linear = tf.constant(5.0) * x2
    y = x1_squared + cross_term + x2_squared + x1_linear + x2_linear + tf.constant(6.0)

    _test(y, [x1, x2], val_true=np.array([[2.0, 2.0], [2.0, 6.0]]))

    x3 = tf.Variable(tf.random_normal([3], dtype=tf.float32))
    y = tf.pow(x2, tf.constant(2.0)) + tf.reduce_sum(x3)

    _test(y, [x3], val_true=np.zeros([3, 3]))
    _test(y, [x2, x3], val_true=np.diag([2.0, 0.0, 0.0, 0.0]))
def adafactor_decay_rate_adam(beta2):
    """Second-moment decay rate like Adam, subsuming the correction factor.

    With ``t`` equal to the global step plus one, the rate is
    ``beta2 * (1 - beta2**(t-1)) / (1 - beta2**t)``.

    Args:
        beta2: a float between 0 and 1

    Returns:
        a scalar
    """
    step = tf.to_float(tf.train.get_or_create_global_step()) + 1.0
    numerator = beta2 * (1.0 - tf.pow(beta2, step - 1.0))
    denominator = 1.0 - tf.pow(beta2, step)
    # decay = tf.cond(tf.equal(t, 1.0), lambda: beta2, lambda: decay)
    return numerator / denominator
def smooth_cosine_similarity(m, v):
    """Cosine similarity between every row of ``m`` and the vector ``v``.

    A constant ``1e-3`` is added to the product of the norms, so the result
    stays finite for zero vectors.

    Raises:
        ValueError: if the second static dimension of ``m`` differs from the
            first static dimension of ``v``.
    """
    m_dims = m.get_shape().as_list()
    v_dims = v.get_shape().as_list()
    if m_dims[1] != v_dims[0]:
        raise ValueError("Smooth cosine similarity is expecting same dimemsnion")

    row_norms = tf.sqrt(tf.reduce_sum(tf.pow(m, 2), 1))
    vec_norm = tf.sqrt(tf.reduce_sum(tf.pow(v, 2)))
    dots = tf.matmul(m, tf.reshape(v, [-1, 1]))
    return tf.div(tf.reshape(dots, [-1]), row_norms * vec_norm + 1e-3)
# HYPERPARAMETER BATCH_SIZE = 1 ETA = 0.0003 epochs = 1000 X_TRAIN = tf.placeholder(tf.float64, shape=(None, n_features), name="train_data") Y_TRAIN = tf.placeholder(tf.float64, shape=(None, 1), name="train_labels") W = tf.Variable(np.random.randn(n_features, 1), name="weights") # B = tf.Variable(np.random.randn(None,1),name = "biases") prediction = tf.matmul(X_TRAIN, W) cost = tf.reduce_sum(tf.pow(Y_TRAIN - prediction, 2)) / 2 optimizer = tf.train.GradientDescentOptimizer(ETA).minimize(cost) init_op = tf.initialize_all_variables() log = [] with tf.Session() as sess: sess.run(init_op) for epoch in range(epochs): for x, y in zip(x_train, y_train): sess.run(optimizer, feed_dict={
display_step = 100 num_of_samples = x_data.shape[0] X = tf.placeholder(tf.float32, name='X') Y = tf.placeholder(tf.float32, name='Y') W = tf.Variable(np.random.randn(), name="Weight") B = tf.Variable(np.random.randn(), name="Bias") with tf.name_scope('Pred') as scope: pred = X + W * B * X tf.summary.histogram("hypothesis", pred) with tf.name_scope('Input') as scope: cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * num_of_samples) cost_summ = tf.summary.scalar("cost", cost) with tf.name_scope('Optimizer') as scope: optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost) sess = tf.Session() sess.run(tf.global_variables_initializer()) merged = tf.summary.merge_all() writer = tf.summary.FileWriter("./Model", sess.graph) writer.add_graph(sess.graph) # Show the graph for epoch in range(training_epochs): for (x, y) in zip(x_data, y_data): # zip에 있는 리스트들을 원소로하는 튜플을 생성하여
## fully connected layer 1 W_fc1 = nl.weight_variable([160, 60]) b_fc1 = nl.bias_variable([60]) h_fc1 = tf.matmul(x_flat, W_fc1) + b_fc1 #outsize = batch*60 a_fc1 = tf.nn.relu(h_fc1) #outsize = batch*60 a_fc1_dropout = tf.nn.dropout(a_fc1, keep_prob) #dropout layer 1 ## fully connected layer 2 W_fc2 = nl.weight_variable([60, 2]) b_fc2 = nl.bias_variable([2]) h_fc2 = tf.matmul(a_fc1_dropout, W_fc2) + b_fc2 #outsize = batch*2 a_fc2 = tf.nn.relu(h_fc2) #outsize = batch*2 #regularization and loss function original_cost = tf.reduce_mean(tf.pow(y - a_fc2, 2)) tv = tf.trainable_variables() #L2 regularization regularization_cost = 1 / 32 * tf.reduce_sum([tf.nn.l2_loss(v) for v in tv ]) #1 / 32 is hyperparameter cost = original_cost + regularization_cost Optimizer = tf.train.AdamOptimizer(0.001).minimize(original_cost) init = tf.global_variables_initializer() #average accuracy every batch accuracy = tf.reduce_mean((y - a_fc2), 0) #average accuracy every batch testaccuracy = np.zeros([17330, 2], dtype=np.float32) cache_RE32 = np.zeros([34660, 2], dtype=np.float32) #save the model saver = tf.train.Saver() with tf.Session() as sess: sess.run(init)
x, 1 ) # The new dimesnion is of undefine size and hypotetically each element in this dimennsion is the same as the original element the size of the new dimension will be determined once this is used #----------------------------------------------------------------------------------- rx = expanded_x1 - expanded_x2 #Distance between every pair of particles in x in every dimension (dx,dy) rx2 = tf.square( rx) # sqar distane for each particle pair in each dimension (dx^2,dx^2) r2 = tf.reduce_sum( rx2, 2) # absolute squar distance between every pair of particles(dx^2+dx^2) r = tf.sqrt(r2) # absolute distance between every pair of particles r = tf.maximum( r, tf.ones_like(r) * 0.02 ) # To avoid division by zero make min distance larger then 0 this add to prevent simulation explosion if particles get too closed F = -30 / tf.pow(r, 2) + 10 / tf.pow( r, 3 ) # Force between pair of particles F=9/r^2-1/r^3 (attracion 9/r^2 and repulsion 1/r^3) Fx = rx * tf.expand_dims( F / r, 2 ) # The forces per axis applied between each pair of particles we divide the force by r since rx is not normalize by distance between particles Accel = ( dt / m ) * Fx # Acceleration resulted from forces between each pair is simply force between each pair divide by particle map and multiply by time of step dv = tf.reduce_sum( Accel, 0 ) ## or dim2? Sum velocity changes for each particle in each step of the simulation vnew = (v + dv) * CoolingFactor # Update velocity for particle U= xnew = x + ( vnew ) * dt # Update position for each particles according to particle speed (avereged on new and previous speed)
# Demo script: builds a handful of TensorFlow graph nodes (arithmetic ops,
# constants, variables, a placeholder). Nothing is evaluated in this excerpt.
import os
# Must be set before TensorFlow is imported to take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import tensorflow as tf
import numpy as np

# Plain Python ints; TensorFlow converts them to constants when they are
# passed to an op.
x =2
y =3
add_op = tf.add(x, y)
mul_op = tf.multiply(x, y )
useless = tf.multiply(x, add_op)  # not consumed by any other op in this excerpt
pow_op = tf.pow(add_op, mul_op)

# Constants are a poor fit for large data; use variables for that.
a = tf.constant([2, 2], name='a')
b = tf.constant([[0,1], [2,3]], name='b')
c = tf.multiply(a, b ,name='mul')

# These two results are not bound to a name; the calls only add nodes to the
# graph.
tf.zeros([2, 3], tf.int16)
tf.fill([2, 3], 8)

d = tf.Variable(2, name='scalar')
e = tf.Variable([[0,1], [2,3]], name='matrix')
W = tf.Variable(tf.random_uniform([784, 10]), name='weight')
# Creates the assign op only; `d` changes when the op is run in a session.
assign_op = d.assign(100)

ap = tf.placeholder(tf.float16, shape=[2,3],name='weight1')
bc = tf.constant(1, tf.float16,name='bc')
cp = ap + bc
# writer = tf.summary.FileWriter('../graphs', tf.get_default_graph())
tf.add(tf.matmul(layer_3, weights['decoder_h4']), biases['decoder_b4'])) return layer_4 # Construct model encoder_op = encoder(X) decoder_op = decoder(encoder_op) # Prediction y_pred = decoder_op # Targets (Labels) are the input data. y_true = X # Define loss and optimizer, minimize the squared error cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2)) optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost) # Launch the graph with tf.Session() as sess: # tf.initialize_all_variables() no long valid from # 2017-03-02 if using tensorflow >= 0.12 if int((tf.__version__).split('.')[1]) < 12 and int( (tf.__version__).split('.')[0]) < 1: init = tf.initialize_all_variables() else: init = tf.global_variables_initializer() sess.run(init) total_batch = int(mnist.train.num_examples / batch_size) # Training cycle for epoch in range(training_epochs):
def get_angles(self, position, i, d_model):
    """Scale `position` by 1 / 10000 ** (2 * (i // 2) / d_model).

    `d_model` is cast to float32 before the division. Presumably these are
    the angle rates of a sinusoidal positional encoding -- confirm against
    the caller.
    """
    exponent = (2 * (i // 2)) / tf.cast(d_model, tf.float32)
    inverse_rate = 1 / tf.pow(10000, exponent)
    return position * inverse_rate
def smooth_l1_loss(pred, target, weight, beta):
    """Weighted smooth-L1 loss, summed over the last axis.

    Per element, with d = target - pred:
        0.5 * d**2 / beta    if |d| < beta
        |d| - 0.5 * beta     otherwise
    The per-element values are summed over axis -1 and multiplied by
    `weight`. `beta` must be non-zero because it is used as a divisor.
    """
    diff = target - pred
    magnitude = tf.abs(diff)
    quadratic_branch = 0.5 / beta * tf.pow(diff, 2)
    linear_branch = magnitude - 0.5 * beta
    per_element = tf.where(magnitude < beta, quadratic_branch, linear_branch)
    return weight * tf.reduce_sum(per_element, axis=-1)
#随机选取训练集 xs_index = list(range(n_observations)) xs_index = random.sample(xs_index, n_samples) xs_index = np.array(xs_index) xd = xs[xs_index] yd = ys[xs_index] X = tf.placeholder(tf.float32, name="X") Y = tf.placeholder(tf.float32, name="Y") #初始化参数和权重 W = tf.Variable(tf.random_normal([1]), name="weight") b = tf.Variable(tf.random_normal([1]), name="bias") #计算预测结果 Y_pred = tf.add(tf.multiply(X, W), b) W_2 = tf.Variable(tf.random_normal([1]), name="weight_2") Y_pred = tf.add(tf.multiply(tf.pow(X, 2), W_2), Y_pred) W_3 = tf.Variable(tf.random_normal([1]), name='weight_3') Y_pred = tf.add(tf.multiply(tf.pow(X, 3), W_3), Y_pred) W_4 = tf.Variable(tf.random_normal([1]), name='weight_4') Y_pred = tf.add(tf.multiply(tf.pow(X, 4), W_4), Y_pred) #计算损失函数值 sample_num = xs.shape[0] loss = tf.reduce_sum(tf.pow(Y_pred - Y, 2)) / sample_num #初始化optimizer learning_rate = 0.01 optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize( loss) #指定迭代次数,在session里执行graph
def focal_loss(logits, gamma):
    """Focal loss for the negative (all-zero target) case, summed over all elements.

    Each element is the binary cross-entropy of `logits` against a target of
    0, multiplied by `logits ** gamma`.

    NOTE(review): `binary_crossentropy` is called without `from_logits`, so
    despite its name `logits` is presumably expected to hold probabilities --
    confirm against the caller.
    """
    negative_targets = tf.zeros_like(logits)
    modulating_factor = tf.pow(logits, gamma)
    cross_entropy = tf.keras.backend.binary_crossentropy(
        negative_targets, logits)
    return tf.reduce_sum(cross_entropy * modulating_factor)
trainOp = tf.train.AdamOptimizer( learning_rate=lR, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False, name='Adam').minimize( loss, global_step=tf.contrib.framework.get_global_step()) return myOut #, model_fn_lib.ModelFnOps(mode=mode,loss=loss, train_op=trainOp) # tf.layers.dropout(inputs, rate=0.5, noise_shape=None, seed=None, training=False, name=None) myOut = cephovision(data, targets, mode) loss = tf.reduce_mean(tf.pow(targets - myOut, 2)) trainOp = tf.train.AdamOptimizer( learning_rate=lR, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False, name='Adam').minimize(loss, global_step=tf.contrib.framework.get_global_step()) init = tf.global_variables_initializer() def main(unused_argv): with tf.Session() as sess: tf.initialize_all_variables().run(
def get_angles(self, position, i, d_model):
    """Compute the angles: `position` times 1 / 10000 ** (2 * (i // 2) / d_model)."""
    # tf.cast(d_model, tf.float32) converts d_model to float32 so that the
    # exponent is computed in floating point.
    model_dim = tf.cast(d_model, tf.float32)
    exponent = (2 * (i // 2)) / model_dim
    inverse_rate = 1 / tf.pow(10000, exponent)
    return position * inverse_rate
def grow_topk(i, alive_seq, alive_log_probs, states):
    r"""Inner beam search loop.

    This function takes the current alive sequences, and grows them to topk
    sequences where k = 2*beam. We use 2*beam because, we could have beam_size
    number of sequences that might hit <EOS> and there will be no alive
    sequences to continue. With 2*beam_size, this will not happen. This relies
    on the assumption the vocab size is > beam size. If this is true, we'll
    have at least beam_size non <EOS> extensions if we extract the next top
    2*beam words.

    Scores are the log probs divided by the length penalty
    ((5 + (i + 1)) / 6) ^ \alpha. Please refer to
    https://arxiv.org/abs/1609.08144.

    `batch_size`, `beam_size`, `vocab_size`, `alpha`, `eos_id` and
    `symbols_to_logits_fn` are not parameters; they are read from the
    enclosing scope.

    Args:
      i: loop index
      alive_seq: Topk sequences decoded so far [batch_size, beam_size, i+1]
      alive_log_probs: probabilities of these sequences. [batch_size, beam_size]
      states: dict (possibly nested) of decoding states.

    Returns:
      Tuple of
        (Topk sequences extended by the next word,
         The log probs of these sequences,
         The scores with length penalty of these sequences,
         Flags indicating which of these sequences have finished decoding,
         dict of transformed decoding states)
    """
    # Get the logits for all the possible next symbols
    flat_ids = tf.reshape(alive_seq, [batch_size * beam_size, -1])

    # (batch_size * beam_size, decoded_length)
    if states:
        # With decoding states the callback takes (ids, i, states) and also
        # returns the updated states; beam and batch dims are merged for the
        # call and split again afterwards.
        flat_states = nest.map_structure(_merge_beam_dim, states)
        flat_logits, flat_states = symbols_to_logits_fn(
            flat_ids, i, flat_states)
        states = nest.map_structure(
            lambda t: _unmerge_beam_dim(t, batch_size, beam_size),
            flat_states)
    else:
        flat_logits = symbols_to_logits_fn(flat_ids)

    logits = tf.reshape(flat_logits, [batch_size, beam_size, -1])

    # Convert logits to normalized log probs
    candidate_log_probs = log_prob_from_logits(logits)

    # Multiply the probabilities by the current probabilities of the
    # beam (an addition, since these are log probs).
    # (batch_size, beam_size, vocab_size) + (batch_size, beam_size, 1)
    log_probs = candidate_log_probs + tf.expand_dims(alive_log_probs, axis=2)

    length_penalty = tf.pow(((5. + tf.to_float(i + 1)) / 6.), alpha)

    curr_scores = log_probs / length_penalty
    # Flatten out (beam_size, vocab_size) probs in to a list of
    # possibilities
    flat_curr_scores = tf.reshape(curr_scores, [-1, beam_size * vocab_size])

    topk_scores, topk_ids = tf.nn.top_k(flat_curr_scores, k=beam_size * 2)

    # Recovering the log probs because we will need to send them back
    topk_log_probs = topk_scores * length_penalty

    # Work out what beam the top probs are in.
    topk_beam_index = topk_ids // vocab_size
    topk_ids %= vocab_size  # Unflatten the ids

    # The next three steps are to create coordinates for tf.gather_nd
    # to pull out the correct sequences from id's that we need to grow.
    # We will also use the coordinates to gather the booleans of the
    # beam items that survived.
    batch_pos = compute_batch_indices(batch_size, beam_size * 2)

    # top beams will give us the actual coordinates to do the gather.
    # stacking will create a tensor of dimension batch * beam * 2,
    # where the last dimension contains the i,j gathering coordinates.
    topk_coordinates = tf.stack([batch_pos, topk_beam_index], axis=2)

    # Gather up the most probable 2*beams both for the ids and
    # finished_in_alive bools
    topk_seq = tf.gather_nd(alive_seq, topk_coordinates)
    if states:
        states = nest.map_structure(
            lambda state: tf.gather_nd(state, topk_coordinates), states)

    # Append the most probable alive
    topk_seq = tf.concat(
        [topk_seq, tf.expand_dims(topk_ids, axis=2)], axis=2)

    topk_finished = tf.equal(topk_ids, eos_id)

    return topk_seq, topk_log_probs, topk_scores, topk_finished, states
def wct_tf(content, style, alpha, eps=1e-8):
    '''TensorFlow version of Whiten-Color Transform

       Assumes the content/style encodings have shape 1xHxWxC; only batch
       element 0 of each is used.

       See p.4 of the Universal Style Transfer paper for corresponding
       equations: https://arxiv.org/pdf/1705.08086.pdf

       Returns `alpha * whitened_and_colored + (1 - alpha) * content`,
       reshaped back to 1xHxWxC.
    '''

    def _center_and_covariance(encoding):
        # 1xHxWxC -> CxHxW -> CxH*W, then subtract the per-channel mean and
        # build the CxC covariance with eps added on the diagonal.
        chw = tf.transpose(a=encoding[0], perm=(2, 0, 1))
        channels, height, width = tf.unstack(tf.shape(input=chw))
        flat = tf.reshape(chw, (channels, height * width))
        mean = tf.reduce_mean(input_tensor=flat, axis=1, keepdims=True)
        centered = flat - mean
        denom = tf.cast(height * width, tf.float32) - 1.
        covariance = (tf.matmul(centered, centered, transpose_b=True) / denom
                      + tf.eye(channels) * eps)
        return (channels, height, width), mean, centered, covariance

    def _scaled_projection(singular_values, basis, exponent):
        # U_k * diag(S_k ** exponent) * U_k^T, where k counts the singular
        # values greater than 1e-5 (small ones are filtered out).
        rank = tf.reduce_sum(input_tensor=tf.cast(
            tf.greater(singular_values, 1e-5), tf.int32))
        kept_basis = basis[:, :rank]
        scale = tf.linalg.tensor_diag(
            tf.pow(singular_values[:rank], exponent))
        return tf.matmul(tf.matmul(kept_basis, scale), kept_basis,
                         transpose_b=True)

    content_dims, mc, fc, fcfc = _center_and_covariance(content)
    _, ms, _, fsfs = _center_and_covariance(style)

    # tf.svd is slower on GPU, see
    # https://github.com/tensorflow/tensorflow/issues/13603
    # (The original author notes that a NumPy SVD through tf.py_func is an
    # alternative: slower than CPU tf.svd, but it won't segfault for
    # ill-conditioned matrices.)
    with tf.device('/cpu:0'):
        Sc, Uc, _ = tf.linalg.svd(fcfc)
        Ss, Us, _ = tf.linalg.svd(fsfs)

    # Whiten the content feature, then color it with the style statistics
    whitening = _scaled_projection(Sc, Uc, -0.5)
    coloring = _scaled_projection(Ss, Us, 0.5)
    fc_hat = tf.matmul(whitening, fc)

    # Re-center with the mean of the style
    fcs_hat = tf.matmul(coloring, fc_hat) + ms

    # Blend the whiten-colored feature with the original content feature
    mixed = alpha * fcs_hat + (1 - alpha) * (fc + mc)

    # CxH*W -> CxHxW -> 1xHxWxC
    mixed_chw = tf.reshape(mixed, content_dims)
    return tf.expand_dims(tf.transpose(a=mixed_chw, perm=(1, 2, 0)), 0)
with tf.variable_scope("ae", reuse=True): tf.get_variable_scope().reuse_variables() encoder_output, state_enc = AE.encoder(residue, init_enc) decoder_output, state_dec = AE.decoder(encoder_output, init_dec) residue = X - decoder_output print('iteration', _ + 2) #%% Cost and optimization setup y_true = X cost = tf.reduce_mean(tf.pow(y_true - decoder_output, 2)) optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8).minimize(cost) #%%############################################################################## # Training init = tf.global_variables_initializer() sess = tf.Session() #sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) sess.run(init) # Training cycle cost_vector = [] for epoch in range(training_epochs): for i in range(n_batch):
def wct_style_swap(content, style, alpha, patch_size=2, stride=2, eps=1e-8):
    '''Modified Whiten-Color Transform that performs style swap on whitened
       content/style encodings before coloring

       Assumes the content/style encodings have shape 1xHxWxC; only batch
       element 0 of each is used.

       Args:
         content: content encoding.
         style: style encoding.
         alpha: blend weight; the result is
           alpha * stylized + (1 - alpha) * content.
         patch_size, stride: forwarded unchanged to `style_swap`.
         eps: value added to the diagonal of both covariance matrices.

       Returns:
         The blended encoding, reshaped back to 1xHxWxC.
    '''
    # Index the batch element instead of calling tf.squeeze: squeeze removes
    # *every* size-1 axis, so an encoding with H, W or C equal to 1 would lose
    # an axis and break the 3-axis transpose below. This matches wct_tf.
    content_t = tf.transpose(a=content[0], perm=(2, 0, 1))
    style_t = tf.transpose(a=style[0], perm=(2, 0, 1))

    Cc, Hc, Wc = tf.unstack(tf.shape(input=content_t))
    Cs, Hs, Ws = tf.unstack(tf.shape(input=style_t))

    # CxHxW -> CxH*W
    content_flat = tf.reshape(content_t, (Cc, Hc * Wc))
    style_flat = tf.reshape(style_t, (Cs, Hs * Ws))

    # Content covariance
    mc = tf.reduce_mean(input_tensor=content_flat, axis=1, keepdims=True)
    fc = content_flat - mc
    fcfc = tf.matmul(fc, fc, transpose_b=True) / (
        tf.cast(Hc * Wc, tf.float32) - 1.) + tf.eye(Cc) * eps

    # Style covariance
    ms = tf.reduce_mean(input_tensor=style_flat, axis=1, keepdims=True)
    fs = style_flat - ms
    fsfs = tf.matmul(fs, fs, transpose_b=True) / (
        tf.cast(Hs * Ws, tf.float32) - 1.) + tf.eye(Cs) * eps

    # tf.svd is slower on GPU, see
    # https://github.com/tensorflow/tensorflow/issues/13603
    # (The original author notes that a NumPy SVD through tf.py_func is an
    # alternative: slower than CPU tf.svd, but it won't segfault for
    # ill-conditioned matrices.)
    with tf.device('/cpu:0'):
        Sc, Uc, _ = tf.linalg.svd(fcfc)
        Ss, Us, _ = tf.linalg.svd(fsfs)

    # Keep only singular values greater than 1e-5
    k_c = tf.reduce_sum(input_tensor=tf.cast(tf.greater(Sc, 1e-5), tf.int32))
    k_s = tf.reduce_sum(input_tensor=tf.cast(tf.greater(Ss, 1e-5), tf.int32))

    ### Whiten content
    Dc = tf.linalg.tensor_diag(tf.pow(Sc[:k_c], -0.5))
    fc_hat = tf.matmul(
        tf.matmul(tf.matmul(Uc[:, :k_c], Dc), Uc[:, :k_c], transpose_b=True),
        fc)

    # Reshape before passing to style swap, CxH*W -> 1xHxWxC
    whiten_content = tf.expand_dims(
        tf.transpose(a=tf.reshape(fc_hat, [Cc, Hc, Wc]), perm=[1, 2, 0]), 0)

    ### Whiten style before swapping
    Ds = tf.linalg.tensor_diag(tf.pow(Ss[:k_s], -0.5))
    whiten_style = tf.matmul(
        tf.matmul(tf.matmul(Us[:, :k_s], Ds), Us[:, :k_s], transpose_b=True),
        fs)

    # Reshape before passing to style swap, CxH*W -> 1xHxWxC
    whiten_style = tf.expand_dims(
        tf.transpose(a=tf.reshape(whiten_style, [Cs, Hs, Ws]),
                     perm=[1, 2, 0]), 0)

    ### Style swap whitened encodings
    ss_feature = style_swap(whiten_content, whiten_style, patch_size, stride)

    # HxWxC -> CxH*W
    ss_feature = tf.transpose(a=tf.reshape(ss_feature, [Hc * Wc, Cc]),
                              perm=[1, 0])

    ### Color style-swapped encoding with style
    Ds_sq = tf.linalg.tensor_diag(tf.pow(Ss[:k_s], 0.5))
    fcs_hat = tf.matmul(
        tf.matmul(tf.matmul(Us[:, :k_s], Ds_sq), Us[:, :k_s],
                  transpose_b=True), ss_feature)
    fcs_hat = fcs_hat + ms

    ### Blend style-swapped & colored encoding with original content encoding
    blended = alpha * fcs_hat + (1 - alpha) * (fc + mc)

    # CxH*W -> CxHxW
    blended = tf.reshape(blended, (Cc, Hc, Wc))

    # CxHxW -> 1xHxWxC
    blended = tf.expand_dims(tf.transpose(a=blended, perm=(1, 2, 0)), 0)

    return blended
def trainDot(request):
    """Fit y = W * x + b to the stored Axis points and return the result as JSON.

    The starting values of W and b are read from the POST fields 'W' and 'b'
    (each defaults to '0.0' when missing or empty). Training runs 100 epochs
    of full-batch gradient descent on half the mean squared error.

    Returns:
      An HttpResponse whose JSON body is
        {'code': 'True', 'message': {'W': ..., 'b': ..., 'cost': ...}}
      on success, or {'code': 'False', 'message': <error text>} when anything
      raises (including a non-numeric 'W' or 'b').
    """
    logger.info("Entering trainDot...")
    WW = request.POST.get('W', '')
    bb = request.POST.get('b', '')
    if not WW:
        WW = '0.0'
    if not bb:
        bb = '0.0'
    logger.info('W: %s, b: %s' % (WW, bb))

    try:
        learning_rate = 0.01
        training_epochs = 100
        display_step = 50

        # Training Data
        vlqs = Axis.objects.values_list('ax', flat=True)
        train_X = np.asarray(vlqs)
        vlqs = Axis.objects.values_list('ay', flat=True)
        train_Y = np.asarray(vlqs)

        # Build and run everything in a private graph. Without this, every
        # request would add new placeholders, variables and optimizer ops to
        # the process-wide default graph, which then grows without bound.
        with tf.Graph().as_default():
            X = tf.placeholder("float")
            Y = tf.placeholder("float")

            W = tf.Variable(float(WW), name="weight")
            b = tf.Variable(float(bb), name="bias")

            # Construct a linear model
            pred = tf.add(tf.multiply(X, W), b)

            # Mean squared error (halved)
            cost = tf.reduce_mean(tf.pow(pred - Y, 2)) / 2

            # Gradient descent
            # Note, minimize() knows to modify W and b because Variable
            # objects are trainable=True by default
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate).minimize(cost)

            # Initialize the variables (i.e. assign their default value)
            init = tf.global_variables_initializer()

            train_set = {X: train_X, Y: train_Y}

            # Start training; the session binds to the private graph because
            # it is the default graph inside this block.
            with tf.Session() as sess:
                # Run the initializer
                sess.run(init)

                # Fit all training data (one full-batch step per epoch)
                for epoch in range(training_epochs):
                    sess.run(optimizer, train_set)

                    # Display logs per epoch step
                    if (epoch + 1) % display_step == 0:
                        c = sess.run(cost, train_set)
                        logger.info(
                            "Epoch: %04d, cost: %s, W: %s, b: %s" %
                            (epoch + 1, "{:.9f}".format(c), "{:.9f}".format(
                                sess.run(W)), "{:.9f}".format(sess.run(b))))

                logger.info("Optimization Finished!")
                training_cost = sess.run(cost,
                                         feed_dict={X: train_X, Y: train_Y})
                WW = sess.run(W)
                bb = sess.run(b)
                logger.info("Training cost: %s, W: %s, b: %s" %
                            (training_cost, WW, bb))

        logger.info('Done!')
        return HttpResponse(
            json.dumps({
                'code': 'True',
                'message': {
                    'W': str(WW),
                    'b': str(bb),
                    'cost': str(training_cost)
                }
            }))
    except Exception as e:
        # Request boundary: report the failure to the client instead of
        # letting it propagate.
        logger.error(str(e))
        return HttpResponse(json.dumps({'code': 'False', 'message': str(e)}))
def main():
    """Create the model and start the training.

    Reads the command-line arguments, builds the input queue, the
    DeepLab-ResNet network, the loss and three momentum optimizers (one per
    learning-rate group), optionally restores a checkpoint, then runs
    `args.num_steps` training steps, writing a summary and a checkpoint every
    `args.save_pred_every` steps.
    """
    args = get_arguments()

    # args.input_size is a "height,width" string.
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    # Create network.
    net = DeepLabResNetModel({'data': image_batch},
                             is_training=args.is_training,
                             num_classes=args.num_classes)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
    # if they are presented in var_list of the optimiser definition.

    # Predictions.
    raw_output = net.layers['fc1_voc12']
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    # Variables with 'fc' in their name are skipped only when
    # args.not_restore_last is set.
    restore_var = [
        v for v in tf.global_variables()
        if 'fc' not in v.name or not args.not_restore_last
    ]
    # Variables with 'beta' or 'gamma' in their name are excluded from
    # training.
    all_trainable = [
        v for v in tf.trainable_variables()
        if 'beta' not in v.name and 'gamma' not in v.name
    ]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable
                      if 'fc' not in v.name]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable
                      if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable
                      if 'biases' in v.name]  # lr * 20.0
    # Sanity checks: the groups must partition the trainable variables.
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Predictions: ignoring all predictions with labels greater or equal than n_classes
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [
        -1,
    ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)),
                         1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)
    # L2 weight decay over every trainable variable with 'weights' in its name.
    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'weights' in v.name
    ]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch, args.save_num_images, IMG_MEAN],
                                tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [label_batch, args.save_num_images, args.num_classes],
        tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images, args.num_classes],
                               tf.uint8)

    total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Define loss and optimisation parameters.
    # Polynomial decay: lr = base_lr * (1 - step / num_steps) ** power, with
    # the current step fed through step_ph on every iteration.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

    # Compute all gradients in one call, then slice the result back into the
    # three groups in the same order the variable lists were concatenated.
    grads = tf.gradients(reduced_loss,
                         conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) +
                                            len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        feed_dict = {step_ph: step}

        if step % args.save_pred_every == 0:
            # Snapshot step: also fetch the summary and save a checkpoint.
            loss_value, images, labels, preds, summary, _ = sess.run(
                [
                    reduced_loss, image_batch, label_batch, pred,
                    total_summary, train_op
                ],
                feed_dict=feed_dict)
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, train_op],
                                     feed_dict=feed_dict)
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
            step, loss_value, duration))
    coord.request_stop()
    coord.join(threads)
# Define the variables holding the size_factor and price we set during training. # We initialize them to some random values based on the normal distribution. tf_size_factor = tf.Variable(np.random.randn(), name="size_factor") tf_price_offset = tf.Variable(np.random.randn(), name="price_offset") # 2. Define the operations for the predicting values - predicted price = (size_factor * house_size ) + price_offset # Notice, the use of the tensorflow add and multiply functions. These add the operations to the computation graph, # AND the tensorflow methods understand how to deal with Tensors. Therefore do not try to use numpy or other library # methods. tf_price_pred = tf.add(tf.multiply(tf_size_factor, tf_house_size), tf_price_offset) # 3. Define the Loss Function (how much error) - Mean squared error tf_cost = tf.reduce_sum(tf.pow(tf_price_pred-tf_price, 2))/(2*num_train_samples) # Optimizer learning rate. The size of the steps down the gradient learning_rate = 0.1 # 4. define a Gradient descent optimizer that will minimize the loss defined in the operation "cost". optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(tf_cost) # Initializing the variables init = tf.global_variables_initializer() # Launch the graph in the session with tf.Session() as sess: sess.run(init)
def chi_squared_loss(out, truth_vec, epsilon):
    """Symmetric chi-squared loss, intended for distributions.

    Per example: sum over the last axis of
    (out - truth)^2 / (out + truth + epsilon); the result is the mean of
    those sums. `epsilon` keeps the denominator away from zero.

    The original author notes that the value ranges over 0-2 and that Mo
    Akbar also recommends this loss.
    """
    squared_diff = tf.pow(out - truth_vec, 2)
    denominator = out + truth_vec + epsilon
    per_example = tf.reduce_sum(squared_diff / denominator, axis=-1)
    return tf.reduce_mean(per_example)
# Demo script: builds a tiny graph (add, multiply, matmul, pow), writes the
# graph for TensorBoard and prints the evaluated power op.
import tensorflow as tf

a = tf.constant([2, 2], name="a")
b = tf.constant([[3, 4], [2, 0]], name="b")
x = tf.add(a, b, name="addition")
y = tf.multiply(a, b, name="mul")
# [a] wraps the 1-D constant in a list so matmul receives a 1x2 matrix.
z = tf.matmul([a], b, name="matmul")
# Named pow_op rather than `pow` so the builtin is not shadowed.
pow_op = tf.pow(x, y, name="power")

with tf.Session() as sess:
    writer = tf.summary.FileWriter("./graphs/sample", sess.graph)
    try:
        pow_value = sess.run(pow_op)
        # Only the power op is evaluated; x and y are printed as (unevaluated)
        # Tensor objects, exactly as before.
        print(pow_value, x, y)
    finally:
        # Close the event file even if evaluation fails.
        writer.close()

"""
with tf.device('/gpu:0'):
    a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name='a')
    b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name='b')
    c = tf.multiply(a, b)
# Creates a session with log_device_placement set to True.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# Runs the op.
print (sess.run(c))
"""
def squared_error_loss(out, truth_vec):
    """Mean over examples of the squared error summed along the last axis."""
    residual = truth_vec - out
    per_example = tf.reduce_sum(tf.pow(residual, 2), axis=-1)
    return tf.reduce_mean(per_example)
train_data_output = np.array(my_output)
no_of_inputs = train_data_input.shape[0]

input_data = tf.placeholder("float")
output_data = tf.placeholder("float")

weights = tf.Variable(np.random.randn())
bias = tf.Variable(np.random.randn())

learning_rate = 0.01
epochs = 1000

# tf.mul was removed in TensorFlow 1.0 in favour of tf.multiply. Prefer the
# new name and fall back to the old one only on releases that lack it.
_multiply = getattr(tf, "multiply", None) or tf.mul

# Linear model: activation = input * weights + bias
activation = tf.add(_multiply(input_data, weights), bias)
# Half the mean squared error over the fed samples.
cost = tf.reduce_sum(tf.pow(activation - output_data, 2)) / (2 * no_of_inputs)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# tf.initialize_all_variables is deprecated; use its replacement when the
# installed TensorFlow provides it.
_make_init_op = (getattr(tf, "global_variables_initializer", None)
                 or tf.initialize_all_variables)
init = _make_init_op()

with tf.Session() as sess:
    sess.run(init)
    # One gradient step per (x, y) sample, repeated for every epoch.
    for each_epoch in range(epochs):
        for (x, y) in zip(train_data_input, train_data_output):
            sess.run(train_step, feed_dict={input_data: x, output_data: y})
    print("Training completed!")
    training_cost = sess.run(cost,
                             feed_dict={input_data: train_data_input,
                                        output_data: train_data_output})
    print("Training cost=", training_cost, "W=", sess.run(weights), "b=",
          sess.run(bias), '\n')
def euclidean_distance_loss(out, truth_vec):
    """Mean over examples of the Euclidean (L2) distance along the last axis."""
    residual = truth_vec - out
    squared_distance = tf.reduce_sum(tf.pow(residual, 2), axis=-1)
    return tf.reduce_mean(tf.sqrt(squared_distance))
def __init__(self,
             n_mid,
             embedding_dim,
             hidden_size,
             batch_size,
             num_interest,
             seq_len=256,
             add_pos=True):
    """Build the self-attentive multi-interest graph on top of the base model.

    Args:
      n_mid, embedding_dim, hidden_size, batch_size, seq_len: forwarded to
        the base-class constructor (with flag="ComiRec_SA").
      num_interest: number of attention heads, i.e. interest vectors
        produced per example.
      add_pos: when true, a trainable [1, seq_len, embedding_dim] position
        embedding is added to the history embeddings before the attention
        weights are computed.

    Reads self.item_his_eb, self.mask and self.item_eb, which are presumably
    created by the base-class constructor -- confirm there. Sets
    self.user_eb to the interest embeddings and finishes by calling
    self.build_sampled_softmax_loss.
    """
    super(Model_ComiRec_SA, self).__init__(n_mid,
                                           embedding_dim,
                                           hidden_size,
                                           batch_size,
                                           seq_len,
                                           flag="ComiRec_SA")

    self.dim = embedding_dim
    item_list_emb = tf.reshape(self.item_his_eb,
                               [-1, seq_len, embedding_dim])

    if add_pos:
        self.position_embedding = \
            tf.get_variable(
                shape=[1, seq_len, embedding_dim],
                name='position_embedding')
        # Tile the single position embedding across the (dynamic) batch axis.
        item_list_add_pos = item_list_emb + tf.tile(
            self.position_embedding, [tf.shape(item_list_emb)[0], 1, 1])
    else:
        item_list_add_pos = item_list_emb

    num_heads = num_interest
    with tf.variable_scope("self_atten", reuse=tf.AUTO_REUSE) as scope:
        item_hidden = tf.layers.dense(item_list_add_pos,
                                      hidden_size * 4,
                                      activation=tf.nn.tanh)
        item_att_w = tf.layers.dense(item_hidden, num_heads, activation=None)
        # Swap the last two axes so the softmax below runs over the sequence
        # positions of each head.
        item_att_w = tf.transpose(item_att_w, [0, 2, 1])

        atten_mask = tf.tile(tf.expand_dims(self.mask, axis=1),
                             [1, num_heads, 1])
        # Large negative value: positions where the mask is 0 get ~zero
        # weight after the softmax.
        paddings = tf.ones_like(atten_mask) * (-2**32 + 1)

        item_att_w = tf.where(tf.equal(atten_mask, 0), paddings, item_att_w)
        item_att_w = tf.nn.softmax(item_att_w)

        # The weights are applied to the embeddings *without* the position
        # term.
        interest_emb = tf.matmul(item_att_w, item_list_emb)

    self.user_eb = interest_emb

    # Score every interest vector against the item embedding.
    atten = tf.matmul(
        self.user_eb,
        tf.reshape(self.item_eb, [get_shape(item_list_emb)[0], self.dim, 1]))
    # tf.pow(..., 1) leaves the scores unchanged.
    atten = tf.nn.softmax(
        tf.pow(tf.reshape(atten, [get_shape(item_list_emb)[0], num_heads]),
               1))

    # Pick, per example, the interest vector with the highest score: the
    # argmax head index is offset by example_index * num_heads to index into
    # the flattened interest embeddings.
    readout = tf.gather(
        tf.reshape(self.user_eb, [-1, self.dim]),
        tf.argmax(atten, axis=1, output_type=tf.int32) +
        tf.range(tf.shape(item_list_emb)[0]) * num_heads)

    self.build_sampled_softmax_loss(self.item_eb, readout)
def coarseness(image, kmax=5):
    """Return a scalar coarseness measure of `image`.

    The image is converted to grayscale. For every scale k = 1..kmax it is
    box-filtered with a (2**k x 2**k) all-ones kernel, and horizontal and
    vertical differences spanning 2 * 2**k pixels are taken of the filtered
    image. For each pixel the scale index with the largest response is
    selected, and the mean of 2 ** index over all pixels is returned.

    Args:
      image: RGB image tensor accepted by tf.image.rgb_to_grayscale and
        tf.nn.conv2d.
      kmax: number of scales to evaluate (window sizes 2**1 .. 2**kmax).
        Must be at least 1. Defaults to 5, the previously hard-coded value.

    Returns:
      A scalar float tensor.

    Raises:
      ValueError: if kmax is smaller than 1.
    """
    if kmax < 1:
        raise ValueError("kmax must be >= 1, got %r" % (kmax,))

    image = tf.image.rgb_to_grayscale(image)
    unit_strides = [1, 1, 1, 1]

    horizons = []
    verticals = []
    for k in range(1, kmax + 1):
        window = np.power(2, k)

        # All-ones kernel: this is the window *sum*, not a normalized mean.
        kernel = tf.ones([window, window, 1, 1])
        average_gray = tf.nn.conv2d(image, kernel, strides=unit_strides,
                                    padding='SAME')

        # Horizontal difference kernel: -1 at the first tap, +1 at the last.
        kernel_h = np.zeros([1, 2 * window, 1, 1])
        kernel_h[0][0][0][0] = -1
        kernel_h[0][2 * window - 1][0][0] = 1
        horizon_k = tf.nn.conv2d(average_gray, kernel_h,
                                 strides=unit_strides, padding='SAME')
        horizons.append(tf.squeeze(horizon_k, [3]))

        # Vertical difference kernel: same taps along the other axis.
        kernel_v = np.zeros([2 * window, 1, 1, 1])
        kernel_v[0][0][0][0] = -1
        kernel_v[2 * window - 1][0][0][0] = 1
        vertical_k = tf.nn.conv2d(average_gray, kernel_v,
                                  strides=unit_strides, padding='SAME')
        verticals.append(tf.squeeze(vertical_k, [3]))

    # NOTE(review): the per-scale responses are concatenated along axis 0,
    # which is also the batch axis; axis 0 indexes the scale only when the
    # batch size is 1 -- confirm callers pass a single image.
    horizon = tf.concat(horizons, 0)
    vertical = tf.concat(verticals, 0)

    h_max_index = tf.to_int32(tf.argmax(horizon, 0))
    v_max_index = tf.to_int32(tf.argmax(vertical, 0))
    h_max = tf.reduce_max(horizon, 0)
    v_max = tf.reduce_max(vertical, 0)

    # Per pixel, keep the index from whichever direction responds more
    # strongly.
    comp = tf.greater(h_max, v_max)
    Sbest = tf.where(comp, h_max_index, v_max_index)

    # NOTE(review): the index is 0-based (0 .. kmax-1) while the windows are
    # 2**1 .. 2**kmax, so this yields 2**(k-1) for scale k -- confirm that is
    # intended.
    Sbest = tf.to_float(tf.pow(2, Sbest))
    frcs = tf.reduce_mean(Sbest)
    return frcs
def call(self, item_his_emb, item_eb, t_batch_ph, t_his_batch_ph, mask, ta):
    """Route history embeddings into interest capsules, optionally time-weighted.

    The method builds, in order:
      1. ``t_factor``: a sigmoid output of a two-layer dense net fed with the
         time encodings of the current time, the history times and their
         difference.
      2. ``g_factor``: a periodic gate computed from ``self.phase``,
         ``self.tao``, ``self.ratio`` and ``self.alpha``, mixed per item by a
         softmax over ``item_his_emb @ self.gate_embeddings_var``.
      3. Three routing iterations over the projected history embeddings; in
         the first two the routing-logit update is multiplied by the selected
         factor when ``ta`` is truthy.
      4. A readout over the resulting capsules using ``item_eb``.

    Args:
        item_his_emb: history item embeddings (assumed
            ``[batch, seq_len, dim]`` -- TODO confirm against caller).
        item_eb: target item embedding; reshaped here to ``[-1, dim, 1]``.
        t_batch_ph: per-example timestamp; a new axis 1 is added and tiled to
            ``self.seq_len``.
        t_his_batch_ph: timestamps of the history items (presumably
            ``[batch, seq_len]``).
        mask: history mask; positions equal to 0 receive zero routing weight.
        ta: plain Python mode flag. Falsy: no time weighting. ``1``: weight
            by ``t_factor``. ``2``: weight by ``t_factor + g_factor``.

    Returns:
        Tuple ``(interest_capsule, readout, loss_reg)`` where
        ``interest_capsule`` is reshaped to ``[-1, num_interest, dim]``,
        ``readout`` is the attended (or, with ``self.hard_readout``, the
        arg-max) capsule per example, and ``loss_reg`` is the regulariser
        built from the norms of ``t_factor``, ``self.ratio`` and
        ``self.phase``.

    Raises:
        ValueError: if ``ta`` is truthy but neither 1 nor 2 (previously this
            surfaced as a NameError on ``factor`` in the routing loop).
    """

    def squash(capsule):
        # Capsule squashing: scale by |v|^2 / (1 + |v|^2) / |v|.
        cap_norm = tf.reduce_sum(tf.square(capsule), -1, True)
        scalar_factor = cap_norm / (1 + cap_norm) / tf.sqrt(cap_norm + 1e-9)
        return scalar_factor * capsule

    # ---- t_factor: learned factor from time encodings -------------------
    t_batch_ph = tf.tile(tf.expand_dims(t_batch_ph, axis=1),
                         [1, self.seq_len])
    t_emb_batch = self.time_encoder.get_time_encode(t_batch_ph)
    t_emb_his_batch = self.time_encoder.get_time_encode(t_his_batch_ph)
    delta_t_emb_batch = self.time_encoder.get_time_encode(
        (t_batch_ph - t_his_batch_ph))
    time_feature = tf.concat(
        [t_emb_batch, t_emb_his_batch, delta_t_emb_batch], axis=2)
    hidden1 = tf.layers.dense(time_feature, 100, activation=tf.nn.relu,
                              name='t_layer1')
    t_factor = tf.layers.dense(hidden1, 1, activation=tf.nn.sigmoid,
                               name='t_layer2')
    # Move the size-1 feature axis in front of the sequence axis.
    t_factor = tf.transpose(t_factor, [0, 2, 1])

    # ---- g_factor: periodic gate ----------------------------------------
    # phi is the position inside the current period, normalised by tao.
    phi = tf.mod((tf.expand_dims(t_his_batch_ph, axis=2) - self.phase),
                 self.tao) / self.tao
    # Below `ratio`: clipped half sine wave; otherwise a linear term
    # alpha * phi.
    gate = tf.where(tf.less(phi, self.ratio),
                    tf.maximum(tf.sin(3.14159 * phi / self.ratio), 0),
                    self.alpha * phi)
    gate_ratio = tf.multiply(
        tf.nn.softmax(tf.matmul(item_his_emb, self.gate_embeddings_var),
                      axis=2),
        gate)
    g_factor = tf.expand_dims(tf.reduce_sum(gate_ratio, axis=2), axis=1)

    # ---- regulariser -----------------------------------------------------
    reg1 = tf.norm(t_factor)
    reg2 = tf.norm(self.ratio) + tf.norm(self.phase) / 100000
    loss_reg = 0.1 * reg1 + 1 * reg2 + 1 / reg1 + 0.1 / reg2

    # Debug print of the default regulariser.  It only executes when `ta` is
    # truthy, because it is attached through a control dependency in the
    # routing loop, and it always prints the formula above (also for ta == 1).
    print_ops = [tf.print(loss_reg, 'loss_reg')]

    if ta == 1:
        loss_reg = 1 * reg1 + 1 / reg1
        factor = tf.tile(t_factor, [1, self.num_interest, 1])
    elif ta == 2:
        # loss_reg keeps the default formula computed above.
        factor = tf.tile(t_factor + g_factor, [1, self.num_interest, 1])
    elif ta:
        raise ValueError('ta must be 0, 1 or 2, got %r' % (ta,))
    else:
        factor = None

    # ---- project history embeddings into per-interest space --------------
    with tf.variable_scope('bilinear'):
        if self.bilinear_type == 0:
            item_emb_hat = tf.layers.dense(item_his_emb, self.dim,
                                           activation=None,
                                           bias_initializer=None)
            item_emb_hat = tf.tile(item_emb_hat, [1, 1, self.num_interest])
        elif self.bilinear_type == 1:
            item_emb_hat = tf.layers.dense(item_his_emb,
                                           self.dim * self.num_interest,
                                           activation=None,
                                           bias_initializer=None)
        else:
            w = tf.get_variable(
                'weights',
                shape=[1, self.seq_len, self.num_interest * self.dim,
                       self.dim],
                initializer=tf.random_normal_initializer())
            # [N, T, 1, C]
            u = tf.expand_dims(item_his_emb, axis=2)
            # [N, T, num_caps * dim_caps]
            item_emb_hat = tf.reduce_sum(w[:, :self.seq_len, :, :] * u,
                                         axis=3)

    item_emb_hat = tf.reshape(
        item_emb_hat, [-1, self.seq_len, self.num_interest, self.dim])
    item_emb_hat = tf.transpose(item_emb_hat, [0, 2, 1, 3])
    item_emb_hat = tf.reshape(
        item_emb_hat, [-1, self.num_interest, self.seq_len, self.dim])

    # The first two routing rounds may run on a gradient-stopped copy; the
    # final round always uses `item_emb_hat` itself.
    if self.stop_grad:
        item_emb_hat_iter = tf.stop_gradient(item_emb_hat,
                                             name='item_emb_hat_iter')
    else:
        item_emb_hat_iter = item_emb_hat

    # `get_shape` is a helper defined elsewhere in this file.
    routing_shape = [get_shape(item_his_emb)[0], self.num_interest,
                     self.seq_len]
    if self.bilinear_type > 0:
        capsule_weight = tf.stop_gradient(tf.zeros(routing_shape))
    else:
        capsule_weight = tf.stop_gradient(
            tf.truncated_normal(routing_shape, stddev=1.0))

    # Loop-invariant masking tensors, built once instead of once per round.
    atten_mask = tf.tile(tf.expand_dims(mask, axis=1),
                         [1, self.num_interest, 1])
    paddings = tf.zeros_like(atten_mask)
    is_padding = tf.equal(atten_mask, 0)

    # ---- routing ------------------------------------------------------------
    for i in range(3):
        capsule_softmax_weight = tf.nn.softmax(capsule_weight, axis=1)
        capsule_softmax_weight = tf.where(is_padding, paddings,
                                          capsule_softmax_weight)
        capsule_softmax_weight = tf.expand_dims(capsule_softmax_weight, 2)

        if i < 2:
            interest_capsule = squash(
                tf.matmul(capsule_softmax_weight, item_emb_hat_iter))
            delta_weight = tf.matmul(
                item_emb_hat_iter,
                tf.transpose(interest_capsule, [0, 1, 3, 2]))
            delta_weight = tf.reshape(
                delta_weight, [-1, self.num_interest, self.seq_len])
            if ta:
                with tf.control_dependencies(print_ops):
                    delta_weight = delta_weight * factor
            # NOTE(review): applied whether or not `ta` is set; nesting was
            # reconstructed from whitespace-collapsed source -- confirm.
            capsule_weight = capsule_weight + delta_weight
        else:
            interest_capsule = squash(
                tf.matmul(capsule_softmax_weight, item_emb_hat))

    interest_capsule = tf.reshape(interest_capsule,
                                  [-1, self.num_interest, self.dim])

    if self.relu_layer:
        interest_capsule = tf.layers.dense(interest_capsule, self.dim,
                                           activation=tf.nn.relu,
                                           name='proj')

    # ---- readout against the target item -----------------------------------
    atten = tf.matmul(interest_capsule,
                      tf.reshape(item_eb, [-1, self.dim, 1]))
    # The exponent of 1 leaves the scores unchanged before the softmax.
    atten = tf.nn.softmax(
        tf.pow(tf.reshape(atten, [-1, self.num_interest]), 1))

    if self.hard_readout:
        # Pick, per example, the capsule with the highest attention by
        # indexing into the flattened [batch * num_interest, dim] capsules.
        readout = tf.gather(
            tf.reshape(interest_capsule, [-1, self.dim]),
            tf.argmax(atten, axis=1, output_type=tf.int32) +
            tf.range(tf.shape(item_his_emb)[0]) * self.num_interest)
    else:
        readout = tf.matmul(
            tf.reshape(atten,
                       [get_shape(item_his_emb)[0], 1, self.num_interest]),
            interest_capsule)
        readout = tf.reshape(readout,
                             [get_shape(item_his_emb)[0], self.dim])

    return interest_capsule, readout, loss_reg
def gelu_fast(_x):
    """Return the tanh approximation of GELU applied element-wise to ``_x``.

    Computes ``0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x**3)))``.

    Args:
        _x: floating-point tensor.

    Returns:
        Tensor with the same shape and dtype as ``_x``.
    """
    # Keep the constant a Python float: TensorFlow then casts it to the dtype
    # of `_x`.  Building it with tf.sqrt() yields a float32 tensor, which
    # fails with a dtype mismatch when `_x` is float16 or float64.
    sqrt_two_over_pi = float(np.sqrt(2 / np.pi))
    inner = sqrt_two_over_pi * (_x + 0.044715 * tf.pow(_x, 3))
    return 0.5 * _x * (1 + tf.tanh(inner))
def loss_func(self):
    """Build the mean-squared reconstruction cost.

    Stores the last entry of ``self.h_d`` as ``self.x_reconstruct`` and the
    mean of its element-wise squared difference from ``self.x_target`` as
    ``self.cost``.  The ops are created under the ``cost`` name scope.
    Nothing is returned.
    """
    with tf.name_scope('cost'):
        reconstruction = self.h_d[-1]
        self.x_reconstruct = reconstruction
        squared_error = tf.pow((reconstruction - self.x_target), 2)
        self.cost = tf.reduce_mean(squared_error)
# Build a two-layer graph (noisy-OR hidden layer feeding a noisy-AND output
# layer) for one example at a time, plus its training and clipping ops.
# `in_size`, `out_size`, `num_hidden`, `n` and the helper functions used
# below are defined elsewhere in this file.

# One input vector of length in_size and one target vector of length out_size.
inputs = tf.placeholder('float64', [in_size])
targets = tf.placeholder('float64', [out_size])

# Weights start uniformly in [0, 1).
hidden_weights = tf.Variable(
    np.random.uniform(low=0, high=1, size=(num_hidden, in_size)))
output_weights = tf.Variable(
    np.random.uniform(low=0, high=1, size=(out_size, num_hidden)))

# Turn the length-in_size vector into a (1, in_size) row.
inputs_prime = tf.transpose(tf.expand_dims(inputs, 1))

# Perform computation
hidden_out = noisy_or_activation(inputs_prime, hidden_weights)
output = noisy_and_activation(hidden_out, output_weights)

# Sum of squared differences between the (out_size, 1) targets and the output.
errors = tf.pow(tf.subtract(tf.expand_dims(targets, 1), output), 2.0)
error = tf.reduce_sum(errors)
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(error)

# Ops that write the weights back clipped to [0, 1]; they are only defined
# here -- presumably run after each training step, verify against the
# training loop.
clip_op_hidden = tf.assign(hidden_weights,
                           tf.clip_by_value(hidden_weights, 0, 1))
clip_op_output = tf.assign(output_weights,
                           tf.clip_by_value(output_weights, 0, 1))

# Despite its name, `model` is the variable-initialisation op.
model = tf.global_variables_initializer()

# Take the first (points, out) pair returned by generate_expressions and
# convert every point with transform_input.
points, out = generate_expressions(n, 1)[0]
data = list(map(lambda x: transform_input(x), points))

with tf.Session() as session:
    # Initialise all variables.
    session.run(model)