def disjunction_of_literals(literals, label="no_label"):
    """Build the truth value of a disjunction (clause) of literals.

    The literal tensors are concatenated along dimension 1, combined along
    that dimension with the disjunction selected by the module-level
    ``default_tnorm``, and the outcome is then reduced over all of its
    elements with the module-level ``default_aggregator``.

    Args:
        literals: iterable of objects exposing a ``tensor`` attribute.
        label: op name used by the branches that accept a ``name``.

    Returns:
        The aggregated truth-value tensor.

    Raises:
        ValueError: if ``default_tnorm`` or ``default_aggregator`` is not one
            of the names handled below.  (Previously an unknown t-norm failed
            with an unbound ``result`` and an unknown aggregator silently
            returned ``None``.)
    """
    list_of_literal_tensors = [lit.tensor for lit in literals]
    literals_tensor = tf.concat(1, list_of_literal_tensors)

    # Step 1: disjunction across the literals (dimension 1).
    if default_tnorm == "product":
        result = 1.0 - tf.reduce_prod(1.0 - literals_tensor, 1, keep_dims=True)
    elif default_tnorm == "yager2":
        result = tf.minimum(
            1.0,
            tf.sqrt(tf.reduce_sum(tf.square(literals_tensor), 1,
                                  keep_dims=True)))
    elif default_tnorm == "luk":
        print("data aggregator is lukas")
        result = tf.minimum(
            1.0, tf.reduce_sum(literals_tensor, 1, keep_dims=True))
        # Kept from the original; the return value of PR is not used here.
        PR(result)
    elif default_tnorm == "goedel":
        result = tf.reduce_max(literals_tensor, 1, keep_dims=True, name=label)
    else:
        raise ValueError("unsupported default_tnorm: %r" % (default_tnorm,))

    # Step 2: aggregate over every element of ``result``.
    if default_aggregator == "product":
        return tf.reduce_prod(result, keep_dims=True)
    if default_aggregator == "mean":
        print("data aggregator is mean")
        return tf.reduce_mean(result, keep_dims=True, name=label)
    if default_aggregator == "gmean":
        # Geometric mean computed as exp(mean(log(x))).
        return tf.exp(
            tf.mul(tf.reduce_sum(tf.log(result), keep_dims=True),
                   tf.inv(tf.to_float(tf.size(result)))),
            name=label)
    if default_aggregator == "hmean":
        print("data aggregator is hmean")
        return tf.div(tf.to_float(tf.size(result)),
                      tf.reduce_sum(tf.inv(result), keep_dims=True))
    if default_aggregator == "min":
        print("data aggregator is min")
        return tf.reduce_min(result, keep_dims=True, name=label)
    if default_aggregator == "qmean":
        print("data aggregator is qmean")
        return tf.sqrt(tf.reduce_mean(tf.square(result), keep_dims=True),
                       name=label)
    if default_aggregator == "cmean":
        print("data aggregator is cmean")
        return tf.pow(tf.reduce_mean(tf.pow(result, 3), keep_dims=True),
                      tf.inv(tf.to_float(3)),
                      name=label)
    raise ValueError(
        "unsupported default_aggregator: %r" % (default_aggregator,))
def evaluate(self, y, mu, sigma, w):
    """Evaluate the Gaussian-mixture density at ``y``.

    Argument layout, as described by the original author's notes:
      y:     [sample_id, dim]
      mu:    [sample_id, kernel_id, dim]  component means
      sigma: [sample_id, kernel_id, dim]  diagonal covariance entries
      w:     [sample_id, kernel_id]       mixture weights (should sum to one)

    Two back-ends are supported for diagonal covariances: a direct
    TensorFlow expression (``method == 'tf'``) and ``tdl.gmm_model``
    (``method == 'tdl'``).

    NOTE(review): when neither branch applies (e.g. non-diagonal
    covariances, still a TODO) the input ``y`` is returned unchanged.
    """
    # Keep the explicit ``== True`` comparison of the original.
    is_diagonal = (self.diagonal == True)  # noqa: E712

    if is_diagonal and (self.method == 'tf'):
        # 1 / sqrt((2*pi)^n_dim * prod(sigma)), one value per component.
        normaliser = tf.inv(
            tf.sqrt(math.pow(2 * np.pi, self.n_dim) *
                    tf.reduce_prod(sigma, 2)))
        precision = tf.inv(sigma)  # element-wise reciprocal
        centred = tf.reshape(y, [-1, 1, self.n_dim]) - mu
        weighted_centred = tf.mul(centred, precision)
        component_pdf = tf.mul(
            normaliser,
            tf.exp(-0.5 * tf.reduce_sum(centred * weighted_centred, 2)))
        # Mix the components with their weights.
        return tf.reduce_sum(tf.mul(w, component_pdf), 1)

    if is_diagonal and (self.method == 'tdl'):
        density, _, _ = tdl.gmm_model(y, w, mu, sigma)
        return density

    # TODO: non-diagonal covariances
    return y
def tf_normal(y, mu, sigma):
    """Return the product over dimension 0 of element-wise normal densities.

    The difference ``y - mu`` is transposed with permutation ``[2, 1, 0]``
    before being standardised by ``sigma``; a small constant (1e-8) is added
    to ``sigma`` wherever it is inverted to avoid division by zero.
    """
    inv_sqrt_two_pi = 1 / math.sqrt(2 * math.pi)

    centred = tf.transpose(tf.sub(y, mu), [2, 1, 0])
    standardised = tf.mul(centred, tf.inv(sigma + 1e-8))
    exponent = -tf.square(standardised) / 2
    per_dim_density = (
        tf.mul(tf.exp(exponent), tf.inv(sigma + 1e-8)) * inv_sqrt_two_pi)

    # Multiply the per-dimension densities together.
    return tf.reduce_prod(per_dim_density, reduction_indices=[0])
def tf_normal(self, y, mu, sigma):
    """Density of ``y`` under each diagonal-Gaussian mixture component.

    ``y`` is reshaped to ``[-1, self.npred, 1]`` and tiled across
    ``self.n_components`` so it can be compared with ``mu`` and ``sigma``.
    The squared standardised residuals are summed over dimension 1 (the
    ``npred`` dimension), i.e. the predicted dimensions are treated as
    independent.

    Fix: for independent dimensions the normalising term is the *product*
    of the per-dimension standard deviations together with
    ``(2*pi) ** (-npred / 2)``.  The previous code summed the sigmas and
    used a single ``1/sqrt(2*pi)`` factor, which is only correct when
    ``npred == 1`` (for which this version gives the same result).

    Args:
        y: targets, reshaped internally to ``[-1, npred, 1]``.
        mu: component means, broadcast-compatible with the tiled ``y``.
        sigma: component standard deviations, same layout as ``mu``.

    Returns:
        Tensor of component densities with dimension 1 kept as size 1.
    """
    # (2*pi)^(-npred/2); assumes self.npred is a plain Python number.
    norm = np.power(2 * np.pi, -0.5 * self.npred)

    ytile = tf.tile(tf.reshape(y, [-1, self.npred, 1]),
                    [1, 1, self.n_components])
    result = tf.sub(ytile, mu)
    result = tf.mul(result, tf.inv(sigma))
    result = -tf.div(tf.square(result), 2)
    result = tf.reduce_sum(result, 1, keep_dims=True)

    # Determinant factor of a diagonal covariance: product of the sigmas.
    detsigma = tf.reduce_prod(sigma, 1, keep_dims=True)
    return tf.mul(tf.mul(tf.exp(result), tf.inv(detsigma)), norm)
def loss(self, truth, prediction):
    """Mean negative Gaussian log-likelihood (up to an additive constant).

    ``prediction[0]`` is compared against ``truth``; the scale comes from
    ``op.get(prediction[1], self.inputs.actions)``.
    """
    predicted = prediction[0]
    target = truth
    scale = op.get(prediction[1], self.inputs.actions)

    residual = op.tofloat(predicted - target)
    # Cast up to float32, primarily to prevent under/overflow since the
    # inputs are already float16.
    standardised = tf.cast(residual, 'float32') * tf.inv(scale)

    log_likelihood = -tf.square(standardised) / 2
    log_likelihood = log_likelihood + tf.log(tf.inv(scale))
    return tf.reduce_mean(-log_likelihood)
def __init__(self, train_set, test_set, dictparam, type='elostd'):
    """Build the Elo graph, its costs, the optimiser and a live session.

    Every entry of ``dictparam``, ``train_set`` and ``test_set`` is wrapped
    in a non-trainable ``tf.Variable``; the only trainable variable created
    here is ``self.elo`` (``nb_teams`` x ``nb_times``, initialised to zero).
    """
    self.type = type

    # Constants
    self.nb_times = train_set['nb_times']
    self.nb_teams = train_set['nb_teams']
    first_time = ToolBox.first_time(self.nb_times)
    timediff = ToolBox.timediff_gen(self.nb_times)

    # Meta-parameters
    self.param = {
        name: tf.Variable(dictparam[name], trainable=False)
        for name in dictparam
    }

    # Training and testing sets
    self.train_data = {
        name: tf.Variable(train_set[name], validate_shape=False,
                          trainable=False)
        for name in train_set
    }
    self.test_data = {
        name: tf.Variable(test_set[name], validate_shape=False,
                          trainable=False)
        for name in test_set
    }

    # Trainable parameters
    self.elo = tf.Variable(tf.zeros([self.nb_teams, self.nb_times]))

    datasets = [('train', self.train_data), ('test', self.test_data)]

    # Model: logistic function of the (scaled) Elo difference.
    self.res = {}
    for split, data in datasets:
        diff = tf.matmul(data['team_h'] - data['team_a'], self.elo)
        diff = tf.reduce_sum(diff * data['time'], reduction_indices=[1])
        diff = diff + self.param['bais_ext']
        self.res[split] = tf.inv(1. + tf.exp(ELOCONST * diff))

    # Costs: cross-entropy plus Elo regularisation terms.
    self.cost_entropy = {}
    self.cost_regularized = {}
    for split, data in datasets:
        predicted = self.res[split]
        log_lik = (data['res'] * tf.log(predicted + 1e-9) +
                   (1 - data['res']) * tf.log(1 - predicted + 1e-9))
        self.cost_entropy[split] = tf.reduce_mean(-log_lik)
        terms = [self.cost_entropy[split]]

        raw_cost = tf.reduce_mean(
            tf.square(tf.matmul(self.elo, first_time)))
        raw_cost = raw_cost * (self.param['metaparam1'] * ELOCONST ** 2)
        raw_cost = raw_cost + (tf.reduce_mean(tf.square(self.elo)) *
                               self.param['metaparam0'] * ELOCONST ** 2)
        terms.append(raw_cost)

        # The time-difference penalty only makes sense with several times.
        if self.nb_times > 1:
            diff_cost = tf.reduce_mean(
                tf.square(tf.matmul(self.elo, timediff)))
            diff_cost = diff_cost * (
                self.param['metaparam2'] * ELOCONST ** 2)
            terms.append(diff_cost)

        self.cost_regularized[split] = tf.add_n(terms)

    # Optimiser on the regularised training cost
    optimizer = tf.train.AdamOptimizer(0.1)
    self.train_step = optimizer.minimize(self.cost_regularized['train'])

    # Session
    self.session = tf.Session()
    self.session.run(tf.initialize_all_variables())
def get_mixture_coef(output):
    """Turn raw network output into mixture-density parameters.

    Follows eq. 18-23 of http://arxiv.org/abs/1308.0850.  Column 0 of
    ``output`` is the end-of-stroke logit; the remaining columns are split
    along dimension 1 into six equal groups: pi, mu1, mu2, sigma1, sigma2
    and corr.

    Returns:
        ``[pi, mu1, mu2, sigma1, sigma2, corr, eos]``
    """
    eos_logit = output[:, 0:1]
    pi, mu1, mu2, sigma1, sigma2, corr = tf.split(1, 6, output[:, 1:])

    # End-of-stroke signal (should be negated, but it doesn't matter).
    eos = tf.sigmoid(eos_logit)

    # Softmax over the mixture weights, shifted by the row maximum first.
    pi = tf.exp(tf.sub(pi, tf.reduce_max(pi, 1, keep_dims=True)))
    pi_scale = tf.inv(tf.reduce_sum(pi, 1, keep_dims=True))
    pi = tf.mul(pi_scale, pi)

    # Positive standard deviations, correlation squashed into (-1, 1).
    sigma1 = tf.exp(sigma1)
    sigma2 = tf.exp(sigma2)
    corr = tf.tanh(corr)

    return [pi, mu1, mu2, sigma1, sigma2, corr, eos]
def disjunction_of_literals(literals, label="no_label"):
    """Build the truth value of a disjunction (clause) of literals.

    The literal tensors are concatenated along axis 1, combined along that
    axis with the disjunction selected by the module-level
    ``default_tnorm``, and the outcome is then reduced over all of its
    elements with the module-level ``default_aggregator``.

    This block already uses the TensorFlow >= 1.0 call forms
    (``tf.concat(values, axis)``, ``tf.reciprocal``); the ``gmean`` branch
    still called ``tf.mul`` / ``tf.inv``, which were renamed in 1.0, and is
    updated to ``tf.multiply`` / ``tf.reciprocal`` for consistency.

    Args:
        literals: iterable of objects exposing a ``tensor`` attribute.
        label: op name used by the branches that accept a ``name``.

    Returns:
        The aggregated truth-value tensor.

    Raises:
        ValueError: if ``default_tnorm`` or ``default_aggregator`` is not one
            of the names handled below.  (Previously an unknown t-norm failed
            with an unbound ``result`` and an unknown aggregator silently
            returned ``None``.)
    """
    list_of_literal_tensors = [lit.tensor for lit in literals]
    literals_tensor = tf.concat(list_of_literal_tensors, 1)

    # Step 1: disjunction across the literals (axis 1).
    if default_tnorm == "product":
        result = 1.0 - tf.reduce_prod(1.0 - literals_tensor, 1, keep_dims=True)
    elif default_tnorm == "yager2":
        result = tf.minimum(
            1.0,
            tf.sqrt(tf.reduce_sum(tf.square(literals_tensor), 1,
                                  keep_dims=True)))
    elif default_tnorm == "luk":
        result = tf.minimum(
            1.0, tf.reduce_sum(literals_tensor, 1, keep_dims=True))
    elif default_tnorm == "goedel":
        result = tf.reduce_max(literals_tensor, 1, keep_dims=True, name=label)
    else:
        raise ValueError("unsupported default_tnorm: %r" % (default_tnorm,))

    # Step 2: aggregate over every element of ``result``.
    if default_aggregator == "product":
        return tf.reduce_prod(result, keep_dims=True)
    if default_aggregator == "mean":
        return tf.reduce_mean(result, keep_dims=True, name=label)
    if default_aggregator == "gmean":
        # Geometric mean computed as exp(mean(log(x))).
        return tf.exp(
            tf.multiply(tf.reduce_sum(tf.log(result), keep_dims=True),
                        tf.reciprocal(tf.to_float(tf.size(result)))),
            name=label)
    if default_aggregator == "hmean":
        return tf.div(tf.to_float(tf.size(result)),
                      tf.reduce_sum(tf.reciprocal(result), keep_dims=True))
    if default_aggregator == "min":
        return tf.reduce_min(result, keep_dims=True, name=label)
    raise ValueError(
        "unsupported default_aggregator: %r" % (default_aggregator,))
def KernelHyperParameterLearning(iteration, learningRate, trainingX, trainingY):
    """Build the kernel covariance matrix of the training inputs and its inverse.

    Fixes relative to the previous version:
      * ``numdimension`` (lower-case ``d``) was an undefined name and raised
        ``NameError``; the defined ``numDimension`` is used throughout.
      * ``tf.inv`` is an element-wise reciprocal; the covariance matrix is
        now inverted with ``tf.matrix_inverse``.

    NOTE(review): the visible definition ends once the inverse is built;
    ``iteration``, ``learningRate`` and ``obsY`` are not used and nothing is
    returned here -- confirm whether the remainder lives elsewhere.

    Args:
        iteration: not used in the visible code.
        learningRate: not used in the visible code.
        trainingX: sequence of input rows; ``len(trainingX[0])`` gives the
            input dimension.
        trainingY: sequence of targets; its length gives the number of points.
    """
    numDataPoints = len(trainingY)
    numDimension = len(trainingX[0])

    # Input and output placeholders
    obsX = tf.placeholder(tf.float32, [numDataPoints, numDimension])
    obsY = tf.placeholder(tf.float32, [numDataPoints, 1])

    # Learnable kernel hyper-parameters and noise precision
    theta0 = tf.Variable(1.0)
    theta1 = tf.Variable(1.0)
    theta2 = tf.Variable(1.0)
    theta3 = tf.Variable(1.0)
    beta = tf.Variable(10.0)

    # Kernel building: evaluate the kernel for every pair of rows,
    # in row-major order.
    matCovarianceLinear = []
    for i in range(numDataPoints):
        for j in range(numDataPoints):
            kernelEvaluationResult = kernelFunctionWithTensorflow(
                theta0, theta1, theta2, theta3,
                tf.slice(obsX, [i, 0], [1, numDimension]),
                tf.slice(obsX, [j, 0], [1, numDimension]))
            if i == j:
                # Add the noise term 1/beta on the diagonal.
                kernelEvaluationResult = (
                    kernelEvaluationResult + tf.div(1.0, beta))
            matCovarianceLinear.append(kernelEvaluationResult)

    matCovarianceCombined = tf.pack(matCovarianceLinear)
    matCovariance = tf.reshape(matCovarianceCombined,
                               [numDataPoints, numDataPoints])
    matCovarianceInv = tf.matrix_inverse(matCovariance)
def tags(features, n_tags, name='tags'):
    '''Construct a time-varying tag module

    Besides returning the prediction node, this registers the prediction in
    the 'outputs' collection, both placeholders in the 'inputs' collection,
    the masked loss in the 'loss' collection, and emits a scalar summary of
    the loss rescaled by the inverse mean of the mask.

    Parameters
    ----------
    features : tf.Tensor
        The input features to predict from

    n_tags : int > 0
        The number of output tags

    name : str
        A name for this submodule

    Returns
    -------
    tags : tf.Tensor
        The prediction output for this module:
        probability for each tag being active at each time.
    '''
    # Boolean placeholders for the targets and the per-example mask.
    target_tags = tf.placeholder(tf.bool, shape=[None, None, n_tags], name='output_{:s}'.format(name))
    mask_tags = tf.placeholder(tf.bool, shape=[None], name='mask_{:s}'.format(name))

    # NOTE(review): scope nesting below is reconstructed from flattened
    # source -- confirm against the intended op names.
    with tf.name_scope(name):
        z_tag = ops.expand_mask(mask_tags, name='mask_tag')

        # Make the logits
        tag_logit = layers.conv2_multilabel(features, n_tags, squeeze_dims=[2], name='tag_module')

        # Exponentiate the logits to obtain the prediction node
        tag_predict = tf.exp(tag_logit, name='tags_{:s}'.format(name))

    # Set up the losses
    with tf.name_scope('loss'):
        # Reciprocal of the mean mask value; used to rescale the summary.
        f_mask = tf.inv(tf.reduce_mean(z_tag))
        with tf.name_scope('tag'):
            tag_loss = tf.reduce_mean(
                z_tag * tf.nn.sigmoid_cross_entropy_with_logits(
                    tag_logit, tf.to_float(target_tags)), name='loss')

    tf.add_to_collection('outputs', tag_predict)
    tf.add_to_collection('inputs', target_tags)
    tf.add_to_collection('inputs', mask_tags)
    tf.add_to_collection('loss', tag_loss)
    tf.scalar_summary('tags/{:s}'.format(name), f_mask * tag_loss)
    return tag_predict
def predict(x, model_path):
    """Run a saved logistic-regression model on a batch of predictors.

    Args:
        x: n-by-m nested sequence/array of covariates (n data points,
            m factors); must contain at least one row.
        model_path: location of the TensorFlow checkpoint to restore.

    Returns:
        The result of ``sess.run`` on a one-element fetch list, i.e. a list
        holding the n-by-1 array of predicted probabilities.
    """
    num_data = len(x)
    num_predictors = len(x[0])
    features = np.array(x)

    # Build the model in a private graph so that variable names match the
    # checkpoint regardless of what the default graph contains.
    with tf.Graph().as_default():
        inputs = tf.placeholder(tf.float32, [num_data, num_predictors])
        weights = tf.Variable(tf.zeros([num_predictors, 1]))
        bias = tf.Variable(1.0)
        saver = tf.train.Saver([weights, bias])

        # Logistic function: 1 / (exp(-(XW + b)) + 1)
        logits = tf.matmul(inputs, weights) + bias
        probabilities = tf.inv(tf.exp(-logits) + 1)

        with tf.Session() as sess:
            saver.restore(sess, model_path)
            predictions = sess.run([probabilities],
                                   feed_dict={inputs: features})
    return predictions
def sparse_dropout(x, keep_prob, noise_shape):
    """Apply dropout to a sparse tensor.

    A uniform random value is drawn per entry of ``noise_shape``; entries
    where ``keep_prob + u`` reaches 1 are retained and the retained values
    are rescaled by ``1 / keep_prob``.
    """
    uniform_noise = tf.random_uniform(noise_shape)
    # floor(keep_prob + u) is 1 with probability keep_prob, else 0.
    keep_mask = tf.cast(tf.floor(keep_prob + uniform_noise), dtype=tf.bool)
    retained = tf.sparse_retain(x, keep_mask)
    return retained * tf.inv(keep_prob)
def predict(x, model_path):
    """Run a saved logistic-regression model on a batch of predictors.

    Args:
        x: n-by-m nested sequence/array of covariates (n data points,
            m factors); must contain at least one row.
        model_path: location of the TensorFlow checkpoint to restore.

    Returns:
        The result of ``sess.run`` on a one-element fetch list, i.e. a list
        holding the n-by-1 array of predicted probabilities.
    """
    num_data = len(x)
    num_predictors = len(x[0])
    features = np.array(x)

    # Build the model in a private graph so that variable names match the
    # checkpoint regardless of what the default graph contains.
    with tf.Graph().as_default():
        inputs = tf.placeholder(tf.float32, [num_data, num_predictors])
        weights = tf.Variable(tf.zeros([num_predictors, 1]))
        bias = tf.Variable(1.0)
        saver = tf.train.Saver([weights, bias])

        # Logistic function: 1 / (exp(-(XW + b)) + 1)
        logits = tf.matmul(inputs, weights) + bias
        probabilities = tf.inv(tf.exp(-logits) + 1)

        with tf.Session() as sess:
            saver.restore(sess, model_path)
            predictions = sess.run([probabilities],
                                   feed_dict={inputs: features})
    return predictions
def photoAugmentation(source, target, mean):
    """Apply the same random photometric augmentation to an image pair.

    For every batch index the source and target images share one draw of:
    contrast gain, additive brightness image, per-channel colour gains,
    gamma, and additive Gaussian noise.  Finally ``mean / 255`` is
    subtracted.  The batch size, height and width are read from the static
    shape of ``source``.

    Returns:
        ``(augmented_source, augmented_target)`` packed along axis 0.
    """
    static_shape = source.get_shape()
    num_batch = static_shape[0].value
    height = static_shape[1].value
    width = static_shape[2].value

    augmented_sources = []
    augmented_targets = []
    for b in xrange(num_batch):
        src = source[b, :, :, :]
        tgt = target[b, :, :, :]

        # Contrast and brightness change
        contrast = tf.random_uniform([], minval=-0.3, maxval=0.3) + 1.0
        bright_sigma = 0.2
        brightness = tf.random_normal([height, width, 3], mean=0.0,
                                      stddev=bright_sigma, dtype=tf.float32)
        src = tf.add(tf.scalar_mul(contrast, src), brightness)
        tgt = tf.add(tf.scalar_mul(contrast, tgt), brightness)

        # Colour change (may be bad for unsupervised learning); the gains
        # are drawn in B, G, R order and applied to channels 0, 1, 2.
        channel_gains = [
            tf.random_uniform([], minval=0.9, maxval=1.1),
            tf.random_uniform([], minval=0.9, maxval=1.1),
            tf.random_uniform([], minval=0.9, maxval=1.1),
        ]
        src = tf.pack(
            [tf.scalar_mul(gain, src[:, :, c])
             for c, gain in enumerate(channel_gains)],
            axis=2)
        tgt = tf.pack(
            [tf.scalar_mul(gain, tgt[:, :, c])
             for c, gain in enumerate(channel_gains)],
            axis=2)
        src = tf.clip_by_value(src, 0.0, 1.0)
        tgt = tf.clip_by_value(tgt, 0.0, 1.0)

        # Gamma
        gamma_inv = tf.inv(tf.random_uniform([], minval=0.7, maxval=1.5))
        src = tf.pow(src, gamma_inv)
        tgt = tf.pow(tgt, gamma_inv)

        # Additive Gaussian noise with a random standard deviation
        noise_sigma = tf.random_uniform([], minval=0.0, maxval=0.04)
        noise = tf.random_normal([height, width, 3], mean=0.0,
                                 stddev=noise_sigma, dtype=tf.float32)
        src = tf.add(src, noise)
        tgt = tf.add(tgt, noise)

        # Subtract mean
        src = tf.sub(src, tf.truediv(mean, 255.0))
        tgt = tf.sub(tgt, tf.truediv(mean, 255.0))

        augmented_sources.append(src)
        augmented_targets.append(tgt)

    return (tf.pack(augmented_sources, axis=0),
            tf.pack(augmented_targets, axis=0))
def psnr_loss(inference_tensor, reference_tensor, name="loss_layer"):
    """PSNR-style loss: ``-log(1 / sqrt(MSE + FLAGS.eps))``.

    The loss tensor is also added to the 'losses' collection.
    """
    with tf.name_scope(name):
        squared_error = tf.square(inference_tensor - reference_tensor,
                                  name='l2_difference')
        mse = tf.reduce_mean(squared_error, name='MSE')
        inv_rmse = tf.inv(tf.sqrt(mse + FLAGS.eps))
        loss = tf.neg(tf.log(inv_rmse), name='psnr')
        tf.add_to_collection('losses', loss)
        return loss
def tags(features, n_tags, name='tags'):
    '''Construct a time-varying tag module

    Besides returning the prediction node, this registers the prediction in
    the 'outputs' collection, both placeholders in the 'inputs' collection,
    the masked loss in the 'loss' collection, and emits a scalar summary of
    the loss rescaled by the inverse mean of the mask.

    Parameters
    ----------
    features : tf.Tensor
        The input features to predict from

    n_tags : int > 0
        The number of output tags

    name : str
        A name for this submodule

    Returns
    -------
    tags : tf.Tensor
        The prediction output for this module:
        probability for each tag being active at each time.
    '''
    # Boolean placeholders for the targets and the per-example mask.
    target_tags = tf.placeholder(tf.bool, shape=[None, None, n_tags], name='output_{:s}'.format(name))
    mask_tags = tf.placeholder(tf.bool, shape=[None], name='mask_{:s}'.format(name))

    # NOTE(review): scope nesting below is reconstructed from flattened
    # source -- confirm against the intended op names.
    with tf.name_scope(name):
        z_tag = ops.expand_mask(mask_tags, name='mask_tag')

        # Make the logits
        tag_logit = layers.conv2_multilabel(features, n_tags, squeeze_dims=[2], name='tag_module')

        # Exponentiate the logits to obtain the prediction node
        tag_predict = tf.exp(tag_logit, name='tags_{:s}'.format(name))

    # Set up the losses
    with tf.name_scope('loss'):
        # Reciprocal of the mean mask value; used to rescale the summary.
        f_mask = tf.inv(tf.reduce_mean(z_tag))
        with tf.name_scope('tag'):
            tag_loss = tf.reduce_mean(z_tag * tf.nn.sigmoid_cross_entropy_with_logits(tag_logit, tf.to_float(target_tags)), name='loss')

    tf.add_to_collection('outputs', tag_predict)
    tf.add_to_collection('inputs', target_tags)
    tf.add_to_collection('inputs', mask_tags)
    tf.add_to_collection('loss', tag_loss)
    tf.scalar_summary('tags/{:s}'.format(name), f_mask * tag_loss)
    return tag_predict
def mstep(X, resp, min_covar=MIN_COVAR_DEFAULT):
    """M-step of a diagonal Gaussian mixture.

    Per the original shape comments, ``resp`` has one row per cluster (K)
    and ``resp @ X`` is K x D.

    Returns:
        ``(mus, sigmas, alphas)``: responsibility-weighted means, diagonal
        variances (floored by ``min_covar``) and mixing proportions.
    """
    cluster_mass = tf.reduce_sum(resp, 1)  # K
    # Reciprocal of the mass, padded with EPS to avoid division by zero.
    inv_mass = tf.expand_dims(tf.inv(cluster_mass + 10 * EPS), 1)  # Kx1
    alphas = EPS + cluster_mass / (tf.reduce_sum(cluster_mass) + 10 * EPS)

    weighted_sum = tf.matmul(resp, X)  # KxD
    mus = weighted_sum * inv_mass

    # E[x^2] - 2 E[x] mu + mu^2, expanded term by term.
    mean_sq = tf.matmul(resp, tf.square(X)) * inv_mass
    mu_sq = tf.square(mus)
    cross = mus * weighted_sum * inv_mass
    sigmas = mean_sq - 2 * cross + mu_sq + min_covar

    return mus, sigmas, alphas
def inv_model_spec(y):
    """Inverse pass used for sampling.

    ``y`` is reshaped to the trailing three sizes of
    ``final_latent_dimension`` and pushed backwards through ``layers`` in
    reverse order; the result is mapped through the logistic function.
    """
    latent_shape = final_latent_dimension
    latent = tf.reshape(
        y, [-1, latent_shape[1], latent_shape[2], latent_shape[3]])

    # The backward pass starts with no accumulated output.
    restored = None
    for layer in reversed(layers):
        restored, latent = layer.backward(restored, latent)

    # Inverse logit
    return tf.inv(1 + tf.exp(-restored))
def mstep(X, resp, min_covar=MIN_COVAR_DEFAULT):
    """M-step of a diagonal Gaussian mixture.

    Per the original shape comments, ``resp`` has one row per cluster (K)
    and ``resp @ X`` is K x D.

    Returns:
        ``(mus, sigmas, alphas)``: responsibility-weighted means, diagonal
        variances (floored by ``min_covar``) and mixing proportions.
    """
    cluster_mass = tf.reduce_sum(resp, 1)  # K
    # Reciprocal of the mass, padded with EPS to avoid division by zero.
    inv_mass = tf.expand_dims(tf.inv(cluster_mass + 10 * EPS), 1)  # Kx1
    alphas = EPS + cluster_mass / (tf.reduce_sum(cluster_mass) + 10 * EPS)

    weighted_sum = tf.matmul(resp, X)  # KxD
    mus = weighted_sum * inv_mass

    # E[x^2] - 2 E[x] mu + mu^2, expanded term by term
    # (a banded covariance would need (x - mu)(x - mu)^T instead).
    mean_sq = tf.matmul(resp, tf.square(X)) * inv_mass
    mu_sq = tf.square(mus)
    cross = mus * weighted_sum * inv_mass
    sigmas = mean_sq - 2 * cross + mu_sq + min_covar

    return mus, sigmas, alphas
def moments(x, axes, name=None):
    """Return the mean and variance of ``x`` over ``axes``.

    The divisor is built from the dynamic shape of ``x`` so the function
    works when the reduced dimensions are not statically known.

    NOTE(review): ``mean`` is subtracted from ``x`` after the reduced
    dimensions were dropped -- confirm broadcasting is as intended for
    non-leading ``axes``.
    """
    with tf.op_scope([x, axes], name, "moments"):
        x = tf.convert_to_tensor(x, name="x")

        # Number of elements being averaged = product of reduced dim sizes.
        count = tf.constant(1.0)
        for dim in xrange(len(x.get_shape())):
            if dim not in axes:
                continue
            count = count * tf.to_float(tf.shape(x)[dim])
        inv_count = tf.inv(count, name="divisor")

        axes = tf.constant(axes, name="axes")
        mean = tf.mul(tf.reduce_sum(x, axes), inv_count, name="mean")
        squared_dev = tf.square(x - mean)
        var = tf.mul(tf.reduce_sum(squared_dev, axes), inv_count,
                     name="variance")
        return mean, var
def dropout(x, dropout_prob, seed=None, name=None):
    """Randomly zero elements of ``x`` and rescale the survivors.

    An element is kept when ``u + dropout_prob >= 1`` for a uniform draw
    ``u`` in [0, 1), and kept elements are multiplied by
    ``1 / dropout_prob``; despite its name, ``dropout_prob`` therefore acts
    as the probability of *keeping* an element.

    Fix: the range check previously tested ``isinstance(dropout, float)``,
    i.e. the function object itself, so it never ran.  It now validates
    ``dropout_prob``.

    Args:
        x: tensor (or value convertible to one) with a fully defined
            static shape, which is used to draw the random mask.
        dropout_prob: scalar tensor or float in (0, 1].
        seed: optional seed forwarded to ``tf.random_uniform``.
        name: optional variable-scope name; defaults to ``'Dropout'``.

    Returns:
        A tensor with the same static shape as ``x``.

    Raises:
        ValueError: if ``dropout_prob`` is a float outside (0, 1].
    """
    with tf.variable_scope(name or 'Dropout'):
        if isinstance(dropout_prob, float) and not 0 < dropout_prob <= 1:
            raise ValueError("dropout probability must be a scalar tensor or a value in "
                             "range (0,1]")
        x = tf.convert_to_tensor(x)
        dropout_prob = tf.convert_to_tensor(dropout_prob, dtype=x.dtype)
        random_tensor = tf.random_uniform(x.get_shape(), minval=0, maxval=1,
                                          dtype=x.dtype, seed=seed)
        # floor(u + p) is 1 with probability p and 0 otherwise.
        binary_tensor = tf.floor(random_tensor + dropout_prob)
        ret = x * tf.inv(dropout_prob) * binary_tensor
        ret.set_shape(x.get_shape())
        return ret
def main(argv=None): imagenet = evaluater.ImageNet() # input variable with tf.variable_scope('input') as scope: v = tf.get_variable('input', shape=(96, 96, 3), initializer=tf.random_uniform_initializer(0.0, 1.0)) # per_image_whitening without relu image = tf.mul(tf.clip_by_value(v, 0.0, 1.0), 255.5) mean, variance = tf.nn.moments(image, [0, 1, 2]) pixels = tf.reduce_prod(tf.shape(image)) stddev = tf.sqrt(tf.maximum(variance, 0)) image = tf.sub(image, mean) image = tf.div(image, tf.maximum( stddev, tf.inv(tf.sqrt(tf.cast(pixels, tf.float32))))) # loss and train inputs = tf.expand_dims(image, 0) filename = 'generated-%03d.jpg' % TARGET_CLASS output_image = tf.image.convert_image_dtype(v, tf.uint8, saturate=True) eval_image_path = os.path.join(OUTPUT_DIR, filename) logits = imagenet.inference(eval_image_path) logits_v = tf.Variable(logits) #logits = r.inference(inputs, FLAGS.num_classes) softmax = tf.nn.softmax(logits_v) losses = tf.nn.sparse_softmax_cross_entropy_with_logits( logits_v, [TARGET_CLASS]) train_op = tf.train.AdamOptimizer().minimize(losses, var_list=[v]) # variable_averages = tf.train.ExponentialMovingAverage( # r.MOVING_AVERAGE_DECAY) # variables_to_restore = {} # for key, value in variable_averages.variables_to_restore().items(): # if not key.startswith('input'): # variables_to_restore[key] = value # saver = tf.train.Saver(variables_to_restore) # checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir) with tf.Session() as sess: sess.run(tf.initialize_local_variables()) #saver.restore(sess, checkpoint) for step in range(1000): print(step) with open(eval_image_path, 'wb') as f: f.write(sess.run(tf.image.encode_jpeg( output_image, quality=100, chroma_downsampling=False))) _, loss_value, softmax_value = sess.run( [train_op, losses, softmax]) print('%04d - loss: %f (%f)' % (step, loss_value[0], softmax_value.flatten().tolist()[TARGET_CLASS]))
def predict(self, obs_X, train_X, train_Y, n_data):
    '''Predict the class (+1 / -1) of one new data point.

    Computes the mean and covariance of P(t_new | T_train) for the new
    point.  By the conditioning theorem of the multivariate normal
    distribution only the newly added part of the full covariance has to be
    computed:

        new_cov:  kernel vector between x_new and every point in x_train
        self.cov: kernel matrix between the x_train points

        mu_t_new  = (new_cov.T) x (self.cov.inverse) x (train_Y)
        cov_t_new = (variance of x_new)
                    - (new_cov.T) x (self.cov.inverse) x (new_cov)

    (For plain Gaussian-process regression, mu_t_new would be the answer.)

    Fixes relative to the previous version:
      * the loop iterated over ``range(tf.shape(self.cov)[0])``; a tensor is
        not a valid ``range`` argument, so the known count ``n_data`` is
        used (the result is reshaped to ``[1, n_data]`` anyway);
      * ``tf.inv`` is an element-wise reciprocal; the formulas above need
        the matrix inverse, so ``tf.matrix_inverse`` is used.

    Args:
        obs_X: the new observation; ``obs_X.shape[1]`` is the input dimension.
        train_X: training inputs, one row per training point.
        train_Y: training targets.
        n_data: number of training points.

    Returns:
        ``1`` if ``self.probit(...)[0] >= 0.5`` else ``-1``.
    '''
    numDimension = obs_X.shape[1]
    numData = n_data

    obs_X = tf.cast(obs_X, tf.float32)
    train_X = tf.cast(train_X, tf.float32)
    train_Y = tf.cast(train_Y, tf.float32)
    # NOTE(review): q_Y (targets mapped via 2*y - 1) is never used below;
    # pred_mu uses train_Y directly -- confirm which is intended.
    q_Y = tf.add(tf.scalar_mul(2.0, train_Y), -1.0)

    # Kernel between the new point and each training row.
    new_cov = []
    for i in range(numData):
        kernel_output = self.kernel(
            obs_X, tf.slice(train_X, [i, 0], [1, numDimension]))
        new_cov.append(kernel_output)
    new_cov = tf.reshape(tf.stack(new_cov), [1, numData])

    cov_inv = tf.matrix_inverse(self.cov)
    projected = tf.matmul(new_cov, cov_inv)
    pred_mu = tf.matmul(projected, train_Y)
    pred_sigma = tf.sub(
        tf.reshape(self.kernel(obs_X, obs_X), [1, 1]),
        tf.matmul(projected, tf.transpose(new_cov)))

    # NOTE(review): the comparison below presumably needs self.probit to
    # return concrete values rather than a graph tensor -- confirm.
    probit = self.probit(pred_mu, pred_sigma)
    if probit[0] >= 0.5:
        pred_class = 1
    else:
        pred_class = -1
    return pred_class
def get_mixture_coef(output):
    """Split network output into mixture weights, sigmas and means.

    ``output`` is cut along dimension 1 into three equal parts.  The first
    is turned into a probability distribution with a max-shifted softmax,
    the second is exponentiated to make it positive, the third is returned
    untouched.

    Returns:
        ``(pi, sigma, mu)``
    """
    pi_logits, log_sigma, mu = tf.split(1, 3, output)

    # Softmax, shifting by the row maximum before exponentiating.
    row_max = tf.reduce_max(pi_logits, 1, keep_dims=True)
    unnormalised = tf.exp(tf.sub(pi_logits, row_max))
    scale = tf.inv(tf.reduce_sum(unnormalised, 1, keep_dims=True))
    pi = tf.mul(scale, unnormalised)

    sigma = tf.exp(log_sigma)
    return pi, sigma, mu
def drop_layer(x, keep_prob, seed=None, name=None):
    """Dropout with one keep/drop decision per leading index of ``x``.

    The random mask has shape ``[tf.shape(x)[0], 1]`` and is broadcast
    against ``x``; kept entries are scaled by ``1 / keep_prob`` and the
    rest are set to ``0``.

    Args:
      x: A tensor.
      keep_prob: A scalar `Tensor` with the same type as x, or a float.
        The probability that each mask entry is kept.
      seed: A Python integer used to seed the random draw.
      name: A name for this operation (optional).

    Returns:
      A Tensor with the same static shape as `x`.

    Raises:
      ValueError: If `keep_prob` is a float that is not in `(0, 1]`.
    """
    with tf.op_scope([x], name, "drop_layer") as name:
        x = tf.convert_to_tensor(x, name="x")

        is_python_float = isinstance(keep_prob, float)
        if is_python_float and not (0 < keep_prob <= 1):
            raise ValueError("keep_prob must be a scalar tensor or a float in the "
                             "range (0, 1], got %g" % keep_prob)

        keep_prob = tf.convert_to_tensor(keep_prob, dtype=x.dtype,
                                         name="keep_prob")
        keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        mask_shape = [tf.shape(x)[0], 1]
        # keep_prob + U[0, 1) lies in [keep_prob, 1 + keep_prob); its floor
        # is 1 with probability keep_prob and 0 otherwise.
        shifted_noise = keep_prob + tf.random_uniform(
            mask_shape, seed=seed, dtype=x.dtype)
        keep_mask = tf.floor(shifted_noise)

        dropped = x * tf.inv(keep_prob) * keep_mask
        dropped.set_shape(x.get_shape())
        return dropped
def get_mixture_coef(output, KMIX=24, OUTPUTDIM=1):
    """Split network output into mixture weights, sigmas and means.

    ``output`` is cut along dimension 1 into ``2 + OUTPUTDIM`` equal parts:
    the first is the mixture-weight logits, the second the log-sigmas and
    the remaining ``OUTPUTDIM`` parts are the means, which are packed along
    a new axis 2 and then transposed with permutation ``[1, 0, 2]``.

    The previous version first created three ``tf.placeholder`` nodes (all
    named "mixparam") and immediately overwrote them; those dead graph
    nodes have been removed.

    Args:
        output: raw network output.
        KMIX: kept for backward compatibility; no longer used.
        OUTPUTDIM: number of mean blocks in ``output``.

    Returns:
        ``(out_pi, out_sigma, out_mu)``
    """
    splits = tf.split(1, 2 + OUTPUTDIM, output)
    out_pi = splits[0]
    out_sigma = splits[1]
    out_mu = tf.pack(splits[2:], axis=2)
    out_mu = tf.transpose(out_mu, [1, 0, 2])

    # Use softmax to normalize pi into a probability distribution; the row
    # maximum is subtracted first for numerical stability.
    max_pi = tf.reduce_max(out_pi, 1, keep_dims=True)
    out_pi = tf.sub(out_pi, max_pi)
    out_pi = tf.exp(out_pi)
    normalize_pi = tf.inv(tf.reduce_sum(out_pi, 1, keep_dims=True))
    out_pi = tf.mul(normalize_pi, out_pi)

    # Use the exponential to make sure sigma is positive.
    out_sigma = tf.exp(out_sigma)

    return out_pi, out_sigma, out_mu
def get_mixture_coef(output):
    """Split network output into mixture weights, sigmas and means.

    ``output`` is cut along dimension 1 into three equal parts.  The first
    is normalised with a max-shifted softmax, the second is exponentiated
    so that it is positive, and the third is returned unchanged.

    The previous version first created three ``tf.placeholder`` nodes (all
    named "mixparam") and immediately overwrote them; those dead graph
    nodes have been removed.

    Returns:
        ``(out_pi, out_sigma, out_mu)``
    """
    out_pi, out_sigma, out_mu = tf.split(1, 3, output)

    # Softmax over the mixture weights; subtracting the row maximum first
    # keeps the exponentials bounded.
    max_pi = tf.reduce_max(out_pi, 1, keep_dims=True)
    out_pi = tf.sub(out_pi, max_pi)
    out_pi = tf.exp(out_pi)
    normalize_pi = tf.inv(tf.reduce_sum(out_pi, 1, keep_dims=True))
    out_pi = tf.mul(normalize_pi, out_pi)

    out_sigma = tf.exp(out_sigma)

    return out_pi, out_sigma, out_mu
def _call(self, inputs):
    """Apply the layer: reweight the support, apply dropout, convolve, add bias.

    Fix: this block uses ``tf.nn.l2_normalize(..., axis=...)``, a keyword
    that only exists in TensorFlow releases where ``tf.inv`` has already
    been renamed to ``tf.reciprocal``; the reciprocal call is updated so the
    method can run on a single TensorFlow version.

    Args:
        inputs: layer input (sparse when ``self.sparse_inputs`` is set).

    Returns:
        ``self.act`` applied to the convolved (and optionally biased) output.
    """
    x = inputs

    # Mixing weights from the normalised input and support, rescaled to
    # sum to one.
    norm_x = tf.nn.l2_normalize(x, axis=1)
    norm_support = tf.nn.l2_normalize(self.support, axis=0)
    norm_mix = tf.cross(norm_x, norm_support)
    norm_mix = norm_mix * tf.reciprocal(tf.reduce_sum(norm_mix))

    # NOTE(review): sampledIndex is not used below; the op is kept so the
    # graph's random-op sequence is unchanged -- confirm whether sampling
    # was meant to feed new_support.
    sampledIndex = tf.multinomial(tf.log(norm_mix), self.rank)
    new_support = dot(self.support, tf.diag(norm_mix), sparse=True)

    # dropout
    if self.sparse_inputs:
        x = sparse_dropout(x, 1 - self.dropout, self.num_features_nonzero)
    else:
        x = tf.nn.dropout(x, 1 - self.dropout)

    # convolve (only the first weight matrix is used)
    if not self.featureless:
        pre_sup = dot(x, self.vars['weights_0'], sparse=self.sparse_inputs)
    else:
        pre_sup = self.vars['weights_0']
    output = dot(new_support, pre_sup, sparse=True)

    # bias
    if self.bias:
        output += self.vars['bias']

    return self.act(output)
def get_mixture_params(self, output):
    """Slice network output into mixture weights, sigmas and means.

    Column layout of ``output`` (dimension 1), as read by the slices below:
      * ``n_components`` columns of mixture-weight logits,
      * ``npred * n_components`` columns of log-sigmas,
      * ``npred * n_components`` columns of means.

    Sigmas and means are reshaped to ``[-1, npred, n_components]``.

    The previous version first created three ``tf.placeholder`` nodes and
    immediately overwrote them; those dead graph nodes have been removed.

    Returns:
        ``(pi, sigma, mu)``
    """
    block = self.npred * self.n_components

    pi_ = tf.slice(output, [0, 0], [-1, self.n_components])
    sigma_ = tf.slice(output, [0, self.n_components], [-1, block])
    mu_ = tf.slice(output, [0, self.n_components * (1 + self.npred)],
                   [-1, block])

    sigma_ = tf.reshape(sigma_, [-1, self.npred, self.n_components])
    mu = tf.reshape(mu_, [-1, self.npred, self.n_components])

    # Softmax over the weights, shifted by the row maximum for stability.
    max_pi = tf.reduce_max(pi_, 1, keep_dims=True)
    sub_pi = tf.exp(tf.sub(pi_, max_pi))
    norm_pi = tf.inv(tf.reduce_sum(sub_pi, 1, keep_dims=True))
    pi = tf.mul(norm_pi, sub_pi)

    # Exponentiate so that sigma is positive.
    sigma = tf.exp(sigma_)

    return pi, sigma, mu
def tf_log_normals(X, mus, sigmas):
    """Log-density of every row of ``X`` under each diagonal Gaussian.

    With p(X) = sqrt(a * b * c) where
        a = (2 pi)^(-p)
        b = det(sigma)^(-1)
        c = exp(-(x - mu)^T sigma^(-1) (x - mu))
    the quadratic form in ``c`` is expanded term by term for numerical
    stability, using the fact that sigma is diagonal.

    Returns:
        ``0.5 * (log a + log b + log c)``; K x N per the original comments.
    """
    n_dims = tf_ncols(mus)
    X_t = tf.transpose(X)  # pxN
    precision = tf.inv(sigmas)

    log_a = (-tf.cast(n_dims, 'float64') *
             tf.log(tf.constant(2 * np.pi, dtype='float64')))  # scalar
    log_b = tf.reduce_sum(tf.log(precision), 1, keep_dims=True)  # Kx1

    # -(mu^2 / s) + 2 (mu / s) x - x^2 / s, summed over dimensions.  KxN
    log_c = -tf.reduce_sum(precision * tf.square(mus), 1, keep_dims=True)
    log_c = log_c + 2 * tf.matmul(precision * mus, X_t)
    log_c = log_c - tf.matmul(precision, tf.square(X_t))

    return 0.5 * (log_a + log_b + log_c)
def write_layer(self, input_tensor, time, attention=True):
    """Generate the 'write' layer output.

    Args:
        input_tensor: input tensor fed to the linear 'write' layer.
        time: current timestamp.
        attention: when true, the linear output is an N x N patch per
            channel which is projected through the attention filters and
            scaled by ``1 / gamma``; otherwise the linear output is returned
            reshaped to ``self.batch_image_shape``.

    Returns:
        Result tensor of the write layer.
    """
    n_channels = self.image_shape[2]

    if attention:
        out_shape = [self.mini_batch_size, self.N, self.N, n_channels]
        out_dim = out_shape[1] * out_shape[2] * out_shape[3]
    else:
        out_shape = self.batch_image_shape
        out_dim = self.input_dim

    linear_out = self.single_linear([self.dec_size, out_dim], input_tensor,
                                    time, scope_name="write")
    w_t = tf.reshape(linear_out, out_shape)

    if not attention:
        return w_t

    gamma, filter_x, filter_y = self.attention_extract(time, "write")
    inv_gamma = tf.reshape(tf.inv(gamma), [self.mini_batch_size, 1, 1])

    # Project every channel patch: F_y^T * patch * F_x
    projected = []
    for channel in tf.split(3, n_channels, w_t):
        filter_y_t = tf.transpose(filter_y, perm=[0, 2, 1])
        rows = tf.batch_matmul(filter_y_t, tf.squeeze(channel))
        projected.append(tf.batch_matmul(rows, filter_x))

    # Scale by 1/gamma and restore the trailing channel axis.
    channel_shape = [self.mini_batch_size, self.image_shape[0],
                     self.image_shape[1], 1]
    scaled = []
    for patch in projected:
        scaled.append(tf.reshape(patch * inv_gamma, channel_shape))

    return tf.concat(3, scaled)
def get_mixture_coef(output):
    """Turn raw network output into mixture-density parameters.

    Follows eq. 18-23 of http://arxiv.org/abs/1308.0850.  Columns 0-2 of
    ``output`` are the pen states (end of stroke, end of character/content,
    continue with stroke) and are returned unprocessed; the remaining
    columns are split along dimension 1 into six equal groups: pi, mu1,
    mu2, sigma1, sigma2 and corr.

    Returns:
        ``[pi, mu1, mu2, sigma1, sigma2, corr, pen]``
    """
    pen = output[:, 0:3]
    pi, mu1, mu2, sigma1, sigma2, corr = tf.split(1, 6, output[:, 3:])

    # Softmax over the mixture weights, shifted by the row maximum first.
    pi = tf.exp(tf.sub(pi, tf.reduce_max(pi, 1, keep_dims=True)))
    pi_scale = tf.inv(tf.reduce_sum(pi, 1, keep_dims=True))
    pi = tf.mul(pi_scale, pi)

    # Positive standard deviations, correlation squashed into (-1, 1).
    sigma1 = tf.exp(sigma1)
    sigma2 = tf.exp(sigma2)
    corr = tf.tanh(corr)

    return [pi, mu1, mu2, sigma1, sigma2, corr, pen]
def __init__(self, label, clauses, save_path=""):
    """Define a knowledge base: aggregated truth value, loss, train op, saver.

    The truth values of all clauses are concatenated and reduced with the
    module-level ``default_clauses_aggregator``; an empty clause list gives
    the constant 1.0.  The loss is ``smooth(parameters)`` minus the
    aggregated truth value, plus an optional positive-fact penalty.
    """
    print("defining the knowledge base %s" % (label,))
    self.label = label
    self.clauses = clauses
    self.parameters = [
        param for clause in self.clauses for param in clause.parameters
    ]

    if self.clauses:
        clause_values = tf.concat(0, [clause.tensor for clause in clauses])
        if default_clauses_aggregator == "min":
            print("clauses aggregator is min")
            self.tensor = tf.reduce_min(clause_values)
        elif default_clauses_aggregator == "mean":
            print("clauses aggregator is mean")
            self.tensor = tf.reduce_mean(clause_values)
        elif default_clauses_aggregator == "hmean":
            print("clauses aggregator is hmean")
            n_values = tf.to_float(tf.size(clause_values))
            self.tensor = tf.div(
                n_values,
                tf.reduce_sum(tf.inv(clause_values), keep_dims=True))
        elif default_clauses_aggregator == "wmean":
            print("clauses aggregator is weighted mean")
            clause_weights = tf.constant(
                [clause.weight for clause in clauses])
            self.tensor = tf.div(
                tf.reduce_sum(tf.mul(clause_weights, clause_values)),
                tf.reduce_sum(clause_weights))
    else:
        self.tensor = tf.constant(1.0)

    # Loss: regulariser (+ optional positive-fact penalty) - satisfaction.
    regularised = smooth(self.parameters)
    if default_positive_fact_penality != 0:
        regularised = regularised + tf.mul(
            default_positive_fact_penality, self.penalize_positive_facts())
    self.loss = regularised - PR(self.tensor)

    self.save_path = save_path
    self.train_op = train_op(self.loss, default_optimizer)
    self.saver = tf.train.Saver(max_to_keep=20)
    print("knowledge base %s is defined" % (label,))
def __init__(self, train_set, test_set, dictparam):
    """Build the standard Elo model on top of the base-class setup.

    Creates the trainable ``self.elo`` matrix (``nb_teams`` x ``nb_times``),
    the logistic match-outcome model for the train and test sets, and the
    Elo regularisation costs, then lets the base class finish initialising.
    """
    super(Elostd, self).__init__(train_set, test_set, dictparam)

    # Parameters
    self.elo = tf.Variable(tf.zeros([self.nb_teams, self.nb_times]))

    # Model
    datasets = (('train', self.train_data), ('test', self.test_data))
    for split, data in datasets:
        team_diff = data['team_h'] - data['team_a']
        strength = ToolBox.get_elomatch(team_diff, data['time'], self.elo)
        strength = strength + self.param['bais_ext']
        # Logistic function of the Elo difference.
        self.res[split] = tf.inv(1. + tf.exp(-strength))

    # Costs
    self.init_cost()
    for split in ('train', 'test'):
        raw_cost = ToolBox.get_raw_elo_cost(
            self.param['metaparam0'], self.param['metaparam1'],
            self.elo, self.nb_times)
        self.regulizer[split].append(raw_cost)

        timediff_cost = ToolBox.get_timediff_elo_cost(
            self.param['metaparam2'], self.elo, self.nb_times)
        self.regulizer[split].append(timediff_cost)

    # Finish the initialization
    super(Elostd, self).finish_init()
def gaussian2d(x, y, cx, cy, a, b, dtype=tf.float32):
    """Sum N axis-aligned 2D Gaussian probability densities at m points.

    Each term is
    ``1 / (2*pi*a*b) * exp(-(x-cx)^2 / (2*a^2) - (y-cy)^2 / (2*b^2))``
    and the terms are summed over axis 1 (the Gaussian index).

    Args:
        x, y: m x n 2D tensors with the positions of the calculation points
            (m points, n Gaussian functions).
        cx, cy: m x n 2D tensors with the centre positions.
        a, b: m x n 2D tensors with the widths in the x and y direction.
        dtype: dtype of the scalar constants used in the computation.

    Returns:
        Tensor with the summed density for each of the m points.
    """
    two = tf.constant(2.0, dtype)

    # A = 1 / (2*pi*a*b)
    A = tf.inv(tf.mul(tf.constant(2.0 * np.pi, dtype), tf.mul(a, b)))

    # powerX = (x-cx)^2 / (2*a^2)
    powerX = tf.truediv(tf.pow(tf.sub(x, cx), two),
                        tf.mul(two, tf.pow(a, two)))

    # powerY = (y-cy)^2 / (2*b^2).  The y width is b; the previous code
    # divided by 2*a^2 here, contradicting both this formula and the
    # normalisation constant A.
    powerY = tf.truediv(tf.pow(tf.sub(y, cy), two),
                        tf.mul(two, tf.pow(b, two)))

    # p = A * exp(-powerX - powerY), summed over the Gaussians.
    probability = tf.reduce_sum(
        tf.mul(A, tf.exp(tf.neg(tf.add(powerX, powerY)))), 1)
    return probability
def chord(features, name='chord'):
    '''Construct the submodule for chord estimation

    Parameters
    ----------
    features : tf.Tensor
        The input tensor to the module

    name : str
        The name of this subgraph

    Returns
    -------
    pitches, root, bass : tf.Tensor
        Prediction nodes for pitches, root, and bass.

        pitches are n-by-time-by-12, encoding the probability that each
        pitch class is active.

        root and bass are n-by-time-by-13, encoding the distribution over
        pitches, including an additional `N` coordinate for unpitched
        predictions.
    '''
    # Target and mask placeholders
    target_pc = tf.placeholder(tf.bool, shape=[None, None, 12],
                               name='output_pitches')
    target_root = tf.placeholder(tf.bool, shape=[None, None, 13],
                                 name='output_root')
    target_bass = tf.placeholder(tf.bool, shape=[None, None, 13],
                                 name='output_bass')
    mask_chord = tf.placeholder(tf.bool, shape=[None], name='mask_chord')

    with tf.name_scope(name):
        z_chord = ops.expand_mask(mask_chord, name='mask_chord')

        pitch_logit = layers.conv2_multilabel(features, 12,
                                              squeeze_dims=[2],
                                              name='pitches_module')
        root_logit = layers.conv2_softmax(features, 13,
                                          squeeze_dims=[2],
                                          name='root_module')
        bass_logit = layers.conv2_softmax(features, 13,
                                          squeeze_dims=[2],
                                          name='bass_module')

        # The prediction nodes are the exponentiated module outputs.
        pitches = tf.exp(pitch_logit, name='pitches')
        root = tf.exp(root_logit, name='root')
        bass = tf.exp(bass_logit, name='bass')

    # Masked losses; f_mask (1 / mean mask) rescales them for the summaries.
    with tf.name_scope('loss'):
        f_mask = tf.inv(tf.reduce_mean(z_chord))

        with tf.name_scope('pitches'):
            pc_xent = tf.nn.sigmoid_cross_entropy_with_logits(
                pitch_logit, tf.to_float(target_pc))
            pc_loss = tf.reduce_mean(z_chord * pc_xent, name='loss')

        with tf.name_scope('root'):
            root_xent = ops.ndxent(root_logit, tf.to_float(target_root), [2])
            root_loss = tf.reduce_mean(z_chord * root_xent, name='loss')

        with tf.name_scope('bass'):
            bass_xent = ops.ndxent(bass_logit, tf.to_float(target_bass), [2])
            bass_loss = tf.reduce_mean(z_chord * bass_xent, name='loss')

    # Register the nodes in the graph collections, in a fixed order.
    for prediction in (pitches, root, bass):
        tf.add_to_collection('outputs', prediction)

    for placeholder in (target_pc, target_root, target_bass, mask_chord):
        tf.add_to_collection('inputs', placeholder)

    for task_loss in (pc_loss, root_loss, bass_loss):
        tf.add_to_collection('loss', task_loss)

    summaries = (('{:s}/pitches'.format(name), pc_loss),
                 ('{:s}/root'.format(name), root_loss),
                 ('{:s}/bass'.format(name), bass_loss))
    for tag, task_loss in summaries:
        tf.scalar_summary(tag, f_mask * task_loss)

    return pitches, root, bass
# Close the session opened earlier, then walk through the basic element-wise
# math ops.  Each call below only adds a node to the graph; no result is
# evaluated or stored here.
sess.close()
a = tf.constant(3)
b = tf.constant(2)
c = tf.constant(-1)
d = tf.constant([1.2,4.3,2.9])
tf.add(a,b)  # sum
tf.subtract(a,b)  # subtraction
tf.multiply(a,b)  # multiplication
tf.div(a,b)  # division
tf.mod(a,b)  # modulo
tf.abs(c)  # absolute value
tf.negative(a)  # negation
tf.sign(a)  # returns the sign
tf.inv(a)  # element-wise reciprocal (1/x)
tf.square(a)  # square
tf.round(d)  # round to the nearest integer
# NOTE(review): a is an integer constant; sqrt/exp/log/cos may require a
# floating-point input -- confirm against the TensorFlow version in use.
tf.sqrt(a)  # square root
tf.pow(a,b)  # a raised to the power b
tf.exp(a)  # e raised to the power a
# Natural logarithm of a.  The original note also claimed that a second
# argument selects the base -- unverified, confirm against the tf.log docs.
tf.log(a)
tf.maximum(a,b)  # returns the maximum
tf.minimum(a,b)  # returns the minimum
tf.cos(a)  # trigonometric function: cosine
# Data type conversion
e = tf.constant("abcde")
tf.string_to_number(e)  # convert a string to a number
def train(x, y, **kwargs):
    """Fit a logistic model with mini-batch stochastic gradient descent.

    Follows the logistic-regression setup of the Stanford CS229 notes
    (http://cs229.stanford.edu/notes/cs229-notes1.pdf).  The model output is
    ``1 / (1 + exp(-(X.W + b)))`` and the minimised cost is the sum of
    squared differences between that output and the targets.  Each step
    uses ``batch_size`` rows drawn with ``np.random.choice``.

    Args:
        x: The covariates or factors of the model in an n by m array (n is
            the number of data points and m is the number of factors).
        y: The targets or labels of the model in an n by 1 array.
        kwargs:
            model_path: Location where the tf model file should be saved;
                nothing is saved when empty (default "").
            iterations: The number of steps to train (default 100).
            batch_size: The number of samples to use per step (default 10).
            verbosity_step: The number of steps between each printout of
                the batch cost; zero or negative disables printouts
                (default 20).
            step_size: The gradient-descent learning rate
                (default 10e-1, i.e. 1.0).
            seed: Seed for NumPy's global RNG used to choose the batches;
                0 leaves the RNG unseeded (default 0).

    Returns:
        A ``Parameters(Weights, Biases)`` namedtuple holding the values of
        the weights and the bias after the last training step (the initial
        values when ``iterations`` is 0).
    """
    # extract the kwargs
    model_path = kwargs.get("model_path", "")
    iterations = kwargs.get("iterations", 100)
    batch_size = kwargs.get("batch_size", 10)
    verbosity_step = kwargs.get("verbosity_step", 20)
    step_size = kwargs.get("step_size", 10e-1)
    seed = kwargs.get("seed", 0)

    if seed:
        np.random.seed(seed)

    num_predictors = len(x[0])
    x = np.array(x)
    y = np.array(y)

    with tf.Graph().as_default() as _:
        X = tf.placeholder(tf.float32, [batch_size, num_predictors])
        Y = tf.placeholder(tf.float32, [batch_size, 1])
        W = tf.Variable(tf.zeros([num_predictors, 1]))
        b = tf.Variable(1.0)
        saver = tf.train.Saver([W, b])

        logit = tf.inv(tf.exp(-(tf.matmul(X, W) + b)) + 1)
        cost = tf.reduce_sum(tf.square(logit - Y))
        train_step = tf.train.GradientDescentOptimizer(step_size).minimize(cost)
        init = tf.initialize_all_variables()

        with tf.Session() as sess:
            sess.run(init)
            for i in range(iterations):
                sample_indexes = np.random.choice(len(y), batch_size)
                sample_xs = x[sample_indexes]
                sample_ys = y[sample_indexes]
                batch_cost, _ = sess.run(
                    [cost, train_step],
                    feed_dict={X: sample_xs, Y: sample_ys})
                # `i % verbosity_step == 0` alone is still true for negative
                # steps and divides by zero for 0, so require a positive
                # step before printing.
                if verbosity_step > 0 and i % verbosity_step == 0:
                    print(batch_cost)

            # Read the parameters once training is over: this is well
            # defined even for zero iterations and always reflects the
            # final update.
            weights, bias = sess.run([W, b])

            if model_path:
                saver.save(sess, model_path)

    Parameters = namedtuple("Parameters", ["Weights", "Biases"])
    return Parameters(weights, bias)
def __init__(self, config):
    """Build the LSTM graph with a classification head and an MDN head.

    The classification head applies a 2-way softmax cross entropy to the
    last LSTM output.  The MDN head predicts, for every time step but the
    last, a mixture over the change of the first three input coordinates
    to the next time step.

    Args:
        config: dict of hyperparameters with the keys 'num_layers',
            'hidden_size', 'max_grad_norm', 'batch_size', 'sl' (sequence
            length), 'mixtures', 'crd' (number of input coordinates),
            'learning_rate' and 'MDN' (truthy to add the MDN loss to the
            optimised cost).
    """
    num_layers = config['num_layers']
    hidden_size = config['hidden_size']
    # NOTE(review): max_grad_norm is read but not used; the clipping below
    # uses a hard-coded 0.5 -- confirm which value is intended.
    max_grad_norm = config['max_grad_norm']
    batch_size = config['batch_size']
    sl = config['sl']
    mixtures = config['mixtures']
    crd = config['crd']
    learning_rate = config['learning_rate']
    MDN = config['MDN']

    self.sl = sl
    self.crd = crd
    self.batch_size = batch_size

    # Nodes for the input variables
    self.x = tf.placeholder(
        "float", shape=[batch_size, crd, sl], name='Input_data')
    self.y_ = tf.placeholder(tf.int64, shape=[batch_size],
                             name='Ground_truth')
    self.keep_prob = tf.placeholder("float")

    with tf.name_scope("LSTM") as scope:
        cell = tf.nn.rnn_cell.LSTMCell(hidden_size, use_peepholes=True)
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)
        cell = tf.nn.rnn_cell.DropoutWrapper(
            cell, output_keep_prob=self.keep_prob)

        # Initial state
        initial_state = cell.zero_state(batch_size, tf.float32)

        # One [batch_size, crd] tensor per time step.
        inputs = tf.unpack(self.x, axis=2)
        # outputs, _ = tf.nn.rnn(cell, inputs, dtype=tf.float32)
        outputs, _ = tf.nn.rnn(cell, inputs, initial_state=initial_state)

    # outputs = []
    # self.states = []
    # state = initial_state
    # for time_step in range(sl):
    #   if time_step > 0: tf.get_variable_scope().reuse_variables()
    #   (cell_output, state) = cell(self.x[:, :, time_step], state)
    #   outputs.append(cell_output)
    #   self.states.append(state)
    # self.final_state = state

    # outputs is now a list of length seq_len with tensors
    # [ batch_size by hidden_size ]
    with tf.name_scope("SoftMax") as scope:
        final = outputs[-1]
        W_c = tf.Variable(tf.random_normal([hidden_size, 2], stddev=0.01))
        b_c = tf.Variable(tf.constant(0.1, shape=[2]))
        self.h_c = tf.matmul(final, W_c) + b_c

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(self.h_c,
                                                              self.y_)
        self.cost = tf.reduce_mean(loss)
        loss_summ = tf.scalar_summary("cross entropy_loss", self.cost)

    with tf.name_scope("Output_MDN") as scope:
        params = 8  # 7+theta
        # Two for distribution over hit&miss, params for distribution
        # parameters
        output_units = mixtures * params
        W_o = tf.Variable(tf.random_normal(
            [hidden_size, output_units], stddev=0.01))
        b_o = tf.Variable(tf.constant(0.5, shape=[output_units]))
        # For comparison with XYZ, only up to last time_step
        # --> because for final time_step you cannot make a prediction
        outputs_tensor = tf.concat(0, outputs[:-1])
        # is of size [(seq_len-1)*batch_size by output_units], with all
        # rows of time step 0 first, then time step 1, and so on.
        h_out_tensor = tf.nn.xw_plus_b(outputs_tensor, W_o, b_o)

    with tf.name_scope('MDN_over_next_vector') as scope:
        # The rows of h_out_tensor are time-major (see the concat above),
        # so they are first reshaped to [sl-1, batch_size, output_units]
        # and only then transposed.  Reshaping straight to
        # (batch_size, sl-1, output_units) would mix samples and time
        # steps whenever batch_size > 1.
        h_xyz = tf.reshape(h_out_tensor, (sl - 1, batch_size, output_units))
        # transpose to [batch_size, output_units, sl-1]
        h_xyz = tf.transpose(h_xyz, [1, 2, 0])

        # x_next = tf.slice(x,[0,0,1],[batch_size,3,sl-1])
        # in size [batch_size, output_units, sl-1]
        x_next = tf.sub(self.x[:, :3, 1:], self.x[:, :3, :sl - 1])

        # From here on, many variables have size
        # [batch_size, mixtures, sl-1]
        xn1, xn2, xn3 = tf.split(1, 3, x_next)
        (self.mu1, self.mu2, self.mu3,
         self.s1, self.s2, self.s3,
         self.rho, self.theta) = tf.split(1, params, h_xyz)

        # make the theta mixtures
        # softmax all the theta's (max subtracted before the exp):
        max_theta = tf.reduce_max(self.theta, 1, keep_dims=True)
        self.theta = tf.sub(self.theta, max_theta)
        self.theta = tf.exp(self.theta)
        normalize_theta = tf.inv(tf.reduce_sum(self.theta, 1,
                                               keep_dims=True))
        self.theta = tf.mul(normalize_theta, self.theta)

        # Deviances are non-negative and rho between -1 and 1
        self.s1 = tf.exp(self.s1)
        self.s2 = tf.exp(self.s2)
        self.s3 = tf.exp(self.s3)
        self.rho = tf.tanh(self.rho)

        # probability in x1x2 plane
        px1x2 = tf_2d_normal(xn1, xn2, self.mu1, self.mu2,
                             self.s1, self.s2, self.rho)
        px3 = tf_1d_normal(xn3, self.mu3, self.s3)
        px1x2x3 = tf.mul(px1x2, px3)

        # Sum along the mixtures in dimension 1
        px1x2x3_mixed = tf.reduce_sum(tf.mul(px1x2x3, self.theta), 1)
        print('You are using %.0f mixtures' % mixtures)
        # at the beginning, some errors are exactly zero.
        loss_seq = -tf.log(tf.maximum(px1x2x3_mixed, 1e-20))
        self.cost_seq = tf.reduce_mean(loss_seq)
        self.cost_comb = self.cost
        if MDN:
            # The magic line where both heads come together.
            self.cost_comb += self.cost_seq

    with tf.name_scope("train") as scope:
        tvars = tf.trainable_variables()
        # We clip the gradients to prevent explosion
        grads = tf.gradients(self.cost_comb, tvars)
        grads, _ = tf.clip_by_global_norm(grads, 0.5)

        # Some decay on the learning rate
        global_step = tf.Variable(0, trainable=False)
        lr = tf.train.exponential_decay(
            learning_rate, global_step, 14000, 0.95, staircase=True)
        optimizer = tf.train.AdamOptimizer(lr)
        # Materialise the pairs: they are consumed by apply_gradients and
        # iterated again below, and a Python 3 zip can only be walked once.
        gradients = list(zip(grads, tvars))
        self.train_step = optimizer.apply_gradients(
            gradients, global_step=global_step)

        # The following block plots for every trainable variable
        #  - Histogram of the entries of the Tensor
        #  - Histogram of the gradient over the Tensor
        #  - Histogram of the gradient-norm over the Tensor
        # (the histograms are currently disabled; only the running element
        # count self.numel is built)
        self.numel = tf.constant([[0]])
        for gradient, variable in gradients:
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient

            self.numel += tf.reduce_sum(tf.size(variable))
            # h1 = tf.histogram_summary(variable.name, variable)
            # h2 = tf.histogram_summary(variable.name + "/gradients", grad_values)
            # h3 = tf.histogram_summary(variable.name + "/gradient_norm", clip_ops.global_norm([grad_values]))

    with tf.name_scope("Evaluating_accuracy") as scope:
        correct_prediction = tf.equal(tf.argmax(self.h_c, 1), self.y_)
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        accuracy_summary = tf.scalar_summary("accuracy", self.accuracy)

    # Define one op to call all summaries
    self.merged = tf.merge_all_summaries()
def approximate_conv_jitter_multigpu_complex(n_cells, lam_w, window, stride,
                                             step_sz, tc_mean, su_channels,
                                             config_params,
                                             stim_downsample_window,
                                             stim_downsample_stride):
    """Set up the entire graph and summary ops for the multi-tower model.

    The parameter variables, the projection ops and the placeholders are
    created here; the per-tower model is built by ``build_model_complex``
    through ``deploy``.

    Args:
        n_cells: Number of cells; sizes the response placeholder, the
            subunit-to-cell weights ``a`` and the cell biases.
        lam_w: Weight used in the (currently unused) ``scale`` projection
            term for ``w_del``; also forwarded to the tower function.
        window, stride: Window parameters passed to ``get_windows`` and
            ``get_su_cell_overlap``; ``window`` also sizes ``w_mother``.
        step_sz: Learning rate of the Adagrad optimizer; also forwarded to
            the tower function.
        tc_mean: Time course, stored as a float32 constant.
        su_channels: Forwarded to the tower function.
        config_params: Parsed with ``DeploymentConfig.parse`` to obtain the
            device configuration.
        stim_downsample_window, stim_downsample_stride: Window parameters of
            the stimulus down-sampling stage.

    Returns:
        A tuple ``(model, stim_cpu, resp_cpu, global_step)`` where ``model``
        is the ``model2(...)`` record built at the end of this function.
    """
    ## Sets up the entire graph and summary ops.
    # Stimulus is first smoothened to lower dimensions
    # using convolution with w_stimlr and max pooling.
    # Followed by approximate convolutional architecture and poisson spiking.
    # An "approximate convolutional model" one where weights in
    # each convolutional window is sum of a common component (wmother) and
    # subunit specific modification (wdeltai)(wi = wmother+deltawi)

    ## Build a configuration specifying multi-GPU and multi-replicas.
    config = DeploymentConfig.parse(config_params)
    print(config)

    ## Start building the graph
    with tf.device(config.variables_device()):
        global_step = tf.contrib.framework.create_global_step()

    ## Build the optimizer based on the device specification.
    with tf.device(config.optimizer_device()):
        opt = tf.train.AdagradOptimizer(step_sz)

    ## Make stimulus and response placeholders
    with tf.device(config.inputs_device()):
        # Hard-coded stimulus size: 640 x 320 with 3 colour channels.
        d1 = 640
        d2 = 320
        colors = 3
        stim_cpu = tf.placeholder(tf.float32,
                                  shape=[None, d1, d2, colors],
                                  name='stim_cpu')
        resp_cpu = tf.placeholder(tf.float32,
                                  shape=[None, n_cells],
                                  name='resp_cpu')

    ## Set up variables
    with tf.device(config.variables_device()):
        ## Convert stimulus into lower resolution
        _, dimx_lr, dimy_lr, _ = get_windows(stim_downsample_window,
                                             stim_downsample_stride,
                                             n_channels=1)

        # The `0 * randn` term is zero, so every entry starts at 0.05.
        w_stim_lr = tf.Variable(np.array(
            0.05 + 0 * np.random.randn(2 * stim_downsample_window + 1,
                                       2 * stim_downsample_window + 1,
                                       3, 1),
            dtype='float32'), name="w_stim_lr")
        print('dimx_lr %d, dimy_lr %d' % (dimx_lr, dimy_lr))

        # max pooling
        _, dimx_maxpool, dimy_maxpool, _ = get_windows(FLAGS.window_maxpool,
                                                       FLAGS.stride_maxpool,
                                                       n_channels=1,
                                                       d1=dimx_lr,
                                                       d2=dimy_lr)
        print('dimx_maxpool %d, dimy_maxpool %d' % (dimx_maxpool,
                                                   dimy_maxpool))

        ## Set parameters for "almost convolutional model"
        # get window locations
        mask_tf, dimx, dimy, n_pix = get_windows(window, stride,
                                                 n_channels=1,
                                                 d1=dimx_maxpool,
                                                 d2=dimy_maxpool)
        print('dimx %d, dimy %d' % (dimx, dimy))

        # mother subunit (constant 0.05 initialisation, as for w_stim_lr)
        w_mother = tf.Variable(np.array(
            0.05 + 0 * np.random.randn(2 * window + 1, 2 * window + 1, 1, 1),
            dtype='float32'), name='w_mother')

        # subunit specific modifications to each window
        w_del = tf.Variable(np.array(0.001 * np.random.randn(dimx, dimy,
                                                             n_pix),
                                     dtype='float32'), name='w_del')
        #w_del = tf.constant(np.array( 0 * np.random.randn(dimx, dimy, n_pix),
        #                             dtype='float32'), name='w_del')

        # weights from each subunit to each cell
        a = tf.Variable(np.array(1 * np.random.rand(dimx * dimy, n_cells),
                                 dtype='float32'), name='a')

        # biases for each subunit and each cell
        bias_su = tf.Variable(np.array(0.00001 * np.random.rand(dimx, dimy),
                                       dtype='float32'), name="bias_su")
        bias_cell = tf.Variable(np.array(0.00001 * np.random.rand(n_cells),
                                         dtype='float32'), name="bias_cells")

        # time course derived from STA
        time_course = tf.constant(np.array(tc_mean, dtype='float32'))

        # make summary op for each parameter
        vars_lst = variables_lr(w_mother, w_del, a, w_stim_lr, bias_su,
                                bias_cell, time_course)
        for ivar in [w_mother, w_del, a, w_stim_lr]:
            tf.histogram_summary(ivar.name, ivar)

        ## Compute which subunits will be connected to which cell
        su_cell_mask = get_su_cell_overlap(n_cells, window, stride,
                                           stim_downsample_window,
                                           stim_downsample_stride)
        su_cell_mask_tf = tf.constant(np.array(su_cell_mask,
                                               dtype='float32'))

        # add projection op
        # Mixed norm (L2/L1) projection for W_del for block sparsity
        v_norm = tf.sqrt(tf.reduce_sum(tf.pow(w_del, 2), 2))
        # if v_norm is 0, the it gives NaN
        # NOTE(review): `scale` and `w_del_old` are not referenced again in
        # this function (the projection that used them is commented out
        # below), and `scale` reads FLAGS.step_sz rather than the step_sz
        # argument -- confirm whether that is intended.
        scale = tf.clip_by_value(1 - lam_w * FLAGS.step_sz * tf.inv(v_norm),
                                 0, float('inf'))
        w_del_old = w_del
        # proj_wdel = tf.assign(w_del, tf.transpose(tf.mul(tf.transpose(w_del, (2, 0, 1)), scale), (1, 2, 0)))
        # mixed L2/L1 norm on w_del
        # proj_ops = [proj_wdel]
        # probe_ops = [v_norm, scale, w_del, w_del_old]

        # proximal step for L1 for sparsity in a
        #a_new = tf.nn.relu(a - FLAGS.lam_a) - tf.nn.relu(a - FLAGS.lam_a)
        #proj_a = tf.assign(a, a_new)
        #proj_ops = [proj_a]
        #probe_ops = [a]

        #proj_op_list=[]
        #for icell in np.arange(n_cells):
        ## old code
        ## proj_op = l1_projection_tf.Project(a, 0, tf.constant(float(50)), tf.constant(0.01))
        ## proj_op_list.append(proj_op)
        ##proj_ops = [tf.group(*proj_op_list)]
        #a_proj = tf.nn.relu(l1_projection_tf.Project(a, 0, tf.constant(float(FLAGS.rad_a)), tf.constant(0.01)))
        #a_proj_assign = tf.assign(a, a_proj)
        #proj_ops = a_proj_assign
        #probe_ops = []

        # if a is not passed through SFM, then make sure to keep a positive
        proj_ops = []
        if not (FLAGS.if_a_sfm):
            a_new = tf.nn.relu(a)
            proj_a_positive = tf.assign(a, a_new)
            proj_ops += [proj_a_positive]
            print('projections happening')

        # project a to have support determined by su_cell_mask
        if not (FLAGS.if_a_sfm):
            a_proj_support = tf.assign(a, tf.mul(a, su_cell_mask_tf))
        else:
            # Entries outside the mask are set to -40 instead of 0.
            a_proj_support = tf.assign(a, (tf.mul(a, su_cell_mask_tf) -
                                           40 * (1 - su_cell_mask_tf)))
        proj_ops += [a_proj_support]
        probe_ops = [a]
        print("a support is fixed")

        # make sure b_cell is non-negative
        bias_cell_new = tf.nn.relu(bias_cell)
        bias_cell_proj = tf.assign(bias_cell, bias_cell_new)
        proj_ops += [bias_cell_proj]
        print('Number of projection ops are: %d' % len(proj_ops))
        print("projection op made")

    ## Set up identical model on each tower (GPU) (based on user configuration)
    # to convert stimulus into firing rate across cell
    tower_fn = build_model_complex
    tower_args = (n_cells, lam_w, tc_mean, window, stride,
                  stim_downsample_window, stim_downsample_stride,
                  step_sz, su_channels, stim_cpu, resp_cpu, vars_lst,
                  config.num_towers)
    model_combined = deploy(config, tower_fn, optimizer=opt, args=tower_args)
    train_step = model_combined.train_op
    #opt.minimize(loss, var_list=vars_fit, global_step=tf.contrib.framework.get_global_step())

    # probe_nodes is a module-level global; it is appended as a single
    # element of probe_ops.
    global probe_nodes
    probe_ops.append(probe_nodes)

    ## compute stimulus to maximize output of a particular unit

    # Merge all the summary writer ops into one op
    # (this way, calling one op stores all summaries)
    #merged_summary = tf.merge_all_summaries()
    summary_op = model_combined.summary_op

    dims = dimensions(dimx, dimy)
    dims_slr = dimensions_stimlr(dimx_lr, dimy_lr)
    return model2(train_step, summary_op, vars_lst, dims, dims_slr,
                  model_combined.total_loss, proj_ops,
                  probe_ops), stim_cpu, resp_cpu, global_step
def tf_normal(y, mu, sigma):
    """Evaluate the normal density N(y; mu, sigma) element-wise.

    Computes ``exp(-((y - mu) / sigma)^2 / 2) / sigma`` and scales it by
    the module-level constant ``oneDivSqrtTwoPI``.
    """
    # Standardised residual (y - mu) / sigma.
    z = tf.mul(tf.sub(y, mu), tf.inv(sigma))
    exponent = -tf.square(z) / 2
    unnormalised = tf.mul(tf.exp(exponent), tf.inv(sigma))
    return unnormalised * oneDivSqrtTwoPI
def _mini_batch_training_op(self, inputs, cluster_idx_list, cluster_centers,
                            cluster_centers_var, total_counts):
    """Creates an op for training for mini batch case.

    For a cluster with old center x and old count n that receives the new
    points d_1, ..., d_k, the center is updated as
    x += (sum_i(d_i) - k * x) / (n + k) and the count as n += k.

    Args:
        inputs: list of input Tensors.
        cluster_idx_list: A vector (or list of vectors). Each element in the
            vector corresponds to an input row in 'inp' and specifies the
            cluster id corresponding to the input.
        cluster_centers: Tensor of cluster centers, possibly normalized.
        cluster_centers_var: Tensor Ref of cluster centers.
        total_counts: Tensor Ref of cluster counts.

    Returns:
        An op for doing an update of mini-batch k-means.
    """
    update_ops = []
    for inp, cluster_idx in zip(inputs, cluster_idx_list):
        with ops.colocate_with(inp):
            assert total_counts is not None
            flat_idx = tf.reshape(cluster_idx, [-1])
            # Work on the distinct cluster ids only, so that the updates of
            # this batch are aggregated locally first.
            touched_ids, segment_idx = tf.unique(flat_idx)
            num_touched = tf.size(touched_ids)

            # Current counts (n) and centers (x) of the touched clusters.
            with ops.colocate_with(total_counts):
                prev_counts = tf.gather(total_counts, touched_ids)
            with ops.colocate_with(cluster_centers):
                prev_centers = tf.gather(cluster_centers, touched_ids)

            # k: number of new points assigned to each touched cluster.
            unit_counts = tf.ones_like(segment_idx, dtype=total_counts.dtype)
            added_counts = tf.unsorted_segment_sum(
                unit_counts, segment_idx, num_touched)

            # sum_i(d_i): sum of the new points for each touched cluster.
            center_delta = tf.unsorted_segment_sum(
                inp, segment_idx, num_touched)

            # Shape [num_touched, 1, ..., 1] of the same rank as inp, used
            # to broadcast the per-cluster scalars against inp.
            leading_dim = tf.reshape(num_touched, [1])
            trailing_ones = tf.ones(tf.reshape(tf.rank(inp) - 1, [1]),
                                    dtype=tf.int32)
            broadcast_shape = tf.concat(0, [leading_dim, trailing_ones])

            # sum_i(d_i) - k * x
            center_delta -= tf.cast(
                tf.reshape(added_counts, broadcast_shape),
                inp.dtype) * prev_centers

            # ... scaled by 1 / (n + k)
            step = tf.inv(tf.cast(prev_counts + added_counts, inp.dtype))
            step = tf.reshape(step, broadcast_shape)
            center_delta *= step

        # Scatter the aggregated increments into the counts and centers.
        update_counts = tf.scatter_add(
            total_counts, touched_ids, added_counts)
        update_cluster_centers = tf.scatter_add(
            cluster_centers_var, touched_ids, center_delta)
        update_ops.extend([update_counts, update_cluster_centers])
    return tf.group(*update_ops)
def test_Inv(self):
    """Run the shared check on tf.inv applied to a random (4, 3) input."""
    self.check(tf.inv(self.random(4, 3)))
def __init__(self, dim, args, infer=False):
    """Build the RNN with a Gaussian-mixture output for dim-dimensional data.

    Args:
        dim: Number of data dimensions; each one gets its own mu and sig
            per mixture component, while the mixture weights are shared.
        args: Settings object; reads model, rnn_size, num_layers,
            keep_prob, batch_size, seq_length, num_mixture, sig_epsilon
            and grad_clip.  batch_size and seq_length are overwritten on
            this object when infer is true.
        infer: When true, the graph is built for a batch of one sequence
            of length one and no dropout wrapper is added.

    Raises:
        Exception: if args.model is not 'rnn', 'gru' or 'lstm'.
    """
    self.dim = dim
    self.args = args
    if infer:
        # Note: this mutates the caller's args object.
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size)
    cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
    if (infer == False and args.keep_prob < 1):  # training mode
        cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=args.keep_prob)
    self.cell = cell

    self.input_data = tf.placeholder(
        dtype=tf.float32, shape=[None, args.seq_length, self.dim])
    self.target_data = tf.placeholder(
        dtype=tf.float32, shape=[None, args.seq_length, self.dim])
    self.initial_state = cell.zero_state(batch_size=args.batch_size,
                                         dtype=tf.float32)

    self.num_mixture = args.num_mixture
    NOUT = self.num_mixture * (1 + 2 * self.dim)  # prob + mu + sig
    # [prob 1-20, dim1 mu, dim1 sig, dim2,... ]

    with tf.variable_scope('rnnlm'):
        output_w = tf.get_variable("output_w", [args.rnn_size, NOUT])
        output_b = tf.get_variable("output_b", [NOUT])

    # One [batch, dim] tensor per time step.
    inputs = tf.split(1, args.seq_length, self.input_data)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    outputs, states = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                          loop_function=None, scope='rnnlm')
    # Flatten the per-step outputs to [-1, rnn_size] and project to NOUT.
    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    output = tf.nn.xw_plus_b(output, output_w, output_b)
    self.final_state = states

    # reshape target data so that it is compatible with prediction shape
    flat_target_data = tf.reshape(self.target_data, [-1, self.dim])
    #[x1_data, x2_data, eos_data] = tf.split(1, 3, flat_target_data)
    x_data = flat_target_data

    def tf_normal(x, mu, sig):
        # Univariate normal density, evaluated element-wise.
        return tf.exp(-tf.square(x - mu) / (2 * tf.square(sig))) / (sig * tf.sqrt(2 * np.pi))

    #def tf_multi_normal(x, mu, sig, ang):
    # use n (n+1) / 2 to parametrize covariance matrix
    # 1. http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.31.494&rep=rep1&type=pdf
    # 2. https://en.wikipedia.org/wiki/Triangular_matrix
    # 3. https://makarandtapaswi.wordpress.com/2011/07/08/cholesky-decomposition-for-matrix-inversion/
    # A = LL'  by 1
    # det(L) = prod of diagonals  by 2
    # det(A) = det(L)^2  by 3
    # A-1 = (L-1)'(L-1)  by 3
    # We're parametrizing using L^-1
    # Sigma^-1 = (L^-1)'(L^-1)
    # |Sigma| = 1 / det(L^-1)^2 = 1 / (diagonal product of L^-1)^2
    #return tf.exp(-tf.square(x - mu) / (2 * tf.square(sig + 0.01))) / ((sig + 0.01) * tf.sqrt(2 * np.pi))

    # z_mu, z_sig, x_data [batch_size x mixture], z_pi [batch_size x mixture]
    def get_lossfunc(z_pi, z_mu, z_sig, x_data):
        # Negative log-likelihood of x_data under the mixture, summed over
        # all rows; the density is floored at 1e-20 before the log.
        result0 = tf_normal(x_data, z_mu, z_sig)
        result1 = tf.reduce_sum(result0 * z_pi, 1, keep_dims=True)
        result2 = -tf.log(tf.maximum(result1, 1e-20))
        return tf.reduce_sum(result2)

    # Mixture weights: softmax over the first num_mixture outputs, with
    # the row maximum subtracted before the exp.
    self.pi = output[:, 0:self.num_mixture]
    max_pi = tf.reduce_max(self.pi, 1, keep_dims=True)
    self.pi = tf.exp(tf.sub(self.pi, max_pi))
    normalize_pi = tf.inv(tf.reduce_sum(self.pi, 1, keep_dims=True))
    self.pi = normalize_pi * self.pi

    # The remaining outputs are split into one (mu, sig) block per dimension.
    output_each_dim = tf.split(1, self.dim, output[:, self.num_mixture:])

    self.mu = []
    self.sig = []
    self.cost = 0

    for i in range(self.dim):
        [o_mu, o_sig] = tf.split(1, 2, output_each_dim[i])
        # exp keeps sig positive; sig_epsilon is added on top of it.
        o_sig = tf.exp(o_sig) + args.sig_epsilon

        self.mu.append(o_mu)
        self.sig.append(o_sig)

        lossfunc = get_lossfunc(self.pi, o_mu, o_sig, x_data[:, i:i + 1])
        # Average over batch, sequence length and dimensions.
        self.cost += lossfunc / (args.batch_size * args.seq_length * self.dim)

    self.mu = tf.concat(1, self.mu)
    self.sig = tf.concat(1, self.sig)

    self.loss_summary = tf.scalar_summary("loss", self.cost)
    self.summary = tf.merge_all_summaries()

    # The learning rate is a non-trainable variable initialised to 0.0.
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, label, clauses, save_path=""): print "defining the knowledge base", label self.label = label self.clauses = clauses self.parameters = [par for cl in self.clauses for par in cl.parameters] if not self.clauses: self.tensor = tf.constant(1.0) else: clauses_value_tensor = tf.concat(0, [cl.tensor for cl in clauses]) if default_clauses_aggregator == "min": print "clauses aggregator is min" self.tensor = tf.reduce_min(clauses_value_tensor) if default_clauses_aggregator == "mean": print "clauses aggregator is mean" self.tensor = tf.reduce_mean(clauses_value_tensor) if default_clauses_aggregator == "hmean": print "clauses aggregator is hmean" self.tensor = tf.div(tf.to_float(tf.size(clauses_value_tensor)), tf.reduce_sum(tf.inv(clauses_value_tensor), keep_dims=True)) if default_clauses_aggregator == "wmean": print "clauses aggregator is weighted mean" weights_tensor = tf.constant([cl.weight for cl in clauses]) self.tensor = tf.div(tf.reduce_sum(tf.mul(weights_tensor, clauses_value_tensor)), tf.reduce_sum(weights_tensor)) if default_positive_fact_penality != 0: self.loss = smooth(self.parameters) + \ tf.mul(default_positive_fact_penality, self.penalize_positive_facts()) - \ PR(self.tensor) else: self.loss = smooth(self.parameters) - PR(self.tensor) self.save_path = save_path self.train_op = train_op(self.loss, default_optimizer) self.saver = tf.train.Saver(max_to_keep=20) print "knowledge base", label, "is defined"
def __init__(self, args, infer=False):
    """Build the RNN with a Gaussian-mixture output for 1-dimensional data.

    Args:
        args: Settings object; reads model, rnn_size, num_layers,
            keep_prob, batch_size, seq_length, num_mixture and grad_clip.
            batch_size and seq_length are overwritten on this object when
            infer is true.
        infer: When true, the graph is built for a batch of one sequence
            of length one and no dropout wrapper is added.

    Raises:
        Exception: if args.model is not 'rnn', 'gru' or 'lstm'.
    """
    self.dim = 1
    self.args = args

    if infer:
        # Note: this mutates the caller's args object.
        args.batch_size = 1
        args.seq_length = 1

    # Pick the recurrent cell type.
    if args.model == 'rnn':
        cell_class = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_class = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_class = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    stacked_cell = rnn_cell.MultiRNNCell(
        [cell_class(args.rnn_size)] * args.num_layers)
    if infer == False and args.keep_prob < 1:  # training mode
        stacked_cell = rnn_cell.DropoutWrapper(
            stacked_cell, output_keep_prob=args.keep_prob)
    self.cell = stacked_cell

    self.input_data = tf.placeholder(
        dtype=tf.float32, shape=[None, args.seq_length, self.dim])
    self.target_data = tf.placeholder(
        dtype=tf.float32, shape=[None, args.seq_length, self.dim])
    self.initial_state = stacked_cell.zero_state(batch_size=args.batch_size,
                                                 dtype=tf.float32)

    self.num_mixture = args.num_mixture
    # Per mixture component: one weight plus (mu, sig) for each dimension.
    # Layout: [prob 1-20, dim1 mu, dim1 sig, dim2,... ]
    n_out = self.num_mixture * (1 + 2 * self.dim)

    with tf.variable_scope('rnnlm'):
        output_w = tf.get_variable("output_w", [args.rnn_size, n_out])
        output_b = tf.get_variable("output_b", [n_out])
    self.w = output_w

    # One [batch, dim] tensor per time step.
    step_inputs = tf.split(1, args.seq_length, self.input_data)
    step_inputs = [tf.squeeze(step, [1]) for step in step_inputs]

    outputs, states = seq2seq.rnn_decoder(step_inputs, self.initial_state,
                                          stacked_cell, loop_function=None,
                                          scope='rnnlm')
    flat_outputs = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    projected = tf.nn.xw_plus_b(flat_outputs, output_w, output_b)
    self.final_state = states

    # Flatten the targets so that they line up with the predictions.
    x_data = tf.reshape(self.target_data, [-1, self.dim])

    def normal_density(x, mu, sig):
        # Univariate normal density, evaluated element-wise.
        exponent = -tf.square(x - mu) / (2 * tf.square(sig))
        return tf.exp(exponent) / (sig * tf.sqrt(2 * np.pi))

    def mixture_nll(z_pi, z_mu, z_sig, targets):
        # Negative log-likelihood under the mixture, summed over all rows;
        # the density is floored at 1e-20 before the log.
        per_component = normal_density(targets, z_mu, z_sig)
        mixed = tf.reduce_sum(per_component * z_pi, 1, keep_dims=True)
        return tf.reduce_sum(-tf.log(tf.maximum(mixed, 1e-20)))

    # Mixture weights: softmax over the first num_mixture outputs, with
    # the row maximum subtracted before the exp.
    pi_logits = projected[:, 0:self.num_mixture]
    pi_max = tf.reduce_max(pi_logits, 1, keep_dims=True)
    pi_exp = tf.exp(tf.sub(pi_logits, pi_max))
    pi_scale = tf.inv(tf.reduce_sum(pi_exp, 1, keep_dims=True))
    self.pi = pi_scale * pi_exp

    # The remaining outputs hold one (mu, sig) block per dimension.
    per_dim_params = tf.split(1, self.dim, projected[:, self.num_mixture:])

    self.mu = []
    self.sig = []
    self.cost = 0
    for i in range(self.dim):
        o_mu, o_sig = tf.split(1, 2, per_dim_params[i])
        o_sig = tf.exp(o_sig)  # exp keeps sig positive
        self.mu.append(o_mu)
        self.sig.append(o_sig)

        dim_loss = mixture_nll(self.pi, o_mu, o_sig, x_data[:, i:i + 1])
        # Average over batch, sequence length and dimensions.
        self.cost += dim_loss / (args.batch_size * args.seq_length * self.dim)

    self.mu = tf.concat(1, self.mu)
    self.sig = tf.concat(1, self.sig)

    self.loss_summary = tf.scalar_summary("loss", self.cost)
    self.summary = tf.merge_all_summaries()

    # The learning rate is a non-trainable variable initialised to 0.0.
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    raw_grads = tf.gradients(self.cost, tvars)
    grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def test_Inv(self):
    """Check tf.inv on a random (4, 3) input, for TF versions up to 0.11 only."""
    if td._tf_version[:2] > (0, 11):
        # Newer TensorFlow versions are not exercised by this test.
        return
    self.check(tf.inv(self.random(4, 3)))
def gaussian(x, mean, std):
    """Evaluate the normal density N(x; mean, std) element-wise.

    Computes ``exp(-((x - mean) / std)^2 / 2) / (std * sqrt(2 * pi))``.
    """
    # Standardised residual (x - mean) / std.
    z = tf.mul(tf.sub(x, mean), tf.inv(std))
    kernel = tf.exp(-tf.square(z) / 2)
    normaliser = tf.inv(std * tf.sqrt(math.pi * 2))
    return tf.mul(kernel, normaliser)
def _capped_sqrt_grad(op, grad):
    """Gradient of a square-root op, replaced by 50 where the output is tiny.

    Args:
        op: The op being differentiated; its first output is y = x^(1/2).
        grad: Gradient with respect to the op's output.

    Returns:
        ``grad * 50`` where y < 0.0001 and ``grad * 0.5 / y`` elsewhere.
    """
    y = op.outputs[0]  # y = x^(1/2)
    is_tiny = tf.less(y, 0.0001)
    cap = tf.zeros_like(y) + 50.0
    # d sqrt(x) / dx = 1 / (2 * sqrt(x)) = 0.5 / y
    exact = .5 * tf.inv(y)
    return grad * tf.select(is_tiny, cap, exact)
# The input data data = [Row(x=[float(x), float(2 * x)], key=str(x % 2)) for x in range(1, 6)] df = sqlContext.createDataFrame(data) df = tfs.analyze(sqlContext.createDataFrame(data)) # The geometric mean: # TODO(tjh) make a test out of this, it found some bugs # - non numeric columns (string) # - unused columns # - output that has a child col_name = "x" col_key = "key" with tf.Graph().as_default() as g: x = tfs.block(df, col_name) invs = tf.inv(tf.to_double(x), name="invs") df2 = tfs.map_blocks([invs, tf.ones_like(invs, name="count")], df) # The geometric mean gb = df2.select(col_key, "invs", "count").groupBy("key") with tf.Graph().as_default() as g: x_input = tfs.block(df2, "invs", tf_name="invs_input") count_input = tfs.block(df2, "invs", tf_name="count_input") x = tf.reduce_sum(x_input, [0], name='invs') count = tf.reduce_sum(count_input, [0], name='count') df3 = tfs.aggregate([x, count], gb) with tf.Graph().as_default() as g: invs = tfs.block(df2, "invs") count = tfs.block(df2, "count")