def jaccard_coef(y_true, y_pred):
    """Return the smoothed Jaccard (intersection-over-union) coefficient.

    The element-wise product and sum of the two tensors are reduced over
    axes ``[0, -1, -2]``; the resulting ratios are then averaged into a
    single scalar.  The additive constant comes from the module-level name
    ``smooth``, which is not defined in this function.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor, broadcast-compatible with ``y_true``.

    Returns:
        Scalar tensor with the mean smoothed Jaccard coefficient.
    """
    # TensorFlow exposes reductions as reduce_sum / reduce_mean; there is no
    # tf.sum or tf.mean.
    intersection = tf.reduce_sum(y_true * y_pred, axis=[0, -1, -2])
    sum_ = tf.reduce_sum(y_true + y_pred, axis=[0, -1, -2])
    # union = sum - intersection; `smooth` keeps the ratio finite when both
    # tensors are empty.
    jac = (intersection + smooth) / (sum_ - intersection + smooth)
    return tf.reduce_mean(jac)
def cost(self, X, Y, XXM, YYM, batch_sz=None, num_steps=None, lam=0.0005):
    """Build the perplexity-based training cost and its summaries.

    The number of examples per update is not fixed, so the batch size is
    read from ``Y`` at run time unless given explicitly.

    Args:
        X: source indices, forwarded to ``self.fp``.
        Y: target indices; used to pick the predicted probability of the
            target symbol at every step.
        XXM: source mask, forwarded to ``self.fp``.
        YYM: target mask; multiplies the per-step log-loss and its row sums
            are used as sequence lengths.
        batch_sz: number of sequences; defaults to ``tf.shape(Y)[0]``.
        num_steps: number of steps; defaults to ``self.TT``.
        lam: weight of the (currently disabled) L2 penalty; unused.

    Returns:
        Scalar tensor: mean over the batch of ``exp`` of the
        length-normalised masked negative log-likelihood.

    Side effects:
        Registers a histogram and a scalar summary and stores the merged
        summary op in ``self.summarize``.
    """
    if batch_sz is None:
        batch_sz = tf.shape(Y)[0]
    if num_steps is None:
        num_steps = self.TT

    preds = self.fp(X, XXM, batch_sz,
                    num_esteps=num_steps, num_dsteps=num_steps)
    preds = tf.transpose(preds, perm=[1, 0, 2])

    # Measured based on perplexity - how surprised the network is to see the
    # next symbol in a sequence.
    # NOTE(review): assumes Y and YYM are laid out (batch, step) so that
    # flattening them lines up with the transposed predictions - confirm.
    n_rows = batch_sz * num_steps
    py = tf.reshape(preds, [n_rows, self.D])
    y_flat = tf.cast(tf.reshape(Y, [-1]), tf.int32)
    mask_flat = tf.cast(tf.reshape(YYM, [-1]), py.dtype)
    Y_len = tf.cast(tf.reduce_sum(YYM, 1), 'float32')

    # TensorFlow tensors do not support NumPy-style fancy indexing, so the
    # (row, target) pairs are gathered explicitly.
    row_target = tf.stack([tf.range(n_rows), y_flat], axis=1)
    cost = -tf.log(tf.gather_nd(py, row_target)) * mask_flat
    cost = tf.reshape(cost, [batch_sz, num_steps]) / tf.expand_dims(Y_len, 1)
    cost = tf.exp(tf.reduce_sum(cost, axis=1))
    cost = tf.reduce_sum(cost) / tf.cast(batch_sz, 'float32')

    with tf.variable_scope('summary'):
        tf.histogram_summary("prediction_error", preds)
        tf.scalar_summary("Cost", cost)
        self.summarize = tf.merge_all_summaries()

    return cost
def focal_loss1(y_true, y_pred):
    """Return the summed binary focal loss with gamma=2.0 and alpha=0.25.

    Args:
        y_true: tensor of labels; entries equal to 1 are positives and
            entries equal to 0 are negatives.
        y_pred: tensor of predicted probabilities, same shape as ``y_true``.

    Returns:
        Scalar tensor: the focal loss summed over every element.
    """
    gamma = 2.0
    alpha = 0.25
    # Positions that do not belong to a term are filled with the value that
    # makes that term vanish (1 for the positive term, 0 for the negative).
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    # NOTE(review): predictions are not clipped here, so a prediction of
    # exactly 0 on a positive (or 1 on a negative) feeds log(0).
    positive_term = tf.reduce_sum(
        alpha * tf.pow(1. - pt_1, gamma) * tf.log(pt_1))
    negative_term = tf.reduce_sum(
        (1 - alpha) * tf.pow(pt_0, gamma) * tf.log(1. - pt_0))
    return -positive_term - negative_term
def __call__(self, x):
    """Return the L1 and/or L2 penalty of ``x``.

    Args:
        x: tensor to penalise.

    Returns:
        ``0.`` when both ``self.l1`` and ``self.l2`` are falsy, otherwise a
        scalar tensor ``sum(l1 * |x|) + sum(l2 * x**2)`` containing only the
        enabled terms.
    """
    regularization = 0.
    if self.l1:
        regularization += tf.reduce_sum(self.l1 * tf.abs(x))
    if self.l2:
        regularization += tf.reduce_sum(self.l2 * tf.square(x))
    return regularization
def iou_score(gt, pr, class_weights=1., smooth=SMOOTH, per_image=True, threshold=None):
    """Compute the (optionally class-weighted) IoU / Jaccard score.

    Args:
        gt: ground truth 4D tensor (B, H, W, C).
        pr: prediction 4D tensor (B, H, W, C).
        class_weights: 1. or list of class weights, len(weights) = C.
        smooth: value added to numerator and denominator to avoid division
            by zero.
        per_image: if ``True`` the score is computed per image and then
            averaged over the batch (B); otherwise it is computed over the
            whole batch at once.
        threshold: if given, predictions are binarised with ``pr >
            threshold``; if ``None`` predictions are used as they are.

    Returns:
        Scalar tensor: IoU/Jaccard score in the range [0, 1].
    """
    axes = [1, 2] if per_image else [0, 1, 2]

    if threshold is not None:
        pr = tf.greater(pr, threshold)
        pr = tf.cast(pr, dtype=tf.float32)

    intersection = tf.reduce_sum(gt * pr, axis=axes)
    union = tf.reduce_sum(gt + pr, axis=axes) - intersection
    iou = (intersection + smooth) / (union + smooth)

    # mean per image
    if per_image:
        iou = tf.reduce_mean(iou, axis=0)

    # weighted mean per class
    iou = tf.reduce_mean(iou * class_weights)
    return iou
def jaccard_coef_int(y_true, y_pred):
    """Return the smoothed Jaccard coefficient of the binarised predictions.

    Predictions are clipped to [0, 1] and rounded to {0, 1} first.  Both
    the intersection and the union are then computed from those rounded
    values, reduced over axes ``[0, -1, -2]`` and averaged.  The additive
    constant comes from the module-level name ``smooth``.

    Args:
        y_true: ground-truth tensor.
        y_pred: raw prediction tensor.

    Returns:
        Scalar tensor with the mean smoothed Jaccard coefficient.
    """
    y_pred_pos = tf.round(K.clip(y_pred, 0, 1))

    intersection = tf.reduce_sum(y_true * y_pred_pos, axis=[0, -1, -2])
    # The union must use the same rounded predictions as the intersection;
    # mixing in the soft y_pred lets the ratio exceed 1 (e.g. a 0.6
    # prediction on a positive pixel gives 1 / 0.6).
    sum_ = tf.reduce_sum(y_true + y_pred_pos, axis=[0, -1, -2])
    jac = (intersection + smooth) / (sum_ - intersection + smooth)
    return tf.reduce_mean(jac)
def single_class_dice_coefficient(y_true, y_pred, axis=(0, 1, 2), epsilon=0.00001):
    """Compute the dice coefficient for a single class.

    Args:
        y_true (Tensorflow tensor): tensor of ground truth values for a
            single class. shape: (x_dim, y_dim, z_dim)
        y_pred (Tensorflow tensor): tensor of predictions for a single
            class. shape: (x_dim, y_dim, z_dim)
        axis (tuple): spatial axes to sum over when computing the numerator
            and denominator of the dice coefficient.
        epsilon (float): small constant added to numerator and denominator
            to avoid divide-by-zero errors.

    Returns:
        dice_coefficient: tensor holding the dice coefficient (a scalar when
        ``axis`` covers every dimension of the inputs).
    """
    overlap = tf.reduce_sum(y_true * y_pred, axis=axis)
    total = tf.reduce_sum(y_true, axis=axis) + tf.reduce_sum(y_pred, axis=axis)
    dice_coefficient = (2. * overlap + epsilon) / (total + epsilon)
    return dice_coefficient
def diceCoeff(label, logit, smooth=1.0):
    """Return the smoothed dice score between ``label`` and ``logit``.

    All elements of both tensors are summed, so the result is one score for
    the whole input.

    Args:
        label: ground-truth tensor.
        logit: prediction tensor, multiplied element-wise with ``label``.
        smooth: constant added to numerator and denominator.

    Returns:
        Scalar tensor ``(2 * sum(label * logit) + smooth) /
        (sum(label) + sum(logit) + smooth)``.
    """
    intersection = tf.reduce_sum(label * logit)
    denominator = tf.reduce_sum(label) + tf.reduce_sum(logit) + smooth
    score = (2. * intersection + smooth) / denominator
    return score
def focal_loss_fixed(y_true, y_pred, gamma=2., alpha=.25):
    """Return the summed binary focal loss with clipped probabilities.

    Args:
        y_true: tensor of labels; 1 marks positives, 0 marks negatives.
        y_pred: tensor of predicted probabilities, same shape as ``y_true``.
        gamma: focusing exponent applied to the "easy example" factor.
        alpha: weight of the positive term; negatives get ``1 - alpha``.

    Returns:
        Scalar tensor: the focal loss summed over every element.
    """
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

    # Keep both logarithms away from log(0).
    pt_1 = tf.clip_by_value(pt_1, 1e-3, .999)
    pt_0 = tf.clip_by_value(pt_0, 1e-3, .999)

    positive_term = tf.reduce_sum(
        alpha * tf.pow(1. - pt_1, gamma) * tf.log(pt_1))
    negative_term = tf.reduce_sum(
        (1 - alpha) * tf.pow(pt_0, gamma) * tf.log(1. - pt_0))
    return -positive_term - negative_term
def loss_(y_true, y_pred):
    """Return the class-weighted categorical cross-entropy per sample.

    ``weights`` is read from the enclosing scope; it is multiplied with the
    per-class terms before they are summed over the last axis.

    Args:
        y_true: target tensor with classes on the last axis.
        y_pred: predicted class scores with classes on the last axis.

    Returns:
        Tensor of losses with the last (class) axis reduced away.
    """
    eps = tf.keras.backend.epsilon()
    # scale predictions so that the class probas of each sample sum to 1
    y_pred /= tf.reduce_sum(y_pred, axis=-1, keepdims=True)
    # clip to prevent NaN's and Inf's
    y_pred = tf.clip_by_value(y_pred, eps, 1 - eps)
    # calc
    loss = y_true * tf.log(y_pred) * weights
    loss = -tf.reduce_sum(loss, -1)
    return loss
def f_score(gt, pr, class_weights=1, beta=1, smooth=SMOOTH, per_image=True, threshold=None):
    r"""Compute the (optionally class-weighted) F-score (Dice coefficient).

    The F-score can be read as a weighted average of precision and recall;
    it reaches its best value at 1 and its worst at 0.  With ``beta = 1``
    precision and recall contribute equally:

        $F_\beta(precision, recall) = (1 + \beta^2)
            \frac{precision \cdot recall}{\beta^2 \cdot precision + recall}$

    Equivalently, in terms of counts:

        $F_\beta(A, B) = \frac{(1 + \beta^2) TP}
            {(1 + \beta^2) TP + \beta^2 FN + FP}$

    where TP, FP and FN are true positives, false positives and false
    negatives.

    Args:
        gt: ground truth 4D tensor (B, H, W, C).
        pr: prediction 4D tensor (B, H, W, C).
        class_weights: 1. or list of class weights, len(weights) = C.
        beta: f-score coefficient.
        smooth: value added to numerator and denominator to avoid division
            by zero.
        per_image: if ``True`` the score is computed per image and then
            averaged over the batch (B); otherwise over the whole batch.
        threshold: if given, predictions are binarised with ``pr >
            threshold``; if ``None`` predictions are used as they are.

    Returns:
        Scalar tensor: F-score in the range [0, 1].
    """
    axes = [1, 2] if per_image else [0, 1, 2]

    if threshold is not None:
        pr = tf.greater(pr, threshold)
        pr = tf.cast(pr, dtype=tf.float32)

    tp = tf.reduce_sum(gt * pr, axis=axes)
    fp = tf.reduce_sum(pr, axis=axes) - tp
    fn = tf.reduce_sum(gt, axis=axes) - tp

    beta_sq = beta ** 2
    score = ((1 + beta_sq) * tp + smooth) \
        / ((1 + beta_sq) * tp + beta_sq * fn + fp + smooth)

    # mean per image
    if per_image:
        score = tf.reduce_mean(score, axis=0)

    # weighted mean per class
    score = tf.reduce_mean(score * class_weights)
    return score
def recall(y_true, y_pred):
    """Recall metric.

    Computes the recall over the whole batch.  The decision threshold is
    read from the name ``threshold`` in the enclosing scope.

    Args:
        y_true: ground-truth tensor.
        y_pred: raw prediction tensor.

    Returns:
        Scalar tensor ``true_positives / (possible_positives + epsilon)``.
    """
    backend = tf.keras.backend
    threshold_value = threshold
    # Replaces a plain round(): clip the raw predictions to [0, 1], then
    # binarise them against the threshold.
    y_pred = tf.cast(
        tf.greater(tf.clip_by_value(y_pred, 0, 1), threshold_value),
        backend.floatx())
    # Number of true positives; rounded so the count is integral.
    true_positives = tf.round(
        tf.reduce_sum(tf.clip_by_value(y_true * y_pred, 0, 1)))
    # Number of positive targets.
    possible_positives = tf.reduce_sum(tf.clip_by_value(y_true, 0, 1))
    recall_ratio = true_positives / (possible_positives + backend.epsilon())
    return recall_ratio
def error_func(pred_image, lab_images):
    """Return ``-sum(y_1 * y_2) * log(sum(x_1 * x_2))`` for channels 1 and 2.

    ``x_*`` are channels 1 and 2 of ``pred_image`` and ``y_*`` the same
    channels of ``lab_images`` (both indexed as 4D tensors, channel last).

    Args:
        pred_image: predicted 4D tensor.
        lab_images: reference 4D tensor.

    Returns:
        Scalar tensor with the value described above.
    """
    x_1 = pred_image[:, :, :, 1]
    x_2 = pred_image[:, :, :, 2]
    y_1 = lab_images[:, :, :, 1]
    y_2 = lab_images[:, :, :, 2]

    # The previous version wrapped the slices in np.array() and, through a
    # copy-paste slip, overwrote x_1 with x_2, so channel 1 of the
    # prediction never reached the result.  The slices are now used as
    # tensors directly, which also keeps the result differentiable.
    # NOTE(review): confirm x_1 * x_2 (not x_2 * x_2) is the intended term.
    z1 = tf.reduce_sum(x_1 * x_2)
    z2 = tf.reduce_sum(y_1 * y_2)

    z = (-1) * (z2) * tf.log(z1)
    return z
def __call__(self, y_true, y_pred):
    """Return masked cross-entropy minus a weighted masked entropy bonus.

    Args:
        y_true: tensor indexed as ``[:, :, 0]`` for the target ids and
            ``[:, :, 1]`` for the mask.
        y_pred: predicted class probabilities with classes on the last axis.

    Returns:
        Per-sample tensor ``masked_cross_entropy -
        self.penalty_weight * masked_entropy``.
    """
    y_true_val = y_true[:, :, 0]
    mask = y_true[:, :, 1]

    # masked per-sample means of each loss; the small constant guards
    # against samples whose mask is all zero.
    num_items_masked = tf.reduce_sum(mask, axis=-1) + 1e-6
    cross_entropy = tf.keras.backend.sparse_categorical_crossentropy(
        y_true_val, y_pred)
    masked_cross_entropy = (
        tf.reduce_sum(mask * cross_entropy, axis=-1) / num_items_masked)
    # NOTE(review): y_pred is not clipped, so an exact 0 probability gives
    # 0 * log(0) in the entropy term.
    entropy = -tf.reduce_sum(y_pred * tf.log(y_pred), axis=-1)
    masked_entropy = (
        tf.reduce_sum(mask * entropy, axis=-1) / num_items_masked)
    return masked_cross_entropy - self.penalty_weight * masked_entropy
def call(self, unknown, known, unknow_feats, known_feats, training=True):
    """Propagate features from known points to unknown points.

    :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features
    :param known: (B, m, 3) tensor of the xyz positions of the known features,
        or ``None`` to broadcast ``known_feats`` to every unknown point
    :param unknow_feats: (B, C1, n) tensor of the features to be propagated to,
        or ``None``
    :param known_feats: (B, C2, m) tensor of features to be propagated
    :param training: forwarded to ``self.mlp``
    :return:
        new_features: (B, mlp[-1], n) tensor of the features of the unknown features
    """
    if known is not None:
        # Inverse-distance weighting over the three nearest known points.
        dist, idx = pointnet2_utils.three_nn_gpu(unknown, known)
        dist_recip = 1.0 / (dist + 1e-8)
        norm = tf.reduce_sum(dist_recip, axis=2, keepdims=True)
        weight = dist_recip / norm

        interpolated_feats = pointnet2_utils.three_interpolate_gpu(
            known_feats, idx, weight)
    else:
        # TensorFlow tensors have no .expand()/.size(); broadcast the
        # features along the last axis to the number of unknown points.
        # NOTE(review): assumes known_feats has a singleton last axis here.
        target_shape = tf.concat(
            [tf.shape(known_feats)[0:2], tf.shape(unknown)[1:2]], axis=0)
        interpolated_feats = tf.broadcast_to(known_feats, target_shape)

    if unknow_feats is not None:
        new_features = tf.concat([interpolated_feats, unknow_feats],
                                 axis=1)  # (B, C2 + C1, n)
    else:
        new_features = interpolated_feats

    # self.mlp receives a trailing singleton axis, removed again afterwards.
    new_features = tf.expand_dims(new_features, axis=-1)
    new_features = self.mlp(new_features, training=training)

    return tf.squeeze(new_features, axis=-1)
def __call__(self, x):
    """Build the two-headed network on top of input tensor ``x``.

    Four Conv/BatchNorm/ReLU/MaxPool/Dropout stages (64, 128, 192, 256
    filters) are followed by global average pooling.  The pooled features
    feed a softmax classifier (first output) and a ``CollisionEncoder``
    whose output is multiplied with its batch-reversed copy and summed over
    the last axis (second output).

    Args:
        x: input tensor in ``channels_last`` layout.

    Returns:
        Tuple ``(output1, output2)``.
    """
    for depth in range(4):
        x = Conv2D(filters=64 * (depth + 1),
                   kernel_size=5,
                   padding='same',
                   data_format='channels_last',
                   kernel_initializer='glorot_uniform')(x)
        x = BatchNormalization(axis=-1)(x)
        x = Activation('relu')(x)
        x = MaxPooling2D(pool_size=(2, 2), data_format='channels_last')(x)
        x = Dropout(rate=self.dropout)(x)
    # finally, we get shape=(samples, feat_len//2^4, seq_len//2^4, 256)

    # global average pooling
    x = GlobalAveragePooling2D(data_format='channels_last')(x)
    # finally, we get (samples, 256) tensors

    output1 = Dense(units=ASC_CLASS,
                    kernel_initializer='uniform',
                    activation='softmax',
                    name='activation')(x)

    x = CollisionEncoder()(x)
    # Pair every sample with the sample at the mirrored batch position.
    y = Lambda(lambda t: tf.reverse(t, axis=[0]))(x)
    z = Lambda(lambda pair: tf.multiply(pair[0], pair[1]))([x, y])
    output2 = Lambda(lambda t: tf.reduce_sum(t, axis=-1))(z)

    return output1, output2
def total_variation_loss(x):
    """Return the total-variation loss of the 4D tensor ``x``.

    Squared differences between neighbours along axis 2 and along axis 3
    are added, raised to the power 1.25 and summed.  The slice bounds come
    from the module-level names ``img_width`` and ``img_height``.

    Args:
        x: 4D tensor; axes 2 and 3 are the ones differenced.

    Returns:
        Scalar tensor with the loss.
    """
    assert K.ndim(x) == 4
    base = x[:, :, :img_width - 1, :img_height - 1]
    a = tf.square(base - x[:, :, 1:, :img_height - 1])
    b = tf.square(base - x[:, :, :img_width - 1, 1:])
    return tf.reduce_sum(tf.pow(a + b, 1.25))
def optimizer(discounted_rewards):
    """Build and return the policy-gradient training function.

    The loss is ``-sum(log(p(action)) * discounted_reward)`` where
    ``p(action)`` is the model output selected by a one-hot ``action``.

    NOTE(review): ``self`` and ``learning_rate`` are read from the enclosing
    scope, and the ``discounted_rewards`` argument is immediately replaced
    by a placeholder - confirm this function is meant to be a closure.

    Args:
        discounted_rewards: ignored (see note above).

    Returns:
        Backend function taking ``[model_input, action, discounted_rewards]``
        that applies one Adam update and returns no outputs.
    """
    backend = tf.keras.backend
    # tf.placeholder needs an explicit dtype and tf.function is not a
    # feed-and-update function factory; the Keras backend equivalents
    # provide the intended behaviour.
    action = backend.placeholder(shape=[None, 5])
    discounted_rewards = backend.placeholder(shape=[
        None,
    ])

    # Calculate cross entropy error function
    action_prob = tf.reduce_sum(action * self.model.output, axis=1)
    cross_entropy = tf.log(action_prob) * discounted_rewards
    loss = -tf.reduce_sum(cross_entropy)

    # create training function
    adam = tf.keras.optimizers.Adam(lr=learning_rate)
    updates = adam.get_updates(loss=loss,
                               params=self.model.trainable_weights)
    train = backend.function(
        [self.model.input, action, discounted_rewards], [],
        updates=updates)

    return train
def inner_cca_objective(y_true, y_pred):
    """
    Loss function of CCA as introduced in the original paper (there can be
    other formulations).

    According to the original note it is implemented with Theano tensor
    operations and does not work on the TensorFlow backend.
    y_true is just ignored.

    NOTE(review): the body mixes Theano-style calls (``tf.dot``,
    ``tf.nlinalg.*``, ``tf.gt(...).nonzero()``, ``.sort()``) with
    TensorFlow-style ones (``tf.math.divide``, ``tf.eye``) and still
    contains one ``T.dot``; confirm which module ``tf`` and ``T`` are bound
    to before relying on it.

    ``use_all_singular_values`` and ``outdim_size`` are read from the
    enclosing scope.

    Returns the negated correlation, so that minimising it maximises the
    correlation between the two views.
    """
    # r1/r2 regularise the two covariance estimates; eps is the cut-off
    # below which eigenvalues are discarded.
    r1 = 1e-4
    r2 = 1e-4
    eps = 1e-12
    # Both views are assumed to occupy half of the output columns each.
    o1 = o2 = y_pred.shape[1] // 2

    # unpack (separate) the output of networks for view 1 and view 2
    H1 = tf.transpose(y_pred[:, 0:o1])
    H2 = tf.transpose(y_pred[:, o1:o1 + o2])

    # number of samples (columns after the transpose)
    m = H1.shape[1]

    # centre each view by subtracting the per-feature mean
    H1bar = H1 - (tf.math.divide(1, m)) * tf.dot(H1, tf.ones([m, m]))
    H2bar = H2 - (tf.math.divide(1, m)) * tf.dot(H2, tf.ones([m, m]))

    # cross-covariance and regularised auto-covariances
    SigmaHat12 = (tf.math.divide(1, m-1)) * \
        tf.dot(H1bar, tf.transpose(H2bar))
    SigmaHat11 = (tf.math.divide(1, m - 1)) * tf.dot(
        H1bar, tf.transpose(H1bar)) + r1 * tf.eye(o1)
    SigmaHat22 = (tf.math.divide(1, m - 1)) * tf.dot(
        H2bar, tf.transpose(H2bar)) + r2 * tf.eye(o2)

    # Calculating the root inverse of covariance matrices by using eigen decomposition
    [D1, V1] = tf.nlinalg.eigh(SigmaHat11)
    [D2, V2] = tf.nlinalg.eigh(SigmaHat22)

    # Added to increase stability: keep only eigenvalues above eps
    posInd1 = tf.gt(D1, eps).nonzero()[0]
    D1 = D1[posInd1]
    V1 = V1[:, posInd1]
    posInd2 = tf.gt(D2, eps).nonzero()[0]
    D2 = D2[posInd2]
    V2 = V2[:, posInd2]

    SigmaHat11RootInv = tf.dot(tf.dot(V1, tf.nlinalg.diag(D1**-0.5)), tf.transpose(V1))
    SigmaHat22RootInv = tf.dot(tf.dot(V2, tf.nlinalg.diag(D2**-0.5)), tf.transpose(V2))

    Tval = tf.dot(tf.dot(SigmaHat11RootInv, SigmaHat12), SigmaHat22RootInv)

    if use_all_singular_values:
        # all singular values are used to calculate the correlation
        corr = tf.sqrt(tf.nlinalg.trace(tf.dot(tf.transpose(Tval), Tval)))
    else:
        # just the top outdim_size singular values are used
        # NOTE(review): this line calls ``T.dot`` while the rest of the
        # function uses ``tf.dot`` - confirm ``T`` is defined.
        [U, V] = tf.nlinalg.eigh(T.dot(tf.transpose(Tval), Tval))
        U = U[tf.gt(U, eps).nonzero()[0]]
        # NOTE(review): if sort() is ascending, the slice below selects the
        # smallest values rather than the top ones - confirm.
        U = U.sort()
        corr = tf.sum(tf.sqrt(U[0:outdim_size]))

    return -corr
def __init__(self, obs, nactions, actions, nobs, rewards, policy, trajectory_length=8, name='Env_Model', LR=tf.constant(1e-4), nh=64, nout=64, vcoef=0.5, activ = tf.nn.tanh, max_grad=0.5):
    """Build the environment-model rollout graph and its training op.

    The model maps (observation, one-hot action) to a predicted next
    observation (sigmoid head) and a predicted reward (``activ`` head).
    The same variables, created under scope ``name``, are shared by the
    imagined rollouts and by the supervised training graph.

    Args:
        obs: observation tensor.
        nactions: number of discrete actions.
        actions: actions taken, used in the training graph.
        nobs: next-observation targets.
        rewards: reward targets.
        policy: unused in this constructor.
        trajectory_length: number of imagined steps per starting action.
        name: variable scope holding the model weights.
        LR: learning rate for Adam.
        nh: hidden width of both heads.
        nout: width of the predicted observation.
        vcoef: weight of the reward loss.
        activ: activation of the hidden layers and of the reward head.
        max_grad: global-norm clip value, or ``None`` to disable clipping.

    Sets:
        self.all_trajectories, self.all_rewards: one stacked tensor per
            starting action (previously built and discarded).
        self.trainer: op applying one Adam step on the combined loss.
    """
    def _heads(x_in):
        # AUTO_REUSE lets every rollout step and the training graph share
        # one set of weights; without it the second call fails because the
        # variables already exist.
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            ns_1 = activ(fc(x_in, 'ns_1', nh, init_scale=np.sqrt(2)))
            ns_2 = tf.nn.sigmoid(fc(ns_1, 'ns_2', nout, init_scale=np.sqrt(2)))
            vf_1 = activ(fc(x_in, 'vf_1', nh, init_scale=np.sqrt(2)))
            # The output layer needs its own scope: it was named 'vf_1'
            # like the hidden layer above despite having a different shape.
            vf_2 = activ(fc(vf_1, 'vf_2', 1, init_scale=np.sqrt(2)))
        return ns_2, vf_2

    all_trajectories = []
    all_rewards = []
    # rollout graph
    # NOTE(review): self.pdtype is not assigned in this constructor, and
    # the first action of each rollout is a Python int whose one-hot has no
    # batch axis - confirm how these are meant to line up with `obs`.
    for first_action in range(nactions):
        action_list = [first_action]
        rollout_obs = [obs]
        rollout_rews = []
        for t in range(trajectory_length):
            # tf.concat takes a list of tensors plus an axis.
            x_in = tf.concat(
                [rollout_obs[t], tf.one_hot(action_list[t], nactions)],
                axis=-1)
            ns_2, vf_2 = _heads(x_in)
            rollout_obs.append(ns_2)
            rollout_rews.append(vf_2)
            action_list.append(
                self.pdtype.pdfromlatent(rollout_obs[t + 1]).sample())
        all_trajectories.append(tf.stack(rollout_obs[1:]))
        all_rewards.append(tf.stack(rollout_rews))
    self.all_trajectories = all_trajectories
    self.all_rewards = all_rewards

    # training graph
    X_IN = tf.concat([obs, tf.one_hot(actions, nactions)], axis=-1)
    ns_2, vf_2 = _heads(X_IN)

    prediction_loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(ns_2 - nobs), axis=-1))
    value_loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(vf_2 - rewards), axis=-1))
    env_loss = prediction_loss + vcoef * value_loss

    optimizer = tf.train.AdamOptimizer(LR)
    # NOTE(review): this collects every trainable variable in the graph,
    # not only those under `name`.
    params = tf.trainable_variables()
    grads = tf.gradients(env_loss, params)
    if max_grad is not None:
        grads, _grad_norm = tf.clip_by_global_norm(grads, max_grad)
    grads = list(zip(grads, params))
    self.trainer = optimizer.apply_gradients(grads)
def im2dist_L2(images, k=3, r=1, scope='im2col'):
    """Return the L2 distance between each pixel and its k*k neighbourhood.

    Args:
        images: tensor of shape [N, H, W, C].
        k: neighbourhood (patch) size passed to ``im2col``.
        r: second argument passed to ``im2col``.
        scope: unused.

    Returns:
        Tensor of shape [N, H, W, k*k] with the per-neighbour distances.
    """
    # A shape tensor cannot be tuple-unpacked in graph mode; unstack it
    # into four scalar tensors instead.
    N, H, W, C = tf.unstack(tf.shape(images))

    # in shape [N, H, W, C*(k*k)], i.e. (x^1_1,...,x^1_c), (...),
    # (x^{k*k}_1, ..., x^{k*k}_c)
    im2col_patch = im2col(images, k, r, 'im2col_patch')
    im2col_patch = tf.reshape(im2col_patch, [N, H, W, k * k, C])

    # Repeat the centre pixel k*k times so it can be compared with every
    # neighbour in the patch.
    im2col_tile = tf.reshape(tf.tile(images, [1, 1, 1, k * k]),
                             [N, H, W, k * k, C])

    l2_dist = tf.sqrt(
        tf.reduce_sum(tf.square(im2col_patch - im2col_tile), axis=-1))

    return l2_dist  # in shape [N,H,W,k*k]
def make_cases():
    """Build small test tensors for ``hard_gate`` and its gradient.

    Returns:
        Tuple ``(out_a, out_b, grad_a)``: ``hard_gate`` applied to a 2x3
        float constant and to a length-5 integer constant, and the gradient
        of ``sum(out_a)`` with respect to the 2x3 constant.
    """
    a = tf.constant([0.0, -2.0, 3.0, 4.0, -5.0, 6.0], shape=[2, 3], name='a')
    b = tf.constant([-1, -2, 0, -4, 6], shape=[
        5,
    ], name='b')

    out_a = hard_gate(a)
    out_b = hard_gate(b)

    grad_a = tf.gradients(tf.reduce_sum(out_a), a)
    return out_a, out_b, grad_a
def call(self, x, rep_mask):
    """Pool ``x`` over axis 1 with learned, masked attention weights.

    Args:
        x: input tensor; axis 1 is the one that is pooled away.
        rep_mask: mask forwarded to ``masked_softmax``.

    Returns:
        Tensor ``sum(x * soft, axis=1)`` where ``soft`` are the attention
        weights.  Note that ``x`` here is the dropout-applied input.
    """
    x = self.dropout(x)
    map1 = self.elu(self.fc1(x))
    map2 = self.fc2(self.dropout(map1))
    soft = masked_softmax(map2, rep_mask, dim=1)
    # TensorFlow reductions take `axis`, not `dim`.
    out = tf.reduce_sum(x * soft, axis=1)
    return out
def masked_perplexity(y_true, y_pred):
    """
    Masked version of a popular metric for evaluating the performance of
    language-modelling architectures.

    It assumes that y_true has shape (batch_size, sequence_length, 2),
    containing both
      - the original token ids
      - and the mask (0s and 1s, indicating places where
        a word has been replaced),
    stacked along the last dimension.
    Masked perplexity ignores all but masked words.

    More info: http://cs224d.stanford.edu/lecture_notes/LectureNotes4.pdf

    Returns:
        Scalar tensor: mean over the batch of the per-sequence perplexity.
    """
    y_true_value = y_true[:, :, 0]
    mask = y_true[:, :, 1]
    cross_entropy = tf.keras.backend.sparse_categorical_crossentropy(
        y_true_value, y_pred)
    # The small constant keeps sequences without any masked token finite.
    batch_perplexities = tf.exp(
        tf.reduce_sum(mask * cross_entropy, axis=-1)
        / (tf.reduce_sum(mask, axis=-1) + 1e-6))
    return tf.reduce_mean(batch_perplexities)
def _loss_def(self):
    """Initialize the margin-based loss.

    Positive and negative instances are scored with ``self._score``, the
    scores are averaged over axis 1, and the hinge
    ``max(p - n + self.margin, 0)`` is summed.

    NOTE(review): ``mean``, ``sum`` and ``max`` are presumably tensor ops
    imported at module level rather than the Python builtins - confirm.
    """
    def _mean_score(h, t, l):
        # [b,n] scores reduced to [b]
        return mean(self._score(h, t, l), 1)

    positive = _mean_score(*self._positive_instance(in_batch=True))  # [b]
    negative = _mean_score(*self._negative_instance(in_batch=True))  # [b]

    hinge = max(positive - negative + self.margin, 0)
    return sum(hinge)  # []
def antirectifier(self, x):
    """Sum ``x`` over axis 1 and divide by the count of non-empty rows.

    For every sample, the number of positions along axis 1 that contain at
    least one non-zero value along axis 2 is counted; the axis-1 sum is
    divided by that count.

    Args:
        x: 3D tensor.

    Returns:
        Tensor with axis 1 reduced away.
    """
    sums = tf.reduce_sum(x, axis=1, keepdims=False)
    # Inner count: non-zero entries per (sample, position).  Outer count:
    # positions per sample that have any non-zero entry.
    nonzero_per_step = tf.math.count_nonzero(
        x, axis=2, keepdims=False, dtype=tf.float32)
    normalisers = tf.math.count_nonzero(
        nonzero_per_step, axis=1, keepdims=True, dtype=tf.float32)
    # NOTE(review): a sample that is entirely zero yields a zero normaliser.
    return sums / normalisers
def call(self, inputs, **kwargs):
    # One step of adaptive halting: computes the halting value for every
    # position, updates the running control tensors stored on `self`
    # (halt_budget, remainder, active_steps, ponder_cost, weighted_output)
    # and returns [inputs, accumulated weighted output].
    # NOTE(review): `tf.switch` and `tf.sum` are used below next to `K.*`
    # calls; confirm which module `tf` is bound to for this layer.
    input_shape = tf.shape(inputs)
    sequence_length, d_model = input_shape[-2:]
    # output of the "sigmoid halting unit" (not the probability yet)
    halting = tf.sigmoid(
        tf.reshape(
            K.bias_add(K.dot(tf.reshape(inputs, [-1, d_model]), self.halting_kernel), self.halting_biases, data_format='channels_last'),
            [-1, sequence_length]))
    # Control tensors are created lazily on the first call.
    if self.zeros_like_halting is None:
        self.initialize_control_tensors(halting)
    # useful flags
    step_is_active = tf.greater(self.halt_budget, 0)
    no_further_steps = tf.less_equal(self.halt_budget - halting, 0)
    # halting probability is equal to
    # a. halting output if this isn't the last step (we have some budget)
    # b. to remainder if it is,
    # c. and zero for the steps that shouldn't be executed at all
    #    (out of budget for them)
    halting_prob = tf.switch(
        step_is_active,
        tf.switch(no_further_steps, self.remainder, halting),
        self.zeros_like_halting)
    self.active_steps += tf.switch(step_is_active, self.ones_like_halting, self.zeros_like_halting)
    # We don't know which step is the last, so we keep updating
    # expression for the loss with each call of the layer
    self.ponder_cost = (self.time_penalty_t * K.mean(self.remainder + self.active_steps))
    # Updating "the remaining probability" and the halt budget
    self.remainder = tf.switch(no_further_steps, self.remainder, self.remainder - halting)
    self.halt_budget -= halting  # OK to become negative
    # If none of the inputs are active at this step, then instead
    # of zeroing them out by multiplying to all-zeroes halting_prob,
    # we can simply use a constant tensor of zeroes, which means that
    # we won't even calculate the output of those steps, saving
    # some real computational time.
    if self.zeros_like_input is None:
        self.zeros_like_input = tf.zeros_like(inputs, name='zeros_like_input')
    # just because tf.any(step_is_active) doesn't work in PlaidML
    any_step_is_active = tf.greater(
        tf.sum(tf.cast(step_is_active, 'int32')), 0)
    step_weighted_output = tf.switch(
        any_step_is_active,
        tf.expand_dims(halting_prob, -1) * inputs,
        self.zeros_like_input)
    # Accumulate the halting-weighted outputs across calls.
    if self.weighted_output is None:
        self.weighted_output = step_weighted_output
    else:
        self.weighted_output += step_weighted_output
    return [inputs, self.weighted_output]
def loss_function(output_prob, y, weight=None):
    """Return the (optionally weighted) mean negative log-likelihood.

    Args:
        output_prob: probability tensor whose axis 3 holds the labels.
        y: integer label tensor; flattened to one label per row of the
            flattened probabilities.
        weight: optional per-element weights, flattened like ``y``.

    Returns:
        Scalar tensor.  Without ``weight`` it is the mean of
        ``-log(p[label])``; with ``weight`` it is the weighted sum divided
        by the sum of the weights.  Probabilities are clipped to
        [1e-10, 1.0] before the logarithm.
    """
    n_labels = tf.shape(output_prob)[3]
    out_flat = tf.reshape(output_prob, [-1, n_labels])

    # Achieve advanced indexing through TensorFlow operators: gather
    # out_flat[i, y_flat[i]] for every row i.
    batch_nums = tf.range(0, limit=tf.shape(out_flat)[0])
    # tf.stack needs both index columns to share a dtype.
    y_flat = tf.cast(tf.reshape(y, [-1]), batch_nums.dtype)
    indices = tf.stack((batch_nums, y_flat), axis=1)
    log_probs = tf.gather_nd(
        tf.log(tf.clip_by_value(out_flat, 1e-10, 1.0)), indices)

    if weight is None:
        return -tf.reduce_mean(log_probs)

    w_flat = tf.reshape(weight, [-1])
    los = -tf.reduce_sum(tf.multiply(w_flat, log_probs)) / tf.reduce_sum(w_flat)
    return los
def l1(vectors):
    """Implement the l1 norm on a vector space.

    Parameters
        vectors - Tensor of dimension at least one, holding the vectors
                  whose norm is to be computed along the last axis.

    Return Value
        Tensor of reduced dimension holding the norms. The order is
        preserved.
    """
    from tensorflow import reduce_sum

    magnitudes = abs(vectors)
    return reduce_sum(magnitudes, -1)
def l2(vectors):
    """Implement the euclidean norm on a vector space.

    Parameters
        vectors - Tensor of dimension at least one, holding the vectors
                  whose norm is to be computed along the last axis.

    Return Value
        Tensor of reduced dimension holding the norms. The order is
        preserved.
    """
    from tensorflow import reduce_sum, sqrt

    squared_total = reduce_sum(vectors**2, -1)
    return sqrt(squared_total)
def apply_activation(
        books,
        x,
        activation,
        activation_args=(),
        activation_kwargs=None):
    """Returns activation(x, *activation_args, **activation_kwargs).

    This applies the given activation and adds useful summaries specific to
    the activation.

    Args:
      books: The bookkeeper.
      x: The tensor to apply activation to.
      activation: An activation function.
      activation_args: Optional additional arguments for the activation.
      activation_kwargs: Optional keyword args for activation.
    Returns:
      A tensor with activation applied to x, or x itself when activation is
      None.
    """
    if activation is None:
        return x
    if activation_kwargs is None:
        activation_kwargs = {}
    y = activation(x, *activation_args, **activation_kwargs)
    if activation in (tf.nn.relu, functions.leaky_relu, functions.softplus):
        # Fraction of inputs below zero.
        books.add_scalar_summary(
            tf.reduce_mean(tf.cast(tf.less(x, 0.0), tf.float32)),
            '%s/zeros' % y.op.name)
    elif activation is tf.nn.relu6:
        # Fractions of inputs below zero and above six.
        books.add_scalar_summary(
            tf.reduce_mean(tf.cast(tf.less(x, 0.0), tf.float32)),
            '%s/zeros' % y.op.name)
        books.add_scalar_summary(
            tf.reduce_mean(tf.cast(tf.greater(x, 6.0), tf.float32)),
            '%s/sixes' % y.op.name)
    elif activation in (functions.l2_normalize, tf.nn.l2_normalize,
                        functions.l1_normalize):
        # Mean L2 length of the inputs along axis 1.
        books.add_scalar_summary(
            tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(x), 1))),
            '%s/length' % y.op.name)
    books.add_histogram_summary(y, '%s/activations' % y.op.name)
    return y
def dice_coefficient(y1, y2):
    """Return the smoothed dice coefficient of two tensors.

    Both inputs are flattened to one dimension first.  The additive
    constant comes from the module-level name ``smoothness``.

    Args:
        y1: first tensor.
        y2: second tensor with the same number of elements.

    Returns:
        Scalar tensor ``(2 * sum(y1 * y2) + smoothness) /
        (sum(y1) + sum(y2) + smoothness)``.
    """
    # TensorFlow has no tf.flatten; reshape to a single axis instead.
    y1 = tf.reshape(y1, [-1])
    y2 = tf.reshape(y2, [-1])
    overlap = tf.reduce_sum(y1 * y2)
    total = tf.reduce_sum(y1) + tf.reduce_sum(y2)
    return (2. * overlap + smoothness) / (total + smoothness)
def train(self,data,batch,saving=True,reg_weight=False,record_weight=False,reg_lambda=0.01,use_dropout=False,keep_prob=0.5,gradient='gradient',learning_rate=0.1,w_file="./weights.txt",model_name='./model.ckpt',display_w=False,verbose=True,le=False,tau=1.0,session=None,n_iters=1000,display=False,noise=False,noise_level=1.0,momentum=0.9):
    """Train the autoencoder on a reconstruction (mean squared error) cost.

    Args:
        data: full data set; used for the cost evaluation after every
            iteration and for training when ``batch`` is None.
        batch: sequence of mini-batches, or None to train on ``data``.
        saving: save a checkpoint whenever the cost improves.
        reg_weight: add a weight-norm penalty (scaled by ``reg_lambda``).
        record_weight: record per-layer weight norms each iteration and
            write them to ``w_file`` at the end.
        reg_lambda: coefficient of the weight-norm penalty.
        use_dropout, keep_prob: stored on ``self``.
        gradient: optimizer name: 'gradient', 'adam', 'adagrad',
            'momentum', 'ftrl' or 'rms'; anything else falls back to
            gradient descent with a message.
        learning_rate: optimizer learning rate.
        w_file: output path for the recorded weight norms.
        model_name: checkpoint path.
        display_w: print weight/bias norms and means each iteration.
        verbose: print the cost each iteration and every improvement.
        le: selects a cost that is not implemented.
        tau: unused.
        session: TensorFlow session to use when ``self.session`` is None.
        n_iters: number of iterations.
        display: scatter-plot the first two encoded dimensions on every
            improvement.
        noise: additionally train on a noise-corrupted input per batch.
        noise_level: standard deviation of that noise.
        momentum: momentum for the 'momentum' optimizer (required by
            ``tf.train.MomentumOptimizer``).

    Returns:
        Tuple ``(init_cost, best)``: cost after the first iteration (None
        if no iteration ran) and the lowest cost seen.

    Raises:
        NotImplementedError: if ``le`` is true.
    """
    import numpy as np

    if le:
        # The old code called tf.sum() with no arguments here, which can
        # only fail; fail early with a clear message instead.
        raise NotImplementedError("the 'le' cost is not implemented")

    n_batch = len(batch) if batch is not None else 0

    if self.session is None:
        if session is None:
            session = self.init_network()
        # init_network may or may not have stored the session itself.
        if self.session is None:
            self.session = session

    # NOTE(review): attribute name is spelled 'use_droput'; kept because
    # other code may read it under this name.
    self.use_droput = use_dropout
    self.keep_prob_dropout = keep_prob

    if display:
        import matplotlib.pyplot as plt
        plt.axis([0, 1, 0, 1])
        plt.ion()
        plt.show()

    best = 20000000
    init_cost = None

    reg_lambda_with_decay = tf.placeholder("float", None)
    x = tf.placeholder("float", [None, self.units[0]])
    x_noise = x + tf.truncated_normal([self.units[0]], mean=0.0, stddev=noise_level)

    y = self.enc_output(x)
    x_hat = self.output(x)
    x_noise_hat = self.output(x_noise)

    cost = tf.reduce_mean(tf.pow(x - x_hat, 2))
    if reg_weight:
        # Frobenius norm of every layer, scaled by sqrt(fan_in + fan_out).
        c_w = 0.0
        for l in range(self.dec_enc_length):
            layer = self.layers[l]
            c_w = c_w + reg_lambda_with_decay * tf.pow(tf.reduce_sum(tf.pow(layer.W, 2)), 0.5) / ((layer.n_out + layer.n_in) ** 0.5)
        cost = cost + c_w
    noise_cost = tf.reduce_mean(tf.pow(x - x_noise_hat, 2))

    optimizers = {
        'gradient': lambda: tf.train.GradientDescentOptimizer(learning_rate),
        'adam': lambda: tf.train.AdamOptimizer(learning_rate),
        'adagrad': lambda: tf.train.AdagradOptimizer(learning_rate),
        'momentum': lambda: tf.train.MomentumOptimizer(learning_rate, momentum),
        'ftrl': lambda: tf.train.FtrlOptimizer(learning_rate),
        'rms': lambda: tf.train.RMSPropOptimizer(learning_rate),
    }
    if gradient not in optimizers:
        print("Unknow method  %s  .Using Gradient Descent Optimizer" % (gradient,))
        gradient = 'gradient'
    tr = optimizers[gradient]().minimize(cost)
    tr_noise = tf.train.GradientDescentOptimizer(learning_rate).minimize(noise_cost)

    # Build the weight-norm ops once; creating them inside the loop would
    # add new nodes to the graph on every iteration.
    weight_norm_ops = []
    if record_weight:
        weight_norm_ops = [
            tf.pow(tf.reduce_sum(tf.pow(self.layers[l].W, 2)), 0.5)
            for l in range(self.dec_enc_length)]

    def _feed(values):
        # The penalty coefficient is only fed when the penalty is enabled.
        if reg_weight:
            return {x: values, reg_lambda_with_decay: reg_lambda}
        return {x: values}

    self.session.run(tf.initialize_all_variables())

    saver = tf.train.Saver()
    saver.save(self.session, model_name)

    recorded_weight = []

    for i in range(n_iters):
        if record_weight:
            recorded_weight.append(
                [self.session.run(op) for op in weight_norm_ops])

        if batch is None:
            self.session.run(tr, feed_dict=_feed(data))
        else:
            for l in range(n_batch):
                self.session.run(tr, feed_dict=_feed(batch[l]))
                if noise:
                    self.session.run(tr_noise, feed_dict=_feed(batch[l]))

        c = self.session.run(cost, feed_dict=_feed(data))

        if i == 0:
            init_cost = c

        if np.isnan(c):
            # Diverged: roll back to the last checkpoint and stop.
            saver.restore(self.session, model_name)
            break

        if verbose:
            print("cost  %s  at iter  %s" % (c, i + 1))

        if display_w:
            # A separate index keeps the iteration counter `i` intact for
            # the "best model" message below.
            for layer_idx in range(self.enc_length):
                w_val = self.session.run(self.layers[layer_idx].W)
                b_val = self.session.run(self.layers[layer_idx].b)
                print("Norm layer  %s weight:  %s bias:  %s" % (
                    layer_idx, np.sqrt(np.sum(w_val ** 2)), np.sqrt(np.sum(b_val ** 2))))
                print("Mean layer  %s weight:  %s bias:  %s" % (
                    layer_idx, np.mean(w_val), np.mean(b_val)))
            print("-------------------------")

        if c < best:
            if display:
                ridotti = self.session.run(y, feed_dict={x: data})
                plt.clf()
                plt.scatter(ridotti[:, 0], ridotti[:, 1])
                plt.draw()
            if saving:
                saver.save(self.session, model_name)
            if verbose:
                print("Best model found so far at iter: %d with cost %f" % (i + 1, c))
            best = c

    # Finish on the best checkpoint rather than the last iterate.
    saver.restore(self.session, model_name)

    if record_weight:
        np.savetxt(w_file, np.vstack(recorded_weight), fmt='%1.8f')

    return init_cost, best
def euclidean_norm(tensor, reduction_indices = None, name = None):
    """Return the sum of squares of ``tensor`` along ``reduction_indices``.

    NOTE(review): despite the name, no square root is taken (compare
    ``frobenius_norm``), so this is the squared norm.  Behaviour is kept
    as is - confirm whether callers expect the root.

    Args:
        tensor: input tensor.
        reduction_indices: axes to reduce over; None reduces every axis.
        name: optional name for the op scope.

    Returns:
        Tensor holding the sum of squared entries.
    """
    # The scope takes a list of input values; the old
    # `tensor + reduction_indices` raised a TypeError for the default None.
    with tf.name_scope(name, "euclidean_norm", [tensor]):
        squared_tensor = tf.square(tensor)
        euclidean_norm = tf.reduce_sum(squared_tensor, axis=reduction_indices)
        return euclidean_norm
def train(self,data,batch,gradient='gradient',learning_rate=0.1,model_name='./model.ckpt',verbose=True,le=False,tau=1.0,session=None,n_iters=1000,display=False,noise=False,noise_level=1.0,momentum=0.9):
    """Train the autoencoder on a reconstruction (mean squared error) cost.

    Args:
        data: full data set; used for the cost evaluation after every
            iteration and for training when ``batch`` is None.
        batch: sequence of mini-batches, or None to train on ``data``.
        gradient: optimizer name: 'gradient', 'adam', 'adagrad',
            'momentum', 'ftrl' or 'rms'; anything else falls back to
            gradient descent with a message.
        learning_rate: optimizer learning rate.
        model_name: checkpoint path.
        verbose: print the cost each iteration and every improvement.
        le: selects a cost that is not implemented.
        tau: unused.
        session: TensorFlow session to use when ``self.session`` is None.
        n_iters: number of iterations.
        display: scatter-plot the first two encoded dimensions on every
            improvement.
        noise: additionally train on a noise-corrupted input per batch.
        noise_level: standard deviation of that noise.
        momentum: momentum for the 'momentum' optimizer (required by
            ``tf.train.MomentumOptimizer``).

    Returns:
        Tuple ``(init_cost, best)``: cost after the first iteration (None
        if no iteration ran) and the lowest cost seen.

    Raises:
        NotImplementedError: if ``le`` is true.
    """
    import numpy as np

    if le:
        # The old code called tf.sum() with no arguments here, which can
        # only fail; fail early with a clear message instead.
        raise NotImplementedError("the 'le' cost is not implemented")

    n_batch = len(batch) if batch is not None else 0

    if self.session is None:
        if session is None:
            session = self.init_network()
        # init_network may or may not have stored the session itself.
        if self.session is None:
            self.session = session

    if display:
        import matplotlib.pyplot as plt
        plt.axis([0, 1, 0, 1])
        plt.ion()
        plt.show()

    best = 20000000
    init_cost = None

    x = tf.placeholder("float", [None, self.units[0]])
    x_noise = x + tf.truncated_normal([self.units[0]], mean=0.0, stddev=noise_level)

    y = self.enc_output(x)
    x_hat = self.output(x)
    x_noise_hat = self.output(x_noise)

    cost = tf.reduce_mean(tf.pow(x - x_hat, 2))
    noise_cost = tf.reduce_mean(tf.pow(x - x_noise_hat, 2))

    # A lookup table replaces the elif chain, one branch of which compared
    # the misspelled name `gradident` and raised NameError for every
    # optimizer other than 'gradient' and 'adam'.
    optimizers = {
        'gradient': lambda: tf.train.GradientDescentOptimizer(learning_rate),
        'adam': lambda: tf.train.AdamOptimizer(learning_rate),
        'adagrad': lambda: tf.train.AdagradOptimizer(learning_rate),
        'momentum': lambda: tf.train.MomentumOptimizer(learning_rate, momentum),
        'ftrl': lambda: tf.train.FtrlOptimizer(learning_rate),
        'rms': lambda: tf.train.RMSPropOptimizer(learning_rate),
    }
    if gradient not in optimizers:
        print("Unknow method  %s  .Using Gradient Descent Optimizer" % (gradient,))
        gradient = 'gradient'
    tr = optimizers[gradient]().minimize(cost)
    tr_noise = tf.train.GradientDescentOptimizer(learning_rate).minimize(noise_cost)

    self.session.run(tf.initialize_all_variables())

    saver = tf.train.Saver()
    saver.save(self.session, model_name)

    for i in range(n_iters):
        if batch is None:
            self.session.run(tr, feed_dict={x: data})
        else:
            for l in range(n_batch):
                self.session.run(tr, feed_dict={x: batch[l]})
                if noise:
                    self.session.run(tr_noise, feed_dict={x: batch[l]})

        c = self.session.run(cost, feed_dict={x: data})

        if i == 0:
            init_cost = c

        if np.isnan(c):
            # Diverged: roll back to the last checkpoint and stop.
            saver.restore(self.session, model_name)
            break

        if verbose:
            print("cost  %s  at iter  %s" % (c, i + 1))

        if c < best:
            if display:
                ridotti = self.session.run(y, feed_dict={x: data})
                plt.clf()
                plt.scatter(ridotti[:, 0], ridotti[:, 1])
                plt.draw()
            saver.save(self.session, model_name)
            if verbose:
                print("Best model found so far at iter: %d with cost %f" % (i + 1, c))
            best = c

    # Finish on the best checkpoint rather than the last iterate.
    saver.restore(self.session, model_name)

    return init_cost, best
def frobenius_norm(tensor, reduction_indices = None, name = None):
    """Return the square root of the sum of squares of ``tensor``.

    Args:
        tensor: input tensor.
        reduction_indices: axes to reduce over; None reduces every axis.
        name: optional name for the op scope.

    Returns:
        Tensor holding ``sqrt(sum(tensor ** 2))`` over the given axes.
    """
    # The scope takes a list of input values; the old
    # `tensor + reduction_indices` raised a TypeError for the default None.
    with tf.name_scope(name, "frobenius_norm", [tensor]):
        squared_tensor = tf.square(tensor)
        tensor_sum = tf.reduce_sum(squared_tensor, axis=reduction_indices)
        frobenius_norm = tf.sqrt(tensor_sum)
        return frobenius_norm