def _initialize_ii_buffer(self, x):
    # Build a 2-D integral image: zero-pad by half the maximum window
    # size, then take cumulative sums along height and width.
    x_pad = K.spatial_2d_padding(
        x, ((self.max_wh // 2 + 1, self.max_wh // 2 + 1),
            (self.max_ww // 2 + 1, self.max_ww // 2 + 1)))
    ii_x = K.cumsum(x_pad, axis=1)
    ii_x2 = K.cumsum(ii_x, axis=2)
    return ii_x2
def rnn_model(input_shape, num_steps_crop, unroll=True):
    """Returns RNN model for counting.

    # Arguments
        input_shape: Tuple containing number of timesteps, number of features.
        num_steps_crop: How many timesteps to crop from RNN outputs.

    # Returns
        Keras model object.
    """
    inputs = Input(shape=input_shape, name='input_1')
    out = Bidirectional(GRU(64, return_sequences=True, unroll=unroll))(inputs)
    out = Bidirectional(GRU(16, return_sequences=True, unroll=unroll))(out)
    out = Dense(1, activation='sigmoid')(out)
    out = Cropping1D(cropping=(num_steps_crop, num_steps_crop))(out)
    out = Flatten(name='current_values')(out)
    cumsum1 = Lambda(lambda x: K.cumsum(x))(out)
    cumsum2 = Lambda(lambda x: K.cumsum(K.reverse(x, axes=1)))(out)
    cumsum_value = concatenate([cumsum1, cumsum2], name='cumsum_values')
    model = Model(inputs=inputs, outputs=[out, cumsum_value])
    model.compile(
        optimizer=Adam(lr=1e-4),
        loss={'current_values': 'binary_crossentropy',
              'cumsum_values': 'mse'},
        loss_weights={'current_values': 1.0, 'cumsum_values': 0.05})
    return model
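# Hedged usage sketch for rnn_model above; the shapes here are my own
# illustrative assumptions, not values taken from the original code.
model = rnn_model(input_shape=(100, 8), num_steps_crop=10)
# After cropping 10 steps from each end, 'current_values' has length
# 100 - 2*10 = 80, and 'cumsum_values' concatenates the forward and
# reversed cumulative sums (length 160); targets must match these shapes.
model.summary()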
def loss(target_tensor, prediction_tensor):
    """Computes loss.

    :param target_tensor: Tensor of target (actual) values.
    :param prediction_tensor: Tensor of predicted values.
    :return: loss: Scalar.
    """
    target_net_flux_inc_tensor_w_m03 = (
        target_tensor[..., down_flux_inc_channel_index] -
        target_tensor[..., up_flux_inc_channel_index])
    predicted_net_flux_inc_tensor_w_m03 = (
        prediction_tensor[..., down_flux_inc_channel_index] -
        prediction_tensor[..., up_flux_inc_channel_index])

    loss = net_flux_increment_weight * (
        predicted_net_flux_inc_tensor_w_m03 -
        target_net_flux_inc_tensor_w_m03) ** 2

    target_net_flux_tensor_w_m02 = K.cumsum(
        target_net_flux_inc_tensor_w_m03 * grid_cell_width_matrix_metres,
        axis=1)
    predicted_net_flux_tensor_w_m02 = K.cumsum(
        predicted_net_flux_inc_tensor_w_m03 * grid_cell_width_matrix_metres,
        axis=1)

    loss += total_net_flux_weight * (
        predicted_net_flux_tensor_w_m02 - target_net_flux_tensor_w_m02) ** 2

    if use_magnitude_weight:
        loss = loss * K.maximum(target_net_flux_inc_tensor_w_m03,
                                predicted_net_flux_inc_tensor_w_m03)

    return K.mean(loss)
def call(self, seed_registration_map, mask=None):
    # Integrate per-voxel increments along each spatial axis to turn the
    # seed map into absolute registration coordinates.
    x_s = K.cumsum(seed_registration_map[..., 0] * 2, axis=1)
    y_s = K.cumsum(seed_registration_map[..., 1] * 2, axis=2)
    z_s = K.cumsum(seed_registration_map[..., 2] * 2, axis=3)
    registration_map = K.stack([x_s, y_s, z_s], axis=-1)
    return registration_map
def rating_cost_lambda_func(args):
    # Ordinal rating cost: cumulative scores are turned into a distribution
    # over rating levels via softmax; ordinal costs accumulated in both
    # directions are combined with the plain negative log-likelihood,
    # rescaled by D / (D - d).
    alpha = 1.
    std = 0.01
    pred_score, true_ratings, input_masks, output_masks, D, d = args
    pred_score_cum = K.cumsum(pred_score, axis=2)
    prob_item_ratings = K.softmax(pred_score_cum)
    accu_prob_1N = K.cumsum(prob_item_ratings, axis=2)
    accu_prob_N1 = K.cumsum(prob_item_ratings[:, :, ::-1], axis=2)[:, :, ::-1]
    mask1N = K.cumsum(true_ratings[:, :, ::-1], axis=2)[:, :, ::-1]
    maskN1 = K.cumsum(true_ratings, axis=2)
    cost_ordinal_1N = -K.sum(
        (K.log(prob_item_ratings) - K.log(accu_prob_1N)) * mask1N, axis=2)
    cost_ordinal_N1 = -K.sum(
        (K.log(prob_item_ratings) - K.log(accu_prob_N1)) * maskN1, axis=2)
    cost_ordinal = cost_ordinal_1N + cost_ordinal_N1
    nll_item_ratings = K.sum(-(true_ratings * K.log(prob_item_ratings)),
                             axis=2)
    nll = std * K.sum(nll_item_ratings, axis=1) * 1.0 * D / (D - d + 1e-6) + \
        alpha * K.sum(cost_ordinal, axis=1) * 1.0 * D / (D - d + 1e-6)
    cost = K.mean(nll)
    cost = K.expand_dims(cost, 0)
    return cost
def ExactAUC(label_arg, pred_arg, weight=None):
    # Exact (trapezoidal) ROC AUC: sort by prediction, accumulate positive
    # and negative weights, collapse tied predictions, then integrate TPR
    # over FPR with the trapezoid rule.
    N = K.tf.size(label_arg, name="N")
    y_true = K.reshape(label_arg, shape=(N,))
    y_pred = K.reshape(pred_arg, shape=(N,))
    if weight is None:
        weight = K.tf.fill(K.shape(y_pred), 1.0)

    sort_result = K.tf.nn.top_k(y_pred, N, sorted=False, name="sort")
    y = K.gather(y_true, sort_result.indices)
    y_hat = K.gather(y_pred, sort_result.indices)
    w = K.gather(weight, sort_result.indices)

    is_negative = K.equal(y, K.tf.constant(0.0))
    is_positive = K.equal(y, K.tf.constant(1.0))
    w_zero = K.tf.fill(K.shape(y_pred), 0.0)
    w_negative = K.tf.where(is_positive, w_zero, w, name="w_negative")
    w_positive = K.tf.where(is_negative, w_zero, w)
    cum_positive = K.cumsum(w_positive)
    cum_negative = K.cumsum(w_negative)

    # Keep only the last element of each run of tied predictions.
    is_diff = K.not_equal(y_hat[:-1], y_hat[1:])
    is_end = K.tf.concat([is_diff, K.tf.constant([True])], 0)
    total_positive = cum_positive[-1]
    total_negative = cum_negative[-1]
    TP = K.tf.concat([
        K.tf.constant([0.]),
        K.tf.boolean_mask(cum_positive, is_end),
    ], 0)
    FP = K.tf.concat([
        K.tf.constant([0.]),
        K.tf.boolean_mask(cum_negative, is_end),
    ], 0)
    FPR = FP / total_negative
    TPR = TP / total_positive
    return K.sum((FPR[1:] - FPR[:-1]) * (TPR[:-1] + TPR[1:]) / 2)
def call(self, inputs, mask=None):  # pylint: disable=redefined-variable-type
    # This section implements the positional encoder on all the vectors at
    # once. The general idea is to use ones matrices in the shape of `inputs`
    # to create indexes per word.
    if mask is None:
        ones_like_x = K.ones_like(inputs)
    else:
        float_mask = K.cast(mask, 'float32')
        ones_like_x = K.ones_like(inputs) * K.expand_dims(float_mask, 2)
    # This is an odd way to get the number of words (i.e. the first dimension
    # of inputs). However, if the input is masked, using the dimension
    # directly does not equate to the correct number of words. We fix this by
    # adding up a relevant row of ones which has been masked if required.
    masked_m = K.expand_dims(K.sum(ones_like_x, 1), 1)
    if mask is None:
        one_over_m = ones_like_x / masked_m
        j_index = K.cumsum(ones_like_x, 1)
    else:
        one_over_m = switch(ones_like_x, ones_like_x / masked_m,
                            K.zeros_like(ones_like_x))
        j_index = K.cumsum(ones_like_x, 1) * K.expand_dims(float_mask, 2)

    k_over_d = K.cumsum(ones_like_x, 2) * 1.0 / K.cast(
        K.shape(inputs)[2], 'float32')
    l_weighting_vectors = (ones_like_x - (j_index * one_over_m)) - \
        (k_over_d * (ones_like_x - 2 * j_index * one_over_m))
    return K.sum(l_weighting_vectors * inputs, 1)
def earth_movers_distance(y_true, y_pred):
    """Earth Mover's Distance loss for regression framed as classification
    over ordered bins: compares the cumulative distributions of the target
    and predicted histograms."""
    cdf_true = K.cumsum(y_true, axis=-1)
    cdf_pred = K.cumsum(y_pred, axis=-1)
    emd = K.sqrt(K.mean(K.square(cdf_true - cdf_pred), axis=-1))
    return K.mean(emd)
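# A minimal numeric check of the CDF idea used by the EMD losses in this
# collection (my own toy histograms, not data from any of the original
# repos): for 1-D histograms over ordered bins, the L1 distance between
# CDFs equals the earth mover's distance.
import numpy as np

p = np.array([0.0, 0.5, 0.5, 0.0])  # mass on bins 1 and 2
q = np.array([0.5, 0.5, 0.0, 0.0])  # the same mass shifted one bin left
print(np.sum(np.abs(np.cumsum(p) - np.cumsum(q))))  # -> 1.0 (two 0.5 chunks moved one bin each)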
def emd_loss(y_true, y_pred):
    '''Earth Mover's Distance loss'''
    cdf_p = K.cumsum(y_true, axis=-1)
    cdf_phat = K.cumsum(y_pred, axis=-1)
    loss = K.mean(K.sqrt(K.mean(K.square(K.abs(cdf_p - cdf_phat)), axis=-1)))
    return loss
def compute_lovasz_gradient(truth):  # expects `truth` sorted by descending error
    truth_sum = K.sum(truth)
    intersection = truth_sum - K.cumsum(truth, 0)
    union = truth_sum + K.cumsum(1 - truth, 0)
    jaccard = 1. - intersection / union
    # Gradient of the Lovász extension: first Jaccard value, then
    # consecutive differences.
    jaccard = K.concatenate([jaccard[0:1], jaccard[1:] - jaccard[:-1]], axis=0)
    gradient = jaccard
    return gradient
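# Illustrative numpy walk-through of compute_lovasz_gradient with toy values
# of my own (the function assumes `truth` is already sorted by descending
# error, as its comment notes):
import numpy as np

truth = np.array([1., 1., 0., 1., 0.])
truth_sum = truth.sum()
intersection = truth_sum - np.cumsum(truth)
union = truth_sum + np.cumsum(1 - truth)
jaccard = 1. - intersection / union
gradient = np.concatenate([jaccard[:1], jaccard[1:] - jaccard[:-1]])
# `gradient` is the per-pixel weight vector used by Lovász-style losses.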
def earth_mover_loss(y_true, y_pred):
    '''
    Inputs: y_true, y_pred -- probability histograms over ordered bins.
    Outputs: mean sample-wise Earth Mover's Distance over the batch.
    '''
    cdf_ytrue = K.cumsum(y_true, axis=-1)
    cdf_ypred = K.cumsum(y_pred, axis=-1)
    samplewise_emd = K.sqrt(
        K.mean(K.square(K.abs(cdf_ytrue - cdf_ypred)), axis=-1))
    return K.mean(samplewise_emd)
def emd(p, q, norm=K.abs):
    # p and q are 1-hot vectors of two probability distributions.
    # Assume they are normalized to one.
    # Typically one is a collection of delta functions.
    # TODO: implement a metric on R
    P = K.cumsum(p, axis=-1)
    Q = K.cumsum(q, axis=-1)
    d = K.sum(norm(P - Q), axis=-1)
    return d
def earth_mover_loss(y_true, y_pred):
    """
    Earth Mover's Distance loss.
    Reproduced from
    https://github.com/titu1994/neural-image-assessment/blob/master/train_inception_resnet.py
    """
    cdf_ytrue = K.cumsum(y_true, axis=-1)
    cdf_ypred = K.cumsum(y_pred, axis=-1)
    samplewise_emd = K.sqrt(
        K.mean(K.square(K.abs(cdf_ytrue - cdf_ypred)), axis=-1))
    return K.mean(samplewise_emd)
def step(inputs, states):
    # Running cumulative sum as an RNN step: after the first timestep, keep
    # accumulating the previous output; at t == 0, start from the input.
    prev_output = states[0]
    t = states[1]
    t_int = K.cast(t[0], 'int32')
    prev_output_aug = K.permute_dimensions(prev_output, pattern)
    inputs_aug = K.permute_dimensions(inputs, pattern)
    output_aug = K.switch(K.all(t_int > 0),
                          K.cumsum(prev_output_aug, axis=0),
                          K.cumsum(inputs_aug, axis=0))
    output = K.permute_dimensions(output_aug, inv_pattern)
    return output, [output, t + 1]
def crps_loss(y_true, y_pred):
    if use_binary_crossentropy:
        d = K.binary_crossentropy(K.cumsum(y_true), K.cumsum(y_pred))
    else:
        d = summation_f(K.cumsum(y_pred - y_true))  # / int(y_pred.shape[1])
    return K.mean(d)
def cos_att(input1, input2, cosinval):
    # Weight both inputs by a softmax over their cosine similarities, then
    # accumulate the weighted vectors along the time axis.
    att_weight = Lambda(lambda x: softmax(x, axis=1),
                        output_shape=unchanged_shape)(cosinval)
    att_weight = Lambda(lambda x: K.repeat(x, n=lstm_unit))(att_weight)
    att_weight = Permute([2, 1])(att_weight)
    vec1 = Multiply()([input1, att_weight])
    vec2 = Multiply()([input2, att_weight])
    vec1 = Lambda(lambda x: K.cumsum(x, axis=1),
                  output_shape=unchanged_shape)(vec1)
    vec2 = Lambda(lambda x: K.cumsum(x, axis=1),
                  output_shape=unchanged_shape)(vec2)
    return vec1, vec2
def cumsoftmax(x, mode='l2r'):
    axis = K.ndim(x) - 1
    if mode == 'l2r':
        x = K.softmax(x, axis=axis)
        x = K.cumsum(x, axis=axis)
        return x
    elif mode == 'r2l':
        x = x[..., ::-1]
        x = K.softmax(x, axis=axis)
        x = K.cumsum(x, axis=axis)
        return x[..., ::-1]
    else:
        return x
def emd(y_true, y_pred):
    """
    Earth mover's distance (EMD) for 1D-histograms
    (also known as Wasserstein metric).

    Args:
        y_true: ground truth histograms (batch_size, bins)
        y_pred: predicted histograms (batch_size, bins)

    Returns:
        mean EMD over batch
    """
    cdf_true = K.cumsum(y_true, axis=1)
    cdf_pred = K.cumsum(y_pred, axis=1)
    return K.mean(K.sum(K.abs(cdf_true - cdf_pred), axis=1))
def loss(y_true, y_pred):
    """Approximate CRPS function for categorical output.

    Args:
        y_true: One-hot-encoded output
        y_pred: Probability for each bin

    Returns:
        approx_crps: Approximate mean CRPS value for batch
    """
    # [sample, cat]
    cum_obs = K.cumsum(y_true, axis=1)
    cum_preds = K.cumsum(y_pred, axis=1)
    approx_crps = K.sum(K.square(cum_obs - cum_preds), axis=1) * bin_width
    return K.mean(approx_crps)
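# Toy numeric check of the categorical CRPS above (my own values; bin_width
# stands in for the constant captured from the enclosing scope):
import numpy as np

bin_width = 0.5
y_true = np.array([0., 1., 0., 0.])      # observation falls in bin 1
y_pred = np.array([0.1, 0.6, 0.2, 0.1])
crps = np.sum((np.cumsum(y_true) - np.cumsum(y_pred)) ** 2) * bin_width
# cum_obs = [0, 1, 1, 1], cum_preds = [0.1, 0.7, 0.9, 1.0] -> crps = 0.055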
def auc(y_true, y_pred):
    # eliminate shapes like (batch, 1)
    y_true = K.flatten(y_true)
    y_pred = K.flatten(y_pred)

    # total number of elements in this batch
    batch_size = K.shape(y_true)[0]

    # sorting the prediction values in descending order
    values, indices = tf.nn.top_k(y_pred, k=batch_size)
    # sorting the ground truth values based on the predictions above
    sorted_true = K.gather(y_true, indices)

    # getting the ground negative elements (already sorted above)
    negatives = 1 - sorted_true

    # the y_true positive count per threshold
    TP_curve = K.cumsum(sorted_true)

    # area under the curve
    auc = K.sum(TP_curve * negatives)

    # normalizing the result between 0 and 1
    batch_size = K.cast(batch_size, K.floatx())
    positive_count = K.sum(y_true)
    negative_count = batch_size - positive_count
    total_area = positive_count * negative_count
    return auc / (total_area + K.epsilon())
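# Plain-numpy cross-check of the cumsum-based AUC above (toy scores of my
# own). Summing the running true-positive count at each negative counts the
# correctly ordered (positive, negative) pairs, i.e. the Mann-Whitney
# statistic behind ROC AUC:
import numpy as np

y_true = np.array([1., 0., 1., 1., 0.])
y_pred = np.array([0.9, 0.8, 0.7, 0.3, 0.1])
order = np.argsort(-y_pred)                   # descending, like tf.nn.top_k
sorted_true = y_true[order]
tp_curve = np.cumsum(sorted_true)
pairs = (tp_curve * (1 - sorted_true)).sum()  # positives ranked above each negative
print(pairs / (y_true.sum() * (len(y_true) - y_true.sum())))  # -> 4/6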
def __init__(self, h_size):
    self.inputs = Input(shape=(84, 84, 3))
    self.actions = Input(shape=(1,), dtype='int32')
    self.actions_onehot = Lambda(
        K.one_hot, arguments={'num_classes': env.actions},
        output_shape=(None, env.actions))(self.actions)

    x = Conv2D(filters=32, kernel_size=[8, 8], strides=[4, 4],
               input_shape=(84, 84, 3))(self.inputs)
    x = Conv2D(filters=64, kernel_size=[4, 4], strides=[2, 2])(x)
    x = Conv2D(filters=64, kernel_size=[3, 3], strides=[1, 1])(x)
    x = Conv2D(filters=h_size, kernel_size=[7, 7], strides=[1, 1])(x)

    # Split outputs of last conv layer using lambda layers
    x_value = Lambda(lambda x: x[:, :, :, :h_size // 2])(x)
    x_advantage = Lambda(lambda x: x[:, :, :, h_size // 2:])(x)

    # Process the split data streams into value and advantage functions
    value = Dense(env.actions, activation="linear")(x_value)
    advantage = Dense(env.actions, activation="linear")(x_advantage)

    # Recombine value and advantage layers into Q layer
    q = QLayer()([value, advantage])

    self.q_out = Multiply()([q, self.actions_onehot])
    self.q_out = Lambda(lambda x: K.cumsum(x, axis=3),
                        output_shape=(1,))(self.q_out)
    # need to figure out how to represent actions within training

    self.model = Model(inputs=[self.inputs, self.actions],
                       outputs=[q, self.q_out])
    self.model.compile(optimizer="Adam", loss="mean_squared_error")
    self.model.summary()
def cumsoftmax(x, mode='l2r'):
    """Apply softmax first, then cumsum; the cumsum runs either
    left-to-right ('l2r') or right-to-left ('r2l').
    """
    axis = K.ndim(x) - 1
    if mode == 'l2r':
        x = K.softmax(x, axis=axis)
        x = K.cumsum(x, axis=axis)
        return x
    elif mode == 'r2l':
        x = x[..., ::-1]
        x = K.softmax(x, axis=axis)
        x = K.cumsum(x, axis=axis)
        return x[..., ::-1]
    else:
        return x
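# Quick illustration of cumsoftmax with my own toy logits: the result is a
# monotonically non-decreasing vector in [0, 1] (this construction appears,
# for example, as the master gates in ON-LSTM):
import numpy as np

x = np.array([1.0, 2.0, 0.5])
p = np.exp(x) / np.exp(x).sum()  # softmax
print(np.cumsum(p))              # 'l2r' mode: non-decreasing, ends at 1.0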
def prediction_layer(x):
    # x.shape = (?, 6040, 5)
    x_cumsum = K.cumsum(x, axis=2)
    # x_cumsum.shape = (?, 6040, 5)
    output = K.softmax(x_cumsum)
    # output.shape = (?, 6040, 5)
    return output
def call(self, x):
    # x: (batch, max_length=20, embedding_size=16)
    if (self.size is None) or (self.mode == 'sum'):
        self.size = int(x.shape[-1])  # embedding_size=16
    batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]

    position_j = 1. / K.pow(
        10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
    # frequencies 10000^(-2j/size) for j = 0 .. size/2 - 1;
    # for size=16: [10^0, 10^-0.5, 10^-1, ..., 10^-3.5]
    position_j = K.expand_dims(position_j, 0)  # coefficients
    # (1, embedding_size/2=8)

    position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    # K.arange cannot generate variable-length ranges, so positions are
    # built from a cumulative sum of ones instead.
    # (batch, max_length=20): [[0,1,2...19],[0,1,2...19]...]
    position_i = K.expand_dims(position_i, 2)  # positions
    # (batch, max_length=20, 1): [[[0],[1],[2]...[19]],...]

    position_ij = K.dot(position_i, position_j)
    # (batch, 20, 1) x (1, 8) = (batch, max_length=20, embedding_size/2=8)
    position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
    # (batch, max_length=20, embedding_size=16)

    if self.mode == 'sum':
        return position_ij + x
    elif self.mode == 'concat':
        return K.concatenate([position_ij, x], 2)
def _negative_log_likelihood(E, risk):
    # Cox proportional-hazards partial likelihood. Assumes the batch is
    # sorted by survival time so that the cumsum over hazard ratios yields
    # each subject's risk-set denominator; E flags observed events.
    hazard_ratio = K.exp(risk)
    log_risk = K.log(K.cumsum(hazard_ratio))
    uncensored_likelihood = risk - log_risk
    censored_likelihood = uncensored_likelihood * E
    neg_likelihood = -K.sum(censored_likelihood)
    return neg_likelihood
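# Illustrative numpy version of the partial likelihood above (toy data of
# my own; like the Keras function, it assumes samples are pre-sorted by
# descending survival time, which the cumsum relies on):
import numpy as np

risk = np.array([0.2, -0.1, 0.4])  # x.beta, sorted by descending time
E = np.array([1., 0., 1.])         # 1 = event observed, 0 = censored
log_risk = np.log(np.cumsum(np.exp(risk)))
print(-np.sum((risk - log_risk) * E))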
def loss(y_true, y_pred):
    hazard_ratio = K.exp(y_pred)
    log_risk = K.log(K.cumsum(hazard_ratio))
    uncensored_likelihood = K.transpose(y_pred) - log_risk
    censored_likelihood = uncensored_likelihood * E
    num_observed_event = K.sum([float(e) for e in E])
    return K.sum(censored_likelihood) / num_observed_event * (-1)
def neg_log_pl(y_true, y_pred):
    # Sort by survival time (descending) so that
    # - If there are no tied survival times, the risk set
    #   for event i is individuals 0 through i
    # - If there are ties, every event in a tied group must share the same
    #   risk set: individuals 0 through the last member of the group
    sorting = tf.nn.top_k(y_true[:, 0], k=n)
    time = K.gather(y_true[:, 0], indices=sorting.indices)
    xbeta = K.gather(y_pred[:, 0], indices=sorting.indices)
    risk = K.exp(xbeta)

    # TF tensors do not support in-place item assignment, so ties are
    # handled with segment ops instead of a Python loop: tied times are
    # adjacent after sorting, so give every member of a tied group the
    # cumulative risk through the group's last position. This matches the
    # original intent of pushing tied risks to one position before cumsum.
    _, group_ids = tf.unique(time)
    cum_risk = K.cumsum(risk)
    denom = tf.gather(tf.math.segment_max(cum_risk, group_ids), group_ids)

    event = K.gather(y_true[:, 1], indices=sorting.indices)
    terms = xbeta - K.log(denom)
    loglik = K.cast(event, dtype=terms.dtype) * terms
    return -K.sum(loglik)
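# Numpy sanity check for the tie handling above (toy data of my own):
import numpy as np

time = np.array([5., 3., 3., 1.])  # sorted descending, tie at t=3
risk = np.array([1., 2., 3., 4.])
_, ids = np.unique(-time, return_inverse=True)  # group ids in sorted order
cum = np.cumsum(risk)
denom = np.array([cum[ids == g].max() for g in ids])
print(denom)  # -> [1, 6, 6, 10]: both tied events share the group's risk sum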
def loss(y_true, y_pred):
    hazard_ratio = K.exp(y_pred)
    log_risk = K.log(K.cumsum(hazard_ratio))
    uncensored_likelihood = K.transpose(y_pred) - log_risk
    censored_likelihood = uncensored_likelihood * E
    neg_likelihood = -K.sum(censored_likelihood) / NUM_E
    return neg_likelihood
def Bernoulli_ODE_Loss(encoder, f, decoder, frames):
    x = encoder.inp
    s_v_μ_0, s_v_σ_0, z = encoder(x)
    sLsg = f(z)
    x_rec = decoder(sLsg)

    # Reconstruction term: Bernoulli log-likelihood of the frames.
    a = frames * tf.keras.losses.binary_crossentropy(x, x_rec)
    log_p_x_z = -K.sum(a, axis=(1, 2))

    # Prior over the latent trajectory.
    log_pz = tfd.MultivariateNormalDiag(
        loc=tf.zeros(2 * frames * latent_dim),
        scale_diag=tf.ones(2 * frames * latent_dim)
    ).log_prob(tf.keras.layers.Flatten()(z))

    diag_eval = tf.keras.layers.Concatenate(axis=1)(
        [z[:, 0, 0, :], z[:, 1, 0, :]])
    log_qz_0 = tfd.MultivariateNormalDiag(
        loc=s_v_μ_0, scale_diag=s_v_σ_0).log_prob(diag_eval)

    # Estimate the trace of the Jacobian of f by perturbing each latent
    # dimension (A[i] supplies the perturbation), then integrate it over
    # time with a cumulative sum.
    Trace = 0
    for i in range(latent_dim):
        z_2 = z + tf.constant(A[i])
        f_2 = f(z_2)
        Trace += f_2[:, :, i] - sLsg[:, :, i]
    Int = K.cumsum(Trace, axis=1)
    log_qz = frames * log_qz_0 - K.sum(Int, axis=1)

    # L2 regularization of the ODE network's weights.
    ode_regul = 0.01 * np.sum(
        [np.sum(f.get_weights()[i] ** 2) for i in range(len(f.get_weights()))])

    ELBO = -ode_regul + log_p_x_z + log_pz - log_qz_0 - log_qz
    return -ELBO
def LOSS_L2(y_true, y_pred):
    # MAX_SEQ_LEN=1
    BATCH_SIZE = int(k_n.get_value())
    L2_NORM = 0.001
    sorting = tf.nn.top_k(y_true[:, 0], k=int(k_n.get_value()))
    # time = K.gather(y_true[:, 0], indices=sorting.indices)
    xbeta = K.gather(y_pred, indices=sorting.indices)
    # tf.gather() extracts the elements of a tensor at the given indices.
    risk = K.exp(xbeta)
    event = K.gather(y_true[:, 1], indices=sorting.indices)
    # self.preds = preds
    final_dead_rate = xbeta
    final_survival_rate = 1.0 - final_dead_rate
    predict = K.stack([final_survival_rate, final_dead_rate])
    cross_entropy = -K.cumsum(event * K.log(final_dead_rate))
    cost = cross_entropy
    # final_survival_rate = tf.subtract(tf.constant(1.0, dtype=tf.float32), final_dead_rate)
    # predict = tf.transpose(tf.stack([final_survival_rate, final_dead_rate]), name="predict")
    ## predict = predict[-1, :, :]
    # cross_entropy = -tf.reduce_sum(event * tf.log(tf.clip_by_value(predict, 1e-10, 1.0)))
    # tvars = tf.trainable_variables()
    # tf.trainable_variables returns every variable in the current graph
    # that was not created with trainable=False.
    # lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in tvars]) * L2_NORM
    # cost = tf.add(cross_entropy, lossL2, name="cost") / BATCH_SIZE
    # Loss2 = K.categorical_crossentropy(event, xbeta)
    Loss = cost
    return Loss
def mask_logits(self, inputs, mask, mask_value=-1e12):
    shapes = [x if x is not None else -1 for x in inputs.shape.as_list()]
    mask = K.cast(mask, tf.int32)
    mask = K.one_hot(mask[:, 0], shapes[-1])
    mask = 1 - K.cumsum(mask, 1)
    mask = tf.cast(mask, tf.float32)
    mask = tf.reshape(mask, [shapes[0], 1, 1, shapes[-1]])
    return inputs + mask_value * (1 - mask)
def mask_logits(self, inputs, mask, clen, mask_value=-1e12):
    shapes = [x if x is not None else -1 for x in inputs.shape.as_list()]
    mask = K.cast(mask, tf.int32)
    mask = K.one_hot(mask[:, 0], shapes[-1])
    mask = 1 - K.cumsum(mask, 1)
    mask = tf.cast(mask, tf.float32)
    mask = tf.tile(tf.expand_dims(mask, axis=1), [1, clen, 1])
    return inputs + mask_value * (1 - mask)
def positions_func(inputs, pad=0):
    """
    A layer filling the i-th column of a 2D tensor with ln(1+i)
    (i being the 1-based position) when it contains a meaningful symbol
    and with 0 when it contains PAD
    """
    position_inputs = kb.cumsum(kb.ones_like(inputs, dtype="float32"), axis=1)
    position_inputs *= kb.cast(kb.not_equal(inputs, pad), "float32")
    return kb.log(1.0 + position_inputs)
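# Toy check of positions_func (my own inputs, pad=0): PAD columns map to 0,
# real tokens at 1-based position p map to ln(1+p).
import numpy as np

tokens = np.array([[5, 3, 0, 0]])
positions = np.cumsum(np.ones_like(tokens, dtype=float), axis=1)
positions *= (tokens != 0)
print(np.log(1.0 + positions))  # [[ln 2, ln 3, 0, 0]]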
def Mask(self, inputs, seq_len, axis=1, time_dim=1, mode='mul'):
    if seq_len is None:
        return inputs
    else:
        seq_len = K.cast(seq_len, tf.int32)
        mask = K.one_hot(seq_len[:, 0], K.shape(inputs)[time_dim])
        mask = 1 - K.cumsum(mask, 1)
        mask = K.expand_dims(mask, axis)
        if mode == 'mul':
            return inputs * mask
        if mode == 'add':
            return inputs - (1 - mask) * 1e12
def Mask(self, inputs, seq_len, mode='mul'):
    if seq_len is None:
        return inputs
    else:
        mask = K.one_hot(seq_len[:, 0], K.shape(inputs)[1])
        mask = 1 - K.cumsum(mask, 1)
        for _ in range(len(inputs.shape) - 2):
            mask = K.expand_dims(mask, 2)
        if mode == 'mul':
            return inputs * mask
        if mode == 'add':
            return inputs - (1 - mask) * 1e12
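# The one_hot/cumsum idiom shared by the Mask and mask_logits helpers above,
# traced in numpy with toy lengths of my own: a one-hot vector at index
# seq_len turns into ones before seq_len and zeros from seq_len onward.
import numpy as np

seq_len, timesteps = 3, 5
one_hot = np.eye(timesteps)[seq_len]  # [0, 0, 0, 1, 0]
mask = 1 - np.cumsum(one_hot)         # [1, 1, 1, 0, 0]
print(mask)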
def call(self, x, mask=None):
    if (self.size is None) or (self.mode == 'sum'):
        self.size = int(x.shape[-1])
    batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]
    position_j = 1. / K.pow(
        10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
    position_j = K.expand_dims(position_j, 0)
    # K.arange does not support variable length, so positions are generated
    # from a cumulative sum of ones instead.
    position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    position_i = K.expand_dims(position_i, 2)
    position_ij = K.dot(position_i, position_j)
    position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
    if self.mode == 'sum':
        return position_ij + x
    elif self.mode == 'concat':
        return K.concatenate([position_ij, x], 2)
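# Standalone numpy sketch of the same sinusoidal position encoding
# (illustrative sizes of my own: seq_len=4, size=4):
import numpy as np

size, seq_len = 4, 4
position_j = 1. / np.power(10000., 2 * np.arange(size // 2) / size)
position_i = np.arange(seq_len, dtype=float)[:, None]
angles = position_i * position_j[None, :]                       # (seq_len, size/2)
pe = np.concatenate([np.cos(angles), np.sin(angles)], axis=-1)  # (seq_len, size)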
def output_sampling(self, output, rand_matrix):
    # Generates a sampled selection based on raw output state vector.
    # Creates a cdf vector and compares against a randomly generated vector.
    # Requires a pre-generated rand_matrix (i.e. generated outside step function)

    sampled_output = output / K.sum(output, axis=-1, keepdims=True)
    # (batch_size, self.units)
    mod_sampled_output = sampled_output / K.exp(self.temperature)
    norm_exp_sampled_output = mod_sampled_output / K.sum(
        mod_sampled_output, axis=-1, keepdims=True)

    cdf_vector = K.cumsum(norm_exp_sampled_output, axis=-1)
    cdf_minus_vector = cdf_vector - norm_exp_sampled_output

    rand_matrix = K.stack([rand_matrix], axis=0)
    rand_matrix = K.stack([rand_matrix], axis=2)

    # The sampled bin is the one whose [cdf - prob, cdf) interval contains
    # the random draw.
    compared_greater_output = K.cast(K.greater(cdf_vector, rand_matrix),
                                     dtype='float32')
    compared_lesser_output = K.cast(K.less(cdf_minus_vector, rand_matrix),
                                    dtype='float32')

    final_output = compared_greater_output * compared_lesser_output
    return final_output
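# The same inverse-CDF sampling trick in plain numpy (toy distribution of
# my own): a bin is selected when the uniform draw falls inside its
# [cdf - prob, cdf) interval.
import numpy as np

probs = np.array([0.2, 0.5, 0.3])
cdf = np.cumsum(probs)            # [0.2, 0.7, 1.0]
u = 0.65                          # a uniform(0, 1) draw
sample = int(np.argmax(cdf > u))  # -> 1 (second bin)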
def earth_movers_distance(y_true, y_pred):
    cdf_true = K.cumsum(y_true, axis=-1)
    cdf_pred = K.cumsum(y_pred, axis=-1)
    emd = K.sqrt(K.mean(K.square(cdf_true - cdf_pred), axis=-1))
    return K.mean(emd)