def evaluate(regressor, X_test, Y_test, dataset_object, name: str, senti: str):
    """Print test-set error metrics in scaled and original units, then plot.

    Args:
        regressor: Model exposing ``predict``.
        X_test: Test features, passed straight to ``regressor.predict``.
        Y_test: Scaled test targets; indexed as a 2-D array and reshaped to a
            single column before inverse scaling.
        dataset_object: Provides ``y_scaler`` (with ``inverse_transform``)
            and ``test_dates``.
        name: Only referenced by the disabled CSV export below.
        senti: Only referenced by the disabled CSV export below.

    Returns:
        None. Results are printed and handed to ``plot_preds``.
    """
    yhat = regressor.predict(X_test)
    Y_test_ = Y_test[:, 0]
    yhat_ = yhat[:, 0]
    # mean_squared_error yields the MSE; take the square root so the number
    # printed under the "RMSE" label really is a root-mean-squared error.
    scaled_rmse = float(mean_squared_error(Y_test_, yhat_).numpy()) ** 0.5
    print('Test RMSE: %.3f' % scaled_rmse)
    print('Test MAE: %.3f' % mean_absolute_error(Y_test_, yhat_).numpy())
    # print(MAPE(Y_test, yhat))

    # invert scaling for forecast
    inv_yhat = dataset_object.y_scaler.inverse_transform(yhat)
    inv_yhat = inv_yhat[:, 0]
    # invert scaling for actual
    test_y = Y_test.reshape((len(Y_test), 1))
    inv_y = dataset_object.y_scaler.inverse_transform(test_y)
    inv_y = inv_y[:, 0]
    print(inv_y.shape, inv_yhat.shape, dataset_object.test_dates.shape)

    # Disabled CSV export (was kept as a no-op string literal):
    # pd.DataFrame({"predictions": inv_yhat, "Close": inv_y,
    #               "Date": dataset_object.test_dates
    #               }).to_csv("data/"+name+"_senti_"+senti+".csv", index=False)

    # calculate RMSE in the original (unscaled) units
    rmse = float(mean_squared_error(inv_y, inv_yhat)) ** 0.5
    print('Test RMSE: %.3f' % rmse)
    print('Test MAE: %.3f' % mean_absolute_error(inv_y, inv_yhat))
    plot_preds(inv_yhat, inv_y)
def loss_fn(y_true, y_pred):
    """Return per-step MSE with the last step scaled by ``1 + weight``.

    ``weight`` is a free variable resolved from the enclosing scope. The MSE
    of every step except the last is concatenated (along the last axis) with
    the up-weighted MSE of the final step.
    """
    leading_mse = mean_squared_error(y_true[:, :-1], y_pred[:, :-1])
    final_mse = mean_squared_error(y_true[:, -1], y_pred[:, -1])
    # expand_dims gives the final-step term a trailing axis so that it can be
    # concatenated with the leading terms.
    weighted_final = tf.expand_dims(final_mse * (1 + weight), axis=1)
    return tf.concat((leading_mse, weighted_final), axis=-1)
def eval(self, eval_fl):
    """Predict on ``eval_fl`` and return ``(predictions, mse, mse_norm)``.

    When ``self.normalise_labels`` is set, ``mse_norm`` compares the raw
    predictions with ``eval_fl.labels_norm`` and ``mse`` compares the
    inverse-scaled predictions with ``eval_fl.labels``. Otherwise both
    values are the same MSE against ``eval_fl.labels``.
    """
    predictions = self.model.predict(eval_fl.features_c_norm)

    if not self.normalise_labels:
        mse = mean_squared_error(eval_fl.labels, predictions)
        return predictions, mse, mse

    mse_norm = mean_squared_error(eval_fl.labels_norm, predictions)
    true_labels = eval_fl.labels
    rescaled = self.labels_scaler.inverse_transform(predictions)
    mse = mean_squared_error(true_labels, rescaled)
    return predictions, mse, mse_norm
def create_dem_aug(self, kernel_initializer = 'he_normal', img_flat_len = 1024):
    """Build a model that regresses text/attribute features onto image features.

    The image branch runs ``self.img_flat_model`` (marked non-trainable here)
    on a 64x64x3 input. The text branch maps the word embedding, optionally
    concatenated with a dense projection of the attributes, to a vector of
    size ``img_flat_len``. The mean squared error between the two branches
    is attached with ``add_loss``, so the model is compiled with ``loss=None``.
    """
    attr_in = layers.Input(shape=(50,), name='attr')
    wv_in = layers.Input(shape=(600,), name='wv')
    img_in = layers.Input(shape=(64, 64, 3))

    # Image features come from the frozen sub-model rather than a raw input.
    self.img_flat_model.trainable = False
    img_features = self.img_flat_model(img_in)

    attr_projection = layers.Dense(
        600,
        use_bias=True,
        kernel_initializer=kernel_initializer,
        kernel_regularizer=l2(1e-4),
        name='attr_dense',
    )(attr_in)

    if self.only_emb:
        text_features = wv_in
    else:
        text_features = layers.Concatenate(name='attr_word_emb')(
            [wv_in, attr_projection])

    widths = [int(img_flat_len * scale) for scale in (2, 1.5, 1.25)]
    text_hidden = self.full_connect_layer(
        text_features,
        hidden_dim=widths,
        activation='relu',
        resnet=False,
        drop_out_ratio=0.2,
    )
    text_out = self.full_connect_layer(
        text_hidden, hidden_dim=[img_flat_len], activation='relu')

    mse_loss = K.mean(mean_squared_error(img_features, text_out))

    model = Model([img_in, attr_in, wv_in], outputs=[text_out, img_features])
    model.add_loss(mse_loss)
    model.compile(optimizer=Adam(lr=1e-4), loss=None)
    return model
def kl_reconstruction_loss(true, pred):
    """Return the mean of reconstruction MSE plus ``BETA``-weighted KL term.

    ``mu``, ``sigma`` and ``BETA`` are free variables taken from an outer
    scope; ``sigma`` enters the KL expression both directly and through
    ``exp(sigma)``.
    """
    recon = mean_squared_error(true, pred)
    kl_per_row = tf.reduce_sum(
        (1 + sigma - tf.math.pow(mu, 2) - tf.math.exp(sigma)), axis=1)
    kl = -0.5 * tf.reduce_mean(kl_per_row)
    return K.mean(recon + BETA * kl)
def testOptimizerWithCallableVarList(self):
    """Check that ``minimize`` accepts a callable ``var_list``.

    The model's weights do not exist until it is first called, so reading
    them early must raise; ``minimize`` must resolve the callable lazily.
    """
    train_samples = 20
    input_dim = 1
    num_classes = 2
    (x, y), _ = testing_utils.get_test_data(
        train_samples=train_samples,
        test_samples=10,
        input_shape=(input_dim,),
        num_classes=num_classes)
    y = keras.utils.to_categorical(y)

    num_hidden = 1
    model = testing_utils.get_small_sequential_mlp(
        num_hidden=num_hidden, num_classes=num_classes)
    opt = adam.Adam()

    loss = lambda: losses.mean_squared_error(model(x), y)
    var_list = lambda: model.trainable_weights

    # assertRaisesRegexp is a deprecated alias (removed in Python 3.12);
    # assertRaisesRegex is the supported spelling.
    with self.assertRaisesRegex(
        ValueError, 'Weights for model .* have not yet been created'):
        var_list()
    train_op = opt.minimize(loss, var_list)
    if not context.executing_eagerly():
        self.evaluate(variables.global_variables_initializer())
        self.assertEqual(
            [[0.]], self.evaluate(opt.get_slot(var_list()[0], 'm')))
        self.evaluate(train_op)
    self.assertNotEqual(
        [[0.]], self.evaluate(opt.get_slot(var_list()[0], 'm')))
    self.assertLen(var_list(), 4)
def create_rnn_model(self):
    """Build a two-layer bidirectional LSTM trained on a shifted-sequence MSE.

    The loss compares the input from step 1 onwards with the prediction up
    to the second-to-last step and is attached with ``add_loss``; the model
    is therefore compiled with ``loss=None``.
    """
    n_steps = self.dense_input_len
    first_units, second_units = self.rnn_units[0], self.rnn_units[1]

    seq_input = Input(shape=(n_steps, 1))
    # Created by the original code but never connected to the model.
    seq_output = Input(shape=(n_steps, 1))

    encoded = Bidirectional(
        LSTM(first_units, return_sequences=True, activation='relu'))(seq_input)
    encoded = Bidirectional(
        LSTM(second_units, return_sequences=True, activation='relu'))(encoded)

    per_step = TimeDistributed(
        Dense(self.hidden_dim[0], activation='relu'))(encoded)
    per_step = TimeDistributed(Dense(1, activation='relu'))(per_step)

    # Drop the trailing singleton axis of both tensors before slicing.
    seq_pred = Reshape((n_steps, ))(per_step)
    flat_input = Reshape((n_steps, ))(seq_input)

    model = Model(seq_input, seq_pred)
    shifted_mse = K.mean(
        mean_squared_error(flat_input[:, 1:], seq_pred[:, :-1]))
    model.add_loss(shifted_mse)
    model.compile(optimizer='adam', loss=None)
    return model
def testOptimizerWithCallableVarList(self):
    """Check that ``minimize`` accepts a callable ``var_list``.

    The model's weights do not exist until it is first called, so reading
    them early must raise; ``minimize`` must resolve the callable lazily.
    """
    train_samples = 20
    input_dim = 1
    num_classes = 2
    (x, y), _ = testing_utils.get_test_data(train_samples=train_samples,
                                            test_samples=10,
                                            input_shape=(input_dim, ),
                                            num_classes=num_classes)
    y = keras.utils.to_categorical(y)

    num_hidden = 1
    model = testing_utils.get_small_sequential_mlp(num_hidden=num_hidden,
                                                   num_classes=num_classes)
    opt = adam.Adam()

    loss = lambda: losses.mean_squared_error(model(x), y)
    var_list = lambda: model.trainable_weights

    # assertRaisesRegexp is a deprecated alias (removed in Python 3.12);
    # assertRaisesRegex is the supported spelling.
    with self.assertRaisesRegex(
            ValueError, 'Weights for model .* have not yet been created'):
        var_list()
    train_op = opt.minimize(loss, var_list)
    if not context.executing_eagerly():
        self.evaluate(variables.global_variables_initializer())
        self.assertEqual([[0.]],
                         self.evaluate(opt.get_slot(var_list()[0], 'm')))
        self.evaluate(train_op)
    self.assertNotEqual([[0.]],
                        self.evaluate(opt.get_slot(var_list()[0], 'm')))
    self.assertLen(var_list(), 4)
def so3_loss_func(y_true, y_pred):
    """Combine L1 losses on columns 0-5 with an MSE on columns 6-8.

    Columns ``0:3`` and ``3:6`` each contribute an ``l1_loss`` term. Columns
    ``6:9`` contribute a ``mean_squared_error`` term; if computing or adding
    that term raises ``TypeError``, the L1 sum is used in its place.
    """
    first_l1 = l1_loss(y_true[:, :3], y_pred[:, :3])
    second_l1 = l1_loss(y_true[:, 3:6], y_pred[:, 3:6])
    pos_vel_contrib = first_l1 + second_l1

    try:
        att_contrib = 0 + mean_squared_error(y_true[:, 6:9], y_pred[:, 6:9])
    except TypeError:
        att_contrib = pos_vel_contrib

    return pos_vel_contrib + att_contrib
def vae_loss(self, x, x_decoded_mean):
    """Return the VAE loss: latent (KL-style) term plus reconstruction MSE.

    ``self.encoder.layers`` must unpack into exactly three layers; the second
    and third are applied directly to ``x`` to obtain the latent mean and
    log-variance.
    """
    _, mean_layer, logvar_layer = self.encoder.layers
    latent_mean = mean_layer(x)
    latent_logvar = logvar_layer(x)

    # First term: latent regulariser summed over the last axis.
    kl_inner = 1 + latent_logvar - K.square(latent_mean) - K.exp(latent_logvar)
    latent_loss = -0.5 * K.sum(kl_inner, axis=-1)

    # Second term: reconstruction error.
    reconst_loss = K.mean(mean_squared_error(x, x_decoded_mean), axis=-1)

    return latent_loss + reconst_loss
def create_model(self):
    """Build and compile a dense VAE as one model (encoder and decoder fused).

    The reconstruction term is MSE when ``self.mse`` is truthy and binary
    cross-entropy otherwise, scaled by ``self.original_dim``. Its sum with
    the KL term is averaged and attached with ``add_loss``, so the model is
    compiled with ``loss=None``.
    """
    # ----- encoder -----
    inputs = Input(shape=(self.original_dim, ), name='encoder_input')
    enc_hidden = Dense(self.intermediate_dim, activation='relu')(inputs)
    z_mean = Dense(self.latent_dim, name='z_mean')(enc_hidden)
    z_log_var = Dense(self.latent_dim, name='z_log_var')(enc_hidden)

    # Reparameterisation: sampling happens in a Lambda layer fed by the
    # mean and log-variance tensors.
    z = Lambda(self.sampling, name='z')([z_mean, z_log_var])

    # ----- decoder (wired straight onto z) -----
    dec_hidden = Dense(self.intermediate_dim, activation='relu')(z)
    outputs = Dense(self.original_dim, activation='sigmoid')(dec_hidden)

    vae = Model(inputs, outputs, name='vae_mlp')

    # ----- loss = reconstruction + KL -----
    reconstruction_fn = mean_squared_error if self.mse else binary_crossentropy
    reconstruction_loss = reconstruction_fn(inputs, outputs)
    reconstruction_loss = reconstruction_loss * self.original_dim

    kl_loss = K.sum(
        1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    kl_loss = kl_loss * -0.5

    vae.add_loss(K.mean(reconstruction_loss + kl_loss))
    vae.compile(optimizer='adam', loss=None)
    return vae
def eval(self, eval_fl):
    """Predict one value per (sample, index) pair and score against labels.

    For every feature row, the model is queried once for each index in
    ``range(1, self.numel)`` with the index appended to the row. Returns
    ``(predictions, mse, mse_norm)``; without label normalisation the two
    MSE values are identical.
    """
    feature_rows = eval_fl.features_c_norm.tolist()
    predictions = np.array([
        [
            self.model.predict(np.array(row + [idx])[None, ...])[0][0]
            for idx in range(1, self.numel)
        ]
        for row in feature_rows
    ])

    if not self.normalise_labels:
        mse = mean_squared_error(eval_fl.labels, predictions)
        return predictions, mse, mse

    mse_norm = mean_squared_error(eval_fl.labels_norm, predictions)
    true_labels = eval_fl.labels
    rescaled = self.labels_scaler.inverse_transform(predictions)
    mse = mean_squared_error(true_labels, rescaled)
    return predictions, mse, mse_norm
def eval(self, eval_fl):
    """Predict on ``eval_fl`` and return ``(predictions_actual, mse, mse_norm)``.

    The model's prediction is iterated, each element transposed, stacked
    vertically and transposed back. With ``self.labels_norm`` set, the
    stacked predictions are scored against ``eval_fl.labels_norm`` and,
    after ``eval_fl.labels_scaler.inverse_transform``, against
    ``eval_fl.labels``. Otherwise they are scored directly against
    ``eval_fl.labels`` and both MSE values are the same.
    """
    features = eval_fl.features_c_norm

    def _stacked_predictions():
        # One transposed block per element returned by predict().
        blocks = [block.T for block in self.model.predict(features)]
        return np.vstack(blocks).T

    if not self.labels_norm:
        targets = eval_fl.labels.tolist()
        predictions_actual = _stacked_predictions().tolist()
        mse = mean_squared_error(targets, predictions_actual)
        return predictions_actual, mse, mse

    targets_norm = eval_fl.labels_norm.tolist()
    targets_actual = eval_fl.labels.tolist()
    predictions_norm = _stacked_predictions().tolist()
    predictions_actual = eval_fl.labels_scaler.inverse_transform(
        predictions_norm)

    # Metrics in original units and in normalised units respectively.
    mse = mean_squared_error(targets_actual, predictions_actual)
    mse_norm = mean_squared_error(targets_norm, predictions_norm)
    return predictions_actual, mse, mse_norm
def create_ae(self, kernel_initializer = 'he_normal', img_flat_len = 1024):
    """Build an autoencoder-style model over attributes and word embeddings.

    The concatenated word embedding and attribute projection are mapped to a
    vector of size ``img_flat_len`` (regressed onto the ``img`` input with
    MSE) and then decoded back to 50 sigmoid outputs (scored against the
    attribute input with binary cross-entropy). The two losses are blended
    with ``gamma = 0.5`` and attached with ``add_loss``.
    """
    gamma = 0.5
    out_size = 50

    attr_in = layers.Input(shape=(50,), name='attr')
    wv_in = layers.Input(shape=(600,), name='wv')
    img_feat_in = layers.Input(shape=(img_flat_len,), name='img')

    attr_projection = layers.Dense(
        600,
        use_bias=True,
        kernel_initializer=kernel_initializer,
        kernel_regularizer=l2(1e-4),
        name='attr_dense',
    )(attr_in)
    text_features = layers.Concatenate(name='attr_word_emb')(
        [wv_in, attr_projection])

    # Encoder: text features -> image-feature space.
    encoder_widths = [int(img_flat_len * scale) for scale in (2, 1.5, 1.25)]
    encoded = self.full_connect_layer(
        text_features,
        hidden_dim=encoder_widths,
        activation='relu',
        resnet=False,
        drop_out_ratio=0.2,
    )
    encoded = self.full_connect_layer(
        encoded, hidden_dim=[img_flat_len], activation='relu')
    mse_loss = K.mean(mean_squared_error(img_feat_in, encoded))

    # Decoder: image-feature space -> attribute predictions.
    decoder_widths = [int(out_size * scale) for scale in (20, 15, 7)]
    attr_preds = self.full_connect_layer(
        encoded,
        hidden_dim=decoder_widths,
        activation='relu',
        resnet=False,
        drop_out_ratio=0.2,
    )
    attr_preds = self.full_connect_layer(
        attr_preds, hidden_dim=[out_size], activation='sigmoid')
    log_loss = K.mean(binary_crossentropy(attr_in, attr_preds))

    loss = (1 - gamma) * mse_loss + gamma * log_loss

    model = Model([attr_in, wv_in, img_feat_in],
                  outputs=[encoded, attr_preds])
    model.add_loss(loss)
    model.compile(optimizer=Adam(lr=1e-4), loss=None)
    return model
def loop_dataset(model, dataset, optimizer=None, print_every=32):
    """Run one pass over ``dataset``, optionally training, and return mean loss.

    Args:
        model: Callable model; its ``trainable_variables`` are updated when
            an optimizer is supplied.
        dataset: Iterable yielding ``(mols, props)`` batches.
        optimizer: When given, gradients of the batch loss are applied after
            every batch; when ``None`` the pass is evaluation-only.
        print_every: Print the running mean loss every this many batches.

    Returns:
        The running mean of the per-batch mean losses (``0`` when the
        dataset yields no batches).
    """
    mean_loss = 0
    for it, (mols, props) in enumerate(dataset):
        with tf.GradientTape() as tape:
            props_pred = model(mols)
            loss = mean_squared_error(props_pred, props)
        loss_value = loss.numpy().mean()
        # Incremental mean, so no per-batch history has to be kept.
        mean_loss = (it * mean_loss + loss_value) / (it + 1)
        if optimizer is not None:
            # Only trainable variables are differentiated: model.variables
            # also contains non-trainable state (e.g. frozen layers), which
            # must not be updated by the optimizer.
            variables = model.trainable_variables
            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))
        if it % print_every == 0:
            print("%d: loss %.4f." %(it, mean_loss))
    return mean_loss
def test_linear_model_with_sparse_input_and_custom_training(self):
    """Train a bias-free LinearModel on random sparse input with a manual loop.

    Each of the 64 rows randomly activates column 0, column 1 or both; the
    target is ``0.3 * col0 + 0.2 * col1`` over the active columns. The model
    is then run through 20 SGD steps with a GradientTape.
    """
    batch_size = 64
    indices = []
    values = []
    target = np.zeros((batch_size, 1))

    def _draw():
        return np.random.uniform(low=-5, high=5)

    with context.eager_mode():
        for row in range(batch_size):
            choice = np.random.randint(3)
            if choice == 0:
                val = _draw()
                indices.append((row, 0))
                values.append(val)
                target[row] = 0.3 * val
            elif choice == 1:
                val = _draw()
                indices.append((row, 1))
                values.append(val)
                target[row] = 0.2 * val
            else:
                val_1 = _draw()
                val_2 = _draw()
                indices.extend([(row, 0), (row, 1)])
                values.extend([val_1, val_2])
                target[row] = 0.3 * val_1 + 0.2 * val_2

        indices = np.asarray(indices)
        values = np.asarray(values)
        shape = constant_op.constant([batch_size, 2], dtype=dtypes.int64)
        inp = sparse_tensor.SparseTensor(indices, values, shape)

        model = linear.LinearModel(use_bias=False)
        opt = gradient_descent.SGD()
        for _ in range(20):
            with backprop.GradientTape() as tape:
                output = model(inp)
                loss = backend.mean(
                    losses.mean_squared_error(target, output))
            grads = tape.gradient(loss, model.trainable_variables)
            opt.apply_gradients(zip(grads, model.trainable_variables))
def create_gcn(self, img_flat_len = 1024):
    """Build a graph-based model producing one classifier vector per class.

    The adjacency matrix is the cosine similarity (``1 - cosine distance``)
    of the first 300 embedding columns in ``self.class_id_emb_attr['emb']``.
    Attributes, embeddings and adjacency are fed as constant-tensor inputs.
    The row selected by ``class_index`` is regressed onto the ``img`` input
    with MSE via ``add_loss``.
    """
    class_table = self.class_id_emb_attr

    emb_matrix = np.array(list(class_table['emb']))[:, :300]
    similarity = 1 - sklearn.metrics.pairwise.pairwise_distances(
        emb_matrix, metric='cosine')

    # Constant-backed inputs: fixed per-class data baked into the graph.
    attr_input = layers.Input(tensor=tf.constant(
        np.array(list(class_table['attr']), dtype='float32')))
    all_word_emb = layers.Input(tensor=tf.constant(
        extract_array_from_series(class_table['emb']), dtype='float32'))
    class_index = layers.Input(shape=(1, ), name='class_index', dtype='int32')
    adj_graphs = layers.Input(tensor=tf.constant(similarity, dtype='float32'))
    img_features = layers.Input(shape=(img_flat_len,), name='img')

    attr_projection = layers.Dense(
        600,
        use_bias=False,
        kernel_initializer='he_normal',
        kernel_regularizer=l2(1e-4),
    )(attr_input)
    node_features = layers.Concatenate()([all_word_emb, attr_projection])

    widths = [int(img_flat_len * scale) for scale in (2, 1.5, 1.25)]
    all_classifier = self.full_connect_layer(
        node_features,
        hidden_dim=widths,
        activation='relu',
        adj_graphs=adj_graphs,
        drop_out_ratio=0.2,
    )
    all_classifier = self.full_connect_layer(
        all_classifier,
        hidden_dim=[img_flat_len],
        activation='relu',
        adj_graphs=adj_graphs,
    )

    # Pick the classifier rows addressed by class_index.
    selected = tf.gather_nd(all_classifier, class_index)
    mse_loss = K.mean(mean_squared_error(img_features, selected))

    model = Model(
        [class_index, img_features, attr_input, all_word_emb, adj_graphs],
        outputs=[all_classifier])
    model.add_loss(mse_loss)
    model.compile(optimizer=Adam(lr=1e-4), loss=None)
    return model
outputs.append(z) log_priors.append(l1.log_prior + l2.log_prior) log_posteriors.append(l1.log_posterior + l2.log_posterior) outputs = tf.convert_to_tensor( outputs) # Shape w_samples, batch_size, 2 log_priors = tf.convert_to_tensor(log_priors) # shape w_samples log_posteriors = tf.convert_to_tensor( log_posteriors) # shape w_samples # means = outputs means = tf.squeeze(tf.reduce_mean(outputs, axis=0), 1) # stddevs = tf.math.softplus(outputs[..., 1]) mse = mean_squared_error(y, tf.reduce_mean(means, axis=0)) mses.append(mse) likelihood_dist = tfd.Normal(loc=means, scale=1) # Vector of (w_samples, batch_size) log_likelihood = likelihood_dist.log_prob(y) # kl_weights = tf.cast((tf.pow(2, n_batches - batch_id)) / (tf.pow(2, n_batches) - 1), tf.float32) kl_weights = 1 / n_batches running_posterior += tf.reduce_sum(log_posteriors) running_prior += tf.reduce_sum(log_priors) running_likelihood += tf.reduce_sum(log_likelihood) loss = kl_weights * (tf.reduce_sum(log_posteriors) - tf.reduce_sum( log_priors)) - tf.reduce_sum(log_likelihood)
def adr(frames, actions, states, context_frames, Ec, Eo, A, Do, Da, La=None, gaussian_a=False, use_seq_len=12, lstm_units=256, lstm_layers=1, learning_rate=0.001, random_window=True, reconstruct_random_frame=True):
    """Build and compile the model that reconstructs the action-only error.

    ``Da`` produces an action-only reconstruction ``x_rec_a``; ``Eo``/``Do``
    then predict the residual between the target frames and ``x_rec_a`` as
    separate positive and negative parts. The training loss added to the
    model is the mean MSE of the residual plus the mean MSEs of the two
    rectified parts.

    Args:
        frames, actions, states: Passed to ``get_ins``; ``frames.shape`` must
            unpack into five static dimensions.
        context_frames: Number of leading frames fed to ``Ec``.
        Ec, Eo, A, Do, Da: Callable sub-models used to assemble the graph.
        La: Only called when ``gaussian_a`` is true.
        gaussian_a: Adds the second output of ``La`` to the ``Da`` input.
        use_seq_len: Window length; must be strictly below the sequence
            length of ``frames``.
        lstm_units, lstm_layers: Forwarded to ``get_ins`` as ``a_units`` and
            ``a_layers``.
        learning_rate: Adam learning rate.
        random_window: Forwarded to ``get_ins``.
        reconstruct_random_frame: Reconstruct a single randomly indexed frame
            instead of the whole window.

    Returns:
        A compiled Keras ``Model`` whose output is the recovered residual.
    """
    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len > use_seq_len
    frame_inputs, action_state, initial_state, _, ins = get_ins(
        frames,
        actions,
        states,
        use_seq_len=use_seq_len,
        random_window=random_window,
        gaussian=gaussian_a,
        a_units=lstm_units,
        a_layers=lstm_layers)
    # context frames at the beginning
    xc_0 = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))
    x_to_recover = frame_inputs
    n_frames = use_seq_len
    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    # keep only the encoding (and skips) of the last context frame
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    skips = slice_skips(skips_0, start=context_frames - 1, length=1)
    if reconstruct_random_frame:
        a_s_dim = action_state.shape[-1]
        rand_index_1 = tf.random.uniform((),
                                         minval=0,
                                         maxval=use_seq_len,
                                         dtype='int32')
        # actions up to and including the sampled step
        action_state = tf.slice(action_state, (0, 0, 0),
                                (bs, rand_index_1 + 1, a_s_dim))
        # NOTE(review): this slices `frames`, whereas the rest of the function
        # works on `frame_inputs` from get_ins — confirm the two are aligned
        # when random_window is enabled.
        x_to_recover = tf.slice(frames, (0, rand_index_1, 0, 0, 0),
                                (bs, 1, w, h, c))
        n_frames = rand_index_1 + 1
    else:
        skips = repeat_skips(skips, use_seq_len)
    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)
    if gaussian_a:
        _, za, _, _ = La([hc_ha, initial_state])
        hc_ha = K.concatenate([hc_repeat, ha, za], axis=-1)
    if reconstruct_random_frame:
        # keep only the last time step of each sequence
        _, hc_ha = tf.split(hc_ha, [-1, 1], axis=1)
        _, ha = tf.split(ha, [-1, 1], axis=1)
        hc_repeat = hc_0
    x_rec_a = Da([hc_ha, skips])
    # --> Changed the input to Eo from the error image to the full frame and the action only prediction
    x_rec_a_pos = K.relu(x_to_recover - x_rec_a)
    x_rec_a_neg = K.relu(x_rec_a - x_to_recover)
    # xo_rec_a = K.concatenate([x_rec_a_pos, x_rec_a_neg], axis=-1)
    xo_rec_a = K.concatenate([x_to_recover, x_rec_a], axis=-1)
    ho, _ = Eo(xo_rec_a)
    # ho = Eo(xo_rec_a)
    h = K.concatenate([hc_repeat, ha, ho], axis=-1)  # multiple reconstruction
    x_err = Do([h, skips])
    # first three channels: positive error, remaining channels: negative error
    x_err_pos = x_err[:, :, :, :, :3]
    x_err_neg = x_err[:, :, :, :, 3:]
    x_recovered = x_err_pos - x_err_neg
    x_target = x_to_recover - x_rec_a
    x_target_pos = x_rec_a_pos
    x_target_neg = x_rec_a_neg
    # == Autoencoder
    model = Model(inputs=ins, outputs=x_recovered)
    rec_loss = mean_squared_error(x_target, x_recovered)
    model.add_metric(K.mean(rec_loss), name='rec_loss', aggregation='mean')
    rec_loss_pos = mean_squared_error(x_target_pos, x_err_pos)
    model.add_metric(rec_loss_pos, name='rec_loss_pos', aggregation='mean')
    rec_loss_neg = mean_squared_error(x_target_neg, x_err_neg)
    model.add_metric(rec_loss_neg, name='rec_loss_neg', aggregation='mean')
    # reported as a metric only; not part of the training loss
    rec_action_only_loss = mean_squared_error(x_rec_a, x_to_recover)
    model.add_metric(rec_action_only_loss, name='rec_A', aggregation='mean')
    model.add_loss(
        K.mean(rec_loss) + (K.mean(rec_loss_pos) + K.mean(rec_loss_neg)))
    model.compile(optimizer=Adam(lr=learning_rate))
    return model
def build_model(data_set, tensorboard_callback):
    """Build and compile a two-channel (X->Y and Y->X) Keras model.

    Layers are created from ``data_set.params().LAYERS_SPEC``. Each spec entry
    is ``(layer_type, layer_size[, is_representation_layer])``; a size of
    ``-1`` marks the last layer and is replaced by the Y dimension. For every
    forward layer a reverse layer is created (tied to the forward one for the
    tied layer types), and the reverse channel is assembled by walking the
    reverse list backwards. The combined loss sums the two reconstruction
    MSEs, an optional MSE between the two representation tensors, and
    optional whitening penalties on each representation's covariance.

    Args:
        data_set: Provides ``x_train()``, ``y_train()``, ``params()`` and
            ``get_tb_image_varibles``.
        tensorboard_callback: Receives image variables and is handed to the
            learning-rate controller.

    Returns:
        The compiled ``tf.keras.Model`` with inputs ``[x, y]`` and outputs
        ``[x->y, y->x]``.
    """
    x_train = data_set.x_train()
    y_train = data_set.y_train()
    assert (len(x_train.shape) == 2)
    assert (len(y_train.shape) == 2)
    x_input_size = util.shape_i(x_train, 1)
    y_input_size = util.shape_i(y_train, 1)
    x_input = tf.keras.Input(shape=(x_input_size, ))
    y_input = tf.keras.Input(shape=(y_input_size, ))
    prev_layer_size = x_input_size
    assert (len(y_train.shape) == 2)
    y_ouput_size = util.shape_i(y_train, 1)
    layers_x_to_y = []
    layers_y_to_x = []
    is_last_layer = False
    representation_layer_size = None
    for spec in data_set.params().LAYERS_SPEC:
        # nothing may follow the layer whose size was given as -1
        assert (not is_last_layer)
        is_representation_layer = False
        if len(spec) > 2:
            is_representation_layer = spec[2]
        layer_type = spec[0]
        layer_size = spec[1]
        if layer_size == -1:
            layer_size = y_ouput_size
            is_last_layer = True  # size -1 is only for last layer
        is_tied = layer_type in [
            layers.tied.TiedDenseLayer, layers.tied.LocallyDenseLayer
        ]
        layer_kwargs = dict(units=layer_size,
                            kernel_regularizer=regularizers.l2(
                                data_set.params().WEIGHT_DECAY))
        LXY = layer_type(**layer_kwargs)
        activation_xy = activation_yx = None
        if not is_last_layer:
            activation_xy = LeakyReLU(alpha=data_set.params().LEAKINESS)
            activation_yx = LeakyReLU(alpha=data_set.params().LEAKINESS)
        batch_norm_xy = batch_norm_yx = None
        if not is_last_layer and data_set.params().BN:
            gamma_coef = data_set.params().GAMMA_COEF
            batch_norm_xy = BatchNormalization(
                gamma_regularizer=inverse_l2_reg_func(gamma_coef))
            batch_norm_yx = BatchNormalization(
                gamma_regularizer=inverse_l2_reg_func(gamma_coef))
        # We need to build LXY so we can tie internal kernel to LYX
        xy_input_shape = (None, prev_layer_size)
        print(f"Layer X->Y build {type(LXY)}(kwargs={layer_kwargs})")
        LXY.build(input_shape=xy_input_shape)
        # We use prev_layer_size as number of units to the reverse layer
        layer_kwargs = dict(units=prev_layer_size,
                            kernel_regularizer=regularizers.l2(
                                data_set.params().WEIGHT_DECAY))
        if is_tied:
            layer_kwargs["tied_layer"] = LXY
        LYX = layer_type(**layer_kwargs)
        print(f"Layer Y->X build {type(LYX)}(kwargs={layer_kwargs})")
        noise_XY = noise_YX = None
        if not is_last_layer and data_set.params().NOISE_LAYER:
            noise_kwargs = dict(rate=data_set.params().DROP_PROBABILITY)
            noise_XY = data_set.params().NOISE_LAYER(**noise_kwargs)
            print(
                f"Noise X->Y build {type(noise_XY)}(input_shape={(None, layer_size)})"
            )
            noise_XY.build(input_shape=(None, layer_size))
            if data_set.params().NOISE_LAYER == layers.tied.TiedDropoutLayer:
                noise_kwargs["tied_layer"] = noise_XY
            noise_YX = data_set.params().NOISE_LAYER(**noise_kwargs)
        # Build channel x-->y
        if is_representation_layer:
            # add a "bookmark" to the layers list
            layers_x_to_y.append(BOOKMARK_REPRESENTATION_LAYER)
            representation_layer_size = layer_size
        layers_x_to_y.append(LXY)
        if data_set.params().BN_ACTIVATION:
            layers_x_to_y.append(batch_norm_xy)
            layers_x_to_y.append(activation_xy)
        else:
            layers_x_to_y.append(activation_xy)
            layers_x_to_y.append(batch_norm_xy)
        layers_x_to_y.append(noise_XY)
        # Build channel x-->y in reverse
        layers_y_to_x.append(LYX)
        layers_y_to_x.append(noise_YX)
        if data_set.params().BN_ACTIVATION:
            # opposite order from the branch above because this list is
            # consumed in reverse
            layers_y_to_x.append(activation_yx)
            layers_y_to_x.append(batch_norm_yx)
        else:
            layers_y_to_x.append(batch_norm_yx)
            layers_y_to_x.append(activation_yx)
        if is_representation_layer:
            # add a "bookmark" to the layers list (in reverse)
            layers_y_to_x.append(BOOKMARK_REPRESENTATION_LAYER)
        prev_layer_size = layer_size
    channel_x_to_y = x_input
    is_representation_layer = False
    representation_layer_xy = None
    # loop layers_x_to_y to build the channel
    for lay in layers_x_to_y:
        if lay is None:
            continue
        if lay == BOOKMARK_REPRESENTATION_LAYER:
            is_representation_layer = True  # mark for next
            continue
        # Using Keras functional API to stack the layers.
        channel_x_to_y = lay(channel_x_to_y)
        if is_representation_layer:
            # in this channel the bookmark is BEFORE the layer
            assert (representation_layer_xy is None)
            representation_layer_xy = channel_x_to_y
            is_representation_layer = False
    channel_y_to_x = y_input
    representation_layer_yx = None
    # loop reversed(layers_y_to_x) to build the other channel
    for lay in reversed(layers_y_to_x):
        if lay is None:
            continue
        if lay == BOOKMARK_REPRESENTATION_LAYER:
            # in this channel the bookmark is AFTER the layer
            assert (representation_layer_yx is None)
            representation_layer_yx = channel_y_to_x
            continue
        # Using Keras functional API to stack the layers.
        channel_y_to_x = lay(channel_y_to_x)
    # Combined Loss
    loss_x = data_set.params().LOSS_X * losses.mean_squared_error(
        x_input, channel_y_to_x)
    loss_y = data_set.params().LOSS_Y * losses.mean_squared_error(
        y_input, channel_x_to_y)
    loss_representation = 0.0
    # assert(representation_layer_xy is not None and representation_layer_yx is not None)
    if data_set.params(
    ).L2_LOSS != 0.0 and representation_layer_xy is not None:
        # loss_representation is named 'loss_l2' in original code.
        loss_representation = data_set.params(
        ).L2_LOSS * losses.mean_squared_error(representation_layer_xy,
                                              representation_layer_yx)
    loss_withen_x = 0.0
    loss_withen_y = 0.0
    cov_x = None
    cov_y = None
    if representation_layer_xy is not None:
        # mean_squared_error takes into account the batch size.
        # when calculating the covariance matrix - we need to do this also
        cov_x = K.dot(tf.transpose(representation_layer_xy),
                      representation_layer_xy) / data_set.params().BATCH_SIZE
        # TODO(Franji): using BATCH_SIZE here means in test mode loss_withen_x is wrong
        # The penalty is (norm of the full covariance) - (norm of its
        # diagonal). tf.diag builds a diagonal tensor from its input instead
        # of extracting the diagonal, which made both norms equal and the
        # penalty vanish; tf.diag_part extracts the diagonal entries.
        loss_withen_x = data_set.params().WITHEN_REG_X * (
            K.sqrt(K.sum(K.sum(cov_x**2))) -
            K.sqrt(K.sum(tf.diag_part(cov_x)**2)))
    if representation_layer_yx is not None:
        cov_y = K.dot(tf.transpose(representation_layer_yx),
                      representation_layer_yx) / data_set.params().BATCH_SIZE
        # same off-diagonal penalty for the Y->X representation
        loss_withen_y = data_set.params().WITHEN_REG_Y * (
            K.sqrt(K.sum(K.sum(cov_y**2))) -
            K.sqrt(K.sum(tf.diag_part(cov_y)**2)))

    def combined_loss(_y_true_unused, _y_pred_unused):
        # The loss is built from graph tensors captured above; the arguments
        # Keras passes in are ignored.
        return loss_x + loss_y + loss_representation + loss_withen_x + loss_withen_y

    # add images to see what's going on:
    dummy_metic_for_images, image_variables = data_set.get_tb_image_varibles(
        x_input, y_input, channel_y_to_x, channel_x_to_y)
    tensorboard_callback.add_image_variables(image_variables)
    if representation_layer_yx is not None:
        dummy_metric_for_cov, cov_image_variables = get_cov_image_varibles(
            cov_x, cov_y)
        tensorboard_callback.add_image_variables(cov_image_variables)
    # We have a model
    model = tf.keras.Model(inputs=[x_input, y_input],
                           outputs=[channel_x_to_y, channel_y_to_x])
    base_lr = data_set.params().BASE_LEARNING_RATE
    batches = util.shape_i(x_train, 0) // data_set.params().BATCH_SIZE
    steps = data_set.params().EPOCH_NUMBER * batches
    learning_rate_control = LearningRateControl(
        min_lr=base_lr,
        max_lr=base_lr * 50,
        step_max_lr=int(steps) // 2,
        step_min_lr=int(steps),
        tensorboardimage=tensorboard_callback)
    optimizer = tf.train.MomentumOptimizer(
        use_nesterov=True,
        learning_rate=
        learning_rate_control,  ###data_set.params().BASE_LEARNING_RATE,
        momentum=data_set.params().MOMENTUM)

    # if tensorboard_callback:
    #     tensorboard_callback.add_scalar("combined_loss", combined_loss(0,0))

    def metric_learning_rate(_y_true_unused, _y_pred_unused):
        return learning_rate_control()

    def calculate_cca():
        return util.cross_correlation_analysis(representation_layer_xy,
                                               representation_layer_yx,
                                               representation_layer_size)

    def metric_cca(_y_true_unused, _y_pred_unused):
        # return K.switch(K.learning_phase(), tf.constant(0.0), calculate_cca)
        return calculate_cca()

    def metric_var_x(_y_true_unused, _y_pred_unused):
        return K.mean(K.var(representation_layer_xy))

    def metric_var_y(_y_true_unused, _y_pred_unused):
        return K.mean(K.var(representation_layer_yx))

    model.compile(
        optimizer,
        loss=combined_loss,
        metrics=[
            # dummy_metic_for_images,
            # metric_learning_rate,
            # dummy_metric_for_cov,
            metric_cca,
            metric_var_x,
            metric_var_y,
        ])
    return model
def adr_ao(frames, actions, states, context_frames, Ec, A, D, learning_rate=0.01, gaussian=False, kl_weight=None, L=None, use_seq_len=12, lstm_units=None, lstm_layers=None, training=True, reconstruct_random_frame=False, random_window=True):
    """Build and compile the action-only encoder/decoder model.

    Two context windows are encoded with ``Ec``: one at a random offset
    (used for decoding) and one at offset 0. The loss is the reconstruction
    MSE plus the MSE between the two context encodings (``sim_loss``) and,
    when ``gaussian`` is set, ``kl_weight`` times the KL term.

    Args:
        frames, actions, states: Passed to ``get_ins``; ``frames.shape`` must
            unpack into five static dimensions.
        context_frames: Number of frames in each context window.
        Ec, A, D: Callable sub-models used to assemble the graph.
        learning_rate: Adam learning rate.
        gaussian: Enables the latent branch through ``L``.
        kl_weight: Multiplier of the KL term.
            NOTE(review): defaults to None but is multiplied unconditionally
            when ``gaussian`` is true — callers presumably must supply it.
        L: Latent model, called only when ``gaussian`` is true (defaults to
            None, so the same caveat applies).
        use_seq_len: Window length; must not exceed the sequence length.
        lstm_units, lstm_layers: Forwarded to ``get_ins`` as ``a_units`` and
            ``a_layers``.
        training: When exactly ``False``, ``mu`` is used in place of the
            sampled ``z``.
        reconstruct_random_frame: Reconstruct a single randomly indexed frame
            instead of the whole window.
        random_window: Forwarded to ``get_ins``.

    Returns:
        The compiled Keras ``Model``.
    """
    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len >= use_seq_len
    frame_inputs, action_state, initial_state, _, ins = get_ins(
        frames,
        actions,
        states,
        use_seq_len=use_seq_len,
        random_window=random_window,
        gaussian=gaussian,
        a_units=lstm_units,
        a_layers=lstm_layers)
    rand_index_1 = tf.random.uniform(shape=(),
                                     minval=0,
                                     maxval=use_seq_len - context_frames + 1,
                                     dtype='int32')
    # Random xc_0, as an artificial way of augmenting the dataset
    xc_0 = tf.slice(frame_inputs, (0, rand_index_1, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))
    xc_1 = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))
    x_to_recover = frame_inputs
    n_frames = use_seq_len
    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    hc_1, _ = Ec(xc_1)
    # keep only the encoding of the last frame of each context window
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    hc_1 = tf.slice(hc_1, (0, context_frames - 1, 0), (-1, 1, -1))
    skips = slice_skips(skips_0, start=context_frames - 1, length=1)
    if reconstruct_random_frame:
        action_state_len = action_state.shape[-1]
        rand_index_2 = tf.random.uniform(shape=(),
                                         minval=0,
                                         maxval=use_seq_len,
                                         dtype='int32')
        # actions up to and including the sampled step
        action_state = tf.slice(action_state, (0, 0, 0),
                                (bs, rand_index_2 + 1, action_state_len))
        x_to_recover = tf.slice(frame_inputs, (0, rand_index_2, 0, 0, 0),
                                (bs, 1, w, h, c))
        n_frames = rand_index_2 + 1
    else:
        skips = repeat_skips(skips, use_seq_len)
    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)
    if gaussian:
        z, mu, logvar, state = L([hc_ha, initial_state])
        z = mu if training is False else z
        hc_ha = K.concatenate([hc_repeat, ha, z], axis=-1)
    if reconstruct_random_frame:
        # keep only the last time step of each sequence
        _, hc_ha = tf.split(hc_ha, [-1, 1], axis=1)
        if gaussian:
            _, mu = tf.split(mu, [-1, 1], axis=1)
            _, logvar = tf.split(logvar, [-1, 1], axis=1)
    x_recovered = D([hc_ha, skips])
    rec_loss = mean_squared_error(x_to_recover, x_recovered)
    # pulls the two context encodings towards each other
    sim_loss = mean_squared_error(hc_0, hc_1)
    if gaussian:
        ED = Model(inputs=ins, outputs=[x_recovered, x_to_recover, mu, logvar])
    else:
        ED = Model(inputs=ins, outputs=[x_recovered, x_to_recover])
    ED.add_metric(rec_loss, name='rec_loss', aggregation='mean')
    ED.add_metric(sim_loss, name='sim_loss', aggregation='mean')
    if gaussian:
        kl_loss = kl_unit_normal(mu, logvar)
        ED.add_metric(kl_loss, name='kl_loss', aggregation='mean')
        ED.add_loss(
            K.mean(rec_loss) + K.mean(sim_loss) + kl_weight * K.mean(kl_loss))
    else:
        ED.add_loss(K.mean(rec_loss) + K.mean(sim_loss))
    ED.compile(optimizer=Adam(lr=learning_rate))
    return ED
def adr_vp_teacher_forcing(frames, actions, states, context_frames, Ec, Eo, A, Do, Da, L, La=None, gaussian_a=False, use_seq_len=12, lstm_a_units=256, lstm_a_layers=1, lstm_units=256, lstm_layers=2, learning_rate=0.001, random_window=False):
    """Build and compile the teacher-forced video-prediction model.

    ``Da`` gives an action-only reconstruction; its rectified positive and
    negative errors against the ground-truth frames are encoded by ``Eo``.
    ``L`` predicts the next-step encoding from the current one, and ``Do``
    decodes both the current and the predicted encoding into error images
    that are added back onto the action-only reconstruction. The training
    loss is the mean MSE of the one-step-ahead frame prediction plus the
    mean MSEs of its positive and negative error parts; the other MSEs are
    reported as metrics only.

    Args:
        frames, actions, states: Passed to ``get_ins``; ``frames.shape`` must
            unpack into five static dimensions.
        context_frames: Number of leading frames fed to ``Ec``.
        Ec, Eo, A, Do, Da, L: Callable sub-models used to assemble the graph.
        La: Only called when ``gaussian_a`` is true.
        gaussian_a: Adds the second output of ``La`` to the ``Da`` input.
        use_seq_len: Window length; must not exceed the sequence length.
        lstm_a_units, lstm_a_layers, lstm_units, lstm_layers: Forwarded to
            ``get_ins``.
        learning_rate: Adam learning rate.
        random_window: Forwarded to ``get_ins``.

    Returns:
        The compiled Keras ``Model`` named ``'vp_model'``.
    """
    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len >= use_seq_len
    frame_inputs, action_state, initial_state_a, initial_state, ins = get_ins(
        frames,
        actions,
        states,
        use_seq_len=use_seq_len,
        random_window=random_window,
        gaussian=gaussian_a,
        a_units=lstm_a_units,
        a_layers=lstm_a_layers,
        units=lstm_units,
        layers=lstm_layers,
        lstm=True)
    # context frames at the beginning
    xc_0 = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))
    n_frames = use_seq_len
    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    # keep only the encoding (and skips) of the last context frame
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    skips_0 = slice_skips(skips_0, start=context_frames - 1, length=1)
    skips = repeat_skips(skips_0, n_frames)
    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)
    if gaussian_a:
        _, za, _, _ = La([hc_ha, initial_state_a])  # za taken as the mean
        hc_ha = K.concatenate([hc_repeat, ha, za], axis=-1)
    x_rec_a = Da([hc_ha, skips])  # agent only prediction
    x_err_pos = K.relu(frame_inputs - x_rec_a)
    x_err_neg = K.relu(x_rec_a - frame_inputs)
    # xo_rec_a = K.concatenate([frame_inputs, x_rec_a], axis=-1)  # --> Here the action only image is not needed
    xo_rec_a = K.concatenate([x_err_pos, x_err_neg],
                             axis=-1)  # ground truth error components
    # Each helper returns both pieces of the split along the time axis;
    # callers keep the piece they need and discard the other.
    remove_first_step = Lambda(
        lambda _x: tf.split(_x, [1, -1], axis=1))  # new operations
    remove_last_step = Lambda(lambda _x: tf.split(_x, [-1, 1], axis=1))
    ho, _ = Eo(xo_rec_a)
    # from here on every sequence has n_frames - 1 steps
    hc = RepeatVector(n_frames - 1)(K.squeeze(hc_0, axis=1))
    skips = repeat_skips(skips_0, ntimes=n_frames - 1)
    ha_t, _ = remove_last_step(ha)  # [0 to 18]
    _, ha_tp1 = remove_first_step(ha)  # [1 to 19]
    ho_t, _ = remove_last_step(ho)  # [0 to 18]
    h = tf.concat([hc, ha_t, ha_tp1, ho_t], axis=-1)  # [0 to 18]
    ho_pred, _ = L([h, initial_state])  # [1 to 19]
    _, ho_tp1 = remove_first_step(ho)  # [1 to 19] Target for LSTM outputs
    x_rec_a_t, _ = remove_last_step(x_rec_a)  # [0 to 18] Used to obtain x_curr
    _, x_rec_a_tp1 = remove_first_step(
        x_rec_a)  # [1 to 19] Used to obtain x_pred
    _, x_target_pred = remove_first_step(
        frame_inputs)  # Target for Do pred reconstruction
    _, x_err_pos_target = remove_first_step(
        x_err_pos)  # Target for Do pred reconstruction
    _, x_err_neg_target = remove_first_step(
        x_err_neg)  # Target for Do pred reconstruction
    # reconstruct current step
    h = tf.concat([hc, ha_t, ho_t], axis=-1)
    x_err_curr = Do([h, skips])
    x_target_curr, _ = remove_last_step(
        frame_inputs)  # [0 to 18] Target for x_curr
    # first three channels: positive error, remaining channels: negative error
    x_err_curr_pos = x_err_curr[:, :, :, :, :3]
    x_err_curr_neg = x_err_curr[:, :, :, :, 3:]
    x_curr = x_rec_a_t + x_err_curr_pos - x_err_curr_neg
    # predict one step ahead
    h = tf.concat([hc, ha_tp1, ho_pred], axis=-1)
    x_err_pred = Do([h, skips])
    x_err_pred_pos = x_err_pred[:, :, :, :, :3]
    x_err_pred_neg = x_err_pred[:, :, :, :, 3:]
    x_pred = x_rec_a_tp1 + x_err_pred_pos - x_err_pred_neg
    model = Model(inputs=ins,
                  outputs=[ho_pred, x_curr, x_pred, x_rec_a, x_target_pred],
                  name='vp_model')
    # ho_mse, rec_curr and rec_A are reported as metrics only
    ho_mse = mean_squared_error(y_pred=ho_pred, y_true=ho_tp1)
    model.add_metric(K.mean(ho_mse), name='ho_mse', aggregation='mean')
    rec_curr = mean_squared_error(y_pred=x_curr, y_true=x_target_curr)
    model.add_metric(rec_curr, name='rec_curr', aggregation='mean')
    rec_pred = mean_squared_error(y_pred=x_pred, y_true=x_target_pred)
    model.add_metric(rec_pred, name='rec_pred', aggregation='mean')
    rec_pos = mean_squared_error(y_pred=x_err_pred_pos,
                                 y_true=x_err_pos_target)
    rec_neg = mean_squared_error(y_pred=x_err_pred_neg,
                                 y_true=x_err_neg_target)
    rec_A = mean_squared_error(y_pred=x_rec_a, y_true=frame_inputs)
    model.add_metric(rec_A, name='rec_A', aggregation='mean')
    # why did I have rec_curr??
    # model.add_loss(0.5*K.mean(ho_mse) + 0.125*K.mean(rec_curr) + 0.125*K.mean(rec_pred)
    #                + 0.125*K.mean(rec_pos) + 0.125*K.mean(rec_neg))
    # model.add_loss(0.5*K.mean(ho_mse) + 0.5/3*(K.mean(rec_pred)) + K.mean(rec_pos) + K.mean(rec_neg))
    model.add_loss(K.mean(rec_pred) + K.mean(rec_pos) + K.mean(rec_neg))
    model.compile(Adam(lr=learning_rate))
    return model
def adr_vp_feedback_frames(frames, actions, states, context_frames, Ec, Eo, A, Do, Da, L, La=None,
                           gaussian_a=False, use_seq_len=12, lstm_a_units=256, lstm_a_layers=1,
                           lstm_units=256, lstm_layers=2, learning_rate=0.0, random_window=False):
    """Build and compile the feedback-frames video prediction model.

    The model is unrolled step by step. For the first ``context_frames``
    steps the ground-truth frame is used as input; from then on the frame
    predicted at the previous step is fed back instead.

    Args:
        frames: Tensor with a fully static 5-dimensional shape; the second
            dimension is the sequence length (presumably
            ``(batch, time, width, height, channels)`` -- TODO confirm).
        actions: Forwarded to ``get_ins``.
        states: Forwarded to ``get_ins``.
        context_frames: Number of leading frames passed to ``Ec``; also the
            step index from which predictions replace ground-truth inputs.
        Ec: Callable applied to the context frames; returns ``(hc, skips)``.
            Only the last context step of both outputs is kept.
        Eo: Callable applied to the channel-wise concatenation of the current
            frame and the agent-only frame; its first output is used.
        A: Callable applied to the action/state input from ``get_ins``.
        Do: Callable applied to ``[features, skips]``; its output is split on
            the last axis into a positive (first 3 channels) and a negative
            (remaining channels) error component.
        Da: Callable applied to ``[features, skips]`` producing the
            agent-only prediction ``x_rec_a``.
        L: Recurrent callable applied to ``[features, state]``; returns
            ``(ho_pred, state)``.
        La: Callable used only when ``gaussian_a`` is true; its second output
            is appended to the agent features.
        gaussian_a: Enables the ``La`` branch; also forwarded to ``get_ins``.
        use_seq_len: Number of frames to unroll over; must not exceed the
            sequence length of ``frames``.
        lstm_a_units, lstm_a_layers, lstm_units, lstm_layers: Forwarded to
            ``get_ins``.
        learning_rate: Learning rate handed to ``Adam`` as-is (default 0.0).
        random_window: Forwarded to ``get_ins``.

    Returns:
        A compiled ``Model`` named ``'vp_model'`` whose outputs are
        ``[x_pred, x_pred, x_pred, x_rec_a, x_target]``. Its loss is the mean
        of the prediction MSE; ``rec_pred`` and ``rec_A`` are added as metrics.
    """
    # Unpacking requires a static 5-D shape; only the sequence length is used.
    _bs, seq_len, _w, _h, _c = [int(s) for s in frames.shape]
    assert seq_len >= use_seq_len, 'use_seq_len must not exceed the sequence length of frames'

    frame_inputs, action_state, initial_state_a, initial_state, ins = get_ins(
        frames, actions, states, use_seq_len=use_seq_len, random_window=random_window,
        gaussian=gaussian_a, a_units=lstm_a_units, a_layers=lstm_a_layers,
        units=lstm_units, layers=lstm_layers, lstm=True)

    # context frames at the beginning
    xc_0 = tf.slice(frame_inputs, (0, 0, 0, 0, 0), (-1, context_frames, -1, -1, -1))
    n_frames = use_seq_len

    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    # Keep only the last context step of the content features and skips.
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    skips_0 = slice_skips(skips_0, start=context_frames - 1, length=1)
    skips = repeat_skips(skips_0, n_frames)

    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)

    if gaussian_a:
        _, za, _, _ = La([hc_ha, initial_state_a])  # za taken as the mean
        hc_ha = K.concatenate([hc_repeat, ha, za], axis=-1)

    x_rec_a = Da([hc_ha, skips])  # agent only prediction

    x_pred = []
    prev_state = initial_state
    hc_t = hc_0

    ha_t, _ = tf.split(ha, [-1, 1], axis=1)  # remove last step
    _, ha_tp1 = tf.split(ha, [1, -1], axis=1)  # remove first step
    _, xa_tp1 = tf.split(x_rec_a, [1, -1], axis=1)
    x = frame_inputs
    xa = x_rec_a

    # Previous-step prediction; assigned at the end of every iteration and
    # read only once i >= context_frames.
    x_pred_t = None

    for i in range(n_frames - 1):
        # Peel the current step off the front of each remaining sequence.
        xa_t, xa = tf.split(xa, [1, -1], axis=1)
        xa_pred, xa_tp1 = tf.split(xa_tp1, [1, -1], axis=1)
        x_t, x = tf.split(x, [1, -1], axis=1)

        if i >= context_frames:
            # Past the context window: feed back the model's own prediction.
            x_t = x_pred_t

        x_xa_t = K.concatenate([x_t, xa_t], axis=-1)
        ho_t, _ = Eo(x_xa_t)

        _ha_t, ha_t = tf.split(ha_t, [1, -1], axis=1)
        _ha_tp1, ha_tp1 = tf.split(ha_tp1, [1, -1], axis=1)

        lstm_in = tf.concat([hc_t, _ha_t, _ha_tp1, ho_t], axis=-1)
        ho_pred, state = L([lstm_in, prev_state])

        h_pred_t = tf.concat([hc_t, _ha_tp1, ho_pred], axis=-1)
        x_err_pred_t = Do([h_pred_t, skips_0])

        # First three channels: positive error; remaining channels: negative.
        x_err_pred_pos = x_err_pred_t[:, :, :, :, :3]
        x_err_pred_neg = x_err_pred_t[:, :, :, :, 3:]
        x_pred_t = xa_pred + x_err_pred_pos - x_err_pred_neg
        x_pred.append(x_pred_t)

        prev_state = state

    # Obtain predicted frames
    x_pred = tf.squeeze(tf.stack(x_pred, axis=1), axis=2)
    _, x_target = tf.split(frame_inputs, [1, -1], axis=1)

    outs = [x_pred, x_pred, x_pred, x_rec_a, x_target]  # repetitions to match teacher forcing version

    model = Model(inputs=ins, outputs=outs, name='vp_model')

    rec_pred = mean_squared_error(y_pred=x_pred, y_true=x_target)
    model.add_metric(rec_pred, name='rec_pred', aggregation='mean')

    rec_A = mean_squared_error(y_pred=x_rec_a, y_true=frame_inputs)
    model.add_metric(rec_A, name='rec_A', aggregation='mean')

    model.add_loss(K.mean(rec_pred))

    # `lr` is a deprecated alias that newer Keras releases no longer honour;
    # `learning_rate` is the supported keyword.
    model.compile(optimizer=Adam(learning_rate=learning_rate))

    return model
def bce_dice_loss(y_true, y_pred):
    """Return the sum of binary cross-entropy, dice loss and MSE.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor.

    Returns:
        ``binary_crossentropy + dice_loss + mean_squared_error`` evaluated on
        the given pair.
    """
    bce_term = losses.binary_crossentropy(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    mse_term = losses.mean_squared_error(y_true, y_pred)
    return bce_term + dice_term + mse_term
def root_mean_squared_error(y_true, y_pred):
    """Return the element-wise square root of ``mean_squared_error``.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor.

    Returns:
        ``K.sqrt`` applied to the result of ``mean_squared_error``.
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = K.sqrt(mse)
    return rmse
def discriminator_loss(y_true, y_pred):
    """MSE loss that is zeroed wherever it does not exceed a threshold.

    The loss is multiplied by a 0/1 mask that is 1 only where the loss is
    strictly greater than ``_disc_train_thresh``; this binary thresholding is
    meant to prevent overtraining the discriminator.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor.

    Returns:
        The masked MSE tensor.
    """
    raw_mse = mean_squared_error(y_true, y_pred)
    threshold = k.constant(_disc_train_thresh)
    # 1.0 where the loss is above the threshold, 0.0 elsewhere.
    gate = k.cast(k.greater(raw_mse, threshold), k.floatx())
    return raw_mse * gate
def __init__(self, hidden=100, input_dim=28 * 28, thresh=.5, l1=0.01, l2=.01, ternary=True):
    """Build the network graph, loss and optimizer, then start a session.

    Args:
        hidden: Width of the hidden layer built in scope ``'L1'``.
        input_dim: Size of the input placeholder's feature axis; also the
            width of the output layer(s) and of ``desired_output``.
        thresh: Threshold forwarded to the ternary layer helpers.
        l1: Weight of the L1 penalty term in the loss.
        l2: Weight of the L2 penalty term in the loss.
        ternary: If true, use ``_TernaryFC`` layers (one hidden, three
            output); otherwise use ``_fc`` layers (one hidden, one output).
    """
    # Presumably filled by the layer helpers below; it is indexed when the
    # regularisation terms are built -- verify against those helpers.
    self.W = []
    self.thresh = thresh
    self.input_dim = input_dim
    self.input = tf.placeholder(tf.float32, shape=(None, input_dim), name='input')

    with tf.variable_scope('MaskLayer'):
        self.x = self._O2OTernaryLayer(self.input, thresh=thresh)

    with tf.variable_scope('L1'):
        if ternary:
            self.y = self._TernaryFC(self.x, hidden, thresh=thresh, name='1')
        else:
            self.y = self._fc(self.x, hidden, name='1')
        # NOTE(review): sigmoid is applied to both branches here -- confirm
        # against the original indentation.
        self.y = tf.nn.sigmoid(self.y)

    with tf.variable_scope('L2'):
        if ternary:
            self.output = self._TernaryFC(self.y, input_dim, thresh=thresh, name='2')
            self.output = self._TernaryFC(self.output, input_dim, thresh=thresh, name='3')
            self.output = self._TernaryFC(self.output, input_dim, thresh=thresh, name='4')
        else:
            self.output = self._fc(self.y, input_dim, name='2')

    self.desired_output = tf.placeholder(tf.float32, shape=(None, input_dim))

    # TODO: regularization on ternarize
    # NOTE(review): the L2 term uses W[0], W[1] and W[3] and skips W[2];
    # W[3] may not exist when ternary=False -- confirm this is intended.
    l1_loss = tf.reduce_mean(tf.abs(self.W[0]))
    l2_loss = (tf.reduce_mean(tf.square(self.W[0]))
               + tf.reduce_mean(tf.square(self.W[1]))
               + tf.reduce_mean(tf.square(self.W[3])))

    # Mean of sqrt(MSE) plus the weighted penalties.
    self.loss = tf.reduce_mean(
        tf.sqrt(mean_squared_error(self.output, self.desired_output))
    ) + l1 * l1_loss + l2 * l2_loss

    self.opt = tf.train.AdamOptimizer().minimize(self.loss)
    self.sess = tf.Session()
    # tf.initialize_all_variables() is deprecated; this is its replacement.
    self.sess.run(tf.global_variables_initializer())
    print(tf.trainable_variables())
def mse(self, y_true, y_pred):
    """Mean squared error between the argmax indices of both tensors.

    Args:
        y_true: Ground-truth tensor; reduced with ``tf.argmax`` on the last axis.
        y_pred: Predicted tensor; reduced with ``tf.argmax`` on the last axis.

    Returns:
        ``mean_squared_error`` of the two index tensors.
    """
    # NOTE(review): tf.argmax yields integer indices, so the error is
    # computed on integer tensors -- confirm this is intended.
    true_idx = tf.argmax(y_true, axis=-1)
    pred_idx = tf.argmax(y_pred, axis=-1)
    return mean_squared_error(true_idx, pred_idx)
def my_mse_loss(y_true, y_pred):
    """Mean MSE computed on column index 1 of both tensors only.

    Args:
        y_true: Ground-truth tensor; only ``y_true[:, 1]`` is used.
        y_pred: Predicted tensor; only ``y_pred[:, 1]`` is used.

    Returns:
        ``tf.reduce_mean`` of the MSE between the two selected columns.
    """
    # Restore a trailing axis so the MSE sees one-element rows.
    true_col = tf.expand_dims(y_true[:, 1], axis=-1)
    pred_col = tf.expand_dims(y_pred[:, 1], axis=-1)
    per_row = losses.mean_squared_error(true_col, pred_col)
    return tf.reduce_mean(per_row)