def gan_generator_neg_log(d_out_given_fake_for_gen, d_out_given_fake_for_dis, d_out_given_real):
    d_loss_fake = binary_crossentropy(
        T.zeros_like(d_out_given_fake_for_dis), d_out_given_fake_for_dis).mean()
    d_loss_real = binary_crossentropy(
        T.ones_like(d_out_given_real), d_out_given_real).mean()
    d_loss = d_loss_real + d_loss_fake
    d = d_out_given_fake_for_gen
    g_loss = -T.log(T.clip(d, 1e-7, 1 - 1e-7)).mean()
    return g_loss, d_loss, d_loss_real, d_loss_fake

def gan_binary_crossentropy(d_out_given_fake_for_gen, d_out_given_fake_for_dis, d_out_given_real):
    d_loss_fake = binary_crossentropy(
        T.zeros_like(d_out_given_fake_for_dis), d_out_given_fake_for_dis).mean()
    d_loss_real = binary_crossentropy(
        T.ones_like(d_out_given_real), d_out_given_real).mean()
    d_loss = d_loss_real + d_loss_fake
    g_loss = binary_crossentropy(
        T.ones_like(d_out_given_fake_for_gen), d_out_given_fake_for_gen).mean()
    return g_loss, d_loss, d_loss_real, d_loss_fake

def gan_generator_kl(d_out_given_fake_for_gen, d_out_given_fake_for_dis, d_out_given_real):
    """
    see: http://www.inference.vc/an-alternative-update-rule-for-generative-adversarial-networks/
    """
    d_loss_fake = binary_crossentropy(
        T.zeros_like(d_out_given_fake_for_dis), d_out_given_fake_for_dis).mean()
    d_loss_real = binary_crossentropy(
        T.ones_like(d_out_given_real), d_out_given_real).mean()
    d_loss = d_loss_real + d_loss_fake
    d = d_out_given_fake_for_gen
    e = 1e-7
    g_loss = -T.log(T.clip(d / (1 - d + e), e, 1 - e)).mean()
    return g_loss, d_loss, d_loss_real, d_loss_fake

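# --- Illustration (not part of the snippets above) ---
# A rough numpy-only check of the three generator objectives above, with made-up
# discriminator outputs: the "neg_log" form and binary crossentropy against a target
# of 1 are the same quantity, while the KL-style update from the inference.vc post
# adds a +log(1 - D) term.
import numpy as np

eps = 1e-7
d_fake = np.clip(np.array([0.05, 0.30, 0.70]), eps, 1 - eps)  # hypothetical D(G(z)) values

g_neg_log = -np.log(d_fake).mean()                                  # -log D(G(z))
g_bce = -(1.0 * np.log(d_fake) + 0.0 * np.log(1 - d_fake)).mean()   # BCE(target=1, D(G(z)))
g_kl = -np.log(d_fake / (1 - d_fake)).mean()                        # -log(D / (1 - D))

print(g_neg_log, g_bce, g_kl)  # first two match; the KL form differs by +log(1 - D)
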
def vae_loss(y_true, y_pred):
    recon = binary_crossentropy(y_true, y_pred)
    recon *= original_dim
    kl = 0.5 * K.sum(-1. - log_sigma + K.exp(log_sigma) + K.square(mu), axis=-1)
    loss = K.mean(kl + recon)
    return loss

def vae_loss(x, x_decoded_mean):
    x = K.flatten(x)
    x_decoded_mean = K.flatten(x_decoded_mean)
    xent_loss = max_length * objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.mean(
        1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

def vae_loss(x, x_decoded_mean):
    # NOTE: binary_crossentropy expects a batch_size by dim for x and x_decoded_mean,
    # so we MUST flatten these!
    x = K.flatten(x)
    x_decoded_mean = K.flatten(x_decoded_mean)
    xent_loss = np.dot(original_dim, original_dim) * objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

def vae_loss(input, decoded):
    xent_loss = objectives.binary_crossentropy(input, decoded)
    kl_loss = -0.5 * K.mean(1 + self.z_log_std - K.square(self.z_mean) - K.exp(self.z_log_std), axis=-1)
    return xent_loss + kl_loss

def vae_loss(input_phono, phono_decoded):
    xent_loss_phono = objectives.binary_crossentropy(input_phono, phono_decoded)
    kl_loss = -0.5 * K.mean(1 + z_log_std - K.square(z_mean) - K.exp(z_log_std), axis=-1)
    return xent_loss_phono + kl_loss

def _vae_loss(self, x, x_decoded_mean):
    n_inputs = self._model.get_input_shape_at(0)[1]
    z_mean = self._model.get_layer('z_mean').inbound_nodes[0].output_tensors[0]
    z_log_var = self._model.get_layer('z_log_var').inbound_nodes[0].output_tensors[0]
    xent_loss = n_inputs * objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

def test_adverse(dev, ad_model, gen_model, word_index, glove, train_len, batch_size=64, ci=False):
    mb = load_data.get_minibatches_idx(len(dev), batch_size, shuffle=False)
    p = Progbar(len(dev) * 2)
    for i, train_index in mb:
        if len(train_index) != batch_size:
            continue
        class_indices = [i % 3] * batch_size if ci else None
        X, y = adverse_batch([dev[k] for k in train_index], word_index, gen_model, train_len,
                             class_indices=class_indices)
        pred = ad_model.predict_on_batch(X)[0].flatten()
        loss = binary_crossentropy(y.flatten(), pred).eval()
        acc = sum(np.abs(y - pred) < 0.5) / float(len(y))
        p.add(len(X), [('test_loss', loss), ('test_acc', acc)])

def vae_loss(y_true, y_pred):
    # Loss = reconstruction loss + KL loss for each data point in the minibatch.
    # E(log P(X|z))
    # recon = K.sum(K.binary_crossentropy(y_pred, y_true), axis=1)
    recon = binary_crossentropy(y_true, y_pred)
    recon *= original_dim
    # D_KL(Q(z|X) || P(z|X)); computed in closed form since both distributions are Gaussian
    # kl = 0.5 * K.sum(K.exp(log_sigma) + K.square(mu) - 1. - log_sigma, axis=1)
    kl = 0.5 * K.sum(-1. - log_sigma + K.exp(log_sigma) + K.square(mu), axis=-1)
    loss = K.mean(kl + recon)
    # return recon + kl
    return loss

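# --- Illustration (not part of the snippet above) ---
# The D_KL comment above is the closed-form KL divergence between the diagonal
# Gaussian q(z|x) = N(mu, exp(log_sigma)) and the standard normal prior. A numpy
# check with made-up values (log_sigma being the log-variance, as in the snippets
# above) gives one non-negative term per sample:
import numpy as np

mu = np.array([[0.5, -1.0], [0.0, 2.0]])          # hypothetical z_mean, shape (batch, latent_dim)
log_sigma = np.array([[0.1, -0.3], [0.0, 0.5]])   # hypothetical log-variance, same shape

kl = 0.5 * np.sum(np.exp(log_sigma) + np.square(mu) - 1.0 - log_sigma, axis=-1)
print(kl)
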
def __init__(sf, input_dim, y_dim, z_dim):
    # copy and paste
    sf.input_dim = input_dim
    sf.y_dim = y_dim
    sf.z_dim = z_dim

    # encoder
    sf.x = Input(shape=(input_dim,))
    sf.enc_h_1 = Dense(500, activation='tanh', input_dim=input_dim)(sf.x)
    sf.enc_h_2 = Dense(200, activation='tanh')(sf.enc_h_1)
    sf.z_mean = Dense(z_dim)(sf.enc_h_2)
    sf.z_log_var = Dense(z_dim)(sf.enc_h_2)
    sf.y_probs = Dense(y_dim, activation='softmax')(sf.enc_h_2)
    sf.enc = Model(input=sf.x, output=[sf.z_mean, sf.z_log_var, sf.y_probs])

    # sampling using reparameterization
    def sampling(args):
        mean, log_var = args
        epsilon = K.random_normal(shape=(z_dim,), mean=0, std=1)
        return mean + K.exp(log_var / 2) * epsilon

    sf.z = Lambda(function=sampling)([sf.z_mean, sf.z_log_var])

    # decoder: creating layers to be reused
    z_fc = Dense(200, activation='tanh', input_dim=z_dim)
    y_fc = Dense(200, activation='tanh', input_dim=y_dim)
    merge_layer = Merge([Sequential([z_fc]), Sequential([y_fc])], mode="concat", concat_axis=1)
    h_fc = Dense(1000, activation='tanh')
    dec_fc = Dense(input_dim, activation='sigmoid')
    sf.dec = Sequential([merge_layer, h_fc, dec_fc])

    sf.z_h = z_fc(sf.z)
    sf.y_h = y_fc(sf.y_probs)
    sf.merged = merge([sf.z_h, sf.y_h], mode='concat', concat_axis=1)
    sf.dec_h = h_fc(sf.merged)
    sf.x_dec = dec_fc(sf.dec_h)

    # total model
    sf.vae = Model(input=sf.x, output=sf.x_dec)

    # Use a uniform for y_prior
    sf.xent_loss = tf.reduce_mean(sf.input_dim * objectives.binary_crossentropy(sf.x, sf.x_dec))
    sf.z_loss = -tf.reduce_mean(0.5 * K.sum(1 + sf.z_log_var - K.square(sf.z_mean) - K.exp(sf.z_log_var), axis=-1))
    # omit the constant term
    sf.y_loss = tf.reduce_mean(10 * K.sum(sf.y_probs * K.log(sf.y_probs * sf.y_dim), axis=-1))
    sf.loss = sf.xent_loss + sf.z_loss + sf.y_loss

def gmm_loss(y_true, y_pred):
    """
    GMM loss function.
    Assumes that y_pred has (D+2)*M dimensions and y_true has D dimensions.
    The first M*D features are treated as means, the next M features as
    standard devs and the last M features as mixture components of the GMM.
    """
    def loss(m, M, D, y_true, y_pred):
        mu = y_pred[:, D * m:(m + 1) * D]
        sigma = y_pred[:, D * M + m]
        alpha = y_pred[:, (D + 1) * M + m]
        return (alpha / sigma) * T.exp(-T.sum(T.sqr(mu - y_true), -1) / (2 * sigma ** 2))

    D = T.shape(y_true)[1] - 1
    M = (T.shape(y_pred)[1] - 1) / (D + 2)
    seq = T.arange(M)
    result, _ = theano.scan(fn=loss, outputs_info=None, sequences=seq,
                            non_sequences=[M, D, y_true[:, :-1], y_pred[:, :-1]])

    # add loss for vuv bit
    vuv_loss = binary_crossentropy(y_true[:, -1], y_pred[:, -1])
    # vuv_loss = 0
    return -T.log(result.sum(0) + 1e-7) - vuv_loss

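# --- Illustration (not part of the snippet above) ---
# Toy numpy check of the y_pred layout described in the gmm_loss docstring:
# [M*D means | M standard deviations | M mixture weights], mirroring the same
# per-component kernel (without the 1/sqrt(2*pi)^D normalisation, as above).
# All numbers are made up.
import numpy as np

D, M = 2, 3
y_true = np.array([[0.1, -0.4]])                   # shape (batch, D)
y_pred = np.concatenate([
    np.zeros((1, M * D)),                          # component means
    np.ones((1, M)),                               # component standard deviations
    np.full((1, M), 1.0 / M),                      # mixture weights
], axis=1)

density = np.zeros(1)
for m in range(M):
    mu = y_pred[:, D * m:D * (m + 1)]
    sigma = y_pred[:, D * M + m]
    alpha = y_pred[:, (D + 1) * M + m]
    density += (alpha / sigma) * np.exp(-np.sum((mu - y_true) ** 2, axis=-1) / (2 * sigma ** 2))

print(-np.log(density + 1e-7))                     # negative log-likelihood per sample
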
def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.mean(
        1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma),
        axis=-1)  # axis=-1 : last axis (average by latent_dim axis)
    return K.mean(xent_loss + kl_loss)  # mean with batch size dim

def vae_loss(y_true, y_pre):
    xent_loss = objectives.binary_crossentropy(y_true, y_pre)
    kl_loss = -0.5 * K.mean(
        1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
    return xent_loss + kl_loss

def vae_loss(input_img, output_img):
    reconstruction_loss = objectives.binary_crossentropy(
        input_img.flatten(), output_img.flatten())
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return reconstruction_loss + beta * kl_loss

def vae_loss(x, x_decoded_mean):
    xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.mean(1 + z_log_std - K.square(z_mean) - K.exp(z_log_std), axis=-1)
    return xent_loss + kl_loss

def loss(x, out):
    entropy = image_len * objectives.binary_crossentropy(x, out)
    KL_div = -0.5 * K.sum(1. + log_var - K.square(mean) - K.exp(log_var), axis=1)
    return entropy + KL_div

def cost(x, output):
    cost_kl = -0.5 * K.sum(1 + K.log(K.square(z_std)) - K.square(z_mean) - K.square(z_std), axis=-1)
    cost_ce = objectives.binary_crossentropy(x, output) * image_size  # Keras example multiplies with image_size
    return cost_kl + cost_ce

def vae_loss(vae_input, vae_output):
    xent_loss = Image_Dim * objectives.binary_crossentropy(vae_input, vae_output)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

z = Lambda(sampling, output_shape=(2,))([mu, log_var])

decoder = Dense(1024)(z)
decoder = Dense(128 * 7 * 7, activation='tanh')(decoder)
decoder = BatchNormalization()(decoder)
decoder = Reshape((7, 7, 128), input_shape=(128 * 7 * 7,))(decoder)
decoder = Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same')(decoder)
decoder = LeakyReLU(alpha=0.2)(decoder)
decoder = Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same')(decoder)
decoder = LeakyReLU(alpha=0.2)(decoder)
decoder_output = Conv2D(1, (5, 5), padding='same', activation='sigmoid')(decoder)

for number in range(0, 10):
    X, y = get_mnist_data(number)
    upper = int(X.shape[0] / batch_size) * batch_size

    reconstruction_loss = objectives.binary_crossentropy(
        K.flatten(first), K.flatten(decoder_output)) * X.shape[0]
    kl_loss = 0.5 * K.sum(K.square(mu) + K.exp(log_var) - log_var - 1, axis=-1)
    vae_loss = reconstruction_loss + kl_loss

    # build model
    vae = Model(first, decoder_output)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='rmsprop')
    # vae.summary()
    vae.fit(X[:upper], shuffle=True, epochs=20, batch_size=batch_size)
    vae.save("VAE" + str(number), True)

    generator_input = np.random.uniform(-1, 1, (batch_size, 28, 28, 1))
    images = vae.predict(generator_input, verbose=1)
    for i in range(0, 10):
        # print(images.shape)
        image = change_into_image(images[i])

def vae_loss(self, x, x_decoded_mean):
    xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.mean(
        1 + self.z_log_sigma - K.square(self.z_mean) - K.exp(self.z_log_sigma), axis=-1)
    return xent_loss + kl_loss

def vae_loss(x, x_decoded_mean):
    xent_loss = binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.mean(
        1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
    return xent_loss + kl_loss

def vae_loss(x, x_decoded):
    xent_loss = length * objectives.binary_crossentropy(x, x_decoded)
    kl_loss = -0.5 * K.mean(
        1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

def vae_loss(x, x_decoded_mean):
    x = K.flatten(x)
    x_decoded_mean = K.flatten(x_decoded_mean)
    xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
    return xent_loss + kl_loss

x = Conv2DTranspose(filters=filters,
                    kernel_size=kernel_size,
                    activation='relu',
                    strides=2,
                    padding='same')(x)
filters *= 2

outputs = Conv2DTranspose(filters=3,
                          kernel_size=kernel_size,
                          activation='sigmoid',
                          padding='same',
                          name='decoder_output')(x)
decoder = Model(latent_inputs, outputs, name='decoder')

output = decoder(encoder(inputs)[2])
cvae = Model(inputs, output, name='cvae')

reconstruction_loss = objectives.binary_crossentropy(
    K.flatten(inputs), K.flatten(output))
reconstruction_loss *= image_size * image_size
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = -0.5 * K.sum(kl_loss, axis=-1)
cvae_loss = K.mean(
    reconstruction_loss + 3 * kl_loss)  # use disentangled VAE for better results (beta = 3)

cvae.add_loss(cvae_loss)
cvae.compile(optimizer='adam')
cvae.fit(x=x_train_1, shuffle=True, epochs=150, batch_size=50)

directory_recon = "epsilon_grid_search"
if not os.path.exists(directory_recon):
    os.makedirs(directory_recon)

def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -K.mean(K.sum(z_logalpha, axis=-1))
    return xent_loss + beta * kl_loss

def cross_entropy(y_true, y_pred):
    loss = tf.reduce_mean(binary_crossentropy(y_true, y_pred))
    return loss

def label_loss_fn(self, x, y):
    self.loss_results[self.label_out] = objectives.binary_crossentropy(x, y)
    return self.loss_results[self.label_out]

def vae_loss(x, x_bar):
    reconst_loss = original_dim2 * objectives.binary_crossentropy(x, x_bar)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return reconst_loss + kl_loss

def d_loss(y_true, y_pred):
    L = objectives.binary_crossentropy(K.batch_flatten(y_true),
                                       K.batch_flatten(y_pred))
    # L = objectives.mean_squared_error(K.batch_flatten(y_true),
    #                                   K.batch_flatten(y_pred))
    return L

def loss(self, inputs, outputs):
    kl_loss = -0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis=-1)
    kl_loss2 = -0.5 * K.sum(1 + self.z_log_var2 - K.square(self.z_mean2) - K.exp(self.z_log_var2), axis=-1)
    xent_loss = self.input_size * objectives.binary_crossentropy(self.x, self.x_hat)
    return xent_loss + kl_loss + kl_loss2

def vae_loss(x, x_decoded_mean):
    xent_loss = objectives.binary_crossentropy(x, x_decoded_mean)
    # kl_loss must be defined here, since the return statement uses it
    kl_loss = -0.5 * K.mean(1 + z_log_std - K.square(z_mean) - K.exp(z_log_std), axis=-1)
    return K.mean(xent_loss + kl_loss)

def sampling(args):  # sampling function passed to the Lambda layer below
    en_mean, log_var = args
    eps = K.random_normal(shape=(batch_size, z_dim), mean=0., stddev=1.0)
    return en_mean + K.exp(log_var) * eps

z = Lambda(sampling, output_shape=(z_dim,))([en_mean, log_var])

decoder = Dense(x_tr.shape[1], activation='sigmoid')
z_decoded = Dense(128, activation='relu')(z)
z_decoded = Dense(256, activation='relu')(z_decoded)
z_decoded = Dense(512, activation='relu')(z_decoded)
z_decoded = Dense(1024, activation='relu')(z_decoded)
y = decoder(z_decoded)

# loss
reconstruction_loss = objectives.binary_crossentropy(x, y) * x_tr.shape[1]
kl_loss = 0.5 * K.sum(K.square(en_mean) + K.exp(log_var) - log_var - 1, axis=-1)
vae_loss = reconstruction_loss + kl_loss

# build model
VAE = Model(x, y)
VAE.add_loss(vae_loss)
VAE.compile(optimizer='rmsprop')
VAE.summary()
plot_model(VAE, to_file='VAE_plot.png', show_shapes=True, show_layer_names=False)

size = (int(x_tr.shape[0] / batch_size)) * batch_size
VAE.fit(x_tr[:size], shuffle=True, epochs=n_epoch, batch_size=batch_size,

def seg_loss_no_weight(y_true, y_pred):
    y_true_flat = K.batch_flatten(y_true)
    y_pred_flat = K.batch_flatten(y_pred)
    return objectives.binary_crossentropy(y_true_flat, y_pred_flat)

z_decoded = z_decoder2(z_decoded)
z_decoded = z_decoder3(z_decoded)
z_decoded = z_decoder4(z_decoded)
z_decoded = z_decoder5(z_decoded)
z_decoded = z_decoder6(z_decoded)
z_decoded = z_decoder7(z_decoded)
y = z_decoder8(z_decoded)

vae = Model(x, y)
print "--->", X_train.shape[0:], y.shape, x.shape, "----->", X_train.shape[1]

# def vae_loss(x, x_decoded_mean):
x1 = K.flatten(x)
y1 = K.flatten(y)
print "shape", x.shape, x1.shape, "-", y.shape, y1.shape
xent_loss = nparts * objectives.binary_crossentropy(x1, y1)
kl_loss = -0.5 * K.mean(1 + log_var - K.square(mu) - K.exp(log_var), axis=-1)
vae_loss = xent_loss + kl_loss

vae.add_loss(vae_loss)
vae.compile(optimizer='Adam')  # , loss=[vae_loss], metrics=['accuracy'])
vae.summary()

vae.fit(X_train,
        shuffle=True,
        batch_size=batch_size,
        epochs=30,
        validation_data=(X_test, None),
        verbose=1)

model_json = vae.to_json()

def generator_loss(x, x_decoded_mean):
    xent_loss = original_dim * objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

def cost(x, output):
    cost_ce = objectives.binary_crossentropy(x, output) * image_size  # Keras example multiplies with image_size
    return cost_ce

def vae_loss(a, ap):
    a_flat = K.batch_flatten(a)
    ap_flat = K.batch_flatten(ap)
    L_atoa = objectives.binary_crossentropy(a_flat, ap_flat)
    return 100 * L_atoa

def dummy_loss(y_true, y_pred):
    xent_loss = objectives.binary_crossentropy(K.flatten(y_true), K.flatten(y_pred))
    return K.mean(xent_loss)

def d_loss(y_true, y_pred):
    L = objectives.binary_crossentropy(K.batch_flatten(y_true),
                                       K.batch_flatten(y_pred))
    return L

def ae_loss(x, x_decoded_mean):
    x = K.flatten(x)
    x_decoded_mean = K.flatten(x_decoded_mean)
    loss = max_length * objectives.binary_crossentropy(x, x_decoded_mean)
    return loss

def class_loss_cls(y_true, y_pred):
    # If the GTs contain more classes, change binary_crossentropy to categorical_crossentropy.
    return lambda_cls_class * K.mean(
        binary_crossentropy(y_true[0, :, :], y_pred[0, :, :]))

def mse_crossentropy(y_true, y_pred):
    vuv_loss = binary_crossentropy(y_true[:, -1], y_pred[:, -1])
    return mse(y_true[:, :-1], y_pred[:, :-1]) * vuv_loss

# name='AvgPool_4')(conv5)
# drop5 = Dropout(0.5)(pool5)
# ###
# conv6 = Conv2D(num_filters, [filter_height2, filter_width], activation='relu',
#                kernel_regularizer='l2', padding='valid', name='conv_2')(drop5)
# leak6 = LeakyReLU(alpha=.001)(conv6)
# pool6 = AveragePooling2D((1, pool_size), strides=(1, pool_stride), padding='valid',
#                          name='AvgPool_4')(conv6)
# drop6 = Dropout(0.5)(pool6)

flat = Flatten()(drop3)
FC = Dense(50, activation='relu', name='representation')(flat)
preds = Dense(num_GOterms, activation='sigmoid')(FC)

# loss function
loss = tf.reduce_mean(binary_crossentropy(labels, preds))
# loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=preds))

# gradient descent optimizer (Adam)
train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# # one match accuracy
# onematch_pred = tf.equal(tf.argmax(tf.multiply(labels, preds), axis=-1),
#                          tf.argmax(preds, axis=-1))
# onematch = tf.reduce_mean(tf.cast(onematch_pred, tf.float32))

# exact match accuracy
match = tf.equal(float(num_GOterms),
                 tf.reduce_sum(tf.cast(tf.equal(labels, tf.round(preds)), tf.float32), axis=1))
exactmatch = tf.reduce_mean(tf.cast(match, tf.float32))

def __init__(self, params, mask_zero=True):
    # input words
    self.wds = tf.placeholder(tf.float32, [None, params['words']['dim']], name='words')
    # input pos
    self.pos = tf.placeholder(tf.float32, [None, params['pos']['dim']], name='pos')
    # output Y0
    self.Y0 = tf.placeholder(tf.float32, [None, params['Y0']['dim']], name='Y0')
    # output Y1
    self.Y1 = tf.placeholder(tf.float32, [None, params['Y1']['dim']], name='Y1')

    # 1. base layers: embedding
    wd_embedding = Embedding(output_dim=params['embed_size'],
                             input_dim=params['voc_size'],
                             input_length=params['words']['dim'],
                             mask_zero=mask_zero,
                             name='wd_embedding')(self.wds)
    # wd_embedding = BatchNormalization(momentum=0.9, name='wd_embedding_BN')(wd_embedding)
    pos_embedding = Embedding(output_dim=params['embed_size'],
                              input_dim=params['pos_size'],
                              input_length=params['pos']['dim'],
                              mask_zero=mask_zero,
                              name='pos_embedding')(self.pos)
    # pos_embedding = BatchNormalization(momentum=0.9, name='pos_embedding_BN')(pos_embedding)

    # 2. semantic layers: Bidirectional GRU
    wd_Bi_GRU = Bidirectional(
        GRU(params['words']['RNN']['cell'],
            dropout=params['words']['RNN']['drop_out'],
            recurrent_dropout=params['words']['RNN']['rnn_drop_out']),
        merge_mode='concat',
        name='word_Bi_GRU')(wd_embedding)
    if 'batch_norm' in params['words']['RNN']:
        wd_Bi_GRU = BatchNormalization(
            momentum=params['words']['RNN']['batch_norm'],
            name='word_Bi_GRU_BN')(wd_Bi_GRU)
    pos_Bi_GRU = Bidirectional(
        GRU(params['pos']['RNN']['cell'],
            dropout=params['pos']['RNN']['drop_out'],
            recurrent_dropout=params['pos']['RNN']['rnn_drop_out']),
        merge_mode='concat',
        name='pos_Bi_GRU')(pos_embedding)
    if 'batch_norm' in params['pos']['RNN']:
        pos_Bi_GRU = BatchNormalization(
            momentum=params['pos']['RNN']['batch_norm'],
            name='pos_Bi_GRU_BN')(pos_Bi_GRU)

    # use pos as attention
    attention_probs = Dense(2 * params['pos']['RNN']['cell'],
                            activation='softmax',
                            name='attention_vec')(pos_Bi_GRU)
    attention_mul = multiply([wd_Bi_GRU, attention_probs], name='attention_mul')
    # ATTENTION PART FINISHES HERE

    # 3. middle layer for predicting Y0
    kwargs = params['Y0']['kwargs'] if 'kwargs' in params['Y0'] else {}
    if 'W_regularizer' in kwargs:
        kwargs['W_regularizer'] = l2(kwargs['W_regularizer'])
    self.Y0_probs = Dense(
        params['Y0']['dim'],
        # activation='softmax',
        name='Y0_probs',
        bias_regularizer=l2(0.01),
        **kwargs)(pos_Bi_GRU)
    # batch_norm
    if 'batch_norm' in params['Y0']:
        self.Y0_probs = BatchNormalization(**params['Y0']['batch_norm'])(self.Y0_probs)
    self.Y0_probs = Activation(params['Y0']['activate_func'])(self.Y0_probs)
    if 'activity_reg' in params['Y0']:
        self.Y0_probs = ActivityRegularization(
            name='Y0_activity_reg',
            **params['Y0']['activity_reg'])(self.Y0_probs)

    # 4. upper hidden layers
    # First, learn a hidden layer from the Bi_GRU.
    # Second, treat Y0_probs as a middle feature and combine it with the hidden layer.
    combine_layer = concatenate([self.Y0_probs, attention_mul], axis=-1, name='combine_layer')
    hidden_layer = Dense(params['H']['dim'], name='hidden_layer')(combine_layer)
    if 'batch_norm' in params['H']:
        hidden_layer = BatchNormalization(momentum=0.9, name='hidden_layer_BN')(hidden_layer)
    hidden_layer = Activation('relu')(hidden_layer)
    if 'drop_out' in params['H']:
        hidden_layer = Dropout(params['H']['drop_out'], name='hidden_layer_dropout')(hidden_layer)

    # 5. layer for predicting Y1
    kwargs = params['Y1']['kwargs'] if 'kwargs' in params['Y1'] else {}
    if 'W_regularizer' in kwargs:
        kwargs['W_regularizer'] = l2(kwargs['W_regularizer'])
    self.Y1_probs = Dense(
        params['Y1']['dim'],
        # activation='softmax',
        name='Y1_probs',
        bias_regularizer=l2(0.01),
        **kwargs)(hidden_layer)
    # batch_norm
    if 'batch_norm' in params['Y1']:
        self.Y1_probs = BatchNormalization(**params['Y1']['batch_norm'])(self.Y1_probs)
    self.Y1_probs = Activation(params['Y1']['activate_func'])(self.Y1_probs)
    if 'activity_reg' in params['Y1']:
        self.Y1_probs = ActivityRegularization(
            name='Y1_activity_reg',
            **params['Y1']['activity_reg'])(self.Y1_probs)

    # 6. calculate loss
    with tf.name_scope('loss'):
        Y0_loss = tf.reduce_mean(binary_crossentropy(self.Y0, self.Y0_probs), name='Y0_loss')
        Y1_loss = tf.reduce_mean(binary_crossentropy(self.Y1, self.Y1_probs), name='Y1_loss')
        self.loss = tf.add_n([Y0_loss, Y1_loss], name='loss')

    self.train_op = tf.train.RMSPropOptimizer(params['learning_rate']).minimize(self.loss)

def build_model():
    inputs = tf.placeholder(tf.float32, shape=[None, input_dim])
    tf.summary.histogram('inputs', inputs)

    with tf.name_scope('attention_layer'):
        # ATTENTION PART STARTS HERE
        with tf.name_scope('weights'):
            attention_probs = Dense(input_dim, activation='softmax', name='attention_vec')(inputs)
            variable_summaries(attention_probs)
        with tf.name_scope('inputs_weighted'):
            attention_mul = multiply([inputs, attention_probs])
            variable_summaries(attention_mul)
        # ATTENTION PART FINISHES HERE

    attention_mul = Dense(64)(attention_mul)
    with tf.name_scope('predictions'):
        preds = Dense(1, activation='sigmoid')(attention_mul)
        tf.summary.histogram('preds', preds)

    labels = tf.placeholder(tf.float32, shape=[None, 1])
    loss = tf.reduce_mean(binary_crossentropy(labels, preds))
    tf.summary.scalar('loss', loss)
    acc_value = tf.reduce_mean(binary_accuracy(labels, preds))
    tf.summary.scalar('accuracy', acc_value)
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    merged = tf.summary.merge_all()

    # run model
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter('../docs/train', sess.graph)
        test_writer = tf.summary.FileWriter('../docs/test')
        # initializers
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # feed data to training
        number_of_training_data = len(outputs)
        batch_size = 20
        for epoch in range(10):
            for start, end in zip(
                    range(0, number_of_training_data, batch_size),
                    range(batch_size, number_of_training_data, batch_size)):
                # _, trn_loss = sess.run(
                #     [train_step, loss],
                #     feed_dict={
                #         inputs: inputs_1[start:end],
                #         labels: outputs[start:end],
                #         K.learning_phase(): 1
                #     })
                if start % 10 == 0:
                    summary = sess.run(merged,
                                       feed_dict={
                                           inputs: inputs_1,
                                           labels: outputs,
                                           K.learning_phase(): 0
                                       })
                    test_writer.add_summary(summary, start / 10)
                else:
                    summary, _ = sess.run(
                        [merged, train_step],
                        feed_dict={
                            inputs: inputs_1[start:end],
                            labels: outputs[start:end],
                            K.learning_phase(): 1
                        })
                    train_writer.add_summary(summary, start / 10)

def vae_loss(x_, x_reconstruct):
    rec_loss = binary_crossentropy(x_, x_reconstruct)
    kl_loss = -0.5 * K.mean(1 + 2 * K.log(z_std + 1e-10) - z_mean ** 2 - z_std ** 2, axis=-1)
    return rec_loss + kl_loss

def custom_loss(y_true, y_pred):
    bottle = y_pred[:, :10]
    pred = y_pred[:, 10:794]
    Sb = y_pred[:, 794:]
    return objectives.binary_crossentropy(y_true, pred) + l * objectives.binary_crossentropy(bottle, Sb)