def loss(y_true, y_pred):
    """Return the summed categorical cross-entropy plus the KL term.

    The cross-entropy between ``y_true`` and ``y_pred`` is summed over its
    last axis and ``kl_loss`` is added to the result.  ``kl_loss`` is not
    defined here; it is read from the enclosing scope.
    """
    # TODO: Replace this with actual log likelihood to better fit VAE
    # formalism. Will also have to rescale the KL loss (which I think I
    # multiplied by two before).
    reconstruction = K.sum(categorical_crossentropy(y_true, y_pred), axis=-1)
    return reconstruction + kl_loss
def lstm_vae(sq_len, x_dim, h_dim, z_dim, b_size):
    """Build an LSTM variational autoencoder and its companion models.

    Args:
        sq_len: Not used by this function (inputs have an unspecified
            time dimension).
        x_dim: Feature size of each time step of the input and output.
        h_dim: Number of units in the encoder and decoder LSTMs.
        z_dim: Size of the latent vector.
        b_size: Kept for backward compatibility only.  The sampling noise
            now takes its batch dimension from the incoming tensor, so
            batches of any size (including a final partial batch) work.

    Returns:
        A tuple ``(vae, enc, gen, step)``:
            vae:  ``[x, dx] -> do``, compiled with Adam and the VAE loss.
            enc:  ``x -> [z_m, z_lv]``.
            gen:  ``[dx, ds] -> [output, state, state]`` with the decoder
                  initialised from a latent vector ``ds``.
            step: ``[dx, drh, drc] -> [output, state, state]`` with the
                  decoder initialised from explicit states.
        ``gen`` and ``step`` share the decoder layers used by ``vae``.
    """
    # ----- encoder -----
    x = Input(shape=(None, x_dim))
    h = LSTM(h_dim)(x)
    z_m, z_lv = Dense(z_dim)(h), Dense(z_dim)(h)

    def sampling(args):
        """Reparameterised draw: mean + exp(log_var / 2) * eps."""
        mean, log_var = args
        # Use the runtime batch size instead of a fixed ``b_size`` so the
        # noise tensor always matches the shape of ``mean``.
        eps = K.random_normal(shape=(K.shape(mean)[0], z_dim),
                              mean=0., stddev=1.)
        return mean + K.exp(log_var / 2) * eps

    z = Lambda(sampling, output_shape=(z_dim,))([z_m, z_lv])
    # Linear map from latent size to LSTM state size; reused by ``gen``.
    reweight = Dense(h_dim, activation='linear')
    z = reweight(z)

    # ----- decoder (shared layers) -----
    dx = Input(shape=(None, x_dim))  # teacher forcing
    decoder_h = LSTM(h_dim, return_sequences=True, return_state=True)
    decoder_o = TimeDistributed(Dense(x_dim, activation='softmax'))
    # The projected latent vector seeds both initial states.
    dh, _, _ = decoder_h(dx, initial_state=[z, z])
    do = decoder_o(dh)

    vae = Model([x, dx], do)
    enc = Model(x, [z_m, z_lv])

    # ----- generator: decoder seeded from a supplied latent vector -----
    ds = Input(shape=(z_dim,))
    gen_z = reweight(ds)
    gen_dh, gen_rh, gen_rc = decoder_h(dx, initial_state=[gen_z, gen_z])
    gen_do = decoder_o(gen_dh)
    gen = Model([dx, ds], [gen_do, gen_rh, gen_rc])

    # ----- step: decoder seeded from supplied recurrent states -----
    drh = Input(shape=(h_dim,))
    drc = Input(shape=(h_dim,))
    step_dh, step_rh, step_rc = decoder_h(dx, initial_state=[drh, drc])
    step_do = decoder_o(step_dh)
    step = Model([dx, drh, drc], [step_do, step_rh, step_rc])

    def vae_loss(y_true, y_pred):
        """Categorical cross-entropy plus the mean KL term."""
        reconstruction = objectives.categorical_crossentropy(y_true, y_pred)
        kl = -0.5 * K.mean(1 + z_lv - K.square(z_m) - K.exp(z_lv))
        return reconstruction + kl

    vae.compile(optimizer='adam', loss=vae_loss)
    vae.summary()
    return vae, enc, gen, step
def cls_loss_det(y_true, y_pred):
    """
    Classification loss for the detection head: mean categorical
    cross-entropy.

    Only index 0 along the leading axis of both tensors is used; the
    cross-entropy of those slices is averaged into a single value.

    :param y_true: ground-truth tensor, read as ``y_true[0, :, :]``.
    :param y_pred: predicted tensor, read as ``y_pred[0, :, :]``.
    :return: tensor holding the mean categorical cross-entropy.
    """
    targets = y_true[0, :, :]
    predictions = y_pred[0, :, :]
    per_item = categorical_crossentropy(targets, predictions)
    return K.mean(per_item)
def train():
    """Build the training graph and run gradient descent over the data.

    The model output comes from ``cnn_model.load_model``; the loss is the
    mean categorical cross-entropy against the label placeholder.  The
    learning rate decays exponentially (staircase) with ``global_step``.
    Batches come from ``cnn_input`` and are fed with the Keras learning
    phase set to 1.
    """
    global_step = tf.Variable(0, trainable=False)

    # Graph inputs.
    images = tf.placeholder(tf.float32, shape=(None, 120, 160, 3))
    targets = tf.placeholder(tf.float32, shape=(None, 10))

    predictions = cnn_model.load_model(images)
    loss = tf.reduce_mean(categorical_crossentropy(targets, predictions))
    tf.scalar_summary('loss', loss)

    learning_rate = tf.train.exponential_decay(
        INITIAL_LEARNING_RATE,
        global_step,
        cnn_input.decay_steps,
        LEARNING_RATE_DECAY_FACTOR,
        staircase=True)
    tf.scalar_summary('learning_rate', learning_rate)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss, global_step=global_step)

    # Share one session between TensorFlow and Keras.
    sess = tf.Session()
    K.set_session(sess)
    sess.run(tf.initialize_all_variables())

    # NOTE(review): the writer is created with the session graph, but no
    # summaries are evaluated or written inside this function.
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    with sess.as_default():
        train_data = cnn_input.load_train_data()
        for epoch in cnn_input.epochs(train_data):
            for batch in cnn_input.batches(epoch):
                feed = {
                    images: batch[0],
                    targets: batch[1],
                    K.learning_phase(): 1,
                }
                train_step.run(feed_dict=feed)
def classifier(learning_rate, use_dropout):
    """Builds (but does not execute) a TensorFlow graph for image
    classification on this dataset.

    Arguments:
    learning_rate - Learning rate used by the TensorFlow gradient-descent
                    optimizer that produces train_op
    use_dropout - Boolean for whether this network should use dropout

    Returns:
    model -- Output of tf.global_variables_initializer()
    train_op -- TensorFlow node for optimizing this network
    accuracy -- Node giving the accuracy score on current batch
    x -- Node for input placeholder variable
    y -- Node for label placeholder variable
    """
    # I've started from a Keras model, rather than build a TensorFlow
    # one, but in order to get certain underlying TensorFlow nodes I
    # must compile it first, even if I don't use the Keras optimizer:
    keras_model = get_keras_model(use_dropout=use_dropout)
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True)
    keras_model.compile(loss='categorical_crossentropy',
                        optimizer=sgd,
                        metrics=['accuracy'])

    accuracy = keras_model.metrics_tensors[0]
    x = keras_model.inputs[0]
    y = keras_model.targets[0]
    predict = keras_model.outputs[0]

    loss = tf.reduce_mean(categorical_crossentropy(y, predict))
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # Create the initializer last: it only covers variables that exist at
    # the moment it is built, so building it before the optimizer and
    # compile step would leave later-created variables out.
    model = tf.global_variables_initializer()
    return (model, train_op, accuracy, x, y)
def vae_loss(self, x, x_pred):
    """Return cross-entropy plus the KL term repeated along the last axis.

    ``self.KL_div()`` is reshaped to a column (one row per leading-axis
    entry) and repeated ``self.m`` times along the last axis before being
    added to the categorical cross-entropy of ``x`` and ``x_pred``.
    """
    xent = categorical_crossentropy(x, x_pred)
    kl = self.KL_div()
    kl_column = K.reshape(kl, shape=(K.shape(kl)[0], 1))
    kl_tiled = K.repeat_elements(kl_column, rep=self.m, axis=-1)
    return xent + kl_tiled
def build_graph(self):
    """Create placeholders, model, loss, optimizer, accuracy and saver.

    The default graph is reset first; every node is then created inside
    ``self.graph`` with ``self.sess`` entered as a context manager.  The
    results are stored on ``self`` (``images``, ``labels``, ``model``,
    ``loss``, ``optimizer``, ``accuracy``, ``init``, ``saver``).
    """
    tf.reset_default_graph()
    with self.graph.as_default(), self.sess:
        size = self.config.image_size
        # Input images
        self.images = tf.placeholder(
            shape=[None, size, size, self.config.channels],
            dtype=tf.float32,
            name='Images')
        # Input labels that represent the real outputs
        self.labels = tf.placeholder(
            shape=[None, 2], dtype=tf.float32, name='Labels')

        self.model = self.init_model(self.images)

        per_example = categorical_crossentropy(self.labels, self.model)
        self.loss = tf.reduce_mean(per_example)

        adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.optimizer = adam.minimize(self.loss)

        # Accuracy: fraction of rows whose arg-max matches the label's.
        true_class = tf.argmax(self.labels, 1)
        predicted_class = tf.argmax(self.model, 1)
        hits = tf.cast(tf.equal(true_class, predicted_class), tf.float32)
        self.accuracy = tf.reduce_mean(hits)

        self.init = tf.global_variables_initializer()
        self.saver = tf.train.Saver(tf.trainable_variables())
def class_loss_cls(y_true, y_pred):
    '''
    Classification loss for the classifier: plain cross-entropy.

    Takes the mean categorical cross-entropy of ``y_true[0, :, :]`` and
    ``y_pred[0, :, :]`` and scales it by ``LAMBDA_CLS_CLASS``.
    '''
    cross_entropy = categorical_crossentropy(y_true[0, :, :],
                                             y_pred[0, :, :])
    return LAMBDA_CLS_CLASS * K.mean(cross_entropy)
def digit_classification():
    """Train a small dense classifier on MNIST and print its accuracy.

    Builds a 784 -> 128 -> 128 -> 10 network from Keras layers applied to
    TensorFlow placeholders (with 0.5 dropout after each hidden layer),
    runs 100 gradient-descent steps on batches of 50, then evaluates the
    accuracy tensor on the test set with the learning phase set to 0.
    """
    sess = tf.Session()
    K.set_session(sess)
    # NOTE(review): no explicit variable initialisation is run here; this
    # relies on Keras initialising its variables in the session
    # registered above -- confirm for the Keras version in use.

    img = tf.placeholder(tf.float32, shape=(None, 784))
    labels = tf.placeholder(tf.float32, shape=(None, 10))

    x = Dense(128, activation='relu')(img)
    x = Dropout(0.5)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    preds = Dense(10, activation='softmax')(x)

    loss = tf.reduce_mean(categorical_crossentropy(labels, preds))
    mnist_data = input_data.read_data_sets('MNIST_data', one_hot=True)
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    with sess.as_default():
        for i in range(100):
            batch = mnist_data.train.next_batch(50)
            # Learning phase 1 = training mode (dropout active).
            train_step.run(feed_dict={
                img: batch[0],
                labels: batch[1],
                K.learning_phase(): 1
            })

    acc_value = accuracy(labels, preds)
    with sess.as_default():
        # Call-style print works on both Python 2 and Python 3.
        print(acc_value.eval(
            feed_dict={
                img: mnist_data.test.images,
                labels: mnist_data.test.labels,
                K.learning_phase(): 0
            }))
def class_loss_cls(y_true, y_pred):
    """Return the weighted classification loss of the classifier.

    The mean categorical cross-entropy of ``y_true[0, :, :]`` and
    ``y_pred[0, :, :]`` is multiplied by
    ``LossesCalculator.lambda_cls_class``.
    """
    weight = LossesCalculator.lambda_cls_class
    cross_entropy = categorical_crossentropy(y_true[0, :, :],
                                             y_pred[0, :, :])
    return weight * K.mean(cross_entropy)
def head_loss_cls(y_true, y_pred):
    '''
    Weighted mean categorical cross-entropy for the head's class output.

    :param y_true: ground-truth tensor; only ``y_true[0, :, :]`` is used.
    :param y_pred: predicted tensor; only ``y_pred[0, :, :]`` is used.
    :return: ``lambda_cls_class`` times the mean cross-entropy.
    '''
    truth = y_true[0, :, :]
    guess = y_pred[0, :, :]
    mean_xent = K.mean(categorical_crossentropy(truth, guess))
    return lambda_cls_class * mean_xent
def final_cls_loss(y_true, y_pred):
    '''
    Compute the loss for the network's final classification layer,
    directly using the multi-class loss that corresponds to softmax.

    :param y_true: ground-truth tensor; only ``y_true[0, :, :]`` is used.
    :param y_pred: predicted tensor; only ``y_pred[0, :, :]`` is used.
    :return: ``lambda_cls_class`` times the mean categorical
        cross-entropy.
    '''
    xent = categorical_crossentropy(y_true[0, :, :], y_pred[0, :, :])
    averaged = K.mean(xent)
    return lambda_cls_class * averaged
def dae_loss(input_phono, phono_decoded):
    """Return phono MSE plus concept cross-entropy.

    ``input_concept`` and ``concept_decoded`` are not arguments; they are
    read from the enclosing scope.
    """
    phono_term = objectives.mse(input_phono, phono_decoded)
    concept_term = objectives.categorical_crossentropy(input_concept,
                                                       concept_decoded)
    return phono_term + concept_term
def vae_loss(x, x_decoded_mean):
    """Return the KL term plus the mean categorical cross-entropy.

    ``z_mean`` and ``z_log_sigma`` are read from the enclosing scope.
    An earlier version used binary cross-entropy for the reconstruction
    term.
    """
    # Reconstruction: cross-entropy averaged over every element.
    reconstruction = K.mean(
        objectives.categorical_crossentropy(x, x_decoded_mean))

    # KL term; the mean over the last axis was added so the loss works
    # with sequential input shapes.
    kl_inner = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
    divergence = -0.5 * K.mean(kl_inner, axis=-1)

    return divergence + reconstruction
def vae_loss(input_phono, phono_decoded):
    """Return phono MSE + concept cross-entropy + KL term.

    ``input_concept``, ``concept_decoded``, ``z_mean`` and ``z_log_std``
    are read from the enclosing scope.
    """
    phono_term = objectives.mse(input_phono, phono_decoded)
    concept_term = objectives.categorical_crossentropy(input_concept,
                                                       concept_decoded)
    # NOTE(review): z_log_std enters the formula un-doubled, i.e. exactly
    # where a log-variance would normally appear -- confirm what the
    # tensor actually holds.
    kl_inner = 1 + z_log_std - K.square(z_mean) - K.exp(z_log_std)
    kl_term = -0.5 * K.mean(kl_inner, axis=-1)
    return phono_term + concept_term + kl_term
def _vae_loss(self, x, x_decoded_mean, **kwarg):
    """Return categorical cross-entropy minus the (negative) KL sum.

    Uses ``self.z_mean`` and ``self.z_log_var``.  Extra keyword arguments
    are accepted and ignored.
    """
    mean = self.z_mean
    log_var = self.z_log_var

    # The original Church code multiplied the cross-entropy by a
    # prefactor Lq (= self.L * self.q).  That factor does not appear in
    # the Kingma & Welling VAE formulation and leads to non-unit
    # variance; it may have been copied from the fchollet example code.
    # It is deliberately left out here.
    reconstruction = categorical_crossentropy(x, x_decoded_mean)

    kl_inner = 1 + log_var - K.square(mean) - K.exp(log_var)
    negative_kl = 0.5 * K.sum(kl_inner, axis=-1)
    return reconstruction - negative_kl
def entropy(self):
    """Combine the self-cross-entropy of every entry of ``self.ps``.

    For each (low, high) pair of the action space, ``self.ps[i]`` is
    scored against itself: with binary cross-entropy when
    ``low + 1 == high``, otherwise with categorical cross-entropy.  The
    per-entry results are passed to ``maybe_merge`` with ``mode="sum"``.
    """
    bounds = zip(self.action_space.low, self.action_space.high)
    terms = []
    for index, (low, high) in enumerate(bounds):
        is_binary = (low + 1 == high)
        scorer = binary_crossentropy if is_binary else categorical_crossentropy
        terms.append(scorer(self.ps[index], self.ps[index]))
    return maybe_merge(terms, mode="sum")
def class_loss_cls(y_true, y_pred):
    '''
    Cross-entropy loss of the ROI classifier, scaled by lambda_cls_class.
    # Input
    |   y_true: (1, N, K), binary class vector including background
    |   y_pred: (1, N, K), binary class vector including background
    # Output
    |   lambda_cls_class * mean categorical cross-entropy of the
    |   [0, :, :] slices
    '''
    roi_truth = y_true[0, :, :]
    roi_scores = y_pred[0, :, :]
    mean_loss = K.mean(categorical_crossentropy(roi_truth, roi_scores))
    return lambda_cls_class * mean_loss
def bird_loss1(bird_true, y_pred_list):
    """Average the weighted cross-entropies of the seven bird parts.

    Parts are taken in the order head, legs, wings, back, belly, breast,
    tail.  For part ``i`` the term is::

        weight_i * bird_true[i][0]
                 * categorical_crossentropy(bird_true[i][1:], y_pred_list[i])

    where the weights are the module-level ``head_l`` ... ``tail_l``.
    The seven terms are summed and divided by 7.
    """
    part_weights = (head_l, legs_l, wings_l, back_l,
                    belly_l, breast_l, tail_l)

    total = None
    for part, weight in enumerate(part_weights):
        xent = categorical_crossentropy(bird_true[part][1:],
                                        y_pred_list[part])
        term = weight * bird_true[part][0] * xent
        # Accumulate left to right, starting from the first term.
        total = term if total is None else total + term
    return total / 7
def __init__(self,
             network_shape,
             data_shape=(56, 56, 1),
             latent_shape=(33, 2),
             domain=False,
             use_latent=False):
    """Set hyper-parameters, build the network and compile the model.

    Args:
        network_shape: Passed to the network builder and used in the
            model name.
        data_shape: Passed to the network builder.
        latent_shape: Two-entry shape; its product is stored as
            ``latent_units`` and it is used in the model name.
        domain: Passed to the network builder.
        use_latent: When true, the supervised loss on ``latent_output``
            gets weight 1.0, otherwise 0.0.
    """
    # Scalars stored on the instance.
    self.tau = K.variable(5.0)
    self.tau_min = 0.5
    self.tau_decay = 5e-4
    self.batch_size = 64

    # Base loss and the backend variables that weight each loss term.
    self.loss_func = binary_crossentropy
    self.loss_weight_func = K.variable(1.0)
    self.loss_weight_gumb = K.variable(1.0)
    self.loss_weight_zero = K.variable(1.0)

    self.latent_shape = latent_shape
    self.latent_units = latent_shape[0] * latent_shape[1]
    self.name = ''.join(['sae-', str(network_shape), str(latent_shape)])

    # Build Network
    self.__net(data_shape, network_shape, domain)

    # Build Models
    # The latent output is exposed as a second model output so a wanted
    # encoding can be supplied as a target.
    self.autoencoder = Model(self.data_in,
                             [self.data_out, self.latent_out])

    phase = K.learning_phase()
    self.encoder = K.function([self.data_in, phase], [self.latent_out])

    decoder_input = self.autoencoder.get_layer(name='decoder-0').input
    decoder_output = self.autoencoder.get_layer(name='final_output').output
    self.decoder = K.function([decoder_input, K.learning_phase()],
                              [decoder_output])

    # Output loss: weighted base loss + weighted Gumbel term + weighted
    # zero term.
    loss = lambda x, x_hat: (
        self.loss_func(x, x_hat) * self.loss_weight_func
        + self.__lossGumbel() * self.loss_weight_gumb
        + self.__lossZero() * self.loss_weight_zero)
    loss_supervised = lambda x, x_hat: categorical_crossentropy(x, x_hat)

    self.autoencoder.compile(
        optimizer='adam',
        loss={
            "final_output": loss,
            "latent_output": loss_supervised,
        },
        loss_weights={
            "final_output": 1.0,
            "latent_output": 1.0 if use_latent else 0.0,
        },
        metrics=[self.loss_func])
def vae_loss(x, x_decoded_mean):
    """Return the KL term plus the mean categorical cross-entropy.

    ``z_mean`` and ``z_log_sigma`` are read from the enclosing scope.
    An earlier version used binary cross-entropy for the reconstruction
    term and an extra outer mean on the KL term.
    """
    xent = objectives.categorical_crossentropy(x, x_decoded_mean)
    reconstruction = K.mean(xent)

    # The mean over the last axis was added so the loss works with
    # sequential input shapes.
    kl_inner = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
    divergence = -0.5 * K.mean(kl_inner, axis=-1)

    # Open questions left by the author: how to track the two losses
    # separately in TensorBoard, and the KL term behaving oddly.
    return divergence + reconstruction
def vae_cdr3_loss(io_encoder, io_decoder):
    """
    Sum of the cross-entropy and the beta-weighted KL divergence.

    ``params``, ``z_mean`` and ``z_log_var`` are read from the enclosing
    scope.
    """
    # Scale the mean cross-entropy by the number of sites so the result
    # is a total loss across sites rather than a per-site mean.
    mean_xent = K.mean(
        objectives.categorical_crossentropy(io_encoder, io_decoder))
    total_xent = params['max_cdr3_len'] * mean_xent

    kl_inner = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl = -0.5 * K.sum(kl_inner, axis=-1)
    weighted_kl = kl * K.variable(params['beta'])

    return total_xent + weighted_kl
def vae_loss(input_phono, phono_decoded):
    """Return phono MSE + concept cross-entropy + KL term + geo MSE.

    Everything except the two arguments (``input_concept``,
    ``concept_decoded``, ``input_geo``, ``geo_decoded``, ``z_mean``,
    ``z_log_std``) is read from the enclosing scope.
    """
    phono_term = objectives.mse(input_phono, phono_decoded)
    concept_term = objectives.categorical_crossentropy(input_concept,
                                                       concept_decoded)
    geo_term = objectives.mse(input_geo, geo_decoded)

    kl_inner = 1 + z_log_std - K.square(z_mean) - K.exp(z_log_std)
    kl_term = -0.5 * K.mean(kl_inner, axis=-1)

    # Same left-to-right order as before: phono, concept, KL, geo.
    return phono_term + concept_term + kl_term + geo_term
def padded_categorical_crossentropy(y_true, y_pred):
    """Cross-entropy loss of a batch, with padded positions zeroed out.

    Channel 0 of the last axis of ``y_true`` marks padding: the loss at
    each position is multiplied by ``1 - y_true[..., 0]``.

    Example (1 example, 2 time steps, 3 tags; true tags are 2 and
    0 = padding):

    >>> true = tf.constant([0., 0., 1., 1., 0., 0.], shape=[1, 2, 3])
    >>> pred = tf.constant([0, 0.5, 0.5, 0.4, 0.4, 0.2], shape=[1, 2, 3])
    >>> padded_categorical_crossentropy(true, pred)  # => [[ 0.69 0.]]
    """
    per_step = categorical_crossentropy(y_true, y_pred)
    # Padding indicator: first channel of the one-hot targets.
    is_padding = y_true[:, :, 0]
    keep = 1. - is_padding
    return per_step * keep
def bayesian_loss(y_true, y_pred):
    """Return cross-entropy minus the batch-scaled sum of layer KL terms.

    Every layer of ``model`` whose exact type is ``Bayesian`` contributes
    ``sum(1 + 2*log_stdev - mean**2 - exp(2*log_stdev)) / 2``.  The
    accumulated value is divided by ``nb_batchs`` and subtracted from the
    categorical cross-entropy.  ``model`` and ``nb_batchs`` come from the
    enclosing scope.
    """
    cross_entropy = objectives.categorical_crossentropy(y_true, y_pred)

    accumulated = K.variable(0.0)
    for layer in model.layers:
        if type(layer) is not Bayesian:
            continue
        mu = layer.mean
        log_sigma = layer.log_stdev
        inner = 1.0 + 2.0 * log_sigma - mu ** 2.0 - K.exp(2.0 * log_sigma)
        accumulated = accumulated + K.sum(inner) / 2.0

    return cross_entropy - accumulated / nb_batchs
def vae_loss(y_true, y_pred):
    """Return mean cross-entropy plus a KL-style penalty on the latents.

    Both terms are averaged over their last axis.  ``z_mean`` and
    ``z_log_var`` are read from the enclosing scope.
    """
    xent = objectives.categorical_crossentropy(y_true, y_pred)
    reconstruction = K.mean(xent, axis=-1)

    # NOTE(review): only the squared-mean term carries the 0.5 factor
    # here -- confirm this weighting is intentional.
    penalty_terms = (-z_log_var + 0.5 * K.square(z_mean)
                     + K.exp(z_log_var) - 1)
    penalty = K.mean(penalty_terms, axis=-1)

    # Two variants were tried and left disabled by the author: explicit
    # mean/variance penalties on z_mean, and scaling the KL term by the
    # ratio of the current to the initial learning rate.
    return reconstruction + penalty
def bayesian_loss(y_true, y_pred):
    """Categorical cross-entropy with a per-layer KL correction.

    For each layer in ``model.layers`` whose type is exactly
    ``Bayesian``, the quantity
    ``sum(1 + 2*log_stdev - mean**2 - exp(2*log_stdev)) / 2`` is added to
    a running total.  The function returns
    ``cross_entropy - total / nb_batchs``.  ``model`` and ``nb_batchs``
    are taken from the enclosing scope.
    """
    def layer_term(layer):
        """KL contribution of one Bayesian layer."""
        two_log_sigma = 2.0 * layer.log_stdev
        return K.sum(1.0 + two_log_sigma - layer.mean ** 2.0
                     - K.exp(two_log_sigma)) / 2.0

    ce = objectives.categorical_crossentropy(y_true, y_pred)
    kl = K.variable(0.0)
    for candidate in model.layers:
        if type(candidate) is Bayesian:
            kl = kl + layer_term(candidate)
    return ce - kl / nb_batchs
def custom_loss(y_true, y_pred):
    '''
    Categorical cross-entropy summed over the leading axis.

    Args:
        y_true: Ground Truth output
        y_pred: Predicted output
        The forms of these two vectors are:
        ######################
        ## h1,h2,h3,...,h49 ##
        ######################
    Returns:
        The loss caused by y_pred
    '''
    loss = categorical_crossentropy(y_true, y_pred)
    # Reduce inside the backend graph.  Python's builtin ``sum`` iterates
    # the symbolic tensor element by element, which fails when the
    # leading dimension is unknown; ``axis=0`` is the same reduction.
    return K.sum(loss, axis=0)
def __init__(self, input_dim, y_dim):
    """Build the attention-weighted classifier graph and its train op.

    Args:
        input_dim: Width of the input placeholder and of the attention
            vector.
        y_dim: Width of the output layer and of the label placeholder.

    Sets ``self.inputs``, ``self.probs``, ``self.labels``, ``self.loss``
    and ``self.train_step``.
    """
    self.inputs = tf.placeholder(tf.float32, shape=[None, input_dim])

    # Softmax attention weights, multiplied element-wise with the input.
    attention_layer = Dense(input_dim, activation='softmax',
                            name='attention_vec')
    attention_probs = attention_layer(self.inputs)
    weighted_inputs = multiply([self.inputs, attention_probs])

    hidden = Dense(64)(weighted_inputs)
    # NOTE(review): sigmoid outputs are scored with categorical
    # cross-entropy below -- confirm this pairing is intended.
    self.probs = Dense(y_dim, activation='sigmoid')(hidden)

    self.labels = tf.placeholder(tf.float32, shape=[None, y_dim])
    per_example = categorical_crossentropy(self.labels, self.probs)
    self.loss = tf.reduce_mean(per_example)

    optimizer = tf.train.GradientDescentOptimizer(0.5)
    self.train_step = optimizer.minimize(self.loss)
def patch_based_cnn_model(sess, dropout_prob=0.5, l_rate=0.5, n_classes=2):
    """Build a patch-classification CNN and its training/metric nodes.

    Args:
        sess: TensorFlow session used to run the variable initializer.
        dropout_prob: Value passed to both ``Dropout`` layers.
        l_rate: Learning rate of the gradient-descent optimizer.
        n_classes: Number of output units; also the width of the label
            placeholder (previously fixed at 2 regardless of this value).

    Returns:
        ``[preds, accuracy_metric, img, labels, train_step]``.
    """
    # Placeholders
    img = tf.placeholder(tf.float32, shape=(None, 101, 101, 3))
    labels = tf.placeholder(tf.float32, shape=(None, n_classes))

    # Layers
    convnet = Conv2D(80, 6, strides=1, padding='valid', activation=None,
                     kernel_initializer='he_normal')(img)
    convnet = tf.nn.local_response_normalization(convnet)
    convnet = Activation('relu')(convnet)
    convnet = MaxPooling2D(pool_size=(2, 2), strides=2)(convnet)

    convnet = Conv2D(120, 5, strides=1, padding='valid', activation=None,
                     kernel_initializer='he_normal')(convnet)
    convnet = tf.nn.local_response_normalization(convnet)
    convnet = Activation('relu')(convnet)
    convnet = MaxPooling2D(pool_size=(2, 2), strides=2)(convnet)

    convnet = Conv2D(160, 3, strides=1, padding='valid', activation=None,
                     kernel_initializer='he_normal')(convnet)
    convnet = Conv2D(200, 3, strides=1, padding='valid', activation=None,
                     kernel_initializer='he_normal')(convnet)
    convnet = MaxPooling2D(pool_size=(2, 2), strides=2)(convnet)

    # With 'valid' padding the 101x101 input is reduced to 9x9x200 here.
    convnet = tf.reshape(convnet, [-1, 9 * 9 * 200])

    convnet = Dense(320, activation='relu')(convnet)
    convnet = Dropout(dropout_prob)(convnet)
    convnet = Dense(320, activation='relu')(convnet)
    convnet = Dropout(dropout_prob)(convnet)

    preds = Dense(n_classes, activation='linear')(convnet)
    preds = Activation('softmax')(preds)

    sess.run(tf.global_variables_initializer())

    # Loss function
    loss = tf.reduce_mean(categorical_crossentropy(labels, preds))

    # Training operation
    train_step = tf.train.GradientDescentOptimizer(l_rate).minimize(loss)

    # Accuracy metric
    accuracy_metric = tf.reduce_mean(accuracy(labels, preds))

    return [preds, accuracy_metric, img, labels, train_step]
def patch_based_cnn_model(dropout_prob=0.5, l_rate=0.5, n_classes=2):
    """Build a patch-classification CNN with 'same'-padded convolutions.

    Args:
        dropout_prob: Value passed to both ``Dropout`` layers.
        l_rate: Learning rate of the gradient-descent optimizer.
        n_classes: Number of output units; also the width of the label
            placeholder.

    Returns:
        ``[preds, pred_class, loss, train_step]`` where ``pred_class`` is
        the arg-max of ``preds`` along axis 1.
    """
    # Placeholders
    img = tf.placeholder(tf.float32, shape=(None, 101, 101, 3))
    labels = tf.placeholder(tf.float32, shape=(None, n_classes))

    # Layers
    conv1 = Conv2D(80, 6, strides=1, padding='same', activation=None,
                   kernel_initializer='he_normal')(img)
    # TensorFlow spells this op with a 'z'.
    conv1 = tf.nn.local_response_normalization(conv1)
    conv1 = Activation('relu')(conv1)
    conv1 = MaxPooling2D(pool_size=(2, 2), strides=2)(conv1)

    conv2 = Conv2D(120, 5, strides=1, padding='same', activation=None,
                   kernel_initializer='he_normal')(conv1)
    conv2 = tf.nn.local_response_normalization(conv2)
    conv2 = Activation('relu')(conv2)
    conv2 = MaxPooling2D(pool_size=(2, 2), strides=2)(conv2)

    conv3 = Conv2D(160, 3, strides=1, padding='same', activation=None,
                   kernel_initializer='he_normal')(conv2)
    conv4 = Conv2D(200, 3, strides=1, padding='same', activation=None,
                   kernel_initializer='he_normal')(conv3)
    conv4 = MaxPooling2D(pool_size=(3, 3), strides=2)(conv4)

    # Derive the flattened size from the static shape.  With 'same'
    # padding the feature map is not 9x9 at this point, so a hard-coded
    # 9*9*200 would not match the tensor.
    flat_dim = 1
    for dim in conv4.get_shape().as_list()[1:]:
        flat_dim *= dim
    conv4_flatten = tf.reshape(conv4, [-1, flat_dim])

    dense1 = Dense(320, activation='relu')(conv4_flatten)
    dense1 = Dropout(dropout_prob)(dense1)
    dense2 = Dense(320, activation='relu')(dense1)
    dense2 = Dropout(dropout_prob)(dense2)

    preds = Dense(n_classes, activation='softmax')(dense2)
    pred_class = tf.argmax(preds, axis=1)

    # Loss function
    loss = tf.reduce_mean(categorical_crossentropy(labels, preds))

    # Training operation
    train_step = tf.train.GradientDescentOptimizer(l_rate).minimize(loss)

    return [preds, pred_class, loss, train_step]
def loss(y_true, y_pred):
    """Return the batch-scaled KL total minus the log-likelihood.

    Layers of ``model`` whose exact type is ``Bayesian``, ``OSBayesian``
    or ``OSBayesianConvolution2D`` each add
    ``sum(KL_standard_normal(mean, log_std, prior_log_std)) / batch_size``
    to the KL total.  The result is
    ``sum(KL / nb_batchs - log_likelihood) / batch_size``, with the
    log-likelihood being the negated categorical cross-entropy.

    Side effect: every call appends a fresh ``prior_log_std`` variable to
    ``model.layers[-1].trainable_weights``.  ``model``, ``batch_size``
    and ``nb_batchs`` come from the enclosing scope.
    """
    KL_prior_posterior = K.variable(0.0)
    prior_log_std = K.variable(0.0, name="prior_log_std")  # Variance prior

    for layer in model.layers:
        kind = type(layer)
        is_bayesian = (kind is Bayesian
                       or kind is OSBayesian
                       or kind is OSBayesianConvolution2D)
        if not is_bayesian:
            continue
        layer_kl = K.sum(
            KL_standard_normal(layer.mean, layer.log_std, prior_log_std))
        KL_prior_posterior += layer_kl / batch_size

    # Empirical Bayes (variance prior set using maximum likelihood)
    model.layers[-1].trainable_weights.append(prior_log_std)

    # Classification likelihood.  A regression variant based on
    # log_gaussian was left disabled by the author.
    log_likelihood = -objectives.categorical_crossentropy(y_true, y_pred)

    objective = KL_prior_posterior / nb_batchs - log_likelihood
    return K.sum(objective) / batch_size
def crf_loss(y_true, y_pred):
    """Dispatch to the right CRF loss for the layer that produced y_pred.

    # Arguments
        y_true: tensor with true targets.
        y_pred: tensor with predicted targets; its ``_keras_history`` is
            used to find the CRF layer.

    # Returns
        ``crf_nll`` when the layer's ``learn_mode`` is ``'join'``.
        Otherwise the sparse categorical cross-entropy when the layer has
        ``sparse_target`` set, else the categorical cross-entropy.
    """
    crf, _ = y_pred._keras_history[:2]

    if crf.learn_mode == 'join':
        return crf_nll(y_true, y_pred)

    if crf.sparse_target:
        loss_fn = sparse_categorical_crossentropy
    else:
        loss_fn = categorical_crossentropy
    return loss_fn(y_true, y_pred)
def tf_model():
    """Train a small dense MNIST classifier and print its test accuracy.

    Keras layers are applied directly to TensorFlow placeholders.  The
    network is 784 -> 128 -> 128 -> 10 with 0.5 dropout after each hidden
    layer.  It is trained for 100 gradient-descent steps on batches of 50
    and then evaluated on the test set.
    """
    sess = tf.Session()
    K.set_session(sess)
    print(K.learning_phase())

    # This placeholder will contain our input digits, as flat vectors
    img = tf.placeholder(tf.float32, shape=(None, 784))

    # Keras layers can be called on TensorFlow tensors:
    # fully-connected layer with 128 units and ReLU activation
    x = Dense(128, activation='relu')(img)
    x = Dropout(0.5)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    # output layer with 10 units and a softmax activation
    preds = Dense(10, activation='softmax')(x)

    labels = tf.placeholder(tf.float32, shape=(None, 10))
    loss = tf.reduce_mean(categorical_crossentropy(labels, preds))

    mnist_data = input_data.read_data_sets('MNIST_data', one_hot=True)
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    # Initialize all variables
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # Run training loop
    with sess.as_default():
        for i in range(100):
            print(K.learning_phase())
            batch = mnist_data.train.next_batch(50)
            # Learning phase 1 = training mode (dropout active).
            train_step.run(feed_dict={img: batch[0],
                                      labels: batch[1],
                                      K.learning_phase(): 1})

    acc_value = accuracy(labels, preds)
    with sess.as_default():
        # The graph contains Dropout, so the learning phase must be set
        # to 0 explicitly for evaluation.
        print(acc_value.eval(feed_dict={img: mnist_data.test.images,
                                        labels: mnist_data.test.labels,
                                        K.learning_phase(): 0}))
def loss(y_true, y_pred):
    """Variational objective: scaled KL total minus log-likelihood.

    Each layer of ``model`` whose exact type is ``Bayesian``,
    ``PoorBayesian`` or ``PoorBayesianConvolution2D`` contributes
    ``sum(KL_standard_normal(mean, log_std, prior_log_std)) / batch_size``.
    The function returns
    ``sum(KL / nb_batchs - log_likelihood) / batch_size`` where the
    log-likelihood is the negated categorical cross-entropy.

    Side effect: every call appends a new ``prior_log_std`` variable to
    ``model.layers[-1].trainable_weights``.  ``model``, ``batch_size``
    and ``nb_batchs`` are read from the enclosing scope.
    """
    def is_variational(layer):
        """True when the layer's exact type is one of the Bayesian ones."""
        kind = type(layer)
        return (kind is Bayesian
                or kind is PoorBayesian
                or kind is PoorBayesianConvolution2D)

    kl_total = K.variable(0.0)
    prior_log_std = K.variable(0.0, name="prior_log_std")  # Variance prior

    for layer in model.layers:
        if is_variational(layer):
            mean = layer.mean
            log_std = layer.log_std
            kl_total += K.sum(
                KL_standard_normal(mean, log_std, prior_log_std)) / batch_size

    # Empirical Bayes (variance prior set using maximum likelihood)
    model.layers[-1].trainable_weights.append(prior_log_std)

    # Classification likelihood; the regression alternative based on
    # log_gaussian is not used.
    log_likelihood = -objectives.categorical_crossentropy(y_true, y_pred)

    return K.sum(kl_total / nb_batchs - log_likelihood) / batch_size
num_threads=8) x_train_batch = tf.cast(x_train_batch, tf.float32) x_train_batch = tf.reshape(x_train_batch, shape=batch_shape) y_train_batch = tf.cast(y_train_batch, tf.int32) y_train_batch = tf.one_hot(y_train_batch, classes) x_batch_shape = x_train_batch.get_shape().as_list() y_batch_shape = y_train_batch.get_shape().as_list() x_train_input = layers.Input(tensor=x_train_batch, batch_shape=x_batch_shape) x_train_out = cnn_layers(x_train_input) train_model = Model(inputs=x_train_input, outputs=x_train_out) cce = objectives.categorical_crossentropy(y_train_batch, x_train_out) train_model.add_loss(cce) # Do not pass the loss directly to model.compile() # because it is not yet supported for Input Tensors. train_model.compile(optimizer='rmsprop', loss=None, metrics=['accuracy']) train_model.summary() coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess, coord) train_model.fit(epochs=epochs, steps_per_epoch=steps_per_epoch) train_model.save_weights('saved_wt.h5')
def policy_gradient_loss(l_sampled, l_predicted):
    """Return ``A`` times the cross-entropy, with a trailing axis added.

    The categorical cross-entropy of the sampled and predicted values
    gets a new last axis before being multiplied by ``A``, which is read
    from the enclosing scope.
    """
    xent = categorical_crossentropy(l_sampled, l_predicted)
    xent_column = xent[:, np.newaxis]
    return A * xent_column
def policy_gradient_loss(l_sampled, l_predicted):
    """Return the ``A``-weighted cross-entropy minus an entropy bonus.

    Both terms are means over axis 1.  The second term is 0.01 times the
    cross-entropy of the prediction with itself.  ``A`` is read from the
    enclosing scope.
    """
    weighted_xent = A * categorical_crossentropy(l_sampled, l_predicted)
    policy_term = K.mean(weighted_xent, axis=1)

    self_xent = categorical_crossentropy(l_predicted, l_predicted)
    entropy_term = 0.01 * K.mean(self_xent, axis=1)

    return policy_term - entropy_term
from keras.layers import Dense

mnist_data = input_data.read_data_sets('tmp_MNIST_data', one_hot=True)

# --- Model -----------------------------------------------------------------
# The classifier is assembled the same way it would be in plain TensorFlow,
# with Keras layers called directly on TensorFlow tensors.

# Input digits, as flat vectors.
img = tf.placeholder(tf.float32, shape=(None, 784))

# Two fully-connected ReLU layers of 128 units each.
x = Dense(128, activation='relu')(img)
x = Dense(128, activation='relu')(x)
# Output layer: 10 units with a softmax activation.
preds = Dense(10, activation='softmax')(x)

# --- Loss and optimizer ----------------------------------------------------
labels = tf.placeholder(tf.float32, shape=(None, 10))
loss = tf.reduce_mean(categorical_crossentropy(labels, preds))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

with tf.Session() as sess:
    K.set_session(sess)

    # Train: 1001 steps on batches of 100.
    sess.run(tf.global_variables_initializer())
    for i in range(1001):
        batch = mnist_data.train.next_batch(100)
        train_step.run(feed_dict={img: batch[0], labels: batch[1]})

    # Evaluate on the test set and print the accuracy tensor's value.
    acc_value = accuracy(labels, preds)
    print(
        acc_value.eval(feed_dict={img: mnist_data.test.images,
                                  labels: mnist_data.test.labels})
    )
# for k in range(f.attrs['nb_layers']): # if k >= len(model.layers): # # we don't look at the last (fully-connected) layers in the savefile # break # g = f['layer_{}'.format(k)] # weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])] # model.layers[k].set_weights(weights) # f.close() # print('Model loaded.') # get the symbolic outputs of each "key" layer (we gave them unique names). outputs_dict = dict([(layer.name, layer.get_output()) for layer in model.layers]) output = model.get_output() desired_output = np_utils.to_categorical([imagenet_class], 1000) loss = categorical_crossentropy(desired_output, output) # get the gradients of the generated image wrt the loss grads = K.gradients(loss, input_image) outputs = [loss] if type(grads) in {list, tuple}: outputs += grads else: outputs.append(grads) f_outputs = K.function([input_image], outputs) f_class_output = K.function([input_image], output) def eval_loss_and_grads(x): x = x.reshape((1, 3, img_width, img_height)) outs = f_outputs([x])
def class_loss_cls(y_true, y_pred):
    """Return the weighted mean cross-entropy of the class output.

    Only the ``[0, :, :]`` slices of ``y_true`` and ``y_pred`` are
    compared; the mean categorical cross-entropy is multiplied by the
    module-level ``lambda_cls_class``.
    """
    expected = y_true[0, :, :]
    predicted = y_pred[0, :, :]
    mean_xent = K.mean(categorical_crossentropy(expected, predicted))
    return lambda_cls_class * mean_xent