Example #1
    def on_epoch_end(self, epoch, logs={}):
        logs['lr'] = K.get_value(self.model.optimizer.lr)
        current = logs.get(self.monitor)
        if current is None:
            warnings.warn('Learning Rate Plateau Reducing requires %s available!' %
                          self.monitor, RuntimeWarning)
        else:
            if self.in_cooldown():
                self.cooldown_counter -= 1
                self.wait = 0

            if self.monitor_op(current, self.best):
                self.best = current
                self.wait = 0
            elif not self.in_cooldown():
                if self.wait >= self.patience:
                    old_lr = float(K.get_value(self.model.optimizer.lr))
                    if old_lr > self.min_lr + self.lr_epsilon:
                        new_lr = old_lr * self.factor
                        new_lr = max(new_lr, self.min_lr)
                        K.set_value(self.model.optimizer.lr, new_lr)
                        if self.verbose > 0:
                            print('\nEpoch %05d: reducing learning rate to %s.' % (epoch, new_lr))
                        self.cooldown_counter = self.cooldown
                        self.wait = 0
                self.wait += 1
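
Usage note: a custom callback like the one above is registered through the callbacks argument of fit(). A minimal usage sketch (hypothetical model and data names, not part of the original example):

import keras.backend as K
from keras.callbacks import Callback

class HalveLROnEpochEnd(Callback):
    """Toy callback: halve the optimizer's learning rate after every epoch."""
    def on_epoch_end(self, epoch, logs=None):
        old_lr = float(K.get_value(self.model.optimizer.lr))
        K.set_value(self.model.optimizer.lr, old_lr * 0.5)

# model.fit(x_train, y_train, epochs=5, callbacks=[HalveLROnEpochEnd()])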
Example #2
 def train_step(self, optimizer):
     """
     One network training step.
     """
     opt = self.model.optimizer
     K.set_value(opt.lr, optimizer["lr"])
     K.set_value(opt.momentum, optimizer["momentum"])
    def reset_states(self):
        assert self.stateful, 'Layer must be stateful.'
        input_shape = self.input_shape
        if not input_shape[0]:
            raise Exception('If a RNN is stateful, a complete ' +
                            'input_shape must be provided ' +
                            '(including batch size).')

        if self.return_sequences:
            out_row, out_col, out_filter = self.output_shape[2:]
        else:
            out_row, out_col, out_filter = self.output_shape[1:]

        if hasattr(self, 'states'):
            K.set_value(self.states[0],
                        np.zeros((input_shape[0],
                                  out_row, out_col, out_filter)))
            K.set_value(self.states[1],
                        np.zeros((input_shape[0],
                                  out_row, out_col, out_filter)))
        else:
            self.states = [K.zeros((input_shape[0],
                                    out_row, out_col, out_filter)),
                           K.zeros((input_shape[0],
                                    out_row, out_col, out_filter))]
    def on_batch_begin(self, batch, logs={}):
        open_all_gates()

        rands = np.random.uniform(size=len(add_tables))
        for t, rand in zip(add_tables, rands):
            if rand < K.get_value(t["death_rate"]):
                K.set_value(t["gate"], 0)
Example #5
	def train(self, model, data):
		""" Fits the given model on a batch of data.
		"""
		kur_optimizer = model.compiled['train']['kur_optimizer']
		if kur_optimizer.scale_rate:
			if kur_optimizer.scale_rate in data:
				import keras.backend as K		# pylint: disable=import-error
				factor = numpy.mean(data[kur_optimizer.scale_rate])
				if kur_optimizer.scale_mode == 'sqrt':
					factor = factor ** 0.5
				keras_optimizer = kur_optimizer.optimizer
				K.set_value(
					keras_optimizer.lr,
					K.get_value(keras_optimizer.lr) * factor
				)
				result = self.run_batch(model, data, 'train', True)
				K.set_value(
					keras_optimizer.lr,
					K.get_value(keras_optimizer.lr) / factor
				)
				return result
			else:
				logger.warning('The optimizer "scale_rate" was specified, but '
					'no such data column was found: %s. Ignoring this.',
					kur_optimizer.scale_rate)
		return self.run_batch(model, data, 'train', True)
    def reduce_lr(self, current_nb):
        if self.reduction_function == 'linear':
            new_rate = self.reduce_rate
        elif self.reduction_function == 'exponential':
            new_rate = np.power(self.exp_base,
                                current_nb / self.half_life) * self.reduce_rate
        elif self.reduction_function == 'noam':
            new_rate = np.float32(min(
                float(current_nb) ** self.exp_base,
                float(current_nb) * self.half_life ** self.warmup_exp))

        else:
            raise NotImplementedError(
                'The decay function %s is not implemented.' % str(
                    self.reduction_function))

        if self.reduction_function == 'noam':
            lr = self.initial_lr
        else:
            lr = K.get_value(self.model.optimizer.lr)
        self.new_lr = np.maximum(np.float32(lr * new_rate), self.min_lr)
        K.set_value(self.model.optimizer.lr, self.new_lr)

        if self.reduce_each_epochs and self.verbose > 0:
            logging.info("LR reduction from {0:0.6f} to {1:0.6f}".format(float(lr),
                                                                         float(self.new_lr)))
Example #7
 def learn(self, last_observations, actions, rewards, learning_rate=0.001):
     import keras.backend as K
     K.set_value(self.train_net.optimizer.lr, learning_rate)
     frames = len(last_observations)
     self.counter += frames
     # -----
     values, policy = self.train_net.predict([last_observations, self.unroll])
     # -----
     self.targets.fill(0.)
     advantage = rewards - values.flatten()
     self.targets[self.unroll, actions] = 1.
     # -----
     loss = self.train_net.train_on_batch([last_observations, advantage], [rewards, self.targets])
     entropy = np.mean(-policy * np.log(policy + 0.00000001))
     self.pol_loss.append(loss[2])
     self.val_loss.append(loss[1])
     self.entropy.append(entropy)
     self.values.append(np.mean(values))
     min_val, max_val, avg_val = min(self.values), max(self.values), np.mean(self.values)
     print('\rFrames: %8d; Policy-Loss: %10.6f; Avg: %10.6f '
           '--- Value-Loss: %10.6f; Avg: %10.6f '
           '--- Entropy: %7.6f; Avg: %7.6f '
           '--- V-value; Min: %6.3f; Max: %6.3f; Avg: %6.3f' % (
               self.counter,
               loss[2], np.mean(self.pol_loss),
               loss[1], np.mean(self.val_loss),
               entropy, np.mean(self.entropy),
               min_val, max_val, avg_val), end='')
     # -----
     self.swap_counter -= frames
     if self.swap_counter < 0:
         self.swap_counter += self.swap_freq
         return True
     return False
 def on_batch_begin(self, batch, logs={}):
     probs = np.random.uniform(size=len(gates))
     for i,j in zip(gates, probs):
         if j > gates[i][0]:
             K.set_value(gates[i][1], 1)
         else:
             K.set_value(gates[i][1], 0)
Example #9
 def on_epoch_begin(self, epoch, logs={}):
     self.task.set('status.stage', 'epoch #'+str(epoch))
     if hasattr(self.model.optimizer, 'lr'):
         if self.task.get('config.learning_rate'):
             lr = float(self.task.get('config.learning_rate'))
             K.set_value(self.model.optimizer.lr, lr)
     else:
         self.task.set('status.error', 'Optimizer must have a "lr" attribute.')
Example #10
 def on_epoch_begin(self, epoch, logs=None):
     if not hasattr(self.model.optimizer, 'lr'):
         raise ValueError('Optimizer must have a "lr" attribute.')
     lr = self.schedule(epoch)
     if not isinstance(lr, (float, np.float32, np.float64)):
         raise ValueError('The output of the "schedule" function '
                          'should be float.')
     K.set_value(self.model.optimizer.lr, lr)
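
Usage note: the snippet above mirrors Keras' built-in LearningRateScheduler. A minimal usage sketch (hypothetical schedule and model names):

from keras.callbacks import LearningRateScheduler

def schedule(epoch):
    # drop the learning rate by a factor of 10 every 10 epochs, starting at 1e-3
    return 1e-3 * (0.1 ** (epoch // 10))

# model.fit(x_train, y_train, epochs=30, callbacks=[LearningRateScheduler(schedule)])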
Example #11
 def on_epoch_begin(self, epoch, logs={}):
     layer = self.model.layers[self.VAE_layer_idx]
     assert hasattr(layer, 'regularizer_scale'), \
         'Optimizer must have a "regularizer_scale" attribute.'
     weight = self.schedule(epoch)
     print("Current vae annealer weight is {}".format(weight))
     assert type(weight) == float, 'The output of the "schedule" function should be float.'
     K.set_value(layer.regularizer_scale, weight)
Example #12
 def set_state(self, state):
     c = 0
     vs = self.vars
     for key in vs.keys():
         if key=='oopt': continue
         v = vs[key]
         for p in v.values():
             K.set_value(p,state[c])
             c += 1
Example #13
 def on_epoch_begin(self, epoch, logs=None):
     if logs is None:
         logs = {}
     new_weight = self.schedule(epoch)
     new_value = new_weight * self.weight_orig
     print("Current {} annealer weight is {}".format(self.weight_name, new_value))
     assert type(new_weight) == float, \
         'The output of the "schedule" function should be float.'
     K.set_value(self.weight_var, new_value)
 def set_lr(self, learning_rate):
     """
     set the learning rate of the optimizer
      :param learning_rate: learning rate passed to the optimizer
     :return:
     """
     # self.optimizer.lr.set_value(learning_rate)
     K.set_value(self.optimizer.lr, learning_rate)
     print('learning rate = {}'.format(learning_rate))
  def batch_train(self,
                  curr_state,
                  next_state,
                  immediate_reward,
                  action,
                  done,
                  target,
                  type="Double"):
    """ Computes the TD Error for a given batch of tuples.

            Here, we randomly sample episodes from the Experience buffer and use
            this to train our model. This method computes this for a batch and
            trains the model.

           Args:
              curr_state(array): Numpy array representing an array of current
              states of game
              next_state(array): Numpy array for  immediate next state of
              the game
              action(array): List of actions taken to go from current state 
              to the next
              reward(array): List of rewards for the given transition
              done(bool): if this is a terminal state or not.
              target(keras.model object): Target network for computing TD error

        """
    if type == "Double":
      forward_action = np.argmax(self.model.predict(next_state), axis=1)
      predicted_qvalue = target.predict(next_state)  # BxN matrix
      B = forward_action.size
      forward_qvalue = predicted_qvalue[np.arange(B), forward_action]  # Bx1 vec

    elif type == "Vanilla":
      forward_qvalue = np.max(target.predict(next_state), axis=1)

    discounted_reward = (self.discount * forward_qvalue * (1 - done))
    Q_value = immediate_reward + discounted_reward
    target_values = self.model.predict(curr_state)
    target_values[range(target_values.shape[0]), action] = Q_value
    """
        for i, target in enumerate(target_values):
          target_values[i, action[i]] = Q_value[i]
        """
    callbacks = []
    # Update epoch number for TensorBoard.
    K.set_value(self.reward_tensor, self.cur_reward)
    if self.model_dir is not None and self.epoch_num % TB_LOGGING_EPOCHS == 0:
      callbacks.append(self.tbCallBack)
    self.model.fit(
        curr_state,
        target_values,
        verbose=0,
        initial_epoch=self.epoch_num,
        callbacks=callbacks,
        epochs=self.epoch_num + 1)
    self.epoch_num += 1
Example #16
 def on_batch_begin(self, batch, logs={}):
     # print self.batch_num
     for i in range(len(self.batch_point)):
         if self.batch_num < self.batch_point[i]:
             break
         elif self.batch_num == self.batch_point[i]:
             if i < len(self.lr):
                 K.set_value(self.model.optimizer.lr, self.lr[i])
             print('current lr:', K.get_value(self.model.optimizer.lr))
     self.batch_num += 1
Example #17
    def _adjust_learning_rate(self, epoch):
        old_lr = K.get_value(self.model.optimizer.lr)
        new_lr = self.initial_lr * self.multiplier(epoch)
        K.set_value(self.model.optimizer.lr, new_lr)

        if hasattr(self.model.optimizer, 'momentum') and self.momentum_correction:
            # See the paper cited above for more information about momentum correction.
            self.restore_momentum = K.get_value(self.model.optimizer.momentum)
            K.set_value(self.model.optimizer.momentum,
                        self.restore_momentum * new_lr / old_lr)
 def train_step(state_input, mcts_probs, winner, learning_rate):
     state_input_union = np.array(state_input)
     mcts_probs_union = np.array(mcts_probs)
     winner_union = np.array(winner)
     loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
     action_probs, _ = self.model.predict_on_batch(state_input_union)
     entropy = self_entropy(action_probs)
     K.set_value(self.model.optimizer.lr, learning_rate)
     self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
     return loss[0], entropy
    def update_learning_rate(self, total_steps):
        # The deepmind paper says
        # ~400k: 1e-2
        # 400k~600k: 1e-3
        # 600k~: 1e-4

        lr = self.decide_learning_rate(total_steps)
        if lr:
            K.set_value(self.opt.lr, lr)
            logger.debug(f"total step={total_steps}, set learning rate to {lr}")
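
Note: decide_learning_rate is not shown above; a sketch that matches the step table quoted in the comment (an assumption, not the original implementation):

def decide_learning_rate(self, total_steps):
    # thresholds follow the schedule quoted in the comment above
    if total_steps < 400_000:
        return 1e-2
    elif total_steps < 600_000:
        return 1e-3
    return 1e-4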
 def set_lr(self, learning_rate):
     """
     set the learning rate of the optimizer
      :param learning_rate: learning rate passed to the optimizer
     :return:
     """
     # self.optimizer.lr.set_value(learning_rate)
     K.set_value(self.optimizer.lr, learning_rate)
     print('learning rate = {}'.format(learning_rate))
     requests.post('http://localhost:8000/setmsg', None,
                           {'msg': 'set learning rate to {}'.format(learning_rate)})
Example #21
 def reset_states(self):
     assert self.stateful, 'Layer must be stateful.'
     input_shape = self.input_spec[0].shape
     if not input_shape[0]:
         raise Exception('If a RNN is stateful, a complete ' +
                         'input_shape must be provided (including batch size).')
     if hasattr(self, 'states'):
         K.set_value(self.states[0],
                     np.zeros((input_shape[0], self.output_dim)))
     else:
         self.states = [K.zeros((input_shape[0], self.output_dim))]
 def on_batch_begin(self, batch, logs=None):
     self.step_num += 1
     t = self.step_num
     n = self.n_model
     p = self.warmup
     s = self.start_decay
     e = self.end_decay
     first = 1+t*(n-1)/(n*p)
     second = n
     third = n*(2*n)**((s-n*t)/(e-s))
     lr = self.basic * min(first, second, third)
     K.set_value(self.model.optimizer.lr, lr)
Example #23
    def load_weights(self, filepath):
        # Loads weights from HDF5 file
        import h5py
        f = h5py.File(filepath, 'r')  # open weights file read-only
        weights = [f['param_weight_{}'.format(p)] for p in range(f.attrs['nb_params'])]
        biases  = [f['param_bias_{}'.format(p)] for p in range(f.attrs['nb_params']+1)]
        for model_weight, saved_weight in zip(self.Ws, weights):
            K.set_value(model_weight, saved_weight)
        for model_bias, saved_bias in zip(self.bs, biases):
            K.set_value(model_bias, saved_bias)

        f.close()
Example #24
    def build(self):
        input_shape = self.input_shape
        input_dim = input_shape[2] # = |x| # works only for stateful? (todo: try)
        self.input_dim = input_dim
        self.input = K.placeholder(input_shape)
        
        # from IPython import embed; embed()

        # output dim = |c| = |h| = |output|
        # input dim = |x|

        if self.stateful:
            self.reset_states()
        else:
            # initial states: all-zero tensors of shape (output_dim)
            self.states = [None, None, None]

        # input_dim x output_dim
        # output dim = 50 = |h|?

        input_dim = self.input_dim
        output_dim = self.output_dim

        n = self.output_dim // len(self.periods)
        
        mask = np.zeros((self.output_dim, self.output_dim))
        period = np.zeros((self.output_dim, ), 'i')

        for i, T in enumerate(self.periods):
            mask[i*n:(i+1)*n, i*n:] = 1
            period[i*n:(i+1)*n] = T

        # from IPython import embed; embed()
        self.mask = K.zeros((self.output_dim, self.output_dim))
        self.period = K.zeros((self.output_dim, ), 'i')

        K.set_value(self.mask, mask)
        K.set_value(self.period, period)

        ## todo: mask & period are shared
        # n: K.zeros is shared by default (?)

        self.hh = self.init((self.output_dim, self.output_dim))
        self.xh = self.init((self.input_dim, self.output_dim))
        self.b = K.zeros((self.output_dim,), name="b")

        self.trainable_weights = [self.hh, self.xh, self.b]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #25
 def on_batch_begin(self, batch, logs={}):
     if self.batch_count % 2 == 0:
         # Global regularization
         for depth in range(len(self.gates)):
             columns = len(self.gates[depth])+1
             selected_column = np.random.randint(low=1, high=columns + 1)  # inclusive range [1, columns]
             for i in range(1,columns):
                 if i >= selected_column:
                     for j in range(len(self.gates[depth][i])):
                         K.set_value(self.gates[depth][i][j], 1)
                 else:
                     for j in range(len(self.gates[depth][i])):
                         K.set_value(self.gates[depth][i][j], 0)
     else:
         # Local regularization
         for depth in range(len(self.gates)):
             columns = len(self.gates[depth])+1
             for i in range(1,columns):
                 for j in range(len(self.gates[depth][i])):
                     prob = np.random.uniform()
                     if prob > 0.5:
                         K.set_value(self.gates[depth][i][j], 1)
                     else:
                         K.set_value(self.gates[depth][i][j], 0)
     self.batch_count = self.batch_count+1
Example #26
 def fit_generator(self, generator, samples_per_epoch, nb_epoch, validation_generator, nb_val_samples, opt):
     val_losses = []
     lr = K.get_value(self.optimizer.lr)
     for epoch in range(nb_epoch):
         super(sModel, self).fit_generator(generator, samples_per_epoch, 1, verbose=1)
         val_loss = exp(self.evaluate_generator(validation_generator, nb_val_samples))
         val_losses.append(val_loss)
         print('Epoch {}/{}. Validation loss: {}'.format(epoch + 1, nb_epoch, val_loss))
         if len(val_losses) > 2 and (val_losses[-2] - val_losses[-1]) < opt.decay_when:
             lr *= opt.learning_rate_decay
             K.set_value(self.optimizer.lr, lr)
         if epoch == nb_epoch-1 or epoch % opt.save_every == 0:
             savefile = '%s/lm_%s_epoch%d_%.2f.h5' % (opt.checkpoint_dir, opt.savefile, epoch + 1, val_loss)
             self.save_weights(savefile)
Example #27
    def on_epoch_end(self, epoch, logs={}):
        mean_loss = np.array(self.epoch_log).mean()
        if mean_loss + self.min_improvment <= self.current_best:
            self.current_best = mean_loss
            self.current_best_epoch = epoch

        if epoch - self.current_best_epoch > self.epoch_patience:
            lr = K.get_value(self.optimizer.lr)
            new_lr = lr*self.factor
            self.min_improvment *= self.factor
            K.set_value(self.optimizer.lr, new_lr)
            print()
            print("Reduce learning rate to: {:08f}".format(new_lr))
            self.current_best_epoch = epoch
Example #28
 def fit_generator(self, generator, steps_per_epoch, epochs, validation_data, validation_steps, opt):
     val_losses = []
     lr = K.get_value(self.optimizer.lr)
     for epoch in range(epochs):
         super(sModel, self).fit_generator(generator, steps_per_epoch, epochs=epoch+1, verbose=1, initial_epoch=epoch)
         val_loss = exp(self.evaluate_generator(validation_data, validation_steps))
         val_losses.append(val_loss)
         print('Epoch {}/{}. Validation perplexity: {}'.format(epoch + 1, epochs, val_loss))
         if len(val_losses) > 2 and (val_losses[-2] - val_losses[-1]) < opt.decay_when:
             lr *= opt.learning_rate_decay
             K.set_value(self.optimizer.lr, lr)
         if epoch == epochs-1 or epoch % opt.save_every == 0:
             savefile = '%s/lm_%s_epoch%d_%.2f.h5' % (opt.checkpoint_dir, opt.savefile, epoch + 1, val_loss)
             self.save_weights(savefile)
def set_learning_rate(model, step_number):
    min_learning_rate = 0.001
    steps_per_drop = 100
    drop_by = 0.98

    lr = K.get_value(model.optimizer.lr)

    if lr <= min_learning_rate:
        return lr
    if step_number % steps_per_drop == (steps_per_drop-1):
        lr *= drop_by

    K.set_value(model.optimizer.lr, lr)
    return lr
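
Usage note: a self-contained sketch of driving the helper above from a manual train_on_batch loop (the toy model and data are assumptions for illustration only):

import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# toy model and batch, only to exercise set_learning_rate() defined above
model = Sequential([Dense(1, input_dim=4)])
model.compile(optimizer='sgd', loss='mse')
x_batch, y_batch = np.random.rand(8, 4), np.random.rand(8, 1)

for step_number in range(300):
    lr = set_learning_rate(model, step_number)
    model.train_on_batch(x_batch, y_batch)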
Example #30
 def reset_states(self):
      assert self.stateful or self.state_input or len(self.state_outputs) > 0, 'Layer must be stateful.'
     input_shape = self.input_shape
     if not input_shape[0]:
         raise Exception('If a RNN is stateful, a complete ' +
                         'input_shape must be provided ' +
                         '(including batch size).')
     if hasattr(self, 'states'):
         K.set_value(self.states[0],
                     np.zeros((input_shape[0], self.hidden_dim)))
         K.set_value(self.states[1],
                     np.zeros((input_shape[0], self.hidden_dim)))
     else:
         self.states = [K.zeros((input_shape[0], self.hidden_dim)),
                        K.zeros((input_shape[0], self.hidden_dim))]
Example #31
 def reset_states(self):
     K.set_value(self.true_positives, 0)
Example #32
 def update_lr(self):
     t = (self.trn_iteration % self.cycle_length) / self.cycle_length
     lr = (1 - t) * self.max_lr + t * self.min_lr
     K.set_value(self.model.optimizer.lr, lr)
Example #33
    def train_target_discriminator(self,
                                   source_gen,
                                   target_gen,
                                   source_model=None,
                                   src_discriminator=None,
                                   tgt_discriminator=None,
                                   epochs=50,
                                   save_interval=1,
                                   start_epoch=0,
                                   num_batches=200):
        '''
        :param batch_data:
        :param source_model:
        :param src_discriminator:
        :param tgt_discriminator:
        :param epochs:
        :param batch_size:
        :param save_interval:
        :param start_epoch:
        :param num_batches: number of batches looped over per epoch
        :return:
        '''
        # TODO: does this show that labels don't need to be assigned one-to-one?
        self.define_source_encoder(source_model)

        # TODO: does this effectively freeze the source encoder layers?
        for layer in self.source_encoder.layers:
            layer.trainable = False

        # get_discriminator(self, model, weights=None):
        source_discriminator = self.get_discriminator(self.source_encoder,
                                                      src_discriminator)
        target_discriminator = self.get_discriminator(self.target_encoder,
                                                      tgt_discriminator)
        # TODO: doesn't this duplicate the weight loading already done in self.get_discriminator?
        '''
        if src_discriminator is not None:
            source_discriminator.load_weights(src_discriminator)
        if tgt_discriminator is not None:
            target_discriminator.load_weights(tgt_discriminator)
        '''

        # TODO: why binary_crossentropy? There is no label input here -> labels are supplied later
        source_discriminator.compile(loss="binary_crossentropy",
                                     optimizer=self.tgt_optimizer,
                                     metrics=['accuracy'])
        target_discriminator.compile(loss="binary_crossentropy",
                                     optimizer=self.tgt_optimizer,
                                     metrics=['accuracy'])

        # TODO(11/12): change the save path
        callback1 = keras.callbacks.TensorBoard(
            os.path.join(save_path, 'tensorboard', 'binary'))
        callback1.set_model(source_discriminator)
        callback2 = keras.callbacks.TensorBoard(
            os.path.join(save_path, 'tensorboard', 'binary'))
        callback2.set_model(target_discriminator)
        src_names = ['src_discriminator_loss', 'src_discriminator_acc']
        tgt_names = ['tgt_discriminator_loss', 'tgt_discriminator_acc']

        for iteration in range(start_epoch, epochs):

            avg_loss, avg_acc, index = [0, 0], [0, 0], 0
            # TODO: the discriminator loss is implemented this way because the early layers are shared
            # source_gen -> use function(next()) get the tuple (img, label)
            for source, target in zip(
                    next(source_gen)[0],
                    next(target_gen)[0]):
                l1, acc1 = source_discriminator.train_on_batch(
                    source,
                    np_utils.to_categorical(np.zeros(source.shape[0]), 2))
                l2, acc2 = target_discriminator.train_on_batch(
                    target, np_utils.to_categorical(np.ones(target.shape[0]),
                                                    2))
                index += 1
                loss, acc = (l1 + l2) / 2, (acc1 + acc2) / 2
                print(iteration + 1, ': ', index, '/', num_batches,
                      '; Loss: %.4f' % loss, ' (', '%.4f' % l1, '%.4f' % l2,
                      '); Accuracy: ', acc, ' (', '%.4f' % acc1, '%.4f' % acc2,
                      ')')
                avg_loss[0] += l1
                avg_acc[0] += acc1
                avg_loss[1] += l2
                avg_acc[1] += acc2
                if index % num_batches == 0:
                    break

            if iteration % self.discriminator_decay_rate == 0:
                lr = K.get_value(source_discriminator.optimizer.lr)
                K.set_value(source_discriminator.optimizer.lr,
                            lr * self.discriminator_decay_factor)
                lr = K.get_value(target_discriminator.optimizer.lr)
                K.set_value(target_discriminator.optimizer.lr,
                            lr * self.discriminator_decay_factor)
                print('Learning Rate Decayed to: ',
                      K.get_value(target_discriminator.optimizer.lr))
            # TODO(11/12): update the save path and weight file names here
            if iteration % save_interval == 0:
                source_discriminator.save_weights(
                    os.path.join(save_path,
                                 'discriminator_source_%02d.hdf5' % iteration))
                target_discriminator.save_weights(
                    os.path.join(save_path,
                                 'discriminator_target_%02d.hdf5' % iteration))

            self.tensorboard_log(
                callback1, src_names,
                [avg_loss[0] / source.shape[0], avg_acc[0] / source.shape[0]],
                iteration)
            self.tensorboard_log(
                callback2, tgt_names,
                [avg_loss[1] / target.shape[0], avg_acc[1] / target.shape[0]],
                iteration)
Example #34
 def update_lr(self, model, decay):
     new_lr = K.get_value(model.optimizer.lr) - decay
     if new_lr < 0:
         new_lr = 0
     # print(K.get_value(model.optimizer.lr))
     K.set_value(model.optimizer.lr, new_lr)
Example #35
 def step_decay(self, epoch):
     if epoch % 2 == 0 and epoch != 0:
         lr = K.get_value(self.model.optimizer.lr)
         K.set_value(self.model.optimizer.lr, lr * .5)
         print("lr changed to {}".format(lr * .5))
     return K.get_value(self.model.optimizer.lr)
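
Note: the same step-decay policy can also be expressed as a plain schedule function and handed to Keras' LearningRateScheduler (a sketch with assumed constants, not the original code):

from keras.callbacks import LearningRateScheduler

def step_decay_schedule(epoch):
    initial_lr = 1e-3  # assumed starting rate
    # halve the learning rate every second epoch, like step_decay above
    return initial_lr * (0.5 ** (epoch // 2))

# model.fit(x_train, y_train, epochs=20, callbacks=[LearningRateScheduler(step_decay_schedule)])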
Example #36
Kfold_preds_final = []
k = 0
RMSE = []

for train_idx, test_idx in skf.split(train1, y_train):
    
    print("Number of Folds.."+str(k+1))
    
    # Initialize a new Model for Current FOLD 
    epochs = 1
    batch_size = 512 * 3
    steps = (int(train1.shape[0]/batch_size))*epochs
    lr_init, lr_fin = 0.009, 0.0045
    lr_decay = exp_decay(lr_init, lr_fin, steps)
    modelRNN = RNN_model()
    K.set_value(modelRNN.optimizer.lr, lr_init)
    K.set_value(modelRNN.optimizer.decay, lr_decay)

    #K Fold Split 
    
    X_train1, X_test1 = train1[train_idx], train1[test_idx]
    print(X_train1.shape, X_test1.shape)
    y_train1, y_test1 = y_train[train_idx], y_train[test_idx]
    print(y_train1.shape, y_test1.shape)
    gc.collect()
    
    print(type(X_train1))
    print(X_train1.shape)
    print(type(X_train1[:,12]))
    
    X_train_final = get_data_frame(X_train1)
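
Note: exp_decay is called above but not defined in this snippet; one common definition, given here as an assumption about the missing helper, derives a per-step decay that moves the rate from lr_init to lr_fin over the given number of steps:

def exp_decay(lr_init, lr_fin, steps):
    # decay factor such that lr_init * (1 + decay) ** -(steps - 1) == lr_fin,
    # i.e. an exponential interpolation between the two rates
    return (lr_init / lr_fin) ** (1.0 / (steps - 1)) - 1.0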
 def on_epoch_end(self, epoch, logs=None):
     if (epoch+1)%2 == 0:
         lr = K.get_value(self.model.optimizer.lr)
         K.set_value(self.model.optimizer.lr, lr*0.94)
Example #38
        model.trainable = True
        advmodel.trainable = False

        model.compile(loss=[make_loss_model(c=1.0)],
                      optimizer=opt_model,
                      metrics=['accuracy'])
        DRf.compile(loss=[make_loss_model(c=1.0),
                          make_loss_advmodel(c=-lam)],
                    optimizer=opt_DRf)
        DfR.compile(loss=[make_loss_advmodel(c=1.0)], optimizer=opt_DfR)

        indices = np.random.permutation(len(train_x))[:batch_size]

        ## Set learning rate for DRf according to num_epoch
        current_learning_rate = calculate_learning_rate(i)
        K.set_value(DRf.optimizer.lr, current_learning_rate)
        ###
        DRf.train_on_batch(
            train_x.iloc[indices],
            [train_y.iloc[indices], df_Convert_v2.iloc[indices]])
        print("learning_rate of DRf: ", K.eval(DRf.optimizer.lr))
        print("learning_rate of DfR: ", K.eval(DfR.optimizer.lr))

        #Fit "advmodel"
        if lam >= 0.0:
            model.trainable = False
            advmodel.trainable = True

            model.compile(loss=[make_loss_model(c=1.0)],
                          optimizer=opt_model,
                          metrics=['accuracy'])
Example #39
def load_pretrain_weights(vade, X, Y, dataset, autoencoder=None, ae_weights=None):
    if autoencoder is None:
        ae = model_from_json(open(ae_weights).read())
        ae.load_weights('pretrain_weights/ae_'+dataset+'_weights.h5')
        vade.get_layer('encoder_0').set_weights(ae.layers[0].get_weights())
        vade.get_layer('encoder_1').set_weights(ae.layers[1].get_weights())
        vade.get_layer('encoder_2').set_weights(ae.layers[2].get_weights())
        vade.get_layer('z_mean').set_weights(ae.layers[3].get_weights())
        vade.get_layer('decoder_0').set_weights(ae.layers[-4].get_weights())
        vade.get_layer('decoder_1').set_weights(ae.layers[-3].get_weights())
        vade.get_layer('decoder_2').set_weights(ae.layers[-2].get_weights())
        vade.get_layer('output').set_weights(ae.layers[-1].get_weights())
        sample = sample_output.predict(X,batch_size=batch_size)
    else:
        autoencoder.load_weights(ae_weights)
        vade.get_layer('encoder_0').set_weights(autoencoder.layers[1].get_weights())
        vade.get_layer('encoder_1').set_weights(autoencoder.layers[2].get_weights())
        vade.get_layer('encoder_2').set_weights(autoencoder.layers[3].get_weights())
        vade.get_layer('z_mean').set_weights(autoencoder.layers[4].get_weights())
        vade.get_layer('decoder_0').set_weights(autoencoder.layers[-4].get_weights())
        vade.get_layer('decoder_1').set_weights(autoencoder.layers[-3].get_weights())
        vade.get_layer('decoder_2').set_weights(autoencoder.layers[-2].get_weights())
        vade.get_layer('output').set_weights(autoencoder.layers[-1].get_weights())
        sample = sample_output.predict(X, batch_size=batch_size)

    if dataset == 'mnist':

        gmm = GaussianMixture(n_components=n_centroid, covariance_type='diag')
        gmm.fit(sample)
        acc_0 = cluster_acc(Y, gmm.predict(sample))
        means_0 = gmm.means_
        covs_0 = gmm.covariances_
        for i in range(3):
            gmm.fit(sample)
            acc_0_new = cluster_acc(Y, gmm.predict(sample))
            if acc_0_new > acc_0:
                acc_0 = acc_0_new
                means_0 = gmm.means_
                covs_0 = gmm.covariances_

        K.set_value(u_p, means_0.T)
        K.set_value(lambda_p, covs_0.T)

    if dataset == 'reuters10k':
        k = KMeans(n_clusters=n_centroid)
        k.fit(sample)
        K.set_value(u_p, floatX(k.cluster_centers_.T))

    if dataset == 'har':
        g = mixture.GMM(n_components=n_centroid,covariance_type='diag',random_state=3)
        g.fit(sample)
        K.set_value(u_p, floatX(g.means_.T))
        K.set_value(lambda_p, floatX(g.covars_.T))

    if (dataset == 'custom') | (dataset is None):
        gmm = GaussianMixture(n_components=n_centroid, covariance_type='diag')
        gmm.fit(sample)
        acc_0 = cluster_acc(Y, gmm.predict(sample))
        means_0 = gmm.means_
        covs_0 = gmm.covariances_
        print(acc_0)
        print('means:', means_0.shape)
        for i in range(3):
            gmm.fit(sample)
            acc_0_new = cluster_acc(Y, gmm.predict(sample))
            if acc_0_new > acc_0:
                acc_0 = acc_0_new
                means_0 = gmm.means_
                covs_0 = gmm.covariances_

        K.set_value(u_p, means_0.T)
        K.set_value(lambda_p, covs_0.T)

    # Set trainable weights in 'latent' layer to initialized values
    K.set_value(vade.get_layer('latent').u_p, K.eval(u_p))
    K.set_value(vade.get_layer('latent').theta_p, K.eval(theta_p))
    K.set_value(vade.get_layer('latent').lambda_p, K.eval(lambda_p))

    print ('pretrain weights loaded!')
    return vade
    def reset_spikevars(self, sample_idx):
        """
        Reset variables present in spiking layers. Can be turned off for
        instance when a video sequence is tested.
        """

        mod = self.config.getint('simulation', 'reset_between_nth_sample')
        mod = mod if mod else sample_idx + 1
        do_reset = sample_idx % mod == 0
        if do_reset:
            k.set_value(self.mem, self.init_membrane_potential())
        k.set_value(self.time, np.float32(self.dt))
        zeros_output_shape = np.zeros(self.output_shape, k.floatx())
        if self.tau_refrac > 0:
            k.set_value(self.refrac_until, zeros_output_shape)
        if self.spiketrain is not None:
            k.set_value(self.spiketrain, zeros_output_shape)
        k.set_value(self.last_spiketimes, zeros_output_shape - 1)
        k.set_value(self.v_thresh, zeros_output_shape + self._v_thresh)
        k.set_value(self.prospective_spikes, zeros_output_shape)
        k.set_value(self.missing_impulse, zeros_output_shape)
Example #41
 def adjust_learning_rate(self, optimizer, epoch):
     K.set_value(optimizer.lr, self.alpha_plan[epoch])
     K.set_value(optimizer.beta_1, self.beta1_plan[epoch])
Example #42
model.add(Masking(input_shape=(seq_len, 2048), mask_value=0))
# model.add(SimpleRNN(128, return_sequences=True, activation='sigmoid', use_bias=True))
model.add(LSTM(256, return_sequences=True,
               activation='sigmoid', recurrent_activation='tanh',
               use_bias=True, unit_forget_bias=True))
# model.add(LSTM(32, return_sequences=True, activation='sigmoid', use_bias=True))
# model.add(Dense(8, activation=kb.sigmoid))
model.add(Dense(2, activation=kb.softmax))

optimizer = adam(lr=0.0001)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

data_dir = './ActivityDataset'
traj_dir = data_dir + '/' + 'TrajectoriesLong'
train_dir = traj_dir + '/' + 'train'

ex_dirs = get_immediate_subdirectories(train_dir)

acc_arr = []
for i in range(num_iter):
    X, Y = get_batch_resnet(train_dir, batch_size, seq_len)
    model.train_on_batch(X, Y)
    score, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=1)
    acc_arr.append(acc)
    print('Step:', i, 'Score: ', score, 'Accuracy:', acc)
    if (i % decay_step == 0) and i != 0:
        kb.set_value(optimizer.lr, 0.5 * kb.get_value(optimizer.lr))
    if (i % disp_step == 0) and i != 0:
        plt.plot(acc_arr)
        plt.pause(0.0005)
Example #43
def mse_trainer_mlp():

    bin_val = 2
    un_val = 1
    # training add mlp and storing its history values for plotting.
    m_add = mlp_model(bin_val)
    m_add.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_add.optimizer.lr, 1e-2)
    hist_m_add_u = m_add.fit(trx_add,
                             try_add,
                             validation_data=(tex_add, tey_add),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_add.optimizer.lr, 1e-3)
    hist_m_add_d = m_add.fit(trx_add,
                             try_add,
                             validation_data=(tex_add, tey_add),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_add.optimizer.lr, 1e-4)
    hist_m_add_t = m_add.fit(trx_add,
                             try_add,
                             validation_data=(tex_add, tey_add),
                             batch_size=1024,
                             epochs=100)

    m_add.save('mlp_mse_add.h5')

    # training sub mlp and storing its history values for plotting.
    m_sub = mlp_model(bin_val)
    m_sub.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_sub.optimizer.lr, 1e-2)
    hist_m_sub_u = m_sub.fit(trx_sub,
                             try_sub,
                             validation_data=(tex_sub, tey_sub),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_sub.optimizer.lr, 1e-3)
    hist_m_sub_d = m_sub.fit(trx_sub,
                             try_sub,
                             validation_data=(tex_sub, tey_sub),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_sub.optimizer.lr, 1e-4)
    hist_m_sub_t = m_sub.fit(trx_sub,
                             try_sub,
                             validation_data=(tex_sub, tey_sub),
                             batch_size=1024,
                             epochs=100)

    m_sub.save('mlp_mse_sub.h5')

    # training mul mlp and storing its history values for plotting.
    m_mul = mlp_model(bin_val)
    m_mul.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_mul.optimizer.lr, 1e-2)
    hist_m_mul_u = m_mul.fit(trx_mul,
                             try_mul,
                             validation_data=(tex_mul, tey_mul),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_mul.optimizer.lr, 1e-3)
    hist_m_mul_d = m_mul.fit(trx_mul,
                             try_mul,
                             validation_data=(tex_mul, tey_mul),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_mul.optimizer.lr, 1e-4)
    hist_m_mul_t = m_mul.fit(trx_mul,
                             try_mul,
                             validation_data=(tex_mul, tey_mul),
                             batch_size=1024,
                             epochs=100)

    m_mul.save('mlp_mse_mul.h5')

    # training div mlp and storing its history values for plotting.
    m_div = mlp_model(bin_val)
    m_div.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_div.optimizer.lr, 1e-2)
    hist_m_div_u = m_div.fit(trx_div,
                             try_div,
                             validation_data=(tex_div, tey_div),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_div.optimizer.lr, 1e-3)
    hist_m_div_d = m_div.fit(trx_div,
                             try_div,
                             validation_data=(tex_div, tey_div),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_div.optimizer.lr, 1e-4)
    hist_m_div_t = m_div.fit(trx_div,
                             try_div,
                             validation_data=(tex_div, tey_div),
                             batch_size=1024,
                             epochs=100)

    m_div.save('mlp_mse_div.h5')

    # training sqr mlp and storing its history values for plotting.
    m_sqr = mlp_model(un_val)
    m_sqr.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_sqr.optimizer.lr, 1e-2)
    hist_m_sqr_u = m_sqr.fit(trx_sqr,
                             try_sqr,
                             validation_data=(tex_sqr, tey_sqr),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_sqr.optimizer.lr, 1e-3)
    hist_m_sqr_d = m_sqr.fit(trx_sqr,
                             try_sqr,
                             validation_data=(tex_sqr, tey_sqr),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_sqr.optimizer.lr, 1e-4)
    hist_m_sqr_t = m_sqr.fit(trx_sqr,
                             try_sqr,
                             validation_data=(tex_sqr, tey_sqr),
                             batch_size=1024,
                             epochs=100)

    m_sqr.save('mlp_mse_sqr.h5')

    # training qrt mlp and storing its history values for plotting.
    m_qrt = mlp_model(un_val)
    m_qrt.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_qrt.optimizer.lr, 1e-2)
    hist_m_qrt_u = m_qrt.fit(trx_qrt,
                             try_qrt,
                             validation_data=(tex_qrt, tey_qrt),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_qrt.optimizer.lr, 1e-3)
    hist_m_qrt_d = m_qrt.fit(trx_qrt,
                             try_qrt,
                             validation_data=(tex_qrt, tey_qrt),
                             batch_size=1024,
                             epochs=100)
    K.set_value(m_qrt.optimizer.lr, 1e-4)
    hist_m_qrt_t = m_qrt.fit(trx_qrt,
                             try_qrt,
                             validation_data=(tex_qrt, tey_qrt),
                             batch_size=1024,
                             epochs=100)

    m_qrt.save('mlp_mse_qrt.h5')

    return hist_m_add_u, hist_m_add_d, hist_m_add_t, hist_m_sub_u, hist_m_sub_d, hist_m_sub_t, \
           hist_m_mul_u, hist_m_mul_d, hist_m_mul_t, hist_m_div_u, hist_m_div_d, hist_m_div_t, \
           hist_m_sqr_u, hist_m_sqr_d, hist_m_sqr_t, hist_m_qrt_u, hist_m_qrt_d, hist_m_qrt_t
Example #44
    def train(self,
              batch_size: int = 32,
              epochs: int = 100,
              lr_multipliers: Tuple[float,
                                    ...] = (0.5, 0.75, 0.8, 1, 1.2, 1.5, 2),
              nb_models: int = 3,
              threads: int = 4,
              monitor_metric: str = 'val_word_acc_processed',
              log_dir: str = 'logs',
              **kwargs):
        self.params.update(locals()), self.params.pop('self')
        ''' Save all the objects/parameters for reproducibility '''
        log_dir = Path(log_dir).joinpath(
            datetime.now().replace(microsecond=0).isoformat())
        model_path = Path(log_dir).joinpath('checkpoints').joinpath(
            "best-model.joblib")
        model_path.parent.mkdir(parents=True, exist_ok=True)
        with open(Path(log_dir).joinpath('params.json'), 'w',
                  encoding='utf-8') as f:
            json.dump(
                {
                    'params': self.params,
                    'commandline': sys.argv,
                    'commit': get_current_commit()
                },
                f,
                indent=4)

        train_generator = DataGenerator(dataset=self.train_dataset,
                                        processor=self.processor,
                                        batch_size=batch_size)
        valid_generator = DataGenerator(dataset=self.valid_dataset,
                                        processor=self.processor,
                                        batch_size=batch_size,
                                        with_samples=True)

        best_current_models: List[ModelInstance] = []
        best_prev_models: List[ModelInstance] = []

        for epoch in range(epochs):
            best_prev_models = deepcopy(best_current_models)
            best_current_models = []

            def log_model(score):
                nonlocal best_current_models
                learning_rate = float(K.get_value(self.model.optimizer.lr))
                path = f'{log_dir}/model-epoch:{epoch}-acc:{score:.3f}-lr:{learning_rate:.3f}.joblib'
                best_current_models.append(
                    ModelInstance(performance=score,
                                  path=path,
                                  lr=learning_rate))
                print('Obtained:', str(best_current_models[-1]), flush=True)
                Word2Morph(model=self.model,
                           processor=self.processor).save(path)

                best_current_models = list(set(best_current_models))
                best_current_models = sorted(best_current_models, reverse=True)
                best_current_models, worst = (best_current_models[:nb_models],
                                              best_current_models[nb_models:])
                for model in worst:
                    print('Removing:', model.path, flush=True)
                    os.remove(model.path)

                print('Resulting list:')
                for i, model in enumerate(best_current_models):
                    print(i, ':', str(model))
                print(flush=True)

            # There are no models for the initial epoch => use the initial random model as the base model
            if len(best_current_models) == 0:
                log_model(score=0)

            for base_model in best_prev_models:
                for lr_multiplier in lr_multipliers:
                    print('Trying to modify:', str(base_model), flush=True)

                    # Clean-up the keras session before working with a new model
                    del self.processor
                    del self.model
                    K.clear_session()
                    gc.collect()

                    w2m = Word2Morph.load_model(base_model.path)
                    self.model, self.processor = w2m.model, w2m.processor
                    lr = float(K.get_value(self.model.optimizer.lr))
                    K.set_value(self.model.optimizer.lr, lr * lr_multiplier)

                    history = self.model.fit_generator(
                        generator=train_generator,
                        epochs=epoch + 1,
                        initial_epoch=epoch,
                        callbacks=[
                            Evaluate(data_generator=valid_generator,
                                     to_sample=self.processor.to_sample)
                        ],
                        class_weight=self.class_weights,
                        use_multiprocessing=True,
                        workers=threads,
                    )
                    log_model(score=history.history[monitor_metric][-1])
Example #45
def lr_decay():

    #### Learning rate decay
    K.set_value(adam_nn.lr, max(K.eval(adam_nn.lr)*decay_nn, 0.0002))

    print('lr_nn:%f' % K.eval(adam_nn.lr))
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['categorical_accuracy'])
    #model.compile(loss=norm_rmse, optimizer=rms)
    #model.compile(loss=log_error, optimizer=rms)
    #model.compile(loss=[contrastive_loss], optimizer=rms)
    #model.compile(loss=[contrastive_loss,'mse'],  loss_weights=[10.0, 1.0], optimizer=rms)
    #model.fit([np.expand_dims(tr_pairs[:, 0],axis=3), np.expand_dims(tr_pairs[:, 1],axis=3)], tr_y,
    #          validation_split = 0.1,
    #          batch_size=128,
    #          nb_epoch=nb_epoch)

    lr = 1e-3
    for i in range(2):  # num times to drop learning rate
        print('Learning rate: {0}'.format(lr))
        K.set_value(model.optimizer.lr, lr)
        for j in range(20):  # num times to generate data ~8M images
            print("i,j={0},{1}".format(i, j))
            [img_pairs, img_rot, img_label] = gen_data(imgs, quats, num_pairs)
            rot_label = compute_clusters(img_rot)  # rotation cluster labels
            print(rot_label.shape)
            input_top = np.expand_dims(img_pairs[:, 0], axis=3)
            input_bot = np.expand_dims(img_pairs[:, 1], axis=3)

            model.fit([input_top, input_bot], [rot_label],
                      validation_split=0.1,
                      batch_size=batch_size,
                      nb_epoch=nb_epoch,
                      callbacks=[model_checkpoint])
            #model.fit([input_top, input_bot], [img_label, img_rot], shuffle=True, validation_split = 0.1, batch_size=128, nb_epoch=nb_epoch)
            # compute final accuracy on training and test sets
def readSLICandMDLInit(resultFile,all_Q_mat,superpixel_label_mat):
    print('----write result, read mats')
    f_out = open(resultFile,'w')
    train_data = sio.loadmat(all_Q_mat)['all_Q']
    train_labels = sio.loadmat(superpixel_label_mat)['all_superpixel_labels']
    print('----get_train_data')    
    data = get_train_data(train_data, train_labels)
    print(len(data))
    print('----initialize_params')
    train_params = initialize_params(train_data, data)
    print('----initialize_net')
    model = initialize_net(train_params)
    model.summary()
    print('----model compile')
    model.compile(loss='categorical_crossentropy',
        optimizer=optimizers.Adam(lr=train_params['base_lr']),
        metrics=['accuracy'])
    print('----ImageDataGenerator')
    train_datagen = ImageDataGenerator(
                featurewise_center=True,
                featurewise_std_normalization=True)

    for epoch in range(0, train_params['num_epochs']):
        num_iterations = int(train_params['total_samples']/train_params['batch_size']) + 1
        for iteration in range(0, num_iterations):
            print ('Epoch : ' + str(epoch) + ' | Iteration : ' + str(iteration))
            given_data = load_data(data, train_params)
            X = given_data[0]
            Y = given_data[1]
            model.fit(X,Y,
                epochs=1,
                verbose=1)
        if epoch%train_params['decay_steps'] == 0 and epoch != 0:
            print (' Changing learning rate ... ')
            lr = K.get_value(model.optimizer.lr)
            K.set_value(model.optimizer.lr, lr*train_params['decay_factor'])
            print("lr changed to {}".format(lr*train_params['decay_factor']))
        if epoch%train_params['checkpoint'] == 0 and epoch != 0:
            print (' Saving model ... ')
            model_name = 'model_' + str(epoch) + '.h5'
            model.save(model_name)
        if epoch%1 == 0:
            acu_pos = 0
            acu_neg = 0
            acu = 0
            for i in range(0, int(train_params['pos_samples']/train_params['batch_size'])):
                X = np.zeros((train_params['batch_size'], train_params['max_size'], 3))
                Y = np.zeros((train_params['batch_size'], 2))
                for j in range(0, train_params['batch_size']):
                    sam = data[1][i*train_params['batch_size'] + j]
                    sam_len = sam.shape[0]
                    X[j, :sam_len, :] = np.true_divide(sam, sam.max())
                    Y[j][1] = float(1)
                pred = model.evaluate(X,Y, 
                        batch_size=train_params['batch_size'])
                print(pred)
                acu_pos = acu_pos + pred[1]
                acu = acu + pred[1]
            for i in range(0, int(train_params['neg_samples']/train_params['batch_size'])):
                X = np.zeros((train_params['batch_size'], train_params['max_size'], 3))
                Y = np.zeros((train_params['batch_size'], 2))
                for j in range(0, train_params['batch_size']):
                    sam = data[0][i*train_params['batch_size'] + j]
                    sam_len = sam.shape[0]
                    X[j, :sam_len, :] = np.true_divide(sam, sam.max())
                    Y[j][0] = float(1)
                pred = model.evaluate(X,Y, 
                        batch_size=train_params['batch_size'])
                print(pred)
                acu_neg = acu_neg + pred[1]
                acu = acu + pred[1]
            acu_pos = float(acu_pos)/float(int(train_params['pos_samples']/train_params['batch_size'])) 
            acu_neg = float(acu_neg)/float(int(train_params['neg_samples']/train_params['batch_size']))
            acu = float(acu)/float(int(train_params['pos_samples']/train_params['batch_size']) + int(train_params['neg_samples']/train_params['batch_size']))
            f_out.write('acu_pos: ' + str(acu_pos)+', acu_neg: '+str(acu_neg)+', acu:'+str(acu)+'\n')
Example #48
        BESTSCORE = score
        ES_FLAG = 0

        pred_y = model.predict([
            test_q1_w, test_q1_c, test_q1_tm, test_q2_w, test_q2_c, test_q2_tm
        ],
                               batch_size=BATCH_SIZE)
        pred_y = np.reshape(pred_y, (len(pred_y), ))
        savepath = 'submission/submission-cnn-' + str(BESTSCORE)[:7] + '.csv'
        make_submission(pred_y, savepath)
    else:
        ES_FLAG += 1
        if ES_FLAG > EARLYSTOP:
            if LEARNING_RATE > 1e-6:
                LEARNING_RATE *= 0.5
                K.set_value(model.optimizer.lr, LEARNING_RATE)
                print('reduce learning rate = %f' % LEARNING_RATE)
                ES_FLAG = 0
            else:
                break

# from keras import callbacks
# cb_reducelr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6,
#                                           verbose=0, mode='auto', epsilon=0.001, cooldown=0)
# cb_ckpt = callbacks.ModelCheckpoint('/files/faust/COMPETITION/ppdai/rescnn.{val_loss:.2f}.hdf5',
#                                     monitor='val_loss', verbose=1, mode='auto', period=1)
# cb_earlystop = callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='min')
#
# model.fit([train_all_q1_w, train_all_q1_c, train_all_q2_w, train_all_q2_c], train_all_y,
#           validation_split=0.2,
#           batch_size=BATCH_SIZE, epochs=EPOCH, verbose=1,
    def generate_backdoor(
            self, x_val: np.ndarray, y_val: np.ndarray,
            y_target: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Generates a possible backdoor for the model. Returns the pattern and the mask
        :return: A tuple of the pattern and mask for the model.
        """
        import keras.backend as K
        from keras_preprocessing.image import ImageDataGenerator

        self.reset()
        datagen = ImageDataGenerator()
        gen = datagen.flow(x_val, y_val, batch_size=self.batch_size)
        mask_best = None
        pattern_best = None
        reg_best = float("inf")
        cost_set_counter = 0
        cost_up_counter = 0
        cost_down_counter = 0
        cost_up_flag = False
        cost_down_flag = False
        early_stop_counter = 0
        early_stop_reg_best = reg_best
        mini_batch_size = len(x_val) // self.batch_size
        for _ in tqdm(range(self.steps),
                      desc="Generating backdoor for class {}".format(
                          np.argmax(y_target))):
            loss_reg_list = []
            loss_acc_list = []

            for _ in range(mini_batch_size):
                x_batch, _ = gen.next()
                y_batch = [y_target] * x_batch.shape[0]
                batch_loss_ce, batch_loss_reg, batch_loss, batch_loss_acc = self.train(
                    [x_batch, y_batch])

                loss_reg_list.extend(list(batch_loss_reg.flatten()))
                loss_acc_list.extend(list(batch_loss_acc.flatten()))

            avg_loss_reg = np.mean(loss_reg_list)
            avg_loss_acc = np.mean(loss_acc_list)

            # save best mask/pattern so far
            if avg_loss_acc >= self.attack_success_threshold and avg_loss_reg < reg_best:
                mask_best = K.eval(self.mask_tensor)
                pattern_best = K.eval(self.pattern_tensor)
                reg_best = avg_loss_reg

            # check early stop
            if self.early_stop:
                if reg_best < float("inf"):
                    if reg_best >= self.early_stop_threshold * early_stop_reg_best:
                        early_stop_counter += 1
                    else:
                        early_stop_counter = 0
                early_stop_reg_best = min(reg_best, early_stop_reg_best)

                if cost_down_flag and cost_up_flag and early_stop_counter >= self.early_stop_patience:
                    logger.info("Early stop")
                    break

            # cost modification
            if avg_loss_acc >= self.attack_success_threshold:
                cost_set_counter += 1
                if cost_set_counter >= self.patience:
                    self.cost = self.init_cost
                    K.set_value(self.cost_tensor, self.cost)
                    cost_up_counter = 0
                    cost_down_counter = 0
                    cost_up_flag = False
                    cost_down_flag = False
            else:
                cost_set_counter = 0

            if avg_loss_acc >= self.attack_success_threshold:
                cost_up_counter += 1
                cost_down_counter = 0
            else:
                cost_up_counter = 0
                cost_down_counter += 1

            if cost_up_counter >= self.patience:
                cost_up_counter = 0
                self.cost *= self.cost_multiplier_up
                K.set_value(self.cost_tensor, self.cost)
                cost_up_flag = True
            elif cost_down_counter >= self.patience:
                cost_down_counter = 0
                self.cost /= self.cost_multiplier_down
                K.set_value(self.cost_tensor, self.cost)
                cost_down_flag = True

        if mask_best is None:
            mask_best = K.eval(self.mask_tensor)
            pattern_best = K.eval(self.pattern_tensor)

        return mask_best, pattern_best
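A hedged usage sketch for generate_backdoor() above: sweep every output class as a candidate target and record the L1 norm of each recovered mask. The helper name, its arguments, and the calling object are assumptions, not part of the original code.

import numpy as np

def scan_targets(defence, x_val, y_val, num_classes):
    # `defence` is assumed to be an instance exposing generate_backdoor() as defined above
    l1_norms = []
    for target in range(num_classes):
        y_target = np.eye(num_classes)[target]                        # one-hot target class
        mask, pattern = defence.generate_backdoor(x_val, y_val, y_target)
        l1_norms.append(float(np.sum(np.abs(mask))))                  # a very small mask flags a suspicious class
    return l1_norms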
 def on_batch_begin(self, batch, logs={}):
     pts = self.currentEP + batch / self.nbatch - self.startEP
     decay = 1 + np.cos(pts / self.Tmult * np.pi)
     lr = self.min_lr + 0.5 * (self.initial_lr - self.min_lr) * decay
     K.set_value(self.model.optimizer.lr, lr)
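A minimal sketch of a callback class that could own the attributes used by the on_batch_begin above (cosine learning-rate annealing); the attribute names mirror the snippet, the constructor and epoch bookkeeping are assumptions.

import numpy as np
from keras import backend as K
from keras.callbacks import Callback

class CosineLR(Callback):
    def __init__(self, initial_lr, min_lr, nbatch, Tmult=1, startEP=0):
        super(CosineLR, self).__init__()
        self.initial_lr = initial_lr   # lr at the start of a cycle
        self.min_lr = min_lr           # lr floor at the end of a cycle
        self.nbatch = nbatch           # batches per epoch
        self.Tmult = Tmult             # cycle length in epochs
        self.startEP = startEP         # epoch at which the current cycle started
        self.currentEP = startEP

    def on_epoch_begin(self, epoch, logs=None):
        self.currentEP = epoch

    def on_batch_begin(self, batch, logs=None):
        pts = self.currentEP + batch / self.nbatch - self.startEP
        decay = 1 + np.cos(pts / self.Tmult * np.pi)
        lr = self.min_lr + 0.5 * (self.initial_lr - self.min_lr) * decay
        K.set_value(self.model.optimizer.lr, lr)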

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.,
                                   patience=50,
                                   verbose=0,
                                   mode='auto')

    # reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5, min_lr=0.00001, verbose=1)
    # callback = [early_stopping, reduce_lr]

    callback = [early_stopping]

    model.compile(loss=rel_mse, optimizer='adam', metrics=[rmse])
    K.set_value(model.optimizer.lr, 0.001)
    history = model.fit(Input_tr,
                        Output_tr,
                        validation_split=0.03125,
                        epochs=N_max_epoch,
                        batch_size=N_batch_size,
                        callbacks=callback)

    K.set_value(model.optimizer.lr, 0.0001)
    #
    history = model.fit(Input_tr,
                        Output_tr,
                        validation_split=0.03125,
                        epochs=N_max_epoch,
                        batch_size=N_batch_size,
                        callbacks=callback)
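The two fit() calls above drop the learning rate by hand between stages; a hedged alternative is a single schedule passed through keras.callbacks.LearningRateScheduler. The 0.001/0.0001 split and N_max_epoch mirror the script; the function name is an assumption.

from keras.callbacks import LearningRateScheduler

def two_stage_lr(epoch):
    # first N_max_epoch epochs at 1e-3, then drop to 1e-4
    return 0.001 if epoch < N_max_epoch else 0.0001

# history = model.fit(Input_tr, Output_tr,
#                     validation_split=0.03125,
#                     epochs=2 * N_max_epoch,
#                     batch_size=N_batch_size,
#                     callbacks=[early_stopping, LearningRateScheduler(two_stage_lr)])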
def rnn(embedding_matrix, config):
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))

    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (bsz, max_length, emb_dims)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)
    # bidirectional (second stacked pass, disabled)
    #     q1_encoded = encode2(q1_encoded)
    #     q2_encoded = encode2(q2_encoded)
    # resnet
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)

    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    #     rnn_layer3_input1 = concatenate([q1_embed,q1_encoded,q1_encoded2])
    #     rnn_layer3_input2 = concatenate([q2_embed,q2_encoded,q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)
    #     merged1 = GlobalMaxPool1D()(q1_encoded3)
    #     merged2 = GlobalMaxPool1D()(q2_encoded3)
    #     q1_encoded = concatenate([q1_encoded, q1_encoded2], axis=-1)
    #     q2_encoded = concatenate([q2_encoded, q2_encoded2], axis=-1)

    #     merged1 = concatenate([q1_encoded2, q1_embed], axis=-1)
    #     merged2 = concatenate([q2_encoded2, q2_embed], axis=-1)
    #     # TODO add attention rep , maxpooling rep
    q1_encoded3 = concatenate([q1_encoded, q1_encoded2, q1_encoded3])
    q2_encoded3 = concatenate([q2_encoded, q2_encoded2, q2_encoded3])
    merged1 = GlobalMaxPool1D()(q1_encoded3)
    merged2 = GlobalMaxPool1D()(q2_encoded3)
    # avg1 = GlobalAvgPool1D()(q1_encoded3)
    # avg2 = GlobalAvgPool1D()(q2_encoded3)
    # merged1 = concatenate([max1,avg1])
    # merged2 = concatenate([max2,avg2])
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
    #     jaccard_rep = Lambda(lambda x: x[0]*x[1]/(K.sum(x[0]**2,axis=1,keepdims=True)+K.sum(x[1]**2,axis=1,keepdims=True)-
    #                                               K.sum(K.abs(x[0]*x[1]),axis=1,keepdims=True)))([merged1,merged2])
    #     merged = Concatenate()([merged1, merged2, mul_rep, sub_rep,jaccard_rep])
    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)

    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, feature_dense])
    # Classifier
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
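A sketch of the config dict that rnn() above reads; the keys are taken from the function body, every value is an illustrative assumption.

example_config = {
    'rnn': 'gru',                 # with 'gpu': True this selects the CuDNNGRU stack
    'gpu': True,
    'rnn_output_size': 128,
    'max_length': 30,
    'embed_trainable': False,
    'spatial_dropout_rate': 0.2,
    'feature_length': 20,
    'dense_dim': 256,
    'dense_dropout': 0.3,
    'optimizer': 'adam',
    'learning_rate': 1e-3,
}
# model = rnn(embedding_matrix, example_config)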
Beispiel #53
0
def scheduler(epoch):
    if epoch % 100 == 0 and epoch != 0:
        lr = K.get_value(model.optimizer.lr)
        K.set_value(model.optimizer.lr, lr * 0.5)
        print("lr changed to {}".format(lr * 0.5))
    return K.get_value(model.optimizer.lr)
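The scheduler above only takes effect once it is wired into fit() via keras.callbacks.LearningRateScheduler; a minimal sketch (the fit() arguments are assumptions):

from keras.callbacks import LearningRateScheduler

lr_callback = LearningRateScheduler(scheduler)
# model.fit(x_train, y_train, epochs=500, callbacks=[lr_callback])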
Beispiel #54
0
 def on_train_end(self, logs={}):
     for weight in self.sym_trainable_weights:
         K.set_value(weight, self.mv_trainable_weights_vals[weight.name])
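A minimal sketch of a callback that could surround the on_train_end above: it keeps an exponential moving average of the trainable weights and swaps it in when training ends. The attribute names match the snippet; the decay value and update rule are assumptions.

import keras.backend as K
from keras.callbacks import Callback

class MovingAverageWeights(Callback):
    def __init__(self, decay=0.99):
        super(MovingAverageWeights, self).__init__()
        self.decay = decay

    def on_train_begin(self, logs=None):
        self.sym_trainable_weights = self.model.trainable_weights
        self.mv_trainable_weights_vals = {w.name: K.get_value(w)
                                          for w in self.sym_trainable_weights}

    def on_batch_end(self, batch, logs=None):
        for weight in self.sym_trainable_weights:
            old = self.mv_trainable_weights_vals[weight.name]
            self.mv_trainable_weights_vals[weight.name] = (
                self.decay * old + (1.0 - self.decay) * K.get_value(weight))

    def on_train_end(self, logs=None):
        for weight in self.sym_trainable_weights:
            K.set_value(weight, self.mv_trainable_weights_vals[weight.name])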
Beispiel #55
0
def AtrousDenseUDeconvNet():
    caxis = 1 if K.image_data_format() == 'channels_first' else -1
    input = Input(shape=(img_w, img_h, 3),
                  batch_shape=(None, img_w, img_h, 3))  # 256
    # inputs=keras.layers.convolutional.ZeroPadding2D(padding=(0, 0), dim_ordering='default')(input)
    x = BatchNormalization(axis=caxis,
                           gamma_regularizer=l2(1E-4),
                           beta_regularizer=l2(1E-4))(input)
    inputs1 = Conv2D(filters=16,
                     kernel_size=3,
                     name="initial_conv2D",
                     use_bias=False,
                     strides=1,
                     activation="relu",
                     padding="same",
                     kernel_initializer="TruncatedNormal",
                     kernel_regularizer=l2(1E-4))(x)  # 256
    # 256
    dense1 = Dense_Block1(inputs1)  # 188
    tran1 = transition_block1(dense1, 64, dropout_rate=0.25, weight_decay=1E-4)
    dense2 = Dense_Block2(tran1)  # 40
    #a1 = keras.layers.PReLU(alpha_initializer='TruncatedNormal', alpha_regularizer=None, alpha_constraint=None,
    #                       shared_axes=None)(dense2)
    tran2 = transition_block2(dense2,
                              256,
                              dropout_rate=0.25,
                              weight_decay=1E-4)
    #pool = MaxPooling2D(pool_size=(2, 2))(a1)
    # 64 64*64
    conv2 = Conv2D(128,
                   3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(tran2)
    #conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    drop2 = Dropout(0.1)(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(drop2)
    conv3 = Conv2D(256,
                   3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(pool2)
    #conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    #pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    #conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
    #conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    drop3 = Dropout(0.1)(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(drop3)

    conv4 = Conv2D(512,
                   3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(pool3)
    drop4 = Dropout(0.1)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    # conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
    conv5 = Conv2D(1024,
                   3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(pool4)
    drop5 = Dropout(0.1)(conv5)

    #conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)

    up7 = Conv2D(512,
                 3,
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal')(UpSampling2D(size=(2,
                                                                    2))(drop5))
    merge7 = concatenate([drop4, up7], axis=caxis)
    conv7 = Conv2D(512,
                   3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(merge7)
    #conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)

    up8 = Conv2D(256,
                 2,
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal')(UpSampling2D(size=(2,
                                                                    2))(conv7))
    merge8 = concatenate([drop3, up8], axis=caxis)
    conv8 = Conv2D(256,
                   3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(merge8)
    #conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)

    up9 = Conv2D(128,
                 2,
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal')(UpSampling2D(size=(2,
                                                                    2))(conv8))
    merge9 = concatenate([drop2, up9], axis=caxis)
    conv9 = Conv2D(128,
                   3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(merge9)

    up10 = Conv2D(64,
                  2,
                  activation='relu',
                  padding='same',
                  kernel_initializer='he_normal')(
                      UpSampling2D(size=(2, 2))(conv9))
    merge10 = concatenate([db2x2, up10], axis=caxis)  # db2x2 is not defined in this snippet; assumed to be a skip connection from the dense-block helpers
    conv10 = Conv2D(64,
                    3,
                    activation='relu',
                    padding='same',
                    kernel_initializer='he_normal')(merge10)

    up11 = Conv2D(32,
                  3,
                  activation='relu',
                  padding='same',
                  kernel_initializer='he_normal')(
                      UpSampling2D(size=(2, 2))(conv10))
    merge11 = concatenate([db1x3, up11], axis=caxis)  # db1x3 is not defined in this snippet; assumed to be a skip connection from the dense-block helpers
    conv11 = Conv2D(32,
                    3,
                    activation='relu',
                    padding='same',
                    kernel_initializer='he_normal')(merge11)
    '''
    up11 = Conv2D(16, 3, activation='relu', padding='same', kernel_initializer='he_normal')(
        UpSampling2D(size=(2, 2))(conv10))
    merge11 = concatenate([db1x3, up11], axis=caxis)
    conv11 = Conv2D(16, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge11)
    '''
    conv12 = Conv2D(16,
                    3,
                    activation='relu',
                    padding='same',
                    kernel_initializer='he_normal')(conv11)
    conv12 = Conv2D(6,
                    3,
                    activation='relu',
                    padding='same',
                    kernel_initializer='he_normal')(conv12)
    '''
    conv9 = Conv2DTranspose(filters=64, kernel_size=3, padding="same", strides=2, kernel_initializer="TruncatedNormal",
                            W_regularizer=l2(1E-4), activation="relu")(conv8)  # 128
    #conv8add = Conv2DTranspose(filters=64, kernel_size=3, padding="same", strides=2, kernel_initializer="TruncatedNormal",name="conv8add",
     #                       W_regularizer=l2(1E-4), activation="relu")(conv8)  # 128

    conv10 = Conv2DTranspose(filters=32, kernel_size=3, padding="same", strides=2, kernel_initializer="TruncatedNormal",
                            W_regularizer=l2(1E-4), activation="relu")(conv9)
    #conv9add = Conv2DTranspose(filters=32, kernel_size=3, padding="same", strides=2, kernel_initializer="TruncatedNormal",name="conv9add",
     #                       W_regularizer=l2(1E-4), activation="relu")(conv9)

    conv10 = Conv2DTranspose(filters=6, kernel_size=3, padding="same", strides=1, kernel_initializer="TruncatedNormal",
                             W_regularizer=l2(1E-4), activation="relu")(conv10)
    #conv10add = Conv2DTranspose(filters=6, kernel_size=3, padding="same", strides=1, kernel_initializer="TruncatedNormal",name="conv10add",
     #                        W_regularizer=l2(1E-4), activation="softmax")(conv10)
    '''
    model = Model(inputs=input, outputs=conv12)
    #loss = tf.optimizers.RMSprop
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    K.set_value(model.optimizer.lr, 0.001)
    keras.utils.plot_model(model,
                           "AtrousDense-U-DeconvNet_model.png",
                           show_shapes=True)
    return model
Beispiel #56
0
def set_lr(model, lr):
    K.set_value(model.optimizer.lr, lr)
Beispiel #57
0
def lr_poly_decay(model, base_lr, curr_iter, max_iter, power=0.5):
    lrate = base_lr * (1.0 - (curr_iter / float(max_iter)))**power
    K.set_value(model.optimizer.lr, lrate)

    return K.eval(model.optimizer.lr)
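A hedged sketch of driving lr_poly_decay from a manual training loop, one learning-rate update per iteration; the loop structure and default values are assumptions.

def train_with_poly_decay(model, batches, base_lr=1e-3, power=0.5):
    # `batches` is an iterable of (x_batch, y_batch) pairs
    max_iter = len(batches)
    for curr_iter, (x_batch, y_batch) in enumerate(batches):
        lrate = lr_poly_decay(model, base_lr, curr_iter, max_iter, power=power)
        model.train_on_batch(x_batch, y_batch)   # lrate can also be logged per iteration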
Beispiel #58
0
        if Iter % model_save_step4_visualization == 0:
            d_model.save(model_save_Path + '/m_' + str(Iter) + '_model.h5')

# Optimization strategy: switch the optimizer and adjust the learning rate =========================================================================================
#       # Training starts with Adam, so the code below has two parts: one switches from Adam to SGD, the other updates the learning rate after the switch
        # Once the switch point is reached, save the weights first (just in case), recompile the model, then reload the weights
        if Iter == optimizer_switch_point:
            d_model.save(model_save_Path + '/m_' + 'newest_model.h5')
            lr_new = lr_mod(Iter, max_epoch=50, epoch_file_size=trainset_num, batch_size=batch_size, init_lr=first_lr)
            d_model.compile(optimizer=SGD(lr=lr_new, momentum=0.9), loss=EuiLoss, metrics=[y_t, y_pre, Acc])
            d_model.load_weights(model_save_Path + '/m_' + 'newest_model.h5')

        if Iter > optimizer_switch_point:
            #batch_num_perepoch = or_train_num // batch_size  # iterations per epoch, i.e. the number of batches
            lr_new = lr_mod(Iter, max_epoch=50, epoch_file_size=trainset_num, batch_size=batch_size, init_lr=first_lr)
            K.set_value(d_model.optimizer.lr, lr_new)



# Close the log files so the results can be inspected in real time
        txt_s1.close()
        txt_s2.close()
        txt_s3.close()
        txt_s4.close()
        txt_s5.close()
        txt_s6.close()
        txt_s7.close()
        txt_s8.close()
        txt_s9.close()
        txt_s10.close()
        txt_s11.close()
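lr_mod is not shown in this snippet; a possible step-decay implementation with the same call signature might look like this (purely an assumption, not the original helper):

def lr_mod(Iter, max_epoch, epoch_file_size, batch_size, init_lr):
    iters_per_epoch = max(1, epoch_file_size // batch_size)
    epoch = Iter // iters_per_epoch
    # halve the rate every quarter of the schedule (illustrative choice)
    drops = epoch // max(1, max_epoch // 4)
    return init_lr * (0.5 ** drops)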
    def call(self,
             inputs,
             mask=None,
             training=None,
             initial_state=None,
             constants=None):
        # note that the .build() method of subclasses MUST define
        # self.input_spec and self.state_spec with complete input shapes.
        if isinstance(inputs, list):
            inputs = inputs[0]
        if initial_state is not None:
            pass
        elif self.stateful:
            initial_state = self.states
        else:
            initial_state = self.get_initial_state(inputs)

        if isinstance(mask, list):
            mask = mask[0]

        if len(initial_state) != len(self.states):
            raise ValueError('Layer has ' + str(len(self.states)) +
                             ' states but was passed ' +
                             str(len(initial_state)) + ' initial states.')
        timesteps = K.int_shape(inputs)[1]

        kwargs = {}
        if has_arg(self.cell.call, 'training'):
            kwargs['training'] = training

        if constants:
            if not has_arg(self.cell.call, 'constants'):
                raise ValueError('RNN cell does not support constants')

            def step(inputs, states):
                constants = states[-self._num_constants:]
                states = states[:-self._num_constants]
                return self.cell.call(inputs,
                                      states,
                                      constants=constants,
                                      **kwargs)
        else:

            def step(inputs, states):
                return self.cell.call(inputs, states, **kwargs)

        last_output, outputs, states = K.rnn(step,
                                             inputs,
                                             initial_state,
                                             constants=constants,
                                             go_backwards=self.go_backwards,
                                             mask=mask,
                                             input_length=timesteps)
        if self.stateful:
            updates = []
            for i in range(len(states)):
                updates.append((self.states[i], states[i]))
            self.add_update(updates, inputs)

        if self.return_sequences:
            output = outputs
        else:
            output = last_output

        # Properly set learning phase
        if getattr(last_output, '_uses_learning_phase', False):
            output._uses_learning_phase = True

        if self.return_state:
            if not isinstance(states, (list, tuple)):
                states = [states]
            else:
                states = list(states)
            return [output] + states
        else:
            # print('output')
            # print(output.shape)
            return output

    def reset_states(self, states=None):
        # Note: from this point on the code belongs to reset_states(), not call();
        # `state_shape` used below is assumed to be the layer's computed output
        # shape, whose derivation is not included in this snippet.

        # helper function
        def get_tuple_shape(nb_channels):
            result = list(state_shape)
            if self.cell.data_format == 'channels_first':
                result[1] = nb_channels
            elif self.cell.data_format == 'channels_last':
                result[3] = nb_channels
            else:
                raise KeyError
            return tuple(result)

        # initialize state if None
        if self.states[0] is None:
            if hasattr(self.cell.state_size, '__len__'):
                self.states = [
                    K.zeros(get_tuple_shape(dim))
                    for dim in self.cell.state_size
                ]
            else:
                self.states = [K.zeros(get_tuple_shape(self.cell.state_size))]
        elif states is None:
            if hasattr(self.cell.state_size, '__len__'):
                for state, dim in zip(self.states, self.cell.state_size):
                    K.set_value(state, np.zeros(get_tuple_shape(dim)))
            else:
                K.set_value(self.states[0],
                            np.zeros(get_tuple_shape(self.cell.state_size)))
        else:
            if not isinstance(states, (list, tuple)):
                states = [states]
            if len(states) != len(self.states):
                raise ValueError('Layer ' + self.name + ' expects ' +
                                 str(len(self.states)) + ' states, '
                                 'but it received ' + str(len(states)) +
                                 ' state values. Input received: ' +
                                 str(states))
            for index, (value, state) in enumerate(zip(states, self.states)):
                if hasattr(self.cell.state_size, '__len__'):
                    dim = self.cell.state_size[index]
                else:
                    dim = self.cell.state_size
                if value.shape != get_tuple_shape(dim):
                    raise ValueError('State ' + str(index) +
                                     ' is incompatible with layer ' +
                                     self.name + ': expected shape=' +
                                     str(get_tuple_shape(dim)) +
                                     ', found shape=' + str(value.shape))
                # TODO: consider batch calls to `set_value`.
                K.set_value(state, value)
def set_lr(model, lr):
    import keras.backend as K
    K.set_value(model.optimizer.lr, float(lr))
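A small usage sketch for set_lr: ramp the learning rate over the first few epochs by calling it before each fit step; the schedule values and fit() arguments are assumptions.

def warmup(model, x_train, y_train, lrs=(1e-4, 5e-4, 1e-3)):
    for epoch, lr in enumerate(lrs):
        set_lr(model, lr)
        model.fit(x_train, y_train, initial_epoch=epoch, epochs=epoch + 1)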