def on_epoch_end(self, epoch, logs={}):
    logs['lr'] = K.get_value(self.model.optimizer.lr)
    current = logs.get(self.monitor)
    if current is None:
        warnings.warn('Learning Rate Plateau Reducing requires %s available!' %
                      self.monitor, RuntimeWarning)
    else:
        if self.in_cooldown():
            self.cooldown_counter -= 1
            self.wait = 0

        if self.monitor_op(current, self.best):
            self.best = current
            self.wait = 0
        elif not self.in_cooldown():
            if self.wait >= self.patience:
                old_lr = float(K.get_value(self.model.optimizer.lr))
                if old_lr > self.min_lr + self.lr_epsilon:
                    new_lr = old_lr * self.factor
                    new_lr = max(new_lr, self.min_lr)
                    K.set_value(self.model.optimizer.lr, new_lr)
                    if self.verbose > 0:
                        print('\nEpoch %05d: reducing learning rate to %s.' % (epoch, new_lr))
                    self.cooldown_counter = self.cooldown
                    self.wait = 0
            self.wait += 1
def train_step(self, optimizer):
    """One network training step."""
    opt = self.model.optimizer
    K.set_value(opt.lr, optimizer["lr"])
    K.set_value(opt.momentum, optimizer["momentum"])
def reset_states(self):
    assert self.stateful, 'Layer must be stateful.'
    input_shape = self.input_shape
    if not input_shape[0]:
        raise Exception('If a RNN is stateful, a complete '
                        'input_shape must be provided '
                        '(including batch size).')
    if self.return_sequences:
        out_row, out_col, out_filter = self.output_shape[2:]
    else:
        out_row, out_col, out_filter = self.output_shape[1:]

    if hasattr(self, 'states'):
        K.set_value(self.states[0],
                    np.zeros((input_shape[0], out_row, out_col, out_filter)))
        K.set_value(self.states[1],
                    np.zeros((input_shape[0], out_row, out_col, out_filter)))
    else:
        self.states = [K.zeros((input_shape[0], out_row, out_col, out_filter)),
                       K.zeros((input_shape[0], out_row, out_col, out_filter))]
def on_batch_begin(self, batch, logs={}):
    open_all_gates()
    rands = np.random.uniform(size=len(add_tables))
    for t, rand in zip(add_tables, rands):
        if rand < K.get_value(t["death_rate"]):
            K.set_value(t["gate"], 0)
def train(self, model, data):
    """ Fits the given model on a batch of data. """
    kur_optimizer = model.compiled['train']['kur_optimizer']
    if kur_optimizer.scale_rate:
        if kur_optimizer.scale_rate in data:
            import keras.backend as K  # pylint: disable=import-error
            factor = numpy.mean(data[kur_optimizer.scale_rate])
            if kur_optimizer.scale_mode == 'sqrt':
                factor = factor ** 0.5
            keras_optimizer = kur_optimizer.optimizer
            K.set_value(
                keras_optimizer.lr,
                K.get_value(keras_optimizer.lr) * factor
            )
            result = self.run_batch(model, data, 'train', True)
            K.set_value(
                keras_optimizer.lr,
                K.get_value(keras_optimizer.lr) / factor
            )
            return result
        else:
            logger.warning('The optimizer "scale_rate" was specified, but '
                           'no such data column was found: %s. Ignoring this.',
                           kur_optimizer.scale_rate)
    return self.run_batch(model, data, 'train', True)
def reduce_lr(self, current_nb):
    if self.reduction_function == 'linear':
        new_rate = self.reduce_rate
    elif self.reduction_function == 'exponential':
        new_rate = np.power(self.exp_base, current_nb / self.half_life) * self.reduce_rate
    elif self.reduction_function == 'noam':
        new_rate = np.float32(min(float(current_nb) ** self.exp_base,
                                  float(current_nb) * self.half_life ** self.warmup_exp))
    else:
        raise NotImplementedError('The decay function %s is not implemented.' %
                                  str(self.reduction_function))

    if self.reduction_function == 'noam':
        lr = self.initial_lr
    else:
        lr = K.get_value(self.model.optimizer.lr)

    self.new_lr = np.maximum(np.float32(lr * new_rate), self.min_lr)
    K.set_value(self.model.optimizer.lr, self.new_lr)
    if self.reduce_each_epochs and self.verbose > 0:
        logging.info("LR reduction from {0:0.6f} to {1:0.6f}".format(float(lr), float(self.new_lr)))
def learn(self, last_observations, actions, rewards, learning_rate=0.001):
    import keras.backend as K
    K.set_value(self.train_net.optimizer.lr, learning_rate)
    frames = len(last_observations)
    self.counter += frames
    # -----
    values, policy = self.train_net.predict([last_observations, self.unroll])
    # -----
    self.targets.fill(0.)
    advantage = rewards - values.flatten()
    self.targets[self.unroll, actions] = 1.
    # -----
    loss = self.train_net.train_on_batch([last_observations, advantage], [rewards, self.targets])
    entropy = np.mean(-policy * np.log(policy + 0.00000001))
    self.pol_loss.append(loss[2])
    self.val_loss.append(loss[1])
    self.entropy.append(entropy)
    self.values.append(np.mean(values))
    min_val, max_val, avg_val = min(self.values), max(self.values), np.mean(self.values)
    print('\rFrames: %8d; Policy-Loss: %10.6f; Avg: %10.6f '
          '--- Value-Loss: %10.6f; Avg: %10.6f '
          '--- Entropy: %7.6f; Avg: %7.6f '
          '--- V-value; Min: %6.3f; Max: %6.3f; Avg: %6.3f' % (
              self.counter, loss[2], np.mean(self.pol_loss),
              loss[1], np.mean(self.val_loss),
              entropy, np.mean(self.entropy),
              min_val, max_val, avg_val), end='')
    # -----
    self.swap_counter -= frames
    if self.swap_counter < 0:
        self.swap_counter += self.swap_freq
        return True
    return False
def on_batch_begin(self, batch, logs={}):
    probs = np.random.uniform(size=len(gates))
    for i, j in zip(gates, probs):
        if j > gates[i][0]:
            K.set_value(gates[i][1], 1)
        else:
            K.set_value(gates[i][1], 0)
def on_epoch_begin(self, epoch, logs={}):
    self.task.set('status.stage', 'epoch #' + str(epoch))
    if hasattr(self.model.optimizer, 'lr'):
        if self.task.get('config.learning_rate'):
            lr = float(self.task.get('config.learning_rate'))
            K.set_value(self.model.optimizer.lr, lr)
    else:
        self.task.set('status.error', 'Optimizer must have a "lr" attribute.')
def on_epoch_begin(self, epoch, logs=None):
    if not hasattr(self.model.optimizer, 'lr'):
        raise ValueError('Optimizer must have a "lr" attribute.')
    lr = self.schedule(epoch)
    if not isinstance(lr, (float, np.float32, np.float64)):
        raise ValueError('The output of the "schedule" function should be float.')
    K.set_value(self.model.optimizer.lr, lr)
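A minimal, self-contained sketch of how an epoch-indexed schedule like the one above is typically wrapped in a callback and passed to fit; the class name SimpleLRScheduler and the lambda schedule are illustrative, not taken from the original code:

import keras.backend as K
from keras.callbacks import Callback

class SimpleLRScheduler(Callback):
    """Illustrative callback: sets the optimizer lr from an epoch-indexed schedule."""

    def __init__(self, schedule):
        super(SimpleLRScheduler, self).__init__()
        self.schedule = schedule  # callable: epoch -> float learning rate

    def on_epoch_begin(self, epoch, logs=None):
        if not hasattr(self.model.optimizer, 'lr'):
            raise ValueError('Optimizer must have a "lr" attribute.')
        K.set_value(self.model.optimizer.lr, self.schedule(epoch))

# usage (placeholder data): model.fit(x, y, callbacks=[SimpleLRScheduler(lambda e: 1e-3 * 0.9 ** e)])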
def on_epoch_begin(self, epoch, logs={}):
    layer = self.model.layers[self.VAE_layer_idx]
    assert hasattr(layer, 'regularizer_scale'), \
        'Layer must have a "regularizer_scale" attribute.'
    weight = self.schedule(epoch)
    print("Current vae annealer weight is {}".format(weight))
    assert type(weight) == float, 'The output of the "schedule" function should be float.'
    K.set_value(layer.regularizer_scale, weight)
def set_state(self, state):
    c = 0
    vs = self.vars
    for key in vs.keys():
        if key == 'oopt':
            continue
        v = vs[key]
        for p in v.values():
            K.set_value(p, state[c])
            c += 1
def on_epoch_begin(self, epoch, logs=None):
    if logs is None:
        logs = {}
    new_weight = self.schedule(epoch)
    new_value = new_weight * self.weight_orig
    print("Current {} annealer weight is {}".format(self.weight_name, new_value))
    assert type(new_weight) == float, \
        'The output of the "schedule" function should be float.'
    K.set_value(self.weight_var, new_value)
def set_lr(self, learning_rate):
    """
    Set the learning rate of the optimizer.
    :param learning_rate: learning rate passed to the optimizer
    :return:
    """
    # self.optimizer.lr.set_value(learning_rate)
    K.set_value(self.optimizer.lr, learning_rate)
    print('learning rate = {}'.format(learning_rate))
def batch_train(self, curr_state, next_state, immediate_reward, action, done, target, type="Double"):
    """
    Computes the TD error for a given batch of tuples.

    Here, we randomly sample episodes from the experience buffer and use this to
    train our model. This method computes this for a batch and trains the model.

    Args:
        curr_state(array): Numpy array representing an array of current states of the game
        next_state(array): Numpy array for the immediate next state of the game
        action(array): List of actions taken to go from current state to the next
        immediate_reward(array): List of rewards for the given transition
        done(bool): Whether this is a terminal state or not
        target(keras.Model): Target network for computing the TD error
    """
    if type == "Double":
        forward_action = np.argmax(self.model.predict(next_state), axis=1)
        predicted_qvalue = target.predict(next_state)  # BxN matrix
        B = forward_action.size
        forward_qvalue = predicted_qvalue[np.arange(B), forward_action]  # Bx1 vector
    elif type == "Vanilla":
        forward_qvalue = np.max(target.predict(next_state), axis=1)

    discounted_reward = (self.discount * forward_qvalue * (1 - done))
    Q_value = immediate_reward + discounted_reward
    target_values = self.model.predict(curr_state)
    target_values[range(target_values.shape[0]), action] = Q_value
    """
    for i, target in enumerate(target_values):
        target_values[i, action[i]] = Q_value[i]
    """
    callbacks = []
    # Update epoch number for TensorBoard.
    K.set_value(self.reward_tensor, self.cur_reward)
    if self.model_dir is not None and self.epoch_num % TB_LOGGING_EPOCHS == 0:
        callbacks.append(self.tbCallBack)
    self.model.fit(
        curr_state,
        target_values,
        verbose=0,
        initial_epoch=self.epoch_num,
        callbacks=callbacks,
        epochs=self.epoch_num + 1)
    self.epoch_num += 1
def on_batch_begin(self, batch, logs={}):
    # print(self.batch_num)
    for i in range(len(self.batch_point)):
        if self.batch_num < self.batch_point[i]:
            break
        elif self.batch_num == self.batch_point[i]:
            if i < len(self.lr):
                K.set_value(self.model.optimizer.lr, self.lr[i])
                print('current lr:', K.get_value(self.model.optimizer.lr))
    self.batch_num += 1
def _adjust_learning_rate(self, epoch):
    old_lr = K.get_value(self.model.optimizer.lr)
    new_lr = self.initial_lr * self.multiplier(epoch)
    K.set_value(self.model.optimizer.lr, new_lr)

    if hasattr(self.model.optimizer, 'momentum') and self.momentum_correction:
        # See the paper cited above for more information about momentum correction.
        self.restore_momentum = K.get_value(self.model.optimizer.momentum)
        K.set_value(self.model.optimizer.momentum,
                    self.restore_momentum * new_lr / old_lr)
def train_step(state_input, mcts_probs, winner, learning_rate):
    state_input_union = np.array(state_input)
    mcts_probs_union = np.array(mcts_probs)
    winner_union = np.array(winner)
    loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union],
                               batch_size=len(state_input), verbose=0)
    action_probs, _ = self.model.predict_on_batch(state_input_union)
    entropy = self_entropy(action_probs)
    K.set_value(self.model.optimizer.lr, learning_rate)
    self.model.fit(state_input_union, [mcts_probs_union, winner_union],
                   batch_size=len(state_input), verbose=0)
    return loss[0], entropy
def update_learning_rate(self, total_steps):
    # The DeepMind paper says:
    #      ~400k: 1e-2
    # 400k~600k: 1e-3
    # 600k~    : 1e-4
    lr = self.decide_learning_rate(total_steps)
    if lr:
        K.set_value(self.opt.lr, lr)
        logger.debug(f"total step={total_steps}, set learning rate to {lr}")
def set_lr(self, learning_rate):
    """
    Set the learning rate of the optimizer.
    :param learning_rate: learning rate passed to the optimizer
    :return:
    """
    # self.optimizer.lr.set_value(learning_rate)
    K.set_value(self.optimizer.lr, learning_rate)
    print('learning rate = {}'.format(learning_rate))
    requests.post('http://localhost:8000/setmsg', None,
                  {'msg': 'set learning rate to {}'.format(learning_rate)})
def reset_states(self):
    assert self.stateful, 'Layer must be stateful.'
    input_shape = self.input_spec[0].shape
    if not input_shape[0]:
        raise Exception('If a RNN is stateful, a complete '
                        'input_shape must be provided (including batch size).')
    if hasattr(self, 'states'):
        K.set_value(self.states[0],
                    np.zeros((input_shape[0], self.output_dim)))
    else:
        self.states = [K.zeros((input_shape[0], self.output_dim))]
def on_batch_begin(self, batch, logs=None):
    self.step_num += 1
    t = self.step_num
    n = self.n_model
    p = self.warmup
    s = self.start_decay
    e = self.end_decay
    first = 1 + t * (n - 1) / (n * p)
    second = n
    third = n * (2 * n) ** ((s - n * t) / (e - s))
    lr = self.basic * min(first, second, third)
    K.set_value(self.model.optimizer.lr, lr)
def load_weights(self, filepath):
    # Loads weights from an HDF5 file.
    import h5py
    f = h5py.File(filepath)
    weights = [f['param_weight_{}'.format(p)] for p in range(f.attrs['nb_params'])]
    biases = [f['param_bias_{}'.format(p)] for p in range(f.attrs['nb_params'] + 1)]
    for model_weight, saved_weight in zip(self.Ws, weights):
        K.set_value(model_weight, saved_weight)
    for model_bias, saved_bias in zip(self.bs, biases):
        K.set_value(model_bias, saved_bias)
    f.close()
def build(self):
    input_shape = self.input_shape
    input_dim = input_shape[2]  # = |x|
    # works only for stateful? (todo: try)
    self.input_dim = input_dim
    self.input = K.placeholder(input_shape)
    # from IPython import embed; embed()

    # output dim = |c| = |h| = |output|
    # input dim = |x|
    if self.stateful:
        self.reset_states()
    else:
        # initial states: 3 all-zero tensors of shape (output_dim)
        self.states = [None, None, None]

    # input_dim x output_dim
    # output dim = 50 = |h|?
    input_dim = self.input_dim
    output_dim = self.output_dim

    n = self.output_dim // len(self.periods)
    mask = np.zeros((self.output_dim, self.output_dim))
    period = np.zeros((self.output_dim,), 'i')
    for i, T in enumerate(self.periods):
        mask[i * n:(i + 1) * n, i * n:] = 1
        period[i * n:(i + 1) * n] = T
    # from IPython import embed; embed()
    self.mask = K.zeros((self.output_dim, self.output_dim))
    self.period = K.zeros((self.output_dim,), 'i')
    K.set_value(self.mask, mask)
    K.set_value(self.period, period)
    # todo: mask & period are shared
    # n.b.: K.zeros is shared by default (?)

    self.hh = self.init((self.output_dim, self.output_dim))
    self.xh = self.init((self.input_dim, self.output_dim))
    self.b = K.zeros((self.output_dim,), name="b")

    self.trainable_weights = [self.hh, self.xh, self.b]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def on_batch_begin(self, batch, logs={}):
    if self.batch_count % 2 == 0:
        # Global regularization
        for depth in range(len(self.gates)):
            columns = len(self.gates[depth]) + 1
            selected_column = np.random.random_integers(low=1, high=columns)
            for i in range(1, columns):
                if i >= selected_column:
                    for j in range(len(self.gates[depth][i])):
                        K.set_value(self.gates[depth][i][j], 1)
                else:
                    for j in range(len(self.gates[depth][i])):
                        K.set_value(self.gates[depth][i][j], 0)
    else:
        # Local regularization
        for depth in range(len(self.gates)):
            columns = len(self.gates[depth]) + 1
            for i in range(1, columns):
                for j in range(len(self.gates[depth][i])):
                    prob = np.random.uniform()
                    if prob > 0.5:
                        K.set_value(self.gates[depth][i][j], 1)
                    else:
                        K.set_value(self.gates[depth][i][j], 0)
    self.batch_count = self.batch_count + 1
def fit_generator(self, generator, samples_per_epoch, nb_epoch,
                  validation_generator, nb_val_samples, opt):
    val_losses = []
    lr = K.get_value(self.optimizer.lr)
    for epoch in range(nb_epoch):
        super(sModel, self).fit_generator(generator, samples_per_epoch, 1, verbose=1)
        val_loss = exp(self.evaluate_generator(validation_generator, nb_val_samples))
        val_losses.append(val_loss)
        print('Epoch {}/{}. Validation loss: {}'.format(epoch + 1, nb_epoch, val_loss))
        if len(val_losses) > 2 and (val_losses[-2] - val_losses[-1]) < opt.decay_when:
            lr *= opt.learning_rate_decay
            K.set_value(self.optimizer.lr, lr)
        if epoch == nb_epoch - 1 or epoch % opt.save_every == 0:
            savefile = '%s/lm_%s_epoch%d_%.2f.h5' % (opt.checkpoint_dir, opt.savefile,
                                                     epoch + 1, val_loss)
            self.save_weights(savefile)
def on_epoch_end(self, epoch, logs={}):
    mean_loss = np.array(self.epoch_log).mean()
    if mean_loss + self.min_improvment <= self.current_best:
        self.current_best = mean_loss
        self.current_best_epoch = epoch

    if epoch - self.current_best_epoch > self.epoch_patience:
        lr = K.get_value(self.optimizer.lr)
        new_lr = lr * self.factor
        self.min_improvment *= self.factor
        K.set_value(self.optimizer.lr, new_lr)
        print()
        print("Reduce learning rate to: {:08f}".format(new_lr))
        self.current_best_epoch = epoch
def fit_generator(self, generator, steps_per_epoch, epochs,
                  validation_data, validation_steps, opt):
    val_losses = []
    lr = K.get_value(self.optimizer.lr)
    for epoch in range(epochs):
        super(sModel, self).fit_generator(generator, steps_per_epoch,
                                          epochs=epoch + 1, verbose=1,
                                          initial_epoch=epoch)
        val_loss = exp(self.evaluate_generator(validation_data, validation_steps))
        val_losses.append(val_loss)
        print('Epoch {}/{}. Validation perplexity: {}'.format(epoch + 1, epochs, val_loss))
        if len(val_losses) > 2 and (val_losses[-2] - val_losses[-1]) < opt.decay_when:
            lr *= opt.learning_rate_decay
            K.set_value(self.optimizer.lr, lr)
        if epoch == epochs - 1 or epoch % opt.save_every == 0:
            savefile = '%s/lm_%s_epoch%d_%.2f.h5' % (opt.checkpoint_dir, opt.savefile,
                                                     epoch + 1, val_loss)
            self.save_weights(savefile)
def set_learning_rate(model, step_number):
    min_learning_rate = 0.001
    steps_per_drop = 100
    drop_by = 0.98
    lr = K.get_value(model.optimizer.lr)
    if lr <= min_learning_rate:
        return lr
    if step_number % steps_per_drop == (steps_per_drop - 1):
        lr *= drop_by
        K.set_value(model.optimizer.lr, lr)
    return lr
def reset_states(self):
    assert self.stateful or self.state_input or len(self.state_outputs) > 0, \
        'Layer must be stateful.'
    input_shape = self.input_shape
    if not input_shape[0]:
        raise Exception('If a RNN is stateful, a complete '
                        'input_shape must be provided '
                        '(including batch size).')
    if hasattr(self, 'states'):
        K.set_value(self.states[0],
                    np.zeros((input_shape[0], self.hidden_dim)))
        K.set_value(self.states[1],
                    np.zeros((input_shape[0], self.hidden_dim)))
    else:
        self.states = [K.zeros((input_shape[0], self.hidden_dim)),
                       K.zeros((input_shape[0], self.hidden_dim))]
def reset_states(self):
    K.set_value(self.true_positives, 0)
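A hedged sketch of the kind of stateful object a one-line reset like the one above usually belongs to: a counter kept in a backend variable so it can be zeroed with K.set_value. The class name and structure are assumptions for illustration only:

import keras.backend as K
from keras.layers import Layer

class TruePositiveCounter(Layer):
    """Illustrative stateful layer: keeps a running counter in a backend variable."""

    def __init__(self, **kwargs):
        super(TruePositiveCounter, self).__init__(**kwargs)
        self.stateful = True
        self.true_positives = K.variable(0, dtype='int32')

    def reset_states(self):
        K.set_value(self.true_positives, 0)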
def update_lr(self):
    t = (self.trn_iteration % self.cycle_length) / self.cycle_length
    lr = (1 - t) * self.max_lr + t * self.min_lr
    K.set_value(self.model.optimizer.lr, lr)
def train_target_discriminator(self, source_gen, target_gen, source_model=None,
                               src_discriminator=None, tgt_discriminator=None,
                               epochs=50, save_interval=1, start_epoch=0, num_batches=200):
    '''
    :param source_gen:
    :param target_gen:
    :param source_model:
    :param src_discriminator:
    :param tgt_discriminator:
    :param epochs:
    :param save_interval:
    :param start_epoch:
    :param num_batches: number of iterations that make up one epoch
    :return:
    '''
    # TODO: does this show that labels do not need to be assigned one-to-one?
    self.define_source_encoder(source_model)

    # TODO: does this serve to freeze the layers?
    for layer in self.source_encoder.layers:
        layer.trainable = False

    # get_discriminator(self, model, weights=None):
    source_discriminator = self.get_discriminator(self.source_encoder, src_discriminator)
    target_discriminator = self.get_discriminator(self.target_encoder, tgt_discriminator)

    # TODO: is this weight loading duplicated with self.get_discriminator?
    '''
    if src_discriminator is not None:
        source_discriminator.load_weights(src_discriminator)
    if tgt_discriminator is not None:
        target_discriminator.load_weights(tgt_discriminator)
    '''

    # TODO: why binary_crossentropy when no labels are passed here? -> labels are supplied below
    source_discriminator.compile(loss="binary_crossentropy", optimizer=self.tgt_optimizer,
                                 metrics=['accuracy'])
    target_discriminator.compile(loss="binary_crossentropy", optimizer=self.tgt_optimizer,
                                 metrics=['accuracy'])

    # TODO(11/12): change the path
    callback1 = keras.callbacks.TensorBoard(os.path.join(save_path, 'tensorboard', 'binary'))
    callback1.set_model(source_discriminator)
    callback2 = keras.callbacks.TensorBoard(os.path.join(save_path, 'tensorboard', 'binary'))
    callback2.set_model(target_discriminator)
    src_names = ['src_discriminator_loss', 'src_discriminator_acc']
    tgt_names = ['tgt_discriminator_loss', 'tgt_discriminator_acc']

    for iteration in range(start_epoch, epochs):
        avg_loss, avg_acc, index = [0, 0], [0, 0], 0
        # TODO: the discriminator loss is built this way because the first few layers are shared
        # source_gen -> next() yields the tuple (img, label)
        for source, target in zip(next(source_gen)[0], next(target_gen)[0]):
            l1, acc1 = source_discriminator.train_on_batch(
                source, np_utils.to_categorical(np.zeros(source.shape[0]), 2))
            l2, acc2 = target_discriminator.train_on_batch(
                target, np_utils.to_categorical(np.ones(target.shape[0]), 2))
            index += 1
            loss, acc = (l1 + l2) / 2, (acc1 + acc2) / 2
            print(iteration + 1, ': ', index, '/', num_batches,
                  '; Loss: %.4f' % loss, ' (', '%.4f' % l1, '%.4f' % l2,
                  '); Accuracy: ', acc, ' (', '%.4f' % acc1, '%.4f' % acc2, ')')
            avg_loss[0] += l1
            avg_acc[0] += acc1
            avg_loss[1] += l2
            avg_acc[1] += acc2
            if index % num_batches == 0:
                break

        if iteration % self.discriminator_decay_rate == 0:
            lr = K.get_value(source_discriminator.optimizer.lr)
            K.set_value(source_discriminator.optimizer.lr, lr * self.discriminator_decay_factor)
            lr = K.get_value(target_discriminator.optimizer.lr)
            K.set_value(target_discriminator.optimizer.lr, lr * self.discriminator_decay_factor)
            print('Learning Rate Decayed to: ',
                  K.get_value(target_discriminator.optimizer.lr))

        # TODO(11/12): change the save path and the weight file names here
        if iteration % save_interval == 0:
            source_discriminator.save_weights(
                os.path.join(save_path, 'discriminator_source_%02d.hdf5' % iteration))
            target_discriminator.save_weights(
                os.path.join(save_path, 'discriminator_target_%02d.hdf5' % iteration))

        self.tensorboard_log(
            callback1, src_names,
            [avg_loss[0] / source.shape[0], avg_acc[0] / source.shape[0]], iteration)
        self.tensorboard_log(
            callback2, tgt_names,
            [avg_loss[1] / target.shape[0], avg_acc[1] / target.shape[0]], iteration)
def update_lr(self, model, decay):
    new_lr = K.get_value(model.optimizer.lr) - decay
    if new_lr < 0:
        new_lr = 0
    # print(K.get_value(model.optimizer.lr))
    K.set_value(model.optimizer.lr, new_lr)
def step_decay(self, epoch):
    if epoch % 2 == 0 and epoch != 0:
        lr = K.get_value(self.model.optimizer.lr)
        K.set_value(self.model.optimizer.lr, lr * .5)
        print("lr changed to {}".format(lr * .5))
    return K.get_value(self.model.optimizer.lr)
Kfold_preds_final = []
k = 0
RMSE = []

for train_idx, test_idx in skf.split(train1, y_train):
    print("Number of Folds.." + str(k + 1))

    # Initialize a new model for the current fold
    epochs = 1
    batch_size = 512 * 3
    steps = (int(train1.shape[0] / batch_size)) * epochs
    lr_init, lr_fin = 0.009, 0.0045
    lr_decay = exp_decay(lr_init, lr_fin, steps)
    modelRNN = RNN_model()
    K.set_value(modelRNN.optimizer.lr, lr_init)
    K.set_value(modelRNN.optimizer.decay, lr_decay)

    # K-fold split
    X_train1, X_test1 = train1[train_idx], train1[test_idx]
    print(X_train1.shape, X_test1.shape)
    y_train1, y_test1 = y_train[train_idx], y_train[test_idx]
    print(y_train1.shape, y_test1.shape)
    gc.collect()

    print(type(X_train1))
    print(X_train1.shape)
    print(type(X_train1[:, 12]))
    X_train_final = get_data_frame(X_train1)
def on_epoch_end(self, epoch, logs=None):
    if (epoch + 1) % 2 == 0:
        lr = K.get_value(self.model.optimizer.lr)
        K.set_value(self.model.optimizer.lr, lr * 0.94)
model.trainable = True
advmodel.trainable = False
model.compile(loss=[make_loss_model(c=1.0)], optimizer=opt_model, metrics=['accuracy'])
DRf.compile(loss=[make_loss_model(c=1.0), make_loss_advmodel(c=-lam)], optimizer=opt_DRf)
DfR.compile(loss=[make_loss_advmodel(c=1.0)], optimizer=opt_DfR)

indices = np.random.permutation(len(train_x))[:batch_size]

## Set the learning rate for DRf according to num_epoch
current_learning_rate = calculate_learning_rate(i)
K.set_value(DRf.optimizer.lr, current_learning_rate)
###
DRf.train_on_batch(train_x.iloc[indices],
                   [train_y.iloc[indices], df_Convert_v2.iloc[indices]])
print("learning_rate of DRf: ", K.eval(DRf.optimizer.lr))
print("learning_rate of DfR: ", K.eval(DfR.optimizer.lr))

# Fit "advmodel"
if lam >= 0.0:
    model.trainable = False
    advmodel.trainable = True
    model.compile(loss=[make_loss_model(c=1.0)], optimizer=opt_model, metrics=['accuracy'])
def load_pretrain_weights(vade, X, Y, dataset, autoencoder=None, ae_weights=None):
    if autoencoder is None:
        ae = model_from_json(open(ae_weights).read())
        ae.load_weights('pretrain_weights/ae_' + dataset + '_weights.h5')
        vade.get_layer('encoder_0').set_weights(ae.layers[0].get_weights())
        vade.get_layer('encoder_1').set_weights(ae.layers[1].get_weights())
        vade.get_layer('encoder_2').set_weights(ae.layers[2].get_weights())
        vade.get_layer('z_mean').set_weights(ae.layers[3].get_weights())
        vade.get_layer('decoder_0').set_weights(ae.layers[-4].get_weights())
        vade.get_layer('decoder_1').set_weights(ae.layers[-3].get_weights())
        vade.get_layer('decoder_2').set_weights(ae.layers[-2].get_weights())
        vade.get_layer('output').set_weights(ae.layers[-1].get_weights())
        sample = sample_output.predict(X, batch_size=batch_size)
    else:
        autoencoder.load_weights(ae_weights)
        vade.get_layer('encoder_0').set_weights(autoencoder.layers[1].get_weights())
        vade.get_layer('encoder_1').set_weights(autoencoder.layers[2].get_weights())
        vade.get_layer('encoder_2').set_weights(autoencoder.layers[3].get_weights())
        vade.get_layer('z_mean').set_weights(autoencoder.layers[4].get_weights())
        vade.get_layer('decoder_0').set_weights(autoencoder.layers[-4].get_weights())
        vade.get_layer('decoder_1').set_weights(autoencoder.layers[-3].get_weights())
        vade.get_layer('decoder_2').set_weights(autoencoder.layers[-2].get_weights())
        vade.get_layer('output').set_weights(autoencoder.layers[-1].get_weights())
        sample = sample_output.predict(X, batch_size=batch_size)

    if dataset == 'mnist':
        gmm = GaussianMixture(n_components=n_centroid, covariance_type='diag')
        gmm.fit(sample)
        acc_0 = cluster_acc(Y, gmm.predict(sample))
        means_0 = gmm.means_
        covs_0 = gmm.covariances_
        for i in range(3):
            gmm.fit(sample)
            acc_0_new = cluster_acc(Y, gmm.predict(sample))
            if acc_0_new > acc_0:
                acc_0 = acc_0_new
                means_0 = gmm.means_
                covs_0 = gmm.covariances_
        K.set_value(u_p, means_0.T)
        K.set_value(lambda_p, covs_0.T)

    if dataset == 'reuters10k':
        k = KMeans(n_clusters=n_centroid)
        k.fit(sample)
        K.set_value(u_p, floatX(k.cluster_centers_.T))

    if dataset == 'har':
        g = mixture.GMM(n_components=n_centroid, covariance_type='diag', random_state=3)
        g.fit(sample)
        K.set_value(u_p, floatX(g.means_.T))
        K.set_value(lambda_p, floatX(g.covars_.T))

    if (dataset == 'custom') | (dataset is None):
        gmm = GaussianMixture(n_components=n_centroid, covariance_type='diag')
        gmm.fit(sample)
        acc_0 = cluster_acc(Y, gmm.predict(sample))
        means_0 = gmm.means_
        covs_0 = gmm.covariances_
        print(acc_0)
        print('means:', means_0.shape)
        for i in range(3):
            gmm.fit(sample)
            acc_0_new = cluster_acc(Y, gmm.predict(sample))
            if acc_0_new > acc_0:
                acc_0 = acc_0_new
                means_0 = gmm.means_
                covs_0 = gmm.covariances_
        K.set_value(u_p, means_0.T)
        K.set_value(lambda_p, covs_0.T)

    # Set trainable weights in the 'latent' layer to the initialized values
    K.set_value(vade.get_layer('latent').u_p, K.eval(u_p))
    K.set_value(vade.get_layer('latent').theta_p, K.eval(theta_p))
    K.set_value(vade.get_layer('latent').lambda_p, K.eval(lambda_p))

    print('pretrain weights loaded!')
    return vade
def reset_spikevars(self, sample_idx):
    """
    Reset variables present in spiking layers. Can be turned off for instance
    when a video sequence is tested.
    """
    mod = self.config.getint('simulation', 'reset_between_nth_sample')
    mod = mod if mod else sample_idx + 1
    do_reset = sample_idx % mod == 0
    if do_reset:
        k.set_value(self.mem, self.init_membrane_potential())
        k.set_value(self.time, np.float32(self.dt))
    zeros_output_shape = np.zeros(self.output_shape, k.floatx())
    if self.tau_refrac > 0:
        k.set_value(self.refrac_until, zeros_output_shape)
    if self.spiketrain is not None:
        k.set_value(self.spiketrain, zeros_output_shape)
    k.set_value(self.last_spiketimes, zeros_output_shape - 1)
    k.set_value(self.v_thresh, zeros_output_shape + self._v_thresh)
    k.set_value(self.prospective_spikes, zeros_output_shape)
    k.set_value(self.missing_impulse, zeros_output_shape)
def adjust_learning_rate(self, optimizer, epoch):
    K.set_value(optimizer.lr, self.alpha_plan[epoch])
    K.set_value(optimizer.beta_1, self.beta1_plan[epoch])
model.add(Masking(input_shape=(seq_len, 2048), mask_value=0))
# model.add(SimpleRNN(128, return_sequences=True, activation='sigmoid', use_bias=True))
model.add(LSTM(256, return_sequences=True, activation='sigmoid',
               recurrent_activation='tanh', use_bias=True, unit_forget_bias=True))
# model.add(LSTM(32, return_sequences=True, activation='sigmoid', use_bias=True))
# model.add(Dense(8, activation=kb.sigmoid))
model.add(Dense(2, activation=kb.softmax))

optimizer = adam(lr=0.0001)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

data_dir = './ActivityDataset'
traj_dir = data_dir + '/' + 'TrajectoriesLong'
train_dir = traj_dir + '/' + 'train'
ex_dirs = get_immediate_subdirectories(train_dir)

acc_arr = []
for i in range(num_iter):
    X, Y = get_batch_resnet(train_dir, batch_size, seq_len)
    model.train_on_batch(X, Y)
    score, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=1)
    acc_arr.append(acc)
    print('Step:', i, 'Score: ', score, 'Accuracy:', acc)
    if (i % decay_step == 0) and i != 0:
        kb.set_value(optimizer.lr, 0.5 * kb.get_value(optimizer.lr))
    if (i % disp_step == 0) and i != 0:
        plt.plot(acc_arr)
        plt.pause(0.0005)
def mse_trainer_mlp():
    bin_val = 2
    un_val = 1

    # training add mlp and storing its history values for plotting.
    m_add = mlp_model(bin_val)
    m_add.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_add.optimizer.lr, 1e-2)
    hist_m_add_u = m_add.fit(trx_add, try_add, validation_data=(tex_add, tey_add), batch_size=1024, epochs=100)
    K.set_value(m_add.optimizer.lr, 1e-3)
    hist_m_add_d = m_add.fit(trx_add, try_add, validation_data=(tex_add, tey_add), batch_size=1024, epochs=100)
    K.set_value(m_add.optimizer.lr, 1e-4)
    hist_m_add_t = m_add.fit(trx_add, try_add, validation_data=(tex_add, tey_add), batch_size=1024, epochs=100)
    m_add.save('mlp_mse_add.h5')

    # training sub mlp and storing its history values for plotting.
    m_sub = mlp_model(bin_val)
    m_sub.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_sub.optimizer.lr, 1e-2)
    hist_m_sub_u = m_sub.fit(trx_sub, try_sub, validation_data=(tex_sub, tey_sub), batch_size=1024, epochs=100)
    K.set_value(m_sub.optimizer.lr, 1e-3)
    hist_m_sub_d = m_sub.fit(trx_sub, try_sub, validation_data=(tex_sub, tey_sub), batch_size=1024, epochs=100)
    K.set_value(m_sub.optimizer.lr, 1e-4)
    hist_m_sub_t = m_sub.fit(trx_sub, try_sub, validation_data=(tex_sub, tey_sub), batch_size=1024, epochs=100)
    m_sub.save('mlp_mse_sub.h5')

    # training mul mlp and storing its history values for plotting.
    m_mul = mlp_model(bin_val)
    m_mul.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_mul.optimizer.lr, 1e-2)
    hist_m_mul_u = m_mul.fit(trx_mul, try_mul, validation_data=(tex_mul, tey_mul), batch_size=1024, epochs=100)
    K.set_value(m_mul.optimizer.lr, 1e-3)
    hist_m_mul_d = m_mul.fit(trx_mul, try_mul, validation_data=(tex_mul, tey_mul), batch_size=1024, epochs=100)
    K.set_value(m_mul.optimizer.lr, 1e-4)
    hist_m_mul_t = m_mul.fit(trx_mul, try_mul, validation_data=(tex_mul, tey_mul), batch_size=1024, epochs=100)
    m_mul.save('mlp_mse_mul.h5')

    # training div mlp and storing its history values for plotting.
    m_div = mlp_model(bin_val)
    m_div.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_div.optimizer.lr, 1e-2)
    hist_m_div_u = m_div.fit(trx_div, try_div, validation_data=(tex_div, tey_div), batch_size=1024, epochs=100)
    K.set_value(m_div.optimizer.lr, 1e-3)
    hist_m_div_d = m_div.fit(trx_div, try_div, validation_data=(tex_div, tey_div), batch_size=1024, epochs=100)
    K.set_value(m_div.optimizer.lr, 1e-4)
    hist_m_div_t = m_div.fit(trx_div, try_div, validation_data=(tex_div, tey_div), batch_size=1024, epochs=100)
    m_div.save('mlp_mse_div.h5')

    # training sqr mlp and storing its history values for plotting.
    m_sqr = mlp_model(un_val)
    m_sqr.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_sqr.optimizer.lr, 1e-2)
    hist_m_sqr_u = m_sqr.fit(trx_sqr, try_sqr, validation_data=(tex_sqr, tey_sqr), batch_size=1024, epochs=100)
    K.set_value(m_sqr.optimizer.lr, 1e-3)
    hist_m_sqr_d = m_sqr.fit(trx_sqr, try_sqr, validation_data=(tex_sqr, tey_sqr), batch_size=1024, epochs=100)
    K.set_value(m_sqr.optimizer.lr, 1e-4)
    hist_m_sqr_t = m_sqr.fit(trx_sqr, try_sqr, validation_data=(tex_sqr, tey_sqr), batch_size=1024, epochs=100)
    m_sqr.save('mlp_mse_sqr.h5')

    # training qrt mlp and storing its history values for plotting.
    m_qrt = mlp_model(un_val)
    m_qrt.compile("nadam", "mse", metrics=["mae"])
    K.set_value(m_qrt.optimizer.lr, 1e-2)
    hist_m_qrt_u = m_qrt.fit(trx_qrt, try_qrt, validation_data=(tex_qrt, tey_qrt), batch_size=1024, epochs=100)
    K.set_value(m_qrt.optimizer.lr, 1e-3)
    hist_m_qrt_d = m_qrt.fit(trx_qrt, try_qrt, validation_data=(tex_qrt, tey_qrt), batch_size=1024, epochs=100)
    K.set_value(m_qrt.optimizer.lr, 1e-4)
    hist_m_qrt_t = m_qrt.fit(trx_qrt, try_qrt, validation_data=(tex_qrt, tey_qrt), batch_size=1024, epochs=100)
    m_qrt.save('mlp_mse_qrt.h5')

    return hist_m_add_u, hist_m_add_d, hist_m_add_t, hist_m_sub_u, hist_m_sub_d, hist_m_sub_t, \
        hist_m_mul_u, hist_m_mul_d, hist_m_mul_t, hist_m_div_u, hist_m_div_d, hist_m_div_t, \
        hist_m_sqr_u, hist_m_sqr_d, hist_m_sqr_t, hist_m_qrt_u, hist_m_qrt_d, hist_m_qrt_t
def train(self, batch_size: int = 32, epochs: int = 100,
          lr_multipliers: Tuple[float, ...] = (0.5, 0.75, 0.8, 1, 1.2, 1.5, 2),
          nb_models: int = 3, threads: int = 4,
          monitor_metric: str = 'val_word_acc_processed',
          log_dir: str = 'logs', **kwargs):
    self.params.update(locals())
    self.params.pop('self')

    ''' Save all the objects/parameters for reproducibility '''
    log_dir = Path(log_dir).joinpath(datetime.now().replace(microsecond=0).isoformat())
    model_path = Path(log_dir).joinpath('checkpoints').joinpath("best-model.joblib")
    model_path.parent.mkdir(parents=True, exist_ok=True)
    with open(Path(log_dir).joinpath('params.json'), 'w', encoding='utf-8') as f:
        json.dump({'params': self.params,
                   'commandline': sys.argv,
                   'commit': get_current_commit()},
                  f, indent=4)

    train_generator = DataGenerator(dataset=self.train_dataset, processor=self.processor,
                                    batch_size=batch_size)
    valid_generator = DataGenerator(dataset=self.valid_dataset, processor=self.processor,
                                    batch_size=batch_size, with_samples=True)

    best_current_models: List[ModelInstance] = []
    best_prev_models: List[ModelInstance] = []
    for epoch in range(epochs):
        best_prev_models = deepcopy(best_current_models)
        best_current_models = []

        def log_model(score):
            nonlocal best_current_models
            learning_rate = float(K.get_value(self.model.optimizer.lr))
            path = f'{log_dir}/model-epoch:{epoch}-acc:{score:.3f}-lr:{learning_rate:.3f}.joblib'
            best_current_models.append(ModelInstance(performance=score, path=path, lr=learning_rate))
            print('Obtained:', str(best_current_models[-1]), flush=True)
            Word2Morph(model=self.model, processor=self.processor).save(path)

            best_current_models = list(set(best_current_models))
            best_current_models = sorted(best_current_models, reverse=True)
            best_current_models, worst = best_current_models[:nb_models], best_current_models[nb_models:]
            for model in worst:
                print('Removing:', model.path, flush=True)
                os.remove(model.path)

            print('Resulting list:')
            for i, model in enumerate(best_current_models):
                print(i, ':', str(model))
            print(flush=True)

        # There are no models for the initial epoch => use the initial random model as the base model
        if len(best_current_models) == 0:
            log_model(score=0)

        for base_model in best_prev_models:
            for lr_multiplier in lr_multipliers:
                print('Trying to modify:', str(base_model), flush=True)
                # Clean up the keras session before working with a new model
                del self.processor
                del self.model
                K.clear_session()
                gc.collect()

                w2m = Word2Morph.load_model(base_model.path)
                self.model, self.processor = w2m.model, w2m.processor
                lr = float(K.get_value(self.model.optimizer.lr))
                K.set_value(self.model.optimizer.lr, lr * lr_multiplier)

                history = self.model.fit_generator(
                    generator=train_generator,
                    epochs=epoch + 1, initial_epoch=epoch,
                    callbacks=[Evaluate(data_generator=valid_generator,
                                        to_sample=self.processor.to_sample)],
                    class_weight=self.class_weights,
                    use_multiprocessing=True, workers=threads,
                )
                log_model(score=history.history[monitor_metric][-1])
def lr_decay():
    #### Learning rate decay
    K.set_value(adam_nn.lr, max(K.eval(adam_nn.lr) * decay_nn, 0.0002))
    print('lr_nn:%f' % K.eval(adam_nn.lr))
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['categorical_accuracy'])
#model.compile(loss=norm_rmse, optimizer=rms)
#model.compile(loss=log_error, optimizer=rms)
#model.compile(loss=[contrastive_loss], optimizer=rms)
#model.compile(loss=[contrastive_loss,'mse'], loss_weights=[10.0, 1.0], optimizer=rms)
#model.fit([np.expand_dims(tr_pairs[:, 0], axis=3), np.expand_dims(tr_pairs[:, 1], axis=3)], tr_y,
#          validation_split=0.1,
#          batch_size=128,
#          nb_epoch=nb_epoch)

lr = 1e-3
for i in range(2):  # num times to drop learning rate
    print('Learning rate: {0}'.format(lr))
    K.set_value(model.optimizer.lr, lr)
    for j in range(20):  # num times to generate data ~8M images
        print("i,j={0},{1}".format(i, j))
        [img_pairs, img_rot, img_label] = gen_data(imgs, quats, num_pairs)
        rot_label = compute_clusters(img_rot)  # rotation cluster labels
        print(rot_label.shape)
        input_top = np.expand_dims(img_pairs[:, 0], axis=3)
        input_bot = np.expand_dims(img_pairs[:, 1], axis=3)
        model.fit([input_top, input_bot], [rot_label],
                  validation_split=0.1,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  callbacks=[model_checkpoint])
        #model.fit([input_top, input_bot], [img_label, img_rot], shuffle=True, validation_split=0.1, batch_size=128, nb_epoch=nb_epoch)

# compute final accuracy on training and test sets
def readSLICandMDLInit(resultFile, all_Q_mat, superpixel_label_mat):
    print('----write result, read mats')
    f_out = open(resultFile, 'w')
    train_data = sio.loadmat(all_Q_mat)['all_Q']
    train_labels = sio.loadmat(superpixel_label_mat)['all_superpixel_labels']

    print('----get_train_data')
    data = get_train_data(train_data, train_labels)
    print(len(data))

    print('----initialize_params')
    train_params = initialize_params(train_data, data)

    print('----initialize_net')
    model = initialize_net(train_params)
    model.summary()

    print('----model compile')
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(lr=train_params['base_lr']),
                  metrics=['accuracy'])

    print('----ImageDataGenerator')
    train_datagen = ImageDataGenerator(featurewise_center=True,
                                       featurewise_std_normalization=True)

    for epoch in range(0, train_params['num_epochs']):
        num_iterations = int(train_params['total_samples'] / train_params['batch_size']) + 1
        for iteration in range(0, num_iterations):
            print('Epoch : ' + str(epoch) + ' | Iteration : ' + str(iteration))
            given_data = load_data(data, train_params)
            X = given_data[0]
            Y = given_data[1]
            model.fit(X, Y, epochs=1, verbose=1)

        if epoch % train_params['decay_steps'] == 0 and epoch != 0:
            print(' Changing learning rate ... ')
            lr = K.get_value(model.optimizer.lr)
            K.set_value(model.optimizer.lr, lr * train_params['decay_factor'])
            print("lr changed to {}".format(lr * train_params['decay_factor']))

        if epoch % train_params['checkpoint'] == 0 and epoch != 0:
            print(' Saving model ... ')
            model_name = 'model_' + str(epoch) + '.h5'
            model.save(model_name)

        if epoch % 1 == 0:
            acu_pos = 0
            acu_neg = 0
            acu = 0
            for i in range(0, int(train_params['pos_samples'] / train_params['batch_size'])):
                X = np.zeros((train_params['batch_size'], train_params['max_size'], 3))
                Y = np.zeros((train_params['batch_size'], 2))
                for j in range(0, train_params['batch_size']):
                    sam = data[1][i * train_params['batch_size'] + j]
                    sam_len = sam.shape[0]
                    X[j, :sam_len, :] = np.true_divide(sam, sam.max())
                    Y[j][1] = float(1)
                pred = model.evaluate(X, Y, batch_size=train_params['batch_size'])
                print(pred)
                acu_pos = acu_pos + pred[1]
                acu = acu + pred[1]
            for i in range(0, int(train_params['neg_samples'] / train_params['batch_size'])):
                X = np.zeros((train_params['batch_size'], train_params['max_size'], 3))
                Y = np.zeros((train_params['batch_size'], 2))
                for j in range(0, train_params['batch_size']):
                    sam = data[0][i * train_params['batch_size'] + j]
                    sam_len = sam.shape[0]
                    X[j, :sam_len, :] = np.true_divide(sam, sam.max())
                    Y[j][0] = float(1)
                pred = model.evaluate(X, Y, batch_size=train_params['batch_size'])
                print(pred)
                acu_neg = acu_neg + pred[1]
                acu = acu + pred[1]
            acu_pos = float(acu_pos) / float(int(train_params['pos_samples'] / train_params['batch_size']))
            acu_neg = float(acu_neg) / float(int(train_params['neg_samples'] / train_params['batch_size']))
            acu = float(acu) / float(int(train_params['pos_samples'] / train_params['batch_size']) +
                                     int(train_params['neg_samples'] / train_params['batch_size']))
            f_out.write('acu_pos: ' + str(acu_pos) + ', acu_neg: ' + str(acu_neg) + ', acu:' + str(acu) + '\n')
        BESTSCORE = score
        ES_FLAG = 0
        pred_y = model.predict([test_q1_w, test_q1_c, test_q1_tm,
                                test_q2_w, test_q2_c, test_q2_tm],
                               batch_size=BATCH_SIZE)
        pred_y = np.reshape(pred_y, (len(pred_y),))
        savepath = 'submission/submission-cnn-' + str(BESTSCORE)[:7] + '.csv'
        make_submission(pred_y, savepath)
    else:
        ES_FLAG += 1
        if ES_FLAG > EARLYSTOP:
            if LEARNING_RATE > 1e-6:
                LEARNING_RATE *= 0.5
                K.set_value(model.optimizer.lr, LEARNING_RATE)
                print('reduce learning rate = %f' % LEARNING_RATE)
                ES_FLAG = 0
            else:
                break

# from keras import callbacks
# cb_reducelr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6,
#                                           verbose=0, mode='auto', epsilon=0.001, cooldown=0)
# cb_ckpt = callbacks.ModelCheckpoint('/files/faust/COMPETITION/ppdai/rescnn.{val_loss:.2f}.hdf5',
#                                     monitor='val_loss', verbose=1, mode='auto', period=1)
# cb_earlystop = callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='min')
#
# model.fit([train_all_q1_w, train_all_q1_c, train_all_q2_w, train_all_q2_c], train_all_y,
#           validation_split=0.2,
#           batch_size=BATCH_SIZE, epochs=EPOCH, verbose=1,
def generate_backdoor(self, x_val: np.ndarray, y_val: np.ndarray,
                      y_target: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Generates a possible backdoor for the model. Returns the pattern and the mask.

    :return: A tuple of the pattern and mask for the model.
    """
    import keras.backend as K
    from keras_preprocessing.image import ImageDataGenerator

    self.reset()
    datagen = ImageDataGenerator()
    gen = datagen.flow(x_val, y_val, batch_size=self.batch_size)
    mask_best = None
    pattern_best = None
    reg_best = float("inf")
    cost_set_counter = 0
    cost_up_counter = 0
    cost_down_counter = 0
    cost_up_flag = False
    cost_down_flag = False
    early_stop_counter = 0
    early_stop_reg_best = reg_best
    mini_batch_size = len(x_val) // self.batch_size

    for _ in tqdm(range(self.steps),
                  desc="Generating backdoor for class {}".format(np.argmax(y_target))):
        loss_reg_list = []
        loss_acc_list = []

        for _ in range(mini_batch_size):
            x_batch, _ = gen.next()
            y_batch = [y_target] * x_batch.shape[0]
            batch_loss_ce, batch_loss_reg, batch_loss, batch_loss_acc = self.train([x_batch, y_batch])
            loss_reg_list.extend(list(batch_loss_reg.flatten()))
            loss_acc_list.extend(list(batch_loss_acc.flatten()))

        avg_loss_reg = np.mean(loss_reg_list)
        avg_loss_acc = np.mean(loss_acc_list)

        # save best mask/pattern so far
        if avg_loss_acc >= self.attack_success_threshold and avg_loss_reg < reg_best:
            mask_best = K.eval(self.mask_tensor)
            pattern_best = K.eval(self.pattern_tensor)
            reg_best = avg_loss_reg

        # check early stop
        if self.early_stop:
            if reg_best < float("inf"):
                if reg_best >= self.early_stop_threshold * early_stop_reg_best:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
            early_stop_reg_best = min(reg_best, early_stop_reg_best)

            if cost_down_flag and cost_up_flag and early_stop_counter >= self.early_stop_patience:
                logger.info("Early stop")
                break

        # cost modification
        if avg_loss_acc >= self.attack_success_threshold:
            cost_set_counter += 1
            if cost_set_counter >= self.patience:
                self.cost = self.init_cost
                K.set_value(self.cost_tensor, self.cost)
                cost_up_counter = 0
                cost_down_counter = 0
                cost_up_flag = False
                cost_down_flag = False
        else:
            cost_set_counter = 0

        if avg_loss_acc >= self.attack_success_threshold:
            cost_up_counter += 1
            cost_down_counter = 0
        else:
            cost_up_counter = 0
            cost_down_counter += 1

        if cost_up_counter >= self.patience:
            cost_up_counter = 0
            self.cost *= self.cost_multiplier_up
            K.set_value(self.cost_tensor, self.cost)
            cost_up_flag = True
        elif cost_down_counter >= self.patience:
            cost_down_counter = 0
            self.cost /= self.cost_multiplier_down
            K.set_value(self.cost_tensor, self.cost)
            cost_down_flag = True

    if mask_best is None:
        mask_best = K.eval(self.mask_tensor)
        pattern_best = K.eval(self.pattern_tensor)

    return mask_best, pattern_best
def on_batch_begin(self, batch, logs={}):
    pts = self.currentEP + batch / self.nbatch - self.startEP
    decay = 1 + np.cos(pts / self.Tmult * np.pi)
    lr = self.min_lr + 0.5 * (self.initial_lr - self.min_lr) * decay
    K.set_value(self.model.optimizer.lr, lr)
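A quick, illustrative check of the cosine schedule above (the numbers are arbitrary, not from the original code): at the start of a cycle pts = 0, so the decay factor is 2 and lr = initial_lr; at pts = Tmult the cosine term cancels and lr = min_lr.

import numpy as np

initial_lr, min_lr, Tmult = 0.1, 0.001, 10.0
for pts in (0.0, Tmult / 2, Tmult):
    lr = min_lr + 0.5 * (initial_lr - min_lr) * (1 + np.cos(pts / Tmult * np.pi))
    print(pts, lr)  # 0.1 at cycle start, ~0.05 mid-cycle, 0.001 at cycle end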
                 decay=0.1)

# Early stopping
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0., patience=50,
                               verbose=0, mode='auto')
# reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5, min_lr=0.00001, verbose=1)
# callback = [early_stopping, reduce_lr]
callback = [early_stopping]

model.compile(loss=rel_mse, optimizer='adam', metrics=[rmse])

K.set_value(model.optimizer.lr, 0.001)
history = model.fit(Input_tr, Output_tr, validation_split=0.03125,
                    epochs=N_max_epoch, batch_size=N_batch_size, callbacks=callback)
K.set_value(model.optimizer.lr, 0.0001)
# history = model.fit(Input_tr, Output_tr, validation_split=0.03125, epochs=N_max_epoch, batch_size=N_batch_size, callbacks=callback)
def rnn(embedding_matrix, config):
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        encode = Bidirectional(CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))

    q1 = Input(shape=(config['max_length'],), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'],), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # bsz, 1, emb_dims
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)

    # bidirectional
    # q1_encoded = encode2(q1_encoded)
    # q2_encoded = encode2(q2_encoded)

    # resnet
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)

    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    # rnn_layer3_input1 = concatenate([q1_embed, q1_encoded, q1_encoded2])
    # rnn_layer3_input2 = concatenate([q2_embed, q2_encoded, q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)

    # merged1 = GlobalMaxPool1D()(q1_encoded3)
    # merged2 = GlobalMaxPool1D()(q2_encoded3)
    # q1_encoded = concatenate([q1_encoded, q1_encoded2], axis=-1)
    # q2_encoded = concatenate([q2_encoded, q2_encoded2], axis=-1)
    # merged1 = concatenate([q1_encoded2, q1_embed], axis=-1)
    # merged2 = concatenate([q2_encoded2, q2_embed], axis=-1)

    # TODO add attention rep, maxpooling rep
    q1_encoded3 = concatenate([q1_encoded, q1_encoded2, q1_encoded3])
    q2_encoded3 = concatenate([q2_encoded, q2_encoded2, q2_encoded3])
    merged1 = GlobalMaxPool1D()(q1_encoded3)
    merged2 = GlobalMaxPool1D()(q2_encoded3)
    # avg1 = GlobalAvgPool1D()(q1_encoded3)
    # avg2 = GlobalAvgPool1D()(q2_encoded3)
    # merged1 = concatenate([max1, avg1])
    # merged2 = concatenate([max2, avg2])

    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
    # jaccard_rep = Lambda(lambda x: x[0]*x[1]/(K.sum(x[0]**2, axis=1, keepdims=True) +
    #                                           K.sum(x[1]**2, axis=1, keepdims=True) -
    #                                           K.sum(K.abs(x[0]*x[1]), axis=1, keepdims=True)))([merged1, merged2])
    # merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, jaccard_rep])

    feature_input = Input(shape=(config['feature_length'],))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'], activation='relu')(feature_dense)

    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, feature_dense])

    # Classifier
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
def scheduler(epoch):
    if epoch % 100 == 0 and epoch != 0:
        lr = K.get_value(model.optimizer.lr)
        K.set_value(model.optimizer.lr, lr * 0.5)
        print("lr changed to {}".format(lr * 0.5))
    return K.get_value(model.optimizer.lr)
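An epoch -> lr function of this shape is typically handed to keras.callbacks.LearningRateScheduler; a hedged usage sketch, where model, x_train and y_train are placeholders rather than names from the original code:

from keras.callbacks import LearningRateScheduler

lr_callback = LearningRateScheduler(scheduler)
model.fit(x_train, y_train, epochs=300, callbacks=[lr_callback])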
def on_train_end(self, logs={}):
    for weight in self.sym_trainable_weights:
        K.set_value(weight, self.mv_trainable_weights_vals[weight.name])
def AtrousDenseUDeconvNet():
    caxis = 1 if K.image_data_format() == 'channels_first' else -1
    input = Input(shape=(img_w, img_h, 3), batch_shape=(None, img_w, img_h, 3))  # 256
    # inputs = keras.layers.convolutional.ZeroPadding2D(padding=(0, 0), dim_ordering='default')(input)
    x = BatchNormalization(mode=0, axis=caxis, gamma_regularizer=l2(1E-4),
                           beta_regularizer=l2(1E-4))(input)
    inputs1 = Conv2D(filters=16, kernel_size=3, name="initial_conv2D", bias=False, strides=1,
                     activation="relu", padding="same", kernel_initializer="TruncatedNormal",
                     W_regularizer=l2(1E-4))(x)  # 256

    # 256
    dense1 = Dense_Block1(inputs1)
    # 188
    tran1 = transition_block1(dense1, 64, dropout_rate=0.25, weight_decay=1E-4)
    dense2 = Dense_Block2(tran1)
    # 40
    #a1 = keras.layers.PReLU(alpha_initializer='TruncatedNormal', alpha_regularizer=None,
    #                        alpha_constraint=None, shared_axes=None)(dense2)
    tran2 = transition_block2(dense2, 256, dropout_rate=0.25, weight_decay=1E-4)
    #pool = MaxPooling2D(pool_size=(2, 2))(a1)

    # 64  64*64
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(tran2)
    #conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    drop2 = Dropout(0.1)(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(drop2)

    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
    #conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    #pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    #conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
    #conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    drop3 = Dropout(0.1)(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(drop3)

    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
    drop4 = Dropout(0.1)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    # conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
    drop5 = Dropout(0.1)(conv5)
    #conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)

    up7 = Conv2D(512, 3, activation='relu', padding='same',
                 kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(drop5))
    merge7 = concatenate([drop4, up7], axis=caxis)
    conv7 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
    #conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)

    up8 = Conv2D(256, 2, activation='relu', padding='same',
                 kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv7))
    merge8 = concatenate([drop3, up8], axis=caxis)
    conv8 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge8)
    #conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)

    up9 = Conv2D(128, 2, activation='relu', padding='same',
                 kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv8))
    merge9 = concatenate([drop2, up9], axis=caxis)
    conv9 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge9)

    up10 = Conv2D(64, 2, activation='relu', padding='same',
                  kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv9))
    merge10 = concatenate([db2x2, up10], axis=caxis)
    conv10 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge10)

    up11 = Conv2D(32, 3, activation='relu', padding='same',
                  kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv10))
    merge11 = concatenate([db1x3, up11], axis=caxis)
    conv11 = Conv2D(32, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge11)
    '''
    up11 = Conv2D(16, 3, activation='relu', padding='same', kernel_initializer='he_normal')(
        UpSampling2D(size=(2, 2))(conv10))
    merge11 = concatenate([db1x3, up11], axis=caxis)
    conv11 = Conv2D(16, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge11)
    '''

    conv12 = Conv2D(16, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv11)
    conv12 = Conv2D(6, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv12)
    '''
    conv9 = Conv2DTranspose(filters=64, kernel_size=3, padding="same", strides=2,
                            kernel_initializer="TruncatedNormal", W_regularizer=l2(1E-4),
                            activation="relu")(conv8)  # 128
    #conv8add = Conv2DTranspose(filters=64, kernel_size=3, padding="same", strides=2,
    #                           kernel_initializer="TruncatedNormal", name="conv8add",
    #                           W_regularizer=l2(1E-4), activation="relu")(conv8)  # 128
    conv10 = Conv2DTranspose(filters=32, kernel_size=3, padding="same", strides=2,
                             kernel_initializer="TruncatedNormal", W_regularizer=l2(1E-4),
                             activation="relu")(conv9)
    #conv9add = Conv2DTranspose(filters=32, kernel_size=3, padding="same", strides=2,
    #                           kernel_initializer="TruncatedNormal", name="conv9add",
    #                           W_regularizer=l2(1E-4), activation="relu")(conv9)
    conv10 = Conv2DTranspose(filters=6, kernel_size=3, padding="same", strides=1,
                             kernel_initializer="TruncatedNormal", W_regularizer=l2(1E-4),
                             activation="relu")(conv10)
    #conv10add = Conv2DTranspose(filters=6, kernel_size=3, padding="same", strides=1,
    #                            kernel_initializer="TruncatedNormal", name="conv10add",
    #                            W_regularizer=l2(1E-4), activation="softmax")(conv10)
    '''

    model = Model(input=input, output=conv12)
    #loss = tf.optimizers.RMSprop
    model.compile(optimizer=Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    K.set_value(model.optimizer.lr, 0.001)
    keras.utils.plot_model(model, "AtrousDense-U-DeconvNet_model.png", show_shapes=True)
    return model
def set_lr(model, lr):
    K.set_value(model.optimizer.lr, lr)
def lr_poly_decay(model, base_lr, curr_iter, max_iter, power=0.5):
    lrate = base_lr * (1.0 - (curr_iter / float(max_iter))) ** power
    K.set_value(model.optimizer.lr, lrate)
    return K.eval(model.optimizer.lr)
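A hedged usage sketch for the polynomial decay helper above, applied once per iteration inside a manual train_on_batch loop; model, x_batch, y_batch and max_iter are placeholders, not names from the original code:

max_iter = 10000
for curr_iter in range(max_iter):
    lr = lr_poly_decay(model, base_lr=1e-3, curr_iter=curr_iter,
                       max_iter=max_iter, power=0.5)
    model.train_on_batch(x_batch, y_batch)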
if Iter % model_save_step4_visualization == 0:
    d_model.save(model_save_Path + '/m_' + str(Iter) + '_model.h5')

# Optimization strategy: optimizer switch and learning rate adjustment ================================
# We start with Adam, so the code below has two parts: one switches from Adam to SGD,
# the other updates the learning rate after the switch.
# When the switch point is reached, save the weights first (just in case),
# then recompile the model and reload the weights.
if Iter == optimizer_switch_point:
    d_model.save(model_save_Path + '/m_' + 'newest_model.h5')
    lr_new = lr_mod(Iter, max_epoch=50, epoch_file_size=trainset_num,
                    batch_size=batch_size, init_lr=first_lr)
    d_model.compile(optimizer=SGD(lr=lr_new, momentum=0.9), loss=EuiLoss,
                    metrics=[y_t, y_pre, Acc])
    d_model.load_weights(model_save_Path + '/m_' + 'newest_model.h5')

if Iter > optimizer_switch_point:
    # batch_num_perepoch = or_train_num // batch_size  # iterations per epoch, i.e. the number of batches
    lr_new = lr_mod(Iter, max_epoch=50, epoch_file_size=trainset_num,
                    batch_size=batch_size, init_lr=first_lr)
    K.set_value(d_model.optimizer.lr, lr_new)

# Close the files so the results can be inspected in real time
txt_s1.close()
txt_s2.close()
txt_s3.close()
txt_s4.close()
txt_s5.close()
txt_s6.close()
txt_s7.close()
txt_s8.close()
txt_s9.close()
txt_s10.close()
txt_s11.close()
def call(self, inputs, mask=None, training=None, initial_state=None, constants=None):
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if isinstance(inputs, list):
        inputs = inputs[0]
    if initial_state is not None:
        pass
    elif self.stateful:
        initial_state = self.states
    else:
        initial_state = self.get_initial_state(inputs)

    if isinstance(mask, list):
        mask = mask[0]

    if len(initial_state) != len(self.states):
        raise ValueError('Layer has ' + str(len(self.states)) +
                         ' states but was passed ' + str(len(initial_state)) +
                         ' initial states.')
    timesteps = K.int_shape(inputs)[1]

    kwargs = {}
    if has_arg(self.cell.call, 'training'):
        kwargs['training'] = training

    if constants:
        if not has_arg(self.cell.call, 'constants'):
            raise ValueError('RNN cell does not support constants')

        def step(inputs, states):
            constants = states[-self._num_constants:]
            states = states[:-self._num_constants]
            return self.cell.call(inputs, states, constants=constants, **kwargs)
    else:
        def step(inputs, states):
            return self.cell.call(inputs, states, **kwargs)

    last_output, outputs, states = K.rnn(step,
                                         inputs,
                                         initial_state,
                                         constants=constants,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         input_length=timesteps)
    if self.stateful:
        updates = []
        for i in range(len(states)):
            updates.append((self.states[i], states[i]))
        self.add_update(updates, inputs)

    if self.return_sequences:
        output = outputs
    else:
        output = last_output

    # Properly set learning phase
    if getattr(last_output, '_uses_learning_phase', False):
        output._uses_learning_phase = True

    if self.return_state:
        if not isinstance(states, (list, tuple)):
            states = [states]
        else:
            states = list(states)
        return [output] + states
    else:
        # print('output')
        # print(output.shape)
        return output

# helper function
def get_tuple_shape(nb_channels):
    result = list(state_shape)
    if self.cell.data_format == 'channels_first':
        result[1] = nb_channels
    elif self.cell.data_format == 'channels_last':
        result[3] = nb_channels
    else:
        raise KeyError
    return tuple(result)

# initialize state if None
if self.states[0] is None:
    if hasattr(self.cell.state_size, '__len__'):
        self.states = [K.zeros(get_tuple_shape(dim))
                       for dim in self.cell.state_size]
    else:
        self.states = [K.zeros(get_tuple_shape(self.cell.state_size))]
elif states is None:
    if hasattr(self.cell.state_size, '__len__'):
        for state, dim in zip(self.states, self.cell.state_size):
            K.set_value(state, np.zeros(get_tuple_shape(dim)))
    else:
        K.set_value(self.states[0],
                    np.zeros(get_tuple_shape(self.cell.state_size)))
else:
    if not isinstance(states, (list, tuple)):
        states = [states]
    if len(states) != len(self.states):
        raise ValueError('Layer ' + self.name + ' expects ' +
                         str(len(self.states)) + ' states, '
                         'but it received ' + str(len(states)) +
                         ' state values. Input received: ' + str(states))
    for index, (value, state) in enumerate(zip(states, self.states)):
        if hasattr(self.cell.state_size, '__len__'):
            dim = self.cell.state_size[index]
        else:
            dim = self.cell.state_size
        if value.shape != get_tuple_shape(dim):
            raise ValueError('State ' + str(index) +
                             ' is incompatible with layer ' + self.name +
                             ': expected shape=' + str(get_tuple_shape(dim)) +
                             ', found shape=' + str(value.shape))
        # TODO: consider batch calls to `set_value`.
        K.set_value(state, value)
def set_lr(model, lr):
    import keras.backend as K
    K.set_value(model.optimizer.lr, float(lr))