def fit(self,  # pylint: disable=too-many-locals
        inputs=None,
        outputs=None,
        batch_size=None,
        epochs=1,
        verbose=1,
        callbacks=None,
        validation_data=None,
        shuffle=True,
        class_weight=None,
        sample_weight=None,
        initial_epoch=0,
        steps_per_epoch=None,
        use_multiprocessing=False,
        workers=1):
    """Model fitting.

    This method is used to fit a given model. Most of the parameters
    are directly delegated to fit_generator of the keras model.

    Parameters
    ----------
    inputs : :code:`Dataset`, list(Dataset) or Sequence (keras.utils.Sequence)
        Input Dataset or Sequence to use for fitting the model.
    outputs : :code:`Dataset`, list(Dataset) or None
        Output Dataset containing the training targets. If a Sequence
        is used for inputs, outputs will have no effect.
    batch_size : int or None
        Batch size. If set to None a batch size of 32 is used.
    epochs : int
        Number of epochs. Default: 1.
    verbose : int
        Verbosity level. See https://keras.io.
    callbacks : List(keras.callbacks.Callback)
        Callbacks to be applied during training. See https://keras.io/callbacks
    validation_data : tuple, Sequence or None
        Validation data can be a tuple (input_dataset, output_dataset),
        or (input_dataset, output_dataset, sample_weights), or
        a keras.utils.Sequence instance, or a list of validation chromosomes.
        The latter choice only works when using Cover and Bioseq datasets.
        This allows you to train on a dedicated set of chromosomes
        and to validate the performance on the respective held-out
        chromosomes. If None, validation is not applied.
    shuffle : boolean
        Shuffle batches. Default: True.
    class_weight : dict
        Class weights. See https://keras.io.
    sample_weight : np.array or None
        Sample weights. See https://keras.io.
    initial_epoch : int
        Initial epoch at which to start training.
    steps_per_epoch : int, None.
        Number of steps per epoch. If None, this value is determined from
        the dataset size and the batch_size.
    use_multiprocessing : boolean
        Whether to use multiprocessing. See https://keras.io. Default: False.
    workers : int
        Number of workers to use in multiprocessing mode. Default: 1.

    Examples
    --------

    .. code-block:: python

       model.fit(DATA, LABELS)
    """
    if not isinstance(inputs, Sequence):
        inputs = _convert_data(self.kerasmodel, inputs, 'input_layers')
        outputs = _convert_data(self.kerasmodel, outputs, 'output_layers')

    hyper_params = {
        'epochs': epochs,
        'batch_size': batch_size,
        'shuffle': shuffle,
        'class_weight': class_weight,
        'initial_epoch': initial_epoch,
        'steps_per_epoch': steps_per_epoch,
        'use_multiprocessing': use_multiprocessing,
        'workers': workers
    }

    self.logger.info('Fit: %s', self.name)
    if isinstance(inputs, Sequence):
        self.logger.info('using custom Sequence')
    else:
        self.logger.info("Input:")
        self.__dim_logging(inputs)
        self.logger.info("Output:")
        self.__dim_logging(outputs)
    self.timer = time.time()
    history = None
    self.logger.info("Hyper-parameters:")
    for par_ in hyper_params:
        self.logger.info('%s: %s', par_, str(hyper_params[par_]))

    if callbacks is None:
        callbacks = []

    callbacks.append(LambdaCallback(on_epoch_end=lambda epoch, logs: self.logger.info(
        "epoch %s: %s",
        epoch + 1,
        ' '.join(["{}=".format(k) +
                  ('{:.4f}' if abs(logs[k]) > 1e-3
                   else '{:.4e}').format(logs[k]) for k in logs]))))

    if not os.path.exists(os.path.join(self.outputdir, 'evaluation')):
        os.mkdir(os.path.join(self.outputdir, 'evaluation'))
    if not os.path.exists(os.path.join(self.outputdir, 'evaluation', self.name)):
        os.mkdir(os.path.join(self.outputdir, 'evaluation', self.name))

    callbacks.append(CSVLogger(os.path.join(self.outputdir,
                                            'evaluation',
                                            self.name,
                                            'training.log')))

    if not batch_size:
        batch_size = 32

    if isinstance(inputs, Sequence):
        # input could be a sequence
        jseq = inputs
    else:
        jseq = JangguSequence(batch_size, inputs, outputs, sample_weight,
                              shuffle=shuffle)

    if isinstance(validation_data, tuple):
        valinputs = _convert_data(self.kerasmodel, validation_data[0],
                                  'input_layers')
        valoutputs = _convert_data(self.kerasmodel, validation_data[1],
                                   'output_layers')
        sweights = validation_data[2] if len(validation_data) == 3 else None
        valjseq = JangguSequence(batch_size, valinputs, valoutputs,
                                 sweights, shuffle=False)
    elif isinstance(validation_data, Sequence):
        valjseq = validation_data
    elif isinstance(validation_data, list) and isinstance(validation_data[0], str):
        # if the validation data is a list of chromosomes that should
        # be used as the validation dataset we end up here.
        # This is only possible, however, if all input and output datasets
        # are Cover or Bioseq datasets.
        if not all(hasattr(datum, 'gindexer')
                   for datum in [jseq.inputs[k] for k in jseq.inputs] +
                   [jseq.outputs[k] for k in jseq.outputs]):
            raise ValueError("Not all datasets are Cover or Bioseq datasets,"
                             " which is required for this option.")

        # then split the original dataset into a training and a validation set.
        train, val = split_train_test((jseq.inputs, jseq.outputs),
                                      validation_data)
        traininp, trainoup = train
        valinp, valoup = val

        self.logger.info("Split in training and validation set:")
        self.logger.info("Training-Input:")
        self.__dim_logging(traininp)
        self.logger.info("Training-Output:")
        self.__dim_logging(trainoup)
        self.logger.info("Validation-Input:")
        self.__dim_logging(valinp)
        self.logger.info("Validation-Output:")
        self.__dim_logging(valoup)

        jseq = JangguSequence(
            jseq.batch_size,
            _convert_data(self.kerasmodel, traininp, 'input_layers'),
            _convert_data(self.kerasmodel, trainoup, 'output_layers'),
            sample_weights=None, shuffle=jseq.shuffle)
        valjseq = JangguSequence(
            jseq.batch_size,
            _convert_data(self.kerasmodel, valinp, 'input_layers'),
            _convert_data(self.kerasmodel, valoup, 'output_layers'),
            sample_weights=None, shuffle=False)
    else:
        valjseq = None

    try:
        history = self.kerasmodel.fit_generator(
            jseq,
            epochs=epochs,
            validation_data=valjseq,
            class_weight=class_weight,
            initial_epoch=initial_epoch,
            shuffle=shuffle,
            use_multiprocessing=use_multiprocessing,
            max_queue_size=50,
            workers=workers,
            verbose=verbose,
            callbacks=callbacks)
    except Exception:  # pragma: no cover
        # ignore the linter warning, the exception
        # is reraised anyway.
        self.logger.exception('fit_generator failed:')
        raise

    self.logger.info('#' * 40)
    for k in history.history:
        self.logger.info('%s: %f', k, history.history[k][-1])
    self.logger.info('#' * 40)

    self.save()
    self._save_hyper(hyper_params)

    self.logger.info("Training finished after %1.3f s",
                     time.time() - self.timer)
    return history
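
For context, a hedged usage sketch of the chromosome-based validation path described in the docstring; the dataset names, file names, and constructor arguments are illustrative assumptions, not taken from this code:

# Hypothetical usage sketch: train on most chromosomes, validate on chr1/chr2.
# File names and constructor arguments are assumptions for illustration.
from janggu.data import Bioseq, Cover

dna = Bioseq.create_from_refgenome('dna', refgenome='hg38.fa', roi='roi.bed')
labels = Cover.create_from_bigwig('labels', bigwigfiles=['targets.bw'],
                                  roi='roi.bed')

# Passing a list of chromosome names as validation_data triggers
# split_train_test above; this requires all datasets to be Cover or
# Bioseq (they carry a gindexer attribute).
model.fit(dna, labels, epochs=10, batch_size=64,
          validation_data=['chr1', 'chr2'])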
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np],
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np,
                                      'input_b': input_b_np},
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np,
                                      'input_b': input_b_np},
                                     {'dense_1': output_a_np,
                                      'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np,
                                  'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []
    trained_batches = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    def on_batch_begin(batch, logs):
        trained_batches.append(batch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin,
                                on_batch_begin=on_batch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)),
                    np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)),
                    np.random.random((batch_sz, 3))])

    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # empty batch
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np],
            [output_a_np, output_b_np],
            sample_weight=[sample_weight[1], sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np],
                        [output_a_np, output_b_np],
                        epochs=1, batch_size=4,
                        validation_data=([input_a_np, input_b_np],))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer, loss='mse',
                  loss_weights={'dense_1': 0.2, 'dropout': 0.8})
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np],
            [output_a_np, output_b_np],
            sample_weight=[None, np.random.random((10, 20, 30))])

    model.compile(optimizer, loss='mse',
                  sample_weight_mode={'dense_1': None, 'dropout': 'temporal'})
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # the rank of output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              steps_per_epoch=3,
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              validation_steps=3,
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(3)) * 5

    # steps_per_epoch will be equal to len of sequence if it's unspecified
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(12)) * 5

    # fit_generator will throw an exception if steps is unspecified
    # for a regular generator
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.fit_generator(generator=gen_data(), epochs=5,
                                  initial_epoch=0,
                                  validation_data=gen_data(),
                                  callbacks=[tracker_cb])

    # Check if generator is only accessed an expected number of times
    gen_counters = [0, 0]

    def gen_data(i):
        while True:
            gen_counters[i] += 1
            yield ([np.random.random((1, 3)), np.random.random((1, 3))],
                   [np.random.random((1, 4)), np.random.random((1, 3))])

    out = model.fit_generator(generator=gen_data(0), epochs=3,
                              steps_per_epoch=2,
                              validation_data=gen_data(1),
                              validation_steps=1, max_queue_size=2,
                              workers=2)
    # Need range check here as filling of the queue depends on
    # the sleep in the enqueuers
    assert 6 <= gen_counters[0] <= 8
    assert 3 <= gen_counters[1] <= 5

    gen_counters = [0]
    out = model.fit_generator(generator=RandomSequence(3), epochs=3,
                              validation_data=gen_data(0),
                              validation_steps=1, max_queue_size=2,
                              workers=2)
    # Need range check here as filling of the queue depends on
    # the sleep in the enqueuers
    assert 3 <= gen_counters[0] <= 5

    # predict_generator output shape behavior should be consistent
    def expected_shape(batch_size, n_batches):
        return (batch_size * n_batches, 4), (batch_size * n_batches, 3)

    # Multiple outputs and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Multiple outputs and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1

    # Create a model with a single output.
    single_output_model = Model([a, b], a_2)
    single_output_model.compile(optimizer, loss, metrics=[],
                                sample_weight_mode=None)

    # Single output and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0

    # Single output and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0
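
The tests above rely on a `RandomSequence` helper that is not shown here. A minimal sketch consistent with the assertions (a default length of 12 batches, matching `list(range(12)) * 5`, and batch shapes matching the two-input/two-output model) is given below as an assumption, not the exact original:

# Minimal sketch of the RandomSequence helper assumed by the tests above.
import numpy as np
from keras.utils import Sequence


class RandomSequence(Sequence):
    def __init__(self, batch_size, sequence_length=12):
        self.batch_size = batch_size
        self.sequence_length = sequence_length

    def __len__(self):
        # number of batches per epoch; used when steps_per_epoch is omitted
        return self.sequence_length

    def __getitem__(self, idx):
        # shapes mirror the model: two (batch, 3) inputs,
        # a (batch, 4) and a (batch, 3) output
        return ([np.random.random((self.batch_size, 3)),
                 np.random.random((self.batch_size, 3))],
                [np.random.random((self.batch_size, 4)),
                 np.random.random((self.batch_size, 3))])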
def train_generator(self,
                    epochs, batch_size,
                    workers=1,
                    dataname='train_gen',
                    datapath_train='./train_dir',
                    datapath_validation='./val_dir',
                    datapath_test='./val_dir',
                    steps_per_epoch=1000,
                    steps_per_validation=1000,
                    crops_per_image=2,
                    log_weight_path='./data/weights/',
                    log_tensorboard_path='./data/logs/',
                    log_tensorboard_name='SRResNet',
                    log_tensorboard_update_freq=1,
                    log_test_path="./images/samples/"):
    """Trains the generator part of the network with MSE loss"""

    # Create data loaders
    train_loader = DataLoader(
        datapath_train, batch_size,
        self.height_hr, self.width_hr,
        self.upscaling_factor,
        crops_per_image)
    test_loader = None
    if datapath_validation is not None:
        test_loader = DataLoader(
            datapath_validation, batch_size,
            self.height_hr, self.width_hr,
            self.upscaling_factor,
            crops_per_image)

    # Callback: tensorboard
    callbacks = []
    if log_tensorboard_path:
        tensorboard = TensorBoard(
            log_dir=os.path.join(log_tensorboard_path, log_tensorboard_name),
            histogram_freq=0,
            batch_size=batch_size,
            write_graph=False,
            write_grads=False,
            update_freq=log_tensorboard_update_freq)
        callbacks.append(tensorboard)
    else:
        print(">> Not logging to tensorboard since no log_tensorboard_path is set")

    # Callback: save weights after each epoch
    modelcheckpoint = ModelCheckpoint(
        os.path.join(log_weight_path,
                     dataname + '_{}X'.format(self.upscaling_factor)),
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=True)
    callbacks.append(modelcheckpoint)

    # Callback: test images plotting
    if datapath_test is not None:
        testplotting = LambdaCallback(
            on_epoch_end=lambda epoch, logs: plot_test_images(
                self,
                test_loader,
                datapath_test,
                log_test_path,
                epoch,
                name='SRResNet'))
        callbacks.append(testplotting)

    # Fit the model
    self.generator.fit_generator(
        train_loader,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=test_loader,
        validation_steps=steps_per_validation,
        callbacks=callbacks,
        use_multiprocessing=workers > 1,
        workers=workers)
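
A hedged usage sketch of this method; the `SRResNet` wrapper class name, its constructor arguments, and the directory layout are assumptions for illustration:

# Hypothetical usage, assuming this method lives on an SRResNet-style
# wrapper exposing height_hr/width_hr/upscaling_factor attributes.
srresnet = SRResNet(height_hr=384, width_hr=384, upscaling_factor=4)
srresnet.train_generator(
    epochs=10,
    batch_size=16,
    workers=4,  # workers > 1 switches fit_generator to multiprocessing
    datapath_train='./data/train',
    datapath_validation='./data/val')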
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
    else:
        print()
        print('----- Not generating text after Epoch: %d' % epoch)


generate_text = LambdaCallback(on_epoch_end=on_epoch_end)

# define the checkpoint
filepath = "weights_rory.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1,
                             save_best_only=True, mode='min')

# fit model using our gpu
with tf.device('/gpu:0'):
    model.fit(x, y,
              batch_size=128,
              epochs=15,
              verbose=2,
              # the original call was truncated here; the two callbacks
              # defined above are the natural candidates
              callbacks=[generate_text, checkpoint])
def run_experiment(option):
    # building test, train, validation split
    # number_of_samples = 1280000
    number_of_samples = 32000
    BATCH_SIZE = 128

    # make the test, train, validation split
    ttv = make_ttv(number_of_samples)

    # set up pipeline
    print('Setting up data pipeline')
    data_pipeline = one_hot_token_pipeline(ttv, use_cache=False)

    train_data = data_pipeline.get_set('train')
    train_gen = KerasGenerator(data_source=train_data, batch_size=BATCH_SIZE)

    validation_data = data_pipeline.get_set('validation')
    validation_gen = KerasGenerator(data_source=validation_data,
                                    batch_size=BATCH_SIZE)

    def shuffle_train_data(*args):
        nonlocal train_gen
        train_gen.shuffle()

    data_shuffler = LambdaCallback(on_epoch_end=shuffle_train_data)

    if option == 'simple':
        model, encoder, generator = make_simple_cnn(BATCH_SIZE)
    elif option == 'conv1':
        model, encoder, generator = make_cnn_1(BATCH_SIZE)
    else:
        print('INVALID OPTION')
        exit(1)

    # checkpoint = ModelCheckpoint(
    #     'experiments/token_VAE/' + option + '_models/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
    #     save_best_only=True
    # )

    def save_models(epoch, logs):
        val_loss = logs.get('val_loss')
        # named format fields require keyword arguments; passing epoch and
        # val_loss positionally would raise a KeyError
        path = 'experiments/token_VAE/{}_models/weights.{epoch:02d}-{val_loss:.2f}'.format(
            option, epoch=epoch, val_loss=val_loss)
        model.save(path + '{}.hdf5'.format('model'))
        encoder.save(path + '{}.hdf5'.format('encoder'))
        generator.save(path + '{}.hdf5'.format('generator'))

    save_models = LambdaCallback(on_epoch_end=save_models)

    model.fit_generator(
        generator=train_gen,
        samples_per_epoch=len(train_gen),
        nb_epoch=5,
        validation_data=validation_gen,
        nb_val_samples=len(validation_gen),
        callbacks=[save_models, data_shuffler])
def train(self, epochs=1, trained_model=None,
          trained_model_name='trained_model_wn'):
    """
    Create and train deep model

    :param epochs: Number of epochs for training.
    :param trained_model: Optionally pass an already trained model to
        continue training it instead of building a new one.
    :return: Nothing.
    """
    # Start time of training
    dt = datetime.datetime.now().strftime('_date_%Y-%m-%d_%H-%M-%S_')

    print('Generate training samples ...')
    sentences_training, next_chars_training = self.generate_samples(
        self.text_training + self.text_test)
    print('Generate validation samples ...')
    sentences_validation, next_chars_validation = self.generate_samples(
        self.text_validation)

    print('Build and compile model ...')
    model = None
    model_name = None
    if trained_model is None:
        model, model_name = self.define_model(
            (self.maxlen, len(self.chars)), len(self.chars))
    else:
        model = trained_model
        model_name = trained_model_name

    optimizer = RMSprop(lr=0.01)  # [0.001, 0.01, 0.02, 0.05, 0.1]
    optimizer = Adam(lr=0.0001)  # Reduced from 0.001 to 0.0001 for model_10
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        # metrics=['accuracy']
        metrics=['accuracy', cross_entropy, perplexity])

    print(model_name, ' summary ...')
    model.summary()
    print(model_name, ' count_params ...')
    print(model.count_params())
    # input()

    print('Set 5 callbacks ...')

    # callback #1: EarlyStopping
    # monitor='val_loss' or monitor='loss'?
    model_early_stopping = EarlyStopping(monitor='loss', min_delta=0.01,
                                         patience=5, verbose=1, mode='auto')

    # callback #2: ModelCheckpoint
    # Create a directory for each training process to keep model
    # checkpoints in .h5 format
    dir_name = './model_checkpoint/pdfs/' + model_name + dt + 'epochs_' + str(epochs) + '/'
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    file_name = dir_name + model_name + dt + 'epoch_{epoch:02d}_val_loss_{val_loss:.4f}.h5'
    model_checkpoint = ModelCheckpoint(file_name, verbose=1)

    # callback #3: TensorBoard
    dir_name = './logs_tensorboard/pdfs/' + model_name + dt + 'epochs_' + str(epochs) + '/'
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    model_tensorboard = TensorBoard(log_dir=dir_name,
                                    histogram_freq=0,
                                    batch_size=self.batch_size,
                                    write_graph=True,
                                    write_grads=False,
                                    write_images=True,
                                    embeddings_freq=0,
                                    embeddings_layer_names=None,
                                    embeddings_metadata=None)

    # callback #4: CSVLogger
    # Create a directory and an empty csv file within to save the model csv log.
    dir_name = './logs_csv/pdfs/' + model_name + dt + 'epochs_' + str(epochs) + '/'
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    file_name = dir_name + model_name + dt + '_epochs_' + str(epochs) + '_step_' + str(self.step) + '.csv'
    open(file_name, mode='a', newline='').close()
    model_csv_logger = CSVLogger(file_name, separator=',', append=False)

    # callback #5: LambdaCallback
    dir_name = './generated_results/pdfs/' + model_name + dt + 'epochs_' + str(epochs) + '/'
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

    def on_epoch_end(epoch, logs):
        nonlocal model
        nonlocal epochs
        nonlocal model_name
        nonlocal dir_name
        print('Sampling model and save results ...')
        self.generate_and_fuzz_new_samples(model=model,
                                           model_name=model_name,
                                           epochs=epochs,
                                           current_epoch=epoch,
                                           dir_name=dir_name)

    generate_and_fuzz_new_samples_callback = LambdaCallback(
        on_epoch_begin=None,
        on_epoch_end=on_epoch_end,
        on_batch_begin=None,
        on_batch_end=None,
        on_train_begin=None,
        on_train_end=None)

    if learning_config['dataset_size'] == 'small':  # very_small
        print('Start training on small dataset ...')
        x, y = self.data_generator_in_memory(sentences_training,
                                             next_chars_training)
        model.fit(x, y,
                  batch_size=self.batch_size,
                  epochs=epochs,
                  validation_split=0.2,
                  shuffle=True,
                  callbacks=[model_checkpoint,
                             model_tensorboard,
                             model_csv_logger,
                             generate_and_fuzz_new_samples_callback])
    else:
        print('Build training and validation data generators ...')
        training_data_generator = self.data_generator(
            sentences_training, next_chars_training)
        validation_data_generator = self.data_generator_validation(
            sentences_validation, next_chars_validation)

        # x, y = next(training_data_generator)
        # print(x)
        # print('+' * 75)
        # print(y)
        # print('#' * 50)
        # x, y = next(training_data_generator)
        # print(x)
        # print('+' * 75)
        # print(y)
        # print('#' * 50)
        # input()

        print('Start training on large dataset ...')
        model.fit_generator(
            generator=training_data_generator,
            # steps_per_epoch=200,
            steps_per_epoch=len(sentences_training) // self.batch_size,  # 1000,
            validation_data=validation_data_generator,
            validation_steps=len(sentences_validation) // (self.batch_size * 2),  # 100,
            # validation_steps=10,
            use_multiprocessing=False,
            workers=1,
            epochs=epochs,
            shuffle=True,
            callbacks=[model_checkpoint,
                       model_tensorboard,
                       model_csv_logger,
                       generate_and_fuzz_new_samples_callback])
    validation_steps=1000,
    epochs=50,
    verbose=0,
    callbacks=[
        TensorBoard(
            log_dir=FOLDER,
            write_graph=False),
        ModelCheckpoint(
            FOLDER + 'weights.{epoch:02d}-{loss:.2f}.h5',
            monitor='val_loss',
            save_best_only=True,
            save_weights_only=True),
        LambdaCallback(
            on_epoch_end=lambda epoch, logs: plot_callback(model)),
        TQDMNotebookCallback()
    ]
)


# ## Phase 2 - without batch normalization

# Load weights from previous run
model = PConvUnet(vgg_weights='./data/logs/pytorch_vgg16.h5')
model.load(
    r"C:\Users\Mathias Felix Gruber\Documents\GitHub\PConv-Keras\data\logs\imagenet_phase1\weights.23-1.18.h5",
def main():
    # Initialize Model
    model = CNN_model.fnBuildModel()

    # If user passes a single image to check if the model works
    if args.test is not None:
        print("---------- In Testing mode")
        image = cv2.imread(args.test)
        image = fu.preprocessing(image)
        image = np.expand_dims(image, axis=0)
        y = np.expand_dims(np.asarray([0]), axis=0)
        BatchSize = 1
        model.fit(image, y,
                  epochs=400,
                  batch_size=BatchSize,
                  validation_split=0.1,
                  shuffle=True,
                  verbose=0)
        return

    X_filename = '../data/X_train_train.npy'
    Y_filename = '../data/y_train_train.npy'
    X_train = np.load(X_filename)
    Y_train = np.load(Y_filename)
    print(X_train.shape)
    print(Y_train.shape)

    print("Training started...........")
    arrCallbacks = []
    earlystop_callback = EarlyStopping(monitor='val_loss', patience=5,
                                       verbose=0)
    batch_print_callback = LambdaCallback(
        on_batch_begin=lambda batch, logs: print(batch))
    epoch_print_callback = LambdaCallback(
        on_epoch_end=lambda epoch, logs: print("epoch:", epoch))
    arrCallbacks.append(earlystop_callback)
    arrCallbacks.append(batch_print_callback)
    arrCallbacks.append(epoch_print_callback)

    BatchSize = 512
    hist = model.fit(X_train, Y_train,
                     epochs=400,
                     batch_size=BatchSize,
                     validation_split=0.1,
                     shuffle=True,
                     verbose=0,
                     callbacks=arrCallbacks)
    model.save_weights('my_model_weights.h5')
    # scores = model.evaluate(X_train, Y_train, verbose=0)

    # model result:
    train_val_accuracy = hist.history
    # Get and print training accuracy
    train_accuracy = train_val_accuracy['acc']
    # Get and print validation accuracy
    val_accuracy = train_val_accuracy['val_acc']
    print("Done!")
    print("Train acc: %.3f" % train_accuracy[-1])
    print("Validation acc: %.3f" % val_accuracy[-1])
    # print("Train loss : %.3f" % scores[0])
    # print("Train accuracy : %.3f" % scores[1])
    print("Training finished")
model.compile(loss="mean_squared_error", optimizer=newopt)
print("Start training")
print(model.summary())

get_both(model, 10)
# exit()

history = model.fit(
    X_train, y_train,
    batch_size=params['batch_size'],
    epochs=params['epochs'],
    validation_split=params['validation_split'],
    callbacks=[
        ModelCheckpoint(filepath="models_pruned/" + saved_model,
                        monitor='loss', verbose=1,
                        save_best_only=True, save_weights_only=True),
        ModelCheckpoint(filepath="models_pruned/" + saved_model + "_val",
                        monitor='val_loss', verbose=1, mode='min',
                        save_best_only=True, save_weights_only=True),
        LambdaCallback(
            on_epoch_end=lambda batch, logs: get_both(model, logs['val_loss']))
    ])

# print(model.summary())

if df_his is None:
    df = pd.DataFrame(history.history)
    df.to_csv("history_%s.csv" % (saved_model), header=True)
else:
    df = pd.concat([df_his, pd.DataFrame(history.history)]).reset_index()
    df.to_csv("history_%s.csv" % (saved_model), header=True)

print("Predicting")
st = time.time()
predict = scaler_y.inverse_transform(model.predict(X_test))
# x_norm2 = np.transpose(x_norm2)
predicted_weight = prediction_model.predict(temp_weight_pool)
temp_n = 0
for j in range(l.shape[1]):
    tmp_shape = l[0][j].shape
    tmp_length = l[0][j].flatten().shape[0]
    # reshape the predicted flat weights back into this layer's shape
    l[0][j] = np.reshape(predicted_weight[temp_n:temp_n + tmp_length],
                         tmp_shape)
    temp_n = temp_n + tmp_length
model.set_weights(l[0])
l = np.array([])

# `weight_prediciton` (sic) is presumably the routine above wrapped
# in a function defined elsewhere in this file
update_weights = LambdaCallback(
    on_epoch_end=lambda batch, logs: weight_prediciton())

# initiate RMSprop optimizer
# opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

if not data_augmentation:
#     test_bi_acc = (test_bi_predict == (np.array(test_label) > 0)).mean()
#
#     print(f'train acc: {train_bi_acc} - test acc: {test_bi_acc}')
#     print()

#     if epoch > 10 and not epoch % 5:
#         # print('-----------train high examples---------')
#         # for i, v in enumerate(heapq.nlargest(5, range(len(train_predict)), train_predict.take)):
#         #     print(" ".join(train_sentences[i]))
#         # print(train_sentences[heapq.nlargest(5, range(len(train_predict)), train_predict.take)])
#         print('-----------test high examples---------')
#         for i in heapq.nlargest(5, range(len(test_predict)), test_predict.take):
#             print(" ".join(test_sentences[i]))
#         print('-----------test low examples----------')
#         for i in heapq.nsmallest(5, range(len(test_predict)), test_predict.take):
#             print(" ".join(test_sentences[i]))

history = model.fit(X_train, train_label,
                    validation_data=[X_test, test_label],
                    epochs=100, batch_size=128, verbose=2,
                    callbacks=[LambdaCallback(on_epoch_end=call_corr)])

history.history['train_corr'] = train_corr_list
history.history['test_corr'] = test_corr_list

with open('attention_history.pkl', 'wb') as f:
    pickle.dump(history.history, f)
def train_model(data, args):
    # Make sure everything is the right shape
    assert data.val_X.shape[1] == args.seq_in_length, data.val_X.shape
    assert data.val_Y.shape[1] == args.seq_out_length, data.val_Y.shape
    assert data.train_X.shape[1] == args.seq_in_length, data.train_X.shape
    assert data.train_Y.shape[1] == args.seq_out_length, data.train_Y.shape

    _, _, in_shape = data.train_X.shape
    _, _, out_shape = data.train_Y.shape
    vae, encoder, decoder = make_vae(in_shape, out_shape, args)

    try:
        makedirs(args.action_dir)
    except FileExistsError:
        pass
    try:
        makedirs(args.model_dir)
    except FileExistsError:
        pass

    def sample_trajectories(epoch, logs={}):
        epoch += args.extra_epoch
        actions_to_save = args.actions_to_save
        out_data = dict()

        gen_actions = decoder.predict(
            np.random.randn(actions_to_save, args.noise_dim))
        out_data['gen_actions'] = action_lists(gen_actions, data.action_names)

        train_inds = np.random.permutation(len(data.train_X))[:actions_to_save]
        train_actions = data.train_Y[train_inds]
        out_data['train_actions'] = action_lists(train_actions,
                                                 data.action_names)

        val_inds = np.random.permutation(len(data.val_X))[:actions_to_save]
        val_actions = data.val_Y[val_inds]
        out_data['val_actions'] = action_lists(val_actions, data.action_names)

        out_data['action_names'] = data.action_names

        out_path = path.join(args.action_dir,
                             'preds-epoch-%d.json' % (epoch + 1))
        print('\nSaving samples to', out_path)
        with open(out_path, 'w') as fp:
            json.dump(out_data, fp)

    def model_paths(epoch, logs={}):
        model_path = path.join(args.model_dir,
                               'epoch-{epoch:02d}'.format(epoch=epoch))
        encoder_path = model_path + '-enc.h5'
        decoder_path = model_path + '-dec.h5'
        return encoder_path, decoder_path

    def save_encoder_decoder(epoch, logs={}):
        epoch += args.extra_epoch
        encoder_path, decoder_path = model_paths(epoch, logs)
        print('Saving encoder to %s' % encoder_path)
        encoder.save(encoder_path)
        print('Saving decoder to %s' % decoder_path)
        decoder.save(decoder_path)

    def save_state(epoch, logs={}):
        # Save all paths and arguments to file. Good for resumption of
        # training.
        epoch += args.extra_epoch
        encoder_path, decoder_path = model_paths(epoch, logs)
        extra_args = args._all_args
        config_dest = args.config_path
        data = {
            'encoder_path': encoder_path,
            'decoder_path': decoder_path,
            'args': extra_args,
            'epoch': epoch + 1
        }
        print('Saving config to', config_dest)
        with open(config_dest, 'w') as fp:
            json.dump(data, fp, indent=2)

    def check_prediction_accuracy(epoch, logs={}):
        # Check prediction accuracy over entire validation dataset
        epoch += args.extra_epoch
        print('Calculating prediction accuracies')

        indices = np.random.permutation(len(data.val_X))[:1000]
        sub_X = data.val_X[indices]
        sub_Y = data.val_Y[indices]

        # 'Extend' baseline. Repeats *last* action of input sequence.
        ext_preds = np.repeat(
            sub_Y[:, args.seq_in_length - 1:args.seq_in_length],
            args.seq_out_length, axis=1)

        # Actual VAE baseline
        vae_preds = vae.predict(sub_X)

        # Fake VAE baseline in which the input has no bearing on the labels
        # (input is randomly permuted). This should be much worse than the
        # VAE baseline, in theory.
        fake_X = sub_X[np.random.permutation(len(sub_X))]
        random_preds = vae.predict(fake_X)

        pred_dict = {
            'extend': ext_preds,
            'vae-pred': vae_preds,
            'random-vae': random_preds
        }
        report = time_series_metrics(sub_Y, pred_dict, data.action_names)
        dest_path = path.join(args.acc_dir, 'epoch-%d.txt' % epoch)
        print('Saving accuracy report to', dest_path)
        with open(dest_path, 'w') as fp:
            fp.write(report)

    print('Training recurrent VAE')
    cb_list = [
        LambdaCallback(on_epoch_end=sample_trajectories),
        LambdaCallback(on_epoch_end=check_prediction_accuracy),
        LambdaCallback(on_epoch_end=save_encoder_decoder),
        LambdaCallback(on_epoch_end=save_state),
        ReduceLROnPlateau(patience=10)
    ]
    vae.fit(data.train_X, data.train_Y,
            validation_data=(data.val_X, data.val_Y),
            shuffle=True,
            batch_size=args.batch_size,
            nb_epoch=1000,
            callbacks=cb_list)

    return vae, encoder, decoder
def fixed_generator(generator):
    # autoencoder targets equal inputs, so wrap each batch as (x, x)
    for batch in generator:
        yield (batch, batch)


train_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode=None)

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode=None)

callbacks_list = [LambdaCallback(
    on_epoch_end=lambda epoch, logs: save_models())]

history = vae.fit_generator(
    fixed_generator(train_generator),
    steps_per_epoch=nb_train_samples // batch_size,
    nb_epoch=nb_epoch,
    callbacks=callbacks_list,
    validation_data=fixed_generator(validation_generator),
    nb_val_samples=nb_validation_samples)
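
The `save_models()` helper invoked by the callback above is not shown; a minimal hedged stand-in, under the assumption that it simply snapshots the VAE weights each epoch (the filename is illustrative):

# Hypothetical stand-in for the save_models() helper referenced above.
def save_models():
    vae.save_weights('vae_weights_latest.h5')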
def model_main(result_sds, project_id, result_dir, train_data_dir,
               validation_data_dir, nb_train_samples,
               nb_validation_samples, input_shape,
               img_width, img_height,
               epochs, batch_size):
    # derive the number of classes from the number of folders
    # under train_data_dir
    l = os.listdir(train_data_dir)
    if '.DS_Store' in l:  # guard: the macOS artifact may not be present
        l.remove('.DS_Store')
    num_classes = len(l)
    if num_classes < 2:
        raise Exception('classes should be more than 1, put your '
                        'different classes images file into '
                        'different folder')

    # load the inception_v3 network
    base_model = applications.InceptionV3(weights='imagenet',
                                          include_top=False,
                                          input_shape=input_shape)

    # build the top of the cnn network
    top_model = Sequential()
    top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
    # top_model.add(Dense(256, activation='relu'))
    top_model.add(Dropout(0.5))

    # binary class
    if num_classes == 2:
        top_model.add(Dense(1, activation='sigmoid'))
        model = Model(inputs=base_model.input,
                      outputs=top_model(base_model.output))
        for layer in model.layers[:-2]:
            layer.trainable = False
        model.compile(loss='binary_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy',
                               custom_metrcis.matthews_correlation,
                               custom_metrcis.precision,
                               custom_metrcis.recall,
                               custom_metrcis.fmeasure])
    else:
        top_model.add(Dense(num_classes, activation='softmax'))
        model = Model(inputs=base_model.input,
                      outputs=top_model(base_model.output))
        for layer in model.layers[:-2]:
            layer.trainable = False
        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])

    # this is the augmentation configuration we will use for training
    train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

    # this is the augmentation configuration we will use for testing:
    # only rescaling
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    if num_classes == 2:
        class_mode = 'binary'
    else:
        class_mode = 'categorical'

    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=class_mode)

    validation_generator = test_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=class_mode)

    # callback to save metrics
    batch_print_callback = LambdaCallback(
        on_epoch_begin=lambda epoch, logs: logger_service.log_epoch_begin(
            epoch, logs, result_sds, project_id),
        on_epoch_end=lambda epoch, logs: logger_service.log_epoch_end(
            epoch, logs, result_sds, project_id),
        on_batch_end=lambda batch, logs: logger_service.log_batch_end(
            batch, logs, result_sds, project_id))

    # checkpoint to save best weight
    best_checkpoint = MyModelCheckpoint(
        os.path.abspath(os.path.join(result_dir, 'best.hdf5')),
        save_weights_only=True,
        verbose=1, save_best_only=True)
    # checkpoint to save latest weight
    general_checkpoint = MyModelCheckpoint(
        os.path.abspath(os.path.join(result_dir, 'latest.hdf5')),
        save_weights_only=True,
        verbose=1)

    history = model.fit_generator(
        train_generator,
        steps_per_epoch=nb_train_samples // batch_size,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=nb_validation_samples // batch_size,
        callbacks=[batch_print_callback, best_checkpoint,
                   general_checkpoint])

    # model.save_weights('first_try.h5')
    config = model.get_config()
    logger_service.log_train_end(result_sds,
                                 model_config=config,
                                 # score=score,
                                 history=history.history)
    keras_saved_model.save_model(result_dir, model)
    return {'history': history.history}
def test_model_methods():
    a = Input(shape=(3,), name='input_a')
    b = Input(shape=(3,), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                               {'dense_1': output_a_np, 'dropout': output_b_np})

    # test fit
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np], epochs=1, batch_size=4)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4)

    # test validation_split
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5)
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5)

    # test validation data
    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4,
                    validation_data=([input_a_np, input_b_np],
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    [output_a_np, output_b_np],
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np,
                                      'input_b': input_b_np},
                                     [output_a_np, output_b_np]))
    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
                    {'dense_1': output_a_np, 'dropout': output_b_np},
                    epochs=1, batch_size=4, validation_split=0.5,
                    validation_data=({'input_a': input_a_np,
                                      'input_b': input_b_np},
                                     {'dense_1': output_a_np,
                                      'dropout': output_b_np}))

    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
                              {'dense_1': output_a_np, 'dropout': output_b_np})

    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({'input_a': input_a_np,
                                  'input_b': input_b_np})

    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # with sample_weight
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    sample_weight = [None, np.random.random((10,))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)

    # test accuracy metric
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5

    # this should also work
    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # and this as well
    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4

    # test starting from non-zero initial epoch
    trained_epochs = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin)

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], epochs=5, batch_size=4,
                    initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([np.random.random((batch_sz, 3)),
                    np.random.random((batch_sz, 3))],
                   [np.random.random((batch_sz, 4)),
                    np.random.random((batch_sz, 3))])

    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
                              initial_epoch=2, callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse], sample_weight_mode=None)

    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    out = model.fit([input_a_np, input_b_np],
                    [output_a_np, output_b_np], batch_size=4, epochs=1)
    out = model.evaluate([input_a_np, input_b_np],
                         [output_a_np, output_b_np], batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)

    # empty batch
    with pytest.raises(ValueError):
        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))
        out = model.evaluate_generator(gen_data(), steps=1)

    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])

    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])

    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np],
            [output_a_np, output_b_np],
            sample_weight=[sample_weight[1], sample_weight[1][:2]])

    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))

    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np],
                        [output_a_np, output_b_np],
                        epochs=1, batch_size=4,
                        validation_data=([input_a_np, input_b_np],))

    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])

    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})

    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])

    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'lstm': 'temporal'})

    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])

    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse',
                      sample_weight_mode={'dense_1': 'temporal'})

    # `loss` does not exist.
    with pytest.raises(RuntimeError):
        model.compile(optimizer, loss=[])

    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer, loss='mse',
                  loss_weights={'dense_1': 0.2, 'dropout': 0.8})
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])

    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np],
            [output_a_np, output_b_np],
            sample_weight=[None, np.random.random((10, 20, 30))])

    model.compile(optimizer, loss='mse',
                  sample_weight_mode={'dense_1': None, 'dropout': 'temporal'})
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])

    # the rank of output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)

    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    out = model.fit_generator(generator=RandomSequence(3),
                              steps_per_epoch=4,
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              validation_steps=3,
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
def SaveBestScore(save_path, dev1_stream, dev2_stream, test_stream):
    return LambdaCallback(on_train_end=partial(_evaluate_and_save,
                                               save_path=save_path,
                                               dev1_stream=dev1_stream,
                                               dev2_stream=dev2_stream,
                                               test_stream=test_stream))
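
Keras invokes `on_train_end` with a single `logs` argument, so the function wrapped by `partial` must still accept it alongside the bound keyword arguments. A hedged sketch of a compatible `_evaluate_and_save` signature (the body is an assumption):

# Hypothetical signature sketch: LambdaCallback calls on_train_end(logs),
# so the partially-applied function must accept that positional argument.
def _evaluate_and_save(logs, save_path, dev1_stream, dev2_stream, test_stream):
    pass  # evaluate on the streams and write the best score to save_path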
valid_input = X_data[NUM_EXAMPLES + 1000:4000]
valid_output = y_data[NUM_EXAMPLES + 1000:4000]

train_input = X_data[:NUM_EXAMPLES]
train_output = y_data[:NUM_EXAMPLES]
print("Data done generating ....")

chunk_size = 40
n_chunks = 200
batch_size = 500
hidden_units = 256

loss_history = []
val_loss_history = []
save_loss_callback = LambdaCallback(
    on_epoch_end=lambda epoch, logs: loss_history.append(logs['loss']))
save_valloss_callback = LambdaCallback(
    on_epoch_end=lambda epoch, logs: val_loss_history.append(logs['val_loss']))
monitor_loss = EarlyStopping(monitor='val_loss', min_delta=0, patience=5,
                             verbose=0, mode='auto')

model = Sequential()
model.add(Bidirectional(GRU(hidden_units, return_sequences=True),
                        input_shape=(n_chunks, chunk_size)))
model.add(Bidirectional(GRU(hidden_units, return_sequences=True)))
model.add(Bidirectional(GRU(hidden_units, return_sequences=True)))
def EvaluateOnDataStream(model, data_stream, prefix):
    return LambdaCallback(on_epoch_end=partial(_evaluate_on_data_stream,
                                               model=model,
                                               data_stream=data_stream,
                                               prefix=prefix))
def fit_model(model, x, y):
    print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
    model.fit(x, y, batch_size=128, epochs=60, callbacks=[print_callback])
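
The `on_epoch_end` handler referenced here is defined elsewhere; LambdaCallback will call it as `on_epoch_end(epoch, logs)`. A minimal hedged stand-in (the printed fields are an assumption):

# Hypothetical stand-in for the on_epoch_end handler referenced above.
def on_epoch_end(epoch, logs):
    print('Epoch %d: loss=%.4f' % (epoch + 1, logs['loss']))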
def vae(self, dims=[10], epochs=2000, batch_size=100, verbose=2,
        loss='mse', output_act=False, act='relu', patience=25,
        beta=1.0, warmup=True, warmup_rate=0.01, val_rate=0.2,
        no_trn=False):
    # manipulating an experiment identifier in the output file
    if patience != 25:
        self.prefix += 'p' + str(patience) + '_'
    if warmup:
        self.prefix += 'w' + str(warmup_rate) + '_'
    self.prefix += 'VAE'
    if loss == 'binary_crossentropy':
        self.prefix += 'b'
    if output_act:
        self.prefix += 'T'
    if beta != 1:
        self.prefix += 'B' + str(beta)
    self.prefix += str(dims).replace(", ", "-") + '_'
    if act == 'sigmoid':
        self.prefix += 'sig_'

    # filename for temporary model checkpoint
    modelName = self.prefix + self.data + '.h5'

    # clean up model checkpoint before use
    if os.path.isfile(modelName):
        os.remove(modelName)

    # callbacks for each epoch
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=patience,
                      mode='min', verbose=1),
        ModelCheckpoint(modelName, monitor='val_loss', mode='min',
                        verbose=1, save_best_only=True,
                        save_weights_only=True)
    ]

    # warm-up callback
    warm_up_cb = LambdaCallback(
        on_epoch_end=lambda epoch, logs: [warm_up(epoch)])
    # , print(epoch), print(K.get_value(beta))])

    # warm-up implementation
    def warm_up(epoch):
        val = epoch * warmup_rate
        if val <= 1.0:
            K.set_value(beta, val)

    # add warm-up callback if requested
    if warmup:
        beta = K.variable(value=0.0)
        callbacks.append(warm_up_cb)

    # splitting the training set into the inner-train and the
    # inner-test set (validation set)
    X_inner_train, X_inner_test, y_inner_train, y_inner_test = train_test_split(
        self.X_train, self.y_train, test_size=val_rate,
        random_state=self.seed, stratify=self.y_train)

    # insert input shape into dimension list
    # NOTE: dims=[10] is a mutable default argument; inserting in place
    # would mutate it across calls, so copy before prepending.
    dims = [X_inner_train.shape[1]] + list(dims)

    # create vae model
    self.vae, self.encoder, self.decoder = DNN_models.variational_AE(
        dims, act=act, recon_loss=loss, output_act=output_act, beta=beta)
    self.vae.summary()

    if no_trn:
        return

    # fit
    self.history = self.vae.fit(X_inner_train,
                                epochs=epochs,
                                batch_size=batch_size,
                                callbacks=callbacks,
                                verbose=verbose,
                                validation_data=(X_inner_test, None))

    # save loss progress
    self.saveLossProgress()

    # load best model
    self.vae.load_weights(modelName)
    self.encoder = self.vae.layers[1]

    # applying the learned encoder into the whole training and the test set.
    _, _, self.X_train = self.encoder.predict(self.X_train)
    _, _, self.X_test = self.encoder.predict(self.X_test)
# instead of the reshaped_image, we will feed in the output of
# the generator network.
content_tensor, *style_tensors = featurization_model(reshaped_image)
feature_tensors = [content_tensor, *style_tensors]

training_model = Model(inputs=dummy_input_tensor, outputs=feature_tensors)
training_model.summary()


def save_int_image(epoch_idx, logs):
    if epoch_idx % 100 == 99:
        flattened_image_data = image_layer.get_weights()[0]
        # flattened_image_data = K.eval(flattened_image_tensor)
        image_data = np.reshape(flattened_image_data, (768, 1024, 3))
        save_image(f'./images/result{epoch_idx:04}.jpeg', image_data)


optimizer = Adam(lr=10.0)  # changed to 10 from 0.001
training_model.compile(loss='mean_squared_error',
                       optimizer=optimizer,
                       loss_weights=[2.5, *([1] * 5)])

training_model.fit(
    # one example with one feature; the value is 1 since the input is
    # a constant-ones placeholder driving the image layer
    np.ones([1, 1]),
    target_values,
    batch_size=1,
    epochs=3000,
    verbose=2,
    callbacks=[LambdaCallback(on_epoch_end=save_int_image)])
print("lr",lr,"at",initial_epoch,"from last trained log file.") model = load_model(os.path.join(model_dir, last_model_name)) # 若成功加载前面保存的参数,输出下列信息 print("load model success !") optimizer = optimizers.RMSprop(lr=lr) # keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-6) model.compile(optimizer=optimizer, loss='mse', metrics=['mae']) print('model.metrics_names = ', model.metrics_names) checkpoint = ModelCheckpoint(os.path.join(model_dir, model_name), monitor='val_mean_absolute_error',verbose=1, save_best_only=False, period = 10)#save_best_only=False每间隔period个epoch都要保存模型,不管模型好坏 reduce_lr = ReduceLROnPlateau(monitor='val_mean_absolute_error', factor=0.5,#new_lr=factor*old_lr patience=epochs/10, min_lr=lr/100)#有了此callback就有了返回值lr csv_logger = CSVLogger(os.path.join(model_dir, log_name), separator=',', append=True) #CSVLogger:将epoch的训练结果保存在csv中 epoch_end_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: print("epoch_end_callback epoch",epoch,"lr",logs["lr"]) )#自定义的回调函数 callback_lists = [checkpoint, reduce_lr, csv_logger, epoch_end_callback] history = model.fit(train_data, train_targets,epochs=epochs, batch_size=batchsize, validation_split=0.2, verbose=2, shuffle=True, initial_epoch = initial_epoch, callbacks = callback_lists) # 模型保存 # model.save(model_file) # elesun # print('Model Saved.') # elesun ##########################展示训练结果################################ print('history.history.keys = ',history.history.keys()) print('history.history = ',history.history) print('history.epoch = ',history.epoch) # plot history plt.title("model performace") plt.plot(history.epoch,history.history['loss'], label='train_loss')
def train_vgg(folder):
    DATA_DIR = folder
    TRAIN_DIR = os.path.join(DATA_DIR, 'train')
    VALID_DIR = os.path.join(DATA_DIR, 'valid')
    TEST_DIR = os.path.join(DATA_DIR, 'test')
    save_aug = os.path.join(DATA_DIR, 'tmp')
    num_train_samples = sum([len(files) for r, d, files in os.walk(TRAIN_DIR)])
    num_valid_samples = sum([len(files) for r, d, files in os.walk(VALID_DIR)])
    num_train_steps = math.floor(num_train_samples / BATCH_SIZE)
    num_valid_steps = math.floor(num_valid_samples / BATCH_SIZE)

    # light augmentation on the training set only
    shift = 0.05
    train_gen = ImageDataGenerator(width_shift_range=shift,
                                   height_shift_range=shift,
                                   horizontal_flip=False, vertical_flip=False,
                                   rotation_range=4, zoom_range=0.1)
    batches = train_gen.flow_from_directory(directory=TRAIN_DIR,
                                            target_size=SIZE, color_mode="rgb",
                                            batch_size=BATCH_SIZE,
                                            class_mode="categorical",
                                            shuffle=True)
    val_gen = ImageDataGenerator()
    val_batches = val_gen.flow_from_directory(directory=VALID_DIR,
                                              target_size=SIZE,
                                              color_mode="rgb",
                                              batch_size=BATCH_SIZE,
                                              class_mode="categorical",
                                              shuffle=True)

    model = get_model()
    classes = list(iter(batches.class_indices))
    # freeze all base layers; only the new classifier head below is trained
    for layer in model.layers:
        layer.trainable = False

    # add the classifier head
    x = model.layers[-1].output
    x = Flatten(name='flatten')(x)
    x = Dense(4096, activation="relu")(x)
    x = Dense(4096, activation="relu")(x)
    x = Dense(len(classes), activation="softmax")(x)
    finetuned_model = Model(model.input, x)
    finetuned_model.summary()

    opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    finetuned_model.compile(optimizer=opt, loss='categorical_crossentropy',
                            metrics=['accuracy'])
    for c in batches.class_indices:
        classes[batches.class_indices[c]] = c
    finetuned_model.classes = classes

    early_stopping = EarlyStopping(patience=10)
    my_log_callback = LambdaCallback(
        on_epoch_end=lambda epoch, logs: write_to_log(epoch, logs))
    check_pointer = ModelCheckpoint("{}_best.h5".format(name), verbose=1,
                                    save_best_only=True)
    history = finetuned_model.fit_generator(
        batches,
        steps_per_epoch=num_train_steps,
        epochs=EPOCH,
        callbacks=[early_stopping, check_pointer, my_log_callback],
        validation_data=val_batches,
        validation_steps=num_valid_steps)
    save_history(history)
    # save the fine-tuned model (the original saved the frozen base `model`,
    # which would drop the new classifier head)
    finetuned_model.save("{}_final.h5".format(name))
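# Added sketch: write_to_log is referenced above but not defined in this
# excerpt; a minimal file-appending implementation might look like this
# (the log path is illustrative).
def write_to_log(epoch, logs):
    # append one line per epoch with every metric Keras reports
    with open("training.log", "a") as f:
        metrics = " ".join("{}={:.4f}".format(k, v) for k, v in logs.items())
        f.write("epoch {}: {}\n".format(epoch + 1, metrics))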
def imdb_lstm(conf, input, **kw):
    result_sds = kw.pop('result_sds', None)
    project_id = kw.pop('project_id', None)
    f = conf['fit']
    e = conf['evaluate']
    x_train = input['x_tr']
    y_train = input['y_tr']
    x_val = input['x_te']
    y_val = input['y_te']
    x_test = input['x_te']
    y_test = input['y_te']
    max_features = input['max_features']
    maxlen = input['maxlen']
    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
    # validation and test share the same (padded) data here
    x_val = x_test
    with graph.as_default():
        model = Sequential()
        model.add(Embedding(max_features, 128))
        model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
        model.add(Dense(1, activation='sigmoid'))
        # try using different optimizers and different optimizer configs
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        # callback to save metrics at the end of each epoch
        batch_print_callback = LambdaCallback(
            on_epoch_end=lambda epoch, logs: logger_service.log_epoch_end(
                epoch, logs, result_sds, project_id))
        # checkpoint to save the best weights
        best_checkpoint = MongoModelCheckpoint(result_sds=result_sds,
                                               verbose=0, save_best_only=True)
        # checkpoint to save the latest weights
        general_checkpoint = MongoModelCheckpoint(result_sds=result_sds,
                                                  verbose=0)
        # training
        history = model.fit(x_train, y_train,
                            validation_data=(x_val, y_val),
                            callbacks=[batch_print_callback, best_checkpoint,
                                       general_checkpoint],
                            verbose=0,
                            **f['args'])
        score = model.evaluate(x_test, y_test, **e['args'])
        config = model.get_config()
        logger_service.log_train_end(result_sds,
                                     model_config=config,
                                     score=score,
                                     history=history.history)
        return {'score': score, 'history': history.history}
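# Added sketch: logger_service and MongoModelCheckpoint are project-specific
# and not shown in this excerpt. A minimal stand-in for the epoch logger,
# persisting metrics as JSON lines, could look like this (the file name is
# illustrative):
import json

def log_epoch_end(epoch, logs, result_sds=None, project_id=None):
    # one JSON record per epoch, so a dashboard can tail the file
    record = {'epoch': epoch, 'project_id': project_id,
              'metrics': {k: float(v) for k, v in logs.items()}}
    with open('epoch_metrics.jsonl', 'a') as f:
        f.write(json.dumps(record) + '\n')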
start_index = random.randint(0, len(text) - maxlen - 1)
for diversity in [0.2, 0.5, 1.0, 1.2]:
    print('------ diversity:', diversity)
    generated = ''
    sentence = text[start_index: start_index + maxlen]
    generated += sentence
    print(' ----- Generating with seed: "' + sentence + '"')
    sys.stdout.write(generated)
    for i in range(400):
        # one-hot encode the current window of characters
        x_pred = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_indices[char]] = 1.
        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, diversity)
        next_char = indices_char[next_index]
        # slide the window one character forward
        sentence = sentence[1:] + next_char
        sys.stdout.write(next_char)
        sys.stdout.flush()
    print()

print_callback = LambdaCallback(on_train_batch_begin=on_train_batch_begin,
                                on_train_batch_end=on_train_batch_end,
                                on_epoch_end=on_epoch_end)
model.fit(x, y, batch_size=128, epochs=60, callbacks=[print_callback])
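# Added sketch: `sample` is used above but not defined in this excerpt. The
# standard temperature-sampling helper from the Keras text-generation example
# fits here:
import numpy as np

def sample(preds, temperature=1.0):
    # rescale the predicted distribution by temperature, then draw one index
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-8) / temperature  # epsilon guards log(0)
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)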
x_train_epoch = x_train[mask]
global y_train_epoch
y_train_epoch = y_train[mask]

def on_epoch_end(epoch, logs):
    print('End of epoch')
    renew_train_dataset()

def on_batch_end(epoch, logs):
    tmp_lr = lr_schedule(epoch)
    K.set_value(model.optimizer.lr, tmp_lr)

on_epoch_end_callback = LambdaCallback(on_epoch_end=on_epoch_end)
# despite its name, this callback is also registered on epoch end:
# lr_schedule() expects an epoch index, not a batch index
on_batch_end_callback = LambdaCallback(on_epoch_end=on_batch_end)

from keras.callbacks import TensorBoard

callbacks = [
    on_epoch_end_callback,
    on_batch_end_callback,
    lr_reducer,
    lr_scheduler,
    TensorBoard(log_dir=(work_path / 'TB_Log' / exp_name).__str__())
]
renew_train_dataset()

# Run training, with or without data augmentation.
if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train_epoch, y_train_epoch,
              batch_size=batch_size,
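# Added sketch: `mask` is not defined in this excerpt. Given that the
# training subset is renewed every epoch, a plausible construction is a
# random boolean mask over the full training set (the keep fraction is
# illustrative):
import numpy as np

mask = np.random.rand(len(x_train)) < 0.5  # keep roughly half the samples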
def train_model(self, model, hparams):
    log.info("train_model - Start")
    overall_time_start = timer()
    model.summary()

    # Note this doesn't work with my GPU, feel free to try!
    # tensorboard_cb = keras.callbacks.TensorBoard(
    #     log_dir=self.path_summary, histogram_freq=1, write_graph=1,
    #     write_grads=1, update_freq=365, write_images=False)
    logging_cb = LoggingCallback(log.info)
    checkpoint_cb = ModelCheckpoint(filepath=os.path.join(
        self.path_checkpoints, 'checkpoint-{epoch:02d}.hdf5'))

    data_container = build_train_data_container(hparams)
    trainX, trainY = build_sequential_training_data_set(
        data_container.train_scaled, look_back_window=hparams.num_time_steps)
    log.debug("trainX shape: %s", trainX.shape)
    log.debug("trainY shape: %s", trainY.shape)
    testX, testY = build_sequential_test_data_set(
        data_container.samples_scaled, hparams.test_window,
        look_back_window=hparams.num_time_steps)
    lab_ctx.data_container = data_container
    lab_ctx.test_x = testX
    lab_ctx.test_y = testY

    # The LSTM network expects the input data (X) to be provided with a
    # specific array structure in the form of: [samples, time steps, features]
    trainX = numpy.reshape(
        trainX, (trainX.shape[0], hparams.num_time_steps, hparams.num_inputs))

    # TODO - Use previous file if we are in resume mode
    model_fit_log = create_model_fit_log_file_full_name(hparams.model_id)
    csv_logger = CSVLogger(model_fit_log, append=True, separator=',')
    predict_cb = LambdaCallback(on_epoch_end=LabRunner.take_snap_shot)
    model.fit(
        trainX, trainY,
        epochs=hparams.epochs,
        batch_size=hparams.batch_size,
        verbose=1,
        validation_split=0.09,
        initial_epoch=hparams.initial_epoch,
        shuffle=True,
        # callbacks that work with the GPU; add tensorboard_cb to try it
        callbacks=[csv_logger, logging_cb, checkpoint_cb, predict_cb])
    log.info("Training finished.")

    hparams.checkpoint_id += (hparams.epochs - hparams.initial_epoch)
    log.info("Checkpoint id has been updated to: %d", hparams.checkpoint_id)
    model = load_model(hparams)
    log.info("Model reloaded.")
    model.summary()

    log.info("Predicting...")
    testPredict = model.predict(testX)
    testPredict = data_container.scaler.inverse_transform(testPredict)
    testY = data_container.scaler.inverse_transform(testY)
    testPredict = testPredict.astype(int)
    testY = testY.astype(int)
    log.debug("testPredict shape: %s", testPredict.shape)
    log.debug("test_set shape: %s", data_container.test_set.shape)
    if hparams.num_inputs > 1:
        for i in range(lab_ctx.hparams.num_inputs):
            new_column_name = 'p_{}'.format(i)
            data_container.test_set.loc[:, new_column_name] = testPredict[:, i]
    else:
        data_container.test_set.loc[:, 'Predicted'] = testPredict

    plotted = data_container.test_set.plot(title=hparams.to_string())
    figure = plotted.get_figure()
    figure.set_size_inches(20, 15)
    image_path = os.path.join(self.path_snapshots, str(hparams.model_id))
    # TODO - Don't overwrite old image
    figure.savefig(image_path + '.final.png')
    # close the figure, otherwise it stays in memory
    plt.close(figure)

    overall_time_end = timer()
    log.info("Total time taken: %f minutes",
             (overall_time_end - overall_time_start) / 60)
    log.info("train_model - Done")
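# Added sketch: LoggingCallback is not shown in this excerpt; a common recipe
# is a thin Callback that forwards epoch-end metrics to an injected logging
# function, which would match the LoggingCallback(log.info) call above:
from keras.callbacks import Callback

class LoggingCallback(Callback):
    """Route Keras epoch-end metrics to an arbitrary logging function."""

    def __init__(self, print_fn=print):
        super(LoggingCallback, self).__init__()
        self.print_fn = print_fn

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        msg = "Epoch %i: %s" % (epoch, ", ".join(
            "%s: %f" % (k, v) for k, v in logs.items()))
        self.print_fn(msg)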
        clipnorm=5)
    ocrModel.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                     optimizer=optimizer,
                     metrics=['accuracy'])
    generator = CnnRnnGenerator(dataset)
    ocrModel.summary()
    # dump the weights after every batch so a run can be resumed
    ocrModel.fit_generator(
        generator=generator,
        epochs=10,
        callbacks=[
            swa,
            LambdaCallback(on_batch_end=lambda batch, logs: dumper.dump())
        ],
        use_multiprocessing=True,
        workers=4)
elif "predict" in sys.argv:
    ocrModel = ocr.Ocr(dataset.lexicon_len(), weights=None)
    dumper = WeightsDumper(ocrModel)
    dumper.restore()
    for root, dirs, files in os.walk("output"):
        files = (x for x in files if x.endswith("png"))
        for filename in files:
            img = dataset.preprocess(filename, dir="output")
            predicted = ocrModel.predict(img)
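# Added sketch: WeightsDumper is project-specific and not shown here. A
# minimal version that persists and restores weights to a fixed file could
# look like this, assuming the Ocr wrapper exposes its Keras model as
# `.model` (an assumption; the file name is also illustrative):
class WeightsDumper:
    def __init__(self, ocr_model, path="weights.h5"):
        self.ocr_model = ocr_model
        self.path = path

    def dump(self):
        # save the current weights so training can resume after a crash
        self.ocr_model.model.save_weights(self.path)

    def restore(self):
        self.ocr_model.model.load_weights(self.path)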
from keras.layers import Dense, Activation, Dropout, Input, Masking
from keras.layers import LSTM
from keras.utils.data_utils import get_file
from keras.preprocessing.sequence import pad_sequences
from shakespeare_utils import *
import sys
import io

# To save you some time, we have already trained a model for ~1000 epochs on
# a collection of Shakespearean poems called ["The Sonnets"](shakespeare.txt).

# Let's train the model for one more epoch. When it finishes training for an
# epoch (this will also take a few minutes), you can run `generate_output`,
# which will prompt you for an input (<40 characters). The poem will start
# with your sentence, and our RNN-Shakespeare will complete the rest of the
# poem for you! For example, try "Forsooth this maketh no sense " (don't
# enter the quotation marks). Depending on whether you include the space at
# the end, your results might differ; try it both ways, and try other inputs
# as well.

# In[ ]:
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
model.fit(x, y, batch_size=128, epochs=1, callbacks=[print_callback])

# In[ ]:
# Run this cell to try different inputs without having to re-train the model
generate_output()

# The RNN-Shakespeare model is very similar to the one you built for
# dinosaur names. The only major differences are:
# - LSTMs instead of the basic RNN, to capture longer-range dependencies
# - A deeper, stacked LSTM model (2 layers)
# - Keras instead of raw Python, to simplify the code
#
# If you want to learn more, you can also check out the Keras Team's text
# generation implementation on GitHub:
# https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
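# Added sketch: on_epoch_end comes from shakespeare_utils and is not shown
# here. In the classic Keras text-generation example it reports progress at
# the end of each epoch; a minimal analogue might be:
def on_epoch_end(epoch, logs):
    # report progress; the sampling logic itself lives in generate_output()
    print("Completed epoch %d, loss=%.4f" % (epoch + 1,
                                             logs.get("loss", float("nan"))))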
# patience=3, cooldown=2, verbose=1)
save_model_call = ModelCheckpoint(
    os.path.join(model_locs,
                 weights_prepend + '.{epoch:02d}-{val_acc:.4f}.hdf5'),
    verbose=1, monitor='val_acc')
earlystop_call = EarlyStopping(monitor='val_acc', min_delta=0.0001,
                               patience=5, verbose=1, mode='auto')
# tensor_call = TensorBoard(log_dir=log_loc, histogram_freq=3,
#                           write_graph=True, write_images=True)
dodraw_afterone = LambdaCallback(
    on_epoch_end=partial(draw_onfirst_epoch,
                         model=model,
                         big_img_size=big_img_size,
                         do_test=do_test_after_one,
                         quick_str=quick_str,
                         str_to_save=config_text,
                         split_map=split_patches))
all_calls = [save_model_call, earlystop_call, dodraw_afterone]

start_t = time.time()
lmdbval_env = lmdb.open(val_lmdb)
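# Added note: LambdaCallback invokes on_epoch_end as fn(epoch, logs), so the
# partial above implies draw_onfirst_epoch accepts the two positional
# arguments from Keras plus the keyword arguments bound by partial. A
# signature sketch (the body is a placeholder, not the original code):
def draw_onfirst_epoch(epoch, logs, model=None, big_img_size=None,
                       do_test=False, quick_str='', str_to_save='',
                       split_map=None):
    # only draw once the first epoch has finished
    if epoch == 0:
        pass  # rendering/evaluation logic goes here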