def train(self, train_dir, vocabulary, test_dir=None, callbacks=None,
          nn_model=NN_ARCHITECTURE, batch_size=BATCH_SIZE,
          nb_epochs=NB_EPOCHS, verbose=1):
    """
    Build a new Keras model and fit it on data loaded fully into memory.

    :param train_dir: directory with data files. Text files should end with
        '.txt' and corresponding files containing labels should end with
        '.lab'
    :param vocabulary: iterable containing all considered labels; it is
        stored as ``self.labels`` and its length is the model output length
    :param test_dir: directory with test files. They will be used to
        evaluate the model after every epoch of training.
    :param callbacks: objects passed to the Keras fit function as callbacks
        (an empty list is used when falsy)
    :param nn_model: string defining the NN architecture e.g. 'crnn'
    :param batch_size: size of one batch
    :param nb_epochs: number of epochs to train
    :param verbose: 0, 1 or 2. As in Keras.

    :return: History object, or None when the word2vec model or the scaler
        is missing (a message is printed in that case)
    """
    # Preconditions: this variant reports the problem on stdout and returns
    # None instead of raising.
    if not self.word2vec_model:
        print('word2vec model is not trained. Run train_word2vec() first.')
        return None

    if not self.scaler:
        print('The scaler is not trained. Run fit_scaler() first.')
        return None

    if self.keras_model:
        print('WARNING! Overwriting already trained Keras model.')

    self.labels = vocabulary
    self.keras_model = get_nn_model(
        nn_model,
        output_length=len(vocabulary),
    )

    loader_options = dict(
        test_dir=test_dir,
        nn_model=self.keras_model,
        as_generator=False,
        batch_size=batch_size,
        word2vec_model=self.word2vec_model,
        scaler=self.scaler,
    )
    train_set, test_data = get_data_for_model(
        train_dir,
        vocabulary,
        **loader_options
    )
    x_train, y_train = train_set

    history = self.keras_model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        nb_epoch=nb_epochs,
        show_accuracy=True,
        validation_data=test_data,
        callbacks=callbacks or [],
        verbose=verbose,
    )
    return history
def batch_train(nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, nn='berger_cnn',
                nb_worker=1, verbose=1):
    """
    Train a NN model out-of-core, feeding training data from a generator.

    :param nb_epochs: number of epochs
    :param batch_size: size of one batch
    :param nn: nn type, for supported ones look at `get_nn_model()`
    :param nb_worker: number of workers to read the data
    :param verbose: verbosity flag

    :return: tuple containing a history object and a trained keras model
    """
    model = get_nn_model(nn)

    train_generator, test_set = get_data_for_model(
        model,
        as_generator=True,
        batch_size=batch_size,
        train_dir=HEP_TRAIN_PATH,
        test_dir=HEP_TEST_PATH,
    )
    x_test, y_test = test_set

    # Create callbacks; the checkpoint is written under the logger's log_dir.
    logger = CustomLogger(x_test, y_test, nn)
    checkpoint_path = os.path.join(logger.log_dir, 'keras_model')
    model_checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True)

    # Directory entries are deduplicated after dropping their last four
    # characters, so entries that differ only in that suffix count once.
    nb_unique_names = len(
        {entry[:-4] for entry in os.listdir(HEP_TRAIN_PATH)}
    )

    # The first three arguments are deliberately positional, as in the
    # original call.
    history = model.fit_generator(
        train_generator,
        nb_unique_names,
        nb_epochs,
        show_accuracy=True,
        validation_data=(x_test, y_test),
        callbacks=[logger, model_checkpoint],
        nb_worker=nb_worker,
        verbose=verbose,
    )

    finish_logging(logger, history)
    return history, model
def train(nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, nn='berger_cnn',
          verbose=1):
    """
    Train a NN model with given parameters, with all data held in memory.

    :param nb_epochs: number of epochs
    :param batch_size: size of one batch
    :param nn: nn type, for supported ones look at `get_nn_model()`
    :param verbose: verbosity flag

    :return: tuple containing a history object and a trained keras model
    """
    model = get_nn_model(nn)

    train_set, test_set = get_data_for_model(
        model,
        as_generator=False,
        train_dir=HEP_TRAIN_PATH,
        test_dir=HEP_TEST_PATH,
    )
    x_train, y_train = train_set
    x_test, y_test = test_set

    # Create callbacks; the checkpoint is written under the logger's log_dir.
    logger = CustomLogger(x_test, y_test, nn)
    checkpoint_path = os.path.join(logger.log_dir, 'keras_model')
    model_checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True)

    fit_options = dict(
        batch_size=batch_size,
        nb_epoch=nb_epochs,
        show_accuracy=True,
        validation_data=(x_test, y_test),
        callbacks=[logger, model_checkpoint],
        verbose=verbose,
    )
    history = model.fit(x_train, y_train, **fit_options)

    finish_logging(logger, history)
    return history, model
def train(self, train_dir, vocabulary, test_dir=None, callbacks=None,
          nn_model=NN_ARCHITECTURE, batch_size=BATCH_SIZE, test_ratio=0.0,
          epochs=EPOCHS, verbose=1, logdir=None, optimizer='Adam'):
    """
    Train the model on given data, loading the whole data set into memory.

    :param train_dir: directory with data files. Text files should end with
        '.txt' and corresponding files containing labels should end with
        '.lab'
    :param vocabulary: iterable containing all considered labels
    :param test_dir: directory with test files. They will be used to
        evaluate the model after every epoch of training.
    :param callbacks: objects passed to the Keras fit function as callbacks
    :param nn_model: string defining the NN architecture e.g. 'crnn'
    :param batch_size: size of one batch
    :param test_ratio: the ratio of samples that will be withheld from
        training and used for testing (passed to fit as
        ``validation_split``). This can be overridden by test_dir.
    :param epochs: number of epochs to train
    :param verbose: 0, 1 or 2. As in Keras.
    :param logdir: optional path of a log *file* (despite the name it is
        opened as a file, not a directory). When truthy, the string form of
        the training history is written to it, replacing existing content.
    :param optimizer: optimizer forwarded to ``get_nn_model()``

    :return: History object
    :raises RuntimeError: if the word2vec model or the scaler is not trained
    :raises ValueError: if train_dir, or a given test_dir, is not an
        existing directory
    """
    if not self.word2vec_model:
        raise RuntimeError('word2vec model is not trained. '
                           'Run train_word2vec() first.')

    if not self.scaler:
        raise RuntimeError('The scaler is not trained. '
                           'Run fit_scaler() first.')

    if not os.path.isdir(train_dir):
        raise ValueError('The training directory ' + train_dir +
                         ' does not exist')

    if test_dir and not os.path.isdir(test_dir):
        raise ValueError('The test directory ' + test_dir +
                         ' does not exist')

    if self.keras_model:
        print('WARNING! Overwriting already trained Keras model.',
              file=sys.stderr)

    self.labels = vocabulary
    self.keras_model = get_nn_model(
        nn_model,
        embedding=self.word2vec_model.vector_size,
        output_length=len(vocabulary),
        optimizer=optimizer,  # exposed as a parameter of this method
    )

    (x_train, y_train), test_data = get_data_for_model(
        train_dir,
        vocabulary,
        test_dir=test_dir,
        nn_model=self.keras_model,
        as_generator=False,
        batch_size=batch_size,
        word2vec_model=self.word2vec_model,
        scaler=self.scaler,
    )

    # fit() returns the History object, not a model, so name it accordingly.
    history = self.keras_model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=test_data,
        validation_split=test_ratio,
        callbacks=callbacks or [],
        verbose=verbose,
    )

    # logdir is the location of the file the training log is saved to.
    if logdir:
        with open(logdir, 'w', encoding='utf-8') as log_file:
            log_file.write(str(history.history))

    return history
def batch_train(self, train_dir, vocabulary, test_dir=None, callbacks=None,
                nn_model=NN_ARCHITECTURE, batch_size=BATCH_SIZE,
                epochs=EPOCHS, verbose=1):
    """
    Train the model on given data, feeding training batches from a generator.

    :param train_dir: directory with data files. Text files should end with
        '.txt' and corresponding files containing labels should end with
        '.lab'
    :param vocabulary: iterable containing all considered labels
    :param test_dir: directory with test files. They will be used to
        evaluate the model after every epoch of training.
    :param callbacks: objects passed to the Keras fit function as callbacks
    :param nn_model: string defining the NN architecture e.g. 'crnn'
    :param batch_size: size of one batch
    :param epochs: number of epochs to train
    :param verbose: 0, 1 or 2. As in Keras.

    :return: History object
    :raises RuntimeError: if the word2vec model or the scaler is not trained
    :raises ValueError: if train_dir, or a given test_dir, is not an
        existing directory
    """
    if not self.word2vec_model:
        raise RuntimeError(
            'word2vec model is not trained. Run train_word2vec() first.'
        )

    if not self.scaler:
        raise RuntimeError(
            'The scaler is not trained. Run fit_scaler() first.'
        )

    if not os.path.isdir(train_dir):
        raise ValueError(
            'The training directory ' + train_dir + ' does not exist'
        )

    if test_dir and not os.path.isdir(test_dir):
        raise ValueError(
            'The test directory ' + test_dir + ' does not exist'
        )

    if self.keras_model:
        print(
            'WARNING! Overwriting already trained Keras model.',
            file=sys.stderr,
        )

    self.labels = vocabulary
    self.keras_model = get_nn_model(
        nn_model,
        embedding=self.word2vec_model.vector_size,
        output_length=len(vocabulary),
    )

    loader_options = dict(
        test_dir=test_dir,
        nn_model=self.keras_model,
        as_generator=True,
        batch_size=batch_size,
        word2vec_model=self.word2vec_model,
        scaler=self.scaler,
    )
    train_generator, test_data = get_data_for_model(
        train_dir,
        vocabulary,
        **loader_options
    )

    # Directory entries are deduplicated after dropping their last four
    # characters, so a '.txt'/'.lab' pair with the same stem counts once.
    unique_stems = {entry[:-4] for entry in os.listdir(train_dir)}
    batches_per_epoch = math.ceil(len(unique_stems) / batch_size)

    history = self.keras_model.fit_generator(
        train_generator,
        steps_per_epoch=batches_per_epoch,
        epochs=epochs,
        validation_data=test_data,
        callbacks=callbacks or [],
        verbose=verbose,
    )
    return history
def train(self, train_dir, vocabulary, test_dir=None, callbacks=None,
          nn_model=NN_ARCHITECTURE, batch_size=BATCH_SIZE, test_ratio=0.0,
          epochs=EPOCHS, verbose=1):
    """
    Train the model on given data, loading the whole data set into memory.

    :param train_dir: directory with data files. Text files should end with
        '.txt' and corresponding files containing labels should end with
        '.lab'
    :param vocabulary: iterable containing all considered labels
    :param test_dir: directory with test files. They will be used to
        evaluate the model after every epoch of training.
    :param callbacks: objects passed to the Keras fit function as callbacks
    :param nn_model: string defining the NN architecture e.g. 'crnn'
    :param batch_size: size of one batch
    :param test_ratio: the ratio of samples that will be withheld from
        training and used for testing (passed to fit as
        ``validation_split``). This can be overridden by test_dir.
    :param epochs: number of epochs to train
    :param verbose: 0, 1 or 2. As in Keras.

    :return: History object
    :raises RuntimeError: if the word2vec model or the scaler is not trained
    :raises ValueError: if train_dir, or a given test_dir, is not an
        existing directory
    """
    if not self.word2vec_model:
        raise RuntimeError(
            'word2vec model is not trained. Run train_word2vec() first.'
        )

    if not self.scaler:
        raise RuntimeError(
            'The scaler is not trained. Run fit_scaler() first.'
        )

    if not os.path.isdir(train_dir):
        raise ValueError(
            'The training directory ' + train_dir + ' does not exist'
        )

    if test_dir and not os.path.isdir(test_dir):
        raise ValueError(
            'The test directory ' + test_dir + ' does not exist'
        )

    if self.keras_model:
        print(
            'WARNING! Overwriting already trained Keras model.',
            file=sys.stderr,
        )

    self.labels = vocabulary
    self.keras_model = get_nn_model(
        nn_model,
        embedding=self.word2vec_model.vector_size,
        output_length=len(vocabulary),
    )

    loader_options = dict(
        test_dir=test_dir,
        nn_model=self.keras_model,
        as_generator=False,
        batch_size=batch_size,
        word2vec_model=self.word2vec_model,
        scaler=self.scaler,
    )
    train_set, test_data = get_data_for_model(
        train_dir,
        vocabulary,
        **loader_options
    )
    x_train, y_train = train_set

    history = self.keras_model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=test_data,
        validation_split=test_ratio,
        callbacks=callbacks or [],
        verbose=verbose,
    )
    return history
def batch_train(self, train_dir, vocabulary, test_dir=None, callbacks=None,
                nn_model=NN_ARCHITECTURE, batch_size=BATCH_SIZE,
                nb_epochs=NB_EPOCHS, verbose=1):
    """
    Build a new Keras model and fit it from a training-data generator.

    :param train_dir: directory with data files. Text files should end with
        '.txt' and corresponding files containing labels should end with
        '.lab'
    :param vocabulary: iterable containing all considered labels
    :param test_dir: directory with test files. They will be used to
        evaluate the model after every epoch of training.
    :param callbacks: objects passed to the Keras fit function as callbacks
    :param nn_model: string defining the NN architecture e.g. 'crnn'
    :param batch_size: size of one batch
    :param nb_epochs: number of epochs to train
    :param verbose: 0, 1 or 2. As in Keras.

    :return: History object, or None when the word2vec model or the scaler
        is missing (a message is printed in that case)
    """
    # Preconditions: this variant reports the problem on stdout and returns
    # None instead of raising.
    if not self.word2vec_model:
        print('word2vec model is not trained. Run train_word2vec() first.')
        return None

    if not self.scaler:
        print('The scaler is not trained. Run fit_scaler() first.')
        return None

    if self.keras_model:
        print('WARNING! Overwriting already trained Keras model.')

    self.labels = vocabulary
    self.keras_model = get_nn_model(
        nn_model,
        embedding=self.word2vec_model.vector_size,
        output_length=len(vocabulary),
    )

    loader_options = dict(
        test_dir=test_dir,
        nn_model=self.keras_model,
        as_generator=True,
        batch_size=batch_size,
        word2vec_model=self.word2vec_model,
        scaler=self.scaler,
    )
    train_generator, test_data = get_data_for_model(
        train_dir,
        vocabulary,
        **loader_options
    )

    # Directory entries are deduplicated after dropping their last four
    # characters, so a '.txt'/'.lab' pair with the same stem counts once.
    nb_unique_names = len({entry[:-4] for entry in os.listdir(train_dir)})

    # The first three arguments are deliberately positional, as in the
    # original call.
    history = self.keras_model.fit_generator(
        train_generator,
        nb_unique_names,
        nb_epochs,
        validation_data=test_data,
        callbacks=callbacks or [],
        verbose=verbose,
    )
    return history
def train(
        self,
        train_data: DataList,
        test_data: DataList,
        labels,
        callbacks=None,
        nn_model=NN_ARCHITECTURE,
        batch_size=BATCH_SIZE,
        test_ratio=0.0,
        epochs=EPOCHS,
        verbose=1):
    """
    Train the model on in-memory training and test examples.

    :param train_data: iterable of training examples; every example must
        support ``example['text']``, because the texts are collected into
        ``self.training_set``
    :param test_data: test examples, handed to ``get_data_for_model()``
        together with train_data; the resulting test matrix is used as the
        Keras validation data
    :param labels: all considered labels; stored as ``self.labels`` and its
        length is the model output length
    :param callbacks: objects passed to the Keras fit function as callbacks
        (an empty list is used when falsy)
    :param nn_model: string defining the NN architecture e.g. 'crnn'.
        The value 'cnn_regression' switches data preparation to regression.
    :param batch_size: size of one batch
    :param test_ratio: kept in the signature for compatibility; this method
        does not read it
    :param epochs: number of epochs to train
    :param verbose: 0, 1 or 2. As in Keras.

    :return: History object
    :raises RuntimeError: if the word2vec model or the scaler is not trained
    """
    set_tf_growth()

    if not self.word2vec_model:
        raise RuntimeError('word2vec model is not trained. ' +
                           'Run train_word2vec() first.')

    if not self.scaler:
        raise RuntimeError('The scaler is not trained. ' +
                           'Run fit_scaler() first.')

    if self.keras_model:
        print('WARNING! Overwriting already trained Keras model.',
              file=sys.stderr)

    self.labels = labels
    self.keras_model = get_nn_model(
        nn_model,
        embedding=self.word2vec_model.vector_size,
        output_length=len(self.labels)
    )

    regression = nn_model == 'cnn_regression'  # TODO make this more general

    # Remember the texts that were trained on.
    self.training_set = {example['text'] for example in train_data}

    (x_train, y_train), test_data_matrix = get_data_for_model(
        train_data,
        test_data,
        self.labels,
        nn_model=self.keras_model,
        as_generator=False,
        batch_size=batch_size,
        word2vec_model=self.word2vec_model,
        scaler=self.scaler,
        regression=regression
    )

    return self.keras_model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        # TODO make validation data optional for speedup
        validation_data=test_data_matrix,
        callbacks=callbacks or [],
        verbose=verbose,
    )