def predict(self, x_data, batch_size=32, debug_info=False):
    """
    Predict label sequences for the given samples, running the model in batches.

    Args:
        x_data: Input samples. When a tuple is given (multiple inputs), the
            sequence lengths are taken from its first element.
        batch_size: Integer batch size forwarded to ``self.tf_model.predict``.
            Defaults to 32.
        debug_info: Bool. When true, the processed input, the raw model output
            and its argmax are written through ``logging.info``.

    Returns:
        The value returned by
        ``self.embedding.reverse_numerize_label_sequences`` for the argmax of
        the model output and the per-sample lengths.
    """
    with utils.custom_object_scope():
        # For tuple input, only the first element drives the sequence lengths.
        sentences = x_data[0] if isinstance(x_data, tuple) else x_data
        lengths = [len(sentence) for sentence in sentences]

        model_input = self.embedding.process_x_dataset(x_data)
        raw_output = self.tf_model.predict(model_input, batch_size=batch_size)

        # Index of the highest value along the last axis of the model output.
        label_ids = raw_output.argmax(-1)
        result = self.embedding.reverse_numerize_label_sequences(label_ids,
                                                                 lengths)

        if debug_info:
            logging.info('input: {}'.format(model_input))
            logging.info('output: {}'.format(raw_output))
            logging.info('output argmax: {}'.format(label_ids))
    return result
def build_tpu_model(self,
                    strategy: tf.contrib.distribute.TPUStrategy,
                    x_train: Union[Tuple[List[List[str]], ...], List[List[str]]],
                    y_train: Union[List[List[str]], List[str]],
                    x_validate: Union[Tuple[List[List[str]], ...], List[List[str]]] = None,
                    y_validate: Union[List[List[str]], List[str]] = None):
    """
    Analyze the corpus and, if no model exists yet, build a compiled TPU model.

    Args:
        strategy: `TPUDistributionStrategy`. The strategy to use for
            replicating the model across multiple TPU cores.
        x_train: Array of train feature data (if the model has a single
            input), or tuple of train feature data arrays (if the model has
            multiple inputs).
        y_train: Array of train label data.
        x_validate: Array of validation feature data (if the model has a
            single input), or tuple of validation feature data arrays (if the
            model has multiple inputs).
        y_validate: Array of validation label data.

    Returns:
        None. ``self.tf_model`` is assigned only when it was ``None`` on entry.
    """
    # Parse the corpus and build the dictionary. Validation data is merged in
    # only when it is provided and is not a multi-input tuple.
    merge_validation = (x_validate is not None
                        and not isinstance(x_validate, tuple))
    if merge_validation:
        corpus_x = x_train + x_validate
        corpus_y = y_train + y_validate
    else:
        corpus_x, corpus_y = x_train, y_train
    self.embedding.analyze_corpus(corpus_x, corpus_y)

    # An existing model is left untouched.
    if self.tf_model is not None:
        return

    with utils.custom_object_scope():
        self.build_model_arc()
        self.tf_model = tf.contrib.tpu.keras_to_tpu_model(self.tf_model,
                                                          strategy=strategy)
        self.compile_model(optimizer=tf.train.AdamOptimizer())
def fit(self,
        x_train: Union[Tuple[List[List[str]], ...], List[List[str]]],
        y_train: Union[List[List[str]], List[str]],
        x_validate: Union[Tuple[List[List[str]], ...], List[List[str]]] = None,
        y_validate: Union[List[List[str]], List[str]] = None,
        batch_size: int = 64,
        epochs: int = 5,
        callbacks: List[keras.callbacks.Callback] = None,
        fit_kwargs: Dict = None):
    """
    Trains the model for a given number of epochs with fit_generator
    (iterations on a dataset).

    Args:
        x_train: Array of train feature data (if the model has a single
            input), or tuple of train feature data arrays (if the model has
            multiple inputs).
        y_train: Array of train label data.
        x_validate: Array of validation feature data (if the model has a
            single input), or tuple of validation feature data arrays (if the
            model has multiple inputs).
        y_validate: Array of validation label data.
        batch_size: Number of samples per gradient update, default to 64.
        epochs: Integer. Number of epochs to train the model. default 5.
        callbacks: List of callbacks forwarded to ``fit_generator()``. If
            ``fit_kwargs`` already holds a ``'callbacks'`` entry, that entry
            takes precedence (same rule as ``fit_without_generator``).
        fit_kwargs: additional arguments passed to ``fit_generator()``
            function from ``tensorflow.keras.Model`` -
            https://www.tensorflow.org/api_docs/python/tf/keras/models/Model#fit_generator
            The dict itself is not modified.

    Returns:
        The value returned by ``self.tf_model.fit_generator()``.
    """
    def count_steps(dataset):
        # Sample count comes from the first input for multi-input tuples;
        # the step count is the integer quotient by batch_size plus one.
        samples = dataset[0] if isinstance(dataset, tuple) else dataset
        return len(samples) // batch_size + 1

    self.build_model(x_train, y_train, x_validate, y_validate)
    train_generator = self.get_data_generator(x_train, y_train, batch_size)

    # Route ``callbacks`` through a private copy of the extra kwargs: passing
    # it as an explicit keyword next to ``**fit_kwargs`` raised a TypeError
    # (duplicate keyword) whenever fit_kwargs also contained 'callbacks'.
    generator_kwargs = {} if fit_kwargs is None else dict(fit_kwargs)
    generator_kwargs.setdefault('callbacks', callbacks)

    validation_generator = None
    validation_steps = None
    # Truthiness check: an empty validation set is treated like no validation.
    if x_validate:
        validation_generator = self.get_data_generator(x_validate,
                                                       y_validate,
                                                       batch_size)
        validation_steps = count_steps(x_validate)

    steps_per_epoch = count_steps(x_train)

    with utils.custom_object_scope():
        return self.tf_model.fit_generator(train_generator,
                                           steps_per_epoch=steps_per_epoch,
                                           epochs=epochs,
                                           validation_data=validation_generator,
                                           validation_steps=validation_steps,
                                           **generator_kwargs)
def fit_without_generator(self,
                          x_train: Union[Tuple[List[List[str]], ...], List[List[str]]],
                          y_train: Union[List[List[str]], List[str]],
                          x_validate: Union[Tuple[List[List[str]], ...], List[List[str]]] = None,
                          y_validate: Union[List[List[str]], List[str]] = None,
                          batch_size: int = 64,
                          epochs: int = 5,
                          callbacks: List[keras.callbacks.Callback] = None,
                          fit_kwargs: Dict = None):
    """
    Trains the model for a given number of epochs (iterations on a dataset).

    Unlike ``fit``, the whole dataset is converted up front with
    ``self.embedding.process_x_dataset`` / ``process_y_dataset`` and handed to
    ``self.tf_model.fit()`` directly, without a data generator.

    Args:
        x_train: Array of train feature data (if the model has a single
            input), or tuple of train feature data arrays (if the model has
            multiple inputs).
        y_train: Array of train label data.
        x_validate: Array of validation feature data (if the model has a
            single input), or tuple of validation feature data arrays (if the
            model has multiple inputs).
        y_validate: Array of validation label data.
        batch_size: Number of samples per gradient update, default to 64.
        epochs: Integer. Number of epochs to train the model. default 5.
        callbacks: List of callbacks forwarded to ``fit()``. Only used when
            non-empty and ``fit_kwargs`` has no ``'callbacks'`` entry of its
            own.
        fit_kwargs: additional arguments passed to ``fit()`` function from
            ``tensorflow.keras.Model`` -
            https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit
            The dict itself is not modified.

    Returns:
        The value returned by ``self.tf_model.fit()``.
    """
    self.build_model(x_train, y_train, x_validate, y_validate)
    tensor_x = self.embedding.process_x_dataset(x_train)
    tensor_y = self.embedding.process_y_dataset(y_train)

    validation_data = None
    if x_validate is not None:
        tensor_valid_x = self.embedding.process_x_dataset(x_validate)
        tensor_valid_y = self.embedding.process_y_dataset(y_validate)
        validation_data = (tensor_valid_x, tensor_valid_y)

    # Work on a copy: inserting 'callbacks' below used to mutate the dict the
    # caller passed in, leaking the callbacks into any later reuse of it.
    fit_kwargs = {} if fit_kwargs is None else dict(fit_kwargs)

    # An explicit 'callbacks' entry in fit_kwargs wins over the parameter.
    if callbacks and 'callbacks' not in fit_kwargs:
        fit_kwargs['callbacks'] = callbacks

    with utils.custom_object_scope():
        return self.tf_model.fit(tensor_x,
                                 tensor_y,
                                 validation_data=validation_data,
                                 epochs=epochs,
                                 batch_size=batch_size,
                                 **fit_kwargs)
def build_multi_gpu_model(self,
                          gpus: int,
                          x_train: Union[Tuple[List[List[str]], ...], List[List[str]]],
                          y_train: Union[List[List[str]], List[str]],
                          cpu_merge: bool = True,
                          cpu_relocation: bool = False,
                          x_validate: Union[Tuple[List[List[str]], ...], List[List[str]]] = None,
                          y_validate: Union[List[List[str]], List[str]] = None):
    """
    Analyze the corpus and, if no model exists yet, build a compiled
    multi-GPU model.

    Args:
        gpus: Integer >= 2, number of GPUs on which to create model replicas.
        x_train: Array of train feature data (if the model has a single
            input), or tuple of train feature data arrays (if the model has
            multiple inputs).
        y_train: Array of train label data.
        cpu_merge: A boolean value to identify whether to force merging model
            weights under the scope of the CPU or not.
        cpu_relocation: A boolean value to identify whether to create the
            model's weights under the scope of the CPU. If the model is not
            defined under any preceding device scope, you can still rescue it
            by activating this option.
        x_validate: Array of validation feature data (if the model has a
            single input), or tuple of validation feature data arrays (if the
            model has multiple inputs).
        y_validate: Array of validation label data.

    Returns:
        None. ``self.tf_model`` is assigned only when it was ``None`` on entry.
    """
    # Validation data joins the analyzed corpus only when it is provided and
    # is not a multi-input tuple; otherwise the training data is used alone.
    use_validation = (x_validate is not None
                      and not isinstance(x_validate, tuple))
    if use_validation:
        features = x_train + x_validate
        labels = y_train + y_validate
    else:
        features, labels = x_train, y_train
    self.embedding.analyze_corpus(features, labels)

    # An existing model is left untouched.
    if self.tf_model is not None:
        return

    with utils.custom_object_scope():
        self.build_model_arc()
        self.tf_model = tf.keras.utils.multi_gpu_model(
            self.tf_model,
            gpus,
            cpu_merge=cpu_merge,
            cpu_relocation=cpu_relocation)
        self.compile_model()
def predict(self, x_data, batch_size=32, debug_info=False, predict_kwargs: Dict = None):
    """
    Predict label sequences for the given samples, running the model in batches.

    Args:
        x_data: Input samples. When a tuple is given (multiple inputs), the
            sequence lengths are taken from its first element.
        batch_size: Integer batch size forwarded to ``self.tf_model.predict``.
            Defaults to 32.
        debug_info: Bool. When true, the processed input, the raw model output
            and the values handed to the label decoder are printed.
        predict_kwargs: arguments passed to ``predict()`` function of
            ``tf.keras.Model``.

    Returns:
        The value returned by
        ``self.embedding.reverse_numerize_label_sequences``.
    """
    extra_kwargs = {} if predict_kwargs is None else predict_kwargs

    with utils.custom_object_scope():
        # For tuple input, only the first element drives the sequence lengths.
        sentences = x_data[0] if isinstance(x_data, tuple) else x_data
        lengths = [len(sentence) for sentence in sentences]

        model_input = self.embedding.process_x_dataset(x_data)
        raw_output = self.tf_model.predict(model_input,
                                           batch_size=batch_size,
                                           **extra_kwargs)

        # 'scoring' tasks pass the raw output on; every other task passes the
        # argmax over the last axis.
        decoder_input = (raw_output if self.task == 'scoring'
                         else raw_output.argmax(-1))
        result = self.embedding.reverse_numerize_label_sequences(decoder_input,
                                                                 lengths)

        if debug_info:
            print('input: {}'.format(model_input))
            print('output: {}'.format(raw_output))
            # Label says "argmax" even for 'scoring', where this is raw output.
            print('output argmax: {}'.format(decoder_input))
    return result