def fit_generator(self, generator, epochs=1, validation_data=None,
                  callbacks=None, verbose=True):
    # The scipy optimizer drives training: each optimizer iteration is
    # treated as one Keras "epoch" for callback and bookkeeping purposes.
    method = self._model.optimizer.method
    x0 = self._collect_weights()
    history = History()
    _callbacks = [BaseLogger(stateful_metrics=self._model.metrics_names)]
    _callbacks += (callbacks or []) + [history]
    callback_list = CallbackList(_callbacks)
    callback_list.set_model(self._model)
    callback_list.set_params({
        'epochs': epochs,
        'verbose': False,
        'metrics': list(self._model.metrics_names),
    })

    # Shared state mutated by `self._fun_generator` and the iteration callback.
    state = {
        'epoch': 0,
        'verbose': verbose,
        'callbacks': callback_list,
        'in_epoch': False,
        'epoch_logs': {},
    }
    min_options = {
        'maxiter': epochs,
        'maxfun': epochs * 10,
        'ftol': 1e-10,
        'gtol': 1e-10,
        'eps': 1e-8,
    }

    val_generator = None
    if validation_data is not None:
        if isinstance(validation_data, keras.utils.Sequence):
            val_generator = validation_data
        elif isinstance(validation_data, tuple) and len(validation_data) == 2:
            val_generator = GeneratorWrapper(*validation_data)

    def on_iteration_end(xk):
        # Called by scipy.optimize.minimize after each iteration.
        cb = state['callbacks']
        if val_generator is not None:
            self._validate(xk, val_generator, state)
        cb.on_epoch_end(state['epoch'], state['epoch_logs'])
        # if state['verbose']:
        #     epoch_logs = state['epoch_logs']
        #     print('epoch: ', state['epoch'],
        #           ', '.join([' {0}: {1:.3e}'.format(k, v)
        #                      for k, v in epoch_logs.items()]))
        state['epoch'] += 1
        state['in_epoch'] = False
        state['epoch_logs'] = {}

    callback_list.on_train_begin()
    result = minimize(
        self._fun_generator, x0, method=method, jac=True,
        options=min_options, callback=on_iteration_end,
        args=(generator, state))
    self._update_weights(result['x'])
    callback_list.on_train_end()
    return history
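# The code above falls back to `GeneratorWrapper(*validation_data)` when a plain
# `(x_val, y_val)` tuple is passed instead of a Sequence. A minimal sketch of such
# a wrapper, assuming it only needs to behave like a single-batch
# `keras.utils.Sequence`; the actual class in the source project may differ.
import keras


class GeneratorWrapper(keras.utils.Sequence):
    """Expose an in-memory (x, y) pair as a one-batch Sequence."""

    def __init__(self, x, y):
        self._x, self._y = x, y

    def __len__(self):
        return 1  # a single batch containing all the data

    def __getitem__(self, index):
        return self._x, self._y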
def train(self, epochs):
    """The functional API's `model.fit` does not support sparse tensors in the
    current implementation, so we write the training loop ourselves."""
    callbacks = CallbackList(
        [
            EvaluateCallback(self.valid_generator, prepend_str='val_'),
            TensorBoard(self.log_dir, profile_batch=0),
            ModelCheckpoint(self.model_save_path / 'best.h5py',
                            monitor='val_kendal', save_best_only=True,
                            verbose=1, mode='max'),
            EarlyStopping(monitor='val_kendal', patience=5, mode='max',
                          restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_kendal', patience=2, factor=0.5,
                              mode='max'),
        ],
        add_history=True, add_progbar=True, verbose=1,
        model=self.model, epochs=epochs, steps=len(self.train_generator))

    # `on_train_begin` is called once here; calling it again every epoch would
    # reset the internal state of EarlyStopping and ReduceLROnPlateau.
    callbacks.on_train_begin()
    epoch_logs = {}
    for epoch in range(epochs):
        # Regenerate the training/validation graphs every 5 epochs.
        if epoch % 5 == 0:
            self.train_generator.gen_new_graphs()
            self.valid_generator.gen_new_graphs()
        callbacks.on_epoch_begin(epoch)
        for batch, (x, y) in enumerate(self.train_generator):
            callbacks.on_train_batch_begin(batch)
            logs = self.model.train_on_batch(x, y, return_dict=True)
            callbacks.on_train_batch_end(batch, logs)
        epoch_logs = copy.copy(logs)
        callbacks.on_epoch_end(epoch, logs=epoch_logs)
        pd.DataFrame(self.model.history.history).to_csv(
            self.log_dir / 'history.csv', index=False)
        if self.model.stop_training:
            break
    callbacks.on_train_end(copy.copy(epoch_logs))
    print(self.model.history.history)
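# `EvaluateCallback` is not a built-in Keras callback. A minimal sketch of what it
# might look like, assuming it evaluates the model on the validation generator at
# the end of each epoch and writes the results into `logs` with a prefix, so that
# ModelCheckpoint/EarlyStopping/ReduceLROnPlateau above can monitor keys such as
# `val_kendal`; the real implementation may differ.
import tensorflow as tf


class EvaluateCallback(tf.keras.callbacks.Callback):
    def __init__(self, generator, prepend_str='val_'):
        super().__init__()
        self.generator = generator
        self.prepend_str = prepend_str

    def on_epoch_end(self, epoch, logs=None):
        logs = logs if logs is not None else {}
        results = self.model.evaluate(self.generator, return_dict=True, verbose=0)
        for name, value in results.items():
            # Mutating `logs` in place makes the prefixed metrics visible to the
            # callbacks that run after this one in the CallbackList.
            logs[self.prepend_str + name] = value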
def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1,
        initial_epoch=0, validation_split=0., validation_data=None,
        shuffle=True, callbacks=None):
    """
    :param x: Numpy array of training data (if the model has a single input),
        or list of Numpy arrays (if the model has multiple inputs). If input
        layers in the model are named, you can also pass a dictionary mapping
        input names to Numpy arrays.
    :param y: Numpy array of target (label) data (if the model has a single
        output), or list of Numpy arrays (if the model has multiple outputs).
    :param batch_size: Integer or `None`. Number of samples per gradient
        update. If unspecified, `batch_size` will default to 256.
    :param epochs: Integer. Number of epochs to train the model. An epoch is
        an iteration over the entire `x` and `y` data provided. Note that in
        conjunction with `initial_epoch`, `epochs` is to be understood as
        "final epoch". The model is not trained for a number of iterations
        given by `epochs`, but merely until the epoch of index `epochs` is
        reached.
    :param verbose: Integer. 0, 1, or 2. Verbosity mode. 0 = silent,
        1 = progress bar, 2 = one line per epoch.
    :param initial_epoch: Integer. Epoch at which to start training (useful
        for resuming a previous training run).
    :param validation_split: Float between 0 and 1. Fraction of the training
        data to be used as validation data. The model will set apart this
        fraction of the training data, will not train on it, and will
        evaluate the loss and any model metrics on this data at the end of
        each epoch. The validation data is selected from the last samples in
        the `x` and `y` data provided, before shuffling.
    :param validation_data: tuple `(x_val, y_val)` or tuple
        `(x_val, y_val, val_sample_weights)` on which to evaluate the loss
        and any model metrics at the end of each epoch. The model will not be
        trained on this data. `validation_data` will override
        `validation_split`.
    :param shuffle: Boolean. Whether to shuffle the order of the batches at
        the beginning of each epoch.
    :param callbacks: List of `deepctr_torch.callbacks.Callback` instances to
        apply during training and validation (if applicable). See
        [callbacks](https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks).
        Now available: `EarlyStopping`, `ModelCheckpoint`.
    :return: A `History` object. Its `History.history` attribute is a record
        of training loss values and metrics values at successive epochs, as
        well as validation loss values and validation metrics values
        (if applicable).
    """
    if isinstance(x, dict):
        x = [x[feature] for feature in self.feature_index]

    do_validation = False
    if validation_data:
        do_validation = True
        if len(validation_data) == 2:
            val_x, val_y = validation_data
            val_sample_weight = None
        elif len(validation_data) == 3:
            val_x, val_y, val_sample_weight = validation_data  # pylint: disable=unpacking-non-sequence
        else:
            raise ValueError(
                'When passing a `validation_data` argument, '
                'it must contain either 2 items (x_val, y_val), '
                'or 3 items (x_val, y_val, val_sample_weights), '
                'or alternatively it could be a dataset or a '
                'dataset iterator. '
                'However we received `validation_data=%s`' % validation_data)
        if isinstance(val_x, dict):
            val_x = [val_x[feature] for feature in self.feature_index]
    elif validation_split and 0. < validation_split < 1.:
        do_validation = True
        if hasattr(x[0], 'shape'):
            split_at = int(x[0].shape[0] * (1. - validation_split))
        else:
            split_at = int(len(x[0]) * (1. - validation_split))
        x, val_x = (slice_arrays(x, 0, split_at),
                    slice_arrays(x, split_at))
        y, val_y = (slice_arrays(y, 0, split_at),
                    slice_arrays(y, split_at))
    else:
        val_x = []
        val_y = []

    for i in range(len(x)):
        if len(x[i].shape) == 1:
            x[i] = np.expand_dims(x[i], axis=1)

    train_tensor_data = Data.TensorDataset(
        torch.from_numpy(np.concatenate(x, axis=-1)),
        torch.from_numpy(y))
    if batch_size is None:
        batch_size = 256

    model = self.train()
    loss_func = self.loss_func
    optim = self.optim

    if self.gpus:
        print('parallel running on these gpus:', self.gpus)
        model = torch.nn.DataParallel(model, device_ids=self.gpus)
        batch_size *= len(self.gpus)  # input `batch_size` is batch_size per gpu
    else:
        print(self.device)

    train_loader = DataLoader(
        dataset=train_tensor_data, shuffle=shuffle, batch_size=batch_size)

    sample_num = len(train_tensor_data)
    steps_per_epoch = (sample_num - 1) // batch_size + 1

    # configure callbacks
    callbacks = (callbacks or []) + [self.history]  # add history callback
    callbacks = CallbackList(callbacks)
    callbacks.on_train_begin()
    callbacks.set_model(self)
    if not hasattr(callbacks, 'model'):
        callbacks.__setattr__('model', self)
    callbacks.model.stop_training = False

    # Train
    print("Train on {0} samples, validate on {1} samples, {2} steps per epoch"
          .format(len(train_tensor_data), len(val_y), steps_per_epoch))
    for epoch in range(initial_epoch, epochs):
        callbacks.on_epoch_begin(epoch)
        epoch_logs = {}
        start_time = time.time()
        loss_epoch = 0
        total_loss_epoch = 0
        train_result = {}
        try:
            with tqdm(enumerate(train_loader), disable=verbose != 1) as t:
                for _, (x_train, y_train) in t:
                    x = x_train.to(self.device).float()
                    y = y_train.to(self.device).float()

                    y_pred = model(x).squeeze()

                    optim.zero_grad()
                    loss = loss_func(y_pred, y.squeeze(), reduction='sum')
                    reg_loss = self.get_regularization_loss()
                    total_loss = loss + reg_loss + self.aux_loss

                    loss_epoch += loss.item()
                    total_loss_epoch += total_loss.item()
                    total_loss.backward()
                    optim.step()

                    if verbose > 0:
                        for name, metric_fun in self.metrics.items():
                            if name not in train_result:
                                train_result[name] = []
                            train_result[name].append(metric_fun(
                                y.cpu().data.numpy(),
                                y_pred.cpu().data.numpy().astype("float64")))
        except KeyboardInterrupt:
            t.close()
            raise
        t.close()

        # Add epoch_logs
        epoch_logs["loss"] = total_loss_epoch / sample_num
        for name, result in train_result.items():
            epoch_logs[name] = np.sum(result) / steps_per_epoch

        if do_validation:
            eval_result = self.evaluate(val_x, val_y, batch_size)
            for name, result in eval_result.items():
                epoch_logs["val_" + name] = result

        # verbose
        if verbose > 0:
            epoch_time = int(time.time() - start_time)
            print('Epoch {0}/{1}'.format(epoch + 1, epochs))

            eval_str = "{0}s - loss: {1: .4f}".format(
                epoch_time, epoch_logs["loss"])
            for name in self.metrics:
                eval_str += " - " + name + \
                    ": {0: .4f}".format(epoch_logs[name])
            if do_validation:
                for name in self.metrics:
                    eval_str += " - " + "val_" + name + \
                        ": {0: .4f}".format(epoch_logs["val_" + name])
            print(eval_str)

        callbacks.on_epoch_end(epoch, epoch_logs)
        if self.stop_training:
            break

    callbacks.on_train_end()

    return self.history
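# The `validation_split` branch above relies on `slice_arrays`, which mirrors the
# Keras utility of the same name: slice every array in a list along the first
# axis. A minimal sketch assuming plain NumPy arrays; the real helper also
# handles `None` entries and index lists.
import numpy as np


def slice_arrays(arrays, start=None, stop=None):
    """Return [a[start:stop] for a in arrays]; with stop=None this is a[start:]."""
    return [None if a is None else a[start:stop] for a in arrays]


# Example: an 80/20 train/validation split of two feature arrays of 10 samples.
x = [np.arange(10), np.arange(10, 20)]
split_at = int(len(x[0]) * (1. - 0.2))
x_train, x_val = slice_arrays(x, 0, split_at), slice_arrays(x, split_at)
assert len(x_train[0]) == 8 and len(x_val[0]) == 2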
def fit_dataset(self, dataset, steps_per_epoch=None, batch_size=32,
                epochs=1, verbose=1, callbacks=None, on_sample=None,
                on_scores=None):
    """Train the model on the given dataset for a given number of epochs.

    Arguments
    ---------
        dataset: Instance of `BaseDataset` that provides the data
                 to train on.
        steps_per_epoch: int or None, number of gradient updates before
                         considering an epoch has passed. If None it is set
                         to be `len(dataset.train_data) / batch_size`.
        batch_size: int, number of samples per gradient update
        epochs: int, number of times to iterate `steps_per_epoch` times
        verbose: {0, >0}, whether to employ the progress bar Keras
                 callback or not
        callbacks: list of Keras callbacks to be called during training
        on_sample: callable that accepts the sampler, idxs, w, scores
        on_scores: callable that accepts the sampler and scores
    """
    try:
        if len(dataset.train_data) < batch_size:
            raise ValueError(("The model cannot be trained with "
                              "batch_size > training set"))
    except RuntimeError as e:
        assert "no size" in str(e)

    # Set steps_per_epoch properly
    if steps_per_epoch is None:
        steps_per_epoch = len(dataset.train_data) // batch_size

    # Create the callbacks list
    self.history = History()
    callbacks = [BaseLogger()] + (callbacks or []) + [self.history]
    if verbose > 0:
        callbacks += [ProgbarLogger(count_mode="steps")]
    callbacks = CallbackList(callbacks)
    callbacks.set_model(self.original_model)
    callbacks.set_params({
        "epochs": epochs,
        "steps": steps_per_epoch,
        "verbose": verbose,
        "do_validation": len(dataset.test_data) > 0,
        "metrics": self.metrics_names + [
            "val_" + n for n in self.metrics_names
        ]
    })

    # Create the sampler
    sampler = self.sampler(dataset, batch_size, steps_per_epoch, epochs)

    # Start the training loop
    epoch = 0
    self.original_model.stop_training = False
    callbacks.on_train_begin()
    while epoch < epochs:
        callbacks.on_epoch_begin(epoch)
        for step in range(steps_per_epoch):
            batch_logs = {"batch": step, "size": batch_size}
            callbacks.on_batch_begin(step, batch_logs)

            # Importance sampling is done here
            idxs, (x, y), w = sampler.sample(batch_size)
            # Train on the sampled data
            loss, metrics, scores = self.model.train_batch(x, y, w)
            # Update the sampler
            sampler.update(idxs, scores)

            values = map(lambda x: x.mean(), [loss] + metrics)
            for l, o in zip(self.metrics_names, values):
                batch_logs[l] = o
            callbacks.on_batch_end(step, batch_logs)

            if on_scores is not None and hasattr(self, "_latest_scores"):
                on_scores(sampler, self._latest_scores)

            if on_sample is not None:
                on_sample(
                    sampler,
                    self._latest_sample_event["idxs"],
                    self._latest_sample_event["w"],
                    self._latest_sample_event["predicted_scores"]
                )

            if self.original_model.stop_training:
                break

        # Evaluate now that an epoch passed
        epoch_logs = {}
        if len(dataset.test_data) > 0:
            val = self.model.evaluate(
                *dataset.test_data[:],
                batch_size=batch_size
            )
            epoch_logs = {
                "val_" + l: o
                for l, o in zip(self.metrics_names, val)
            }
        callbacks.on_epoch_end(epoch, epoch_logs)

        if self.original_model.stop_training:
            break
        epoch += 1
    callbacks.on_train_end()

    return self.history
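# The loop above needs only two things from `sampler`: `sample(batch_size)`, which
# returns `(idxs, (x, y), w)`, and `update(idxs, scores)`. A minimal uniform-sampling
# sketch of that contract, assuming `dataset.train_data[idxs]` yields an (x, y)
# pair; the library's importance samplers instead reweight samples and keep
# per-sample scores up to date.
import numpy as np


class UniformSampler:
    def __init__(self, dataset, batch_size, steps_per_epoch, epochs):
        self.dataset = dataset
        self.n = len(dataset.train_data)

    def sample(self, batch_size):
        idxs = np.random.choice(self.n, batch_size)
        x, y = self.dataset.train_data[idxs]
        w = np.ones((batch_size, 1))  # uniform sampling => unit importance weights
        return idxs, (x, y), w

    def update(self, idxs, scores):
        pass  # uniform sampling ignores the per-sample scores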
def fit_generator(self, generator, n_steps_per_epoch, n_epochs=1,
                  validation_data=None, n_validation_steps=None):
    """Train the network on batches of data generated from `generator`

    :param generator: a generator yielding batches indefinitely, where each
        batch is a tuple of (inputs, targets)
    :type generator: generator
    :param n_steps_per_epoch: number of batches to train on in one epoch
    :type n_steps_per_epoch: int
    :param n_epochs: number of epochs to train the model
    :type n_epochs: int
    :param validation_data: generator yielding batches to evaluate the loss
        on at the end of each epoch, where each batch is a tuple of
        (inputs, targets)
    :type validation_data: generator
    :param n_validation_steps: number of batches to evaluate on from
        `validation_data`
    :type n_validation_steps: int

    :raises RuntimeError: if only one of `validation_data` and
        `n_validation_steps` is passed in
    """
    default_callbacks = self._default_callbacks()
    callbacks = CallbackList(default_callbacks)

    self._assert_compiled()

    invalid_inputs = (
        (validation_data is not None and n_validation_steps is None) or
        (n_validation_steps is not None and validation_data is None)
    )
    if invalid_inputs:
        msg = ('`validation_data` and `n_validation_steps` must both be '
               'passed, or neither.')
        raise RuntimeError(msg)

    if self.device:
        self.network.to(self.device)

    callbacks.set_params({
        'epochs': n_epochs,
        'metrics': ['loss', 'val_loss'],
        'steps': n_steps_per_epoch,
        'verbose': True
    })
    callbacks.set_model(self)

    callbacks.on_train_begin()
    for idx_epoch in range(n_epochs):
        if self.stop_training:
            break

        epoch_logs = {}
        callbacks.on_epoch_begin(idx_epoch)

        for idx_batch in range(n_steps_per_epoch):
            batch_logs = {'batch': idx_batch, 'size': 1}
            callbacks.on_batch_begin(idx_batch, batch_logs)

            inputs, targets = next(generator)
            loss = self.train_on_batch(inputs, targets)

            batch_logs['loss'] = loss
            callbacks.on_batch_end(idx_batch, batch_logs)

            if self.stop_training:
                break

        if validation_data:
            val_loss = self.evaluate_generator(
                validation_data, n_validation_steps)
            epoch_logs['val_loss'] = val_loss

        callbacks.on_epoch_end(idx_epoch, epoch_logs)
    callbacks.on_train_end()
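# A minimal sketch of a generator compatible with `fit_generator` above: it must
# yield (inputs, targets) batches indefinitely, because the loop simply calls
# `next(generator)` `n_steps_per_epoch` times per epoch. The array shapes and the
# `trainer` name in the usage comment are illustrative assumptions.
import numpy as np


def batch_generator(inputs, targets, batch_size=32):
    n_samples = len(inputs)
    while True:
        idxs = np.random.randint(0, n_samples, size=batch_size)
        yield inputs[idxs], targets[idxs]


# Usage (hypothetical `trainer` instance of the class defining fit_generator):
# trainer.fit_generator(batch_generator(x_train, y_train), n_steps_per_epoch=100,
#                       n_epochs=10,
#                       validation_data=batch_generator(x_val, y_val),
#                       n_validation_steps=20)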