def test_optimizer_init():
    optimizer = Optimizer(
        learn_rate=0.123,
        use_averages=False,
        use_radam=True,
        L2=0.1,
        L2_is_weight_decay=False,
    )
    _, gradient = optimizer((0, "x"), numpy.zeros((1, 2)), numpy.zeros(0))
    assert numpy.array_equal(gradient, numpy.zeros(0))
    W = numpy.asarray([1.0, 0.0, 0.0, 1.0], dtype="f").reshape((4,))
    dW = numpy.asarray([[-1.0, 0.0, 0.0, 1.0]], dtype="f").reshape((4,))
    optimizer((0, "x"), W, dW)
    optimizer = Optimizer(learn_rate=0.123, beta1=0.1, beta2=0.1)
    optimizer((1, "x"), W, dW)
def test_optimizer_schedules_valid(schedule_valid):
    lr, lr_next1, lr_next2, lr_next3 = schedule_valid
    optimizer = Optimizer(learn_rate=lr)
    assert optimizer.learn_rate == lr_next1
    optimizer.step_schedules()
    assert optimizer.learn_rate == lr_next2
    optimizer.step_schedules()
    assert optimizer.learn_rate == lr_next3
    optimizer.learn_rate = 1.0
    assert optimizer.learn_rate == 1.0
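
# Minimal usage sketch (not part of the original suite): the test above relies on
# a fixture, but `learn_rate` can be given as a schedule rather than a float. This
# assumes thinc's `warmup_linear` schedule is importable from `thinc.api`;
# `step_schedules()` advances every schedule-valued hyperparameter by one step.
def _example_schedule_as_learn_rate():
    from thinc.api import warmup_linear

    # Warm up to 0.001 over 250 steps, then decay linearly over 10000 total steps.
    optimizer = Optimizer(learn_rate=warmup_linear(0.001, 250, 10000))
    for _ in range(5):
        optimizer.step_schedules()
    return optimizer.learn_rate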
def train_while_improving(
    nlp: "Language",
    optimizer: Optimizer,
    train_data,
    evaluate,
    *,
    dropout: float,
    eval_frequency: int,
    accumulate_gradient: int,
    patience: int,
    max_steps: int,
    exclude: List[str],
    annotating_components: List[str],
):
    """Train until an evaluation stops improving. Works as a generator,
    with each iteration yielding a tuple `(batch, info, is_best_checkpoint)`,
    where info is a dict, and is_best_checkpoint is in [True, False, None] --
    None indicating that the iteration was not evaluated as a checkpoint.
    The evaluation is conducted by calling the evaluate callback.

    Positional arguments:
        nlp: The spaCy pipeline to evaluate.
        optimizer: The optimizer callable.
        train_data (Iterable[Batch]): A generator of batches, with the training
            data. Each batch should be a Sized[Tuple[Input, Annot]]. The training
            data iterable needs to take care of iterating over the epochs and
            shuffling.
        evaluate (Callable[[], Tuple[float, Any]]): A callback to perform
            evaluation. The callback should take no arguments and return a tuple
            `(main_score, other_scores)`. The main_score should be a float where
            higher is better. other_scores can be any object.

    Every iteration, the function yields out a tuple with:

    * batch: A list of Example objects.
    * info: A dict with various information about the last update (see below).
    * is_best_checkpoint: A value in None, False, True, indicating whether this
        was the best evaluation so far. You should use this to save the model
        checkpoints during training. If None, evaluation was not conducted on
        that iteration. False means evaluation was conducted, but a previous
        evaluation was better.

    The info dict provides the following information:

        epoch (int): How many passes over the data have been completed.
        step (int): How many steps have been completed.
        score (float): The main score from the last evaluation.
        other_scores: The other scores from the last evaluation.
        losses: The accumulated losses throughout training.
        checkpoints: A list of previous results, where each result is a
            (score, step) tuple.
    """
    if isinstance(dropout, float):
        dropouts = constant(dropout)
    else:
        dropouts = dropout
    results = []
    losses = {}
    words_seen = 0
    start_time = timer()
    for step, (epoch, batch) in enumerate(train_data):
        dropout = next(dropouts)
        for subbatch in subdivide_batch(batch, accumulate_gradient):
            nlp.update(
                subbatch,
                drop=dropout,
                losses=losses,
                sgd=False,
                exclude=exclude,
                annotates=annotating_components,
            )
        # TODO: refactor this so we don't have to run it separately in here
        for name, proc in nlp.pipeline:
            if (
                name not in exclude
                and hasattr(proc, "is_trainable")
                and proc.is_trainable
                and proc.model not in (True, False, None)
            ):
                proc.finish_update(optimizer)
        optimizer.step_schedules()
        if not (step % eval_frequency):
            if optimizer.averages:
                with nlp.use_params(optimizer.averages):
                    score, other_scores = evaluate()
            else:
                score, other_scores = evaluate()
            results.append((score, step))
            is_best_checkpoint = score == max(results)[0]
        else:
            score, other_scores = (None, None)
            is_best_checkpoint = None
        words_seen += sum(len(eg) for eg in batch)
        info = {
            "epoch": epoch,
            "step": step,
            "score": score,
            "other_scores": other_scores,
            "losses": losses,
            "checkpoints": results,
            "seconds": int(timer() - start_time),
            "words": words_seen,
        }
        yield batch, info, is_best_checkpoint
        if is_best_checkpoint is not None:
            losses = {}
        # Stop if no improvement in `patience` updates (if specified)
        # Negate step value so that the earliest best step is chosen for the
        # same score, i.e. (1.0, 100) is chosen over (1.0, 200)
        best_result = max((r_score, -r_step) for r_score, r_step in results)
        best_step = -best_result[1]
        if patience and (step - best_step) >= patience:
            break
        # Stop if we've exhausted our max steps (if specified)
        if max_steps and step >= max_steps:
            break
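
# Illustrative consumer sketch (not part of this module): the docstring above
# describes the generator protocol, and a caller can drive training like this.
# The hyperparameter values below are arbitrary examples; `output_path` is a
# hypothetical destination directory, and `nlp.to_disk` is spaCy's standard
# serialization method.
def _example_training_driver(nlp, optimizer, train_data, evaluate, output_path):
    training = train_while_improving(
        nlp,
        optimizer,
        train_data,
        evaluate,
        dropout=0.1,
        eval_frequency=200,
        accumulate_gradient=1,
        patience=1600,
        max_steps=20000,
        exclude=[],
        annotating_components=[],
    )
    for batch, info, is_best_checkpoint in training:
        if is_best_checkpoint is None:
            # This step was not an evaluation checkpoint; keep training.
            continue
        if is_best_checkpoint:
            # Best score so far: persist the pipeline.
            nlp.to_disk(output_path)
        print(info["step"], info["score"], info["losses"])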
def test_optimizer_schedules_invalid(schedule_invalid):
    with pytest.raises(ValueError):
        Optimizer(learn_rate=schedule_invalid)