Ejemplo n.º 1
0
def test_update(nlp: TorchLanguage):
    """A training step must move the transformer weights; freezing must not."""
    texts = ["This is a test sentence to check\u3000model.update!"]
    labels = [{}]
    pipe: TrfModel = nlp.get_pipe(TRANSFORMERS_MODEL)
    optimizer = nlp.resume_training()
    eps = 1e-5

    def _fingerprint(params):
        # Collapse every optimizer parameter into one scalar for comparison.
        total = 0.0
        for p in params:
            total += p.sum().item()
        return total

    def _step():
        # One full update cycle; returns how much the parameters moved.
        docs, golds = nlp._format_docs_and_golds(texts, labels)
        snapshot = _fingerprint(pipe.optim_parameters())
        nlp._update_pipes(docs, golds)
        hidden = get_last_hidden_state_from_docs(docs)
        loss = hidden.sum() + torch.tensor(0.0, requires_grad=True)
        add_loss_to_docs(docs, loss)
        nlp._update_params(docs, optimizer)
        return abs(snapshot - _fingerprint(pipe.optim_parameters()))

    assert _step() > eps

    pipe.cfg["freeze"] = True  # freeze model: weights must stay put
    assert _step() < eps
    pipe.cfg["freeze"] = False  # restore freeze state for other tests
Ejemplo n.º 2
0
def evaluate(cfg: Config, nlp: TorchLanguage, val_data: InputData) -> Dict:
    """Score *nlp* on *val_data*; dump the failing batch before re-raising."""
    batch_size = cfg.nbatch * 2
    try:
        return nlp.evaluate(val_data, batch_size=batch_size)
    except Exception:
        # Persist the offending data for post-mortem, then propagate.
        report_fail(val_data)
        raise
Ejemplo n.º 3
0
def create_lang(cfg: LangConfig) -> Language:
    """Build a spaCy ``Language`` (Torch-backed when ``cfg.torch``) from *cfg*."""
    kwargs = cfg.kwargs or {}
    if isinstance(kwargs, omegaconf.Config):
        # OmegaConf containers must become plain dicts before **-splatting.
        # NOTE(review): `omegaconf.Config` looks like a pre-2.0 API — confirm
        # against the pinned omegaconf version.
        kwargs = OmegaConf.to_container(kwargs)
    if not cfg.torch:
        return spacy.blank(cfg.name, **kwargs)
    kwargs["meta"] = merge(kwargs.get("meta", {}), {"lang": cfg.name})
    return TorchLanguage(True, optimizer_config=cfg.optimizer, **kwargs)
Ejemplo n.º 4
0
def train_epoch(
    cfg: Config,
    nlp: TorchLanguage,
    optim: Optimizer,
    train_data: InputData,
    val_data: InputData,
    epoch: int,
    eval_fn: EvalFn,
) -> None:
    """Run one epoch of minibatch updates over *train_data*.

    On a failing batch, reports the batch and re-raises so the caller sees
    the original exception. ``val_data`` and ``eval_fn`` are accepted for
    interface compatibility but are not used in this body.
    """
    for j, batch in enumerate(minibatch(train_data, size=cfg.nbatch)):
        texts, golds = zip(*batch)
        try:
            nlp.update(texts, golds, optim, verbose=True)
        except Exception:
            report_fail(batch)
            raise
        # Lazy %-style args: message is only built when INFO is enabled,
        # instead of formatting an f-string on every batch.
        logger.info("epoch %s %s/%s", epoch, j * cfg.nbatch, cfg.data.ndata)
Ejemplo n.º 5
0
def evaluate_textcat(cfg: Config, nlp: TorchLanguage,
                     val_data: InputData) -> Dict:
    """Evaluate text classification and return a sklearn report dict.

    Compares gold labels (derived from each gold's ``"cats"`` mapping)
    against the pipeline's predicted top label; on any failure the
    offending data is reported and the exception re-raised.
    """
    # TODO: https://github.com/explosion/spaCy/pull/4664
    texts, golds = cast(Tuple[Tuple[str], Dict], zip(*val_data))
    try:
        # Comprehension instead of np.array(list(map(lambda ...))).
        y = np.array([goldcat_to_label(gold["cats"]) for gold in golds])
        docs = list(nlp.pipe(texts, batch_size=cfg.nbatch * 2))
        preds = np.array([doc._.get(TOP_LABEL) for doc in docs])
    except Exception:
        report_fail(val_data)
        raise
    return classification_report(y, preds, output_dict=True)
Ejemplo n.º 6
0
def train(
    cfg: Config,
    nlp: TorchLanguage,
    train_data: InputData,
    val_data: InputData,
    savedir: Path,
) -> None:
    """Full training loop: shuffle, update, step scheduler, score, checkpoint."""
    eval_fn = EVAL_FN_MAP[cfg.task]
    optim = nlp.resume_training()
    scheduler = load_scheduler(cfg, optim)
    for epoch in range(cfg.niter):
        random.shuffle(train_data)  # in-place reshuffle each epoch
        train_epoch(cfg, nlp, optim, train_data, val_data, epoch, eval_fn)
        scheduler.step()  # type: ignore # (https://github.com/pytorch/pytorch/pull/26531)
        scores = eval_fn(cfg, nlp, val_data)
        meta_patch = {"score": scores, "config": OmegaConf.to_container(cfg)}
        nlp.meta.update(meta_patch)
        # One checkpoint directory per epoch index.
        save_model(nlp, savedir / str(epoch))
Ejemplo n.º 7
0
def test(nlp: TorchLanguage, data):
    """Smoke-test evaluation, moving the pipeline to CUDA when available."""
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        nlp.to(torch.device("cuda"))
    nlp.evaluate(data, batch_size=256)
Ejemplo n.º 8
0
def nlp():
    """Fixture-style factory: a TorchLanguage tagged as English."""
    meta = {"lang": "en"}
    return TorchLanguage(meta=meta)
Ejemplo n.º 9
0
def torch_lang():
    """Fixture-style factory: a default-constructed TorchLanguage."""
    lang = TorchLanguage()
    return lang