Esempio n. 1
0
def nlp(trf_name_or_path):
    """Build a model whose pipeline holds only a transformers tokenizer.

    The inline YAML enables the torch backend and an SGD optimizer, then
    hands the rendered config to ``create_model``.
    """
    template = """
    lang:
        name: en
        torch: true
        optimizer:
            class: torch.optim.SGD
    pipeline:
        {tokenizer}:
          trf_name_or_path: {path}
    """
    rendered = template.format(tokenizer=TRANSFORMERS_TOKENIZER, path=trf_name_or_path)
    return create_model(rendered)
Esempio n. 2
0
def nlp(trf_name_or_path, labels, lang, device, textcat_type):
    """Build a text-classification model from an inline YAML config.

    ``textcat_type == "single"`` selects the single-label sequence
    classifier pipe; any other value selects the multi-label variant.
    NOTE(review): ``device`` is accepted but never used in this body —
    confirm whether callers rely on it (e.g. fixture ordering).
    """
    if textcat_type == "single":
        classifier = TRANSFORMERS_SEQ_CLASSIFIER
    else:
        classifier = TRANSFORMERS_MULTILABEL_SEQ_CLASSIFIER
    template = """
    lang:
        name: {lang}
        torch: true
        optimizer:
            class: torch.optim.SGD
            params:
                lr: 0.01
    pipeline:
        {pipe}:
            trf_name_or_path: {path}
            labels: {labels}
    """
    rendered = template.format(
        lang=lang, pipe=classifier, path=trf_name_or_path, labels=labels)
    return create_model(rendered)
Esempio n. 3
0
def nlp():
    """Build a model with an ALBERT backbone followed by an NER pipe.

    The backbone name is fixed to ``albert-base-v2``; NER labels come
    from the module-level ``ALL_LABELS``.
    """
    backbone = "albert-base-v2"
    template = """
    lang:
        name: en
        optimizer:
            class: torch.optim.SGD
            params:
                lr: 0.01
    pipeline:
        transformers_model:
          trf_name_or_path: {name}
        transformers_ner:
          labels: {labels}
    """
    return create_model(template.format(name=backbone, labels=ALL_LABELS))
Esempio n. 4
0
def _main(cfg: Config) -> None:
    """Entry point: resolve the config, build data and model, then train.

    Side effects: logs the resolved config, moves the model to CUDA when
    available, and saves outputs under ``<cwd>/models``.
    """
    if cfg.user_config is not None:
        # Override config by user config.
        # This `user_config` have some limitations, and it will be improved
        # after the issue https://github.com/facebookresearch/hydra/issues/386 solved
        user_cfg_path = hydra.utils.to_absolute_path(cfg.user_config)
        cfg = OmegaConf.merge(cfg, OmegaConf.load(user_cfg_path))
    cfg = parse(cfg)
    if cfg.seed:
        set_seed(cfg.seed)
    logger.info(cfg.pretty())
    train_data, val_data = create_data(cfg.train.data)
    model = cast(TorchLanguage, create_model(cfg.model))
    logger.info("output dir: {}".format(os.getcwd()))
    if torch.cuda.is_available():
        logger.info("CUDA enabled")
        model.to(torch.device("cuda"))
    model_dir = Path.cwd() / "models"
    model_dir.mkdir(exist_ok=True)
    train(cfg.train, model, train_data, val_data, model_dir)
Esempio n. 5
0
def _main(cfg):
    """Entry point: parse the config, load JSONL data, build and train a model.

    Side effects: writes copies of the exact train/val data used for this
    run into the working directory (``train-data.jsonl`` / ``val-data.jsonl``)
    and trains into ``<cwd>/models``.
    """
    cfg = parse(cfg)
    if cfg.seed:
        set_seed(cfg.seed)
    # Data paths are relative to where the process was launched, not the
    # (hydra-managed) current working directory — presumably hydra has
    # chdir'd into a run dir; confirm against the hydra setup.
    org_cwd = hydra.utils.get_original_cwd()
    logger.info(cfg.pretty())
    nlp = cast(TorchLanguage, create_model(cfg.model))
    train_data = list(
        srsly.read_jsonl(os.path.join(org_cwd, cfg.train.data.train)))
    # Record the dataset size on the config for downstream consumers.
    cfg.train.data.ndata = len(train_data)
    val_data = list(srsly.read_jsonl(os.path.join(org_cwd,
                                                  cfg.train.data.val)))
    logger.info("output dir: {}".format(os.getcwd()))
    if torch.cuda.is_available():
        logger.info("CUDA enabled")
        nlp.to(torch.device("cuda"))
    savedir = Path.cwd() / "models"
    # Plain literals: the originals were f-strings with no placeholders (F541).
    srsly.write_jsonl(Path.cwd() / "train-data.jsonl", train_data)
    srsly.write_jsonl(Path.cwd() / "val-data.jsonl", val_data)
    savedir.mkdir(exist_ok=True)
    train(cfg.train, nlp, train_data, val_data, savedir)
Esempio n. 6
0
def nlp_trf_model(trf_model_config, device):
    """Instantiate a model from ``trf_model_config`` and move it to ``device``."""
    model = create_model(trf_model_config)
    model.to(device)
    return model
Esempio n. 7
0
 def get_model_value(cfg):
     """Return the scalar sum over all parameters of the NER pipe's model.

     Useful as a cheap fingerprint of the weights (e.g. to detect whether
     training changed them).
     """
     model = create_model(cfg.model)
     ner_pipe = model.get_pipe(TRANSFORMERS_NER)
     per_param = [param.sum().cpu().item() for param in ner_pipe.model.parameters()]
     return sum(per_param)
Esempio n. 8
0
def test_freeze_model(trf_testmodel_path, trf_model_config: NLPConfig):
    """Setting ``freeze`` on the transformers pipe must survive model creation."""
    raw_config = omegaconf.OmegaConf.to_container(trf_model_config)
    raw_config["pipeline"][TRANSFORMERS_MODEL]["freeze"] = True
    model = create_model(raw_config)
    _, last_pipe = model.pipeline[-1]
    assert last_pipe.cfg["freeze"]
Esempio n. 9
0
def nlp(config, device):
    """Build a model from ``config`` and place it on ``device``."""
    model = create_model(config)
    model.to(device)
    return model
Esempio n. 10
0
def test_add_pipes_parser(yml, pipe):
    """A pipe declared in the YAML config must be retrievable by name."""
    model = create_model(yml)
    assert model.get_pipe(pipe)