Example #1
        def after_epoch(self):
            # log the epoch's validation metrics; the method head and the
            # first two lines of the dict comprehension are reconstructed,
            # since the excerpt began mid-statement
            wandb.log({
                n: s
                for n, s in zip(self.recorder.metric_names, self.recorder.log)
                if n not in ["train_loss", "epoch", "time"]
            })

        def after_fit(self):
            wandb.log({})  # ensure sync of last step
            self.run.finish()
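
For context, a minimal sketch of how a callback like the one above is wired into a fastai Learner. The class name `WandbLogger`, the `dls`/`model` objects, and the W&B project name are assumptions for illustration, not taken from this example:

import wandb
from fastai.callback.core import Callback  # fastai v2

class WandbLogger(Callback):
    # fastai delegates attribute access to the owning Learner, so
    # `self.recorder` below resolves to the Learner's Recorder
    def before_fit(self):
        self.run = wandb.init(project="electra-glue")  # illustrative name

    def after_epoch(self):
        wandb.log({
            n: s
            for n, s in zip(self.recorder.metric_names, self.recorder.log)
            if n not in ["train_loss", "epoch", "time"]
        })

    def after_fit(self):
        wandb.log({})  # ensure sync of last step
        self.run.finish()

# learn = Learner(dls, model, cbs=[WandbLogger()])
# learn.fit(1)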


# My model: use the repo's own implementation instead of the HuggingFace one
if c.my_model:
    sys.path.insert(0, os.path.abspath(".."))
    from modeling.model import ModelForDiscriminator
    from hyperparameter import electra_hparam_from_hf

    hparam = electra_hparam_from_hf(electra_config, hf_tokenizer)
    hparam.update(hparam_update)

# Paths
Path("./datasets").mkdir(exist_ok=True)
Path("./checkpoints/glue").mkdir(exist_ok=True, parents=True)
Path("./test_outputs/glue").mkdir(exist_ok=True, parents=True)
c.pretrained_ckp_path = Path(
    f"./checkpoints/pretrain/{c.pretrained_checkpoint}")
if c.group_name is None:
    if c.pretrained_checkpoint:
        # derive the group from the checkpoint file name, dropping its
        # 4-character extension (e.g. "foo.pth" -> "foo")
        c.group_name = c.pretrained_checkpoint[:-4]
    elif c.pretrained_checkpoint is None:
        # no checkpoint given: fall back to a size-based name, e.g. "small++"
        c.group_name = f"{c.size}++"

# Print info
Example #2
  # wiki_cache_dir is assigned just above this excerpt; the path below is an
  # assumption mirroring the bookcorpus layout, added so the later
  # `wiki_cache_dir / ...` lookups run
  wiki_cache_dir = Path("./datasets/wikipedia/20200501.en/1.0.0")
  book_cache_dir = Path("./datasets/bookcorpus/plain_text/1.0.0")
  wbdl_cache_dir = Path("./datasets/wikibook_dl")
  wbdl_cache_dir.mkdir(exist_ok=True)

# Print info
print(f"process id: {os.getpid()}")
print(c)
print(hparam_update)


# %%
if c.my_model:
  sys.path.insert(0, os.path.abspath(".."))
  from modeling.model import ModelForGenerator, ModelForDiscriminator
  from hyperparameter import electra_hparam_from_hf
  gen_hparam = electra_hparam_from_hf(gen_config, hf_tokenizer)
  gen_hparam.update(hparam_update)
  disc_hparam = electra_hparam_from_hf(disc_config, hf_tokenizer)
  disc_hparam.update(hparam_update)

# %% [markdown]
# # 1. Load Data

# %%
if c.size in ['small', 'base']:
  
  # wiki
  if (wiki_cache_dir/f"wiki_electra_{c.max_length}.arrow").exists():
    print('loading the electra data (wiki)')
    wiki = nlp.Dataset.from_file(str(wiki_cache_dir/f"wiki_electra_{c.max_length}.arrow"))
  else:
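    # (The else-branch is truncated in this example.) A hedged sketch of the
    # cache-miss path: download wikipedia with the HuggingFace `nlp` library
    # and write the arrow cache checked above. The preprocessing call is an
    # assumption; the original repo applies its own ELECTRA-specific processor.
    print('creating the electra data (wiki)')
    wiki = nlp.load_dataset('wikipedia', '20200501.en', cache_dir='./datasets')['train']
    wiki = wiki.map(
        lambda ex: hf_tokenizer(ex['text'], truncation=True, max_length=c.max_length),
        cache_file_name=str(wiki_cache_dir/f"wiki_electra_{c.max_length}.arrow"))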