def baseline():
    """Train the non-abstaining FFN baseline for all-words WSD.

    Builds train/dev loaders from ``./data``, trains a SimpleFFN with
    NLLLoss via SingleEmbeddingTrainer, and returns the best network
    found during training.

    Returns:
        The best-performing network selected by the trainer.
        (Previously the result was computed but discarded.)
    """
    train_ds = init_dataset("./data", stage="train", style="fnn")
    val_ds = init_dataset("./data", stage="dev", style="fnn")
    train_loader = SenseInstanceLoader(train_ds, batch_size=16)
    val_loader = SenseInstanceLoader(val_ds, batch_size=16)
    criterion = NLLLoss()
    # 768 matches the BERT-base embedding width — TODO confirm against vectors on disk.
    model = SimpleFFN(768, train_loader.num_senses())
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    decoder = AllwordsEmbeddingDecoder()
    n_epochs = 20
    trainer = SingleEmbeddingTrainer(criterion, optimizer, train_loader,
                                     val_loader, decoder, n_epochs)
    best_net = trainer(model)
    return best_net
def confidence_max4():
    """Train an abstaining FFN with ConfidenceLoss4 for all-words WSD.

    Same pipeline as ``baseline`` but uses SimpleAbstainingFFN with the
    'max_non_abs' confidence zone and ConfidenceLoss4(0.5) as criterion.

    Returns:
        The best-performing network selected by the trainer.
        (Previously the result was computed but discarded.)
    """
    train_ds = init_dataset("./data", stage="train", style="fnn")
    val_ds = init_dataset("./data", stage="dev", style="fnn")
    train_loader = SenseInstanceLoader(train_ds, batch_size=16)
    val_loader = SenseInstanceLoader(val_ds, batch_size=16)
    criterion = ConfidenceLoss4(0.5)
    # 768 matches the BERT-base embedding width — TODO confirm against vectors on disk.
    model = SimpleAbstainingFFN(768, train_loader.num_senses(),
                                zone_applicant='max_non_abs')
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    decoder = AllwordsEmbeddingDecoder()
    n_epochs = 20
    trainer = SingleEmbeddingTrainer(criterion, optimizer, train_loader,
                                     val_loader, decoder, n_epochs)
    best_net = trainer(model)
    return best_net
def init_loader(stage, architecture, style, corpus_id, bsz):
    """Build a data loader for the requested architecture and stage.

    Args:
        stage: 'train' or 'test' (style is only consulted for 'train').
        architecture: 'bem', 'simple', or 'abstaining'.
        style: 'single' or 'pairwise' (train-stage, non-BEM only).
        corpus_id: corpus key passed to the sentence/vector readers.
        bsz: batch size for the returned loader.

    Returns:
        A BEMLoader, SenseInstanceLoader, or TwinSenseInstanceLoader.

    Raises:
        ValueError: if architecture/stage/style is unrecognized.
            (Previously these fell through and crashed with an opaque
            UnboundLocalError on ``loader``.)
    """
    data_dir = allwords_data_dir
    filename = join(data_dir, 'raganato.json')
    sents = SenseTaggedSentences.from_json(filename, corpus_id)
    if architecture == "bem":
        # BEM ignores stage/style: the dataset handles its own pairing.
        ds = BEMDataset(sents)
        loader = BEMLoader(ds, bsz)
    elif architecture in ('simple', 'abstaining'):
        # Both architectures read precomputed vectors from disk.
        vecmgr = DiskBasedVectorManager(join(data_dir, 'vecs', corpus_id))
        ds = SenseInstanceDataset(sents, vecmgr)
        if stage == 'train':
            if style == 'single':
                loader = SenseInstanceLoader(ds, batch_size=bsz)
            elif style == 'pairwise':
                loader = TwinSenseInstanceLoader(ds, batch_size=bsz)
            else:
                raise ValueError("Unrecognized style: {}".format(style))
        elif stage == 'test':
            loader = SenseInstanceLoader(ds, batch_size=bsz)
        else:
            raise ValueError("Unrecognized stage: {}".format(stage))
    else:
        raise ValueError("Unrecognized architecture: {}".format(architecture))
    return loader