def run_experiment(args: dict[str, Any]):
    """Run cross-validation of a NER model and save one result set per returned entry.

    Keys read directly from ``args``: ``model``, ``dataset``, ``bert_attention``,
    ``dropout``, ``k`` and ``location``. The full dict is additionally handed to
    ``load_dataset`` and ``cross_validate``.
    """
    set_seeds(seed=0)
    # Clear the subfolder so that the save location is controlled directly
    NER_Results.subfolder = ""

    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    entity_vocab, metadata, state_dict, token_map = load_from_archive(args["model"])
    state_dict, ent_embed_size = mutate_for_ner(
        state_dict,
        mask_id=entity_vocab["[MASK]"]["id"],
        pad_id=entity_vocab["[PAD]"]["id"],
    )

    log(f"Loading dataset {args['dataset']} ...")
    dataset = load_dataset(args, metadata, device, token_map)

    log("Loading model ...")
    model = load_model(
        state_dict,
        dataset,
        metadata,
        device,
        entity_embedding_size=ent_embed_size,
        bert_attention=args["bert_attention"],
        dropout=args["dropout"],
    )

    cv_results = cross_validate(model, dataset, args["k"], args)

    log(f"Saving results to {args['location']}")
    for fold_idx, fold_result in enumerate(cv_results):
        fold_result.save(os.path.join(args["location"], f"res-cv{fold_idx}"))

    # Average the micro-averaged F1 score over all cross-validation results
    micro_f1_scores = [r.statistics["micro avg"]["f1-score"] for r in cv_results]
    log("Micro avg. F1 estimate", np.mean(micro_f1_scores))
def plots_vs_length(location: str):
    """Plot each transformed representation dimension against example lengths.

    For the PCA, t-SNE and UMAP transforms stored in the geometry results, the first
    (at most four) dimensions are each scattered against the sequence length and the
    span length of every example. One PNG per combination is written to
    ``<location>/geometry-plots``.

    NOTE(review): the results are loaded with ``GeometryResults.load()`` without a
    path, so this presumably relies on the storage location being configured by the
    caller; confirm.
    """
    res = GeometryResults.load()
    # True when no example carries the label 0
    only_pos = not (res.labels == 0).any()

    # Hardcoded to train
    log.debug("Loading data...")
    dataset = load_dataset({"dataset": "DaNE"}, DUMMY_METADATA, torch.device("cpu"))
    data = dataset.data[Split.TRAIN]

    length_kinds = {
        "sequence": np.array([len(data.texts[example["text_num"]]) for example in res.content]),
        "span": np.array([example["span"][1] - example["span"][0] for example in res.content]),
    }
    transforms = {
        "PCA": res.pca_transformed,
        "t-SNE": res.tsne_transformed,
        "UMAP": res.umap_transformed,
    }

    max_dims = 4
    for method, transformed in transforms.items():
        n_dims = min(transformed.shape[1], max_dims)
        for dim in range(n_dims):
            dim_no = dim + 1  # One-indexed dimension used in titles and axis labels
            for lenname, lengths in length_kinds.items():
                log.debug(f"Plotting {method}{dim} on {lenname}")
                _, ax = plt.subplots(figsize=figsize_std)
                ax.set_title(
                    f"{method} Representations, Dim. {dim_no} vs. Example {lenname.title()} Length"
                )
                values = transformed[:, dim]
                n_points = len(values)
                # Lengths and labels are cut to the number of transformed points
                _scatter_transformed(lengths[:n_points], values, res.labels[:n_points], ax)
                ax.legend(*_get_h_l(only_pos), loc="lower right")
                ax.set_ylabel(f"{method}$_{dim_no}$")
                ax.set_xlabel(f"Entity Example {lenname.title()} Length")
                plt.tight_layout()
                out_path = os.path.join(
                    location, "geometry-plots", f"{method}{dim}-{lenname}-len.png"
                )
                plt.savefig(out_path)
                plt.close()
def main(daluke_path: str, other_path: str, show: bool):
    """Compare saved DaLUKE NER predictions with the predictions of another model.

    Logs a confusion matrix between the two prediction sets and their sequence
    covariance. When ``show`` is true, every test example on which the two models
    disagree is also logged as a table with text, truth and both predictions.
    """
    other_name = os.path.basename(other_path)
    log_path = os.path.join(daluke_path, f"comparison_with_{other_name}.log")
    log.configure(log_path, print_level=Levels.DEBUG)

    daluke_res = NER_Results.load(daluke_path)
    other_res = NER_TestResults.load(other_path)

    if show:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        data = load_dataset({"dataset": "DaNE"}, DUMMY_METADATA, device).data[Split.TEST]
        examples = zip(daluke_res.preds, other_res.predictions, data.annotations, data.texts)
        for da_preds, ot_preds, truths, text in examples:
            if da_preds != ot_preds:
                rows = (
                    ["Text:"] + text,
                    ["Truth:"] + truths,
                    ["DaLUKE pred:"] + da_preds,
                    [f"{other_name} pred:"] + ot_preds,
                )
                table = Table()
                for row in rows:
                    table.add_row(row)
                # Remove the column separators before logging
                log(str(table).replace("|", ""), with_info=False)

    log(f"Confusion matrix with DaLUKE results ↓ and results from {other_name} →")
    confmat = confusion_matrix(
        daluke_res.preds,
        other_res.predictions,
        ["LOC", "PER", "ORG", "MISC", "O"],
    )
    log(_format_confmat(confmat))
    log(f"Covar. {sequence_covar(daluke_res.preds, other_res.predictions)}")
def collect_representations(
    modelpath: str,
    device: torch.device,
    target_device: torch.device,
    only_positives: bool,
    fine_tuned: bool,
) -> tuple[np.ndarray, np.ndarray, list[dict[str, int | list[tuple[int, int]]]]]:
    """Forward pass the DaNE training split and collect entity span representations.

    For every batch, the start-word, end-word and entity representations are
    concatenated along dimension 2, and the entries selected by the entity attention
    mask are kept.

    Args:
        modelpath: Path handed to ``load_from_archive`` to load the model archive.
        device: Device used for the dataset and the model.
        target_device: Device the collected representations and labels are moved to.
            NOTE(review): ``.numpy()`` is called on the concatenated result, so this
            presumably has to be a CPU device; confirm against callers.
        only_positives: If true, entries whose label is 0 are dropped as well.
        fine_tuned: If false, the state dict is first passed through
            ``mutate_for_ner``; if true, it is loaded as-is.

    Returns:
        A tuple ``(representations, labels, content)`` where the first two are numpy
        arrays and ``content`` holds one dict per kept entry with the keys
        ``text_num`` and ``span``.
    """
    # The archive location is the `modelpath` argument; there is no `args` in this scope
    entity_vocab, metadata, state_dict, token_map = load_from_archive(modelpath)

    log("Loading dataset")
    # Note: We don't fill out the dict as we don't allow changing max-entities and
    # max-entity-span here. If this results in an error for any dataset, we must change this.
    dataset = load_dataset(dict(dataset="DaNE"), metadata, device, token_map)
    dataloader = dataset.build(Split.TRAIN, FP_SIZE, shuffle=False)

    log("Loading model")
    ent_embed_size = None
    if not fine_tuned:
        state_dict, ent_embed_size = mutate_for_ner(
            state_dict,
            mask_id=entity_vocab["[MASK]"]["id"],
            pad_id=entity_vocab["[PAD]"]["id"],
        )
    model = load_model(
        state_dict,
        dataset,
        metadata,
        device,
        entity_embedding_size=ent_embed_size,
    )
    model.eval()

    log("Forward passing examples")
    batch_representations, labels, content = [], [], []
    # Gradients are not needed here, and tensors that require grad cannot be
    # converted with .numpy() at the end
    with torch.no_grad():
        for batch in tqdm(dataloader):
            # Use the forward of the super class as we want the representations
            word_representations, entity_representations = super(type(model), model).forward(batch)
            start_word_representations, end_word_representations = model.collect_start_and_ends(
                word_representations, batch
            )
            representations = torch.cat(
                [start_word_representations, end_word_representations, entity_representations],
                dim=2,
            )
            # We don't want padding
            mask = batch.entities.attention_mask.bool()
            if only_positives:
                # Not in-place, so the attention mask of the batch is never modified
                mask = mask & (batch.entities.labels != 0)
            batch_representations.append(representations[mask].contiguous().to(target_device))
            labels.append(batch.entities.labels[mask].contiguous().to(target_device))
            # Record which text and span every kept entry comes from
            for i, text_num in enumerate(batch.text_nums):
                for j in range(batch.entities.N[i]):
                    if mask[i, j]:
                        content.append(
                            dict(
                                text_num=text_num,
                                span=batch.entities.fullword_spans[i][j],
                            )
                        )
    return torch.cat(batch_representations).numpy(), torch.cat(labels).numpy(), content
def main(path: str, pred: str, truth: str):
    """Log test examples where the model predicted ``pred`` while the truth was ``truth``.

    An example is shown when at least one position has differing prediction and
    annotation, and ``cla`` maps the prediction to ``pred`` and the annotation to
    ``truth``.
    """
    log.configure(
        os.path.join(path, f"prediction-examples-{pred}-{truth}.log"),
        print_level=Levels.DEBUG,
    )
    log(f"Looking for examples where model predicted {pred}, but the truth was {truth}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    res = NER_Results.load(path)
    data = load_dataset({"dataset": "DaNE"}, DUMMY_METADATA, device).data[Split.TEST]

    for preds, truths, text in zip(res.preds, data.annotations, data.texts):
        has_confusion = any(
            predicted != actual and cla(predicted) == pred and cla(actual) == truth
            for predicted, actual in zip(preds, truths)
        )
        if not has_confusion:
            continue
        table = Table()
        table.add_row(["Text:"] + text)
        table.add_row(["Truth:"] + truths)
        table.add_row(["Pred:"] + preds)
        # Remove the column separators before logging
        log(str(table).replace("|", ""), with_info=False)
def make_cal_plots(location: str, base_model: str):
    """Create a calibration plot from saved NER results and the DaNE test split.

    The span probabilities stored in the results are paired with the true labels of
    the same spans and handed to ``calibration_plot`` together with ``location``.
    """
    log.configure(os.path.join(location, "calibration-plot.log"))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    results = NER_Results.load(location)

    log("Loading data")
    metadata = {**DEFAULT_METADATA, "base-model": base_model}
    dataset = load_dataset({"dataset": "DaNE"}, metadata, device)
    dataloader = dataset.build(Split.TEST, 1, shuffle=False)

    log("Fetching probs and labels")
    # One span -> label mapping per entry in the saved span probabilities
    truths = [{} for _ in range(len(results.span_probs))]
    for _, example in dataloader.dataset:
        truths[example.text_num].update(
            zip(example.entities.fullword_spans, example.entities.labels)
        )

    flat_preds, flat_truths = [], []
    for text_probs, text_truths in zip(results.span_probs, truths):
        for span, probs in text_probs.items():
            flat_preds.append(probs)
            flat_truths.append(text_truths[span])

    log("Calibration plot")
    calibration_plot(flat_preds, flat_truths, location)
def run_experiment(args: dict[str, Any]):
    """Set up a parameter sampler, dataset and model, then hand them to ``optimize``.

    Keys read directly from ``args``: ``model``, ``params`` (path to a JSON file with
    the parameter lists), ``sampler`` (key into ``SAMPLERS``) and ``dataset``. The
    full dict is additionally passed to ``load_dataset`` and ``optimize``.
    """
    set_seeds(seed=0)
    # Clear the subfolder so that the save location is controlled directly
    NER_Results.subfolder = ""

    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    entity_vocab, metadata, state_dict, token_map = load_from_archive(args["model"])
    state_dict, ent_embed_size = mutate_for_ner(
        state_dict,
        mask_id=entity_vocab["[MASK]"]["id"],
        pad_id=entity_vocab["[PAD]"]["id"],
    )

    log("Setting up sampler")
    with open(args["params"], "r") as params_file:
        param_lists = json.load(params_file)
    sampler_cls = SAMPLERS[args["sampler"]]
    sampler = sampler_cls(param_lists)

    log(f"Loading dataset {args['dataset']} ...")
    dataset = load_dataset(args, metadata, device, token_map)

    log("Loading model ...")
    model = load_model(
        state_dict,
        dataset,
        metadata,
        device,
        entity_embedding_size=ent_embed_size,
    )

    optimize(model, dataset, args, sampler)
def run_experiment(args: dict[str, Any]):
    """Evaluate a model from an archive on the test split and save the results.

    Keys read directly from ``args``: ``model`` and ``location``. The full dict is
    additionally passed to ``load_dataset``.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    # The first element of the archive is not needed for evaluation
    _, metadata, state_dict, token_map = load_from_archive(args["model"])

    log("Loading dataset ...")
    dataset = load_dataset(args, metadata, device, token_map)
    test_loader = dataset.build(Split.TEST, FP_SIZE)

    log("Loading model ...")
    ner_model = load_model(state_dict, dataset, metadata, device)

    # Print some important information to stdout
    log.debug(ner_model)
    dataset.document(test_loader, Split.TEST)
    test_annotations = dataset.data[Split.TEST].annotations
    type_distribution(test_annotations)

    log("Starting evaluation of daLUKE for NER")
    eval_results = evaluate_ner(ner_model, test_loader, dataset, device, Split.TEST)
    eval_results.save(args["location"])
    type_distribution(eval_results.preds)
def main(path: str, n: int):
    """Log the examples with the highest and lowest values along dimensions of interest.

    For every ``field -> axis`` pair in ``OF_INTEREST``, the array stored under
    ``field`` in the geometry results is sorted along ``axis``, and the ``n``
    highest and the ``n`` lowest examples are shown.
    """
    log.configure(
        os.path.join(path, "geometry-examples.log"),
        "daLUKE examples",
        print_level=Levels.DEBUG,
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Hardcoded to train
    data = load_dataset({"dataset": "DaNE"}, DUMMY_METADATA, device).data[Split.TRAIN]
    set_seeds()

    GeometryResults.subfolder = ""
    res = GeometryResults.load(path)

    for field, axis in OF_INTEREST.items():
        log.section(field)
        values = getattr(res, field)
        ascending = values[:, axis].argsort()
        # Reversing the ascending order puts the highest values first
        selections = (
            ("high", ascending[::-1][:n]),
            ("low", ascending[:n]),
        )
        for extreme, indices in selections:
            log(f"Examples where dim. {axis} is {extreme}")
            _show_examples(res, values, indices, data)
def run_experiment(args: dict[str, Any]):
    """Fine-tune DaLUKE for NER and save the results and model(s) to ``args["location"]``.

    Keys read from ``args``: ``location``, ``name``, ``quieter``, ``seed``,
    ``words_only``, ``entities_only``, ``model``, ``dataset``, ``batch_size``,
    ``eval``, ``bert_attention``, ``dropout``, ``epochs``, ``lr``, ``warmup_prop``,
    ``weight_decay`` and ``loss_weight``. The full dict is additionally passed to
    ``load_dataset``.

    When ``args["eval"]`` is set, a dev dataloader is handed to the trainer, the true
    dev set type distribution is stored on the results, and ``training.best_model``
    is saved in addition to the final model.

    Raises:
        AssertionError: If both ``words_only`` and ``entities_only`` are set.
    """
    log.configure(
        os.path.join(args["location"], "daluke-train-ner.log"),
        args["name"] + " Fine-tuning",
        logger=args["name"] + "-fine-tune",
        print_level=Levels.INFO if args["quieter"] else Levels.DEBUG,
    )
    set_seeds(seed=args["seed"])
    assert not (args["words_only"] and args["entities_only"]), "--words-only and --entities-only cannot be used together"
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    entity_vocab, metadata, state_dict, token_map = load_from_archive(args["model"])
    state_dict, ent_embed_size = mutate_for_ner(
        state_dict,
        mask_id=entity_vocab["[MASK]"]["id"],
        pad_id=entity_vocab["[PAD]"]["id"],
    )

    # Add new NER specific fields to metadata
    metadata["NER-words-only"] = args["words_only"]
    metadata["NER-entities-only"] = args["entities_only"]

    log(f"Loading dataset {args['dataset']} ...")
    dataset = load_dataset(args, metadata, device, token_map)
    dataloader = dataset.build(Split.TRAIN, args["batch_size"])
    dev_dataloader = dataset.build(Split.DEV, args["batch_size"]) if args["eval"] else None

    # Remember the dimensionality that the model will be trained with
    metadata["output-size"] = len(dataset.all_labels)

    log("Loading model ...")
    model = load_model(
        state_dict,
        dataset,
        metadata,
        device,
        bert_attention=args["bert_attention"],
        entity_embedding_size=ent_embed_size,
        dropout=args["dropout"],
    )

    log(f"Starting training of DaLUKE for NER on {args['dataset']}")
    training = TrainNER(
        model,
        dataloader,
        dataset,
        device=device,
        epochs=args["epochs"],
        lr=args["lr"],
        warmup_prop=args["warmup_prop"],
        weight_decay=args["weight_decay"],
        dev_dataloader=dev_dataloader,
        loss_weight=args["loss_weight"],
    )

    # Log important information out
    log.debug(training.model)
    log.debug(training.scheduler)
    log.debug(training.optimizer)
    dataset.document(dataloader, Split.TRAIN)
    type_distribution(dataset.data[Split.TRAIN].annotations)

    results = training.run()

    log(f"Saving results and model to {args['location']}")
    save_to_archive(os.path.join(args["location"], TRAIN_OUT), entity_vocab, metadata, model, token_map)

    if args["eval"]:
        log("True dev. set distributions")
        results.dev_true_type_distribution = type_distribution(dataset.data[Split.DEV].annotations)
        # This distribution is computed on the training split, so the message says so
        log("True train. set distributions")
        results.train_true_type_distribution = type_distribution(dataset.data[Split.TRAIN].annotations)
        log("Saving best model")
        save_to_archive(
            os.path.join(args["location"], TRAIN_OUT_BEST),
            entity_vocab,
            metadata,
            training.best_model,
            token_map,
        )

    results.save(args["location"])