def load_train_vectors(self, embeddings_infile, prune=True):
    """Load the training embedding vectors into ``self.train_vectors``.

    Parameters
    ----------
    embeddings_infile : str
        Path (prefix) understood by ``Reach.load_fast_format``.
    prune : bool, optional
        When True (the default), restrict the loaded vectors to the
        exemplar keys in ``self.exemplar_to_concept``.

    Raises
    ------
    ValueError
        If pruning is requested but ``self.exemplar_to_concept`` is
        empty or unset.
    """
    print('Loading vectors...')
    self.train_vectors = Reach.load_fast_format(embeddings_infile)
    if prune:
        # Prune embeddings to the selected target ontology.
        # Explicit check instead of `assert`: asserts are removed
        # when Python runs with -O, silently skipping validation.
        if not self.exemplar_to_concept:
            raise ValueError(
                'exemplar_to_concept must be populated before pruning')
        self.train_vectors.prune(list(self.exemplar_to_concept.keys()))
        print(len(self.train_vectors.items), len(self.exemplar_to_concept))
# Evaluation driver: score predicted BIO chunks (UIMA output, or gold
# chunks when `perfect` is set) against the gold annotations.
# NOTE(review): this chunk is truncated — the final `eval_extrinsic(...)`
# call is cut off mid-argument-list; the remaining arguments are not
# visible here.
perfect = False

# Gold data: dict keyed by document id; sorting + zip yields the values
# in a stable, key-sorted order.
gold = json.load(open("data/test_gold.json"))
gold = list(zip(*sorted(gold.items())))[1]

# `perfect` replays the gold chunking instead of the UIMA system output.
if perfect:
    data = json.load(open("data/test_gold.json"))
else:
    data = json.load(open("data/test_uima.json"))
data = list(zip(*sorted(data.items())))[1]

# Each document is (tokens, BIO tags); keep the tag streams.
txt, gold_bio = zip(*gold)
_, data_bio = zip(*data)

# Word embeddings and pre-built concept vectors.
# NOTE(review): the empty path "" passed to Reach.load looks like a
# placeholder — confirm the intended embeddings file.
embeddings = Reach.load("", unk_word="UNK")
concept_reach = Reach.load_fast_format("data/concept_vectors")
concept_labels = json.load(open("data/concept_names2label.json"))

# Flatten per-document gold tags into one token-level sequence.
gold_bio = list(chain.from_iterable(gold_bio))
results_bio = {}

# Compose phrase representations with mean pooling and a reciprocal
# context weighting, no context window.
r_phrases = compose(data, f1=np.mean, f2=np.mean, window=0,
                    embeddings=embeddings, context_function=reciprocal)

# Truncated call — argument list continues beyond this chunk.
pred_bio_focus = eval_extrinsic(list(chain.from_iterable(data_bio)),
                                r_phrases, concept_reach, concept_labels,
# Set this flag to true to replicate the perfect chunking setting # in experiment 3. perfect = True gold = json.load(open("data/test_gold.json")) gold = list(zip(*sorted(gold.items())))[1] if perfect: data = json.load(open("data/test_gold.json")) data = list(zip(*sorted(data.items())))[1] txt, gold_bio = zip(*gold) r = Reach.load("../../corpora/mimiciii-min5-neg3-w5-100.vec", unk_word="<UNK>") r_concept = Reach.load_fast_format(f"data/concept_vectors") concept_labels = json.load(open("data/names2label.json")) grouped = defaultdict(list) for k, v in concept_labels.items(): grouped[v].append(r_concept[k]) grouped.pop("np") memory = {} for k, v in tqdm(grouped.items()): km = KMeans(10) km.fit(v) memory[k] = km.cluster_centers_
def load_test_vectors(self, embeddings_infile):
    """Read the test-set embedding vectors into ``self.test_vectors``.

    Parameters
    ----------
    embeddings_infile : str
        Path (prefix) understood by ``Reach.load_fast_format``.
    """
    print('Loading vectors...')
    vectors = Reach.load_fast_format(embeddings_infile)
    self.test_vectors = vectors