def test_concept_similarity(self):
    """Check that ``most_similar`` returns ``topn`` hits on a populated CDB.

    Populates a fresh CDB with 500 synthetic concepts (``C0`` .. ``C499``),
    spread over ten type ids (``T-0`` .. ``T-9``), gives every concept one
    random 300-element vector per configured context type, and then
    queries the concepts most similar to ``C200`` restricted to type
    ``T-0``.
    """
    cfg = self.config
    cdb = CDB(config=cfg)
    # Fixed seed so the random context vectors are the same on every run.
    np.random.seed(11)

    for idx in range(500):
        concept_id = f"C{idx}"
        names = prepare_name(f"Name: {idx}", self.maker.nlp, {}, cfg)
        cdb.add_concept(
            cui=concept_id,
            names=names,
            ontologies=set(),
            name_status='P',
            type_ids={f"T-{idx % 10}"},
            description='',
            full_build=True,
        )

        # Draw the vectors in the iteration order of the configured
        # context types so the random stream is consumed deterministically.
        context_vectors = {
            context_type: np.random.rand(300)
            for context_type in cfg.linking['context_vector_sizes']
        }
        cdb.update_context_vector(concept_id, context_vectors, negative=False)

    # 50 of the 500 concepts carry type T-0, so ten hits should be available.
    similar = cdb.most_similar(
        'C200',
        'long',
        type_id_filter=['T-0'],
        min_cnt=1,
        topn=10,
        force_build=True,
    )
    assert len(similar) == 10
# Populate a fresh CDB with 500 synthetic concepts, each carrying random
# context vectors, then exercise similarity search and the training reset.
cdb = CDB(config=config)
np.random.seed(11)  # fixed seed keeps the random vectors reproducible

for i in range(500):
    cui = f"C{i}"
    type_ids = {f"T-{i % 10}"}  # ten type ids in total: T-0 .. T-9
    cdb.add_concept(
        cui=cui,
        names=prepare_name(f"Name: {i}", maker.nlp, {}, config),
        ontologies=set(),
        name_status='P',
        type_ids=type_ids,
        description='',
        full_build=True,
    )

    # One random 300-element vector for every configured context type.
    vectors = {}
    for cntx_type in config.linking['context_vector_sizes']:
        vectors[cntx_type] = np.random.rand(300)
    cdb.update_context_vector(cui, vectors, negative=False)

# Query the neighbours of C200, restricted to concepts of type T-0.
res = cdb.most_similar(
    'C200',
    'long',
    type_id_filter=['T-0'],
    min_cnt=1,
    topn=10,
    force_build=True,
)
assert len(res) == 10

# Test training reset: afterwards C0 must have no context vectors and a
# training count of zero.
cdb.reset_training()
assert len(cdb.cui2context_vectors['C0']) == 0
assert cdb.cui2count_train['C0'] == 0