def test_serialize(): """Test that models can correctly be saved to and loaded from gzipped json """ texts = data['texts'] classifier1 = load_model(os.path.join(TEST_MODEL_PATH, 'IR', 'IR_model.gz')) temp_filename = os.path.join(SCRATCH_PATH, uuid.uuid4().hex) classifier1.dump_model(temp_filename) classifier2 = load_model(temp_filename) classifier2.dump_model(temp_filename) classifier3 = load_model(temp_filename) preds1, preds2, preds3 = (classifier1.predict_proba(texts), classifier2.predict_proba(texts), classifier3.predict_proba(texts)) # Check that generated predictions are the same assert np.array_equal(preds1, preds2) assert np.array_equal(preds2, preds3) # Check that model stats are the same assert classifier1.stats == classifier2.stats == classifier3.stats # Check that the calculated feature importance scores are the same assert classifier1.feature_importances() == \ classifier2.feature_importances() == \ classifier3.feature_importances() os.remove(temp_filename)
def test_disambiguate():
    """Exercise AdeftDisambiguator on texts with and without defining patterns."""
    model = load_model(os.path.join(TEST_MODEL_PATH, 'IR', 'IR_model.gz'))
    gdict_file = os.path.join(TEST_MODEL_PATH, 'IR', 'IR_grounding_dict.json')
    with open(gdict_file) as fp:
        grounding_dict = json.load(fp)
    names_file = os.path.join(TEST_MODEL_PATH, 'IR', 'IR_names.json')
    with open(names_file) as fp:
        names = json.load(fp)
    disambiguator = AdeftDisambiguator(model, grounding_dict, names)

    # case where there is a unique defining pattern
    grounding, name, pred_map = disambiguator.disambiguate(example1)
    assert grounding == 'HGNC:6091'
    assert name == 'INSR'
    assert pred_map['HGNC:6091'] == 1.0
    assert pred_map['MESH:D011839'] == 0.0

    # case where there are conflicting defining patterns
    _, _, preds = disambiguator.disambiguate(example2)
    nonzero = {grd for grd, prob in preds.items() if prob > 0.0}
    assert nonzero == {'HGNC:6091', 'MESH:D007333'}

    # case without a defining pattern
    grounding, name, _ = disambiguator.disambiguate(example3)
    assert grounding == 'HGNC:6091'
    assert name == 'INSR'
def test_serialize(): """Test that models can correctly be saved to and loaded from gzipped json """ texts = data['texts'] classifier1 = load_model(os.path.join(TEST_MODEL_PATH, 'IR', 'IR_model.gz')) temp_filename = os.path.join(SCRATCH_PATH, uuid.uuid4().hex) classifier1.dump_model(temp_filename) classifier2 = load_model(temp_filename) classifier2.other_metadata = {'test': 'This is a test.'} classifier2.dump_model(temp_filename) classifier3 = load_model(temp_filename) preds1, preds2, preds3 = (classifier1.predict_proba(texts), classifier2.predict_proba(texts), classifier3.predict_proba(texts)) # Check that generated predictions are the same assert np.array_equal(preds1, preds2) assert np.array_equal(preds2, preds3) # Check that model stats are the same assert classifier1.stats == classifier2.stats == classifier3.stats # Check that the calculated feature importance scores are the same assert classifier1.feature_importances() == \ classifier2.feature_importances() == \ classifier3.feature_importances() # Check timestamps are unchanged assert classifier1.timestamp == classifier2.timestamp == \ classifier3.timestamp # Check hash of training set is unchanged assert classifier1.training_set_digest == \ classifier2.training_set_digest == \ classifier3.training_set_digest # Check standard deviations of feature values are unchanged assert np.array_equal(classifier1._std, classifier2._std) assert np.array_equal(classifier2._std, classifier3._std) # Check classifier versions are unchanged assert classifier1.version == classifier2.version == \ classifier3.version # Check that model params are unchanged assert classifier1.params == classifier2.params == classifier3.params assert classifier2.other_metadata == classifier3.other_metadata os.remove(temp_filename)
def load_disambiguator_directly(path):
    """Returns disambiguator located at path

    Parameters
    ----------
    path : str
        Path to a disambiguation model. Must be a path to a directory
        <model_name> containing the files <model_name>_model.gz,
        <model_name>_grounding_dict.json, <model_name>_names.json

    Returns
    -------
    py:class:`adeft.disambiguate.AdeftDisambiguator`
        A disambiguation model loaded from folder specified by path
    """
    # The directory's basename doubles as the prefix of every resource file.
    model_name = os.path.basename(os.path.abspath(path))

    def _resource(suffix):
        # Build the path to one of the model's companion files.
        return os.path.join(path, model_name + suffix)

    model = load_model(_resource('_model.gz'))
    with open(_resource('_grounding_dict.json')) as fp:
        grounding_dict = json.load(fp)
    with open(_resource('_names.json')) as fp:
        names = json.load(fp)
    return AdeftDisambiguator(model, grounding_dict, names)
# NOTE(review): this is a fragment of a larger function/loop — model_path,
# model_name, available_shortforms, groundings_path, and strip_dictionary
# are defined outside this view. It normalizes (presumably whitespace-strips
# — TODO confirm against strip_dictionary's definition) the names,
# grounding dict, and classifier labels of one on-disk model, rewriting
# each file in place.
if os.path.isdir(model_path) and \
        model_name in set(available_shortforms.values()):
    # Rewrite the names file with a cleaned-up dictionary.
    names_path = os.path.join(model_path, f'{model_name}_names.json')
    with open(names_path, 'r') as f:
        names = strip_dictionary(json.load(f))
    with open(names_path, 'w') as f:
        json.dump(names, f)
    # Clean each per-shortform grounding map in the grounding dict.
    gdict_path = os.path.join(model_path,
                              f'{model_name}_grounding_dict.json')
    with open(gdict_path, 'r') as f:
        grounding_dict = json.load(f)
    grounding_dict = {shortform: strip_dictionary(grounding_map)
                      for shortform, grounding_map
                      in grounding_dict.items()}
    # Strip whitespace from the model's positive labels and from the
    # fitted classifier's class labels, then persist the model in place.
    model_file = os.path.join(model_path, f'{model_name}_model.gz')
    model = load_model(model_file)
    model.pos_labels = [label.strip() for label in model.pos_labels]
    for i, label in (
            enumerate(model.estimator.named_steps['logit'].classes_)):
        # classes_ is mutated element-wise rather than reassigned wholesale.
        model.estimator.named_steps['logit'].classes_[i] = \
            label.strip()
    model.dump_model(model_file)
    with open(gdict_path, 'w') as f:
        json.dump(grounding_dict, f)
    # Per-shortform grounding resources; the fragment ends here, so the
    # use of names_path below is completed outside this view.
    for shortform in grounding_dict:
        grounding_path = os.path.join(groundings_path, shortform)
        names_path = os.path.join(grounding_path,
                                  f'{shortform}_names.json')