def __init__(self, samples, results_csv_path, tuner_domains=TUNER_DOMAINS,
             validation_samples=None,
             show_progress=True, show_epoch_eval=True,
             dump_models=False, dump_pss_eval=False,
             evaluator=PSSClasifierEvaluator(),
             tuner_score_getter=lambda evaluations: max([e['f1'] or 0 for e in evaluations]),
             tuner_results_getter=extract_classifier_evaluator_results,
             task_name=''):
    self.task_name = task_name
    self.dump_models = dump_models
    self.dump_pss_eval = dump_pss_eval
    self.fit_kwargs = None
    self.tuner_results_getter = tuner_results_getter
    self.tuner_score_getter = tuner_score_getter
    assert evaluator is not None

    def dump_result(output_dir, result, params):
        # Optionally persist the trained model and/or the STREUSLE psseval
        # output for each sampled hyperparameter configuration.
        if self.dump_models:
            result.predictor.save(output_dir + '/model')
        if self.dump_pss_eval:
            ident = 'autoid'
            StreusleEvaluator(result.predictor).evaluate(
                validation_samples,
                output_tsv_path=output_dir + '/psseval_out.tsv',
                ident=ident)

    self.tuner = HyperparametersTuner(
        task_name=task_name,
        results_csv_path=results_csv_path,
        params_settings=tuner_domains,
        executor=self._execute,
        csv_row_builder=build_csv_rows,
        shared_csv=True,
        lock_file_path=results_csv_path + '.lock',
        dump_result=dump_result)

    # Keyword arguments used when fitting the model during tuning.
    self.fit_kwargs = {
        'samples': samples,
        'validation_samples': validation_samples,
        'show_progress': show_progress,
        'show_epoch_eval': show_epoch_eval,
        'evaluator': evaluator
    }
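# Illustration only (not part of the original file): the default
# tuner_score_getter above collapses a list of per-epoch evaluation dicts into
# a single score by taking the best non-None F1. The dicts below are invented
# solely to show the shape the lambda expects; only the 'f1' key is read.
#
#     score_getter = lambda evaluations: max([e['f1'] or 0 for e in evaluations])
#     score_getter([{'f1': 0.61}, {'f1': None}, {'f1': 0.68}])  # -> 0.68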
def run():
    loader = StreusleLoader()
    STREUSLE_BASE = os.environ.get('STREUSLE_BASE') or '/cs/usr/aviramstern/lab/nlp/datasets/streusle_v4/release'
    task = 'goldid.goldsyn'
    train_records = loader.load(STREUSLE_BASE + '/train/streusle.ud_train.' + task + '.json', input_format='json')
    dev_records = loader.load(STREUSLE_BASE + '/dev/streusle.ud_dev.' + task + '.json', input_format='json')
    test_records = loader.load(STREUSLE_BASE + '/test/streusle.ud_test.' + task + '.json', input_format='json')
    train_samples = [streusle_record_to_lstm_model_sample(r) for r in train_records]
    dev_samples = [streusle_record_to_lstm_model_sample(r) for r in dev_records]
    test_samples = [streusle_record_to_lstm_model_sample(r) for r in test_records]

    test_features()

    GOLD_ID_AUTO_PREP = json.loads("""{
        "mask_mwes": false,
        "learning_rate_decay": 0.0001,
        "lstm_h_dim": 100,
        "mlp_layers": 2,
        "is_bilstm": true,
        "num_lstm_layers": 2,
        "dynet_random_seed": "7564313",
        "use_ud_xpos": true,
        "ner_embd_dim": 10,
        "allow_empty_prediction": false,
        "learning_rate": 0.15848931924611143,
        "mlp_activation": "relu",
        "use_lexcat": true,
        "use_govobj": true,
        "token_embd_dim": 300,
        "update_lemmas_embd": true,
        "govobj_config_embd_dim": 3,
        "ud_deps_embd_dim": 10,
        "mlp_layer_dim": 100,
        "mlp_dropout_p": 0.37,
        "ud_xpos_embd_dim": 25,
        "use_ner": true,
        "update_token_embd": false,
        "epochs": 1,
        "lstm_dropout_p": 0.38,
        "use_ud_dep": true,
        "lexcat_embd_dim": 3,
        "use_prep_onehot": false,
        "use_token": true,
        "use_token_internal": true,
        "token_internal_embd_dim": 10,
        "labels_to_predict": ["supersense_role", "supersense_func"]
    }""")

    print('Training model..')
    model = LstmMlpSupersensesModel(
        hyperparameters=LstmMlpSupersensesModel.HyperParameters(**GOLD_ID_AUTO_PREP),
    )
    predictor = model.fit(train_samples, dev_samples)

    evaluator = PSSClasifierEvaluator(predictor.model)
    evaluator.evaluate([model.sample_to_lowlevel(s) for s in test_samples])

    # Predict supersenses for the Boknilev PP-attachment data and dump them.
    btrain, bdev, btest = load_boknilev()
    all_samples = btrain + bdev + btest
    predictions = {}
    for ind, sample in enumerate(all_samples):
        print("%d/%d" % (ind, len(all_samples)))
        lm_sample_xs = boknilev_record_to_lstm_model_sample_xs(sample)
        lm_sample_ys = model.predict(lm_sample_xs)
        predictions[sample['sent_id']] = {}
        # Use a distinct loop variable so the sentence counter above is not shadowed.
        for tok_ind, (sx, sy) in enumerate(zip(lm_sample_xs, lm_sample_ys)):
            if sx.identified_for_pss:
                predictions[sample['sent_id']][tok_ind] = (sy.supersense_role, sy.supersense_func)

    dump_boknilev_pss(predictions)
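# Illustration of the data layout (an assumption, not from the original file):
# the `predictions` dict built above maps each sentence id to a dict keyed by
# token index, whose values are (supersense_role, supersense_func) pairs, e.g.
#
#     {'some-sent-id': {3: ('Locus', 'Locus'), 7: ('Gestalt', 'Possessor')}}
#
# Only tokens flagged as identified_for_pss receive an entry.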
import os
import copy
import json
from collections import defaultdict

from datasets.pp_attachement.boknilev.load_boknilev import load_boknilev, dump_boknilev_pss
from datasets.streusle_v4 import StreusleLoader
from evaluators.pss_classifier_evaluator import PSSClasifierEvaluator
from models.supersenses.boknilev_integration import boknilev_record_to_lstm_model_sample_xs
from models.supersenses.features.features_test import test_features
from models.supersenses.lstm_mlp_supersenses_model import LstmMlpSupersensesModel
from models.supersenses.streusle_integration import streusle_record_to_lstm_model_sample

evaluator = PSSClasifierEvaluator()
def run():
    loader = StreusleLoader()
    STREUSLE_BASE = os.environ.get('STREUSLE_BASE') or '/cs/usr/aviramstern/lab/nlp/datasets/streusle_v4/release'
    task = 'goldid.goldsyn'
    train_records = loader.load(STREUSLE_BASE + '/train/streusle.ud_train.' + task + '.json', input_format='json')
    dev_records = loader.load(STREUSLE_BASE + '/dev/streusle.ud_dev.' + task + '.json', input_format='json')
    test_records = loader.load(STREUSLE_BASE + '/test/streusle.ud_test.' + task + '.json', input_format='json')
    train_samples = [streusle_record_to_lstm_model_sample(r) for r in train_records]
    dev_samples = [streusle_record_to_lstm_model_sample(r) for r in dev_records]
    # test_samples = [streusle_record_to_lstm_model_sample(r) for r in test_records]

    test_features()

    GOLD_ID_GOLD_PREP_WITH_NER = json.loads("""{
        "mask_mwes": false,
        "learning_rate_decay": 0.00031622776601683794,
        "lstm_h_dim": 100,
        "mlp_layers": 2,
        "is_bilstm": true,
        "num_lstm_layers": 2,
        "dynet_random_seed": "3857654",
        "use_ud_xpos": true,
        "ner_embd_dim": 5,
        "allow_empty_prediction": false,
        "learning_rate": 0.15848931924611143,
        "mlp_activation": "relu",
        "use_lexcat": true,
        "use_govobj": true,
        "token_embd_dim": 300,
        "update_lemmas_embd": false,
        "govobj_config_embd_dim": 3,
        "ud_deps_embd_dim": 25,
        "mlp_layer_dim": 100,
        "mlp_dropout_p": 0.42,
        "ud_xpos_embd_dim": 5,
        "use_ner": true,
        "update_token_embd": false,
        "epochs": 80,
        "lstm_dropout_p": 0.49,
        "use_ud_dep": true,
        "lexcat_embd_dim": 3,
        "use_prep_onehot": false,
        "use_token": true,
        "use_token_internal": true,
        "token_internal_embd_dim": 10,
        "labels_to_predict": ["supersense_role", "supersense_func"]
    }""")

    # Ablation: the same configuration with NER features disabled.
    GOLD_ID_GOLD_PREP_WITHOUT_NER = copy.deepcopy(GOLD_ID_GOLD_PREP_WITH_NER)
    GOLD_ID_GOLD_PREP_WITHOUT_NER['use_ner'] = False

    tasks = {
        'GOLD_ID_GOLD_PREP_WITH_NER': GOLD_ID_GOLD_PREP_WITH_NER,
        'GOLD_ID_GOLD_PREP_WITHOUT_NER': GOLD_ID_GOLD_PREP_WITHOUT_NER
    }

    task_acc = defaultdict(list)
    N_SAMPLES = 3
    for _ in range(N_SAMPLES):
        for task, hp in tasks.items():
            model = LstmMlpSupersensesModel(
                hyperparameters=LstmMlpSupersensesModel.HyperParameters(**hp),
            )
            predictor = model.fit(train_samples, dev_samples)

            evaluator = PSSClasifierEvaluator(predictor.model)
            # The reported score is the evaluator's F1 on the dev set.
            acc = evaluator.evaluate([model.sample_to_lowlevel(s) for s in dev_samples])['f1']
            task_acc[task].append(acc)

    for task, accs in task_acc.items():
        print(task + ": " + ", ".join(["%2.2f" % acc for acc in accs]))
        print(task + ": Mean is %2.2f" % (sum(accs) / len(accs)))