def on_epoch_end(self, epoch, logs=None):
    # Keras callback hook: score the dev set after every epoch and checkpoint
    # the weights whenever the validation F1 improves.
    if logs is None:
        logs = {}
    self.multi = self.multi or len(self.model.outputs) > 1
    val_predict = self.model.predict(self.dev_x)
    if self.multi:
        # Multi-output models return a list; the main task comes first
        val_predict = val_predict[0]
    if val_predict.shape[1] == 1:
        # Regression: map normalized outputs back to integer scores
        val_predict = rescale_regression_results(
            val_predict, self.highest_class
        ).ravel()
    if self.ranked:
        val_predict = K.eval(ranked_prediction(val_predict))
    logger.debug('Transformed predict\n%r', val_predict[:5])
    _val_f1 = f1_metric(self.dev_y, val_predict, self.average)
    self.val_f1s.append(_val_f1)
    logs['val_f1'] = _val_f1
    if _val_f1 > self.best_f1:
        print(
            "Epoch %d: val_f1 improved from %f to %f: saving weights as %s"
            % (epoch + 1, self.best_f1, _val_f1, self.weights_path)
        )
        self.best_f1 = _val_f1
        self.model.save_weights(self.weights_path)
    else:
        print(
            "Epoch %d: val_f1 did not improve (%f >= %f)"
            % (epoch + 1, self.best_f1, _val_f1)
        )
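# --- Hedged sketches of two helpers used by the callback above -------------
# Neither f1_metric nor ranked_prediction is defined in this file; the bodies
# below are assumptions illustrating plausible implementations, not the
# project's actual code.

# f1_metric is assumed to be a thin wrapper around scikit-learn's f1_score,
# with `average` passed through (e.g. 'macro' or 'weighted'):
from sklearn.metrics import f1_score

def f1_metric(y_true, y_pred, average):
    return f1_score(y_true, y_pred, average=average)

# ranked_prediction is assumed to decode ordinal ("ranked") sigmoid outputs in
# the Frank & Hall style: unit k estimates P(y > k), so the predicted class is
# the number of units whose activation exceeds 0.5. K is the Keras backend.
def ranked_prediction(predictions):
    return K.sum(K.cast(K.greater(predictions, 0.5), 'int32'), axis=-1)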
def main():
    # Evaluate previously trained models on the held-out test split.
    args = parse_args()
    model_paths = find_model_paths(args.job_ids)
    print(model_paths)
    if args.collapsed:
        # Collapsed CEFR scale (merged classes)
        test_meta = load_split("test", round_cefr=True)
        targets = np.array(
            [ROUND_CEFR_LABELS.index(c) for c in test_meta["cefr"]], dtype=int)
        highest_class = 3
        labels = ROUND_CEFR_LABELS
    else:
        test_meta = load_split("test", round_cefr=False)
        targets = np.array(
            [CEFR_LABELS.index(c) for c in test_meta["cefr"]], dtype=int)
        highest_class = 6
        labels = CEFR_LABELS
    for model_path in model_paths:
        model, w2i = load_model_and_w2i(model_path)
        multi_input = isinstance(model.input, list) and len(model.input) == 2
        multi_output = isinstance(model.outputs, list) and len(model.outputs) > 1
        x = get_input_reps(w2i, multi_input)
        del w2i
        predictions = get_predictions(model, x, multi_output)
        # Free memory before loading the next model
        del x
        del model
        # Round to integers and clip to score range
        pred = rescale_regression_results(predictions, highest_class).ravel()
        report(targets, pred, labels)
        name = model_path.stem + "_test_eval"
        save_results(name, {}, {}, targets, pred)
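# get_predictions is called above but not defined in this file. A minimal
# sketch (an assumption on my part) that mirrors the inline pattern used by
# the training scripts below, which take the first output of a multi-output
# model:
def get_predictions(model, x, multi_output):
    predictions = model.predict(x)
    # Multi-output models return a list of arrays; the main task is first
    return predictions[0] if multi_output else predictions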
def test_rescale_regression_results():
    num_class = 7
    y = np.array([1, 2, 6, 4, 0, 2, 1, 5, 1, 0, 4, 4, 6, 2, 5])
    norm_y = y / num_class
    assert_array_equal(rescale_regression_results(norm_y, num_class), y)
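# A minimal sketch of rescale_regression_results consistent with the test
# above and with the "round to integers and clip to score range" comments at
# the call sites: regression outputs are assumed normalized to [0, 1], so we
# scale back up by the highest class index, round, and clip. This is an
# assumption, not necessarily the project's exact implementation.
def rescale_regression_results(predictions, highest_class):
    scaled = np.round(predictions * highest_class)
    return np.clip(scaled, 0, highest_class).astype(int)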
def main():
    # Train a model on sequence input representations and evaluate on the
    # dev split.
    args = parse_args()
    set_reproducible(args.seed_delta)

    train_meta = load_split("train", round_cefr=args.round_cefr)
    dev_meta = load_split("dev", round_cefr=args.round_cefr)

    target_col = "lang" if args.nli else "cefr"
    labels = sorted(train_meta[target_col].unique())

    train_x, dev_x, num_pos, w2i = get_sequence_input_reps(args)
    args.vocab_size = len(w2i)
    print("Vocabulary size is {}".format(args.vocab_size))

    train_target_scores = np.array(
        [labels.index(c) for c in train_meta[target_col]], dtype=int)
    dev_target_scores = np.array(
        [labels.index(c) for c in dev_meta[target_col]], dtype=int)
    del target_col

    train_y, dev_y, output_units = get_targets_and_output_units(
        train_target_scores, dev_target_scores, args.method)
    optimizer, loss, metrics = get_compile_args(args.method, args.lr)

    multi_task = args.aux_loss_weight > 0
    if multi_task:
        assert not args.nli, "Both NLI and multi-task specified"
        # Add native language identification as an auxiliary task
        lang_labels = sorted(train_meta.lang.unique())
        train_y.append(
            to_categorical([lang_labels.index(l) for l in train_meta.lang]))
        dev_y.append(
            to_categorical([lang_labels.index(l) for l in dev_meta.lang]))
        output_units.append(len(lang_labels))
        loss_weights = {
            AUX_OUTPUT_NAME: args.aux_loss_weight,
            OUTPUT_NAME: 1.0 - args.aux_loss_weight,
        }
    else:
        loss = loss[OUTPUT_NAME]
        metrics = metrics[OUTPUT_NAME]
        loss_weights = None
    del train_meta, dev_meta

    model = build_model(
        args.vocab_size,
        args.doc_length,
        output_units,
        args.embed_dim,
        windows=args.windows,
        num_pos=num_pos,
        constraint=args.constraint,
        static_embs=args.static_embs,
        classification=args.method == "classification",
    )
    model.summary()
    if args.vectors:
        init_pretrained_embs(model, args.vectors, w2i)

    model.compile(optimizer=optimizer, loss=loss, loss_weights=loss_weights,
                  metrics=metrics)
    logger.debug("Train y\n%r", train_y[0][:5])
    logger.debug("Model config\n%r", model.get_config())

    # Context manager fails on Windows (can't open an open file again)
    temp_handle, weights_path = tempfile.mkstemp(suffix=".h5")
    val_y = dev_target_scores
    callbacks = [
        F1Metrics(dev_x, val_y, weights_path, ranked=args.method == "ranked")
    ]
    history = model.fit(
        train_x,
        train_y,
        epochs=args.epochs,
        batch_size=args.batch_size,
        callbacks=callbacks,
        validation_data=(dev_x, dev_y),
        verbose=2,
    )
    # Restore the best weights checkpointed by the F1 callback
    model.load_weights(weights_path)
    os.close(temp_handle)
    os.remove(weights_path)

    true = dev_target_scores
    if multi_task:
        predictions = model.predict(dev_x)[0]
    else:
        predictions = model.predict(dev_x)
    if args.method == "classification":
        pred = np.argmax(predictions, axis=1)
    elif args.method == "regression":
        # Round to integers and clip to score range
        highest_class = train_target_scores.max()
        pred = rescale_regression_results(predictions, highest_class).ravel()
    elif args.method == "ranked":
        pred = K.eval(ranked_prediction(predictions))

    try:
        if multi_task:
            multi_task_report(history.history, true, pred, labels)
        else:
            report(true, pred, labels)
    except Exception:
        # Reporting/plotting errors should not prevent saving results below
        pass

    name = get_name(args.nli, multi_task)
    name = get_file_name(name)
    if args.save_model:
        save_model(name, model, w2i)
    save_results(name, args.__dict__, history.history, true, pred)
    plt.show()
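# get_targets_and_output_units is shared by the training scripts but not shown
# in this file. The sketch below only illustrates the contract implied by the
# call sites: it returns lists (so the multi-task branch can append the
# auxiliary target) plus one output size per task. The encodings used in the
# 'regression' and 'ranked' branches are assumptions.
def get_targets_and_output_units(train_scores, dev_scores, method):
    num_classes = train_scores.max() + 1
    if method == 'classification':
        # One-hot targets, one softmax unit per class
        train_y = [to_categorical(train_scores)]
        dev_y = [to_categorical(dev_scores)]
        output_units = [num_classes]
    elif method == 'regression':
        # Scores normalized to [0, 1]; undone later by rescale_regression_results
        highest = train_scores.max()
        train_y = [train_scores / highest]
        dev_y = [dev_scores / highest]
        output_units = [1]
    elif method == 'ranked':
        # Assumed ordinal encoding: target unit k is 1 iff the score exceeds k,
        # matching a ranked_prediction that counts activations above 0.5
        train_y = [np.array([[int(s > k) for k in range(num_classes - 1)]
                             for s in train_scores])]
        dev_y = [np.array([[int(s > k) for k in range(num_classes - 1)]
                           for s in dev_scores])]
        output_units = [num_classes - 1]
    else:
        raise ValueError('Unknown method: %s' % method)
    return train_y, dev_y, output_units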
def main():
    args = parse_args()
    set_reproducible(args.seed_delta)

    train_meta = load_split('train', round_cefr=args.round_cefr)
    dev_meta = load_split('dev', round_cefr=args.round_cefr)

    target_col = 'lang' if args.nli else 'cefr'
    labels = sorted(train_meta[target_col].unique())

    train_x, dev_x, num_pos, w2i = get_sequence_input_reps(args)
    args.vocab_size = len(w2i)
    print("Vocabulary size is {}".format(args.vocab_size))

    train_target_scores = np.array(
        [labels.index(c) for c in train_meta[target_col]], dtype=int)
    dev_target_scores = np.array(
        [labels.index(c) for c in dev_meta[target_col]], dtype=int)
    del target_col

    train_y, dev_y, output_units = get_targets_and_output_units(
        train_target_scores, dev_target_scores, args.method)
    optimizer, loss, metrics = get_compile_args(args)

    multi_task = args.aux_loss_weight > 0
    if multi_task:
        assert not args.nli, "Both NLI and multi-task specified"
        lang_labels = sorted(train_meta.lang.unique())
        train_y.append(
            to_categorical([lang_labels.index(l) for l in train_meta.lang]))
        dev_y.append(
            to_categorical([lang_labels.index(l) for l in dev_meta.lang]))
        output_units.append(len(lang_labels))
        loss_weights = {
            AUX_OUTPUT_NAME: args.aux_loss_weight,
            OUTPUT_NAME: 1.0 - args.aux_loss_weight,
        }
    else:
        loss = loss[OUTPUT_NAME]
        metrics = metrics[OUTPUT_NAME]
        loss_weights = None

    model = build_model(args, output_units=output_units, num_pos=num_pos)
    model.summary()
    if args.vectors:
        init_pretrained_embs(model, args.vectors, w2i)

    model.compile(optimizer=optimizer, loss=loss, loss_weights=loss_weights,
                  metrics=metrics)

    # Context manager fails on Windows (can't open an open file again)
    temp_handle, weights_path = tempfile.mkstemp(suffix='.h5')
    val_y = dev_target_scores
    callbacks = [
        F1Metrics(dev_x, val_y, weights_path, ranked=args.method == 'ranked')
    ]
    history = model.fit(
        train_x,
        train_y,
        epochs=args.epochs,
        batch_size=args.batch_size,
        callbacks=callbacks,
        validation_data=(dev_x, dev_y),
        verbose=2,
    )
    model.load_weights(weights_path)
    os.close(temp_handle)
    os.remove(weights_path)

    predictions = get_predictions(model, dev_x, multi_task)
    true = dev_target_scores
    if args.method == 'classification':
        pred = np.argmax(predictions, axis=1)
    elif args.method == 'regression':
        # Round to integers and clip to score range
        highest_class = train_target_scores.max()
        pred = rescale_regression_results(predictions, highest_class).ravel()
    elif args.method == 'ranked':
        pred = K.eval(ranked_prediction(predictions))

    try:
        if multi_task:
            multi_task_report(history.history, true, pred, labels)
        else:
            report(true, pred, labels)
    except Exception:
        pass

    if args.nli:
        name = 'rnn-nli'
    elif multi_task:
        name = 'rnn-multi'
    else:
        name = 'rnn'
    name = get_file_name(name)
    if args.save_model:
        save_model(name, model, w2i)
    save_results(name, args.__dict__, history.history, true, pred)
    plt.show()
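# set_reproducible is called at the top of every main but not defined here.
# A plausible sketch, assuming it seeds the usual RNG sources with a fixed
# base seed offset by seed_delta; base_seed is a hypothetical parameter, and
# `import tensorflow as tf` is assumed to be present in the real module.
import random

def set_reproducible(seed_delta, base_seed=0):
    seed = base_seed + seed_delta
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)  # TF 1.x API; use tf.random.set_seed on TF 2.x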
def main():
    args = parse_args()
    set_reproducible(args.seed_delta)
    do_classification = args.method == 'classification'

    train_meta = load_split('train', round_cefr=args.round_cefr)
    dev_meta = load_split('dev', round_cefr=args.round_cefr)

    kind = args.featuretype
    train_x, dev_x, num_features = preprocess(
        kind, args.max_features, train_meta, dev_meta
    )

    target_col = 'lang' if args.nli else 'cefr'
    labels = sorted(train_meta[target_col].unique())
    train_target_scores = np.array(
        [labels.index(c) for c in train_meta[target_col]], dtype=int
    )
    dev_target_scores = np.array(
        [labels.index(c) for c in dev_meta[target_col]], dtype=int
    )

    train_y, dev_y, output_units = get_targets_and_output_units(
        train_target_scores, dev_target_scores, args.method
    )

    multi_task = args.aux_loss_weight > 0
    if multi_task:
        assert not args.nli, "Both NLI and multi-task specified"
        lang_labels = sorted(train_meta.lang.unique())
        train_y.append(
            to_categorical([lang_labels.index(l) for l in train_meta.lang]))
        dev_y.append(
            to_categorical([lang_labels.index(l) for l in dev_meta.lang]))
        output_units.append(len(lang_labels))
        loss_weights = {
            AUX_OUTPUT_NAME: args.aux_loss_weight,
            OUTPUT_NAME: 1.0 - args.aux_loss_weight,
        }
    else:
        loss_weights = None
    del train_meta, dev_meta

    model = build_model(num_features, output_units, do_classification)
    model.summary()
    optimizer, loss, metrics = get_compile_args(args.method, args.lr)
    model.compile(
        optimizer=optimizer, loss=loss, loss_weights=loss_weights, metrics=metrics
    )

    # Context manager fails on Windows (can't open an open file again)
    temp_handle, weights_path = tempfile.mkstemp(suffix='.h5')
    val_y = dev_target_scores
    callbacks = [
        F1Metrics(dev_x, val_y, weights_path, ranked=args.method == 'ranked')
    ]
    history = model.fit(
        train_x,
        train_y,
        epochs=args.epochs,
        batch_size=args.batch_size,
        callbacks=callbacks,
        validation_data=(dev_x, dev_y),
        verbose=2,
    )
    model.load_weights(weights_path)
    os.close(temp_handle)
    os.remove(weights_path)

    true = dev_target_scores
    if multi_task:
        predictions = model.predict(dev_x)[0]
    else:
        predictions = model.predict(dev_x)
    if args.method == 'classification':
        pred = np.argmax(predictions, axis=1)
    elif args.method == 'regression':
        # Round to integers and clip to score range
        highest_class = train_target_scores.max()
        pred = rescale_regression_results(predictions, highest_class).ravel()
    elif args.method == 'ranked':
        pred = K.eval(ranked_prediction(predictions))

    if multi_task:
        multi_task_report(history.history, true, pred, labels)
    else:
        report(true, pred, labels)
    plt.show()

    prefix = 'mlp_%s' % args.featuretype
    fname = get_file_name(prefix)
    save_results(fname, args.__dict__, history.history, true, pred)
    if args.save_model:
        save_model(fname, model, None)