def custom_fit(rank, lrows, lcols, modelfile, perplexity=False, train_file="", target_file=""):
    # Parameters:
    model = keras.models.load_model(modelfile)
    # The softmax layer has nalpha + 2 units: the alphabet plus special symbols.
    nalpha = int(model.output.shape[1]) - 2
    quiet = False
    # Preparations:
    pr(quiet, "Enumeration of prefixes and suffixes...")
    ligs, cols = enumerate_words(nalpha, lrows, lcols)
    pr(quiet, "Building of hankel matrices...")
    lhankels = hankels(model, ligs, cols)
    # lhankels = hankels_para(model, ligs, cols)
    spectral_estimator = sp.Spectral(rank=rank, lrows=lrows, lcolumns=lcols,
                                     version='classic', partial=True, sparse=False,
                                     smooth_method='none', mode_quiet=quiet)
    # Hands in the socket: bypass fit() and fill the estimator's private fields directly.
    pr(quiet, "Custom fit ...")
    spectral_estimator._hankel = sp.Hankel(sample_instance=None,
                                           lrows=lrows, lcolumns=lcols,
                                           version='classic', partial=True,
                                           sparse=False, mode_quiet=quiet,
                                           lhankel=lhankels)
    # noinspection PyProtectedMember
    spectral_estimator._automaton = spectral_estimator._hankel.to_automaton(rank, quiet)
    # At this point we should have roughly recreated the estimator's post-fit state.
    pr(quiet, "... Done !")
    # Perplexity:
    if perplexity:
        print("Perplexity :")
        epsilon = 0.0001
        x_test = parse.parse_fullwords(train_file)
        x_test_sp = spparse.load_data_sample(train_file)
        y_test = parse.parse_pautomac_results(target_file)
        perp_proba_rnn = fix_probas(proba_words_para(model, x_test, nalpha,
                                                     asdict=False, quiet=False),
                                    f=epsilon)
        perp_proba_spec = fix_probas(spectral_estimator.predict(x_test_sp.data),
                                     f=epsilon)
        test_perp = scores.pautomac_perplexity(y_test, y_test)
        rnn_perp = scores.pautomac_perplexity(y_test, perp_proba_rnn)
        extract_perp = scores.pautomac_perplexity(y_test, perp_proba_spec)
        test_rnn_kl = scores.kullback_leibler(y_test, perp_proba_rnn)
        rnn_extr_kl = scores.kullback_leibler(perp_proba_rnn, perp_proba_spec)
        test_extr_kl = scores.kullback_leibler(y_test, perp_proba_spec)
        print("\tTest :\t{0}\n\tRNN :\t{1}\n\tExtr :\t{2}"
              .format(test_perp, rnn_perp, extract_perp))
        print("KL Divergence :")
        print("\tTest-RNN :\t{0}\n\tRNN-Extr :\t{1}\n\tTest-Extr :\t{2}"
              .format(test_rnn_kl, rnn_extr_kl, test_extr_kl))
    #
    return spectral_estimator
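# Usage sketch (not part of the original module): how this custom_fit variant
# might be driven on a PAutomaC problem. The paths and hyper-parameter values
# below are hypothetical placeholders.
def _example_custom_fit():
    est = custom_fit(rank=10, lrows=4, lcols=4,
                     modelfile="models/4.h5",                     # hypothetical
                     perplexity=True,
                     train_file="data/4.pautomac.test",           # hypothetical
                     target_file="data/4.pautomac_solution.txt")  # hypothetical
    # The returned splearn Spectral estimator wraps the extracted automaton
    # and can score new sequences with predict(), as done above.
    return est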
def rank_dependent_metrics(self):
    """Metrics involving the extracted automaton depend on the rank"""
    rank = self.last_extr_aut.nbS
    self.ranks.append(rank)
    print("Metrics for rank {0} :".format(rank))
    # Word and prefix probabilities under the extracted automaton:
    self.y_test_extr = [self.last_extr_aut.val(w) for w in self.x_test]
    self.y_rnnw_extr = [self.last_extr_aut.val(w) for w in self.x_rnnw]
    self.y_test_extr_prefixes = proba_all_prefixes_aut(self.last_extr_aut, self.x_test)
    self.y_rnnw_extr_prefixes = proba_all_prefixes_aut(self.last_extr_aut, self.x_rnnw)
    # Extracted automaton against the RNN:
    self.kld_test_rnn_extr = scores.kullback_leibler(
        self.y_test_rnn, self.fix_probas(self.y_test_extr))
    self.ndcg1_test_rnn_extr = scores.ndcg(
        self.x_test, self.rnn_model, self.last_extr_aut, ndcg_l=1,
        dic_ref=self.y_test_rnn_prefixes, dic_approx=self.y_test_extr_prefixes)
    self.ndcg1_rnnw_rnn_extr = scores.ndcg(
        self.x_rnnw, self.rnn_model, self.last_extr_aut, ndcg_l=1,
        dic_ref=self.y_rnnw_rnn_prefixes, dic_approx=self.y_rnnw_extr_prefixes)
    self.ndcg5_test_rnn_extr = scores.ndcg(
        self.x_test, self.rnn_model, self.last_extr_aut, ndcg_l=5,
        dic_ref=self.y_test_rnn_prefixes, dic_approx=self.y_test_extr_prefixes)
    self.ndcg5_rnnw_rnn_extr = scores.ndcg(
        self.x_rnnw, self.rnn_model, self.last_extr_aut, ndcg_l=5,
        dic_ref=self.y_rnnw_rnn_prefixes, dic_approx=self.y_rnnw_extr_prefixes)
    t, e = scores.wer_aut(self.last_extr_aut, self.x_test)
    self.wer_test_extr = e / t
    t, e = scores.wer_aut(self.last_extr_aut, self.x_rnnw)
    self.wer_rnnw_extr = e / t
    # Fraction of words receiving a non-positive probability (clamped by fix_probas):
    self.eps_test_zeros_extr = (len([x for x in self.y_test_extr if x <= 0.0])
                                / len(self.y_test_extr))
    self.eps_rnnw_zeros_extr = (len([x for x in self.y_rnnw_extr if x <= 0.0])
                                / len(self.y_rnnw_extr))
    self.perprnn_test_extr = scores.pautomac_perplexity(
        self.y_test_rnn, self.fix_probas(self.y_test_extr))
    self.perprnn_rnnw_extr = scores.pautomac_perplexity(
        self.y_rnnw_rnn, self.fix_probas(self.y_rnnw_extr))
    if self.metrics_calc_level > 1:
        # Extracted automaton against the target automaton:
        self.y_rand_extr = [self.last_extr_aut.val(w) for w in self.x_rand]
        self.perp_test_extr = scores.pautomac_perplexity(
            self.y_test_target, self.fix_probas(self.y_test_extr))
        self.kld_test_target_extr = scores.kullback_leibler(
            self.y_test_target, self.fix_probas(self.y_test_extr))
        self.ndcg1_test_target_extr = scores.ndcg(
            self.x_test, self.true_automaton, self.last_extr_aut, ndcg_l=1,
            dic_ref=self.y_test_target_prefixes, dic_approx=self.y_test_extr_prefixes)
        self.ndcg5_test_target_extr = scores.ndcg(
            self.x_test, self.true_automaton, self.last_extr_aut, ndcg_l=5,
            dic_ref=self.y_test_target_prefixes, dic_approx=self.y_test_extr_prefixes)
        self.perp_rand_extr = scores.pautomac_perplexity(
            self.y_rand_target, self.fix_probas(self.y_rand_extr))
        self.kld_rand_rnn_extr = scores.kullback_leibler(
            self.fix_probas(self.y_rand_rnn), self.fix_probas(self.y_rand_extr))
        self.kld_rand_extr_rnn = scores.kullback_leibler(
            self.y_rand_extr, self.fix_probas(self.y_rand_rnn))
        self.kld_rand_target_extr = scores.kullback_leibler(
            self.y_rand_target, self.fix_probas(self.y_rand_extr))
        self.eps_kl_rand_target_extr = neg_zero(self.y_rand_extr, self.y_rand_target)
        self.eps_rand_zeros_extr = (len([x for x in self.y_rand_extr if x <= 0.0])
                                    / len(self.y_rand_extr))
        # self.l2dis_target_extr = scores.l2dist(self.true_automaton, extr_aut,
        #                                        l2dist_method="gramian")
    # pr(self.quiet, "\tEvaluating words and prefixes...")
    # pr(self.quiet, "\tRank-dependent metrics...")
    # Store everything in the metrics table, keyed by (rank, metric name):
    self.metrics[(rank, "perp-test-extr")] = self.perp_test_extr
    self.metrics[(rank, "perp-test-extr-eps")] = self.eps_test_zeros_extr
    self.metrics[(rank, "perp-rand-extr")] = self.perp_rand_extr
    self.metrics[(rank, "perp-rand-extr-eps")] = self.eps_rand_zeros_extr
    self.metrics[(rank, "kld-test-rnn-extr")] = self.kld_test_rnn_extr
    self.metrics[(rank, "kld-test-rnn-extr-eps")] = self.eps_test_zeros_extr
    self.metrics[(rank, "kld-test-target-extr")] = self.kld_test_target_extr
    self.metrics[(rank, "kld-test-target-extr-eps")] = self.eps_test_zeros_extr
    self.metrics[(rank, "kld-rand-rnn-extr")] = self.kld_rand_rnn_extr
    self.metrics[(rank, "kld-rand-rnn-extr-eps")] = self.eps_rand_zeros_extr
    self.metrics[(rank, "kld-rand-extr-rnn")] = self.kld_rand_extr_rnn
    self.metrics[(rank, "kld-rand-target-extr")] = self.kld_rand_target_extr
    self.metrics[(rank, "kld-rand-target-extr-eps")] = self.eps_rand_zeros_extr
    self.metrics[(rank, "(1-wer)-test-extr")] = (
        1 - self.wer_test_extr if self.wer_test_extr is not None else None)
    self.metrics[(rank, "(1-wer)-rnnw-extr")] = (
        1 - self.wer_rnnw_extr if self.wer_rnnw_extr is not None else None)
    self.metrics[(rank, "ndcg1-test-rnn-extr")] = self.ndcg1_test_rnn_extr
    self.metrics[(rank, "ndcg1-test-target-extr")] = self.ndcg1_test_target_extr
    self.metrics[(rank, "ndcg1-rnnw-rnn-extr")] = self.ndcg1_rnnw_rnn_extr
    self.metrics[(rank, "ndcg5-test-rnn-extr")] = self.ndcg5_test_rnn_extr
    self.metrics[(rank, "ndcg5-test-target-extr")] = self.ndcg5_test_target_extr
    self.metrics[(rank, "ndcg5-rnnw-rnn-extr")] = self.ndcg5_rnnw_rnn_extr
    # self.metrics[(rank, "l2dis-target-extr")] = self.l2dis_target_extr
    self.metrics[(rank, "perprnn-test-rnn")] = self.perprnn_test_rnn
    self.metrics[(rank, "perprnn-test-extr-eps")] = self.eps_test_zeros_extr
    self.metrics[(rank, "perprnn-test-extr")] = self.perprnn_test_extr
    self.metrics[(rank, "perprnn-rnnw-rnn")] = self.perprnn_rnnw_rnn
    self.metrics[(rank, "perprnn-rnnw-extr-eps")] = self.eps_rnnw_zeros_extr
    self.metrics[(rank, "perprnn-rnnw-extr")] = self.perprnn_rnnw_extr
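# Sketch of the fix_probas helper used throughout these metrics (assumption:
# the real implementation lives elsewhere in the project). Perplexity and KL
# divergence are undefined on non-positive values, and a low-rank extracted
# automaton can emit negative or zero "probabilities", so those are clamped
# to a small floor; the *-eps metrics above record how often that happened.
def fix_probas_sketch(probas, f=0.0001):
    # Replace each value <= 0 with the floor f; leave the rest untouched.
    return [p if p > 0.0 else f for p in probas]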
def rank_independent_metrics(self):
    """Metrics between RNN and target are computed only once, as they are rank independent"""
    pr(0, self.quiet, "Rank independent metrics :")
    self.x_test, _ = trainer.parse(self.metrics_test_set)
    self.x_rnnw = self.gen_with_rnn(nb=self.randwords_nb)
    # Word and prefix probabilities under the RNN:
    self.y_test_rnn_prefixes = proba_all_prefixes_rnn(
        self.rnn_model, self.x_test, bsize=self.batch_vol,
        quiet=self.quiet, device=self.device)
    self.y_test_rnn, t, e = self.proba_words_normal(
        self.x_test, asdict=False, wer=True,
        prefixes_dict=self.y_test_rnn_prefixes)
    self.wer_test_rnn = e / t
    self.y_rnnw_rnn_prefixes = proba_all_prefixes_rnn(
        self.rnn_model, self.x_rnnw, bsize=self.batch_vol,
        quiet=self.quiet, device=self.device)
    self.y_rnnw_rnn, t, e = self.proba_words_normal(
        self.x_rnnw, asdict=False, wer=True,
        prefixes_dict=self.y_rnnw_rnn_prefixes)
    self.wer_rnnw_rnn = e / t
    #
    self.perprnn_test_rnn = scores.pautomac_perplexity(self.y_test_rnn, self.y_test_rnn)
    self.perprnn_rnnw_rnn = scores.pautomac_perplexity(self.y_rnnw_rnn, self.y_rnnw_rnn)
    if self.metrics_calc_level > 1:
        # RNN against the target automaton:
        self.true_automaton = sp.Automaton.load_Pautomac_Automaton(self.metrics_model)
        self.x_rand = self.aut_rand_words(self.randwords_nb, self.rand_temperature)
        self.y_test_target = [self.true_automaton.val(w) for w in self.x_test]
        self.y_test_target_prefixes = proba_all_prefixes_aut(self.true_automaton, self.x_test)
        # noinspection PyTypeChecker
        self.y_rand_target = [self.true_automaton.val(w) for w in self.x_rand]
        self.y_rand_rnn = self.proba_words_normal(self.x_rand, asdict=False)
        t, e = scores.wer_aut(self.true_automaton, self.x_test)
        self.wer_test_target = e / t
        self.perp_test_target = scores.pautomac_perplexity(
            self.y_test_target, self.y_test_target)
        self.perp_test_rnn = scores.pautomac_perplexity(
            self.y_test_target, self.y_test_rnn)
        self.perp_rand_target = scores.pautomac_perplexity(
            self.y_rand_target, self.fix_probas(self.y_rand_target))
        self.perp_rand_rnn = scores.pautomac_perplexity(
            self.y_rand_target, self.fix_probas(self.y_rand_rnn))
        self.kld_test_target_rnn = scores.kullback_leibler(
            self.y_test_target, self.y_test_rnn)
        self.kld_rand_target_rnn = scores.kullback_leibler(
            self.y_rand_target, self.fix_probas(self.y_rand_rnn))
        self.ndcg1_test_target_rnn = scores.ndcg(
            self.x_test, self.true_automaton, self.rnn_model, ndcg_l=1,
            dic_ref=self.y_test_target_prefixes, dic_approx=self.y_test_rnn_prefixes)
        self.ndcg5_test_target_rnn = scores.ndcg(
            self.x_test, self.true_automaton, self.rnn_model, ndcg_l=5,
            dic_ref=self.y_test_target_prefixes, dic_approx=self.y_test_rnn_prefixes)
        self.eps_rand_zeros_target = (len([x for x in self.y_rand_target if x <= 0.0])
                                      / len(self.y_rand_target))
        self.eps_rand_zeros_rnn = (len([x for x in self.y_rand_rnn if x <= 0.0])
                                   / len(self.y_rand_rnn))
        self.eps_kl_rand_target_rnn = neg_zero(self.y_rand_rnn, self.y_rand_target)
    # Store everything in the metrics table; rank -1 marks rank-independent entries:
    self.metrics[(-1, "perp-test-target")] = self.perp_test_target
    self.metrics[(-1, "perp-test-rnn")] = self.perp_test_rnn
    self.metrics[(-1, "perp-rand-target")] = self.perp_rand_target
    self.metrics[(-1, "perp-rand-target-eps")] = self.eps_rand_zeros_target
    self.metrics[(-1, "perp-rand-rnn")] = self.perp_rand_rnn
    self.metrics[(-1, "perp-rand-rnn-eps")] = self.eps_rand_zeros_rnn
    self.metrics[(-1, "kld-test-target-rnn")] = self.kld_test_target_rnn
    self.metrics[(-1, "kld-rand-target-rnn")] = self.kld_rand_target_rnn
    self.metrics[(-1, "kld-rand-target-rnn-eps")] = self.eps_kl_rand_target_rnn
    # Stored as 1 - WER so that, like the other metrics, higher is better:
    self.metrics[(-1, "(1-wer)-test-target")] = (
        1 - self.wer_test_target if self.wer_test_target is not None else None)
    self.metrics[(-1, "(1-wer)-test-rnn")] = (
        1 - self.wer_test_rnn if self.wer_test_rnn is not None else None)
    self.metrics[(-1, "(1-wer)-rnnw-rnn")] = (
        1 - self.wer_rnnw_rnn if self.wer_rnnw_rnn is not None else None)
    self.metrics[(-1, "ndcg1-test-target-rnn")] = self.ndcg1_test_target_rnn
    self.metrics[(-1, "ndcg5-test-target-rnn")] = self.ndcg5_test_target_rnn
    #
    self.metrics[(-1, "perprnn-test-rnn")] = self.perprnn_test_rnn
    self.metrics[(-1, "perprnn-rnnw-rnn")] = self.perprnn_rnnw_rnn
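# Hedged sketch of the score behind scores.pautomac_perplexity (assumption:
# it follows the PAutomaC competition definition, with both the target
# weights and the candidate probabilities renormalised over the evaluated
# set). Note that pautomac_perplexity(y, y), as used above, is then the
# self-perplexity baseline of a distribution against itself.
def pautomac_perplexity_sketch(y_ref, y_approx):
    import math
    ref = [r / sum(y_ref) for r in y_ref]        # target weights
    app = [a / sum(y_approx) for a in y_approx]  # candidate, renormalised
    return 2 ** -sum(r * math.log2(a) for r, a in zip(ref, app))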
def custom_fit(rank, lrows, lcols, modelfile, perplexity=False, train_file="", target_file=""):
    model = keras.models.load_model(modelfile)
    # The softmax layer has nalpha + 2 units: the alphabet plus special symbols.
    nalpha = int(model.output.shape[1]) - 2
    # nalpha = 4
    # train_file = "/home/nono/stage2018/rnntospectral/data/pautomac/4.pautomac.test"
    # target_file = "/home/nono/stage2018/rnntospectral/data/pautomac/4.pautomac_solution.txt"
    ###
    # Parameters:
    quiet = False
    partial = False
    # Preparations:
    if not quiet:
        print("Construction of set of words...")
        sys.stdout.flush()
    ligs, cols, lw = gen_words(nalpha, lrows, lcols)
    if not quiet:
        print("Prediction of probabilities of words...")
        sys.stdout.flush()
    probas = proba_words_para(model, lw, nalpha)
    if not quiet:
        print("Building of hankel matrices...")
        sys.stdout.flush()
    lhankels = hankels(ligs, cols, probas, nalpha)
    spectral_estimator = sp.Spectral(rank=rank, lrows=lrows, lcolumns=lcols,
                                     version='classic', partial=partial, sparse=False,
                                     smooth_method='none', mode_quiet=quiet)
    # Hands in the socket: bypass fit() and fill the estimator's private fields directly.
    if not quiet:
        print("Custom fit ...")
        sys.stdout.flush()
    spectral_estimator._hankel = sp.Hankel(sample_instance=None,
                                           lrows=lrows, lcolumns=lcols,
                                           version='classic', partial=partial,
                                           sparse=False, mode_quiet=quiet,
                                           lhankel=lhankels)
    # noinspection PyProtectedMember
    spectral_estimator._automaton = spectral_estimator._hankel.to_automaton(rank, quiet)
    # At this point we should have roughly recreated the estimator's post-fit state.
    if not quiet:
        print("... Done !")
        sys.stdout.flush()
    # Perplexity:
    if perplexity:
        print("Perplexity :")
        epsilon = 0.0001
        x_test = parse.parse_fullwords(train_file)
        x_test_sp = spparse.load_data_sample(train_file)
        y_test = parse.parse_pautomac_results(target_file)
        perp_proba_rnn = fix_probas(proba_words_para(model, x_test, nalpha,
                                                     asdict=False, quiet=False),
                                    f=epsilon)
        perp_proba_spec = fix_probas(spectral_estimator.predict(x_test_sp.data),
                                     f=epsilon)
        test_perp = scores.pautomac_perplexity(y_test, y_test)
        rnn_perp = scores.pautomac_perplexity(y_test, perp_proba_rnn)
        extract_perp = scores.pautomac_perplexity(y_test, perp_proba_spec)
        test_rnn_kl = scores.kullback_leibler(y_test, perp_proba_rnn)
        rnn_extr_kl = scores.kullback_leibler(perp_proba_rnn, perp_proba_spec)
        test_extr_kl = scores.kullback_leibler(y_test, perp_proba_spec)
        print("\tTest :\t{0}\n\tRNN :\t{1}\n\tExtr :\t{2}"
              .format(test_perp, rnn_perp, extract_perp))
        print("KL Divergence :")
        print("\tTest-RNN :\t{0}\n\tRNN-Extr :\t{1}\n\tTest-Extr :\t{2}"
              .format(test_rnn_kl, rnn_extr_kl, test_extr_kl))
    #
    return spectral_estimator
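# Hedged sketch of scores.kullback_leibler as used above (assumption: the
# standard discrete KL divergence between the two renormalised vectors; this
# is why fix_probas must clamp zeros in the approximation first):
def kullback_leibler_sketch(y_ref, y_approx):
    import math
    ref = [r / sum(y_ref) for r in y_ref]
    app = [a / sum(y_approx) for a in y_approx]
    return sum(r * math.log2(r / a) for r, a in zip(ref, app) if r > 0)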
def trainf(train_file, wid, sample, neurons, epochs, batch, pautomac=False,
           pautomac_test_file="", pautomac_sol_file="", layer=1):
    # Placeholders, filled in only when pautomac evaluation is enabled:
    x_val = None
    y_val = None
    pautomac_test = None
    pautomac_sol = None
    pautomac_perp = []
    losses = []
    val_losses = []
    # Data loading:
    nalpha, x_train, y_train = parse.parse_train(train_file, wid, padbefore=True)
    print(sample)
    if -1 < sample < len(x_train):
        x_train, y_train = parse.random_sample(x_train, y_train, sample)
    print(x_train.shape)
    if pautomac:
        pautomac_test = parse.parse_fullwords(pautomac_test_file)
        pautomac_sol = parse.parse_pautomac_results(pautomac_sol_file)
        _, x_val, y_val = parse.parse_train(pautomac_test_file, wid, padbefore=True)
        if -1 < sample < len(x_val):
            x_val, y_val = parse.random_sample(x_val, y_val, sample)
    model = keras.models.Sequential()
    model.add(keras.layers.Embedding(nalpha + 3, 4 * nalpha,
                                     input_shape=x_train[0].shape, mask_zero=True))
    model.add(nono_layer(layer, units=neurons, return_sequences=True, dropout=0.15))
    model.add(keras.layers.Activation('tanh'))
    model.add(nono_layer(layer, units=neurons))
    model.add(keras.layers.Activation('tanh'))
    model.add(keras.layers.Dense(int(neurons / 2)))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dense(len(y_train[0])))
    model.add(keras.layers.Activation('softmax'))
    # mmdi = keras.models.Model(inputs=inp, outputs=mbed)
    print(model.summary())
    model.compile(optimizer=keras.optimizers.RMSprop(),
                  loss="categorical_crossentropy",
                  metrics=['categorical_accuracy'])
    if pautomac:
        print("Pautomac base perplexity : {0}".format(
            scores.pautomac_perplexity(pautomac_sol, pautomac_sol)))
    for i in range(1, epochs + 1):
        # One epoch at a time, so we can checkpoint and score after each epoch:
        h = model.fit(x_train, y_train, batch, 1)
        model.save(modelname + "-" + str(i))  # `modelname` is a module-level prefix
        losses.append(h.history["loss"][0])
        if pautomac:
            val_losses.append(model.evaluate(x_val, y_val, 2048))
            pautomac_perp.append(scores.pautomac_perplexity(
                pautomac_sol,
                spextractor_common.proba_words_2(model, pautomac_test,
                                                 asdict=False, quiet=True)))
        # Re-print the full history so far, so logs survive an interrupted run:
        if pautomac:
            for e in range(i):
                print("Loss at epoch {0} : {1} on train, {2} on validation"
                      .format(e + 1, losses[e], val_losses[e]))
                sys.stdout.flush()
            for e in range(i):
                print("Perplexity at epoch {0} : {1}".format(e + 1, pautomac_perp[e]))
                sys.stdout.flush()
        else:
            for e in range(i):
                print("Loss at epoch {0} : {1} on train".format(e + 1, losses[e]))
                sys.stdout.flush()
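# Usage sketch (not part of the original module): training a recurrent
# language model on a PAutomaC problem with per-epoch checkpointing and
# perplexity tracking. All paths and hyper-parameter values are hypothetical,
# and `modelname` must be set at module level because trainf saves its
# checkpoints under that prefix.
def _example_trainf():
    global modelname
    modelname = "models/4-rnn"  # hypothetical checkpoint prefix
    trainf("data/4.pautomac.train", wid=12, sample=-1,
           neurons=64, epochs=10, batch=32,
           pautomac=True,
           pautomac_test_file="data/4.pautomac.test",
           pautomac_sol_file="data/4.pautomac_solution.txt",
           layer=1)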