Beispiel #1
0
def custom_fit(rank, lrows, lcols, modelfile, perplexity=False, train_file="",
               target_file="", quiet=False, epsilon=0.0001):
    """Extract a spectral (WFA) estimator from a trained RNN language model.

    The RNN stored in *modelfile* is queried to fill Hankel matrices over the
    prefix/suffix sets bounded by *lrows*/*lcols*; a rank-*rank* spectral
    decomposition of those matrices yields the automaton.  The estimator's
    private state is patched directly instead of calling ``fit``.

    :param rank: rank of the extracted automaton.
    :param lrows: row (prefix) specification for the Hankel blocks.
    :param lcols: column (suffix) specification for the Hankel blocks.
    :param modelfile: path to a saved Keras model; its output layer is
        assumed to hold ``nalpha + 2`` units (alphabet plus two special
        symbols) -- TODO confirm against the training code.
    :param perplexity: if True, also print perplexity and KL-divergence
        figures computed on *train_file* / *target_file*.
    :param train_file: Pautomac-style test-word file (only read when
        ``perplexity`` is True).
    :param target_file: Pautomac-style solution file with true probabilities.
    :param quiet: silence progress messages (default False, the previously
        hard-coded behavior).
    :param epsilon: floor applied by ``fix_probas`` to non-positive
        probabilities (default 0.0001, the previously hard-coded value).
    :return: the fitted ``sp.Spectral`` estimator.
    """
    model = keras.models.load_model(modelfile)
    # Alphabet size: output layer minus the two special symbols.
    nalpha = int(model.output.shape[1]) - 2
    # Preparations:
    pr(quiet, "Enumeration of prefixes and suffixes...")
    ligs, cols = enumerate_words(nalpha, lrows, lcols)
    pr(quiet, "Building of hankel matrices...")
    lhankels = hankels(model, ligs, cols)
    # lhankels = hankels_para(model, ligs, cols)
    spectral_estimator = sp.Spectral(rank=rank, lrows=lrows, lcolumns=lcols,
                                     version='classic', partial=True, sparse=False,
                                     smooth_method='none', mode_quiet=quiet)
    # "Fingers in the socket": patch the estimator's private attributes to
    # reproduce (approximately) the state it would have after fit().
    pr(quiet, "Custom fit ...")
    spectral_estimator._hankel = sp.Hankel(sample_instance=None, lrows=lrows, lcolumns=lcols,
                                           version='classic', partial=True, sparse=False,
                                           mode_quiet=quiet, lhankel=lhankels)
    # noinspection PyProtectedMember
    spectral_estimator._automaton = spectral_estimator._hankel.to_automaton(rank, quiet)
    pr(quiet, "... Done !")
    # Perplexity :
    if perplexity:
        print("Perplexity :")
        x_test = parse.parse_fullwords(train_file)
        x_test_sp = spparse.load_data_sample(train_file)
        y_test = parse.parse_pautomac_results(target_file)

        perp_proba_rnn = fix_probas(proba_words_para(model, x_test, nalpha, asdict=False, quiet=False), f=epsilon)
        perp_proba_spec = fix_probas(spectral_estimator.predict(x_test_sp.data), f=epsilon)
        # Baseline: perplexity of the true distribution against itself.
        test_perp = scores.pautomac_perplexity(y_test, y_test)
        rnn_perp = scores.pautomac_perplexity(y_test, perp_proba_rnn)
        extract_perp = scores.pautomac_perplexity(y_test, perp_proba_spec)

        test_rnn_kl = scores.kullback_leibler(y_test, perp_proba_rnn)
        rnn_extr_kl = scores.kullback_leibler(perp_proba_rnn, perp_proba_spec)
        test_extr_kl = scores.kullback_leibler(y_test, perp_proba_spec)

        print("\tTest :\t{0}\n\tRNN :\t{1}\n\tExtr :\t{2}"
              .format(test_perp, rnn_perp, extract_perp))
        print("KL Divergence :")
        print("\tTest-RNN :\t{0}\n\tRNN-Extr :\t{1}\n\tTest-Extr :\t{2}"
              .format(test_rnn_kl, rnn_extr_kl, test_extr_kl))
    #
    return spectral_estimator
Beispiel #2
0
    def rank_dependent_metrics(self):
        """Compute and record every metric involving the extracted automaton.

        These depend on the extraction rank, so they are recomputed per rank.
        Reads ``self.last_extr_aut`` (the most recent spectral extraction) and
        the rank-independent reference values (``x_test``, ``x_rnnw``,
        ``y_*_rnn``, prefix dicts) — presumably populated beforehand by
        ``rank_independent_metrics``; each result is stored in
        ``self.metrics`` under a ``(rank, name)`` key.
        """
        rank = self.last_extr_aut.nbS
        self.ranks.append(rank)
        print("Metrics for rank {0} :".format(rank))
        # Automaton scores for the test words and the RNN-generated words.
        self.y_test_extr = [self.last_extr_aut.val(w) for w in self.x_test]
        self.y_rnnw_extr = [self.last_extr_aut.val(w) for w in self.x_rnnw]
        self.y_test_extr_prefixes = proba_all_prefixes_aut(
            self.last_extr_aut, self.x_test)
        self.y_rnnw_extr_prefixes = proba_all_prefixes_aut(
            self.last_extr_aut, self.x_rnnw)
        # KL(RNN || extraction); fix_probas presumably floors non-positive
        # automaton outputs so the log is defined -- TODO confirm.
        self.kld_test_rnn_extr = scores.kullback_leibler(
            self.y_test_rnn, self.fix_probas(self.y_test_extr))
        # NDCG at cutoffs 1 and 5: RNN prefixes as reference, extracted
        # automaton prefixes as the approximation.
        self.ndcg1_test_rnn_extr = scores.ndcg(
            self.x_test,
            self.rnn_model,
            self.last_extr_aut,
            ndcg_l=1,
            dic_ref=self.y_test_rnn_prefixes,
            dic_approx=self.y_test_extr_prefixes)
        self.ndcg1_rnnw_rnn_extr = scores.ndcg(
            self.x_rnnw,
            self.rnn_model,
            self.last_extr_aut,
            ndcg_l=1,
            dic_ref=self.y_rnnw_rnn_prefixes,
            dic_approx=self.y_rnnw_extr_prefixes)
        self.ndcg5_test_rnn_extr = scores.ndcg(
            self.x_test,
            self.rnn_model,
            self.last_extr_aut,
            ndcg_l=5,
            dic_ref=self.y_test_rnn_prefixes,
            dic_approx=self.y_test_extr_prefixes)
        self.ndcg5_rnnw_rnn_extr = scores.ndcg(
            self.x_rnnw,
            self.rnn_model,
            self.last_extr_aut,
            ndcg_l=5,
            dic_ref=self.y_rnnw_rnn_prefixes,
            dic_approx=self.y_rnnw_extr_prefixes)
        # Word error rate: e errors out of t predictions (ratio e/t).
        t, e = scores.wer_aut(self.last_extr_aut, self.x_test)
        self.wer_test_extr = e / t
        t, e = scores.wer_aut(self.last_extr_aut, self.x_rnnw)
        self.wer_rnnw_extr = e / t
        # Fraction of words the automaton maps to a non-positive "probability".
        self.eps_test_zeros_extr = len(
            [x for x in self.y_test_extr if x <= 0.0]) / len(self.y_test_extr)
        self.eps_rnnw_zeros_extr = len(
            [x for x in self.y_rnnw_extr if x <= 0.0]) / len(self.y_rnnw_extr)
        self.perprnn_test_extr = scores.pautomac_perplexity(
            self.y_test_rnn, self.fix_probas(self.y_test_extr))
        self.perprnn_rnnw_extr = scores.pautomac_perplexity(
            self.y_rnnw_rnn, self.fix_probas(self.y_rnnw_extr))

        # Metrics against the true target automaton / random words are only
        # available at the higher metrics level.
        if self.metrics_calc_level > 1:
            self.y_rand_extr = [self.last_extr_aut.val(w) for w in self.x_rand]
            self.perp_test_extr = scores.pautomac_perplexity(
                self.y_test_target, self.fix_probas(self.y_test_extr))
            self.kld_test_target_extr = scores.kullback_leibler(
                self.y_test_target, self.fix_probas(self.y_test_extr))
            self.ndcg1_test_target_extr = scores.ndcg(
                self.x_test,
                self.true_automaton,
                self.last_extr_aut,
                ndcg_l=1,
                dic_ref=self.y_test_target_prefixes,
                dic_approx=self.y_test_extr_prefixes)
            self.ndcg5_test_target_extr = scores.ndcg(
                self.x_test,
                self.true_automaton,
                self.last_extr_aut,
                ndcg_l=5,
                dic_ref=self.y_test_target_prefixes,
                dic_approx=self.y_test_extr_prefixes)
            self.perp_rand_extr = scores.pautomac_perplexity(
                self.y_rand_target, self.fix_probas(self.y_rand_extr))
            self.kld_rand_rnn_extr = scores.kullback_leibler(
                self.fix_probas(self.y_rand_rnn),
                self.fix_probas(self.y_rand_extr))
            self.kld_rand_extr_rnn = scores.kullback_leibler(
                self.y_rand_extr, self.fix_probas(self.y_rand_rnn))
            self.kld_rand_target_extr = scores.kullback_leibler(
                self.y_rand_target, self.fix_probas(self.y_rand_extr))
            self.eps_kl_rand_target_extr = neg_zero(self.y_rand_extr,
                                                    self.y_rand_target)
            self.eps_rand_zeros_extr = len([
                x for x in self.y_rand_extr if x <= 0.0
            ]) / len(self.y_rand_extr)
            # self.l2dis_target_extr = scores.l2dist(self.true_automaton, extr_aut, l2dist_method="gramian")

        # pr(self.quiet, "\tEvaluating words and prefixes...")
        # pr(self.quiet, "\tRank-dependent metrics...")

        # NOTE(review): perp_test_extr, perp_rand_extr, kld-*-target-* and
        # kld_rand_* below are only assigned above when
        # metrics_calc_level > 1 — presumably pre-initialized elsewhere
        # (e.g. __init__) for lower levels; confirm.
        self.metrics[(rank, "perp-test-extr")] = self.perp_test_extr
        self.metrics[(rank, "perp-test-extr-eps")] = self.eps_test_zeros_extr
        self.metrics[(rank, "perp-rand-extr")] = self.perp_rand_extr
        self.metrics[(rank, "perp-rand-extr-eps")] = self.eps_rand_zeros_extr
        self.metrics[(rank, "kld-test-rnn-extr")] = self.kld_test_rnn_extr
        self.metrics[(rank,
                      "kld-test-rnn-extr-eps")] = self.eps_test_zeros_extr
        self.metrics[(rank,
                      "kld-test-target-extr")] = self.kld_test_target_extr
        self.metrics[(rank,
                      "kld-test-target-extr-eps")] = self.eps_test_zeros_extr
        self.metrics[(rank, "kld-rand-rnn-extr")] = self.kld_rand_rnn_extr
        self.metrics[(rank,
                      "kld-rand-rnn-extr-eps")] = self.eps_rand_zeros_extr
        self.metrics[(rank, "kld-rand-extr-rnn")] = self.kld_rand_extr_rnn
        self.metrics[(rank,
                      "kld-rand-target-extr")] = self.kld_rand_target_extr
        self.metrics[(rank,
                      "kld-rand-target-extr-eps")] = self.eps_rand_zeros_extr
        self.metrics[(rank, "(1-wer)-test-extr")] = (
            1 - self.wer_test_extr if self.wer_test_extr is not None else None)
        self.metrics[(rank, "(1-wer)-rnnw-extr")] = (
            1 - self.wer_rnnw_extr if self.wer_rnnw_extr is not None else None)
        self.metrics[(rank, "ndcg1-test-rnn-extr")] = self.ndcg1_test_rnn_extr
        self.metrics[(rank,
                      "ndcg1-test-target-extr")] = self.ndcg1_test_target_extr
        self.metrics[(rank, "ndcg1-rnnw-rnn-extr")] = self.ndcg1_rnnw_rnn_extr
        self.metrics[(rank, "ndcg5-test-rnn-extr")] = self.ndcg5_test_rnn_extr
        self.metrics[(rank,
                      "ndcg5-test-target-extr")] = self.ndcg5_test_target_extr
        self.metrics[(rank, "ndcg5-rnnw-rnn-extr")] = self.ndcg5_rnnw_rnn_extr
        # self.metrics[(rank, "l2dis-target-extr")] = self.l2dis_target_extr
        self.metrics[(rank, "perprnn-test-rnn")] = self.perprnn_test_rnn
        self.metrics[(rank,
                      "perprnn-test-extr-eps")] = self.eps_test_zeros_extr
        self.metrics[(rank, "perprnn-test-extr")] = self.perprnn_test_extr
        self.metrics[(rank, "perprnn-rnnw-rnn")] = self.perprnn_rnnw_rnn
        self.metrics[(rank,
                      "perprnn-rnnw-extr-eps")] = self.eps_rnnw_zeros_extr
        self.metrics[(rank, "perprnn-rnnw-extr")] = self.perprnn_rnnw_extr
Beispiel #3
0
    def rank_independent_metrics(self):
        """Compute metrics between RNN and target once; they are rank independent.

        Populates the reference values (``x_test``, ``x_rnnw``, ``y_*_rnn``,
        prefix dicts) later reused by ``rank_dependent_metrics`` and stores
        each metric in ``self.metrics`` under a ``(-1, name)`` key (-1 marks
        rank independence).

        Fix: the ``"(1-wer)-test-target"`` entry previously stored the raw
        WER; it now stores ``1 - wer`` like the two sibling ``(1-wer)-*``
        entries, matching its key.
        """
        # NOTE(review): this pr() takes three arguments while other examples
        # call pr(quiet, msg) — presumably a different helper; confirm.
        pr(0, self.quiet, "Rank independent metrics :")
        self.x_test, _ = trainer.parse(self.metrics_test_set)
        # Words sampled from the RNN itself.
        self.x_rnnw = self.gen_with_rnn(nb=self.randwords_nb)
        self.y_test_rnn_prefixes = proba_all_prefixes_rnn(self.rnn_model,
                                                          self.x_test,
                                                          bsize=self.batch_vol,
                                                          quiet=self.quiet,
                                                          device=self.device)

        # wer=True makes proba_words_normal also return (t, e) counts;
        # WER is the error ratio e/t.
        self.y_test_rnn, t, e = self.proba_words_normal(
            self.x_test,
            asdict=False,
            wer=True,
            prefixes_dict=self.y_test_rnn_prefixes)
        self.wer_test_rnn = e / t
        self.y_rnnw_rnn_prefixes = proba_all_prefixes_rnn(self.rnn_model,
                                                          self.x_rnnw,
                                                          bsize=self.batch_vol,
                                                          quiet=self.quiet,
                                                          device=self.device)

        self.y_rnnw_rnn, t, e = self.proba_words_normal(
            self.x_rnnw,
            asdict=False,
            wer=True,
            prefixes_dict=self.y_rnnw_rnn_prefixes)

        self.wer_rnnw_rnn = e / t
        #
        # Self-perplexity of the RNN distributions (baseline values).
        self.perprnn_test_rnn = scores.pautomac_perplexity(
            self.y_test_rnn, self.y_test_rnn)
        self.perprnn_rnnw_rnn = scores.pautomac_perplexity(
            self.y_rnnw_rnn, self.y_rnnw_rnn)

        # Metrics against the true Pautomac automaton are only available at
        # the higher metrics level.
        if self.metrics_calc_level > 1:
            self.true_automaton = sp.Automaton.load_Pautomac_Automaton(
                self.metrics_model)
            self.x_rand = self.aut_rand_words(self.randwords_nb,
                                              self.rand_temperature)
            self.y_test_target = [
                self.true_automaton.val(w) for w in self.x_test
            ]
            self.y_test_target_prefixes = proba_all_prefixes_aut(
                self.true_automaton, self.x_test)
            # noinspection PyTypeChecker
            self.y_rand_target = [
                self.true_automaton.val(w) for w in self.x_rand
            ]
            # NOTE(review): no wer=True here, so presumably a single return
            # value (unlike the calls above) — confirm.
            self.y_rand_rnn = self.proba_words_normal(self.x_rand,
                                                      asdict=False)
            t, e = scores.wer_aut(self.true_automaton, self.x_test)
            self.wer_test_target = e / t
            self.perp_test_target = scores.pautomac_perplexity(
                self.y_test_target, self.y_test_target)
            self.perp_test_rnn = scores.pautomac_perplexity(
                self.y_test_target, self.y_test_rnn)
            self.perp_rand_target = scores.pautomac_perplexity(
                self.y_rand_target, self.fix_probas(self.y_rand_target))
            self.perp_rand_rnn = scores.pautomac_perplexity(
                self.y_rand_target, self.fix_probas(self.y_rand_rnn))
            self.kld_test_target_rnn = scores.kullback_leibler(
                self.y_test_target, self.y_test_rnn)
            self.kld_rand_target_rnn = scores.kullback_leibler(
                self.y_rand_target, self.fix_probas(self.y_rand_rnn))
            self.ndcg1_test_target_rnn = scores.ndcg(
                self.x_test,
                self.true_automaton,
                self.rnn_model,
                ndcg_l=1,
                dic_ref=self.y_test_target_prefixes,
                dic_approx=self.y_test_rnn_prefixes)
            self.ndcg5_test_target_rnn = scores.ndcg(
                self.x_test,
                self.true_automaton,
                self.rnn_model,
                ndcg_l=5,
                dic_ref=self.y_test_target_prefixes,
                dic_approx=self.y_test_rnn_prefixes)
            # Fraction of words given a non-positive "probability".
            self.eps_rand_zeros_target = len([
                x for x in self.y_rand_target if x <= 0.0
            ]) / len(self.y_rand_target)
            self.eps_rand_zeros_rnn = len(
                [x
                 for x in self.y_rand_rnn if x <= 0.0]) / len(self.y_rand_rnn)
            self.eps_kl_rand_target_rnn = neg_zero(self.y_rand_rnn,
                                                   self.y_rand_target)
        # NOTE(review): several attributes below are only assigned when
        # metrics_calc_level > 1 — presumably pre-initialized elsewhere
        # (e.g. __init__) for lower levels; confirm.
        self.metrics[(-1, "perp-test-target")] = self.perp_test_target
        self.metrics[(-1, "perp-test-rnn")] = self.perp_test_rnn
        self.metrics[(-1, "perp-rand-target")] = self.perp_rand_target
        self.metrics[(-1, "perp-rand-target-eps")] = self.eps_rand_zeros_target
        self.metrics[(-1, "perp-rand-rnn")] = self.perp_rand_rnn
        self.metrics[(-1, "perp-rand-rnn-eps")] = self.eps_rand_zeros_rnn
        self.metrics[(-1, "kld-test-target-rnn")] = self.kld_test_target_rnn
        self.metrics[(-1, "kld-rand-target-rnn")] = self.kld_rand_target_rnn
        self.metrics[(-1,
                      "kld-rand-target-rnn-eps")] = self.eps_kl_rand_target_rnn
        # Fixed: store 1 - wer (as the key says), consistent with the two
        # entries just below; previously the raw WER was stored here.
        self.metrics[(-1, "(1-wer)-test-target")] = (
            1 - self.wer_test_target
            if self.wer_test_target is not None else None)
        self.metrics[(-1, "(1-wer)-test-rnn")] = (
            1 - self.wer_test_rnn if self.wer_test_rnn is not None else None)
        self.metrics[(-1, "(1-wer)-rnnw-rnn")] = (
            1 - self.wer_rnnw_rnn if self.wer_rnnw_rnn is not None else None)
        self.metrics[(-1,
                      "ndcg1-test-target-rnn")] = self.ndcg1_test_target_rnn
        self.metrics[(-1,
                      "ndcg5-test-target-rnn")] = self.ndcg5_test_target_rnn
        #
        self.metrics[(-1, "perprnn-test-rnn")] = self.perprnn_test_rnn
        self.metrics[(-1, "perprnn-rnnw-rnn")] = self.perprnn_rnnw_rnn
Beispiel #4
0
def custom_fit(rank, lrows, lcols, modelfile, perplexity=False, train_file="", target_file=""):
    """Build a spectral estimator from an RNN by hand-filling its Hankel state.

    Loads the Keras model in *modelfile*, predicts probabilities for a
    generated word set, assembles Hankel matrices from them, and patches a
    ``sp.Spectral`` instance's private attributes to mimic a completed fit.
    When *perplexity* is True, perplexity and KL-divergence figures are
    printed for *train_file* against the *target_file* solutions.

    Returns the fitted ``sp.Spectral`` estimator.
    """
    model = keras.models.load_model(modelfile)
    nalpha = int(model.output.shape[1])-2
    # nalpha = 4
    # train_file = "/home/nono/stage2018/rnntospectral/data/pautomac/4.pautomac.test"
    # target_file = "/home/nono/stage2018/rnntospectral/data/pautomac/4.pautomac_solution.txt"
    ###
    # Params :
    quiet = False
    partial = False

    def _announce(message):
        # Progress logging, flushed immediately so it appears during long runs.
        if not quiet:
            print(message)
            sys.stdout.flush()

    # Preparations:
    _announce("Construction of set of words...")
    prefix_rows, suffix_cols, word_list = gen_words(nalpha, lrows, lcols)
    _announce("Prediction of probabilities of words...")
    word_probas = proba_words_para(model, word_list, nalpha)
    _announce("Building of hankel matrices...")
    hankel_blocks = hankels(prefix_rows, suffix_cols, word_probas, nalpha)
    estimator = sp.Spectral(rank=rank, lrows=lrows, lcolumns=lcols,
                            version='classic', partial=partial, sparse=False,
                            smooth_method='none', mode_quiet=quiet)
    # Patch the estimator's private state to approximate what fit() would do.
    _announce("Custom fit ...")
    estimator._hankel = sp.Hankel(sample_instance=None, lrows=lrows, lcolumns=lcols,
                                  version='classic', partial=partial, sparse=False,
                                  mode_quiet=quiet, lhankel=hankel_blocks)
    # noinspection PyProtectedMember
    estimator._automaton = estimator._hankel.to_automaton(rank, quiet)
    _announce("... Done !")
    # Perplexity :
    if perplexity:
        print("Perplexity :")
        epsilon = 0.0001

        test_words = parse.parse_fullwords(train_file)
        test_sample = spparse.load_data_sample(train_file)
        target_probas = parse.parse_pautomac_results(target_file)

        rnn_probas = fix_probas(proba_words_para(model, test_words, nalpha, asdict=False, quiet=False), f=epsilon)
        spec_probas = fix_probas(estimator.predict(test_sample.data), f=epsilon)
        test_perp = scores.pautomac_perplexity(target_probas, target_probas)
        rnn_perp = scores.pautomac_perplexity(target_probas, rnn_probas)
        extract_perp = scores.pautomac_perplexity(target_probas, spec_probas)

        test_rnn_kl = scores.kullback_leibler(target_probas, rnn_probas)
        rnn_extr_kl = scores.kullback_leibler(rnn_probas, spec_probas)
        test_extr_kl = scores.kullback_leibler(target_probas, spec_probas)

        print("\tTest :\t{0}\n\tRNN :\t{1}\n\tExtr :\t{2}"
              .format(test_perp, rnn_perp, extract_perp))
        print("KL Divergence :")
        print("\tTest-RNN :\t{0}\n\tRNN-Extr :\t{1}\n\tTest-Extr :\t{2}"
              .format(test_rnn_kl, rnn_extr_kl, test_extr_kl))
    #
    return estimator
Beispiel #5
0
def trainf(train_file,
           wid,
           sample,
           neurons,
           epochs,
           batch,
           pautomac=False,
           pautomac_test_file="",
           pautomac_sol_file="",
           layer=1):
    """Train a recurrent language model on *train_file*, saving each epoch.

    Builds an embedding + two recurrent layers + dense softmax network,
    trains it one epoch at a time (saving a checkpoint per epoch), and —
    when *pautomac* is True — reports validation loss and Pautomac
    perplexity after every epoch.

    :param train_file: training file parsed by ``parse.parse_train``.
    :param wid: window width passed to the parser.
    :param sample: if in ``(-1, len(x_train))``, subsample the training set
        to this many examples.
    :param neurons: units per recurrent layer.
    :param epochs: number of training epochs.
    :param batch: mini-batch size.
    :param pautomac: enable per-epoch Pautomac evaluation.
    :param pautomac_test_file: Pautomac test-word file (validation data).
    :param pautomac_sol_file: Pautomac solution file (true probabilities).
    :param layer: recurrent-layer type selector passed to ``nono_layer``.
    """
    # None things :
    x_val = None
    y_val = None
    pautomac_test = None
    pautomac_sol = None
    pautomac_perp = []
    losses = []
    val_losses = []
    # OK :

    nalpha, x_train, y_train = parse.parse_train(train_file,
                                                 wid,
                                                 padbefore=True)
    print(sample)
    if -1 < sample < len(x_train):
        x_train, y_train = parse.random_sample(x_train, y_train, sample)
    print(x_train.shape)
    if pautomac:
        pautomac_test = parse.parse_fullwords(pautomac_test_file)
        pautomac_sol = parse.parse_pautomac_results(pautomac_sol_file)
        _, x_val, y_val = parse.parse_train(pautomac_test_file,
                                            wid,
                                            padbefore=True)
        # NOTE(review): guard checks len(x_train) but subsamples the
        # validation set — probably should be len(x_val); confirm.
        if -1 < sample < len(x_train):
            x_val, y_val = parse.random_sample(x_val, y_val, sample)

    # Network: embedding -> recurrent x2 (tanh) -> dense/relu -> softmax.
    # Input vocabulary is nalpha + 3 (alphabet plus special symbols,
    # presumably padding/start/end — TODO confirm); mask_zero treats id 0
    # as padding.
    model = keras.models.Sequential()
    model.add(
        keras.layers.Embedding(nalpha + 3,
                               4 * nalpha,
                               input_shape=x_train[0].shape,
                               mask_zero=True))
    model.add(
        nono_layer(layer, units=neurons, return_sequences=True, dropout=0.15))
    model.add(keras.layers.Activation('tanh'))
    model.add(nono_layer(layer, units=neurons))
    model.add(keras.layers.Activation('tanh'))
    model.add(keras.layers.Dense(int(neurons / 2)))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dense(len(y_train[0])))
    model.add(keras.layers.Activation('softmax'))

    # mmdi = keras.models.Model(inputs=inp, outputs=mbed)

    print(model.summary())

    model.compile(optimizer=(keras.optimizers.rmsprop()),
                  loss="categorical_crossentropy",
                  metrics=['categorical_accuracy'])
    if pautomac:
        # Baseline: perplexity of the true distribution against itself.
        print("Pautomac base perplexity : {0}".format(
            scores.pautomac_perplexity(pautomac_sol, pautomac_sol)))

    # Train one epoch at a time so we can checkpoint and evaluate per epoch.
    for i in range(1, epochs + 1):
        h = model.fit(x_train, y_train, batch, 1)
        # NOTE(review): `modelname` is not defined in this function —
        # presumably a module-level global; confirm.
        model.save(modelname + "-" + str(i))
        losses.append(h.history["loss"][0])
        if pautomac:
            # NOTE(review): with metrics configured, evaluate() returns a
            # [loss, accuracy] list, which is appended (and later printed)
            # as-is; confirm that is intended.
            val_losses.append(model.evaluate(x_val, y_val, 2048))
            pautomac_perp.append(
                scores.pautomac_perplexity(
                    pautomac_sol,
                    spextractor_common.proba_words_2(model,
                                                     pautomac_test,
                                                     asdict=False,
                                                     quiet=True)))

        # Recap of all epochs so far after each epoch (quadratic output,
        # apparently intentional as a running summary).
        if pautomac:
            for e in range(i):
                print("Loss at epoch {0} : {1} on train, {2} on validation".
                      format(e + 1, losses[e], val_losses[e]))
                sys.stdout.flush()
            for e in range(i):
                print("Perplexity at epoch {0} : {1}".format(
                    e + 1, pautomac_perp[e]))
                sys.stdout.flush()
        else:
            for e in range(i):
                print("Loss at epoch {0} : {1} on train".format(
                    e + 1, losses[e]))
                sys.stdout.flush()