def train_classifier(df, labels, label_term_dict, label_adult_dict, label_actor_dict, label_actress_dict,
                     label_producer_dict, label_writer_dict, label_director_dict, label_composer_dict,
                     label_cinematographer_dict, label_editor_dict, label_prod_designer_dict, label_dir_adult_dict,
                     label_dir_actor_dict, label_dir_actress_dict, label_dir_producer_dict, label_dir_writer_dict,
                     label_dir_composer_dict, label_dir_cinematographer_dict, label_dir_editor_dict,
                     label_dir_prod_designer_dict, label_actor_actress_dict, label_to_index, index_to_label,
                     model_name, soft=False):
    """Train a HAN on IMDb documents pseudo-labeled from seed terms and cast/crew metadata dictionaries."""
    basepath = "/data4/dheeraj/metaguide/"
    dataset = "imdb/"
    # glove_dir = basepath + "glove.6B"
    dump_dir = basepath + "models/" + dataset + model_name + "/"
    tmp_dir = basepath + "checkpoints/" + dataset + model_name + "/"
    os.makedirs(dump_dir, exist_ok=True)
    os.makedirs(tmp_dir, exist_ok=True)
    max_sentence_length = 100
    max_sentences = 15
    max_words = 20000
    embedding_dim = 100
    tokenizer = pickle.load(open(basepath + dataset + "tokenizer.pkl", "rb"))
    X, y, y_true = get_train_data(df, labels, label_term_dict, label_adult_dict, label_actor_dict, label_actress_dict,
                                  label_producer_dict, label_writer_dict, label_director_dict, label_composer_dict,
                                  label_cinematographer_dict, label_editor_dict, label_prod_designer_dict,
                                  label_dir_adult_dict, label_dir_actor_dict, label_dir_actress_dict,
                                  label_dir_producer_dict, label_dir_writer_dict, label_dir_composer_dict,
                                  label_dir_cinematographer_dict, label_dir_editor_dict, label_dir_prod_designer_dict,
                                  label_actor_actress_dict, tokenizer, label_to_index, soft=soft)
    print("****************** CLASSIFICATION REPORT FOR TRAINING DATA ********************")
    # df_train = create_training_df(X, y, y_true)
    # df_train.to_csv(basepath + dataset + "training_label.csv")
    if not soft:
        y_vec = make_one_hot(y, label_to_index)
        print(classification_report(y_true, y))
    else:
        y_vec = np.array(y)
        y_argmax = np.argmax(y, axis=-1)
        y_str = []
        for i in y_argmax:
            y_str.append(index_to_label[i])
        print(classification_report(y_true, y_str))
    # print("Fitting tokenizer...")
    # tokenizer = fit_get_tokenizer(X, max_words)
    print("Getting tokenizer")
    tokenizer = pickle.load(open(basepath + dataset + "tokenizer.pkl", "rb"))
    print("Splitting into train, dev...")
    X_train, y_train, X_val, y_val, _, _ = create_train_dev(X, labels=y_vec, tokenizer=tokenizer,
                                                            max_sentences=max_sentences,
                                                            max_sentence_length=max_sentence_length,
                                                            max_words=max_words, val=False)
    # print("Creating Embedding matrix...")
    # embedding_matrix = create_embedding_matrix(glove_dir, tokenizer, embedding_dim)
    print("Getting Embedding matrix...")
    embedding_matrix = pickle.load(open(basepath + dataset + "embedding_matrix.pkl", "rb"))
    print("Initializing model...")
    model = HAN(max_words=max_sentence_length, max_sentences=max_sentences, output_size=len(y_train[0]),
                embedding_matrix=embedding_matrix)
    print("Compiling model...")
    model.summary()
    if not soft:
        # Hard pseudo-labels: standard cross-entropy on one-hot targets.
        model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['acc'])
    else:
        # Soft pseudo-labels: match the predicted distribution to the target distribution via KL divergence.
        model.compile(loss=kullback_leibler_divergence, optimizer='adam', metrics=['acc'])
    print("model fitting - Hierarchical attention network...")
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
    mc = ModelCheckpoint(filepath=tmp_dir + 'model.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_acc', mode='max',
                         verbose=1, save_weights_only=True, save_best_only=True)
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=256, callbacks=[es, mc])
    # print("****************** CLASSIFICATION REPORT FOR DOCUMENTS WITH LABEL WORDS ********************")
    # X_label_all = prep_data(texts=X, max_sentences=max_sentences, max_sentence_length=max_sentence_length,
    #                         tokenizer=tokenizer)
    # pred = model.predict(X_label_all)
    # pred_labels = get_from_one_hot(pred, index_to_label)
    # print(classification_report(y_true, pred_labels))
    print("****************** CLASSIFICATION REPORT FOR All DOCUMENTS ********************")
    X_all = prep_data(texts=df["text"], max_sentences=max_sentences, max_sentence_length=max_sentence_length,
                      tokenizer=tokenizer)
    y_true_all = df["label"]
    pred = model.predict(X_all)
    pred_labels = get_from_one_hot(pred, index_to_label)
    print(classification_report(y_true_all, pred_labels))
    print("Dumping the model...")
    model.save_weights(dump_dir + "model_weights_" + model_name + ".h5")
    model.save(dump_dir + "model_" + model_name + ".h5")
    return pred_labels, pred
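# --- Illustrative sketch (not the repository's actual helpers) -----------------------------
# The trainers in this file assume make_one_hot / get_from_one_hot convert between string
# labels and one-hot / probability rows. The hypothetical *_sketch versions below only
# illustrate that assumed contract; the real implementations live elsewhere in this repo.
import numpy as np


def make_one_hot_sketch(y, label_to_index):
    # y: list of string labels; returns a (len(y), num_labels) one-hot matrix.
    one_hot = np.zeros((len(y), len(label_to_index)), dtype=np.float32)
    for i, lbl in enumerate(y):
        one_hot[i, label_to_index[lbl]] = 1.0
    return one_hot


def get_from_one_hot_sketch(pred, index_to_label):
    # pred: (num_docs, num_labels) scores; returns the argmax label string per row.
    return [index_to_label[int(i)] for i in np.argmax(pred, axis=-1)]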
def train_classifier(df, labels, label_term_dict, label_author_dict, label_pub_dict, label_year_dict,
                     label_author_pub_dict, label_pub_year_dict, label_author_year_dict, label_to_index,
                     index_to_label, model_name, clf, use_gpu, old=True, soft=False):
    """Train a HAN, BERT, or CNN classifier on pseudo-labeled book documents (seed terms plus
    author/publisher/year metadata) and report predictions on all documents."""
    basepath = "/data4/dheeraj/metaguide/"
    dataset = "books/"
    # glove_dir = basepath + "glove.6B"
    dump_dir = basepath + "models/" + dataset + model_name + "/"
    tmp_dir = basepath + "checkpoints/" + dataset + model_name + "/"
    os.makedirs(dump_dir, exist_ok=True)
    os.makedirs(tmp_dir, exist_ok=True)
    max_sentence_length = 100
    max_sentences = 15
    max_words = 20000
    embedding_dim = 100
    tokenizer = pickle.load(open(basepath + dataset + "tokenizer.pkl", "rb"))
    if old:
        X, y, y_true = get_train_data(df, labels, label_term_dict, label_author_dict, label_pub_dict, label_year_dict,
                                      label_author_pub_dict, label_pub_year_dict, label_author_year_dict, tokenizer,
                                      label_to_index, soft=soft, clf=clf)
        if clf == "BERT":
            # For BERT, get_train_data returns document indices; map them back to raw text.
            df_orig = pickle.load(open(basepath + dataset + "df.pkl", "rb"))
            X = list(df_orig.iloc[X]["text"])
    else:
        X, y, y_true = get_confident_train_data(df, labels, label_term_dict, label_author_dict, label_pub_dict,
                                                label_year_dict, label_author_pub_dict, label_pub_year_dict,
                                                label_author_year_dict, tokenizer)
    print("****************** CLASSIFICATION REPORT FOR TRAINING DATA ********************")
    # df_train = create_training_df(X, y, y_true)
    # df_train.to_csv(basepath + dataset + "training_label.csv")
    if not soft:
        y_vec = make_one_hot(y, label_to_index)
        print(classification_report(y_true, y))
    else:
        y_vec = np.array(y)
        y_argmax = np.argmax(y, axis=-1)
        y_str = []
        for i in y_argmax:
            y_str.append(index_to_label[i])
        print(classification_report(y_true, y_str))
    # print("Fitting tokenizer...")
    # tokenizer = fit_get_tokenizer(X, max_words)
    print("Getting tokenizer")
    tokenizer = pickle.load(open(basepath + dataset + "tokenizer.pkl", "rb"))
    # print("Creating Embedding matrix...")
    # embedding_matrix = create_embedding_matrix(glove_dir, tokenizer, embedding_dim)
    if clf == "HAN":
        print("Splitting into train, dev...")
        X_train, y_train, X_val, y_val, _, _ = create_train_dev(X, labels=y_vec, tokenizer=tokenizer,
                                                                max_sentences=max_sentences,
                                                                max_sentence_length=max_sentence_length,
                                                                max_words=max_words, val=False)
        print("Getting Embedding matrix...")
        embedding_matrix = pickle.load(open(basepath + dataset + "embedding_matrix.pkl", "rb"))
        print("Initializing model...")
        model = HAN(max_words=max_sentence_length, max_sentences=max_sentences, output_size=len(y_train[0]),
                    embedding_matrix=embedding_matrix)
        print("Compiling model...")
        model.summary()
        if not soft:
            model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['acc'])
        else:
            model.compile(loss=kullback_leibler_divergence, optimizer='adam', metrics=['acc'])
        print("model fitting - Hierarchical attention network...")
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
        mc = ModelCheckpoint(filepath=tmp_dir + 'model.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_acc', mode='max',
                             verbose=1, save_weights_only=True, save_best_only=True)
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=256, callbacks=[es, mc])
        # print("****************** CLASSIFICATION REPORT FOR DOCUMENTS WITH LABEL WORDS ********************")
        # X_label_all = prep_data(texts=X, max_sentences=max_sentences, max_sentence_length=max_sentence_length,
        #                         tokenizer=tokenizer)
        # pred = model.predict(X_label_all)
        # pred_labels = get_from_one_hot(pred, index_to_label)
        # print(classification_report(y_true, pred_labels))
        print("****************** CLASSIFICATION REPORT FOR All DOCUMENTS ********************")
        X_all = prep_data(texts=df["text"], max_sentences=max_sentences, max_sentence_length=max_sentence_length,
                          tokenizer=tokenizer)
        y_true_all = df["label"]
        pred = model.predict(X_all)
        pred_labels = get_from_one_hot(pred, index_to_label)
        print("Dumping the model...")
        model.save_weights(dump_dir + "model_weights_" + model_name + ".h5")
        model.save(dump_dir + "model_" + model_name + ".h5")
    elif clf == "BERT":
        y_vec = []
        for lbl_ in y:
            y_vec.append(label_to_index[lbl_])
        model = train_bert(X, y_vec, use_gpu)
        y_true_all = []
        for lbl_ in df.label:
            y_true_all.append(label_to_index[lbl_])
        # Note: df_orig is only loaded above when old=True; the BERT branch relies on that path.
        predictions = test(model, df_orig["text"], y_true_all, use_gpu)
        for i, p in enumerate(predictions):
            # Concatenate per-batch prediction arrays into one matrix.
            if i == 0:
                pred = p
            else:
                pred = np.concatenate((pred, p))
        pred_labels = []
        for p in pred:
            pred_labels.append(index_to_label[p.argmax(axis=-1)])
        y_true_all = df["label"]
    elif clf == "CNN":
        y_vec = []
        for lbl_ in y:
            y_vec.append(label_to_index[lbl_])
        y_true_all = []
        for lbl_ in df.label:
            y_true_all.append(label_to_index[lbl_])
        pred_idxs, pred, true_idxs = train_cnn(X, y_vec, df["text"], y_true_all, use_gpu)
        pred_labels = []
        for p in pred_idxs:
            pred_labels.append(index_to_label[p])
        y_true_all = []
        for p in true_idxs:
            y_true_all.append(index_to_label[p])
    else:
        raise ValueError("clf can only be HAN or BERT or CNN")
    print(classification_report(y_true_all, pred_labels))
    return pred_labels, pred
                  metrics=['acc'])
    print("model fitting - Hierarchical attention network...")
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
    mc = ModelCheckpoint(filepath=tmp_dir + 'model.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_acc', mode='max',
                         verbose=1, save_weights_only=True, save_best_only=True)
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=256, callbacks=[es, mc])
    print("****************** CLASSIFICATION REPORT ON TEST DATA ********************")
    pred = model.predict(X_test)
    pred_labels = get_from_one_hot(pred, index_to_label)
    true_labels = get_from_one_hot(y_test, index_to_label)
    print(classification_report(true_labels, pred_labels))
    print("Dumping the model...")
    model.save_weights(dump_dir + "model_weights_" + model_name + ".h5")
    model.save(dump_dir + "model_" + model_name + ".h5")
def train_classifier(df, labels, label_term_dict, label_to_index, index_to_label, dataset_path):
    """Train a HAN on seed-word pseudo-labels (ConWea-style) and report predictions on all documents."""
    print("Going to train classifier..")
    basepath = dataset_path
    model_name = "conwea"
    dump_dir = basepath + "models/" + model_name + "/"
    tmp_dir = basepath + "checkpoints/" + model_name + "/"
    os.makedirs(dump_dir, exist_ok=True)
    os.makedirs(tmp_dir, exist_ok=True)
    max_sentence_length = 100
    max_sentences = 15
    max_words = 20000
    tokenizer = pickle.load(open(dataset_path + "tokenizer.pkl", "rb"))
    X, y, y_true = generate_pseudo_labels(df, labels, label_term_dict, tokenizer)
    y_one_hot = make_one_hot(y, label_to_index)
    print("Splitting into train, dev...")
    X_train, y_train, X_val, y_val = create_train_dev(X, labels=y_one_hot, tokenizer=tokenizer,
                                                      max_sentences=max_sentences,
                                                      max_sentence_length=max_sentence_length,
                                                      max_words=max_words)
    print("Getting Embedding matrix...")
    embedding_matrix = pickle.load(open(dataset_path + "embedding_matrix.pkl", "rb"))
    print("Initializing model...")
    model = HAN(max_words=max_sentence_length, max_sentences=max_sentences, output_size=len(y_train[0]),
                embedding_matrix=embedding_matrix)
    print("Compiling model...")
    model.summary()
    model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['acc'])
    print("model fitting - Hierarchical attention network...")
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
    mc = ModelCheckpoint(filepath=tmp_dir + 'model.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_acc', mode='max',
                         verbose=1, save_weights_only=True, save_best_only=True)
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=256, callbacks=[es, mc])
    print("****************** CLASSIFICATION REPORT FOR All DOCUMENTS ********************")
    X_all = prep_data(texts=df["sentence"], max_sentences=max_sentences, max_sentence_length=max_sentence_length,
                      tokenizer=tokenizer)
    y_true_all = df["label"]
    pred = model.predict(X_all)
    pred_labels = get_from_one_hot(pred, index_to_label)
    print(classification_report(y_true_all, pred_labels))
    print("Dumping the model...")
    model.save_weights(dump_dir + "model_weights_" + model_name + ".h5")
    model.save(dump_dir + "model_" + model_name + ".h5")
    return pred_labels
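# --- Illustrative sketch (not the actual generate_pseudo_labels used above) ----------------
# The ConWea-style trainer relies on generate_pseudo_labels(df, labels, label_term_dict, tokenizer)
# returning (X, y, y_true). A minimal seed-word-count version, assuming df has "sentence" and
# "label" columns as used above, might look like this; the real helper may weight or filter
# matches differently.
def generate_pseudo_labels_sketch(df, labels, label_term_dict, tokenizer):
    X, y, y_true = [], [], []
    index_word = {v: k for k, v in tokenizer.word_index.items()}
    for _, row in df.iterrows():
        # Tokenize with the shared tokenizer so matching is done on in-vocabulary words only.
        words = set(index_word[tok] for tok in tokenizer.texts_to_sequences([row["sentence"]])[0])
        counts = {}
        for l in labels:
            hits = words.intersection(label_term_dict.get(l, {}))
            if hits:
                counts[l] = len(hits)
        if counts:
            # Keep only documents that match at least one seed word; label by majority count.
            X.append(row["sentence"])
            y.append(max(counts, key=counts.get))
            y_true.append(row["label"])
    return X, y, y_true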
def train_weight_classifier(df, labels, label_term_dict, label_author_dict, label_conf_dict, label_to_index,
                            index_to_label, model_name, AND=True):
    """Train a HAN on weighted pseudo-labeled DBLP abstracts, passing per-document confidence
    weights to Keras as sample weights."""
    basepath = "/data4/dheeraj/metaguide/"
    dataset = "dblp/"
    # glove_dir = basepath + "glove.6B"
    dump_dir = basepath + "models/" + dataset + model_name + "/"
    tmp_dir = basepath + "checkpoints/" + dataset + model_name + "/"
    os.makedirs(dump_dir, exist_ok=True)
    os.makedirs(tmp_dir, exist_ok=True)
    max_sentence_length = 100
    max_sentences = 15
    max_words = 20000
    embedding_dim = 100
    tokenizer = pickle.load(open(basepath + dataset + "tokenizer.pkl", "rb"))
    X, y, y_true, weights = get_weighted_train_data(df, labels, label_term_dict, label_author_dict, label_conf_dict,
                                                    tokenizer, label_to_index, AND=AND)
    print("****************** CLASSIFICATION REPORT FOR TRAINING DATA ********************")
    # df_train = create_training_df(X, y, y_true)
    # df_train.to_csv(basepath + dataset + "training_label.csv")
    y_vec = make_one_hot(y, label_to_index)
    print(classification_report(y_true, y))
    # y = np.array(y)
    # print("Fitting tokenizer...")
    # tokenizer = fit_get_tokenizer(X, max_words)
    print("Getting tokenizer")
    tokenizer = pickle.load(open(basepath + dataset + "tokenizer.pkl", "rb"))
    print("Splitting into train, dev...")
    X_train, y_train, X_val, y_val, weights_train, _ = create_train_dev_weights(X, labels=y_vec, weights=weights,
                                                                                tokenizer=tokenizer,
                                                                                max_sentences=max_sentences,
                                                                                max_sentence_length=max_sentence_length,
                                                                                max_words=max_words)
    # print("Creating Embedding matrix...")
    # embedding_matrix = create_embedding_matrix(glove_dir, tokenizer, embedding_dim)
    print("Getting Embedding matrix...")
    embedding_matrix = pickle.load(open(basepath + dataset + "embedding_matrix.pkl", "rb"))
    print("Initializing model...")
    model = HAN(max_words=max_sentence_length, max_sentences=max_sentences, output_size=len(y_train[0]),
                embedding_matrix=embedding_matrix)
    print("Compiling model...")
    model.summary()
    model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['acc'])
    print("model fitting - Hierarchical attention network...")
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
    mc = ModelCheckpoint(filepath=tmp_dir + 'model.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_acc', mode='max',
                         verbose=1, save_weights_only=True, save_best_only=True)
    # Per-document confidence weights scale each sample's contribution to the loss.
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=256, callbacks=[es, mc],
              sample_weight=np.array(weights_train))
    # print("****************** CLASSIFICATION REPORT FOR DOCUMENTS WITH LABEL WORDS ********************")
    # X_label_all = prep_data(texts=X, max_sentences=max_sentences, max_sentence_length=max_sentence_length,
    #                         tokenizer=tokenizer)
    # pred = model.predict(X_label_all)
    # pred_labels = get_from_one_hot(pred, index_to_label)
    # print(classification_report(y_true, pred_labels))
    print("****************** CLASSIFICATION REPORT FOR All DOCUMENTS ********************")
    X_all = prep_data(texts=df["abstract"], max_sentences=max_sentences, max_sentence_length=max_sentence_length,
                      tokenizer=tokenizer)
    y_true_all = df["label"]
    pred = model.predict(X_all)
    pred_labels = get_from_one_hot(pred, index_to_label)
    print(classification_report(y_true_all, pred_labels))
    print("Dumping the model...")
    model.save_weights(dump_dir + "model_weights_" + model_name + ".h5")
    model.save(dump_dir + "model_" + model_name + ".h5")
    return pred_labels, pred
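# --- Illustrative sketch (not the repository's create_train_dev_weights) -------------------
# train_weight_classifier assumes create_train_dev_weights splits documents, one-hot labels, and
# per-document confidence weights with the same shuffle, so that sample_weight stays aligned with
# X_train in model.fit(..., sample_weight=np.array(weights_train)). A hypothetical version built
# on prep_data (the padding helper used above, assumed to return a NumPy array) might look like
# this; the real split logic may differ.
import numpy as np


def create_train_dev_weights_sketch(texts, labels, weights, tokenizer, max_sentences,
                                    max_sentence_length, max_words, dev_frac=0.1):
    X = prep_data(texts=texts, max_sentences=max_sentences,
                  max_sentence_length=max_sentence_length, tokenizer=tokenizer)
    y = np.asarray(labels)
    w = np.asarray(weights)
    idx = np.random.permutation(len(X))  # one shuffle shared by X, y, and w keeps them aligned
    cut = int(len(X) * (1 - dev_frac))
    train, dev = idx[:cut], idx[cut:]
    return X[train], y[train], X[dev], y[dev], w[train], w[dev]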
def train_classifier(df, tokenizer, embedding_matrix, labels, motpat_label_motifs_dict, label_to_index,
                     index_to_label, index_word, dataset_path, config):
    """Train a HAN on documents pseudo-labeled by counting label-indicative motifs
    (seed phrases and metadata entity patterns)."""

    def generate_pseudo_labels(df, labels, motpat_label_motifs_dict, tokenizer, index_word, config):
        y = []
        X = []
        for index, row in df.iterrows():
            count_dict = {}
            flag = 0
            for mot_pat in motpat_label_motifs_dict:
                label_motifs_dict = motpat_label_motifs_dict[mot_pat]
                if len(label_motifs_dict) == 0:
                    continue
                if mot_pat == "phrase":
                    # Count matches of label-indicative phrases in the document text.
                    tokens = tokenizer.texts_to_sequences([row["text"]])[0]
                    words = []
                    for tok in tokens:
                        words.append(index_word[tok])
                    for l in labels:
                        if len(label_motifs_dict[l]) == 0:
                            continue
                        seed_words = set(label_motifs_dict[l].keys())
                        int_words = list(set(words).intersection(seed_words))
                        for word in int_words:
                            flag = 1
                            try:
                                count_dict[l] += label_motifs_dict[l][word]
                            except KeyError:
                                count_dict[l] = label_motifs_dict[l][word]
                else:
                    # Count matches of label-indicative metadata entities or entity pairs.
                    size = len(mot_pat)
                    if size == 1:
                        first = mot_pat[0]
                        entities = get_entity_from_col(row[first], first, config)
                    elif size == 2:
                        first = mot_pat[0]
                        second = mot_pat[1]
                        first_ents = get_entity_from_col(row[first], first, config)
                        second_ents = get_entity_from_col(row[second], second, config)
                        if first == second:
                            entities = set(itertools.combinations(first_ents, 2))
                        else:
                            entities = set(itertools.product(first_ents, second_ents))
                    else:
                        raise Exception("Motif patterns of size more than 2 not yet handled but can be easily extended.")
                    for l in labels:
                        if len(label_motifs_dict[l]) == 0:
                            continue
                        seed_entities = set(label_motifs_dict[l].keys())
                        int_ents = list(entities.intersection(seed_entities))
                        for ent in int_ents:
                            flag = 1
                            try:
                                count_dict[l] += label_motifs_dict[l][ent]
                            except KeyError:
                                count_dict[l] = label_motifs_dict[l][ent]
            if flag:
                # Assign the label with the highest accumulated motif count.
                lbl = max(count_dict, key=count_dict.get)
                if not lbl:
                    continue
                y.append(lbl)
                X.append(row["text"])
        return X, y

    basepath = dataset_path
    model_name = "meta"
    dump_dir = basepath + "models/" + model_name + "/"
    tmp_dir = basepath + "checkpoints/" + model_name + "/"
    os.makedirs(dump_dir, exist_ok=True)
    os.makedirs(tmp_dir, exist_ok=True)
    max_sentence_length = 100
    max_sentences = 15
    max_words = 20000
    print("Generating pseudo-labels", flush=True)
    X, y = generate_pseudo_labels(df, labels, motpat_label_motifs_dict, tokenizer, index_word, config)
    y_vec = make_one_hot(y, label_to_index)
    print("Splitting into train, dev...", flush=True)
    X_train, y_train, X_val, y_val = create_train_dev(X, labels=y_vec, tokenizer=tokenizer,
                                                      max_sentences=max_sentences,
                                                      max_sentence_length=max_sentence_length,
                                                      max_words=max_words)
    print("Initializing model...", flush=True)
    model = HAN(max_words=max_sentence_length, max_sentences=max_sentences, output_size=len(y_train[0]),
                embedding_matrix=embedding_matrix)
    print("Compiling model...", flush=True)
    model.summary()
    model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['acc'])
    print("model fitting - Hierarchical attention network...", flush=True)
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
    mc = ModelCheckpoint(filepath=tmp_dir + 'model.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_acc', mode='max',
                         verbose=1, save_weights_only=True, save_best_only=True)
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=256, callbacks=[es, mc])
    print("****************** CLASSIFICATION REPORT FOR All DOCUMENTS ********************", flush=True)
    X_all = prep_data(texts=df["text"], max_sentences=max_sentences, max_sentence_length=max_sentence_length,
                      tokenizer=tokenizer)
    y_true_all = df["label"]
    pred = model.predict(X_all)
    pred_labels = get_from_one_hot(pred, index_to_label)
    print(classification_report(y_true_all, pred_labels), flush=True)
    print("Dumping the model...", flush=True)
    model.save_weights(dump_dir + "model_weights_" + model_name + ".h5")
    model.save(dump_dir + "model_" + model_name + ".h5")
    return pred_labels, pred
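# --- Illustrative sketch (not the repository's actual prep_data) ---------------------------
# Every trainer in this file feeds the HAN a 3-D tensor of shape
# (num_docs, max_sentences, max_sentence_length). A hypothetical prep_data that splits each
# document into sentences, tokenizes them, and zero-pads both axes could look like this; the
# real helper (and its sentence splitter) may differ.
import numpy as np
from nltk import tokenize


def prep_data_sketch(texts, max_sentences, max_sentence_length, tokenizer):
    data = np.zeros((len(texts), max_sentences, max_sentence_length), dtype=np.int32)
    for i, text in enumerate(texts):
        sentences = tokenize.sent_tokenize(text)[:max_sentences]
        seqs = tokenizer.texts_to_sequences(sentences)
        for j, seq in enumerate(seqs):
            seq = seq[:max_sentence_length]
            data[i, j, :len(seq)] = seq  # left-aligned word ids, zero-padded to fixed length
    return data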