Example #1
def main():
    rospy.init_node('training_node', anonymous=True, log_level=rospy.WARN)
    print("Start")
    
    # Parameters:
    EPISODE_MAX = 4000
    MAX_STEPS = 50
    #PARAMS
    GAMMA = 0.95
    MEMORY_SIZE = EPISODE_MAX*10
    BATCH_SIZE = 256
    EXPLORATION_MAX = 1.0
    EXPLORATION_MIN = 0.01
    EXPLORATION_DECAY = compute_exploration_decay(EXPLORATION_MAX, EXPLORATION_MIN, EPISODE_MAX, MAX_STEPS)

    # Env params
    observation_space = 10
    action_space = 4
    step_size = 0.025
    task = "position_learning"

    # Neural Net:
    hidden_layers = 2
    neurons = 64
    LEARNING_RATE = 0.001

    training = True
    if training:
        _, folder_path = utils.init_folders(task=task)
        utils.create_summary(folder_path, task, EPISODE_MAX, MAX_STEPS, GAMMA, MEMORY_SIZE, BATCH_SIZE, EXPLORATION_MAX, EXPLORATION_MIN, EXPLORATION_DECAY, observation_space, action_space, hidden_layers, neurons, LEARNING_RATE, step_size)

        # Training using demos
        model = utils.create_model(inputs=observation_space, outputs=action_space, hidden_layers=hidden_layers, neurons=neurons, LEARNING_RATE=LEARNING_RATE)
        env = init_env()
        # memory = create_demo(env)
        # GAMMA = 0.95
        # model = training_from_demo(model, memory, GAMMA)
        dqn_learning_keras_memoryReplay_v2(env, model, folder_path, EPISODE_MAX, MAX_STEPS, GAMMA, MEMORY_SIZE, BATCH_SIZE, EXPLORATION_MAX, EXPLORATION_MIN, EXPLORATION_DECAY, observation_space, action_space, step_size)
    else:
        # Predict model
        env = init_env()
        number_episode = 1195
        step_btw2_load = 500
        list_mean_loss = []
        folder_path = "/media/roboticlab14/DocumentsToShare/Reinforcement_learning/Datas/"
        folder_name = "20190802_184358_learn_to_go_position/"
        folder_path = folder_path + folder_name
        for i in range(0, number_episode, step_btw2_load):
            model_path = folder_path + "model/"
            model_name = "model_"
            total_model_path = model_path + model_name + str(i) + ".h5"
            model = utils.load_trained_model(total_model_path)
            list_mean_loss.append((i, use_model(env, model, MAX_STEPS, observation_space, step_size, GAMMA)))
        utils.save(list_mean_loss, number_episode,
                   arg_path=folder_path + "mean_loss/",
                   arg_name="mean_loss")
        print(list_mean_loss)
    print("End")
Example #2
def fit_and_eval(X_train, y_train, X_val, y_val, module, pretrained=False):
    """
    train model and eval hold-out performance

    BTW, write scores to csv files

    Parameters
    ----------
    X_train, y_train, X_val, y_val: features and targets

    module: a python module

    pretrained: bool, if true, load the model pickle

    Return
    ------
    best_thres: float

    df_score: dataframe with thres and f1 score
    """
    # get model
    model = module.get_model()
    # load model
    if pretrained:
        print('loading model ......')
        network = model.model
        model.model = load_trained_model(network, module.MODEL_FILEPATH)
    else:  # or, train model
        print('fitting model ......')
        model = model.fit(X_train, y_train)
    # predict probas
    print('predicting probas ......')
    y_proba = model.predict_proba(X_val)
    # score
    scores = {}
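    # Sweep decision thresholds from 0.00 to 0.50 in steps of 0.01 and record the F1 score at each.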
    for thres in np.arange(0, 0.51, 0.01):
        thres = round(thres, 2)
        scores[thres] = metrics.f1_score(y_val, (y_proba > thres).astype(int))
        print("val F1 score: {:.4f} with threshold at {}".format(
            scores[thres], thres))  # noqa
    # get max
    best_thres, best_score = max(scores.items(), key=operator.itemgetter(1))
    print("best F1 score: {:.4f} with threshold at {}".format(
        best_score, best_thres))  # noqa
    # collect per-threshold scores into a dataframe
    df_score = pd.DataFrame(scores, index=['f1']).transpose()
    return best_thres, df_score
Example #3
def gen_analysis_and_save(model_name="auc_cons",
                          weights_folder=MODELS_RESULTS / "auc_cons" / "tmp",
                          db_name="toy2",
                          param_files=None,
                          n=500):

    print("Starting eval...- {}".format(datetime.datetime.now()), flush=True)
    data_train, _ = load_db_by_name(db_name)
    X_train, Y_train, Z_train = data_train
    X_train, Y_train, Z_train = X_train[:n], Y_train[:n], Z_train[:n]

    plot_2d_dist(X_train, Y_train, Z_train, n=500)
    model = load_trained_model(model_name, pathlib.Path(weights_folder),
                               param_files)

    plot_01_square(lambda x: np.array(model.score(x)).ravel().astype(float))

    path_expes = weights_folder
    path_analysis = path_expes / "final_analysis"

    if not path_analysis.exists():
        path_analysis.mkdir()

    plt.xlabel("$x_1$")
    plt.ylabel("$x_2$")

    plt.savefig("{}/decision_plot.pdf".format(path_analysis))

    plt.figure(figsize=LEGEND_SIZE)
    labels = (["Y=+1, Z={}".format(i)
               for i in [0, 1]] + ["Y=-1, Z={}".format(i) for i in [0, 1]])
    handles = [
        plt.scatter([], [], color="green", marker="x", alpha=0.50),
        plt.scatter([], [], color="green", marker="o", alpha=0.50),
        plt.scatter([], [], color="red", marker="x", alpha=0.50),
        plt.scatter([], [], color="red", marker="o", alpha=0.50)
    ]
    plt.legend(handles, labels, loc="center")
    plt.gca().axis('off')
    plt.savefig("{}/legend_decision_plot.pdf".format(path_analysis))
Example #4
def test_model(config, model_name):

    db = TCGDB.TCGDB(config["path"]["tcg_root"])

    db.open()

    db.set_subclasses(
        train_subclasses=int(config["training-mode"]["subclasses"]))

    print("Finished opening TCG dataset.")

    for run_id in range(
            db.get_number_runs(
                protocol_type=config["training-mode"]["protocol_type"])):

        X_train, Y_train, X_test, Y_test = db.get_train_test_data(
            run_id=run_id,
            protocol_type=config["training-mode"]["protocol_type"])

        if config["model"]["name"] == 'TCN':
            model = utils.load_model_TCN(
                os.path.join(model_name, "combination_{}".format(run_id + 1)))
        else:
            model = utils.load_trained_model(
                os.path.join(model_name, "combination_{}".format(run_id + 1)))

        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        predictions = model.predict(X_test)

        if int(config['training-mode']['subclasses']) == 1:
            y_test_arr = Y_test.reshape(-1, 4)
            preds_arr = predictions.reshape(-1, 4)
        else:
            y_test_arr = Y_test.reshape(-1, 15)
            preds_arr = predictions.reshape(-1, 15)

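        # Drop padded timesteps, i.e. rows whose one-hot label vector is all zeros.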
        non_zero_indices = np.where(np.sum(y_test_arr, axis=1) != 0)[0]

        y_test_wo_pad_ohenc = y_test_arr[non_zero_indices]
        preds_wo_pad_ohenc = preds_arr[non_zero_indices]

        y_test_eval = np.argmax(y_test_wo_pad_ohenc, axis=1)
        preds_eval = np.argmax(preds_wo_pad_ohenc, axis=1)

        acc = accuracy_score(y_test_eval, preds_eval)

        predictions_bin = utils.binarize_predictions(
            X_test.reshape(-1, X_test.shape[2]),
            Y_test.reshape(-1, Y_test.shape[2]),
            predictions.reshape(-1, Y_test.shape[2]),
            subclasses=bool(int(config['training-mode']["subclasses"])))

        cnf_matrix = confusion_matrix(predictions_bin[1],
                                      predictions_bin[2],
                                      labels=utils.get_labels(config))

        jaccard_index = jaccard_score(np.argmax(Y_test, axis=2).reshape(-1),
                                      np.argmax(predictions,
                                                axis=2).reshape(-1),
                                      average='macro')

        f1 = f1_score(np.argmax(Y_test, axis=2).reshape(-1),
                      np.argmax(predictions, axis=2).reshape(-1),
                      average='macro')

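        # Note: returning inside the loop means only the first run_id / train-test combination is evaluated.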
        return acc, cnf_matrix, jaccard_index, f1
Example #5
def main():
    rospy.init_node('training_node', anonymous=True, log_level=rospy.WARN)
    print("Start")
    
    # Parameters:
    EPISODE_MAX = 4000
    MAX_STEPS = 50
    #PARAMS
    GAMMA = 0.95
    MEMORY_SIZE = EPISODE_MAX*10
    BATCH_SIZE = 256
    EXPLORATION_MAX = 1.0
    EXPLORATION_MIN = 0.01
    EXPLORATION_DECAY = compute_exploration_decay(EXPLORATION_MAX, EXPLORATION_MIN, EPISODE_MAX, MAX_STEPS)

    # Env params
    observation_space = 10
    action_space = 4
    step_size = 0.025
    task = "position_learning"

    # Neural Net:
    hidden_layers = 2
    neurons = 64
    LEARNING_RATE = 0.001

    training = True
    bool_evaluate = False
    
    if training:
        print("training")
        _, folder_path = utils.init_folders(task=task)
        utils.create_summary(folder_path, task, EPISODE_MAX, MAX_STEPS, GAMMA, MEMORY_SIZE, BATCH_SIZE, EXPLORATION_MAX, EXPLORATION_MIN, EXPLORATION_DECAY, observation_space, action_space, hidden_layers, neurons, LEARNING_RATE, step_size)

        # Training using demos
        model = utils.create_model(inputs=observation_space, outputs=action_space, hidden_layers=hidden_layers, neurons=neurons, LEARNING_RATE=LEARNING_RATE)
        env = init_env()
        # memory = create_demo(env)
        # GAMMA = 0.95
        # model = training_from_demo(model, memory, GAMMA)
        dqn_learning_keras_memoryReplay_v2(env, model, folder_path, EPISODE_MAX, MAX_STEPS, GAMMA, MEMORY_SIZE, BATCH_SIZE, EXPLORATION_MAX, EXPLORATION_MIN, EXPLORATION_DECAY, observation_space, action_space, step_size)
    elif bool_evaluate:
        print("evaluating")
        env = init_env()
        folder_path = "/media/roboticlab14/DocumentsToShare/Reinforcement_learning/Datas/position_learning/"
        folder_name = "model_for_kuka_position_learning/"
        folder_path = folder_path + folder_name
        
        model_path = folder_path + "model/"
        model_name = "model_"
        total_model_path = model_path + model_name + str(3500) + ".h5"
        model = utils.load_trained_model(total_model_path)
        use_model(env, model, MAX_STEPS, observation_space, step_size, GAMMA, folder_path)
    # Use the model as a working phase
    else:
        print("Predicting")
        env = init_env()
        folder_path = "/media/roboticlab14/DocumentsToShare/Reinforcement_learning/Datas/position_learning/"
        folder_name = "model_for_kuka_position_learning/"
        folder_path = folder_path + folder_name
        
        model_path = folder_path + "model/"
        model_name = "model_"
        total_model_path = model_path + model_name + str(3500) + ".h5"
        model = utils.load_trained_model(total_model_path)
        use_model_v2(env, model, MAX_STEPS, observation_space, step_size, GAMMA)
    print("End")
Example #6
def main() -> None:
    """
    Program entry point. Parses command line arguments to decide which dataset and model to use.
    Originally written as a group for the common pipeline. Later amended by Adam Jaamour.
    :return: None.
    """
    set_random_seeds()
    parse_command_line_arguments()
    print_num_gpus_available()

    # Create label encoder.
    l_e = create_label_encoder()

    # Run in training mode.
    if config.run_mode == "train":

        print("-- Training model --\n")

        # Start recording time.
        start_time = time.time()

        # Multi-class classification (mini-MIAS dataset)
        if config.dataset == "mini-MIAS":
            # Import entire dataset.
            images, labels = import_minimias_dataset(data_dir="../data/{}/images_processed".format(config.dataset),
                                                     label_encoder=l_e)

            # Split the dataset into training/test sets (80%/20% split).
            X_train, X_test, y_train, y_test = dataset_stratified_split(split=0.20, dataset=images, labels=labels)

            # Create the CNN model and split the training set into training/validation sets (75%/25% split).
            model = CnnModel(config.model, l_e.classes_.size)
            X_train, X_val, y_train, y_val = dataset_stratified_split(split=0.25,
                                                                      dataset=X_train,
                                                                      labels=y_train)

            # Calculate class weights.
            class_weights = calculate_class_weights(y_train, l_e)

            # Data augmentation.
            y_train_before_data_aug = y_train
            X_train, y_train = generate_image_transforms(X_train, y_train)
            y_train_after_data_aug = y_train
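            # Caution: np.random.shuffle permutes y_train in place without shuffling X_train,
            # so the image/label correspondence is not preserved beyond this point.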
            np.random.shuffle(y_train)

            if config.verbose_mode:
                print("Before data augmentation:")
                print(Counter(list(map(str, y_train_before_data_aug))))
                print("After data augmentation:")
                print(Counter(list(map(str, y_train_after_data_aug))))

            # Fit model.
            if config.verbose_mode:
                print("Training set size: {}".format(X_train.shape[0]))
                print("Validation set size: {}".format(X_val.shape[0]))
                print("Test set size: {}".format(X_test.shape[0]))
            model.train_model(X_train, X_val, y_train, y_val, class_weights)

        # Binary classification (binarised mini-MIAS dataset)
        elif config.dataset == "mini-MIAS-binary":
            # Import entire dataset.
            images, labels = import_minimias_dataset(data_dir="../data/{}/images_processed".format(config.dataset),
                                                     label_encoder=l_e)

            # Split the dataset into training/validation sets (80%/20% split).
            X_train, X_val, y_train, y_val = dataset_stratified_split(split=0.20, dataset=images, labels=labels)

            # Create the CNN model.
            model = CnnModel(config.model, l_e.classes_.size)
            # model.load_minimias_weights()
            # model.load_minimias_fc_weights()

            # Fit model.
            if config.verbose_mode:
                print("Training set size: {}".format(X_train.shape[0]))
                print("Validation set size: {}".format(X_val.shape[0]))
            model.train_model(X_train, X_val, y_train, y_val, None)

        # Binary classification (CBIS-DDSM dataset).
        elif config.dataset == "CBIS-DDSM":
            images, labels = import_cbisddsm_training_dataset(l_e)

            # Split training dataset into training/validation sets (75%/25% split).
            X_train, X_val, y_train, y_val = dataset_stratified_split(split=0.25, dataset=images, labels=labels)
            train_dataset = create_dataset(X_train, y_train)
            validation_dataset = create_dataset(X_val, y_val)

            # Calculate class weights.
            class_weights = calculate_class_weights(y_train, l_e)

            # Create and train CNN model.
            model = CnnModel(config.model, l_e.classes_.size)
            # model.load_minimias_fc_weights()
            # model.load_minimias_weights()

            # Fit model.
            if config.verbose_mode:
                print("Training set size: {}".format(X_train.shape[0]))
                print("Validation set size: {}".format(X_val.shape[0]))
            model.train_model(train_dataset, validation_dataset, None, None, class_weights)

        # Save training runtime.
        runtime = round(time.time() - start_time, 2)

        # Save the model and its weights/biases.
        model.save_model()
        model.save_weights()

        # Evaluate training results.
        print_cli_arguments()
        if config.dataset == "mini-MIAS":
            model.make_prediction(X_val)
            model.evaluate_model(y_val, l_e, 'N-B-M', runtime)
        elif config.dataset == "mini-MIAS-binary":
            model.make_prediction(X_val)
            model.evaluate_model(y_val, l_e, 'B-M', runtime)
        elif config.dataset == "CBIS-DDSM":
            model.make_prediction(validation_dataset)
            model.evaluate_model(y_val, l_e, 'B-M', runtime)
        print_runtime("Training", runtime)

    # Run in testing mode.
    elif config.run_mode == "test":

        print("-- Testing model --\n")

        # Start recording time.
        start_time = time.time()

        # Test multi-class classification (mini-MIAS dataset).
        if config.dataset == "mini-MIAS":
            images, labels = import_minimias_dataset(data_dir="../data/{}/images_processed".format(config.dataset),
                                                     label_encoder=l_e)
            _, X_test, _, y_test = dataset_stratified_split(split=0.20, dataset=images, labels=labels)
            model = load_trained_model()
            predictions = model.predict(x=X_test)
            runtime = round(time.time() - start_time, 2)
            test_model_evaluation(y_test, predictions, l_e, 'N-B-M', runtime)

        # Test binary classification (binarised mini-MIAS dataset).
        elif config.dataset == "mini-MIAS-binary":
            pass

        # Test binary classification (CBIS-DDSM dataset).
        elif config.dataset == "CBIS-DDSM":
            images, labels = import_cbisddsm_testing_dataset(l_e)
            test_dataset = create_dataset(images, labels)
            model = load_trained_model()
            predictions = model.predict(x=test_dataset)
            runtime = round(time.time() - start_time, 2)
            test_model_evaluation(labels, predictions, l_e, 'B-M', runtime)

        print_runtime("Testing", runtime)
Example #7
questions, answers = data.load_conv_text()
inp_lang = LanguageIndex(questions)
targ_lang = LanguageIndex(answers)

input_tensor = [[
    inp_lang.word2idx[token] for token in tokenize_sentence(question)
] for question in questions]
target_tensor = [[
    targ_lang.word2idx[token] for token in tokenize_sentence(answer)
] for answer in answers]

max_length_inp, max_length_tar = max_length(input_tensor), max_length(
    target_tensor)

model = load_trained_model(BATCH_SIZE, embedding_dim, units,
                           tf.train.AdamOptimizer())


def generate_answer(sentence, model, inp_lang, targ_lang, max_length_inp,
                    max_length_tar):
    inputs = [inp_lang.word2idx[i] for i in tokenize_sentence(sentence)]
    inputs = tf.keras.preprocessing.sequence.pad_sequences(
        [inputs], maxlen=max_length_inp, padding='post')
    inputs = tf.convert_to_tensor(inputs)

    result = ''

    hidden = [tf.zeros((1, units))]
    enc_out, enc_hidden = model.encoder(inputs, hidden)

    dec_hidden = enc_hidden
Example #8
def gen_plots_return_scores(model_name, weights_folder, db_name, param_files,
                            B=DEFAULT_B, save_scores=True):
    print("Working on {} - {}".format(model_name, datetime.datetime.now()),
          flush=True)
    model = load_trained_model(model_name, pathlib.Path(weights_folder),
                               param_files)

    # Evaluation
    data_train, data_test = load_db_by_name(db_name)
    X_train, y_train, z_train = data_train
    X_test, y_test, z_test = data_test

    s_test = np.array(model.score(X_test)).ravel().astype(float)
    s_train = np.array(model.score(X_train)).ravel().astype(float)

    path_expes = weights_folder
    path_analysis = path_expes/"final_analysis"

    if not path_analysis.exists():
        path_analysis.mkdir()

    # 1. Plot ROC curves for all problems on the same plot
    plot_roc_sec3(path_analysis,
                  (s_train, y_train, z_train), (s_test, y_test, z_test))

    # 2. Other view on ROCs
    plot_roc_sec4(path_analysis,
                  (s_train, y_train, z_train), (s_test, y_test, z_test))

    # 3. Give all interesting AUC values
    def filt_subgroup_AUC(y, z, z_val):
        return z == z_val

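    # filt_bnsp keeps all negatives plus the positives of subgroup z_val;
    # filt_bpsn keeps all positives plus the negatives of subgroup z_val.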
    def filt_bnsp(y, z, z_val):
        return np.logical_or(y != +1, z == z_val)

    def filt_bpsn(y, z, z_val):
        return np.logical_or(y == +1, z == z_val)

    data_train = (s_train, y_train, z_train)
    data_test = (s_test, y_test, z_test)

    save_aucs_to_file(data_train, data_test, filt_subgroup_AUC,
                      path_analysis/"auc_subgroup_auc.txt",
                      val_size=model.validation_size)

    save_aucs_to_file(data_train, data_test, filt_bnsp,
                      path_analysis/"auc_bnsp.txt",
                      val_size=model.validation_size)

    save_aucs_to_file(data_train, data_test, filt_bpsn,
                      path_analysis/"auc_bpsn.txt",
                      val_size=model.validation_size)

    # 3. Special monitorings
    if db_name in {"toy1", "toy2"}:
        x1 = model.score(np.array([[1, 0]])).ravel().astype(float)[0]
        x2 = model.score(np.array([[0, 1]])).ravel().astype(float)[0]
        if db_name == "toy1":
            c_val = x1/(np.abs(x1) + np.abs(x2))
        else:
            c_val = -x1/(np.abs(x1) + np.abs(x2))

        with open(path_analysis/"c_val.txt", "wt") as f:
            json.dump({model.coef_lagrange: c_val}, f)

    if hasattr(model, "mon_pts"):  # The model is a pointwise model
        save_ptw_to_file(data_train, data_test,
                         model.mon_pts.items(),
                         path_analysis/"ptw_summaries.txt",
                         val_size=model.validation_size)

    if save_scores:
        print("Done evaluation !", flush=True)

        path_scores = path_expes / "scorefiles"

        if not path_scores.exists():
            path_scores.mkdir()

        save_array_in_txt(s_train, path_scores/"s_train")
        save_array_in_txt(s_test, path_scores/"s_test")

        save_array_in_txt(y_train, path_scores/"y_train")
        save_array_in_txt(y_test, path_scores/"y_test")

        save_array_in_txt(z_train, path_scores/"z_train")
        save_array_in_txt(z_test, path_scores/"z_test")
Example #9
def main():
    tf.enable_eager_execution()

    questions1, answers1 = data.load_conv_text()
    # questions2, answers2 = data.load_opensubtitles_text()

    questions = list(questions1)
    answers = list(answers1)

    inp_lang, targ_lang = LanguageIndex(questions), LanguageIndex(answers)

    input_tensor = [[inp_lang.word2idx[token]
                     for token in tokenize_sentence(question)] for question in questions]
    target_tensor = [[targ_lang.word2idx[token]
                      for token in tokenize_sentence(answer)] for answer in answers]
    max_length_inp, max_length_tar = max_length(
        input_tensor), max_length(target_tensor)
    input_tensor = tf.keras.preprocessing.sequence.pad_sequences(input_tensor,
                                                                 maxlen=max_length_inp,
                                                                 padding='post')
    target_tensor = tf.keras.preprocessing.sequence.pad_sequences(target_tensor,
                                                                  maxlen=max_length_tar,
                                                                  padding='post')
    BUFFER_SIZE = len(input_tensor)
    dataset = tf.data.Dataset.from_tensor_slices(
        (input_tensor, target_tensor)).shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

    model: encoder_decoder.Seq2Seq = load_trained_model(
        BATCH_SIZE, EMBEDDING_DIM, UNITS, tf.train.AdamOptimizer())

    # sentimental_words = [...]  # full list of positive sentiment words, kept commented out; a short list is used below
    
    sentimental_words = ["good", "excellent", "well"]
    targ_lang_embd = get_GloVe_embeddings(targ_lang.vocab, EMBEDDING_DIM)
    sentimental_words_embd = get_GloVe_embeddings(
        sentimental_words, EMBEDDING_DIM)
    sim_scores = np.dot(sentimental_words_embd, np.transpose(targ_lang_embd))
    print(sim_scores.shape)
    #max_prob_ids = np.argmax(sim_scores, axis=1)
    # print(max_prob_ids)
    # print(targ_lang.word2idx)
    # print(targ_lang.idx2word(max_prob_ids[1]))

    optimizer = tf.train.AdamOptimizer()

    checkpoint_dir = './training_checkpoints'
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, seq2seq=model)

    for episode in range(EPISODES):

        # Start of Episode
        start = time.time()
        total_loss = 0
        for (batch, (inp, targ)) in enumerate(dataset):
            with tf.GradientTape() as tape:

                hidden = tf.zeros((BATCH_SIZE, UNITS))
                enc_hidden = model.encoder(inp, hidden)
                dec_hidden = enc_hidden
                dec_input = tf.expand_dims(
                    [targ_lang.word2idx[BEGIN_TAG]] * BATCH_SIZE, 1)

                loss = 0  # loss for decoder
                pg_loss = 0  # loss for semantic

                result = ''
                for t in range(1, targ.shape[1]):
                    actions = []
                    probs = []
                    rewards = []
                    predictions, dec_hidden = model.decoder(
                        dec_input, dec_hidden)
                    '''
                    predicted_id = tf.argmax(predictions[0]).numpy()
                    if targ_lang.idx2word[predicted_id] == END_TAG:
                        print("result: ", result)
                    else:
                        result += ' ' + targ_lang.idx2word[predicted_id]
                    '''
                    # using teacher forcing
                    dec_input = tf.expand_dims(targ[:, t], 1)
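                    # Sample one action per batch element from the top-K tokens of its predicted distribution.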
                    for ps in predictions:
                        # action = tf.distributions.Categorical(ps).sample(1)[0]
                        top_k_indices = tf.nn.top_k(ps, TOP_K).indices.numpy()
                        action = np.random.choice(top_k_indices, 1)[0]
                        actions.append(action)
                        prob = ps.numpy()[action]
                        probs.append(prob)
                        reward = np.max(sim_scores[1:, action])
                        print(targ_lang.idx2word[action], reward)
                        # print(targ_lang.idx2word[action], reward)
                        rewards.append(reward)

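                        # Note: these statistics are recomputed on every iteration; with a single reward,
                        # reward_std is 0 and the normalization divides by zero.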
                        # normalize reward
                        reward_mean = np.mean(rewards)
                        reward_std = np.std(rewards)
                        norm_rewards = [(r - reward_mean) /
                                        reward_std for r in rewards]

                    if targ_lang.idx2word[actions[0]] == END_TAG:
                        print("result: ", result)
                    else:
                        result += ' ' + targ_lang.idx2word[actions[0]]

                    onehot_labels = tf.keras.utils.to_categorical(
                        y=actions, num_classes=len(targ_lang.word2idx))

                    norm_rewards = tf.convert_to_tensor(
                        norm_rewards, dtype="float32")
                    # print(onehot_labels.shape)
                    # print(predictions.shape)
                    loss += model.loss_function(targ[:, t], predictions)
                    # print("------")
                    # print(loss)
                    # print(probs)
                    #pg_loss_cross = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=onehot_labels, logits=targ[:, t]))
                    pg_loss_cross = model.loss_function(
                        targ[:, t], predictions)  # second argument assumed: predictions, as in the supervised loss above
                    # pg_loss_cross = tf.reduce_mean(
                    #     pg_loss_cross * norm_rewards)
                    pg_loss_cross = tf.reduce_mean(
                        pg_loss_cross * rewards)
                    # print(pg_loss_cross)
                    # print("------")
                    # print(pg_loss_cross)
                    pg_loss += pg_loss_cross
                # End of Episode
                # Update policy
                batch_loss = ((loss + pg_loss) / int(targ.shape[1]))
                total_loss += batch_loss
                variables = model.encoder.variables + model.decoder.variables
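                # Note: gradients are computed from `loss` alone, so pg_loss does not affect the parameter update.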
                gradients = tape.gradient(loss, variables)
                optimizer.apply_gradients(zip(gradients, variables))
                if batch % 10 == 0:
                    print('batch {} training loss {:.4f}'.format(
                        batch, total_loss.numpy()))

        # saving (checkpoint) the model every 100 episodes
        #if (episode + 1) % 100 == 0:
            #checkpoint.save(file_prefix=checkpoint_prefix)

        print('Time taken for episode {}: {} sec\n'.format(
            episode, time.time() - start))
    # Creating training and validation sets using an 80-20 split
    input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(
        input_tensor, target_tensor, test_size=0.2)

    BUFFER_SIZE = len(input_tensor_train)
    dataset = tf.data.Dataset.from_tensor_slices(
        (input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    EVAL_BUFFER_SIZE = len(input_tensor_val)
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (input_tensor_val, target_tensor_val)).shuffle(EVAL_BUFFER_SIZE)
    val_dataset = val_dataset.batch(BATCH_SIZE, drop_remainder=True)
    N_BATCH = BUFFER_SIZE // BATCH_SIZE

    model: seq2seq.Seq2Seq = utils.load_trained_model(BATCH_SIZE,
                                                      EMBEDDING_DIM, units,
                                                      tf.train.AdamOptimizer())
    '''
    model = seq2seq.Seq2Seq(
        vocab_inp_size, vocab_tar_size, EMBEDDING_DIM, units, BATCH_SIZE,
        inp_lang=inp_lang, targ_lang=targ_lang,max_length_tar=max_length_tar,
        use_GloVe=USE_GLOVE,
        mode=BASIC,
        use_bilstm=True
    )
    '''
    checkpoint_dir = './training_checkpoints'
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, seq2seq=model)

    for epoch in range(EPOCHS):