Example #1
            (max_depth, min_samples), {
                'max_depth': max_depth,
                'min_samples_leaf': min_samples
            })
print()

algo, options, name = best_classifier
print "Best overall: %s with %.5f" % (name, best_score)

# re-train best algo with whole training set
classifier = algo(**options)
classifier.fit(X, Y)

output_predictions(classifier, '04_submission.csv', formatting_functions,
                   features)
plot_learning_curve(name, algo, options, X, Y, min_size=50, n_steps=50)

print('=' * 100)
print()

# Random forests

best_score = 0.0
best_classifier = ()

test_algo(RandomForestClassifier, X, Y, "Random Forest with 10 trees")
test_algo(RandomForestClassifier, X, Y, "Random Forest with 50 trees",
          {'n_estimators': 50})
test_algo(RandomForestClassifier, X, Y, "Random Forest with 100 trees",
          {'n_estimators': 100})
test_algo(RandomForestClassifier, X, Y,
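
Note: this snippet assumes a test_algo helper defined earlier in the same script. A minimal sketch of what it plausibly looks like, given how best_score and best_classifier are used above (the cross-validation setup is an assumption, not the original code):

from sklearn.model_selection import cross_val_score

def test_algo(algo, X, Y, name, options=None):
    # Hypothetical reconstruction: cross-validate algo(**options) and keep the
    # best (algo, options, name) triple in the module-level globals used above.
    global best_score, best_classifier
    options = options or {}
    score = cross_val_score(algo(**options), X, Y, cv=5).mean()
    print("%s: %.5f" % (name, score))
    if score > best_score:
        best_score = score
        best_classifier = (algo, options, name)
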
Example #2
    best_score = env.reward_range[0]
    score_hist = []

    env.render("rgb_array")

    for i in range(n_game):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent1.action_choose(observation)
            nw_observation, reward, done, _ = env.step(action)
            score += reward
            agent1.rem_transition(observation, action, reward, nw_observation,
                                  done)
            agent1.learning()
            observation = nw_observation

        score_hist.append(score)
        avg_score = np.mean(score_hist[-100:])

        if avg_score > best_score:
            best_score = avg_score
            agent1.model_save()

        print('episode:', i, 'score: %.1f' % score,
              'avg score: %.1f' % avg_score)

    x = [i + 1 for i in range(n_game)]
    plot_learning_curve(x, score_hist, fig_file, n_game)
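
plot_learning_curve is not defined in this snippet. A plausible sketch matching the (x, scores, figure_file, n_games) call above: plot the running average of the previous 100 scores and save the figure (the exact styling is an assumption):

import numpy as np
import matplotlib.pyplot as plt

def plot_learning_curve(x, scores, figure_file, n_games=None):
    # Assumed helper: running average of the previous 100 scores,
    # saved to figure_file (n_games is accepted but unused here).
    running_avg = np.zeros(len(scores))
    for i in range(len(running_avg)):
        running_avg[i] = np.mean(scores[max(0, i - 100):i + 1])
    plt.figure()
    plt.plot(x, running_avg)
    plt.title('Running average of previous 100 scores')
    plt.savefig(figure_file)
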
Example #3
win_pct_list = []
scores = []
eps_history = []

for i in range(n_episodes):
    done = False
    score = 0
    s = env.reset()

    while not done:
        a = A.pick_action(s)
        s_, r, done, info = env.step(a)
        A.learn(s, a, r, s_)
        score += r
        s = s_

    scores.append(score)
    eps_history.append(A.eps)
    if i % 100 == 0:
        win_pct = np.mean(scores[-100:])
        win_pct_list.append(win_pct)
        if i % 1000 == 0:
            print('episode', i, 'win pct %.2f' % win_pct, 'eps %.2f' % A.eps)

#plt.plot(win_pct_list)
#plt.show()

x = [i + 1 for i in range(n_episodes)]
plot_learning_curve(x, scores, eps_history)
Example #4
scores = []
eps_history = []

agent = Agent(lr=0.0001,
              input_dims=env.observation_space.shape,
              n_actions=env.action_space.n)

for i in range(n_games):
    score = 0
    done = False
    obs = env.reset()

    while not done:
        action = agent.choose_action(obs)
        resulted_obs, reward, done, info = env.step(action)
        score += reward
        agent.learn(obs, action, reward, resulted_obs)
        obs = resulted_obs
    scores.append(score)
    eps_history.append(agent.epsilon)

    if i % 100 == 0:
        avg_score = np.mean(scores[-100:])
        print(
            'episode', i, 'score: %.1f, avg_score: %.1f, eps: %.4f' %
            (score, avg_score, agent.epsilon))

filename = "CartPole_DQN.png"
x = [i + 1 for i in range(n_games)]
plot_learning_curve(x, scores, eps_history, filename)
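
Here plot_learning_curve also receives the epsilon history, which suggests a two-axis plot. A hedged sketch of such a helper (assumed, not the original):

import numpy as np
import matplotlib.pyplot as plt

def plot_learning_curve(x, scores, epsilons, filename):
    # Assumed helper: epsilon on the left y-axis, 100-game running-average
    # score on the right y-axis, saved to filename.
    fig, ax = plt.subplots()
    ax.plot(x, epsilons, color='C0')
    ax.set_xlabel('Game')
    ax.set_ylabel('Epsilon', color='C0')

    ax2 = ax.twinx()
    running_avg = [np.mean(scores[max(0, t - 100):t + 1])
                   for t in range(len(scores))]
    ax2.plot(x, running_avg, color='C1')
    ax2.set_ylabel('Score', color='C1')

    fig.savefig(filename)
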
Example #5
plot_roc_curve(n_classes, y_test_bin, y_score, filename)
"""Plot Decision Area"""
clf = ExtraTreesClassifier(n_estimators=ESTIMATORS,
                           max_features=0.42,
                           n_jobs=-1)
plot_decision_area(clf,
                   X_scaled[:, 2:4],
                   y,
                   title="Extra Trees Classifier",
                   filename=filename)
"""Plot Learning Curve"""
X_lc = X_scaled[:10000]
y_lc = y[:10000]
plot_learning_curve(clf,
                    "Extra Trees Classifier",
                    X_lc,
                    y_lc,
                    filename=filename)
"""Plot Validation Curve: max_depth"""
clf = ExtraTreesClassifier(n_estimators=ESTIMATORS, max_depth=8)
param_name = 'max_depth'
param_range = [1, 2, 4, 8, 16, 32, 100]
plot_validation_curve(clf,
                      X_lc,
                      y_lc,
                      param_name,
                      param_range,
                      scoring='roc_auc',
                      cv=n_classes,
                      filename=filename)
"""Plot Validation Curve: n_estimators"""
Example #6
                  gamma=0.99,
                  input_dims=[8],
                  n_actions=4,
                  fc1_dims=2048,
                  fc2_dims=1536)
    n_games = 2000

    filename = 'ACTOR_CRITIC_' + 'lunar_lander_' + str(agent.fc1_dims) + \
               '_fc1_dims_' + str(agent.fc2_dims) + '_fc2_dims_lr'\
               + str(agent.lr) + \
               '_' + str(n_games) + 'games'

    figure_file = 'plots/' + filename
    scores = []
    for i in range(n_games):
        done = False
        observation = env.reset()
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.learn(observation, reward, observation_, done)
            observation = observation_
        scores.append(score)
        avg = np.mean(scores[-100:])
        print('episode', i, 'score %.1f' % score, 'average %.1f' % avg)

    x = [i + 1 for i in range(n_games)]
    plot_learning_curve(x, scores, filename)
Example #7
def worker(name, input_shape, n_actions, global_agent, global_icm,
           optimizer, icm_optimizer, env_id, n_threads, icm=False):
    T_MAX = 20

    local_agent = ActorCritic(input_shape, n_actions)

    if icm:
        local_icm = ICM(input_shape, n_actions)
        algo = 'ICM'
    else:
        intrinsic_reward = T.zeros(1)
        algo = 'A3C'

    memory = Memory()

    env = gym.make(env_id)

    t_steps, max_eps, episode, scores, avg_score = 0, 1000, 0, [], 0

    while episode < max_eps:
        obs = env.reset()
        hx = T.zeros(1, 256)
        score, done, ep_steps = 0, False, 0
        while not done:
            state = T.tensor([obs], dtype=T.float)
            action, value, log_prob, hx = local_agent(state, hx)
            obs_, reward, done, info = env.step(action)
            t_steps += 1
            ep_steps += 1
            score += reward
            reward = 0  # turn off extrinsic rewards
            memory.remember(obs, action, reward, obs_, value, log_prob)
            obs = obs_
            if ep_steps % T_MAX == 0 or done:
                states, actions, rewards, new_states, values, log_probs = \
                        memory.sample_memory()
                if icm:
                    intrinsic_reward, L_I, L_F = \
                            local_icm.calc_loss(states, new_states, actions)

                loss = local_agent.calc_loss(obs, hx, done, rewards, values,
                                             log_probs, intrinsic_reward)

                optimizer.zero_grad()
                hx = hx.detach_()
                if icm:
                    icm_optimizer.zero_grad()
                    (L_I + L_F).backward()

                loss.backward()
                T.nn.utils.clip_grad_norm_(local_agent.parameters(), 40)

                for local_param, global_param in zip(
                                        local_agent.parameters(),
                                        global_agent.parameters()):
                    global_param._grad = local_param.grad
                optimizer.step()
                local_agent.load_state_dict(global_agent.state_dict())

                if icm:
                    for local_param, global_param in zip(
                                            local_icm.parameters(),
                                            global_icm.parameters()):
                        global_param._grad = local_param.grad
                    icm_optimizer.step()
                    local_icm.load_state_dict(global_icm.state_dict())
                memory.clear_memory()

        if name == '1':
            scores.append(score)
            avg_score = np.mean(scores[-100:])
            print('{} episode {} thread {} of {} steps {:.2f}M score {:.2f} '
                  'intrinsic_reward {:.2f} avg score (100) {:.1f}'.format(
                      algo, episode, name, n_threads,
                      t_steps/1e6, score,
                      T.sum(intrinsic_reward),
                      avg_score))
        episode += 1
    if name == '1':
        x = [z for z in range(episode)]
        fname = algo + '_CartPole_no_rewards.png'
        plot_learning_curve(x, scores, fname)
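
The Memory class used by the worker is not shown. A minimal sketch consistent with the remember/sample_memory/clear_memory calls above (assumed):

class Memory:
    def __init__(self):
        self.clear_memory()

    def remember(self, state, action, reward, new_state, value, log_prob):
        # Store one transition per step of the T_MAX-step rollout segment.
        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)
        self.new_states.append(new_state)
        self.values.append(value)
        self.log_probs.append(log_prob)

    def sample_memory(self):
        return (self.states, self.actions, self.rewards,
                self.new_states, self.values, self.log_probs)

    def clear_memory(self):
        self.states, self.actions, self.rewards = [], [], []
        self.new_states, self.values, self.log_probs = [], [], []
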
Example #8
# print(utils.mean_cross_score(rf, tfidf_uni_bigram_train, train_cats,
# 							 cv=5, n_jobs=-1, scoring=recall_scorer))


# utils.plot_learning_curve(rf, 'random forest f1 score', tfidf_uni_bigram_train,
# 						  train_cats, cv=5, n_jobs=-1, scorer=f1_scorer)
# utils.plot_learning_curve(rf, 'random forest precision score', tfidf_uni_bigram_train,
# 						  train_cats, cv=5, n_jobs=-1, scorer=precision_scorer)
# utils.plot_learning_curve(rf, 'random forest accuracy score', tfidf_uni_bigram_train,
# 						  train_cats, cv=5, n_jobs=-1, scorer=accuracy_scorer)
# utils.plot_learning_curve(rf, 'random forest hamming loss', tfidf_uni_bigram_train,
# 						  train_cats, cv=5, n_jobs=-1, scorer=hamming_losser)
# utils.plot_learning_curve(rf, 'random forest jaccard score', tfidf_uni_bigram_train,
# 						  train_cats, cv=5, n_jobs=-1, scorer=jaccard_scorer)
utils.plot_learning_curve(rf, 'random forest matthews corr coef scorer', tfidf_uni_bigram_train,
						  train_cats, cv=5, n_jobs=-1,
						  scorer=matthews_corrcoef_scorer)
utils.plot_learning_curve(rf, 'random forest recall score', tfidf_uni_bigram_train,
						  train_cats, cv=5, n_jobs=-1, scorer=recall_scorer)


def evaluate():
	train_uni_bigram, test_uni_bigram, train_cats, test_cats = \
		feature_vecs.do_uni_bigram()

	tfidf_uni_bigram_train = feature_vecs.tfidf_matrix(train_uni_bigram)
	tfidf_uni_bigram_test = feature_vecs.tfidf_matrix(test_uni_bigram)

	# tfidf_uni_bigram_train, tfidf_uni_bigram_test, train_cats, test_cats = feature_vecs.do_means_embeddings()

	rf = RandomForestClassifier(n_jobs=-1)
Example #9
    def train(self, env, rollouts, policy, params, use_hints=False):
        rollout_time, update_time = AverageMeter(), AverageMeter()  # Loggers
        rewards, deck_avgs, av_ds = [], [], []

        print("Training model with {} parameters...".format(policy.num_params))
        '''
        Training Loop
        '''
        train_start_time = time.time()

        for j in range(params.num_updates):
            ## Initialization
            avg_eps_reward = AverageMeter()
            #minigrid resets the game after each rollout, we should either make rollout size big enough to reach endgame, or not
            done = False
            game_state, prev_obs = self.reset_game(env)
            prev_obs = torch.tensor(prev_obs, dtype=torch.float32)
            prev_eval = env.game.num_playable()  #used to calculate reward
            eps_reward = 0.
            start_time = time.time()
            deck_end_sizes = []
            ## Collect rollouts
            for step in range(rollouts.rollout_size):
                if done:
                    # # Store episode statistics
                    avg_eps_reward.update(eps_reward)
                    deck_end_sizes.append(len(env.game.state['drawpile']))

                    # Reset Environment
                    game_state, obs = self.reset_game(env)
                    obs = torch.tensor(obs, dtype=torch.float32)
                    prev_eval = env.game.num_playable()  # used to calculate reward
                    eps_reward = 0.
                else:
                    obs = prev_obs

                #agent action
                #action, log_prob = agents[state['current_player']].act(state)
                curr_player = env.game.state['current_player']
                original_legal_actions = env.game.state['legal_actions'][
                    curr_player]

                legal_actions = original_legal_actions

                if len(env.game.state['players']) <= 1:
                    hints_tensor = torch.zeros(33, dtype=torch.float32)
                else:
                    next_player = curr_player + 1
                    next_player = next_player if next_player < env.game.num_players else 0
                    hints_tensor = torch.tensor(
                        env.game.state['hints'][next_player],
                        dtype=torch.float32)

                if use_hints:
                    action, log_prob = policy.act(
                        obs, legal_actions,
                        hints_tensor)  # 1-curr_player for 2 player game
                else:
                    action, log_prob = policy.act(obs, legal_actions)

                if original_legal_actions[int(action)]:
                    state, next_player = env.step(action)

                obs = env.get_encoded_state()

                if original_legal_actions[int(action)]:
                    #if our play reduces us by more than 5 playable cards, negatives reward, else positive
                    curr_eval = env.game.num_playable()
                    reward = (curr_eval - prev_eval) / 5 + 1
                    prev_eval = curr_eval
                else:
                    reward = -20
                done = env.game.is_over()

                rollouts.insert(step,
                                torch.tensor(done, dtype=torch.float32),
                                action, log_prob,
                                torch.tensor(reward, dtype=torch.float32),
                                prev_obs,
                                torch.tensor(legal_actions), hints_tensor,
                                torch.tensor(obs, dtype=torch.float32))

                if isinstance(policy, DuelDQN):
                    policy.total_t += 1

                prev_obs = torch.tensor(obs, dtype=torch.float32)
                eps_reward += reward

            ############################## TODO: YOUR CODE BELOW ###############################
            ### 4. Use the rollout buffer's function to compute the returns for all          ###
            ###    stored rollout steps. Discount factor is given in 'params'                ###
            ### HINT: This requires just 1 line of code.                                     ###
            ####################################################################################
            rollouts.compute_returns(params.discount)
            ################################# END OF YOUR CODE #################################

            rollout_done_time = time.time()
            if use_hints:
                policy.update(rollouts, use_hints)
            else:
                policy.update(rollouts)
            update_done_time = time.time()
            rollouts.reset()

            ## log metrics
            rewards.append(avg_eps_reward.avg)
            avg_deck_end_size = np.sum(deck_end_sizes) / len(deck_end_sizes)
            deck_avgs.append(avg_deck_end_size)
            rollout_time.update(rollout_done_time - start_time)
            update_time.update(update_done_time - rollout_done_time)
            av_ds.append(avg_deck_end_size)
            print(
                'it {}: avgR: {:.3f} -- rollout_time: {:.3f}sec -- update_time: {:.3f}sec'
                .format(j, avg_eps_reward.avg, rollout_time.avg,
                        update_time.avg))

            if (j + 1) % params.plotting_iters == 0 and j != 0:
                plot_learning_curve(av_ds, j + 1)
            # if j % self.params['plotting_iters'] == 0 and j != 0:
            #     plot_learning_curve(rewards, success_rate, params.num_updates)
            #     log_policy_rollout(policy, params['env_name'], pytorch_policy=True)
            print('av deck size: {:.3f}, games_played: {}'.format(
                avg_deck_end_size, len(deck_end_sizes)))

        return rewards, deck_avgs
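
AverageMeter is referenced but not defined in this example. A small sketch matching the update()/avg usage above (assumed):

class AverageMeter:
    # Assumed utility: track a running average, as used by the
    # rollout/update timers and the per-episode reward logger above.
    def __init__(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count
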
Example #10
        return {'rwd': reward_history, 'rwd_avg': reward_avg}


if __name__ == '__main__':
    genv = gym.make('CartPole-v1')
    obs_size = np.prod(list(genv.observation_space.shape))
    print("obs_size:{}, obs_shape:{}".format(obs_size,
                                             genv.observation_space.shape))

    # init. TF session
    tf_sess_config = {
        'allow_soft_placement': True,
        'intra_op_parallelism_threads': 8,
        'inter_op_parallelism_threads': 4,
    }

    training = False
    dqn = DQN_Policy("dqn_cartpole", genv, 0.99, 64, [32, 32], training,
                     tf_sess_config)
    if training:
        # dqn training
        config = TrainConfig()
        dqn.build()
        reward_dict = dqn.train(config)
        plot_learning_curve(os.path.join(dqn.figs_dir, 'dqn_train.png'),
                            reward_dict,
                            xlabel='step')
    else:
        dqn.build()
        if dqn.load_checkpoint():
            dqn.eval(5)
Example #11
def question_1():
    global input_shape, X_train, y_train_labels, y_train, X_test, y_test_labels, y_test

    print(
        "------------------------------------------------------------------------"
    )
    print("Baseline Model")
    print(
        "------------------------------------------------------------------------"
    )
    model1 = baseline_model(input_shape, num_classes)
    loss_callback_1 = LossHistory((X_test, y_test))
    model1.fit(X_train,
               y_train,
               batch_size=batch_size,
               epochs=epochs,
               verbose=1,
               validation_data=(X_test, y_test),
               callbacks=[loss_callback_1])
    model1.save('model1.h5')
    plot_learning_curve(
        [loss_callback_1.train_indices, loss_callback_1.test_indices],
        [loss_callback_1.train_losses, loss_callback_1.test_losses],
        colors=['g-', 'm-'],
        labels=['Train loss', 'Test loss'],
        title="Loss evolution for Baseline Model",
        path="../outputs/q1/plots/train_test_loss_baseline.png",
        axlabels=["Iterations", "Loss"])
    plot_learning_curve([loss_callback_1.test_indices],
                        [loss_callback_1.test_acc],
                        colors=['c-'],
                        labels=['Test Accuracy'],
                        title="Accuracy evolution for Baseline Model",
                        path="../outputs/q1/plots/test_acc_baseline.png",
                        axlabels=["Iterations", "Accuracy"])

    print(
        "------------------------------------------------------------------------"
    )
    print("2 conv layer model")
    print(
        "------------------------------------------------------------------------"
    )
    model2 = two_conv_layer_model(input_shape, num_classes)
    loss_callback_2 = LossHistory((X_test, y_test))
    model2.fit(X_train,
               y_train,
               batch_size=batch_size,
               epochs=epochs,
               verbose=1,
               validation_data=(X_test, y_test),
               callbacks=[loss_callback_2])
    model2.save('model2.h5')
    plot_learning_curve(
        [loss_callback_2.train_indices, loss_callback_2.test_indices],
        [loss_callback_2.train_losses, loss_callback_2.test_losses],
        colors=['g-', 'm-'],
        labels=['Train loss', 'Test loss'],
        title="Loss evolution for 2 conv layered Model",
        path="../outputs/q1/plots/train_test_loss_2_conv.png",
        axlabels=["Iterations", "Loss"])
    plot_learning_curve([loss_callback_2.test_indices],
                        [loss_callback_2.test_acc],
                        colors=['c-'],
                        labels=['Test Accuracy'],
                        title="Accuracy evolution for 2 conv layered Model",
                        path="../outputs/q1/plots/test_acc_2_conv.png",
                        axlabels=["Iterations", "Accuracy"])

    print(
        "------------------------------------------------------------------------"
    )
    print("2 conv layer + 1 hidden dense layer model")
    print(
        "------------------------------------------------------------------------"
    )
    model3 = two_conv_one_dense_layer_model(input_shape, num_classes)
    loss_callback_3 = LossHistory((X_test, y_test))
    model3.fit(X_train,
               y_train,
               batch_size=batch_size,
               epochs=epochs,
               verbose=1,
               validation_data=(X_test, y_test),
               callbacks=[loss_callback_3])
    model3.save('model3.h5')
    plot_learning_curve(
        [loss_callback_3.train_indices, loss_callback_3.test_indices],
        [loss_callback_3.train_losses, loss_callback_3.test_losses],
        colors=['g-', 'm-'],
        labels=['Train loss', 'Test loss'],
        title="Loss evolution for 2 Conv + 1 Dense layer config",
        path="../outputs/q1/plots/train_test_loss_2_conv_1_dense.png",
        axlabels=["Iterations", "Loss"])
    plot_learning_curve([loss_callback_3.test_indices],
                        [loss_callback_3.test_acc],
                        colors=['c-'],
                        labels=['Test Accuracy'],
                        title="Accuracy evolution for 2 conv + 1 dense config",
                        path="../outputs/q1/plots/test_acc_2_conv_1_dense.png",
                        axlabels=["Iterations", "Accuracy"])

    ids = np.random.choice(X_test.shape[0], 20)
    X_samples = X_test[ids]
    pred_samples_1 = model1.predict(X_samples)
    generate_image_outputs(X_samples,
                           np.argmax(pred_samples_1, axis=1),
                           path="../outputs/q1/predictions/baseline")
    pred_samples_2 = model2.predict(X_samples)
    generate_image_outputs(X_samples,
                           np.argmax(pred_samples_2, axis=1),
                           path="../outputs/q1/predictions/2_conv")
    pred_samples_3 = model3.predict(X_samples)
    generate_image_outputs(X_samples,
                           np.argmax(pred_samples_3, axis=1),
                           path="../outputs/q1/predictions/2_conv_1_dense")
Example #12
def question_3():
    global input_shape, X_train, y_train_labels, y_train, X_test, y_test_labels, y_test
    model = load_model('model3.h5')
    model.trainable = False

    # Custom model that inputs 28 x 28 matrices and outputs logits (without softmax)
    visualize_model = Model(inputs=model.input,
                            outputs=model.get_layer("logits").output)

    for _label in range(0, num_classes):
        print(
            "------------------------------------------------------------------------"
        )
        print("Synthetic image visualization for label " + str(_label))
        print(
            "------------------------------------------------------------------------"
        )
        y_temp = [_label]
        y_temp = to_categorical(y_temp, num_classes)

        # Setting cost to be the respective output neurons
        cost = visualize_model.output[:, _label]
        # Gradient calculation for the cost
        grad = K.mean(K.gradients(cost, visualize_model.input)[0], axis=0)

        # Custom keras backend function that inputs the images and returns the cost and gradient
        custom_iterate = K.function([model.input],
                                    [visualize_model.output[:, _label], grad])

        # Initializing a gaussian distribution centred around 128
        X_init = np.random.normal(loc=128., scale=50., size=(1, 28, 28, 1))
        X_init /= 255.

        costs = []
        iter_indices = []

        # Batch wise gradient ascent for learning X_init
        for i in range(num_iter):
            cost, grads = custom_iterate([X_init])
            sigma = (i + 1) * 4 / (num_iter + 0.5)
            step_size = 1.0 / np.std(grads)
            costs.append(cost[0])
            iter_indices.append(i)

            # Smoothing using a Gaussian filter
            grads = gaussian_filter(grads, sigma)
            # Gradient update
            X_init = (1 - 0.0001) * X_init + step_size * np.array(grads)

            line = ("Iteration " + str(i + 1).rjust(int(log10(num_iter) + 1)) +
                    "/" + str(num_iter) +
                    " complete.       Cost: %0.10f       " % cost[0])
            print(line)

        # Visualizing the input image
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.imshow(X_init.reshape(28, 28), interpolation='nearest', cmap="gray")
        plt.savefig("../outputs/q3/visualizations/max_output_" + str(_label) +
                    ".png")
        plt.close()

        plot_learning_curve(
            [iter_indices], [costs],
            colors=['b-'],
            labels=['Cost'],
            title="Cost evolution over optimization iterations",
            path="../outputs/q3/plots/cost_output_" + str(_label) + ".png",
            axlabels=["Iterations", "Cost"])

    # Custom model that inputs 28 x 28 image matrices and outputs 2nd maxpooling layer
    visualize_model = Model(inputs=model.input,
                            outputs=model.get_layer("maxpooling2").output)
    for _id in range(15):
        print(
            "------------------------------------------------------------------------"
        )
        print("Synthetic image visualization for central neuron of filter " +
              str(_id))
        print(
            "------------------------------------------------------------------------"
        )

        # Setting cost as the central neuron of maxpooling layer
        # Since the row and column sizes (7, 7) are odd, integer-divide by 2
        rows = int(visualize_model.output.get_shape()[1])
        cols = int(visualize_model.output.get_shape()[2])
        cost = visualize_model.output[:, rows // 2, cols // 2, _id]
        grad = K.mean(K.gradients(cost, visualize_model.input)[0], axis=0)
        custom_iterate = K.function([model.input], [cost, grad])
        X_init = np.random.normal(loc=128., scale=50., size=(1, 28, 28, 1))
        X_init /= 255.

        # Batch wise gradient ascent for learning X_init
        for i in range(num_iter):
            cost, grads = custom_iterate([X_init])
            sigma = (i + 1) * 4 / (num_iter + 0.5)
            step_size = 1.0 / np.std(grads)
            grads = gaussian_filter(grads, sigma)

            # Gradient update
            X_init = (1 - 0.0001) * X_init + step_size * np.array(grads)

            line = ("Iteration " + str(i + 1).rjust(int(log10(num_iter) + 1)) +
                    "/" + str(num_iter) +
                    " complete.       Cost: %0.10f       " % cost[0])
            print(line)

        # Plotting X_init for each of the filter optimizations
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.imshow(X_init.reshape(28, 28), interpolation='nearest', cmap="gray")
        plt.text(0.5,
                 0.05,
                 'Filter: ' + str(_id),
                 fontsize=28,
                 horizontalalignment='center',
                 verticalalignment='center',
                 transform=ax.transAxes,
                 color='white')
        plt.savefig("../outputs/q3/visualizations/max_filter_" + str(_id) +
                    ".png")
        plt.close()
Example #13
def question_2():
    global input_shape, X_train, y_train_labels, y_train, X_test, y_test_labels, y_test
    model3 = load_model('model3.h5')
    model3.trainable = False
    learning_rate = 0.01
    validation_interval = 10

    # Iterating over each of the 10 classes for generating adversarial examples
    for _label in range(0, num_classes):
        print(
            "------------------------------------------------------------------------"
        )
        print("Adversarial examples for label " + str(_label))
        print(
            "------------------------------------------------------------------------"
        )

        # y_eval is a dummy matrix useful for evaluating categorical crossentropy loss
        y_eval = to_categorical(np.full((batch_size, 1), _label, dtype=int),
                                num_classes=num_classes)
        # y_fool is the duplicate label meant to fool the network and generate adversarial examples
        y_fool = to_categorical(np.full((y_train_labels.shape[0], 1),
                                        _label,
                                        dtype=int),
                                num_classes=num_classes)

        batch = get_iter_batch(X_test, y_fool, batch_size, num_iter)

        # initializing a 28 x 28 matrix for noise
        noise = np.zeros((1, 28, 28, 1))

        # new functional model to add noise and predict output using existing trained model
        input1 = Input(shape=(img_rows, img_cols, 1))
        input2 = Input(shape=(img_rows, img_cols, 1))
        sum_inp = keras.layers.add([input1, input2])
        op = model3(sum_inp)
        noise_model = Model(inputs=[input1, input2], outputs=op)

        # calculating gradient
        a_loss = K.categorical_crossentropy(noise_model.output, y_eval)
        grad = K.gradients(a_loss, noise_model.input[1])[0]
        grad = K.mean(normalize_tensor(grad), axis=0)

        # custom keras backend function that takes in two inputs and yields noise output,
        # loss and gradient
        custom_iterate = K.function([input1, input2],
                                    [noise_model.output, a_loss, grad])

        train_indices, train_loss, test_indices, test_loss, test_acc = [], [], [], [], []
        ctr = 0

        # Batch wise manual gradient descent for learning adversarial noise
        for _batch in batch:
            X_actual, y_actual = _batch
            output, loss, grads = custom_iterate([X_actual, noise])

            # Validating at specific intervals
            if (ctr % validation_interval == 0):
                noise_test = np.zeros(X_test.shape) + noise[0]
                preds_test = noise_model.predict([X_test, noise_test])
                _test_acc = float(
                    np.where(np.argmax(preds_test, axis=1) == _label)
                    [0].shape[0]) / float(preds_test.shape[0])
                _test_loss = np.mean(loss)
                test_indices.append(ctr)
                test_loss.append(_test_loss)
                test_acc.append(_test_acc)

            train_indices.append(ctr)
            train_loss.append(np.mean(loss))

            # Gradient update
            noise = noise - learning_rate * np.array(grads)

            line = (
                "Iteration " + str(ctr + 1).rjust(int(log10(num_iter) + 1)) +
                "/" + str(num_iter) +
                " complete.       Train Loss: %0.10f       " % np.mean(loss))
            print(line)
            ctr = ctr + 1

        noise_test = np.zeros(X_test.shape) + noise[0]
        preds = noise_model.predict([X_test, noise_test])
        print("Accuracy: " + str(
            float(np.where(np.argmax(preds, axis=1) == _label)[0].shape[0]) /
            float(preds.shape[0])))

        # Visualizing each of the generated noises
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.imshow(noise.reshape(28, 28), interpolation='nearest', cmap="gray")
        plt.savefig("../outputs/q2/visualizations/sample_" + str(_label) +
                    ".png")
        plt.close()

        # Plotting loss and accuracy evolution
        plot_learning_curve(
            [train_indices, test_indices], [train_loss, test_loss],
            colors=['c-', 'm-'],
            labels=['Train loss', 'Test loss'],
            title="Loss evolution for adversarial noise training",
            path="../outputs/q2/plots/train_test_loss_adversarial_noise_" +
            str(_label) + ".png",
            axlabels=["Iterations", "Loss"])
        plot_learning_curve(
            [test_indices], [test_acc],
            colors=['r-'],
            labels=['Test Accuracy'],
            title="Accuracy evolution for adversarial noise training",
            path="../outputs/q2/plots/test_acc_adversarial_noise_" +
            str(_label) + ".png",
            axlabels=["Iterations", "Accuracy"])

        # Predicting for a random set of 9 adversarial images
        ids = np.random.choice(X_test.shape[0], 9)
        X_samples = X_test[ids]
        noise_sample = np.zeros(X_samples.shape) + noise[0]
        pred_samples = noise_model.predict([X_samples, noise_sample])
        actual_samples = model3.predict(X_samples)
        generate_noisy_outputs(X_samples + noise_sample,
                               np.argmax(actual_samples, axis=1),
                               np.argmax(pred_samples, axis=1),
                               path="../outputs/q2/predictions/" + str(_label))
Example #14
    load_checkpoint = False

    if load_checkpoint:
        agent.load_models()

    for i in range(n_games):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            if not load_checkpoint:
                agent.learn(observation, action, reward, observation_, done)
            observation = observation_
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])

        if avg_score > best_score:
            best_score = avg_score
            if not load_checkpoint:
                agent.save_models()

        print('episode ', i, 'score %.1f' % score, 'avg_score %.1f' % avg_score)

    if not load_checkpoint:
        x = [i+1 for i in range(n_games)]
        plot_learning_curve(x, score_history, figure_file)

Example #15
                  learning_rate_decay_interval=250,
                  l2_reg_lambda=0.005,
                  momentum=0.9)
    model_1.train(X_train,
                  y_train,
                  batch_size=64,
                  iterations=8000,
                  validation=True,
                  validation_data=(X_test, y_test),
                  validation_interval=200)
    model_1.eval(X_test, y_test)
    train_loss_sigmoid_1, test_loss_sigmoid_1 = model_1.loss_stats()
    plot_learning_curve(
        [train_loss_sigmoid_1[:, 0], test_loss_sigmoid_1[:, 0]],
        [train_loss_sigmoid_1[:, 1], test_loss_sigmoid_1[:, 1]],
        colors=['g-', 'b-'],
        labels=['Train loss', 'Test loss'],
        title="Loss evolution for Sigmoid activation; alpha = 1e-2",
        path="../outputs/train_test_loss_sigmoid_1e-2.png")

    print(
        "------------------------------------------------------------------------"
    )
    print("MLP with Sigmoid activation; learning rate=1e-3; No scheduling")
    print(
        "------------------------------------------------------------------------"
    )
    model_2 = MLP([784, 1000, 500, 250, 10],
                  activation_types=["sigmoid", "sigmoid", "sigmoid", "linear"],
                  learning_rate=1e-3,
                  learning_rate_decay=1.00,
Example #16
def train_model(model, optimizer, criterion, train_data_loader, valid_data_loader, test_data_loader, opt):
    generator = SequenceGenerator(model,
                                  eos_id=opt.word2id[pykp.io.EOS_WORD],
                                  beam_size=opt.beam_size,
                                  max_sequence_length=opt.max_sent_length
                                  )

    logging.info('======================  Checking GPU Availability  =========================')
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on GPU! devices=%s' % str(opt.gpuid))
        # model = nn.DataParallel(model, device_ids=opt.gpuid)
    else:
        logging.info('Running on CPU!')

    logging.info('======================  Start Training  =========================')

    checkpoint_names        = []
    train_history_losses    = []
    valid_history_losses    = []
    test_history_losses     = []
    # best_loss = sys.float_info.max # for normal training/testing loss (likelihood)
    best_loss               = 0.0 # for f-score
    stop_increasing         = 0

    train_losses = []
    total_batch = 0
    early_stop_flag = False

    if opt.train_from:
        state_path = opt.train_from.replace('.model', '.state')
        logging.info('Loading training state from: %s' % state_path)
        if os.path.exists(state_path):
            (epoch, total_batch, best_loss, stop_increasing, checkpoint_names, train_history_losses, valid_history_losses,
                        test_history_losses) = torch.load(open(state_path, 'rb'))
            opt.start_epoch = epoch

    for epoch in range(opt.start_epoch , opt.epochs):
        if early_stop_flag:
            break

        progbar = Progbar(title='Training', target=len(train_data_loader), batch_size=train_data_loader.batch_size,
                          total_examples=len(train_data_loader.dataset))

        for batch_i, batch in enumerate(train_data_loader):
            model.train()
            batch_i += 1 # for the aesthetics of printing
            total_batch += 1
            one2many_batch, one2one_batch = batch
            src, trg, trg_target, trg_copy_target, src_ext, oov_lists = one2one_batch
            max_oov_number = max([len(oov) for oov in oov_lists])

            print("src size - ",src.size())
            print("target size - ",trg.size())

            if torch.cuda.is_available():
                src = src.cuda()
                trg = trg.cuda()
                trg_target = trg_target.cuda()
                trg_copy_target = trg_copy_target.cuda()
                src_ext = src_ext.cuda()

            optimizer.zero_grad()

            '''
            Training with Maximum Likelihood (word-level error)
            '''
            decoder_log_probs, _, _ = model.forward(src, trg, src_ext, oov_lists)

            # simply average losses of all the predictions
            # IMPORTANT, must use logits instead of probs to compute the loss, otherwise it's super super slow at the beginning (grads of probs are small)!
            start_time = time.time()

            if not opt.copy_model:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(-1, opt.vocab_size),
                    trg_target.contiguous().view(-1)
                )
            else:
                ml_loss = criterion(
                    decoder_log_probs.contiguous().view(-1, opt.vocab_size + max_oov_number),
                    trg_copy_target.contiguous().view(-1)
                )

            '''
            Training with Reinforcement Learning (instance-level reward f-score)
            '''
            src_list, trg_list, _, trg_copy_target_list, src_oov_map_list, oov_list, src_str_list, trg_str_list = one2many_batch

            if torch.cuda.is_available():
                src_list = src_list.cuda()
                src_oov_map_list = src_oov_map_list.cuda()
            rl_loss = get_loss_rl()

            start_time = time.time()
            ml_loss.backward()
            print("--backward- %s seconds ---" % (time.time() - start_time))

            if opt.max_grad_norm > 0:
                pre_norm = torch.nn.utils.clip_grad_norm(model.parameters(), opt.max_grad_norm)
                after_norm = (sum([p.grad.data.norm(2) ** 2 for p in model.parameters() if p.grad is not None])) ** (1.0 / 2)
                logging.info('clip grad (%f -> %f)' % (pre_norm, after_norm))

            optimizer.step()

            train_losses.append(ml_loss.data[0])

            progbar.update(epoch, batch_i, [('train_loss', ml_loss.data[0]), ('PPL', ml_loss.data[0])])

            if batch_i > 1 and batch_i % opt.report_every == 0:
                logging.info('======================  %d  =========================' % (batch_i))

                logging.info('Epoch : %d Minibatch : %d, Loss=%.5f' % (epoch, batch_i, np.mean(ml_loss.data[0])))
                sampled_size = 2
                logging.info('Printing predictions on %d sampled examples by greedy search' % sampled_size)

                if torch.cuda.is_available():
                    src                 = src.data.cpu().numpy()
                    decoder_log_probs   = decoder_log_probs.data.cpu().numpy()
                    max_words_pred      = decoder_log_probs.argmax(axis=-1)
                    trg_target          = trg_target.data.cpu().numpy()
                    trg_copy_target     = trg_copy_target.data.cpu().numpy()
                else:
                    src                 = src.data.numpy()
                    decoder_log_probs   = decoder_log_probs.data.numpy()
                    max_words_pred      = decoder_log_probs.argmax(axis=-1)
                    trg_target          = trg_target.data.numpy()
                    trg_copy_target     = trg_copy_target.data.numpy()

                sampled_trg_idx     = np.random.random_integers(low=0, high=len(trg) - 1, size=sampled_size)
                src                 = src[sampled_trg_idx]
                oov_lists           = [oov_lists[i] for i in sampled_trg_idx]
                max_words_pred      = [max_words_pred[i] for i in sampled_trg_idx]
                decoder_log_probs   = decoder_log_probs[sampled_trg_idx]
                if not opt.copy_model:
                    trg_target      = [trg_target[i] for i in sampled_trg_idx] # use the real target trg_loss (the starting <BOS> has been removed and contains oov ground-truth)
                else:
                    trg_target      = [trg_copy_target[i] for i in sampled_trg_idx]

                for i, (src_wi, pred_wi, trg_i, oov_i) in enumerate(zip(src, max_words_pred, trg_target, oov_lists)):
                    nll_prob = -np.sum([decoder_log_probs[i][l][pred_wi[l]] for l in range(len(trg_i))])
                    find_copy       = np.any([x >= opt.vocab_size for x in src_wi])
                    has_copy        = np.any([x >= opt.vocab_size for x in trg_i])

                    sentence_source = [opt.id2word[x] if x < opt.vocab_size else oov_i[x-opt.vocab_size] for x in src_wi]
                    sentence_pred   = [opt.id2word[x] if x < opt.vocab_size else oov_i[x-opt.vocab_size] for x in pred_wi]
                    sentence_real   = [opt.id2word[x] if x < opt.vocab_size else oov_i[x-opt.vocab_size] for x in trg_i]

                    sentence_source = sentence_source[:sentence_source.index('<pad>')] if '<pad>' in sentence_source else sentence_source
                    sentence_pred   = sentence_pred[:sentence_pred.index('<pad>')] if '<pad>' in sentence_pred else sentence_pred
                    sentence_real   = sentence_real[:sentence_real.index('<pad>')] if '<pad>' in sentence_real else sentence_real

                    logging.info('==================================================')
                    logging.info('Source: %s '          % (' '.join(sentence_source)))
                    logging.info('\t\tPred : %s (%.4f)' % (' '.join(sentence_pred), nll_prob) + (' [FIND COPY]' if find_copy else ''))
                    logging.info('\t\tReal : %s '       % (' '.join(sentence_real)) + (' [HAS COPY]' + str(trg_i) if has_copy else ''))

            if total_batch > 1 and total_batch % opt.run_valid_every == 0:
                logging.info('*' * 50)
                logging.info('Run validing and testing @Epoch=%d,#(Total batch)=%d' % (epoch, total_batch))
                # valid_losses    = _valid_error(valid_data_loader, model, criterion, epoch, opt)
                # valid_history_losses.append(valid_losses)
                valid_score_dict  = evaluate_beam_search(generator, valid_data_loader, opt, title='valid', epoch=epoch, save_path=opt.exp_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))
                test_score_dict   = evaluate_beam_search(generator, test_data_loader, opt, title='test', epoch=epoch, save_path=opt.exp_path + '/epoch%d_batch%d_total_batch%d' % (epoch, batch_i, total_batch))

                checkpoint_names.append('epoch=%d-batch=%d-total_batch=%d' % (epoch, batch_i, total_batch))
                train_history_losses.append(copy.copy(train_losses))
                valid_history_losses.append(valid_score_dict)
                test_history_losses.append(test_score_dict)
                train_losses = []

                scores = [train_history_losses]
                curve_names = ['Training Error']
                scores += [[result_dict[name] for result_dict in valid_history_losses] for name in opt.report_score_names]
                curve_names += ['Valid-'+name for name in opt.report_score_names]
                scores += [[result_dict[name] for result_dict in test_history_losses] for name in opt.report_score_names]
                curve_names += ['Test-'+name for name in opt.report_score_names]

                scores = [np.asarray(s) for s in scores]
                # Plot the learning curve
                plot_learning_curve(scores=scores,
                                    curve_names=curve_names,
                                    checkpoint_names=checkpoint_names,
                                    title='Training Validation & Test',
                                    save_path=opt.exp_path + '/[epoch=%d,batch=%d,total_batch=%d]train_valid_test_curve.png' % (epoch, batch_i, total_batch))

                '''
                decide whether to stop training early (now: has the f-score increased; previously: has the validation error decreased)
                '''
                valid_loss      = np.average(valid_history_losses[-1][opt.report_score_names[0]])
                is_best_loss    = valid_loss > best_loss
                rate_of_change  = float(valid_loss - best_loss) / float(best_loss) if float(best_loss) > 0 else 0.0

                # valid error doesn't increase
                if rate_of_change <= 0:
                    stop_increasing += 1
                else:
                    stop_increasing = 0

                if is_best_loss:
                    logging.info('Validation: update best loss (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        best_loss, valid_loss, rate_of_change * 100))
                else:
                    logging.info('Validation: best loss is not updated for %d times (%.4f --> %.4f), rate of change (ROC)=%.2f' % (
                        stop_increasing, best_loss, valid_loss, rate_of_change * 100))

                best_loss = max(valid_loss, best_loss)

                # only store the checkpoints that make better validation performances
                if total_batch > 1 and (total_batch % opt.save_model_every == 0 or is_best_loss): #epoch >= opt.start_checkpoint_at and
                    # Save the checkpoint
                    logging.info('Saving checkpoint to: %s' % os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d.error=%f' % (opt.exp, epoch, batch_i, total_batch, valid_loss) + '.model'))
                    torch.save(
                        model.state_dict(),
                        open(os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d' % (opt.exp, epoch, batch_i, total_batch) + '.model'), 'wb')
                    )
                    torch.save(
                        (epoch, total_batch, best_loss, stop_increasing, checkpoint_names, train_history_losses, valid_history_losses, test_history_losses),
                        open(os.path.join(opt.save_path, '%s.epoch=%d.batch=%d.total_batch=%d' % (opt.exp, epoch, batch_i, total_batch) + '.state'), 'wb')
                    )

                if stop_increasing >= opt.early_stop_tolerance:
                    logging.info('Have not increased for %d epochs, early stopping training' % stop_increasing)
                    early_stop_flag = True
                    break
                logging.info('*' * 50)
Example #17
def knn_diabetes():
    # Load the dataset
    data = pd.read_csv('./pima-indians-diabetes/diabetes.csv')
    # print('dataset shape {}'.format(data.shape))
    # print(data.head())
    # print(data.groupby('Outcome').size())

    # Prepare the dataset
    X = data.iloc[:, :8]
    Y = data.iloc[:, 8]
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
    # print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
    '''
    # Model comparison
    models = []
    models.append(("KNN", KNeighborsClassifier(n_neighbors=2)))
    models.append(("KNN with weights", KNeighborsClassifier(
        n_neighbors=2, weights="distance")))
    models.append(("Radius Neighbors", RadiusNeighborsClassifier(
        n_neighbors=2, radius=500.0)))
    results = []
    for name, model in models:
        kfold = KFold(n_splits=10)
        cv_result = cross_val_score(model, X, Y, cv=kfold)
        results.append((name, cv_result))
    for i in range(len(results)):
        print("name: {}; cross val score: {}".format(
            results[i][0], results[i][1].mean()))
    '''
    # Model training and analysis
    # knn = KNeighborsClassifier(n_neighbors=2)
    # knn.fit(X_train, Y_train)
    # train_score = knn.score(X_train, Y_train)
    # test_score = knn.score(X_test, Y_test)
    # print("train score: {}\ntest score: {}".format(train_score, test_score))
    knn = KNeighborsClassifier(n_neighbors=2)
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    plt.figure(figsize=(10, 6))
    plot_learning_curve(plt,
                        knn,
                        "Learn Curve for KNN Diabetes",
                        X,
                        Y,
                        ylim=(0.0, 1.01),
                        cv=cv)
    plt.show()

    # Feature selection and data visualization
    selector = SelectKBest(k=2)
    X_new = selector.fit_transform(X, Y)
    print('X_new.shape {}'.format(X_new.shape))
    plt.figure(figsize=(10, 6))
    plt.ylabel("BMI")
    plt.xlabel("Glucose")
    plt.scatter(X_new[Y == 0][:, 0],
                X_new[Y == 0][:, 1],
                c='r',
                s=20,
                marker='o')
    # plot the samples
    plt.scatter(X_new[Y == 1][:, 0],
                X_new[Y == 1][:, 1],
                c='g',
                s=20,
                marker='^')
    # plot the samples
    plt.show()
Example #18
    # env_name=Env(),  # we are using a tiny environment here for testing
)

NUM_OF_PLAYERS = 1

config = {
    'num_players': NUM_OF_PLAYERS,
    'log_filename': './logs/policy_agent.log',
    'static_drawpile': False,
}
logging.basicConfig(filename=config['log_filename'],
                    filemode='w',
                    level=logging.INFO)
env = Env(config)

rollouts, dueling_dqn = instantiate(params)
trainer = Trainer()
rewards, deck_ends = trainer.train(env, rollouts, dueling_dqn, params)
print("Training completed!")

torch.save(dueling_dqn.Q.state_dict(), './models/duelingDQN.pt')

evaluations = []
num_iter = 50
for i in range(num_iter):  # let's play 50 games
    env.run_PG(dueling_dqn)
    evaluations.append(env.get_num_cards_in_drawpile())
print('GAME OVER!')
plot_learning_curve(deck_ends, params.num_updates)
plot_testing(evaluations, num_iter)
Example #19
    x_train_pca, x_test_pca = utils.pca(x_train, x_test,
                                        featrue_preserve_radio)
    '''x_train_pca, x_test_pca = utils.pca_with_model(pca_model_name='pca_model.sav',
                                                   scaler_model_name='scaler_model.sav',
                                                   x_train=x_train, x_test=x_test)'''
    clf = SVC(kernel='rbf', gamma=0.0005, C=1)
    tic = time.time()
    clf.fit(x_train_pca, y_train)
    toc = time.time()
    score = clf.score(x_test_pca, y_test)
    print("train time: " + str(1000 * (toc - tic)) + "ms")
    print("score: {:.6f}".format(score))

    y_pred = clf.predict(x_test_pca)
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred,
                                target_names=utils.label_names))

    # save model
    pickle.dump(clf, open('svm_model.sav', 'wb'))

    # plot learning curve
    train_sizes, train_scores, test_scores = learning_curve(
        estimator=clf,
        X=x_train_pca,
        y=y_train,
        train_sizes=np.linspace(0.1, 1.0, 10),
        cv=4,
        n_jobs=4)
    utils.plot_learning_curve(train_sizes, train_scores, test_scores)
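
utils.plot_learning_curve is not shown. A plausible sketch for the (train_sizes, train_scores, test_scores) arrays produced by sklearn's learning_curve above (assumed, not the original implementation):

import numpy as np
import matplotlib.pyplot as plt

def plot_learning_curve(train_sizes, train_scores, test_scores):
    # Mean train and cross-validation scores with +/- one std bands.
    train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
    test_mean, test_std = test_scores.mean(axis=1), test_scores.std(axis=1)
    plt.plot(train_sizes, train_mean, 'o-', color='b', label='Training score')
    plt.fill_between(train_sizes, train_mean - train_std,
                     train_mean + train_std, alpha=0.15, color='b')
    plt.plot(train_sizes, test_mean, 'o-', color='g',
             label='Cross-validation score')
    plt.fill_between(train_sizes, test_mean - test_std,
                     test_mean + test_std, alpha=0.15, color='g')
    plt.xlabel('Number of training examples')
    plt.ylabel('Score')
    plt.legend(loc='best')
    plt.grid(True)
    plt.show()
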
Example #20
    def train(self,
              save_dir='./tmp',
              transfer_dir=None,
              details=False,
              verbose=True,
              show_each_step=False,
              show_percentage=True,
              **kwargs):
        train_size = self.train_set.num_examples
        num_steps_per_epoch = np.ceil(train_size / self.batch_size).astype(int)
        self.steps_per_epoch = num_steps_per_epoch
        num_steps = num_steps_per_epoch * self.num_epochs
        self.total_steps = num_steps

        validation_frequency = kwargs.get('validation_frequency', None)
        summary_frequency = kwargs.get('summary_frequency', None)
        if validation_frequency is None:
            validation_frequency = num_steps_per_epoch
        if summary_frequency is None:
            summary_frequency = num_steps_per_epoch

        num_validations = num_steps // validation_frequency
        last_val_iter = num_validations * validation_frequency

        if transfer_dir is not None:  # Transfer learning setup
            model_to_load = kwargs.get('model_to_load', None)
            blocks_to_load = kwargs.get('blocks_to_load', None)
            load_moving_average = kwargs.get('load_moving_average', False)
            start_epoch = kwargs.get('start_epoch', 0)
            start_step = num_steps_per_epoch * start_epoch

            if not os.path.isdir(transfer_dir):
                ckpt_to_load = transfer_dir
            elif model_to_load is None:  # Find a model to be transferred
                ckpt_to_load = tf.train.latest_checkpoint(transfer_dir)
            elif isinstance(model_to_load, str):
                ckpt_to_load = os.path.join(transfer_dir, model_to_load)
            else:
                fp = open(os.path.join(transfer_dir, 'checkpoints.txt'), 'r')
                ckpt_list = fp.readlines()
                fp.close()
                ckpt_to_load = os.path.join(transfer_dir,
                                            ckpt_list[model_to_load].rstrip())

            reader = pywrap_tensorflow.NewCheckpointReader(
                ckpt_to_load)  # Find variables to be transferred
            var_to_shape_map = reader.get_variable_to_shape_map()
            var_names = [var for var in var_to_shape_map.keys()]

            var_list = []
            if blocks_to_load is None:
                for blk in self.model.block_list:
                    var_list += self.model.get_collection(
                        'block_{}/variables'.format(blk))
                    var_list += self.model.get_collection(
                        'block_{}/ema_variables'.format(blk))
            else:
                for blk in blocks_to_load:
                    var_list += self.model.get_collection(
                        'block_{}/variables'.format(blk))
                    var_list += self.model.get_collection(
                        'block_{}/ema_variables'.format(blk))

            variables_not_loaded = []
            if load_moving_average:
                variables = {}
                for var in var_list:
                    var_name = var.name.split(':')[0]  # split, not rstrip(':0'): rstrip would also eat trailing '0's in the name
                    ema_name = var_name + '/ExponentialMovingAverage'
                    if ema_name in var_to_shape_map:
                        if var.get_shape() == var_to_shape_map[ema_name]:
                            variables[ema_name] = var
                            if ema_name in var_names:
                                var_names.remove(ema_name)
                        else:
                            print('<{}> was not loaded due to shape mismatch'.
                                  format(var_name))
                            variables_not_loaded.append(var_name)
                    elif var_name in var_to_shape_map:
                        if var.get_shape() == var_to_shape_map[var_name]:
                            variables[var_name] = var
                            if var_name in var_names:
                                var_names.remove(var_name)
                        else:
                            print('<{}> was not loaded due to shape mismatch'.
                                  format(var_name))
                            variables_not_loaded.append(var_name)
                    else:
                        variables_not_loaded.append(var_name)
            else:
                variables = []
                for var in var_list:
                    var_name = var.name.split(':')[0]
                    if var_name in var_to_shape_map:
                        if var.get_shape() == var_to_shape_map[var_name]:
                            variables.append(var)
                            if var_name in var_names:
                                var_names.remove(var_name)
                        else:
                            print('<{}> was not loaded due to shape mismatch'.
                                  format(var_name))
                            variables_not_loaded.append(var_name)
                    else:
                        variables_not_loaded.append(var_name)

            saver_transfer = tf.train.Saver(variables)

            self.model.session.run(tf.global_variables_initializer())
            saver_transfer.restore(self.model.session, ckpt_to_load)

            if verbose:
                print('')
                print(
                    'Variables have been initialized using the following checkpoint:'
                )
                print(ckpt_to_load)
                print(
                    'The following variables in the checkpoint were not used:')
                print(var_names)
                print(
                    'The following variables do not exist in the checkpoint, so they were initialized randomly:'
                )
                print(variables_not_loaded)
                print('')

            pkl_file = os.path.join(transfer_dir,
                                    'learning_curve-result-1.pkl')
            pkl_loaded = False
            if os.path.exists(pkl_file):
                train_steps = start_step if show_each_step else start_step // validation_frequency
                eval_steps = start_step // validation_frequency
                with open(pkl_file, 'rb') as fo:
                    prev_results = pkl.load(fo)
                prev_results[0] = prev_results[0][:train_steps]
                prev_results[1] = prev_results[1][:train_steps]
                prev_results[2] = prev_results[2][:eval_steps]
                prev_results[3] = prev_results[3][:eval_steps]
                train_len = len(prev_results[0])
                eval_len = len(prev_results[2])
                if train_len == train_steps and eval_len == eval_steps:
                    train_losses, train_scores, eval_losses, eval_scores = prev_results
                    pkl_loaded = True
                else:
                    train_losses, train_scores, eval_losses, eval_scores = [], [], [], []
            else:
                train_losses, train_scores, eval_losses, eval_scores = [], [], [], []
        else:
            start_epoch = 0
            start_step = 0
            self.model.session.run(tf.global_variables_initializer())
            train_losses, train_scores, eval_losses, eval_scores = [], [], [], []
            pkl_loaded = False

        max_to_keep = kwargs.get('max_to_keep', 5)
        log_trace = kwargs.get('log_trace', False)
        saver = tf.train.Saver(max_to_keep=max_to_keep)
        saver.export_meta_graph(
            filename=os.path.join(save_dir, 'model.ckpt.meta'))

        kwargs['monte_carlo'] = False  # Turn off Monte Carlo dropout for validation

        with tf.device('/cpu:{}'.format(self.model.cpu_offset)):
            with tf.variable_scope('summaries'):  # TensorBoard summaries
                tf.summary.scalar('Loss', self.model.loss)
                tf.summary.scalar('Learning Rate', self.learning_rate)
                for i, val in enumerate(self.model.debug_values):
                    tf.summary.scalar('Debug_{}-{}'.format(i, val.name), val)

                tf.summary.image('Input Images',
                                 tf.cast(self.model.input_images * 255,
                                         dtype=tf.uint8),
                                 max_outputs=4)
                tf.summary.image('Augmented Input Images',
                                 tf.cast(self.model.X_all * 255,
                                         dtype=tf.uint8),
                                 max_outputs=4)
                for i, img in enumerate(self.model.debug_images):
                    tf.summary.image('Debug_{}-{}'.format(i, img.name),
                                     tf.cast(img * 255, dtype=tf.uint8),
                                     max_outputs=4)

                tf.summary.histogram('Image Histogram', self.model.X_all)
                for blk in self.model.block_list:
                    weights = self.model.get_collection(
                        'block_{}/weight_variables'.format(blk))
                    if len(weights) > 0:
                        tf.summary.histogram(
                            'Block {} Weight Histogram'.format(blk),
                            weights[0])
                weights = self.model.get_collection('weight_variables')
                with tf.variable_scope('weights_l1'):
                    weights_l1 = tf.math.accumulate_n(
                        [tf.reduce_sum(tf.math.abs(w)) for w in weights])
                    tf.summary.scalar('Weights L1 Norm', weights_l1)
                with tf.variable_scope('weights_l2'):
                    weights_l2 = tf.global_norm(weights)
                    tf.summary.scalar('Weights L2 Norm', weights_l2)
                tail_scores_5 = []
                tail_scores_1 = []
                with tf.variable_scope('weights_tail_score'):
                    for w in weights:
                        w_size = tf.size(w, out_type=tf.float32)
                        w_std = tf.math.reduce_std(w)
                        w_abs = tf.math.abs(w)
                        tail_threshold_5 = 1.96 * w_std
                        tail_threshold_1 = 2.58 * w_std
                        num_weights_5 = tf.math.reduce_sum(
                            tf.cast(tf.math.greater(w_abs, tail_threshold_5),
                                    dtype=tf.float32))
                        num_weights_1 = tf.math.reduce_sum(
                            tf.cast(tf.math.greater(w_abs, tail_threshold_1),
                                    dtype=tf.float32))
                        tail_scores_5.append(num_weights_5 / (0.05 * w_size))
                        tail_scores_1.append(num_weights_1 / (0.01 * w_size))
                    tail_score_5 = tf.math.accumulate_n(tail_scores_5) / len(
                        tail_scores_5)
                    tail_score_1 = tf.math.accumulate_n(tail_scores_1) / len(
                        tail_scores_1)
                    tf.summary.scalar('Weights Tail Score 5p', tail_score_5)
                    tf.summary.scalar('Weights Tail Score 1p', tail_score_1)
                with tf.variable_scope('gradients_l2'):
                    gradients_l2 = tf.global_norm(self.avg_grads)
                    tf.summary.scalar('Gradients L2 Norm', gradients_l2)
                merged = tf.summary.merge_all()
                train_writer = tf.summary.FileWriter(
                    os.path.join(save_dir, 'logs'), self.model.session.graph)

        train_results = dict()

        if verbose:
            print('Running training loop...')
            print('Batch size: {}'.format(self.batch_size))
            print('Number of epochs: {}'.format(self.num_epochs))
            print('Number of training iterations: {}'.format(num_steps))
            print('Number of iterations per epoch: {}'.format(
                num_steps_per_epoch))

        if show_each_step:
            step_losses, step_scores = [], []
        else:
            step_losses, step_scores = 0, 0
        eval_loss, eval_score = np.inf, 0
        annotations = []

        self.train_set.initialize(
            self.model.session)  # Initialize training iterator
        handles = self.train_set.get_string_handles(
            self.model.session)  # Get a string handle from training iterator
        # if self.val_set is not None:
        #     self.val_set.initialize(self.model.session)  # Initialize validation iterator
        with tf.variable_scope('calc/'):
            step_init_op = self.model.global_step.assign(
                start_step, name='init_global_step')
        self.model.session.run([step_init_op] + self.model.init_ops)
        tf.get_default_graph().finalize()

        # self._test_drive(save_dir=save_dir)  # Run test code

        self.curr_epoch += start_epoch
        self.curr_step += start_step
        step_loss, step_score = 0, 0
        start_time = time.time()
        for i in range(num_steps - start_step):  # Training iterations
            self._update_learning_rate()

            try:
                step_loss, step_Y_true, step_Y_pred = self._step(
                    handles,
                    merged=merged,
                    writer=train_writer,
                    summary=i % summary_frequency == 0,
                    log_trace=log_trace and i % summary_frequency == 1)
                step_score = self.evaluator.score(step_Y_true, step_Y_pred)
            except tf.errors.OutOfRangeError:
                if verbose:
                    remainder_size = train_size - (self.steps_per_epoch -
                                                   1) * self.batch_size
                    print(
                        'The last incomplete batch ({} examples) has been skipped'.format(
                            remainder_size))

            if show_each_step:
                step_losses.append(step_loss)
                step_scores.append(step_score)
            else:
                step_losses += step_loss
                step_scores += step_score
            self.curr_step += 1

            if (i + 1) % validation_frequency == 0:  # Validate every validation_frequency iterations
                if self.val_set is not None:
                    _, eval_Y_true, eval_Y_pred, eval_loss = self.model.predict(
                        self.val_set,
                        verbose=False,
                        return_images=False,
                        run_init_ops=False,
                        **kwargs)
                    eval_score = self.evaluator.score(eval_Y_true, eval_Y_pred)
                    eval_scores.append(eval_score)
                    eval_losses.append(eval_loss)

                    del eval_Y_true, eval_Y_pred

                    curr_score = eval_score

                    self.model.save_results(self.val_set,
                                            os.path.join(save_dir, 'results'),
                                            self.curr_epoch,
                                            max_examples=kwargs.get(
                                                'num_examples_to_save', None),
                                            **kwargs)
                else:
                    curr_score = np.mean(
                        step_scores
                    ) if show_each_step else step_scores / validation_frequency

                if self.evaluator.is_better(curr_score, self.best_score,
                                            **kwargs):  # Save best model
                    self.best_score = curr_score
                    saver.save(self.model.session,
                               os.path.join(save_dir, 'model.ckpt'),
                               global_step=self.model.global_step,
                               write_meta_graph=False)

                    if show_each_step:
                        annotations.append((self.curr_step, curr_score))
                    else:
                        annotations.append(
                            (self.curr_step // validation_frequency,
                             curr_score))
                    annotations = annotations[-max_to_keep:]
                elif self.curr_step == last_val_iter:  # Save latest model
                    saver.save(self.model.session,
                               os.path.join(save_dir, 'model.ckpt'),
                               global_step=self.model.global_step,
                               write_meta_graph=False)

                    if show_each_step:
                        annotations.append((self.curr_step, curr_score))
                    else:
                        annotations.append(
                            (self.curr_step // validation_frequency,
                             curr_score))
                    annotations = annotations[-max_to_keep:]

                ckpt_list = saver.last_checkpoints[::-1]
                with open(os.path.join(save_dir, 'checkpoints.txt'), 'w') as fp:
                    for fname in ckpt_list:
                        fp.write(fname.split(os.sep)[-1] + '\n')

                if show_each_step:
                    train_losses += step_losses
                    train_scores += step_scores
                    step_losses, step_scores = [], []
                else:
                    step_loss = step_losses / validation_frequency
                    step_score = step_scores / validation_frequency
                    train_losses.append(step_loss)
                    train_scores.append(step_score)
                    step_losses, step_scores = 0, 0

            if (i + 1) % num_steps_per_epoch == 0:  # Print and plot results every epoch
                self.train_set.initialize(
                    self.model.session
                )  # Initialize training iterator every epoch
                if show_each_step:
                    val_freq = validation_frequency
                    start = 0 if pkl_loaded else start_step
                else:
                    val_freq = 1
                    start = 0 if pkl_loaded else start_epoch
                if self.val_set is not None:
                    if verbose:
                        if show_percentage:
                            print(
                                '[epoch {}/{}]\tTrain loss: {:.5f}  |Train score: {:2.3%}  '
                                '|Eval loss: {:.5f}  |Eval score: {:2.3%}  |LR: {:.7f}  '
                                '|Elapsed time: {:5.0f} sec'.format(
                                    self.curr_epoch, self.num_epochs,
                                    step_loss, step_score, eval_loss,
                                    eval_score, self.init_learning_rate *
                                    self.curr_multiplier,
                                    time.time() - start_time))
                        else:
                            print(
                                '[epoch {}/{}]\tTrain loss: {:.5f}  |Train score: {:.5f}  '
                                '|Eval loss: {:.5f}  |Eval score: {:.5f}  |LR: {:.7f}  '
                                '|Elapsed time: {:5.0f} sec'.format(
                                    self.curr_epoch, self.num_epochs,
                                    step_loss, step_score, eval_loss,
                                    eval_score, self.init_learning_rate *
                                    self.curr_multiplier,
                                    time.time() - start_time))
                    if len(eval_losses) > 0:
                        if self.model.num_classes is None:
                            loss_thres = min(eval_losses) * 2
                        else:
                            if self.model.num_classes > 1:
                                loss_thres = max([
                                    2 * np.log(self.model.num_classes),
                                    min(eval_losses) * 2
                                ])
                            else:
                                loss_thres = min(eval_losses) * 2
                        plot_learning_curve(train_losses,
                                            train_scores,
                                            eval_losses=eval_losses,
                                            eval_scores=eval_scores,
                                            name=self.evaluator.name,
                                            loss_threshold=loss_thres,
                                            mode=self.evaluator.mode,
                                            img_dir=save_dir,
                                            annotations=annotations,
                                            start_step=start,
                                            validation_frequency=val_freq)

                else:
                    if verbose:
                        if show_percentage:
                            print(
                                '[epoch {}/{}]\tTrain loss: {:.5f}  |Train score: {:2.3%}  |LR: {:.7f}  '
                                '|Elapsed time: {:5.0f} sec'.format(
                                    self.curr_epoch, self.num_epochs,
                                    step_loss, step_score,
                                    self.init_learning_rate *
                                    self.curr_multiplier,
                                    time.time() - start_time))
                        else:
                            print(
                                '[epoch {}/{}]\tTrain loss: {:.5f}  |Train score: {:.5f}  |LR: {:.7f}  '
                                '|Elapsed time: {:5.0f} sec'.format(
                                    self.curr_epoch, self.num_epochs,
                                    step_loss, step_score,
                                    self.init_learning_rate *
                                    self.curr_multiplier,
                                    time.time() - start_time))
                    if self.model.num_classes is None:
                        loss_thres = min(train_losses) * 2
                    else:
                        if self.model.num_classes > 1:
                            loss_thres = max([
                                2 * np.log(self.model.num_classes),
                                min(train_losses) * 2
                            ])
                        else:
                            loss_thres = min(train_losses) * 2
                    plot_learning_curve(train_losses,
                                        train_scores,
                                        eval_losses=None,
                                        eval_scores=None,
                                        name=self.evaluator.name,
                                        loss_threshold=loss_thres,
                                        mode=self.evaluator.mode,
                                        img_dir=save_dir,
                                        annotations=annotations,
                                        start_step=start,
                                        validation_frequency=val_freq)

                self.curr_epoch += 1
                plt.close()

        train_writer.close()
        if verbose:
            print('Total training time: {:.2f} sec'.format(time.time() -
                                                           start_time))
            print('Best {} {}: {:.4f}'.format(
                'evaluation' if self.val_set is not None else 'training',
                self.evaluator.name, self.best_score))

        print('Done.')

        if details:
            # step_losses/step_scores are flushed at each validation point, so
            # return the accumulated histories instead
            train_results['step_losses'] = train_losses
            train_results['step_scores'] = train_scores
            if self.val_set is not None:
                train_results['eval_losses'] = eval_losses
                train_results['eval_scores'] = eval_scores

            return train_results
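
The plot_learning_curve utility this trainer imports is defined elsewhere in its repository and is not shown. As a rough guide only, a compatible matplotlib helper could look like the sketch below; the keyword names are taken from the call sites above, but the body is an assumption, not the original implementation.

# Hypothetical sketch of the plotting helper assumed by the trainer above.
# Keyword names come from the call sites; the body is illustrative only.
import os
import numpy as np
import matplotlib.pyplot as plt

def plot_learning_curve(train_losses, train_scores, eval_losses=None,
                        eval_scores=None, name='score', loss_threshold=None,
                        mode='max', img_dir='.', annotations=None,
                        start_step=0, validation_frequency=1):
    x_train = start_step + np.arange(len(train_losses))
    fig, (ax0, ax1) = plt.subplots(2, 1, sharex=True)
    ax0.plot(x_train, train_losses, label='train loss')
    if eval_losses:
        x_eval = start_step + (np.arange(len(eval_losses)) + 1) * validation_frequency
        ax0.plot(x_eval, eval_losses, label='eval loss')
    if loss_threshold is not None:
        ax0.set_ylim(0.0, loss_threshold)  # hide diverging losses
    ax0.set_ylabel('loss')
    ax0.legend()
    ax1.plot(x_train, train_scores, label='train ' + name)
    if eval_scores:
        x_eval = start_step + (np.arange(len(eval_scores)) + 1) * validation_frequency
        ax1.plot(x_eval, eval_scores, label='eval ' + name)
    for step, value in (annotations or []):  # best checkpoints saved by the trainer
        ax1.annotate('{:.3f}'.format(value), xy=(step, value))
    ax1.set_xlabel('step')
    ax1.set_ylabel(name)  # mode ('max'/'min') indicates which direction is better
    ax1.legend()
    fig.savefig(os.path.join(img_dir, 'learning_curve.png'))
    # the caller closes the figure with plt.close() after each epoch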
Example #21
## HEART - SEP. TARGET AND FEATURES
heart_x = heart.iloc[:,:-1].values
heart_y = heart.iloc[:,-1].values

## ABALONE - SEP. TARGET AND FEATURES
abalone_x = abalone.drop('target',axis=1).values
scaler = StandardScaler()
abalone_x = scaler.fit_transform(abalone_x)
abalone_y = abalone['target'].values
data_all = [(abalone_x, abalone_y), (heart_x, heart_y)]

plot_learning_curve(estimator=abalone_dt, 
                    title='DT Learning Curve - Abalone', 
                    X = abalone_x, 
                    y = abalone_y, 
                    cv = 3, 
                    train_sizes = np.linspace(0.15, 1.0, 5),
                    n_jobs = -1,
                    save_as = 'abalone_dt_lc',
                    scoring = 'roc_auc')

plot_learning_curve(estimator = heart_dt, 
                    title = 'DT Learning Curve - Heart',
                    X = heart_x, 
                    y = heart_y, 
                    train_sizes = np.linspace(0.15, 1.0, 5),
                    cv = 3, 
                    n_jobs = -1, 
                    save_as = 'heart_dt_lc',
                    scoring = 'roc_auc'
                   )
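
The estimator-based plot_learning_curve used for the abalone and heart datasets follows the standard scikit-learn recipe. A minimal sketch built on sklearn.model_selection.learning_curve is shown below; the save_as and scoring keywords are inferred from the calls above and may not match the original helper exactly.

# Sketch of a sklearn-style learning-curve plotter; assumed, not the source's.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve

def plot_learning_curve(estimator, title, X, y, cv=None, n_jobs=None,
                        train_sizes=np.linspace(0.1, 1.0, 5),
                        scoring=None, save_as=None):
    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs,
        train_sizes=train_sizes, scoring=scoring)
    plt.figure()
    plt.title(title)
    plt.xlabel('Training examples')
    plt.ylabel(scoring if scoring else 'score')
    plt.plot(sizes, train_scores.mean(axis=1), 'o-', label='Training score')
    plt.plot(sizes, test_scores.mean(axis=1), 'o-', label='Cross-validation score')
    plt.legend(loc='best')
    if save_as:
        plt.savefig(save_as + '.png')  # hypothetical output convention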
Example #22
    def train(self,
              n_episodes=500,
              annealing_episodes=450,
              every_episode=10,
              **kwargs):
        if self.training is False:
            raise Exception(
                'train() must not be called on a non-training model')

        reward_history = [0.0]
        reward_averaged = []
        lr = self.lr
        eps = self.epsilon
        annealing_episodes = annealing_episodes or n_episodes
        eps_drop = (self.epsilon - self.epsilon_final) / annealing_episodes
        print "eps_drop: {}".format(eps_drop)
        step = 0

        # calling the property method of BaseTFModel to start a session
        self.sess.run(self.init_vars)
        self.__init_target_q_net()

        for n_episode in range(n_episodes):
            ob = self.env.reset()
            done = False
            traj = []
            reward = 0.
            while not done:
                a = self.act(ob, eps)
                assert a >= 0
                new_ob, r, done, _ = self.env.step(a)
                step += 1
                reward += r
                traj.append(Transition(ob, a, r, new_ob, done))
                ob = new_ob

                # Not enough samples in the buffer yet.
                if self.memory.size < self.batch_size:
                    continue
                # Training with a mini batch of samples
                batch_data = self.memory.sample(self.batch_size)
                feed_dict = {
                    self.learning_rate: lr,
                    self.states: batch_data['s'],
                    self.actions: batch_data['a'],
                    self.rewards: batch_data['r'],
                    self.states_next: batch_data['s_next'],
                    self.done_flags: batch_data['done']
                }

                if self.double_q:
                    actions_next = self.sess.run(
                        self.actions_selected_by_q,
                        {self.states: batch_data['s_next']})
                    feed_dict.update({self.actions_next: actions_next})

                _, q_val, q_target_val, loss, summ_str = self.sess.run(
                    [
                        self.optimizer, self.q, self.q_target, self.loss,
                        self.merged_summary
                    ],
                    feed_dict=feed_dict)
                self.writer.add_summary(summ_str, step)

                # update the target q net if necessary
                self.__update_target_q_net(step)

            self.memory.add(traj)
            reward_history.append(reward)
            reward_averaged.append(np.mean(reward_history[-10:]))

            # Annealing the learning and exploration rate after every episode
            lr *= self.lr_decay
            if eps > self.epsilon_final:
                eps -= eps_drop

            if reward_history and every_episode and n_episode % every_episode == 0:
                print "[episodes: {}/step: {}], best: {}, avg: {:.2f}:{}, lr: {:.4f}, eps: {:.4f}".format(
                    n_episode, step, np.max(reward_history),
                    np.mean(reward_history[-10:]), reward_history[-5:], lr,
                    eps)

        self.save_model(step=step)
        print "[training completed] episodes: {}, Max reward: {}, Average reward: {}".format(
            len(reward_history), np.max(reward_history),
            np.mean(reward_history))

        fig_path = os.path.join(self.model_path, 'figs')
        makedirs(fig_path)
        fig_file = os.path.join(
            fig_path, '{n}-{t}.png'.format(n=self.name, t=int(time.time())))
        plot_learning_curve(fig_file, {
            'reward': reward_history,
            'reward_avg': reward_averaged
        },
                            xlabel='episode')
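
Here plot_learning_curve takes a target file path plus a dict of named reward series. One plausible minimal implementation, assumed from the call site rather than taken from the repository:

# Illustrative sketch matching plot_learning_curve(fig_file, {...}, xlabel=...).
import matplotlib.pyplot as plt

def plot_learning_curve(fig_file, series, xlabel='episode'):
    plt.figure()
    for label, values in series.items():
        plt.plot(range(len(values)), values, label=label)
    plt.xlabel(xlabel)
    plt.ylabel('reward')
    plt.legend()
    plt.savefig(fig_file)
    plt.close()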
Example #23
    def work(self, sess, n_episodes=2000, plot_learning_curve=False):
        try:
            self.final_returns
        except AttributeError:
            self.reset_network(sess)

        with tf.variable_scope(
                self.name), sess.as_default(), sess.graph.as_default():
            for episode in range(n_episodes):
                last_state = self.env.reset()

                # plot the learning curve
                if plot_learning_curve:
                    if episode % self.plot_learning_curve_freq == 0:
                        utils.plot_learning_curve(
                            self.final_returns, self.plot_learning_curve_freq)

                for i in range(1000):
                    self.env.render()

                    # get current epsilon
                    if self.learning_step_counter < self.n_exploration_steps:
                        epsilon = self.exploration_eps[
                            self.learning_step_counter]
                    else:
                        epsilon = self.epsilon_final

                    # choose an action according to epsilon-greedy policy
                    random_helper = np.random.rand()
                    if random_helper >= epsilon:
                        q_vals = sess.run(self.dqn.q_eval,
                                          feed_dict={self.dqn.s: [last_state]})
                        action = np.argmax(q_vals)
                    else:
                        action = self.env.action_space.sample()

                    # get and store the transition
                    next_state, reward, done, info = self.env.step(action)

                    self.replay_memory.push(last_state, action, next_state,
                                            reward, done)

                    states, actions, next_states, rewards, dones = self.replay_memory.sample(
                        self.minibatch_size)

                    sess.run(self.dqn.update_gradient_op,
                             feed_dict={
                                 self.dqn.s: states,
                                 self.dqn.s_next: next_states,
                                 self.dqn.a: actions,
                                 self.dqn.r: rewards,
                                 self.dqn.done: dones
                             })

                    self.learning_step_counter += 1

                    if self.learning_step_counter % self.target_net_update_freq == 0:
                        sess.run(self.dqn.target_replace_op)

                    if done:
                        self.final_returns.append(i)
                        break
                    else:
                        last_state = next_state
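
Example #23 relies on a replay_memory object with push and sample methods that are not shown. A minimal buffer consistent with those calls might look like the sketch below (an assumption, not the source class). Note the loop above samples on every step, so a real buffer also needs enough stored transitions before sample(minibatch_size) can return a full batch.

# Minimal replay buffer sketch matching the push/sample calls in Example #23.
import random
from collections import deque
import numpy as np

class ReplayMemory:
    def __init__(self, capacity=100000):
        self.buffer = deque(maxlen=capacity)  # oldest transitions are dropped

    def push(self, state, action, next_state, reward, done):
        self.buffer.append((state, action, next_state, reward, done))

    def sample(self, batch_size):
        batch = random.sample(self.buffer, min(batch_size, len(self.buffer)))
        states, actions, next_states, rewards, dones = zip(*batch)
        return (np.array(states), np.array(actions), np.array(next_states),
                np.array(rewards), np.array(dones))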
Example #24
    n_games = 2000
    env = gym.make('LunarLander-v2')
    agent = ACAgent(input_dims=[8],
                    action_size=4,
                    lr=5e-6,
                    gamma=0.99,
                    fc1Dm=2048,
                    fc2Dm=1536)
    fname = 'Actor_critic_' + 'Lunar_Lander_' + str(agent.fc1Dm) + 'fc1_dims_' + \
            str(agent.fc2Dm) + 'fc2Dm_lr' + str(agent.lr) + '_' + str(n_games) + 'games'
    figure_file = 'plots/' + fname + '.png'
    scores = []
    for episode in range(n_games):
        state = env.reset()
        score = 0
        done = False
        while not done:
            action = agent.choose_action(state)
            new_state, reward, done, info = env.step(action)
            agent.learn(state, reward, new_state, done)
            score += reward
            state = new_state

        scores.append(score)
        avg_score = np.mean(scores[-100:])
        print('episode ', episode, 'score %.2f' % score,
              'avg_score %.2f' % avg_score)

    x = [i + 1 for i in range(len(scores))]
    plot_learning_curve(scores, x, figure_file)
Example #25
                  input_dims=[8],
                  n_actions=4,
                  fc1_dims=2048,
                  fc2_dims=1536)
    n_games = 3000

    fname = 'ACTOR_CRITIC_' + 'lunar_lander_' + str(agent.fc1_dims) + \
            '_fc1_dims_' + str(agent.fc2_dims) + '_fc2_dims_lr' + str(agent.lr) +\
            '_' + str(n_games) + 'games'
    figure_file = 'plots/' + fname + '.png'

    scores = []
    for i in range(n_games):
        done = False
        observation = env.reset()
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.learn(observation, reward, observation_, done)
            observation = observation_
        scores.append(score)

        avg_score = np.mean(scores[-100:])
        print('episode ', i, 'score %.1f' % score,
              'average score %.1f' % avg_score)

    x = [i + 1 for i in range(n_games)]
    plot_learning_curve(x, scores, figure_file)
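
Examples #24 and #25 use a three-argument variant, though they disagree on the order of x and scores, so each repository evidently defines its own helper. A common pattern, sketched here as an assumption, plots a 100-episode running average of the scores:

# Assumed sketch of plot_learning_curve(x, scores, figure_file).
import numpy as np
import matplotlib.pyplot as plt

def plot_learning_curve(x, scores, figure_file):
    running_avg = [np.mean(scores[max(0, i - 99):i + 1])
                   for i in range(len(scores))]
    plt.figure()
    plt.plot(x, running_avg)
    plt.title('Running average of previous 100 scores')
    plt.savefig(figure_file)
    plt.close()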
Example #26
setproctitle("(hwijeen) word drop")
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S',
    level=logging.INFO)
logger = logging.getLogger(__name__)

if __name__ == "__main__":
    DATA_DIR = '/home/nlpgpu5/hwijeen/MulDocSumm/data/'
    FILE = 'rottentomatoes_prepared'
    DEVICE = torch.device('cuda:1')
    EPOCH = 50
    WORD_DROP = 0.2

    data = MulSumData(DATA_DIR, FILE, 99, DEVICE)
    selfattnCVAE = build_SelfAttnCVAE(len(data.vocab),
                                      hidden_dim=600,
                                      latent_dim=300,
                                      enc_bidirectional=True,
                                      word_drop=WORD_DROP,
                                      device=DEVICE)
    trainer = Trainer(selfattnCVAE,
                      data,
                      lr=0.001,
                      to_record=['recon_loss', 'kl_loss'])
    results = trainer.train(num_epoch=EPOCH, verbose=True)

    plot_learning_curve(results['train_losses'], results['valid_losses'])
    plot_metrics(results['train_metrics'], results['valid_metrics'])
    plot_kl_loss(trainer.stats.stats['kl_loss'])
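
Example #26 passes two loss histories. A minimal matching helper, with an assumed signature and output file name:

# Assumed sketch of plot_learning_curve(train_losses, valid_losses).
import matplotlib.pyplot as plt

def plot_learning_curve(train_losses, valid_losses, filename='learning_curve.png'):
    plt.figure()
    plt.plot(train_losses, label='train loss')
    plt.plot(valid_losses, label='valid loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.savefig(filename)
    plt.close()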
Example #27
                  'best score %.4f' % best_score,
                  'epsilon %.4f' % agent.epsilon, 'game steps', n_game_steps,
                  'steps', n_steps)

            if avg_score > best_score:
                if not args.load_checkpoint:
                    agent.save_models()
                best_score = avg_score
        else:
            print(
                'episode: ', i, 'score: ', score,
                ' average score ** best score ** epsilon %.4f' % agent.epsilon,
                'game steps', n_game_steps, 'steps', n_steps)

        eps_history.append(agent.epsilon)
        # if args.load_checkpoint and n_steps >= 18000:
        #    break

        if save_data:
            print('saving data...')
            save_data = False
            x = [i + 1 for i in range(len(scores))]
            plot_learning_curve(steps_array, scores, eps_history,
                                figure_file + '_step_' + str(n_steps) + '.png')
            np.save(scores_file + '_step_' + str(n_steps) + '.npy',
                    np.array(scores))
    x = [i + 1 for i in range(len(scores))]
    plot_learning_curve(steps_array, scores, eps_history,
                        figure_file + '_step_final.png')
    np.save(scores_file + '_step_final.npy', np.array(scores))
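
The DQN snippets in Examples #27 and #30 share a four-argument form that overlays the epsilon schedule and a running-average score. A sketch under that assumption, with the two series on twin y axes:

# Assumed sketch of plot_learning_curve(steps, scores, epsilons, filename).
import numpy as np
import matplotlib.pyplot as plt

def plot_learning_curve(x, scores, epsilons, filename):
    fig, ax = plt.subplots()
    ax.plot(x, epsilons, color='C0')
    ax.set_xlabel('training steps')
    ax.set_ylabel('epsilon', color='C0')
    running_avg = [np.mean(scores[max(0, i - 99):i + 1])
                   for i in range(len(scores))]
    ax2 = ax.twinx()  # second y axis sharing the x axis
    ax2.plot(x, running_avg, color='C1')
    ax2.set_ylabel('score (100-episode average)', color='C1')
    fig.savefig(filename)
    plt.close(fig)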
Example #28
# utils.plot_learning_curve(best_svm, 'best svm accuracy score',
# 						  tfidf_uni_bigram_train,
# 						  train_cats, cv=5, n_jobs=-1, scorer=accuracy_scorer)
# utils.plot_learning_curve(best_svm, 'best svm hamming loss',
# 						  tfidf_uni_bigram_train,
# 						  train_cats, cv=5, n_jobs=-1, scorer=hamming_losser)
# utils.plot_learning_curve(best_svm, 'best svm jaccard score',
# 						  tfidf_uni_bigram_train,
# 						  train_cats, cv=5, n_jobs=-1, scorer=jaccard_scorer)
# utils.plot_learning_curve(best_svm, 'best svm matthews corr coef scorer', tfidf_uni_bigram_train,
# 						  train_cats, cv=5, n_jobs=-1,
# 						  scorer=matthews_corrcoef_scorer)
utils.plot_learning_curve(best_svm,
                          'best svm recall score',
                          tfidf_uni_bigram_train,
                          train_cats,
                          cv=5,
                          n_jobs=-1,
                          scorer=recall_scorer)


def evaluate():
    train_uni_bigram, test_uni_bigram, train_cats, test_cats = \
     feature_vecs.do_uni_bigram()

    tfidf_uni_bigram_train = feature_vecs.tfidf_matrix(train_uni_bigram)
    tfidf_uni_bigram_test = feature_vecs.tfidf_matrix(test_uni_bigram)
    # tfidf_uni_bigram_train, tfidf_uni_bigram_test, train_cats, test_cats = feature_vecs.do_means_embeddings()

    svm_classifier = LinearSVC(max_iter=5000, dual=False)
Example #29
def main():
    Hyper.init()
    env = make_env(
        Constants.env_id)  # See wrapper code for environment in atari_image.py
    Hyper.n_actions = env.action_space.n
    shape = env.observation_space.shape
    agent = Agent(input_dims=shape, env=env, n_actions=env.action_space.n)
    filename = f"{Constants.env_id}_games{Hyper.n_games}_alpha{Hyper.alpha}.png"
    figure_file = f'plots/{filename}'

    best_ave_score = env.reward_range[0]
    best_score = 0
    score_history = []
    load_checkpoint = False
    if load_checkpoint:
        agent.load_models()
        env.render(mode='human')
    total_steps = 0
    game_id = 0
    for i in range(Hyper.n_games):
        game_id += 1
        if game_id % 20 == 0:
            Hyper.alpha = Hyper.alpha * 1.2
            Hyper.beta = Hyper.beta * 1.2
        observation = env.reset()
        done = False
        steps = 0
        score = 0
        while not done:
            # Sample action from the policy
            action = agent.choose_action(observation)

            # Sample transition from the environment
            new_observation, reward, done, info = env.step(action)
            steps += 1
            total_steps += 1

            # Store transition in the replay buffer
            agent.remember(observation, action, reward, new_observation, done)
            if not load_checkpoint:
                agent.learn()
            score += reward
            observation = new_observation
        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        if score > best_score:
            best_score = score

        if avg_score > best_ave_score:
            best_ave_score = avg_score
            if not load_checkpoint:
                agent.save_models()

        episode = i + 1
        print(
            f"episode {episode}: score {score}, best_score {best_score}, best ave score {best_ave_score}, trailing 100 games avg {avg_score}, steps {steps}, total steps {total_steps}"
        )

    print(f"total number of steps taken: {total_steps}")
    if not load_checkpoint:
        x = [i + 1 for i in range(Hyper.n_games)]
        plot_learning_curve(x, score_history, figure_file)
Example #30
        observation = env.reset()

        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward

            if not load_checkpoint:
                agent.store_transition(observation, action, reward,
                                       observation_, int(done))
                agent.learn()
            observation = observation_
            n_steps += 1
        scores.append(score)
        steps_array.append(n_steps)

        avg_score = np.mean(scores[-100:])
        print(
            f'episode {i} score: {score}, '
            f'average score {avg_score:.1f} best score {best_score:.1f} epsilon {agent.epsilon:.2f} '
            f'steps {n_steps}')

        if avg_score > best_score:
            if not load_checkpoint:
                agent.save_models()
            best_score = avg_score

        eps_history.append(agent.epsilon)

    plot_learning_curve(steps_array, scores, eps_history, figure_file)
Example #31
def train_model(model, optimizer, criterion, training_data_loader,
                validation_data_loader, opt):
    logging.info(
        '======================  Checking GPU Availability  ========================='
    )
    if torch.cuda.is_available():
        if isinstance(opt.gpuid, int):
            opt.gpuid = [opt.gpuid]
        logging.info('Running on GPU! devices=%s' % str(opt.gpuid))
        model = model.cuda()
        model = nn.DataParallel(model, device_ids=opt.gpuid)
        criterion.cuda()
    else:
        logging.info('Running on CPU!')

    logging.info(
        '======================  Start Training  =========================')

    train_history_losses = []
    valid_history_losses = []
    best_loss = sys.float_info.max

    train_losses = []
    total_batch = 0
    early_stop_flag = False
    stop_increasing = 0  # consecutive validations without improvement

    for epoch in range(opt.start_epoch, opt.epochs):
        if early_stop_flag:
            break

        progbar = Progbar(title='Training',
                          target=len(training_data_loader),
                          batch_size=opt.batch_size,
                          total_examples=len(training_data_loader.dataset))
        model.train()

        for batch_i, batch in enumerate(training_data_loader):
            batch_i += 1  # for the aesthetics of printing
            total_batch += 1
            src = batch.src
            trg = batch.trg

            # print("src size - ",src.size())
            # print("target size - ",trg.size())
            if torch.cuda.is_available():
                src = src.cuda()  # .cuda() returns a copy, so assign it back
                trg = trg.cuda()

            optimizer.zero_grad()
            decoder_logits, _, _ = model.forward(src,
                                                 trg,
                                                 must_teacher_forcing=False)

            start_time = time.time()

            # remove the 1st word in trg to let predictions and real goal match
            loss = criterion(
                decoder_logits.contiguous().view(-1, opt.vocab_size),
                trg[:, 1:].contiguous().view(-1))
            print("--loss calculation- %s seconds ---" %
                  (time.time() - start_time))

            start_time = time.time()
            loss.backward()
            print("--backward- %s seconds ---" % (time.time() - start_time))

            if opt.max_grad_norm > 0:
                pre_norm = torch.nn.utils.clip_grad_norm(
                    model.parameters(), opt.max_grad_norm)
                after_norm = (sum([
                    p.grad.data.norm(2)**2 for p in model.parameters()
                    if p.grad is not None
                ]))**(1.0 / 2)
                logging.info('clip grad (%f -> %f)' % (pre_norm, after_norm))

            optimizer.step()

            train_losses.append(loss.data[0])
            perplexity = np.math.exp(loss.data[0])

            progbar.update(epoch, batch_i, [('train_loss', loss.data[0]),
                                            ('perplexity', perplexity)])

            if batch_i > 1 and batch_i % opt.report_every == 0:
                logging.info(
                    '======================  %d  =========================' %
                    (batch_i))

                logging.info(
                    'Epoch : %d Minibatch : %d, Loss=%.5f, PPL=%.5f' %
                    (epoch, batch_i, np.mean(loss.data[0]), perplexity))
                sampled_size = 2
                logging.info(
                    'Printing predictions on %d sampled examples by greedy search'
                    % sampled_size)

                # softmax logits to get probabilities (batch_size, trg_len, vocab_size)
                # decoder_probs = torch.nn.functional.softmax(decoder_logits.view(trg.size(0) * trg.size(1), -1)).view(*trg.size(), -1)

                if torch.cuda.is_available():
                    src = src.data.cpu().numpy()
                    decoder_logits = decoder_logits.data.cpu().numpy()
                    max_words_pred = decoder_logits.argmax(axis=-1)
                    trg = trg.data.cpu().numpy()
                else:
                    src = src.data.numpy()
                    decoder_logits = decoder_logits.data.numpy()
                    max_words_pred = decoder_logits.argmax(axis=-1)
                    trg = trg.data.numpy()

                sampled_trg_idx = np.random.random_integers(low=0,
                                                            high=len(trg) - 1,
                                                            size=sampled_size)
                src = src[sampled_trg_idx]
                max_words_pred = [max_words_pred[i] for i in sampled_trg_idx]
                decoder_logits = decoder_logits[sampled_trg_idx]
                trg = [trg[i][1:] for i in sampled_trg_idx
                       ]  # the real target has removed the starting <BOS>

                for i, (src_wi, pred_wi,
                        real_wi) in enumerate(zip(src, max_words_pred, trg)):
                    nll_prob = -np.sum(
                        np.log2([
                            decoder_logits[i][l][pred_wi[l]]
                            for l in range(len(real_wi))
                        ]))
                    sentence_source = [opt.id2word[x] for x in src_wi]
                    sentence_pred = [opt.id2word[x] for x in pred_wi]
                    sentence_real = [opt.id2word[x] for x in real_wi]

                    logging.info(
                        '==================================================')
                    logging.info('Source: %s ' % (' '.join(sentence_source)))
                    logging.info('\t\tPred : %s (%.4f)' %
                                 (' '.join(sentence_pred), nll_prob))
                    logging.info('\t\tReal : %s ' % (' '.join(sentence_real)))

            if total_batch > 1 and total_batch % opt.run_valid_every == 0:
                logging.info('*' * 50)
                logging.info(
                    'Run validation test @Epoch=%d,#(Total batch)=%d' %
                    (epoch, total_batch))
                valid_losses = _valid(validation_data_loader,
                                      model,
                                      criterion,
                                      optimizer,
                                      epoch,
                                      opt,
                                      is_train=False)

                train_history_losses.append(copy.copy(train_losses))
                valid_history_losses.append(valid_losses)
                train_losses = []

                # Plot the learning curve
                plot_learning_curve(
                    train_history_losses,
                    valid_history_losses,
                    'Training and Validation',
                    curve1_name='Training Error',
                    curve2_name='Validation Error',
                    save_path=opt.exp_path +
                    '/[epoch=%d,batch=%d,total_batch=%d]train_valid_curve.png'
                    % (epoch, batch_i, total_batch))
                '''
                determine if early stop training
                '''
                valid_loss = np.average(valid_history_losses[-1])
                is_best_loss = valid_loss < best_loss
                rate_of_change = float(valid_loss -
                                       best_loss) / float(best_loss)

                # only store the checkpoints that make better validation performances
                if total_batch > 1 and epoch >= opt.start_checkpoint_at and (
                        total_batch % opt.save_model_every == 0
                        or is_best_loss):
                    # Save the checkpoint
                    logging.info('Saving checkpoint to: %s' % os.path.join(
                        opt.save_path,
                        '%s.epoch=%d.batch=%d.total_batch=%d.error=%f' %
                        (opt.exp, epoch, batch_i, total_batch, valid_loss) +
                        '.model'))
                    torch.save(
                        model.state_dict(),
                        open(
                            os.path.join(
                                opt.save_path,
                                '%s.epoch=%d.batch=%d.total_batch=%d' %
                                (opt.exp, epoch, batch_i, total_batch) +
                                '.model'), 'wb'))

                # valid error doesn't decrease
                if rate_of_change >= 0:
                    stop_increasing += 1
                else:
                    stop_increasing = 0

                if is_best_loss:
                    logging.info(
                        'Validation: update best loss (%.4f --> %.4f), rate of change (ROC)=%.2f'
                        % (best_loss, valid_loss, rate_of_change * 100))
                else:
                    logging.info(
                        'Validation: best loss is not updated for %d times (%.4f --> %.4f), rate of change (ROC)=%.2f'
                        % (stop_increasing, best_loss, valid_loss,
                           rate_of_change * 100))

                best_loss = min(valid_loss, best_loss)
                if stop_increasing >= opt.early_stop_tolerance:
                    logging.info(
                        'Validation loss has not improved for %d checks; stopping early'
                        % stop_increasing)
                    early_stop_flag = True
                    break
                logging.info('*' * 50)
def display_learning_curves(clf, X, y, title="Learning Curves"):
    return plot_learning_curve(clf, title, X, y,
                               n_jobs=-1, train_sizes=np.linspace(0.1, 1.0, 5),
                               scoring=ndcg_scorer)
Example #35
plot_matrix(etc_3, X_test, y_test, filename)

"""Plot ROC Curve"""
y_score = etc_3.predict_proba(X_test)
y_test_bin = np.array(label_binarize(y_test, classes=np.unique(y)))
n_classes = y_test_bin.shape[1]
plot_roc_curve(n_classes, y_test_bin, y_score, filename)

"""Plot Decision Area"""
clf = ExtraTreesClassifier(n_estimators=ESTIMATORS, max_features=0.42, n_jobs=-1)
plot_decision_area(clf, X_scaled[:, 2:4], y, title="Extra Trees Classifier", filename=filename)

"""Plot Learning Curve"""
X_lc = X_scaled[:10000]
y_lc = y[:10000]
plot_learning_curve(clf, "Extra Trees Classifier", X_lc, y_lc, filename=filename)

"""Plot Validation Curve: max_depth"""
clf = ExtraTreesClassifier(n_estimators=ESTIMATORS, max_depth=8)
param_name = "max_depth"
param_range = [1, 2, 4, 8, 16, 32, 100]
plot_validation_curve(clf, X_lc, y_lc, param_name, param_range, scoring="roc_auc", cv=n_classes, filename=filename)

"""Plot Validation Curve: n_estimators"""
# clf = ExtraTreesClassifier(n_estimators=ESTIMATORS ,max_features=0.42, max_depth=16)
# param_name = 'n_estimators'
# param_range = [1, 2, 4, 10, 30]
# plot_validation_curve(clf, X_scaled, y,
#                   param_name, param_range,
#                   scoring='accuracy', cv=n_classes,
#                   filename)
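
For completeness, the plot_validation_curve used in Example #35 can be sketched on top of sklearn.model_selection.validation_curve; the filename keyword and the output path convention are assumptions:

# Hedged sketch of plot_validation_curve as called in Example #35.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import validation_curve

def plot_validation_curve(clf, X, y, param_name, param_range,
                          scoring='accuracy', cv=None, filename='model'):
    train_scores, test_scores = validation_curve(
        clf, X, y, param_name=param_name, param_range=param_range,
        scoring=scoring, cv=cv, n_jobs=-1)
    plt.figure()
    plt.plot(param_range, train_scores.mean(axis=1), 'o-', label='Training score')
    plt.plot(param_range, test_scores.mean(axis=1), 'o-', label='Cross-validation score')
    plt.xlabel(param_name)
    plt.ylabel(scoring)
    plt.legend(loc='best')
    plt.savefig(filename + '_validation_curve.png')  # assumed naming scheme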