Example #1
def gather_data4(env, epochs, data_points, train=True, unpack=False):
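    # Dispatch helper: Pendulum-v0 / MountainCarContinuous-v0 delegate to gather_data,
    # train=True delegates to gather_data3, and anything else falls back to the
    # random-action rollout loop below (one [state, action, reward, next_state, done]
    # entry per transition).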
    if env.spec.id in ['Pendulum-v0', 'MountainCarContinuous-v0']:
        return gather_data(env, epochs=epochs, unpack=unpack)
    elif train:
        return gather_data3(env, data_points=data_points, unpack=unpack)
    else:
        data = []
        count = 0
        while True:
            state = env.reset()
            while True:
                action = np.random.uniform(low=env.action_space.low,
                                           high=env.action_space.high)
                next_state, reward, done, _ = env.step(action)
                data.append([state, action, reward, next_state, done])
                state = np.copy(next_state)
                if done:
                    count += 1
                    break
            if count == epochs:
                break
        if not unpack:
            return data
        else:
            states, actions, rewards, next_states = [
                np.stack(ele, axis=0) for ele in list(zip(*data))[:-1]
            ]
            return states, actions, rewards[..., np.newaxis], next_states
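
A minimal usage sketch for gather_data4 (hypothetical: it assumes gym and numpy are imported at module level and that the gather_data/gather_data3 helpers from the same file are available):

import gym
import numpy as np

# Assumption: any environment id outside the two special-cased ones; pybullet_envs must
# be installed and registered for this particular id.
env = gym.make('HalfCheetahBulletEnv-v0')
# train=False selects the random-rollout branch above: `epochs` full episodes of uniform
# random actions, returned as unpacked arrays because unpack=True.
states, actions, rewards, next_states = gather_data4(
    env, epochs=3, data_points=None, train=False, unpack=True)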
Example #2
def main_loop():
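    # Model-based RL loop: fit one Bayesian linear regression model (random Fourier
    # features) per state dimension, plus an optional reward model; fit the policy with
    # CMA-ES against those models; then roll the policy out in the real environment while
    # propagating the hyperstate (Cholesky factor of XX and the Xy statistics).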
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='Pendulum-v0')
    parser.add_argument("--unroll_steps", type=int, default=200)
    parser.add_argument("--discount_factor", type=float, default=.995)
    parser.add_argument("--gather_data_epochs",
                        type=int,
                        default=3,
                        help='Epochs for initial data gather.')
    parser.add_argument("--train_hp_iterations", type=int, default=2000 * 10)
    parser.add_argument("--train_policy_batch_size", type=int, default=30)
    parser.add_argument("--no_samples", type=int, default=1)
    parser.add_argument("--basis_dim", type=int, default=256)
    parser.add_argument("--hidden_dim", type=int, default=32)
    parser.add_argument("--rffm_seed", type=int, default=1)
    parser.add_argument("--Agent",
                        type=str,
                        choices=['', '2', '3'],
                        default='')
    parser.add_argument("--learn_reward", type=int, choices=[0, 1], default=1)
    parser.add_argument("--max_train_hp_datapoints", type=int, default=20000)
    parser.add_argument("--matern_param_reward", type=float, default=np.inf)
    parser.add_argument("--basis_dim_reward", type=int, default=600)
    parser.add_argument("--use_mean_reward", type=int, default=0)
    parser.add_argument("--update_hyperstate", type=int, default=1)
    parser.add_argument("--policy_use_hyperstate", type=int, default=1)
    parser.add_argument("--cma_maxiter", type=int, default=1000)
    parser.add_argument("--learn_diff", type=int, choices=[0, 1], default=0)
    args = parser.parse_args()

    print(sys.argv)
    print(args)
    from blr_regression2_sans_hyperstate import Agent2
    from blr_regression2_tf import Agent3

    env = gym.make(args.environment)

    regression_wrappers = [
        RegressionWrapper(input_dim=env.observation_space.shape[0] +
                          env.action_space.shape[0],
                          basis_dim=args.basis_dim,
                          length_scale=1.,
                          signal_sd=1.,
                          noise_sd=5e-4,
                          prior_sd=1.,
                          rffm_seed=args.rffm_seed,
                          train_hp_iterations=args.train_hp_iterations)
        for _ in range(env.observation_space.shape[0])
    ]
    if args.learn_reward == 1:
        regression_wrappers.append(
            RegressionWrapperReward(
                environment=args.environment,
                input_dim=env.observation_space.shape[0] +
                env.action_space.shape[0],
                basis_dim=args.basis_dim_reward,
                length_scale=1.,
                signal_sd=1.,
                noise_sd=5e-4,
                prior_sd=1.,
                rffm_seed=args.rffm_seed,
                train_hp_iterations=args.train_hp_iterations,
                matern_param=args.matern_param_reward))
    agent = eval('Agent' + args.Agent)(
        environment=env.spec.id,
        x_dim=env.observation_space.shape[0] + env.action_space.shape[0],
        y_dim=env.observation_space.shape[0],
        state_dim=env.observation_space.shape[0],
        action_dim=env.action_space.shape[0],
        observation_space_low=env.observation_space.low,
        observation_space_high=env.observation_space.high,
        action_space_low=env.action_space.low,
        action_space_high=env.action_space.high,
        unroll_steps=args.unroll_steps,
        no_samples=args.no_samples,
        discount_factor=args.discount_factor,
        random_matrices=[rw.random_matrix for rw in regression_wrappers],
        biases=[rw.bias for rw in regression_wrappers],
        basis_dims=[rw.basis_dim for rw in regression_wrappers],
        hidden_dim=args.hidden_dim,
        learn_reward=args.learn_reward,
        use_mean_reward=args.use_mean_reward,
        update_hyperstate=args.update_hyperstate,
        policy_use_hyperstate=args.policy_use_hyperstate,
        learn_diff=args.learn_diff)

    flag = False
    data_buffer = gather_data(env, args.gather_data_epochs)
    data_buffer = scrub_data(args.environment, data_buffer, True)

    init_states = np.stack(
        [env.reset() for _ in range(args.train_policy_batch_size)], axis=0)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if args.environment == 'Pendulum-v0' and args.learn_reward == 0:
            weights = pickle.load(
                open('../custom_environments/weights/pendulum_reward.p', 'rb'))
            sess.run(agent.assign_ops0,
                     feed_dict=dict(zip(agent.placeholders_reward, weights)))
        for epoch in range(1000):
            #Train hyperparameters and update the system model.
            states_actions, states, rewards, next_states = unpack(data_buffer)
            targets = np.concatenate([
                next_states - states if args.learn_diff else next_states,
                rewards
            ],
                                     axis=-1)
            for i in range(env.observation_space.shape[0] + args.learn_reward):
                if not flag:
                    regression_wrappers[i]._train_hyperparameters(
                        states_actions, targets[:, i:i + 1])
                    regression_wrappers[i]._reset_statistics(
                        states_actions, targets[:, i:i + 1])
                else:
                    regression_wrappers[i]._update(states_actions,
                                                   targets[:, i:i + 1])
            if len(data_buffer) >= args.max_train_hp_datapoints: flag = True
            if flag: data_buffer = []
            tmp_data_buffer = []

            #Fit policy network.
            XX, Xy, hyperparameters = zip(*[[rw.XX, rw.Xy, rw.hyperparameters]
                                            for rw in regression_wrappers])
            agent._fit(args.cma_maxiter, np.copy(init_states),
                       [np.copy(ele) for ele in XX],
                       [np.copy(ele) for ele in Xy],
                       [np.copy(ele) for ele in hyperparameters], sess)

            #Get hyperstate & hyperparameters
            hyperstate = list(zip(*[[
                scipy.linalg.cholesky(np.copy(rw.XX) +
                                      (rw.noise_sd / rw.prior_sd)**2 *
                                      np.eye(rw.basis_dim),
                                      lower=True)[np.newaxis, ...],
                np.copy(rw.Xy)[np.newaxis, ...]
            ] for rw in regression_wrappers]))  # materialize so it can be reused below

            total_rewards = 0.
            state = env.reset()
            while True:
                #env.render()
                action = agent._forward(agent.thetas, state[np.newaxis, ...],
                                        hyperstate)[0]
                next_state, reward, done, _ = env.step(action)

                #hyperstate = update_hyperstate_old(agent, XX, hyperstate, hyperparameters, [state, action, reward, next_state, done], agent.state_dim+agent.learn_reward, args.learn_diff)
                hyperstate = update_hyperstate(
                    agent, hyperstate, hyperparameters,
                    [state, action, reward, next_state, done],
                    agent.state_dim + agent.learn_reward, args.learn_diff)

                tmp_data_buffer.append(
                    [state, action, reward, next_state, done])
                total_rewards += float(reward)
                state = np.copy(next_state)
                if done:
                    print('epoch:', epoch, 'total_rewards:', total_rewards)
                    data_buffer.extend(
                        scrub_data(args.environment, tmp_data_buffer, False))
                    break
Example #3
def plotting_experiments():
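    # Diagnostic plots: train random-feature regressors for next-state and reward on one
    # batch of data, then compare one-step predictions (with error bars) and sampled
    # multi-step rollouts against a held-out trajectory.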
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--environment",
        type=str,
        choices=[
            'Pendulum-v0', 'MountainCarContinuous-v0', 'MinitaurBulletEnv-v0',
            'CartPoleBulletEnv-v0', 'HumanoidBulletEnv-v0', 'AntBulletEnv-v0',
            'HopperBulletEnv-v0', 'HalfCheetahBulletEnv-v0',
            'Walker2DBulletEnv-v0', 'InvertedPendulumBulletEnv-v0',
            'InvertedDoublePendulumBulletEnv-v0',
            'InvertedPendulumSwingupBulletEnv-v0'
        ],
        default='Pendulum-v0')
    parser.add_argument("--train-hp-iterations", type=int, default=2000)
    parser.add_argument("--basis-dim", type=int, default=256)
    parser.add_argument("--basis-dim-reward", type=int, default=600)
    parser.add_argument("--matern-param", type=float, default=np.inf)
    parser.add_argument("--matern-param-reward", type=float, default=np.inf)

    parser.add_argument(
        "--train-hit-wall", type=int,
        default=0)  #Only used when --environment=MountainCarContinuous-v0
    parser.add_argument(
        "--train-reach-goal", type=int,
        default=0)  #Only used when --environment=MountainCarContinuous-v0
    parser.add_argument(
        "--test-hit-wall", type=int,
        default=0)  #Only used when --environment=MountainCarContinuous-v0
    parser.add_argument(
        "--test-reach-goal", type=int,
        default=0)  #Only used when --environment=MountainCarContinuous-v0

    parser.add_argument("--update-hyperstate", type=int, default=0)

    args = parser.parse_args()
    print(args)

    import matplotlib.pyplot as plt
    from utils import get_mcc_policy

    if args.environment == 'MountainCarContinuous-v0':
        train_set_size = 1
    else:
        train_set_size = 3

    env = gym.make(args.environment)

    predictors = []
    for i in range(env.observation_space.shape[0]):
        predictors.append(
            RegressionWrapper2(input_dim=env.observation_space.shape[0] +
                               env.action_space.shape[0],
                               basis_dim=args.basis_dim,
                               length_scale=1.,
                               signal_sd=1.,
                               noise_sd=5e-4,
                               prior_sd=1.,
                               rffm_seed=1,
                               train_hp_iterations=args.train_hp_iterations,
                               matern_param=args.matern_param))
    predictors.append(
        RegressionWrapperReward2(args.environment,
                                 input_dim=env.observation_space.shape[0] +
                                 env.action_space.shape[0],
                                 basis_dim=args.basis_dim_reward,
                                 length_scale=1.,
                                 signal_sd=1.,
                                 noise_sd=5e-4,
                                 prior_sd=1.,
                                 rffm_seed=1,
                                 train_hp_iterations=args.train_hp_iterations,
                                 matern_param=args.matern_param_reward))

    if args.environment == 'MountainCarContinuous-v0':
        states, actions, rewards, next_states = get_mcc_policy(
            env,
            hit_wall=bool(args.train_hit_wall),
            reach_goal=bool(args.train_reach_goal),
            train=True)
    else:
        states, actions, rewards, next_states = gather_data(env,
                                                            train_set_size,
                                                            unpack=True)
    states_actions = np.concatenate([states, actions], axis=-1)

    for i in range(env.observation_space.shape[0]):
        predictors[i]._train_hyperparameters(states_actions,
                                             next_states[:, i:i + 1])
    predictors[-1]._train_hyperparameters(states_actions, rewards)

    while True:
        for i in range(env.observation_space.shape[0]):
            predictors[i]._reset_statistics(states_actions,
                                            next_states[:, i:i + 1],
                                            bool(args.update_hyperstate))
        predictors[-1]._reset_statistics(states_actions, rewards,
                                         bool(args.update_hyperstate))

        if args.environment == 'MountainCarContinuous-v0':
            states2, actions2, rewards2, next_states2 = get_mcc_policy(
                env,
                hit_wall=bool(args.test_hit_wall),
                reach_goal=bool(args.test_reach_goal),
                train=False)
        else:
            states2, actions2, rewards2, next_states2 = gather_data(
                env, 1, unpack=True, test=True)
        states_actions2 = np.concatenate([states2, actions2], axis=-1)

        plt.figure()
        for i in range(env.observation_space.shape[0]):
            plt.subplot(4, env.observation_space.shape[0], i + 1)

            predict_mu, predict_sigma = predictors[i]._predict(
                states_actions2, False)

            plt.plot(np.arange(len(next_states2[:, i:i + 1])),
                     next_states2[:, i:i + 1])
            plt.errorbar(np.arange(len(predict_mu)),
                         predict_mu,
                         yerr=np.sqrt(predict_sigma),
                         color='m',
                         ecolor='g')
            plt.grid()

        traj_reward = []
        traj = []
        no_lines = 50
        state = np.tile(np.copy(states2[0:1, ...]), [no_lines, 1])
        for a in actions2:
            action = np.tile(a[np.newaxis, ...], [no_lines, 1])
            state_action = np.concatenate([state, action], axis=-1)

            mu_reward, sigma_reward = predictors[-1]._predict(
                state_action, bool(args.update_hyperstate))
            reward = np.stack([
                np.random.normal(loc=mu, scale=sigma)
                for mu, sigma in zip(mu_reward, sigma_reward)
            ],
                              axis=0)
            traj_reward.append(reward)

            mu_vec = []
            sigma_vec = []
            for i in range(env.observation_space.shape[0]):
                predict_mu, predict_sigma = predictors[i]._predict(
                    state_action, bool(args.update_hyperstate))
                mu_vec.append(predict_mu)
                sigma_vec.append(predict_sigma)

            mu_vec = np.concatenate(mu_vec, axis=-1)
            sigma_vec = np.concatenate(sigma_vec, axis=-1)

            state = np.stack([
                np.random.multivariate_normal(mu, np.diag(sigma))
                for mu, sigma in zip(mu_vec, sigma_vec)
            ],
                             axis=0)
            state = np.clip(state, env.observation_space.low,
                            env.observation_space.high)
            traj.append(np.copy(state))

            for i in range(env.observation_space.shape[0]):
                predictors[i]._update_hyperstate(state_action, state[:,
                                                                     i:i + 1],
                                                 bool(args.update_hyperstate))
            predictors[-1]._update_hyperstate(state_action, reward,
                                              bool(args.update_hyperstate))

        traj_reward = np.stack(traj_reward, axis=-1)
        traj = np.stack(traj, axis=-1)

        plt.subplot(4, 1, 4)
        for j in range(no_lines):
            y = traj_reward[j, 0, :]
            plt.plot(np.arange(len(y)), y, color='r')
        plt.plot(np.arange(len(rewards2)), rewards2)
        plt.grid()

        for i in range(env.observation_space.shape[0]):
            plt.subplot(4, env.observation_space.shape[0],
                        env.observation_space.shape[0] + i + 1)
            for j in range(no_lines):
                y = traj[j, i, :]
                plt.plot(np.arange(len(y)), y, color='r')

            plt.plot(np.arange(len(next_states2[..., i])), next_states2[...,
                                                                        i])
            plt.grid()

        plt.subplot(4, 1, 3)
        predict_mu, predict_sigma = predictors[-1]._predict(
            states_actions2, False)
        plt.plot(np.arange(len(rewards2)), rewards2)
        plt.errorbar(np.arange(len(predict_mu)),
                     predict_mu,
                     yerr=np.sqrt(predict_sigma),
                     color='m',
                     ecolor='g')
        plt.grid()

        plt.show(block=True)
Example #4
def main_loop():
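    # Variant of the training loop above that uses a single multi-output regression model
    # for the dynamics plus a separate reward model, instead of one regressor per state
    # dimension.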
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='Pendulum-v0')
    parser.add_argument("--unroll_steps", type=int, default=200)
    parser.add_argument("--discount_factor", type=float, default=.995)
    parser.add_argument("--gather_data_epochs",
                        type=int,
                        default=3,
                        help='Epochs for initial data gather.')
    parser.add_argument("--train_hp_iterations", type=int, default=2000 * 10)
    parser.add_argument("--train_policy_batch_size", type=int, default=30)
    parser.add_argument("--no_samples", type=int, default=1)
    parser.add_argument("--basis_dim", type=int, default=256)
    parser.add_argument("--hidden_dim", type=int, default=32)
    parser.add_argument("--rffm_seed", type=int, default=1)
    parser.add_argument("--Agent", type=str, choices=['', '2'], default='')
    parser.add_argument("--learn_reward", type=int, choices=[0, 1], default=1)
    parser.add_argument("--max_train_hp_datapoints", type=int, default=20000)
    parser.add_argument("--matern_param_reward", type=float, default=np.inf)
    parser.add_argument("--basis_dim_reward", type=int, default=600)
    parser.add_argument("--use_mean_reward", type=int, default=0)
    parser.add_argument("--update_hyperstate", type=int, default=1)
    parser.add_argument("--policy_use_hyperstate", type=int, default=1)
    parser.add_argument("--cma_maxiter", type=int, default=1000)
    parser.add_argument("--learn_diff", type=int, choices=[0, 1], default=0)
    parser.add_argument("--dump_model", type=int, choices=[0, 1], default=0)
    args = parser.parse_args()

    print(sys.argv)
    print(args)
    from blr_regression2_sans_hyperstate_multioutput import Agent2

    env = gym.make(args.environment)

    regression_wrapper_state = MultiOutputRegressionWrapper(
        input_dim=env.observation_space.shape[0] + env.action_space.shape[0],
        output_dim=env.observation_space.shape[0],
        basis_dim=args.basis_dim,
        length_scale=1.,
        signal_sd=1.,
        noise_sd=5e-4,
        prior_sd=1.,
        rffm_seed=args.rffm_seed,
        train_hp_iterations=args.train_hp_iterations)
    regression_wrapper_reward = RegressionWrapperReward(
        environment=args.environment,
        input_dim=env.observation_space.shape[0] + env.action_space.shape[0],
        basis_dim=args.basis_dim_reward,
        length_scale=1.,
        signal_sd=1.,
        noise_sd=5e-4,
        prior_sd=1.,
        rffm_seed=args.rffm_seed,
        train_hp_iterations=args.train_hp_iterations,
        matern_param=args.matern_param_reward)
    agent = eval('Agent' + args.Agent)(
        environment=env.spec.id,
        x_dim=env.observation_space.shape[0] + env.action_space.shape[0],
        y_dim=env.observation_space.shape[0],
        state_dim=env.observation_space.shape[0],
        action_dim=env.action_space.shape[0],
        observation_space_low=env.observation_space.low,
        observation_space_high=env.observation_space.high,
        action_space_low=env.action_space.low,
        action_space_high=env.action_space.high,
        unroll_steps=args.unroll_steps,
        no_samples=args.no_samples,
        discount_factor=args.discount_factor,
        random_matrix_state=regression_wrapper_state.random_matrix,
        bias_state=regression_wrapper_state.bias,
        basis_dim_state=regression_wrapper_state.basis_dim,
        random_matrix_reward=regression_wrapper_reward.random_matrix,
        bias_reward=regression_wrapper_reward.bias,
        basis_dim_reward=regression_wrapper_reward.basis_dim,

        #random_matrices=[rw.random_matrix for rw in regression_wrappers],
        #biases=[rw.bias for rw in regression_wrappers],
        #basis_dims=[rw.basis_dim for rw in regression_wrappers],
        hidden_dim=args.hidden_dim,
        learn_reward=args.learn_reward,
        use_mean_reward=args.use_mean_reward,
        update_hyperstate=args.update_hyperstate,
        policy_use_hyperstate=args.policy_use_hyperstate,
        learn_diff=args.learn_diff,
        dump_model=args.dump_model)

    #I have to work on the classes before working on the code below.
    flag = False
    data_buffer = gather_data(env, args.gather_data_epochs)
    data_buffer = scrub_data(args.environment, data_buffer, True)

    init_states = np.stack(
        [env.reset() for _ in range(args.train_policy_batch_size)], axis=0)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if args.environment == 'Pendulum-v0' and args.learn_reward == 0:
            weights = pickle.load(
                open('../custom_environments/weights/pendulum_reward.p', 'rb'))
            sess.run(agent.assign_ops0,
                     feed_dict=dict(zip(agent.placeholders_reward, weights)))
        for epoch in range(1000):
            if epoch == 0:
                #Train hyperparameters and update the system model.
                states_actions, states, rewards, next_states = unpack(
                    data_buffer)

                if args.learn_diff:
                    next_states_train = next_states - states
                else:
                    next_states_train = next_states.copy()
                rewards_train = rewards.copy()

                if not flag:
                    regression_wrapper_state._train_hyperparameters(
                        states_actions, next_states_train)
                    regression_wrapper_state._reset_statistics(
                        states_actions, next_states_train)
                    regression_wrapper_reward._train_hyperparameters(
                        states_actions, rewards_train)
                    regression_wrapper_reward._reset_statistics(
                        states_actions, rewards_train)
                else:
                    regression_wrapper_state._update(states_actions,
                                                     next_states_train)
                    regression_wrapper_reward._update(states_actions,
                                                      rewards_train)

                if len(data_buffer) >= args.max_train_hp_datapoints:
                    flag = True
                if flag: data_buffer = []
                tmp_data_buffer = []

            #Fit policy network.
            #XX, Xy, hyperparameters = zip(*[[rw.XX, rw.Xy, rw.hyperparameters] for rw in regression_wrappers])
            #eval('agent.'+args.fit_function)(args.cma_maxiter, np.copy(init_states), [np.copy(ele) for ele in XX], [np.copy(ele) for ele in Xy], [np.copy(ele) for ele in hyperparameters], sess)
            if epoch == 0:
                agent._fit(args.cma_maxiter, init_states.copy(),
                           regression_wrapper_state.XX.copy(),
                           regression_wrapper_state.Xy.copy(),
                           regression_wrapper_state.hyperparameters.copy(),
                           regression_wrapper_reward.XX.copy(),
                           regression_wrapper_reward.Xy.copy(),
                           regression_wrapper_reward.hyperparameters.copy(),
                           sess)

            #Get hyperstate & hyperparameters
            hyperstate_params = [
                regression_wrapper_state.Llower.copy()[None, ...],
                regression_wrapper_state.Xy.copy()[None, ...],
                regression_wrapper_reward.Llower.copy()[None, ...],
                regression_wrapper_reward.Xy.copy()[None, ...]
            ]
            total_rewards = 0.
            state = env.reset()
            while True:
                #env.render()
                action = agent._forward(agent.thetas, state[np.newaxis, ...],
                                        hyperstate_params)[0]
                next_state, reward, done, _ = env.step(action)
                if env.spec.id == 'InvertedPendulumBulletEnv-v0':
                    reward = next_state[2]

                hyperstate_params = update_hyperstate(
                    agent, hyperstate_params,
                    regression_wrapper_state.hyperparameters.copy(),
                    regression_wrapper_reward.hyperparameters.copy(),
                    [state, action, reward, next_state, done], args.learn_diff)

                tmp_data_buffer.append(
                    [state, action, reward, next_state, done])
                total_rewards += float(reward)
                state = np.copy(next_state)
                if done:
                    print('epoch:', epoch, 'total_rewards:', total_rewards)
                    #This is for reward shaping...
                    if env.spec.id == 'InvertedPendulumBulletEnv-v0':
                        for _ in range(10):
                            action = np.random.uniform(
                                low=env.action_space.low,
                                high=env.action_space.high)
                            next_state, _, done, _ = env.step(action)
                            tmp_data_buffer.append([
                                state, action, next_state[2], next_state, done
                            ])
                            state = next_state.copy()
                    data_buffer.extend(
                        scrub_data(args.environment, tmp_data_buffer, False))
                    break
Example #5
def main():
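    # Analysis script for quench-protocol data: loads results via ut.gather_data, computes
    # the Edwards-Anderson order parameter (Ed_Ad_OP) of the protocols, and plots protocol
    # and search-cost observables; most of the commented-out blocks are earlier plotting
    # variants.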
    param = {
        'N_time_step': 100,
        'N_quench': 0,
        'Ti': 0.04,
        'action_set': 0,
        'hx_initial_state': -2.0,
        'hx_final_state': 2.0,
        'delta_t': 0.001,
        'hx_i': -4.0,
        'RL_CONSTRAINT': True,
        'L': 6,
        'J': 1.00,
        'hz': 1.0,
        'symmetrize': False
    }
    file_name = ut.make_file_name(param)
    res = ut.gather_data(param, "../data/")
    print(compute_observable.Ed_Ad_OP(res['h_protocol'], 4))
    plotting.protocol(range(100), res['h_protocol'][0])
    #plotting.protocol(range(100),res['h_protocol'][1])
    #print(res['fid'])

    #print(res.keys())

    print(file_name)
    #with open('
    exit()
    import os

    #===========================================================================
    # pca=PCA(n_components=2)
    # param['N_time_step']=10
    # dc=ut.gather_data(param,'../data/')
    # pca.fit(dc['h_protocol']/4.)
    # X=pca.transform(dc['h_protocol']/4.)
    #
    # plt.scatter(X[:,0],X[:,1])
    # plt.title('PCA, $t=0.1$, continuous protocol')
    # plt.savefig("PCA_AS2_t-0p1.pdf")
    # plt.show()
    # exit()
    #===========================================================================

    #===========================================================================
    # dataBB8=[]
    # param['action_set']=0
    # param['N_time_step']=60
    #
    # param['delta_t']=0.5/60.
    # dc=ut.gather_data(param,'../data/')
    # pca=PCA(n_components=2)
    # pca.fit(dc['h_protocol']/4.)
    # print(pca.explained_variance_ratio_)
    # exit()
    #
    # param['delta_t']=3.0/60.
    # dc=ut.gather_data(param,'../data/')
    # X=pca.transform(dc['h_protocol']/4.)
    #
    # title='PCA$_{50}$, $t=3.0$, continuous protocol, nStep$=60$'
    # out_file="PCA_AS0_t-3p0_nStep-60.pdf"
    # plotting.visne_2D(X[:,0],X[:,1],dc['fid'],zlabel="Fidelity",out_file=out_file,title=title,show=True,xlabel='PCA-1',ylabel='PCA-2')
    #
    #===========================================================================
    #exit()
    #plt.scatter(X[:,0],X[:,1])
    #plt.title('PCA$_{50}$, $t=1.5$, continuous protocol, nStep$=60$')
    #plt.savefig("PCA_AS0_t-0p8_nStep-60.pdf")
    #plt.show()
    #exit()
    # exit()

    #===========================================================================
    # param['N_time_step']=2
    # param['action_set']=0
    # dc=ut.gather_data(param,'../data/')
    # print(dc['h_protocol'])
    # exit()
    # dataBB8=[]
    #===========================================================================
    #===============================================================================
    #
    #     param['action_set']=0
    #     param['N_time_step']=60
    #     param['delta_t']=0.5/60
    #
    #     dc=ut.gather_data(param,'../data/')
    #
    #     protocols=dc['h_protocol']
    #     #print(np.shape(dc['h_protocol']))
    #     sort_f=np.argsort(dc['fid'])[::-1]
    #
    #     print(sort_f[0])
    #
    #     #protocols[sort_f[0]]
    #
    #     best_prot=protocols[sort_f[0:10]]
    #     x=np.array(range(60))*1.0/60
    #     #print(best_prot.reshape)
    #     #print(x.shape)
    #     #print(np.array(range(60))*0.1/60)
    #     #print(best_prot)
    #     #print(np.shape(best_prot))
    #     #print(np.shape(np.arange(0.1,3.05,0.1)*0.05))
    #
    #     plotting.protocol(protocols[:2],x,labels=dc['fid'][:2],show=True)
    #
    #     exit()
    #
    #
    #===============================================================================

    param['N_time_step'] = 60
    param['action_set'] = 0

    dataBB8 = []
    compTime = []
    x = []

    for t in np.arange(0.1, 3.05, 0.1):
        dt = t / param['N_time_step']
        param['delta_t'] = dt
        # ut.gather_data was changed to return False if the file is not found ...
        dc = ut.gather_data(param, '../data/')

        if dc is not False:
            eaop = compute_observable.Ed_Ad_OP(dc['h_protocol'], 4.0)
            print(t, eaop, dc['fid'].shape, '\t', np.mean(dc['n_fid']))
            compTime.append(np.mean(dc['n_fid']))
            dataBB8.append(eaop)
            x.append(t)
        else:
            print("Data not available for %.3f" % dt)

    y = compTime
    plotting.observable(y,
                        x,
                        title='Depth of search for bang-bang protocol',
                        ylabel='\# of fidelity evaluations',
                        xlabel='$T$',
                        marker="-",
                        labels=['Obtained time (SGD)'])
    exit()
    #===========================================================================
    # param['action_set']=0
    # param['delta_t']=0.01
    #===========================================================================
    #===========================================================================
    # for i in range(2,300,4):
    #     param['N_time_step']=i
    #     is_there,dc=ut.gather_data(param,'../data/')
    #     if is_there:
    #         eaop=compute_observable.Ed_Ad_OP(dc['h_protocol'],4.0)
    #         print(i,eaop,dc['fid'].shape,'\t',np.mean(dc['n_fid']))
    #         compTime.append(np.mean(dc['n_fid']))
    #         dataBB8.append(eaop)
    #         x.append(i)
    #     else:
    #         print("Data not available for %i"%i)
    #
    #===========================================================================

    #===========================================================================
    # param['N_time_step']=150
    # is_there,dc=ut.gather_data(param,'../data/')
    # x=np.arange(0,150*0.01,0.01)
    # plotting.protocol(dc['h_protocol'][:3],x,labels=dc['fid'][:3],show=True)
    # exit()
    # #x=np.array(range(2,300,4))*0.01
    #===========================================================================
    param['action_set'] = 0
    param['delta_t'] = 0.01
    mean_fid_BB = []
    h_protocol_BB = {}
    fid_BB = {}
    n_fid_BB = []
    x = []
    sigma_fid = []
    EA_OP = []

    for i in range(2, 300, 4):
        param['N_time_step'] = i
        data_is_available, dc = ut.gather_data(param, '../data/')
        if data_is_available:
            mean_fid_BB.append(np.mean(dc['fid']))
            sigma_fid.append(np.std(dc['fid']))
            fid_BB[i] = dc['fid']
            EA_OP.append(compute_observable.Ed_Ad_OP(dc['h_protocol'], 4.0))
            h_protocol_BB[i] = dc['h_protocol']
            n_fid_BB.append(np.mean(dc['n_fid']))
            x.append(i * param['delta_t'])

    #print(fid_BB[130])
    #mean=np.mean(fid_BB[130])
    #sns.distplot(fid_BB[130],bins=np.linspace(mean-0.005,mean+0.005,100))
    #plt.tick_params(labelleft='off')
    #plt.show()
    x = np.array(x)
    y = [n / (x[i] / param['delta_t']) for i, n in enumerate(n_fid_BB)]

    plotting.observable(y,
                        x,
                        title='Depth of search for bang-bang protocol',
                        ylabel='(\# of fidelity evaluations)/$N$',
                        xlabel='$T$',
                        marker="-",
                        labels=['Minimum time', 'Obtained time (SGD)'])

    #plotting.protocol(h_protocol_BB[130][20:25],np.arange(0,130,1)*param['delta_t'])

    exit()

    #pca.fit()
    #===========================================================================
    # dataCONT=[]
    # for t in range(2,300,4):
    #     print(t)
    #     param['N_time_step']=t
    #     dc=ut.gather_data(param,'../data/')
    #     #print(dc['h_protocol'].shape)
    #     eaop=compute_observable.Ed_Ad_OP(dc['h_protocol'],4.0)
    #     print(eaop)
    #     dataCONT.append(eaop)
    #
    # file="../data/EAOP_"+ut.make_file_name(param)
    # with open(file,'wb') as f:
    #     pickle.dump(dataCONT,f);f.close();
    #
    # exit()
    #
    #===========================================================================

    #===========================================================================
    # param['action_set']=0
    # dataBB8=[]
    # for t in range(2,300,4):
    #     print(t)
    #     param['N_time_step']=t
    #     dc=ut.gather_data(param,'../data/')
    #     eaop=compute_observable.Ed_Ad_OP(dc['h_protocol'],4.0)
    #     print(eaop)
    #     #print(dc['h_protocol'].shape)
    #     dataBB8.append(eaop)
    #
    # file="../data/EAOP_"+ut.make_file_name(param)
    # with open(file,'wb') as f:
    #     pickle.dump(dataBB8,f);f.close();
    #
    # exit()
    #===========================================================================

    #===========================================================================
    # param['N_time_step']=298
    # param['action_set']=0
    # file="../data/EAOP_"+ut.make_file_name(param)
    # with open(file,'rb') as f:
    #     dataBB8=pickle.load(f);f.close();
    #
    # param['action_set']=2
    # f="../data/EAOP_"+ut.make_file_name(param)
    # with open(f,'rb') as file:
    #     dataCONT=pickle.load(file);
    #
    # time_axis=np.array(range(2,300,4))*0.01
    # title="Edward-Anderson parameter ($n=400$) vs. evolution time for SGD\n with the different action protocols ($L=1$)"
    # plotting.observable([dataBB8,dataCONT],[time_axis,time_axis],title=title,
    #                      out_file="SGD_EAOPvsT_AS0-2.pdf",show=True,
    #                      ylabel="$q_{EA}$",xlabel="$t$",labels=['bang-bang8','continuous'])
    #===========================================================================

    #===========================================================================
    # param['N_time_step']=250
    # dc=ut.gather_data(param,'../data/')
    # sns.distplot(dc['fid'],kde=False,label='$t=%.3f$'%(param['N_time_step']*0.01))
    # plt.legend(loc='best')
    # plt.savefig('SGD_hist_fid_t2p5.pdf')
    # plt.show()
    # exit()
    #===========================================================================

    #===========================================================================
    # title="Fidelity ($n=400$) vs. evolution time for SGD\n with the different action protocols ($L=1$)"
    # plotting.observable(np.array(data),np.array(range(2,300,4))*0.01,title=title,
    #                      out_file="SGD_FvsT_AS2.pdf",show=True,
    #                      ylabel="$F$",xlabel="$t$",labels=['continuous'])
    #
    #===========================================================================

    exit()
Example #6
import utils
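# Monthly budgeting script: gather all CSV reports, sum income per (year, month),
# and store the totals in output_df.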


csv_files = utils.get_all_reports()
df = utils.gather_data(csv_files)
output_df = utils.create_empty_df(df['date'])

months = utils.get_input_months(df['date'])
for (month, year) in months:
    income_sum = utils.income_month(df, year, month)
    output_df = utils.save_value_to_output(output_df, year, month, income_sum)

print(output_df)

# TODO create class data

# TODO total expenses per month

# TODO totals for each month by budget
# TODO balance after each month by budget

# TODO difference within the month
# TODO percentage of unspent money
# TODO monthly account balance
# TODO monthly average of account balances
# TODO yearly balance after all months
# TODO port the calculations from the spreadsheet
Example #7
def main():
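    # Fits the random-feature model hyperparameters (length scale, signal/noise/prior sd)
    # per state dimension with Nelder-Mead on the log marginal likelihood, then
    # sanity-checks the fit with one-step and multi-step prediction plots.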
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='Pendulum-v0')
    parser.add_argument("--train-hp-iterations", type=int, default=2000)
    args = parser.parse_args()

    print(args)

    env = gym.make(args.environment)

    states, actions, _, next_states = gather_data(env, 5, unpack=True)
    states_actions = np.concatenate([states, actions], axis=-1)

    output_dim = 128 * 2
    noise_sd_clip_threshold = 5e-5
    rffm = RandomFourierFeatureMapper(states_actions.shape[-1],
                                      int(output_dim))

    hyperparameters = []
    for i in range(env.observation_space.shape[0]):
        thetas0 = np.array([1., 1., 5e-4, 1.])
        options = {'maxiter': args.train_hp_iterations, 'disp': True}
        _res = minimize(log_marginal_likelihood,
                        thetas0,
                        method='nelder-mead',
                        args=(rffm, states_actions, next_states[:, i:i + 1],
                              output_dim, noise_sd_clip_threshold),
                        options=options)
        length_scale, signal_sd, noise_sd, prior_sd = _res.x
        hyperparameters.append([
            length_scale, signal_sd,
            np.maximum(noise_sd, noise_sd_clip_threshold), prior_sd
        ])
    print(hyperparameters)

    # Quick plotting experiment (for sanity check).
    import matplotlib.pyplot as plt
    if args.environment == 'Pendulum-v0':
        states2, actions2, _, next_states2 = gather_data(env, 1, unpack=True)
    elif args.environment == 'MountainCarContinuous-v0':
        from utils import mcc_get_success_policy
        states2, actions2, next_states2 = mcc_get_success_policy(env)
    states_actions2 = np.concatenate([states2, actions2], axis=-1)

    states3, actions3, _, next_states3 = gather_data(env, 3, unpack=True)
    states_actions3 = np.concatenate([states3, actions3], axis=-1)

    predictors = []
    for i in range(env.observation_space.shape[0]):
        length_scale, signal_sd, noise_sd, prior_sd = hyperparameters[i]
        predictors.append(
            predictor(output_dim,
                      length_scale=length_scale,
                      signal_sd=signal_sd,
                      noise_sd=noise_sd,
                      prior_sd=prior_sd))

    for sa, ns in zip([states_actions, states_actions3],
                      [next_states, next_states3]):
        plt.figure()
        for i in range(env.observation_space.shape[0]):
            plt.subplot(2, env.observation_space.shape[0], i + 1)
            length_scale, signal_sd, noise_sd, prior_sd = hyperparameters[i]

            predictors[i].update(rffm, sa, ns[:, i:i + 1])
            predict_mu, predict_sigma = predictors[i].predict(
                rffm, states_actions2)

            plt.plot(np.arange(len(next_states2[:, i:i + 1])),
                     next_states2[:, i:i + 1])
            plt.errorbar(np.arange(len(predict_mu)),
                         predict_mu,
                         yerr=np.sqrt(predict_sigma),
                         color='m',
                         ecolor='g')
            plt.grid()

        traj = []
        no_lines = 50
        state = np.tile(np.copy(states2[0:1, ...]), [no_lines, 1])
        for a in actions2:
            action = np.tile(a[np.newaxis, ...], [no_lines, 1])
            state_action = np.concatenate([state, action], axis=-1)

            mu_vec = []
            sigma_vec = []
            for i in range(env.observation_space.shape[0]):
                predict_mu, predict_sigma = predictors[i].predict(
                    rffm, state_action)
                mu_vec.append(predict_mu)
                sigma_vec.append(predict_sigma)

            mu_vec = np.concatenate(mu_vec, axis=-1)
            sigma_vec = np.concatenate(sigma_vec, axis=-1)

            state = np.stack([
                np.random.multivariate_normal(mu, np.diag(sigma))
                for mu, sigma in zip(mu_vec, sigma_vec)
            ],
                             axis=0)
            traj.append(np.copy(state))

        traj = np.stack(traj, axis=-1)

        for i in range(env.observation_space.shape[0]):
            plt.subplot(2, env.observation_space.shape[0],
                        env.observation_space.shape[0] + i + 1)
            for j in range(no_lines):
                y = traj[j, i, :]
                plt.plot(np.arange(len(y)), y, color='r')

            plt.plot(np.arange(len(next_states2[..., i])), next_states2[...,
                                                                        i])
            plt.grid()

        plt.show(block=False)
    input("Press Enter to continue ...")
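
The RegressionWrapper/RWL classes used throughout these examples are not shown, but the statistics they expose (XX, Xy, a Cholesky factor, and length_scale/signal_sd/noise_sd/prior_sd hyperparameters) suggest Bayesian linear regression on random Fourier features. The sketch below is an illustrative, self-contained version of that technique with made-up data and hyperparameter values; it does not use any class from the snippets above.

import numpy as np

def rff(X, omega, b, signal_sd):
    # Random Fourier features approximating an RBF kernel.
    return signal_sd * np.sqrt(2. / omega.shape[1]) * np.cos(X @ omega + b)

rng = np.random.RandomState(0)
X = rng.uniform(-3., 3., size=(200, 1))
y = np.sin(X) + 0.05 * rng.randn(200, 1)

basis_dim, length_scale, signal_sd, noise_sd, prior_sd = 256, 1., 1., 5e-2, 1.
omega = rng.randn(X.shape[1], basis_dim) / length_scale
b = rng.uniform(0., 2. * np.pi, size=basis_dim)

phi = rff(X, omega, b, signal_sd)
# Gram matrix plus ridge term (cf. the XX + (noise_sd / prior_sd)**2 * np.eye(basis_dim)
# expression in Example #2).
A = phi.T @ phi + (noise_sd / prior_sd) ** 2 * np.eye(basis_dim)
w = np.linalg.solve(A, phi.T @ y)  # posterior mean weights

X_test = np.linspace(-3., 3., 100)[:, None]
phi_test = rff(X_test, omega, b, signal_sd)
mu = phi_test @ w  # predictive mean
# Predictive variance: noise variance plus feature-space model uncertainty.
var = noise_sd ** 2 * (1. + np.sum(phi_test * np.linalg.solve(A, phi_test.T).T,
                                   axis=1, keepdims=True))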
Example #8
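# Stacking-ensemble snippet: for test rows whose top predicted probability is below 0.95,
# average the five fold models from each first-level stack, append one-hot features, and
# re-score the row with the second-level model.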
model1_0 = load_model('stack_model1/5_folds_stack_model0.h5')
model1_1 = load_model('stack_model1/5_folds_stack_model1.h5')
model1_2 = load_model('stack_model1/5_folds_stack_model2.h5')
model1_3 = load_model('stack_model1/5_folds_stack_model3.h5')
model1_4 = load_model('stack_model1/5_folds_stack_model4.h5')
model2_0 = load_model('stack_model2/5_folds_stack_model0.h5')
model2_1 = load_model('stack_model2/5_folds_stack_model1.h5')
model2_2 = load_model('stack_model2/5_folds_stack_model2.h5')
model2_3 = load_model('stack_model2/5_folds_stack_model3.h5')
model2_4 = load_model('stack_model2/5_folds_stack_model4.h5')
 
rep = results.tolist()  # convert the predictions to a list
final_results = []
for i in range(len(results)):
    if np.max(results[i]) < 0.95:
        initial_sample = gather_data(test[i:i + 1]).reset_index(drop=True)
        new_sample = np.hstack((
            0.2 * (model1_0.predict(initial_sample) +
                   model1_1.predict(initial_sample) +
                   model1_2.predict(initial_sample) +
                   model1_3.predict(initial_sample) +
                   model1_4.predict(initial_sample)),
            0.2 * (model2_0.predict(initial_sample) +
                   model2_1.predict(initial_sample) +
                   model2_2.predict(initial_sample) +
                   model2_3.predict(initial_sample) +
                   model2_4.predict(initial_sample)),
            feat_onehot(initial_sample, test.columns)))
        res = model.predict(new_sample)
        final_results.append(res)  # assumed completion: the original snippet is cut off mid-call
Example #9
def main2():
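    # Compares a kernel regression model (Matern, via RegressionWrappers) against random
    # Fourier feature models (RWL) for predicting rewards, and saves the error-bar plot
    # to a uuid-named PDF.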

    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='Pendulum-v0')
    #parser.add_argument("--path", type=str, default='')
    args = parser.parse_args()

    print(args)

    uid = str(uuid.uuid4())
    env = gym.make(args.environment)

    #states_actions, rewards, states_actions2, rewards2 = pickle.load(open(args.path, 'rb'))

    states, actions, rewards, _ = gather_data(env, 3, unpack=True)
    states_actions = np.concatenate([states, actions], axis=-1)

    #    rbf = RegressionWrappers(input_dim=states_actions.shape[-1], kern='rbf')
    #    rbf._train_hyperparameters(states_actions, rewards)
    #
    matern = RegressionWrappers(input_dim=states_actions.shape[-1],
                                kern=args.kernel)
    matern._train_hyperparameters(states_actions, rewards)
    #
    #    rq = RegressionWrappers(input_dim=states_actions.shape[-1], kern='rq')
    #    rq._train_hyperparameters(states_actions, rewards)

    states2, actions2, rewards2, _ = gather_data(env, 1, unpack=True)
    states_actions2 = np.concatenate([states2, actions2], axis=-1)

    pickle.dump([states_actions, rewards, states_actions2, rewards2],
                open(uid + '.p', 'wb'))

    #    mu, sigma = rbf._predict(states_actions2, states_actions, rewards)
    #
    #    mu = np.squeeze(mu, axis=-1)
    #    sd = np.sqrt(np.diag(sigma))
    #
    #    plt.errorbar(np.arange(len(mu)), mu, yerr=sd, color='m', ecolor='g')
    #
    mu, sigma = matern._predict(states_actions2, states_actions, rewards)

    mu = np.squeeze(mu, axis=-1)
    sd = np.sqrt(np.diag(sigma))

    plt.errorbar(np.arange(len(mu)), mu, yerr=sd, color='y', ecolor='c')
    #
    #    mu, sigma = rq._predict(states_actions2, states_actions, rewards)
    #
    #    mu = np.squeeze(mu, axis=-1)
    #    sd = np.sqrt(np.diag(sigma))
    #
    #    plt.errorbar(np.arange(len(mu)), mu, yerr=sd, color='b', ecolor='g')

    rwl = RWL(input_dim=states_actions.shape[-1], basis_dim=1024)
    rwl._train_hyperparameters(states_actions, rewards)
    rwl._reset_statistics(states_actions, rewards)

    mu, sigma = rwl._predict(states_actions2)
    plt.errorbar(np.arange(len(mu)),
                 mu,
                 yerr=np.sqrt(sigma),
                 color='r',
                 ecolor='k')

    rwl2 = RWL(input_dim=states_actions.shape[-1],
               basis_dim=1024,
               matern_param=0.)
    rwl2._train_hyperparameters(states_actions, rewards)
    rwl2._reset_statistics(states_actions, rewards)

    mu, sigma = rwl2._predict(states_actions2)
    plt.errorbar(np.arange(len(mu)),
                 mu,
                 yerr=np.sqrt(sigma),
                 color='g',
                 ecolor='b')

    plt.scatter(np.arange(len(rewards2)), rewards2)

    plt.grid()
    plt.title(uid)
    #plt.show()
    plt.savefig(uid + '.pdf')