Example #1
def curiosity(world):
    world = ActionNoise(world, stddev=0.2)
    memory = Cache(max_size=100)

    log_dir = "__oracle"
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    agent = build_agent()
    agent_opt = Adam(np.random.randn(agent.n_params), lr=0.00015, memory=0.5)

    oracle = build_oracle()
    oracle_opt = Adam(np.random.randn(oracle.n_params) * 0.1,
                      lr=0.05,
                      memory=0.95)

    for episode in range(1000):
        agent.load_params(agent_opt.get_value())
        oracle.load_params(oracle_opt.get_value())

        agent_trajs = world.trajectories(agent, 4)
        for_oracle = [[(np.asarray([o1, o2, o3]).flatten(), a1, r1)
                       for (o1, a1, r1), (o2, a2, r2), (o3, a3, r3)
                       in zip(t, t[1:], t[2:])]
                      for t in agent_trajs]
        memory.add_trajectories(for_oracle)

        predictions = retrace(for_oracle, model=oracle)
        save_plot(log_dir + "/%04d.png" % (episode + 1), agent_trajs,
                  predictions)
        np.save(log_dir + "/%04d.npy" % (episode + 1), agent_opt.get_value())

        curiosity_trajs = [[
            (o1, a1, np.log(np.mean(np.square((o2 - o1) - delta_p))))
            for (o1, a1, r1), (o2, a2, r2), delta_p in zip(t, t[10:], p)
        ] for t, p in zip(agent_trajs, predictions)]
        #curiosity_trajs = replace_rewards(curiosity_trajs,
        #    episode=lambda rs: np.max(rs))
        print_reward(curiosity_trajs, max_value=5000.0)
        print_reward(agent_trajs, max_value=90.0, episode=np.sum)

        curiosity_trajs = discount(curiosity_trajs, horizon=500)
        curiosity_trajs = normalize(curiosity_trajs)
        agent_trajs = discount(agent_trajs, horizon=500)
        agent_trajs = normalize(agent_trajs)
        agent_trajs = [traj[:-10] for traj in agent_trajs]
        agent_weight = 0.5  # + 0.4*(0.5 * (1 - np.cos(np.pi * episode / 20)))
        curiosity_weight = 1. - agent_weight
        comb_trajs = combine_rewards([curiosity_trajs, agent_trajs],
                                     [curiosity_weight, agent_weight])
        grad = policy_gradient(comb_trajs, policy=agent)
        agent_opt.apply_gradient(grad)

        oracle_trajs = [[(o1, (o2 - o1)[:2], 1.0)
                         for (o1, a1, r1), (o2, a2, r2) in zip(t, t[10:])]
                        for t in memory.trajectories(None, 4)]

        grad = policy_gradient(oracle_trajs, policy=oracle)
        oracle_opt.apply_gradient(grad)
Example #2
def run():
    model = Input(4)
    model = Affine(model, 128)
    model = LReLU(model)
    model = Affine(model, 2)
    model = Softmax(model)

    world = StochasticPolicy(Gym(make_env, max_steps=500))

    opt = Adam(np.random.randn(model.n_params) * 0.1, lr=0.01)

    for _ in range(50):
        model.load_params(opt.get_value())

        trajs = world.trajectories(model, 16)
        print_reward(trajs, max_value=5000)

        trajs = discount(trajs, horizon=500)
        trajs = normalize(trajs)

        grad = policy_gradient(trajs, policy=model)
        opt.apply_gradient(grad)

    while True:
        world.render(model)
Example #3
    def __init__(self, X, Y):

        # Normalize data
        self.Xmean, self.Xstd = X.mean(0), X.std(0)
        self.Ymean, self.Ystd = Y.mean(0), Y.std(0)
        X = (X - self.Xmean) / self.Xstd
        Y = (Y - self.Ymean) / self.Ystd

        self.X = X
        self.Y = Y
        self.n = X.shape[0]

        # Randomly initialize weights and the log noise variance
        w = np.random.randn(X.shape[1], Y.shape[1])
        sigma_sq = np.log(np.array([1e-3]))

        # Concatenate all parameters in a single vector
        self.theta = np.concatenate([w.flatten(), sigma_sq.flatten()])

        # Count total number of parameters
        self.num_params = self.theta.shape[0]

        # Define optimizer
        self.optimizer = Adam(self.num_params, lr=1e-3)

        # Define loss gradient function using autograd
        self.grad_loss = grad(self.loss)
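
Note: this and several later examples (#4, #15, #17, #26, #30) construct the optimizer as Adam(self.num_params, lr=1e-3) and later update parameters via self.optimizer.step(params, grad_params). That optimizer class is not included in the snippets; a minimal NumPy sketch consistent with this interface (standard Adam with bias correction; hyperparameter names and defaults are assumptions) could look like:

import numpy as np

class Adam:
    # Sketch of the interface assumed above: constructed with the parameter
    # count, applied via params = optimizer.step(params, grads).
    def __init__(self, num_params, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
        self.lr, self.beta1, self.beta2, self.eps = lr, beta1, beta2, eps
        self.m = np.zeros(num_params)  # first-moment estimate
        self.v = np.zeros(num_params)  # second-moment estimate
        self.t = 0                     # step counter for bias correction

    def step(self, params, grads):
        self.t += 1
        self.m = self.beta1 * self.m + (1. - self.beta1) * grads
        self.v = self.beta2 * self.v + (1. - self.beta2) * grads**2
        m_hat = self.m / (1. - self.beta1**self.t)
        v_hat = self.v / (1. - self.beta2**self.t)
        return params - self.lr * m_hat / (np.sqrt(v_hat) + self.eps)

Other examples in this collection use a different Adam (seeded with an initial parameter vector and exposing get_value/apply_gradient, with a momentum-like memory argument); the sketch does not cover that interface.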
Example #4
    def __init__(self, X, layers_Q, layers_P):

        # Normalize data
        self.Xmean, self.Xstd = X.mean(0), X.std(0)
        X = (X - self.Xmean) / self.Xstd

        self.X = X

        self.layers_Q = layers_Q
        self.layers_P = layers_P

        self.X_dim = X.shape[1]
        self.Z_dim = layers_Q[-1]

        # Initialize encoder
        params = self.initialize_NN(layers_Q)
        self.idx_Q = np.arange(params.shape[0])

        # Initialize decoder
        params = np.concatenate([params, self.initialize_NN(layers_P)])
        self.idx_P = np.arange(self.idx_Q[-1] + 1, params.shape[0])

        self.params = params

        # Total number of parameters
        self.num_params = self.params.shape[0]

        # Define optimizer
        self.optimizer = Adam(self.num_params, lr=1e-3)

        # Define gradient function using autograd
        self.grad_elbo = grad(self.ELBO)
Example #5
    def train_one(carrOpt):
        nonlocal oldTrajs
        classOpt = Adam(
            np.random.randn(classifier.n_params) * 1.,
            lr=0.5,
            memory=0.9,
        )
        if carrOpt is None:
            carrOpt = Adam(
                np.random.randn(curCarr.n_params),
                lr=0.10,
                memory=0.5,
            )
        curScore = 0.
        curAccuracy = 0.
        for i in range(250):
            classifier.load_params(classOpt.get_value())
            curCarr.load_params(carrOpt.get_value())

            oldTrajIdx = np.random.choice(len(oldTrajs), size=50)
            trajs = [oldTrajs[i] for i in oldTrajIdx]
            trajs += world.trajectories(curCarr, 50)
            trajsForClass = [tag_traj(traj, [1, 0]) for traj in trajs[:50]]
            trajsForClass += [tag_traj(traj, [0, 1]) for traj in trajs[50:]]
            plot_tagged_trajs(trajsForClass)
            accTrajs = accuracy(trajsForClass, model=classifier)
            print_reward(accTrajs,
                         max_value=1.0,
                         episode=np.mean,
                         label="Cla reward: ")
            curAccuracy = np.mean(get_rewards(accTrajs, episode=np.mean))
            if curAccuracy > 1. - i / 500:
                break

            grad = policy_gradient(trajsForClass, policy=classifier)
            classOpt.apply_gradient(grad)
            trajs2 = learn_from_classifier(classifier, trajs[50:], 1)
            print_reward(trajs2,
                         max_value=1.0,
                         episode=np.max,
                         label="Car reward: ")
            curScore = np.mean(get_rewards(trajs2, episode=np.max))
            trajs2 = replace_rewards(trajs2, episode=np.max)
            trajs2 = normalize(trajs2)
            grad2 = policy_gradient(trajs2, policy=curCarr)
            carrOpt.apply_gradient(grad2)
            if i % 10 == 0:
                print("%d episodes in." % i)
        oldTrajs += world.trajectories(curCarr, 800)
        world.render(curCarr)
        if curScore > 0.11:
            return carrOpt
        else:
            return None
Example #6
    def train_one(carrOpt):
        if carrOpt is None:
            carrOpt = Adam(
                np.random.randn(curCarr.n_params),
                lr=0.10,
                memory=0.5,
            )
        nextBreak = 5
        for i in range(250):
            curCarr.load_params(carrOpt.get_value())

            realTrajs, curiosityTrajs = world.trajectories(curCarr, 50)
            curScore = np.mean(get_rewards(realTrajs, episode=np.sum)) / 90.
            print_reward(realTrajs,
                         max_value=90.0,
                         episode=np.sum,
                         label="Real reward:      ")
            print_reward(curiosityTrajs,
                         max_value=1.0,
                         episode=np.max,
                         label="Curiosity reward: ")
            curCuriosity = np.mean(get_rewards(curiosityTrajs, episode=np.max))
            if curCuriosity > 0.98:
                if nextBreak == 0:
                    break
                else:
                    nextBreak -= 1
            else:
                nextBreak = np.min([nextBreak + 1, 5])

            realTrajs = replace_rewards(realTrajs, episode=np.sum)
            realTrajs = normalize(realTrajs)
            curiosityTrajs = replace_rewards(curiosityTrajs, episode=np.max)
            # This is crude; arguably we should care more if the costs are too high.
            realWeight = 0.001 + np.clip(curScore, 0., 0.2) * 0.998 / 0.2
            curiosityWeight = 1. - realWeight
            print('RWeight: %f, CWeight: %f' % (realWeight, curiosityWeight))
            trajs = combine_rewards([realTrajs, curiosityTrajs],
                                    [realWeight, curiosityWeight])
            trajs = normalize(trajs)
            grad = policy_gradient(trajs, policy=curCarr)
            carrOpt.apply_gradient(grad)
            if i % 10 == 0:
                print("%d episodes in." % i)
        world.remember_agent(curCarr)
        world.render(curCarr)
        if curScore > 0.01:
            return carrOpt
        else:
            return None
Example #7
 def __init__(self,
              num_layers,
              units_list=None,
              initializer=None,
              optimizer='adam'):
     self.weight_num = num_layers - 1
     # Initialize parameters with the given method; this experiment only implements Xavier and all-zeros initialization
     self.params = xavier(num_layers,
                          units_list) if initializer == 'xavier' else zero(
                              num_layers, units_list)
     self.optimizer = Adam(
         weights=self.params,
         weight_num=self.weight_num) if optimizer == 'adam' else SGD()
     self.bn_param = {}
Example #8
def train(world, model):
    opt = Adam(np.random.randn(model.n_params), lr=0.3, memory=0.9)

    for _ in range(20):
        model.load_params(opt.get_value())
        trajs = world.trajectories(None, 100)
        grad = policy_gradient(trajs, policy=model)
        opt.apply_gradient(grad)

        trajs = cross_entropy(trajs, model=model)
        print_reward(trajs,
                     episode=np.mean,
                     label="Surprise/byte:",
                     max_value=8.0)
Example #9
    def __init__(self, algorithm, config):
        self.config = config
        super(MujocoAgent, self).__init__(algorithm)

        weights = self.get_weights()
        assert len(
            weights) == 1, "There should be only one model in the algorithm."
        self.weights_name = list(weights.keys())[0]
        weights = list(weights.values())[0]
        self.weights_shapes = [x.shape for x in weights]
        self.weights_total_size = np.sum(
            [np.prod(x) for x in self.weights_shapes])

        self.optimizer = Adam(self.weights_total_size, self.config['stepsize'])
Example #10
def run_autoencoder(optimizer):
    """ Runs the autoencoder model using the specified optimizer.

    Parameters
    ----------
    optimizer : str
        Optimization algorithm to be used for parameter learning,
        'adam' for Adam or any other value for RMSProp

    """
    optimizer = Adam(learning_rate=0.03) if optimizer == 'adam' else RMSProp(
        learning_rate=0.05)
    train_matrix, val_matrix = get_training_and_val_data()
    model = Autoencoder(input_dim=train_matrix.shape[1])
    model.print_summary()
    model.compile(optimizer)
    errors = model.fit(train_matrix,
                       train_matrix,
                       num_epochs=60,
                       val_set=(val_matrix, val_matrix),
                       early_stopping=True)
    plot_losses(errors['training'], errors['validation'])
    neuron_num = model.model.layers[0].optimizer.reference_index
    learning_rates = model.model.layers[0].optimizer.learning_rates
    plot_learning_rates(learning_rates['weights'], learning_rates['bias'],
                        neuron_num)
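
Given the string dispatch at the top of the function, a call just names the optimizer, e.g.:

run_autoencoder('adam')  # builds Adam(learning_rate=0.03); any other value selects RMSProp(learning_rate=0.05)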
Example #11
 def remember(agent):
     nonlocal history, classOpt
     history.add_trajectory(*inner.trajectories(agent, history_length))
     classOpt = Adam(
         np.random.randn(classifier.n_params) * 1.,
         lr=0.06,
         memory=0.9,
     )
Example #12
def curiosity(world):
    world = ActionNoise(world, stddev=0.1)
    memory = Cache(max_size=100)

    log_dir = "__oracle"
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    agent = build_agent()
    agent_opt = Adam(np.random.randn(agent.n_params), lr=0.00015, memory=0.5)

    oracle = build_oracle()
    oracle_opt = Adam(np.random.randn(oracle.n_params) * 0.1,
                      lr=0.05,
                      memory=0.95)

    for episode in range(1000):
        agent.load_params(agent_opt.get_value())
        oracle.load_params(oracle_opt.get_value())

        agent_trajs = world.trajectories(agent, 4)
        memory.add_trajectories(agent_trajs)

        predictions = retrace(agent_trajs, model=oracle)
        save_plot(log_dir + "/%04d.png" % (episode + 1), agent_trajs,
                  predictions)
        np.save(log_dir + "/%04d.npy" % (episode + 1), agent_opt.get_value())

        agent_trajs = [[
            (o1, a1, np.log(np.mean(np.square((o2 - o1) - delta_p))))
            for (o1, a1, r1), (o2, a2, r2), delta_p in zip(t, t[10:], p)
        ] for t, p in zip(agent_trajs, predictions)]
        agent_trajs = replace_rewards(agent_trajs,
                                      episode=lambda rs: np.max(rs) / len(rs))
        print_reward(agent_trajs, max_value=10.0)

        agent_trajs = normalize(agent_trajs)
        grad = policy_gradient(agent_trajs, policy=agent)
        agent_opt.apply_gradient(grad)

        oracle_trajs = [[(o1, o2 - o1, 1.0)
                         for (o1, a1, r1), (o2, a2, r2) in zip(t, t[10:])]
                        for t in memory.trajectories(None, 4)]

        grad = policy_gradient(oracle_trajs, policy=oracle)
        oracle_opt.apply_gradient(grad)
Example #13
    def train_one():
        gaussOpt = Adam(
            [0., 0.],
            lr=0.010,
            memory=0.5,
        )
        classOpt = Adam(np.random.randn(classifier.n_params) * 0.1,
                        lr=0.5,
                        memory=0.99)
        gaussCenterer = Constant(2)
        gausses.append(gaussCenterer)
        curAccuracy = 0.
        while curAccuracy < 0.98:
            classifier.load_params(classOpt.get_value())
            gaussCenterer.load_params(gaussOpt.get_value())

            trajs = [[(gauss_observation(gausses[:-1]), [1, 0], 1.)]
                     for _ in range(500)]
            trajs += [[(gauss_observation(gausses[-1:]), [0, 1], 1.)]
                      for _ in range(500)]
            accTrajs = accuracy(trajs, model=classifier)
            print_reward(accTrajs, max_value=1.0)
            accs = [traj[0][2] for traj in accTrajs]
            curAccuracy = np.mean(accs)

            grad = policy_gradient(trajs, policy=classifier)
            classOpt.apply_gradient(grad)
            trajs2 = learn_from_classifier(classifier, trajs[500:], 1)
            trajs2 = normalize(trajs2)
            grad2 = policy_gradient(trajs2, policy=gaussCenterer)
            gaussOpt.apply_gradient(grad2)
            plt.clf()
            plt.grid()
            plt.gcf().axes[0].set_ylim([-1, 1])
            plt.gcf().axes[0].set_xlim([-1, 1])
            x, y = zip(*[o for ((o, _, _), ) in trajs[:500]])
            plt.scatter(x, y, color="blue")
            x, y = zip(*[o for ((o, _, _), ) in trajs[500:]])
            plt.scatter(x, y, color="red")
            plt.pause(0.01)
Example #14
 def reset_agent():
     nonlocal agentOpt, trainTimeLeft, lastScores, curAgentId, motivation
     if agentOpt is not None:
         save_agent()
     print("Resetting agent %d." % curAgentId)
     agentOpt = Adam(
         np.random.randn(agent.n_params) * 1.5,
         lr=0.05,
         memory=0.9,
     )
     trainTimeLeft = MAX_TRAIN_TIME
     lastScores = [-0.4]
     curAgentId += 1
     motivation = MAX_MOTIVATION
Example #15
    def __init__(self, X, Y, hidden_dim):

        # X has the form lags x data x dim
        # Y has the form data x dim

        self.X = X
        self.Y = Y
        self.X_dim = X.shape[-1]
        self.Y_dim = Y.shape[-1]
        self.hidden_dim = hidden_dim
        self.lags = X.shape[0]

        # Define and initialize neural network
        self.params = self.initialize_RNN()

        # Total number of parameters
        self.num_params = self.params.shape[0]

        # Define optimizer
        self.optimizer = Adam(self.num_params, lr=1e-3)

        # Define gradient function using autograd
        self.grad_loss = grad(self.loss)
Example #16
    def init_optimizer_q_v(self, var_params_q_v):
        cfg_optimizer_q_v = self['optimizer_q_v']['args']

        return Adam([{
            'params': [var_params_q_v['mu']],
            'lr': cfg_optimizer_q_v['lr_mu']
        }, {
            'params': [var_params_q_v['log_var']],
            'lr': cfg_optimizer_q_v['lr_log_var']
        }, {
            'params': [var_params_q_v['u']],
            'lr': cfg_optimizer_q_v['lr_u']
        }],
                    lr_decay=cfg_optimizer_q_v['lr_decay'])
Example #17
    def __init__(self, X, Y, layers):

        # Normalize data
        self.Xmean, self.Xstd = X.mean(0), X.std(0)
        self.Ymean, self.Ystd = Y.mean(0), Y.std(0)
        X = (X - self.Xmean) / self.Xstd
        Y = (Y - self.Ymean) / self.Ystd

        self.X = X
        self.Y = Y
        self.layers = layers

        # Define and initialize neural network
        self.params = self.initialize_NN(self.layers)

        # Total number of parameters
        self.num_params = self.params.shape[0]

        # Define optimizer
        self.optimizer = Adam(self.num_params, lr=1e-3)

        # Define gradient function using autograd
        self.grad_loss = grad(self.loss)
Example #18
    def init_optimizer_reg(self, reg_loss):
        if self['optimizer_reg']['type'] != 'Adam':
            raise ValueError(
                'only the Adam optimiser is supported for the regularisation')

        cfg_optimizer_reg = self['optimizer_reg']['args']

        if reg_loss.__class__.__name__ == 'RegLoss_LogNormal':
            optimizer_reg = Adam([{
                'params': [reg_loss.loc],
                'lr': cfg_optimizer_reg['lr_loc']
            }, {
                'params': [reg_loss.log_scale],
                'lr': cfg_optimizer_reg['lr_log_scale']
            }],
                                 lr_decay=cfg_optimizer_reg['lr_decay'])
        elif reg_loss.__class__.__name__ == 'RegLoss_L2':
            optimizer_reg = Adam(reg_loss.parameters(),
                                 lr=cfg_optimizer_reg['lr_log_w_reg'],
                                 lr_decay=cfg_optimizer_reg['lr_decay'])
        else:
            raise ValueError('unsupported regularisation loss: {}'.format(
                reg_loss.__class__.__name__))

        return optimizer_reg
Example #19
 def init_from_str(self):
     r = r"([a-zA-Z]*)=([^,)]*)"
     opt_str = self.param.lower()
     kwargs = dict([(i, eval(j)) for (i, j) in re.findall(r, opt_str)])
     if "sgd" in opt_str:
         optimizer = SGD(**kwargs)
     elif "adagrad" in opt_str:
         optimizer = AdaGrad(**kwargs)
     elif "rmsprop" in opt_str:
         optimizer = RMSProp(**kwargs)
     elif "adam" in opt_str:
         optimizer = Adam(**kwargs)
     else:
         raise NotImplementedError("{}".format(opt_str))
     return optimizer
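
For illustration, a hypothetical parameter string and what the regex-and-eval parsing above extracts from it:

# self.param = "Adam(lr=0.01, eps=1e-07)"
# opt_str -> "adam(lr=0.01, eps=1e-07)"
# kwargs  -> {"lr": 0.01, "eps": 1e-07}
# result  -> Adam(lr=0.01, eps=1e-07)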
Example #20
def model_builder(n_inputs, n_outputs):

    model = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy)
    model.add(Dense(64, input_shape=(n_inputs, )))
    model.add(Activation('relu'))
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(1))
    model.add(Activation('linear'))

    return model
Example #21
    def init_optimizer_GMM(self, data_loss):
        if self['optimizer_GMM']['type'] != 'Adam':
            raise ValueError(
                'only the Adam optimiser is supported for the GMM')

        cfg_optimizer_GMM = self['optimizer_GMM']['args']

        return Adam([{
            'params': [data_loss.log_std],
            'lr': cfg_optimizer_GMM['lr_log_std']
        }, {
            'params': [data_loss.logits],
            'lr': cfg_optimizer_GMM['lr_logits']
        }],
                    lr_decay=cfg_optimizer_GMM['lr_decay'])
Example #22
    def init_from_dict(self):
        O = self.param
        cc = O["cache"] if "cache" in O else None
        op = O["hyperparameters"] if "hyperparameters" in O else None

        if op is None:
            raise ValueError("Must have `hyperparemeters` key: {}".format(O))

        if op and op["id"] == "SGD":
            optimizer = SGD().set_params(op, cc)
        elif op and op["id"] == "RMSProp":
            optimizer = RMSProp().set_params(op, cc)
        elif op and op["id"] == "AdaGrad":
            optimizer = AdaGrad().set_params(op, cc)
        elif op and op["id"] == "Adam":
            optimizer = Adam().set_params(op, cc)
        elif op:
            raise NotImplementedError("{}".format(op["id"]))
        return optimizer
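
A hypothetical parameter dict that this method would accept (only the "id" key is required by the dispatch above; which other hyperparameter keys are honored depends on each optimizer's set_params):

O = {
    "cache": None,  # optional; treated as None when absent
    "hyperparameters": {"id": "Adam", "lr": 0.001},
}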
Example #23
def run_GNN_Adam(train_data,
                 valid_data,
                 W,
                 A,
                 b,
                 B,
                 alpha=0.0001,
                 eps=0.001,
                 n_vector=8,
                 gnn_steps=2,
                 n_epochs=100):
    """GNN with Adamで学習, 評価を行う"""
    beta1 = 0.9
    beta2 = 0.999
    m_W = np.zeros(W.shape)
    m_A = np.zeros(A.shape)
    m_b = 0.0
    v_W = np.zeros(W.shape)
    v_A = np.zeros(A.shape)
    v_b = 0.0
    # For storing loss and precision
    params = []
    # For storing W, A, b
    weights = []
    for epoch in range(n_epochs):
        W, A, b, loss_train = Adam(train_data, n_vector, B, W, A, b, gnn_steps,
                                   epoch, alpha, beta1, beta2, eps, m_W, m_A,
                                   m_b, v_W, v_A, v_b)
        precision_train = mean_precision(train_data, W, A, b, n_vector,
                                         gnn_steps)
        precision_val = mean_precision(valid_data, W, A, b, n_vector,
                                       gnn_steps)
        loss_val = valid_loss(valid_data, W, A, b, n_vector, gnn_steps)
        print(
            'epoch: {}, train loss: {}, train precision: {}, valid loss: {}, valid precision: {}'
            .format(epoch + 1, loss_train, precision_train, loss_val,
                    precision_val))
        params.append((loss_train, precision_train, loss_val, precision_val))
        weights.append((W, A, b))
    return params, weights
Example #24
def train(params):
        
    env = gym.make(params['env_name'])
    params['ob_dim'] = env.observation_space.shape[0]
    params['ac_dim'] = env.action_space.shape[0]

    m, v = 0, 0
        
    master = Learner(params)
        
    n_eps = 0
    n_iter = 0
    ts_cumulative = 0
    ts, rollouts, rewards = [], [], []
        
    while n_iter < params['max_iter']:
        
        reward = master.policy.rollout(env, params['steps'])
        rewards.append(reward)
        rollouts.append(n_eps)
        ts.append(ts_cumulative)
        
        print('Iter: %s, Eps: %s, R: %s' % (n_iter, n_eps, np.round(reward, 4)))
            
        params['n_iter'] = n_iter
        gradient, timesteps = aggregate_rollouts(master, params)
        ts_cumulative += timesteps
        n_eps += 2 * params['sensings']

        gradient /= (np.linalg.norm(gradient) / master.policy.N + 1e-8)
        
        n_iter += 1
        update, m, v = Adam(gradient, m, v, params['learning_rate'], n_iter)
            
        master.policy.update(update)

        out = pd.DataFrame({'Rollouts': rollouts, 'Reward': rewards, 'Timesteps': ts})
        out.to_csv('data/%s/results/%s_Seed%s.csv' %
                   (params['dir'], params['filename'], params['seed']),
                   index=False)
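
The Adam used here is a plain function returning the step together with the updated moment buffers rather than an optimizer object. It is not part of the snippet; a sketch matching the call update, m, v = Adam(gradient, m, v, params['learning_rate'], n_iter) (standard Adam; beta1, beta2, and eps are assumed defaults) could be:

import numpy as np

def Adam(gradient, m, v, learning_rate, t, beta1=0.9, beta2=0.999, eps=1e-8):
    # Exponential moving averages of the gradient and its square.
    m = beta1 * m + (1. - beta1) * gradient
    v = beta2 * v + (1. - beta2) * gradient**2
    # Bias-corrected estimates; t is the 1-based iteration count.
    m_hat = m / (1. - beta1**t)
    v_hat = v / (1. - beta2**t)
    # The caller applies the returned step (here via master.policy.update).
    update = learning_rate * m_hat / (np.sqrt(v_hat) + eps)
    return update, m, v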
Example #25
def train(model):
    world = Mnist()

    opt = Adam(np.random.randn(model.n_params), lr=0.1)

    for i in range(600):
        model.load_params(opt.get_value() +
                          np.random.randn(model.n_params) * 0.01)

        trajs = world.trajectories(None, 256)
        grad = policy_gradient(trajs, policy=model)
        opt.apply_gradient(grad)

        if i % 20 == 19:
            print("%4d) " % (i + 1), flush=True, end="")
            trajs = world.trajectories(None, 2000)
            trajs = accuracy(trajs, model=model, percent=True)
            print_reward(trajs, max_value=100, label="Train accuracy:")

    return opt.get_value()
Example #26
class CVAE:
    def __init__(self, X, Y, layers_P, layers_Q, layers_R):

        # Normalize data
        self.Xmean, self.Xstd = X.mean(0), X.std(0)
        self.Ymean, self.Ystd = Y.mean(0), Y.std(0)
        X = (X - self.Xmean) / self.Xstd
        Y = (Y - self.Ymean) / self.Ystd

        self.X = X
        self.Y = Y

        self.layers_P = layers_P
        self.layers_Q = layers_Q
        self.layers_R = layers_R

        self.X_dim = X.shape[1]
        self.Y_dim = Y.shape[1]
        self.Z_dim = layers_Q[-1]

        # Initialize encoder
        params = self.initialize_NN(layers_P)
        self.idx_P = np.arange(params.shape[0])

        # Initialize decoder
        params = np.concatenate([params, self.initialize_NN(layers_Q)])
        self.idx_Q = np.arange(self.idx_P[-1] + 1, params.shape[0])

        # Initialize prior
        params = np.concatenate([params, self.initialize_NN(layers_R)])
        self.idx_R = np.arange(self.idx_Q[-1] + 1, params.shape[0])

        self.params = params

        # Total number of parameters
        self.num_params = self.params.shape[0]

        # Define optimizer
        self.optimizer = Adam(self.num_params, lr=1e-3)

        # Define gradient function using autograd
        self.grad_elbo = grad(self.ELBO)

    def initialize_NN(self, Q):
        hyp = np.array([])
        layers = len(Q)
        for layer in range(0, layers - 2):
            A = -np.sqrt(6.0 / (Q[layer] + Q[layer + 1])) + 2.0 * np.sqrt(
                6.0 / (Q[layer] + Q[layer + 1])) * np.random.rand(
                    Q[layer], Q[layer + 1])
            b = np.zeros((1, Q[layer + 1]))
            hyp = np.concatenate([hyp, A.ravel(), b.ravel()])

        A = -np.sqrt(6.0 / (Q[-2] + Q[-1])) + 2.0 * np.sqrt(
            6.0 / (Q[-2] + Q[-1])) * np.random.rand(Q[-2], Q[-1])
        b = np.zeros((1, Q[-1]))
        hyp = np.concatenate([hyp, A.ravel(), b.ravel()])

        A = -np.sqrt(6.0 / (Q[-2] + Q[-1])) + 2.0 * np.sqrt(
            6.0 / (Q[-2] + Q[-1])) * np.random.rand(Q[-2], Q[-1])
        b = np.zeros((1, Q[-1]))
        hyp = np.concatenate([hyp, A.ravel(), b.ravel()])

        return hyp

    def forward_pass(self, X, Q, params):
        H = X
        idx_3 = 0
        layers = len(Q)
        for layer in range(0, layers - 2):
            idx_1 = idx_3
            idx_2 = idx_1 + Q[layer] * Q[layer + 1]
            idx_3 = idx_2 + Q[layer + 1]
            A = np.reshape(params[idx_1:idx_2], (Q[layer], Q[layer + 1]))
            b = np.reshape(params[idx_2:idx_3], (1, Q[layer + 1]))
            H = np.tanh(np.matmul(H, A) + b)

        idx_1 = idx_3
        idx_2 = idx_1 + Q[-2] * Q[-1]
        idx_3 = idx_2 + Q[-1]
        A = np.reshape(params[idx_1:idx_2], (Q[-2], Q[-1]))
        b = np.reshape(params[idx_2:idx_3], (1, Q[-1]))
        mu = np.matmul(H, A) + b

        idx_1 = idx_3
        idx_2 = idx_1 + Q[-2] * Q[-1]
        idx_3 = idx_2 + Q[-1]
        A = np.reshape(params[idx_1:idx_2], (Q[-2], Q[-1]))
        b = np.reshape(params[idx_2:idx_3], (1, Q[-1]))
        Sigma = np.exp(np.matmul(H, A) + b)

        return mu, Sigma

    def ELBO(self, params):
        X = self.X_batch
        Y = self.Y_batch

        # Prior: p(z|x)
        mu_0, Sigma_0 = self.forward_pass(X, self.layers_R, params[self.idx_R])

        # Encoder: q(z|x,y)
        mu_1, Sigma_1 = self.forward_pass(np.concatenate([X, Y], axis=1),
                                          self.layers_Q, params[self.idx_Q])

        # Reparametrization trick
        epsilon = np.random.randn(X.shape[0], self.Z_dim)
        Z = mu_1 + epsilon * np.sqrt(Sigma_1)

        # Decoder: p(y|x,z)
        mu_2, Sigma_2 = self.forward_pass(np.concatenate([X, Z], axis=1),
                                          self.layers_P, params[self.idx_P])

        # Log-determinants
        log_det_0 = np.sum(np.log(Sigma_0))
        log_det_1 = np.sum(np.log(Sigma_1))
        log_det_2 = np.sum(np.log(Sigma_2))

        # KL[q(z|x,y) || p(z|x)]
        KL = 0.5 * (np.sum(Sigma_1 / Sigma_0) + np.sum(
            (mu_0 - mu_1)**2 / Sigma_0) - self.Z_dim + log_det_0 - log_det_1)

        # -log p(y|x,z)
        NLML = 0.5 * (np.sum((Y - mu_2)**2 / Sigma_2) + log_det_2 +
                      np.log(2. * np.pi) * self.Y_dim * X.shape[0])

        return NLML + KL

    # Fetches a mini-batch of data
    def fetch_minibatch(self, X, Y, N_batch):
        N = X.shape[0]
        idx = np.random.choice(N, N_batch, replace=False)
        X_batch = X[idx, :]
        Y_batch = Y[idx, :]
        return X_batch, Y_batch

    # Trains the model
    def train(self, nIter=10000, batch_size=100):

        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch minibatch
            self.X_batch, self.Y_batch = self.fetch_minibatch(
                self.X, self.Y, batch_size)

            # Evaluate loss using current parameters
            params = self.params
            elbo = self.ELBO(params)

            # Update parameters
            grad_params = self.grad_elbo(params)
            self.params = self.optimizer.step(params, grad_params)

            # Print
            if it % 10 == 0:
                elapsed = timeit.default_timer() - start_time
                print('It: %d, ELBO: %.3e, Time: %.2f' % (it, elbo, elapsed))
                start_time = timeit.default_timer()

    def generate_samples(self, X_star, N_samples):
        X_star = (X_star - self.Xmean) / self.Xstd
        # Prior p(z|x) evaluated at X_star
        mu_0, Sigma_0 = self.forward_pass(X_star, self.layers_R,
                                          self.params[self.idx_R])

        # Reparametrization trick
        epsilon = np.random.randn(N_samples, self.Z_dim)
        Z = mu_0 + epsilon * np.sqrt(Sigma_0)

        # Decode
        mean_star, var_star = self.forward_pass(
            np.concatenate([X_star, Z], axis=1), self.layers_P,
            self.params[self.idx_P])

        # De-normalize
        mean_star = mean_star * self.Ystd + self.Ymean
        var_star = var_star * self.Ystd**2

        return mean_star, var_star
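
For completeness, a hypothetical end-to-end use of the CVAE above on synthetic 1-D data (all layer sizes are assumptions; note how the input dimensions follow the forward passes: the decoder sees [X, Z], the encoder [X, Y], the prior X alone):

import numpy as np

X = np.random.randn(500, 1)
Y = np.sin(3 * X) + 0.1 * np.random.randn(500, 1)
Z_dim = 2
model = CVAE(X, Y,
             layers_P=[1 + Z_dim, 50, 1],  # decoder p(y|x,z)
             layers_Q=[1 + 1, 50, Z_dim],  # encoder q(z|x,y)
             layers_R=[1, 50, Z_dim])      # prior p(z|x)
model.train(nIter=2000, batch_size=100)
mean_star, var_star = model.generate_samples(X[:5], N_samples=5)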
Example #27
from keras.callbacks import ModelCheckpoint, CSVLogger, TensorBoard, LearningRateScheduler
from optimizers import Adam, schedule
from layers import get_loss_funcs, show_gpus
from model import thin_model
from dataloader import data_gen_train, data_gen_val
from config import logs_dir, weights_best_file, training_log, base_lr, max_iter, batch_size

show_gpus()

train_samples = data_gen_train.size()
val_samples = data_gen_val.size()
iterations_per_epoch = train_samples // batch_size
adam = Adam(lr=base_lr)

loss_funcs = get_loss_funcs(batch_size)
thin_model.compile(loss=loss_funcs, optimizer=adam, metrics=["accuracy"])

checkpoint = ModelCheckpoint(weights_best_file,
                             monitor='loss',
                             verbose=0,
                             save_best_only=True,
                             save_weights_only=True,
                             mode='min',
                             period=1)
csv_logger = CSVLogger(training_log, append=True)
tb = TensorBoard(log_dir=logs_dir,
                 histogram_freq=0,
                 write_graph=True,
                 write_images=False)
lrate = LearningRateScheduler(schedule)
callbacks_list = [checkpoint, csv_logger, tb, lrate]
Example #28
# plt.subplot(1, 4, 2)
# plt.imshow(img[1])
# plt.subplot(1, 4, 3)
# plt.imshow(img[2])
# plt.subplot(1, 4, 4)
# plt.imshow(img[3])

model = MNISTNet()
loss = SoftmaxCrossEntropy(num_class=10)


# define your learning rate scheduler
def func(lr, iteration):
    if iteration % 1000 == 0:
        return lr * 0.5
    else:
        return lr


adam = Adam(lr=0.01, decay=0, sheduler_func=func)
l2 = L2(w=0.001)  # L2 regularization with lambda=0.001
model.compile(optimizer=adam, loss=loss, regularization=l2)
train_results, val_results, test_results = model.train(mnist,
                                                       train_batch=30,
                                                       val_batch=1000,
                                                       test_batch=1000,
                                                       epochs=2,
                                                       val_intervals=100,
                                                       test_intervals=300,
                                                       print_intervals=100)
Example #29
train_y = convert_to_one_hot(train_y, num_classes)
test_x = np.reshape(test_x, (len(test_x), 1, img_rows, img_cols)).astype(skml_config.config.i_type)
test_y = convert_to_one_hot(test_y, num_classes)

train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y)


filters = 64
model = Sequential()
model.add(Convolution(filters, 3, input_shape=input_shape))
model.add(BatchNormalization())
model.add(ReLU())
model.add(MaxPooling(2))
model.add(Convolution(filters, 3))
model.add(BatchNormalization())
model.add(ReLU())
model.add(GlobalAveragePooling())
model.add(Affine(num_classes))
model.compile(SoftmaxCrossEntropy(), Adam())

train_batch_size = 100
valid_batch_size = 1
print("訓練開始: {}".format(datetime.now().strftime("%Y/%m/%d %H:%M")))
model.fit(train_x, train_y, train_batch_size, 20, validation_data=(valid_batch_size, valid_x, valid_y), validation_steps=1)
print("訓練終了: {}".format(datetime.now().strftime("%Y/%m/%d %H:%M")))

model.save(save_path)

loss, acc = model.evaluate(test_x, test_y)
print("Test loss: {}".format(loss))
print("Test acc: {}".format(acc))
Example #30
class NeuralNetwork:
    # Initialize the class
    def __init__(self, X, Y, layers):

        # Normalize data
        self.Xmean, self.Xstd = X.mean(0), X.std(0)
        self.Ymean, self.Ystd = Y.mean(0), Y.std(0)
        X = (X - self.Xmean) / self.Xstd
        Y = (Y - self.Ymean) / self.Ystd

        self.X = X
        self.Y = Y
        self.layers = layers

        # Define and initialize neural network
        self.params = self.initialize_NN(self.layers)

        # Total number of parameters
        self.num_params = self.params.shape[0]

        # Define optimizer
        self.optimizer = Adam(self.num_params, lr=1e-3)

        # Define gradient function using autograd
        self.grad_loss = grad(self.loss)

    # Initializes the network weights and biases using Xavier initialization
    def initialize_NN(self, Q):
        params = np.array([])
        num_layers = len(Q)
        for layer in range(0, num_layers - 1):
            weights = -np.sqrt(6.0 /
                               (Q[layer] + Q[layer + 1])) + 2.0 * np.sqrt(
                                   6.0 /
                                   (Q[layer] + Q[layer + 1])) * np.random.rand(
                                       Q[layer], Q[layer + 1])
            biases = np.zeros((1, Q[layer + 1]))
            params = np.concatenate([params, weights.ravel(), biases.ravel()])
        return params

    # Evaluates the forward pass
    def forward_pass(self, X, Q, params):
        H = X
        idx_3 = 0
        num_layers = len(Q)
        # All layers up to last
        for layer in range(0, num_layers - 2):
            idx_1 = idx_3
            idx_2 = idx_1 + Q[layer] * Q[layer + 1]
            idx_3 = idx_2 + Q[layer + 1]
            weights = np.reshape(params[idx_1:idx_2], (Q[layer], Q[layer + 1]))
            biases = np.reshape(params[idx_2:idx_3], (1, Q[layer + 1]))
            H = np.tanh(np.matmul(H, weights) + biases)

        # Last linear layer
        idx_1 = idx_3
        idx_2 = idx_1 + Q[-2] * Q[-1]
        idx_3 = idx_2 + Q[-1]
        weights = np.reshape(params[idx_1:idx_2], (Q[-2], Q[-1]))
        biases = np.reshape(params[idx_2:idx_3], (1, Q[-1]))
        mu = np.matmul(H, weights) + biases

        return mu

    # Evaluates the mean square error loss
    def loss(self, params):
        X = self.X_batch
        Y = self.Y_batch
        mu = self.forward_pass(X, self.layers, params)
        return np.mean((Y - mu)**2)

    # Fetches a mini-batch of data
    def fetch_minibatch(self, X, Y, N_batch):
        N = X.shape[0]
        idx = np.random.choice(N, N_batch, replace=False)
        X_batch = X[idx, :]
        Y_batch = Y[idx, :]
        return X_batch, Y_batch

    # Trains the model by minimizing the MSE loss
    def train(self, nIter=10000, batch_size=100):

        start_time = timeit.default_timer()
        for it in range(nIter):
            # Fetch minibatch
            self.X_batch, self.Y_batch = self.fetch_minibatch(
                self.X, self.Y, batch_size)

            # Evaluate loss using current parameters
            params = self.params
            loss = self.loss(params)

            # Update parameters
            grad_params = self.grad_loss(params)
            self.params = self.optimizer.step(params, grad_params)

            # Print
            if it % 10 == 0:
                elapsed = timeit.default_timer() - start_time
                print('It: %d, Loss: %.3e, Time: %.2f' % (it, loss, elapsed))
                start_time = timeit.default_timer()

    # Evaluates predictions at test points
    def predict(self, X_star):
        # Normalize inputs
        X_star = (X_star - self.Xmean) / self.Xstd
        y_star = self.forward_pass(X_star, self.layers, self.params)
        # De-normalize outputs
        y_star = y_star * self.Ystd + self.Ymean
        return y_star
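
Finally, a hypothetical usage of this NeuralNetwork on synthetic regression data (assuming numpy as np, autograd's grad, timeit, and an Adam class with the step(params, grads) interface sketched after Example #3 are in scope):

X = np.random.randn(200, 3)
Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(200, 1)
model = NeuralNetwork(X, Y, layers=[3, 32, 32, 1])
model.train(nIter=1000, batch_size=32)
Y_pred = model.predict(X)  # de-normalized predictions at the training inputs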