예제 #1
0
파일: rl.py 프로젝트: tsuberim/RL
class PolicyGradAgent(Agent):
    """Vanilla policy-gradient (REINFORCE) agent with an entropy bonus.

    The policy network maps observations to a softmax distribution over
    the discrete action space.
    """

    def __init__(self, policy=None, entropy_weight=1e-2, **kwargs):
        """Create the agent.

        policy: optional feature block placed before the softmax head.
            Defaults to a fresh ``DenseBlock()`` created here, NOT in the
            signature: a ``DenseBlock()`` default argument would be
            evaluated once at definition time and shared (parameters and
            all) by every agent instance.
        entropy_weight: coefficient of the entropy bonus in the loss.
        """
        Agent.__init__(self, **kwargs)
        self.entropy_weight = entropy_weight

        if policy is None:
            policy = DenseBlock()

        self.policy = Sequential()
        self.policy.add(
            policy,
            DenseBlock(n_dims=self.action_space.n,
                       n_hidden_layers=1,
                       transform=softmax))

    def forward(self, ob):
        """Return the action distribution for observation `ob`."""
        return self.policy(ob)

    def step(self, ob, rew):
        """Sample an action from the current policy.

        `rew` is unused here; learning happens in `loss`. Returns the
        sampled action plus a stats dict with the distribution's entropy.
        """
        act_dist = self(ob)
        action = sample_multinomial(act_dist)
        return action, dict(entropy=entropy(act_dist))

    def loss(self, obs, acts, rews, infos):
        """REINFORCE loss with an entropy bonus over one trajectory."""
        # Normalized discounted returns serve as per-step advantages.
        rets = normalize(returns(rews, self.discount))
        act_dists = self(obs)
        entropy_loss = entropy(act_dists)
        # `epsilon` guards log(0) for actions assigned zero probability.
        policy_grad_loss = -log(act_dists.pick(acts) + epsilon) * rets
        # Subtracting the entropy term encourages exploration.
        loss = policy_grad_loss - self.entropy_weight * entropy_loss
        return loss, dict(
            entropy=entropy_loss,
            policy_grad_loss=policy_grad_loss,
        )
예제 #2
0
파일: rl.py 프로젝트: tsuberim/RL
    def __init__(self, agent, predictor=None, **kwargs):
        """Wrap `agent`, adding a scalar-output predictor network.

        predictor: optional feature block placed before the 1-dim head.
            Defaults to a fresh ``DenseBlock()`` created here, NOT in the
            signature: a ``DenseBlock()`` default argument would be
            evaluated once at definition time and shared by every
            instance.
        """
        super().__init__(agent, **kwargs)

        if predictor is None:
            predictor = DenseBlock()

        self.predictor = Sequential()
        self.predictor.add(predictor, DenseBlock(
            n_dims=1,
            n_hidden_layers=1,
        ))
예제 #3
0
파일: rl.py 프로젝트: tsuberim/RL
    def __init__(self, policy=None, entropy_weight=1e-2, **kwargs):
        """Build the policy network: `policy` features followed by a
        softmax head over the action space.

        policy: optional feature block. Defaults to a fresh
            ``DenseBlock()`` created here, NOT in the signature: a
            ``DenseBlock()`` default argument would be evaluated once at
            definition time and shared by every instance.
        entropy_weight: coefficient of the entropy bonus.
        """
        Agent.__init__(self, **kwargs)
        self.entropy_weight = entropy_weight

        if policy is None:
            policy = DenseBlock()

        self.policy = Sequential()
        self.policy.add(
            policy,
            DenseBlock(n_dims=self.action_space.n,
                       n_hidden_layers=1,
                       transform=softmax))
예제 #4
0
파일: rl.py 프로젝트: tsuberim/RL
class WithHabits(AgentWrapper):
    """Agent wrapper that learns "habits".

    A predictor network imitates the wrapped agent's actions; when it is
    confident (low entropy) it acts directly, skipping the inner agent.
    Rewards collected during habitual steps are banked and handed to the
    inner agent the next time it is consulted.
    """

    def __init__(self,
                 agent,
                 predictor=None,
                 entropy_weight=1e-2,
                 **kwargs):
        """predictor: optional feature block before the softmax head.
            Defaults to a fresh ``DenseBlock()`` created here, NOT in the
            signature: a ``DenseBlock()`` default argument would be
            evaluated once and shared by every wrapper instance.
        entropy_weight: scales the entropy bonus in `loss`.
        """
        super().__init__(agent, **kwargs)
        self.entropy_weight = entropy_weight

        if predictor is None:
            predictor = DenseBlock()

        self.predictor = Sequential()
        self.predictor.add(
            predictor,
            DenseBlock(n_dims=self.action_space.n,
                       n_hidden_layers=1,
                       transform=softmax))

    def reset(self):
        # Reward and step count accumulated while acting on habit.
        self.cum_rew = 0
        self.habit_steps = 0
        return super().reset()

    def step(self, ob, rew):
        act_dist = self.predictor(ob)
        ent = entropy(act_dist)
        # Low entropy => confident prediction => high habit probability.
        # Converted to a Python float once so the comparison and BOTH
        # stats dicts use the same scalar (the original called
        # .asscalar() in one branch only). Assumes entropy() returns a
        # size-1 NDArray, as the original's .asscalar() call implies.
        habit_prob = (1 - ent).asscalar()
        if random.uniform(0, 1) < habit_prob:
            # Act on habit: sample from the predictor, bank the reward.
            self.cum_rew += rew
            self.habit_steps += 1
            return sample_multinomial(act_dist), dict(
                habit=True,
                habit_prob=habit_prob,
                cum_rew=self.cum_rew,
                habit_steps=self.habit_steps)
        else:
            # Consult the wrapped agent, handing it the banked reward.
            # NOTE(review): the CURRENT step's `rew` is not added to
            # cum_rew before the hand-off, so it is dropped — confirm
            # this is intended.
            action, stats = super().step(ob, self.cum_rew)
            self.cum_rew = 0
            self.habit_steps = 0
            return action, dict(**stats,
                                habit=False,
                                habit_prob=habit_prob,
                                cum_rew=self.cum_rew,
                                habit_steps=self.habit_steps)

    def loss(self, obs, acts, rews, infos):
        """Inner agent's loss plus an imitation loss for the predictor."""
        # Train the predictor to imitate the actions actually taken.
        pred_act_dists = self.predictor(obs)
        prediction_loss = -log(pred_act_dists.pick(acts) + epsilon)
        entropy_loss = entropy(pred_act_dists)
        loss, stats = super().loss(obs, acts, rews, infos)
        return loss + prediction_loss - self.entropy_weight * entropy_loss, dict(
            **stats,
            habit_prediction_loss=prediction_loss,
            habit_prediction_entropy=entropy_loss,
            habit_steps=sum(1 for info in infos if info['habit']))
예제 #5
0
파일: rl.py 프로젝트: tsuberim/RL
    def __init__(self,
                 agent,
                 predictor=None,
                 entropy_weight=1e-2,
                 **kwargs):
        """Wrap `agent`, adding a predictor with a softmax head over the
        action space.

        predictor: optional feature block. Defaults to a fresh
            ``DenseBlock()`` created here, NOT in the signature: a
            ``DenseBlock()`` default argument would be evaluated once at
            definition time and shared by every instance.
        entropy_weight: coefficient of the predictor's entropy bonus.
        """
        super().__init__(agent, **kwargs)
        self.entropy_weight = entropy_weight

        if predictor is None:
            predictor = DenseBlock()

        self.predictor = Sequential()
        self.predictor.add(
            predictor,
            DenseBlock(n_dims=self.action_space.n,
                       n_hidden_layers=1,
                       transform=softmax))
예제 #6
0
파일: rl.py 프로젝트: tsuberim/RL
    def __init__(self, n_dims=128, **kwargs):
        """Build the convolutional encoder.

        `n_dims` is the channel budget of the final conv layers; the
        earlier layers use fractions of it, so it must be at least 16.
        """
        PersistentBlock.__init__(self, **kwargs)
        if n_dims < 16:
            raise ValueError('`n_dims` must be at least 16 (given: %d)' %
                             n_dims)

        # Widths grow from n_dims/16 up to n_dims through the stack.
        layers = [
            BatchNorm(),
            Conv2D(int(n_dims / 16), 6, (4, 3)),
            Activation('relu'),
            Conv2D(int(n_dims / 8), 3),
            Activation('relu'),
            Conv2D(int(n_dims / 2), 3),
            BatchNorm(),
            MaxPool2D(),
            Activation('relu'),
            Conv2D(int(n_dims), 3),
            MaxPool2D(),
            Activation('relu'),
            Conv2D(int(n_dims), 3),
            MaxPool2D(),
            Activation('relu'),
            Flatten(),
        ]
        self.encoder = Sequential()
        self.encoder.add(*layers)
예제 #7
0
파일: rl.py 프로젝트: tsuberim/RL
 def __init__(self,
              n_dims=16,
              n_hidden_units=16,
              n_hidden_layers=2,
              activation='relu',
              transform=(lambda x: x),
              **kwargs):
     """Build an MLP: Flatten, `n_hidden_layers` dense layers of
     `n_hidden_units` each, then a linear head of `n_dims` units.

     `transform` is stored on the instance; its application site is not
     in this method.
     """
     PersistentBlock.__init__(self, **kwargs)
     self.transform = transform
     hidden = [Dense(n_hidden_units, activation=activation)
               for _ in range(n_hidden_layers)]
     self.seq = Sequential()
     self.seq.add(Flatten(), *hidden, Dense(n_dims))
예제 #8
0
def main():
    """Train a triplet-loss embedding network on MNIST and export
    TensorBoard projector data for both the raw pixels and the learned
    embeddings."""
    ctx = mx.cpu()
    batch_size = 1024
    random.seed(47)

    mnist_train = MNIST(train=True)  # load the training split
    # NOTE(review): reaches into MNIST's private `_data`/`_label`
    # attributes — not a public API; confirm against the gluon version.
    tr_data = mnist_train._data.reshape((-1, 28 * 28))  # images, flattened
    tr_label = mnist_train._label  # labels

    mnist_test = MNIST(train=False)  # load the test split
    te_data = mnist_test._data.reshape((-1, 28 * 28))  # images, flattened
    te_label = mnist_test._label  # labels

    def transform(data_, label_):
        # Scale pixels to [0, 1] and cast labels to float32.
        return data_.astype(np.float32) / 255., label_.astype(np.float32)

    train_data = DataLoader(
        TripletDataset(rd=tr_data, rl=tr_label, transform=transform),
        batch_size, shuffle=True)

    test_data = DataLoader(
        TripletDataset(rd=te_data, rl=te_label, transform=transform),
        batch_size, shuffle=True)

    # Shared embedding network applied to anchor/positive/negative alike.
    base_net = Sequential()
    with base_net.name_scope():
        base_net.add(Dense(256, activation='relu'))
        base_net.add(Dense(128, activation='relu'))

    base_net.collect_params().initialize(mx.init.Uniform(scale=0.1), ctx=ctx)

    triplet_loss = gluon.loss.TripletLoss()  # triplet margin loss
    trainer_triplet = gluon.Trainer(base_net.collect_params(), 'sgd', {'learning_rate': 0.05})

    for epoch in range(10):
        curr_loss = 0.0
        for i, (data, _) in enumerate(train_data):
            data = data.as_in_context(ctx)
            # Each sample packs (anchor, positive, negative) along axis 1.
            anc_ins, pos_ins, neg_ins = data[:, 0], data[:, 1], data[:, 2]
            with autograd.record():
                inter1 = base_net(anc_ins)
                inter2 = base_net(pos_ins)
                inter3 = base_net(neg_ins)
                loss = triplet_loss(inter1, inter2, inter3)  # triplet loss
            loss.backward()
            trainer_triplet.step(batch_size)
            # NOTE(review): overwritten every batch, so the epoch print
            # below reports only the LAST batch's mean loss.
            curr_loss = mx.nd.mean(loss).asscalar()
            # print('Epoch: %s, Batch: %s, Triplet Loss: %s' % (epoch, i, curr_loss))
        print('Epoch: %s, Triplet Loss: %s' % (epoch, curr_loss))
        evaluate_net(base_net, test_data, ctx=ctx)

    # Export projector data: raw pixels vs. learned embeddings.
    te_data, te_label = transform(te_data, te_label)
    tb_projector(te_data.asnumpy(), te_label, os.path.join(ROOT_DIR, 'logs', 'origin'))
    te_res = base_net(te_data)
    tb_projector(te_res.asnumpy(), te_label, os.path.join(ROOT_DIR, 'logs', 'triplet'))
예제 #9
0
파일: rl.py 프로젝트: tsuberim/RL
class WithValueEstimator(AgentWrapper):
    """Agent wrapper that learns a return predictor and uses it to
    bootstrap the final reward of a (possibly truncated) trajectory."""

    def __init__(self, agent, predictor=None, **kwargs):
        """predictor: optional feature block before the scalar head.
            Defaults to a fresh ``DenseBlock()`` created here, NOT in the
            signature: a ``DenseBlock()`` default argument would be
            evaluated once at definition time and shared by every
            wrapper instance.
        """
        super().__init__(agent, **kwargs)

        if predictor is None:
            predictor = DenseBlock()

        self.predictor = Sequential()
        self.predictor.add(predictor, DenseBlock(
            n_dims=1,
            n_hidden_layers=1,
        ))

    def loss(self, obs, acts, rews, infos, **kwargs):
        """Squared return-prediction error plus the wrapped agent's loss
        computed on bootstrapped rewards."""
        # Regress predictor(obs) onto the observed discounted returns.
        return_pred_loss = (returns(rews, self.discount) -
                            self.predictor(obs))**2
        # Bootstrap: fold the predicted return-to-go of the final state
        # into its reward before handing the trajectory to the agent.
        predicted_rest_return = self.predictor(obs[-1].expand_dims(axis=0))[0]
        new_rews = concat(rews[:-1], rews[-1] + predicted_rest_return, dim=0)
        loss, stats = super().loss(obs, acts, new_rews, infos, **kwargs)
        return loss + return_pred_loss, dict(**stats,
                                             return_pred_loss=return_pred_loss)
예제 #10
0
파일: rl.py 프로젝트: tsuberim/RL
class WithCuriousity(AgentWrapper):
    """ICM-style curiosity wrapper: a forward model predicts the next
    observation encoding, its error ("surprise") is mixed into the
    rewards as an intrinsic bonus, and an inverse model predicts the
    action taken between consecutive encodings."""

    def __init__(self,
                 agent,
                 encoder=None,
                 forward_model=None,
                 inverse_model=None,
                 action_pred_weight=0.8,
                 curiosity_rews_weight=0.8,
                 **kwargs):
        """The three model arguments default to fresh ``DenseBlock()``
        instances created here, NOT in the signature: ``DenseBlock()``
        default arguments would be evaluated once at definition time and
        shared by every wrapper instance.

        action_pred_weight: mixes action-prediction vs. surprise loss.
        curiosity_rews_weight: mixes intrinsic vs. extrinsic rewards.
        """
        super().__init__(agent, **kwargs)
        self.action_pred_weight = float(action_pred_weight)
        self.curiosity_rews_weight = float(curiosity_rews_weight)

        if encoder is None:
            encoder = DenseBlock()
        if forward_model is None:
            forward_model = DenseBlock()
        if inverse_model is None:
            inverse_model = DenseBlock()

        self.encoder = encoder
        self.forward_model = forward_model
        self.inverse_model = Sequential()
        self.inverse_model.add(
            inverse_model,
            DenseBlock(n_dims=self.action_space.n,
                       n_hidden_layers=1,
                       transform=softmax))
        self.softmax_loss = loss.SoftmaxCrossEntropyLoss()

    def loss(self, obs, acts, rews, infos, **kwargs):
        """Wrapped agent's loss on curiosity-mixed rewards, plus the
        forward/inverse model losses."""
        encs = self.encoder(obs)
        one_hot_acts = one_hot(acts, self.action_space.n)
        # Forward model: predict enc[t+1] from (enc[t], act[t]).
        enc_preds = self.forward_model(concat(encs[:-1], one_hot_acts[:-1]))
        # Inverse model: predict act[t] from (enc[t], enc[t+1]).
        action_preds = self.inverse_model(concat(encs[:-1], encs[1:]))
        action_pred_loss = self.softmax_loss(action_preds, acts[:-1])
        surprise = cosine_distance(enc_preds, encs[1:])
        # Pad with 0 for the final step, which has no successor encoding.
        extra_rews = concat(surprise, array([0]), dim=0)
        total_rews = \
            (1 - self.curiosity_rews_weight)*rews + \
            self.curiosity_rews_weight*extra_rews
        # Renamed from `loss` so the local does not shadow the `loss`
        # module referenced in __init__.
        agent_loss, stats = super().loss(obs, acts, total_rews, infos, **kwargs)
        curiosity_loss = \
            (1-self.action_pred_weight)*surprise \
            + self.action_pred_weight*action_pred_loss
        return agent_loss + curiosity_loss.sum(), dict(
            **stats,
            surprise=surprise,
            action_prediction_loss=action_pred_loss)
예제 #11
0
파일: rl.py 프로젝트: tsuberim/RL
    def __init__(self,
                 agent,
                 encoder=None,
                 forward_model=None,
                 inverse_model=None,
                 action_pred_weight=0.8,
                 curiosity_rews_weight=0.8,
                 **kwargs):
        """Wrap `agent` with curiosity models.

        The three model arguments default to fresh ``DenseBlock()``
        instances created here, NOT in the signature: ``DenseBlock()``
        default arguments would be evaluated once at definition time and
        shared by every wrapper instance.

        action_pred_weight: mixes action-prediction vs. surprise loss.
        curiosity_rews_weight: mixes intrinsic vs. extrinsic rewards.
        """
        super().__init__(agent, **kwargs)
        self.action_pred_weight = float(action_pred_weight)
        self.curiosity_rews_weight = float(curiosity_rews_weight)

        if encoder is None:
            encoder = DenseBlock()
        if forward_model is None:
            forward_model = DenseBlock()
        if inverse_model is None:
            inverse_model = DenseBlock()

        self.encoder = encoder
        self.forward_model = forward_model
        self.inverse_model = Sequential()
        self.inverse_model.add(
            inverse_model,
            DenseBlock(n_dims=self.action_space.n,
                       n_hidden_layers=1,
                       transform=softmax))
        self.softmax_loss = loss.SoftmaxCrossEntropyLoss()
예제 #12
0
def vgg_block(num_convs, num_channels):
    """Return one VGG block: `num_convs` 3x3 same-padding ReLU
    convolutions of `num_channels` channels, then a 2x2 stride-2 max
    pool."""
    block = Sequential()
    convs = [Conv2D(num_channels, kernel_size=3,
                    padding=1, activation='relu')
             for _ in range(num_convs)]
    block.add(*convs)
    block.add(MaxPool2D(pool_size=2, strides=2))
    return block
예제 #13
0
파일: rl.py 프로젝트: tsuberim/RL
class AtariImageEncoder(PersistentBlock):
    """Convolutional encoder for Atari frames: downsample, normalize,
    then run a conv stack ending in a flat feature vector."""

    def __init__(self, n_dims=128, **kwargs):
        """`n_dims` is the channel budget of the final conv layers; the
        earlier layers use fractions of it, so it must be at least 16."""
        PersistentBlock.__init__(self, **kwargs)
        if n_dims < 16:
            raise ValueError('`n_dims` must be at least 16 (given: %d)' %
                             n_dims)

        # Widths grow from n_dims/16 up to n_dims through the stack.
        layers = [
            BatchNorm(),
            Conv2D(int(n_dims / 16), 6, (4, 3)),
            Activation('relu'),
            Conv2D(int(n_dims / 8), 3),
            Activation('relu'),
            Conv2D(int(n_dims / 2), 3),
            BatchNorm(),
            MaxPool2D(),
            Activation('relu'),
            Conv2D(int(n_dims), 3),
            MaxPool2D(),
            Activation('relu'),
            Conv2D(int(n_dims), 3),
            MaxPool2D(),
            Activation('relu'),
            Flatten(),
        ]
        self.encoder = Sequential()
        self.encoder.add(*layers)

    def forward(self, img):
        """Encode a batch of images into flat feature vectors."""
        # Move the channel axis forward (last -> second; channels-last
        # input assumed — the transpose pattern implies NHWC -> NCHW).
        x = img.transpose((0, 3, 1, 2))
        # Halve both spatial dimensions by striding.
        x = x[:, :, ::2, ::2]
        # Center around zero and scale down pixel values.
        x = (x - 128) / 255
        return self.encoder(x)
예제 #14
0
import d2lzh as d2l
from mxnet import gluon, init, nd
from mxnet.gluon.nn import Sequential, Conv2D, Dense, MaxPool2D, Dropout

# LeNet-style network with sigmoid activations and light dropout.
net = Sequential()
net.add(
    Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
    MaxPool2D(pool_size=2, strides=2),
    Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
    MaxPool2D(pool_size=2, strides=2),
    Dense(120, activation='sigmoid'),
    Dropout(0.05),
    Dense(84, activation='sigmoid'),
    Dropout(0.05),
    Dense(10),
)

batch_size = 256
train_iter, test_iter = d2l.load_data_mnist(batch_size=batch_size)

lr, num_epochs = 0.9, 20
ctx = d2l.try_gpu()

net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())

# NOTE(review): `X` is never used below; the draw is kept because it
# advances the global RNG state that later shuffling may depend on.
X = nd.random.uniform(shape=(1, 1, 28, 28))

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
예제 #15
0
def train(hyperparameters, channel_input_dirs, num_gpus, hosts):
    """Train a small CNN classifier on pickled data and return the net.

    hyperparameters: dict; reads "batch_size" (default 64) and "epochs"
        (default 3).
    channel_input_dirs: dict; reads the 'training' directory containing
        train/data.p and validation/data.p pickles.
    num_gpus: number of GPUs; > 0 selects GPU context and device kvstore.
    hosts: list of training hosts; more than one selects a distributed
        kvstore.
    """
    batch_size = hyperparameters.get("batch_size", 64)
    epochs = hyperparameters.get("epochs", 3)

    mx.random.seed(42)

    training_dir = channel_input_dirs['training']

    # File handles renamed from `pickle` to `fp` so they do not shadow
    # the pickle module name.
    with open("{}/train/data.p".format(training_dir), "rb") as fp:
        train_nd = load(fp)
    with open("{}/validation/data.p".format(training_dir), "rb") as fp:
        validation_nd = load(fp)

    train_data = gluon.data.DataLoader(train_nd, batch_size, shuffle=True)
    validation_data = gluon.data.DataLoader(validation_nd,
                                            batch_size,
                                            shuffle=True)

    net = Sequential()
    # http://gluon.mxnet.io/chapter03_deep-neural-networks/plumbing.html#What's-the-deal-with-name_scope()?
    with net.name_scope():
        net.add(
            Conv2D(channels=32,
                   kernel_size=(3, 3),
                   padding=0,
                   activation="relu"))
        net.add(
            Conv2D(channels=32,
                   kernel_size=(3, 3),
                   padding=0,
                   activation="relu"))
        net.add(MaxPool2D(pool_size=(2, 2)))
        net.add(Dropout(.25))
        net.add(Flatten())
        net.add(Dense(8))

    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()

    # Xavier (also known as Glorot) initialization.
    net.collect_params().initialize(Xavier(magnitude=2.24), ctx=ctx)

    loss = SoftmaxCrossEntropyLoss()

    # kvstore type for multi-gpu and distributed training.
    if len(hosts) == 1:
        kvstore = "device" if num_gpus > 0 else "local"
    else:
        # BUG FIX: was "dist_device_sync'" — the stray trailing quote
        # named a nonexistent kvstore type.
        kvstore = "dist_device_sync" if num_gpus > 0 else "dist_sync"

    trainer = Trainer(net.collect_params(), optimizer="adam", kvstore=kvstore)

    smoothing_constant = .01

    for e in range(epochs):
        moving_loss = 0
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data)
                loss_result = loss(output, label)
            loss_result.backward()
            trainer.step(batch_size)

            # Exponential moving average of the batch loss, seeded with
            # the very first batch's loss.
            curr_loss = nd.mean(loss_result).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (e == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           smoothing_constant * curr_loss)

        validation_accuracy = measure_performance(net, ctx, validation_data)
        train_accuracy = measure_performance(net, ctx, train_data)
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
              (e, moving_loss, train_accuracy, validation_accuracy))

    return net
예제 #16
0
    def create_model(self) -> Sequential:
        """Build the text classifier: embedding -> dropout ->
        bidirectional LSTM -> dense layer over the label set."""
        embedding_size = 100
        model = Sequential()
        with model.name_scope():
            # (batch_size,) ids in, (batch_size, embedding_size) out.
            model.add(Embedding(input_dim=self.vocab_size,
                                output_dim=embedding_size))
            model.add(Dropout(0.2))
            # layout='NTC' (batch, sequence, feature) instead of the
            # default 'TNC', so inputs can be
            # (batch_size, sequence_length, embedding_size).
            model.add(LSTM(hidden_size=64, layout='NTC', bidirectional=True))
            model.add(Dense(len(self.labels)))
        return model