class PolicyGradAgent(Agent):
    """REINFORCE-style policy-gradient agent with an entropy bonus.

    The policy network is the user-supplied trunk followed by a dense
    softmax head sized to the (discrete) action space.
    """

    def __init__(self, policy=None, entropy_weight=1e-2, **kwargs):
        Agent.__init__(self, **kwargs)
        self.entropy_weight = entropy_weight
        # BUG FIX: the original default `policy=DenseBlock()` is evaluated
        # once at definition time, so every agent constructed without an
        # explicit trunk would share the very same block (and parameters).
        # Build a fresh block per instance instead.
        if policy is None:
            policy = DenseBlock()
        self.policy = Sequential()
        self.policy.add(
            policy,
            DenseBlock(n_dims=self.action_space.n, n_hidden_layers=1,
                       transform=softmax))

    def forward(self, ob):
        """Return the action distribution for observation `ob`."""
        return self.policy(ob)

    def step(self, ob, rew):
        """Sample an action from the current policy; report its entropy."""
        act_dist = self(ob)
        action = sample_multinomial(act_dist)
        return action, dict(entropy=entropy(act_dist))

    def loss(self, obs, acts, rews, infos):
        """Policy-gradient loss minus the weighted entropy bonus."""
        # Normalized discounted returns act as the per-step weight.
        rets = normalize(returns(rews, self.discount))
        act_dists = self(obs)
        entropy_loss = entropy(act_dists)
        # `epsilon` guards log(0) for actions assigned ~zero probability.
        policy_grad_loss = -log(act_dists.pick(acts) + epsilon) * rets
        loss = policy_grad_loss - self.entropy_weight * entropy_loss
        return loss, dict(
            entropy=entropy_loss,
            policy_grad_loss=policy_grad_loss,
        )
def __init__(self, agent, predictor=None, **kwargs):
    """Wrap `agent`, adding a scalar return-predictor network.

    BUG FIX: the original default `predictor=DenseBlock()` is evaluated
    once at definition time, so every wrapper constructed without an
    explicit trunk would share the same block (and parameters); build a
    fresh one per instance instead.
    """
    super().__init__(agent, **kwargs)
    if predictor is None:
        predictor = DenseBlock()
    self.predictor = Sequential()
    # Dense head with a single output: the predicted scalar value.
    self.predictor.add(predictor, DenseBlock(
        n_dims=1,
        n_hidden_layers=1,
    ))
def __init__(self, policy=None, entropy_weight=1e-2, **kwargs):
    """Build the policy network: user trunk + dense softmax action head.

    BUG FIX: the original default `policy=DenseBlock()` is evaluated once
    at definition time, so every instance constructed without an explicit
    trunk would share the same block (and parameters); build a fresh one
    per instance instead.
    """
    Agent.__init__(self, **kwargs)
    self.entropy_weight = entropy_weight
    if policy is None:
        policy = DenseBlock()
    self.policy = Sequential()
    self.policy.add(
        policy,
        DenseBlock(n_dims=self.action_space.n, n_hidden_layers=1,
                   transform=softmax))
class WithHabits(AgentWrapper):
    """Agent wrapper that short-circuits the wrapped agent with "habits".

    A predictor network imitates the wrapped agent.  When the predictor is
    confident (low entropy over actions), its sampled action is used
    directly and the reward is accumulated; otherwise the wrapped agent is
    consulted with the accumulated reward and the accumulators reset.
    """

    def __init__(self, agent, predictor=None, entropy_weight=1e-2, **kwargs):
        super().__init__(agent, **kwargs)
        self.entropy_weight = entropy_weight
        # BUG FIX: the original default `predictor=DenseBlock()` is
        # evaluated once at definition time and therefore shared across all
        # instances; build a fresh block per instance instead.
        if predictor is None:
            predictor = DenseBlock()
        self.predictor = Sequential()
        self.predictor.add(
            predictor,
            DenseBlock(n_dims=self.action_space.n, n_hidden_layers=1,
                       transform=softmax))

    def reset(self):
        # Accumulated reward and step count of the current habit streak.
        self.cum_rew = 0
        self.habit_steps = 0
        return super().reset()

    def step(self, ob, rew):
        act_dist = self.predictor(ob)
        ent = entropy(act_dist)
        # Low prediction entropy -> high confidence -> act out of habit.
        # FIX: report the probability as a plain float in BOTH branches;
        # the original mixed an NDArray here with `.asscalar()` below.
        habit_prob = (1 - ent).asscalar()
        if random.uniform(0, 1) < habit_prob:
            self.cum_rew += rew
            self.habit_steps += 1
            return sample_multinomial(act_dist), dict(
                habit=True,
                habit_prob=habit_prob,
                cum_rew=self.cum_rew,
                habit_steps=self.habit_steps)
        else:
            action, stats = super().step(ob, self.cum_rew)
            # FIX: capture the accumulators BEFORE resetting so the stats
            # describe the habit streak that just ended (the original reset
            # first and therefore always reported zeros here).
            cum_rew, habit_steps = self.cum_rew, self.habit_steps
            self.cum_rew = 0
            self.habit_steps = 0
            return action, dict(**stats,
                                habit=False,
                                habit_prob=habit_prob,
                                cum_rew=cum_rew,
                                habit_steps=habit_steps)

    def loss(self, obs, acts, rews, infos):
        """Wrapped agent's loss plus the predictor's imitation loss."""
        pred_act_dists = self.predictor(obs)
        # Negative log-likelihood of the actions actually taken;
        # `epsilon` guards log(0).
        prediction_loss = -log(pred_act_dists.pick(acts) + epsilon)
        entropy_loss = entropy(pred_act_dists)
        loss, stats = super().loss(obs, acts, rews, infos)
        return loss + prediction_loss - self.entropy_weight * entropy_loss, dict(
            **stats,
            habit_prediction_loss=prediction_loss,
            habit_prediction_entropy=entropy_loss,
            habit_steps=len([info for info in infos if info['habit']]))
def __init__(self, agent, predictor=None, entropy_weight=1e-2, **kwargs):
    """Wrap `agent`, adding an action-predictor with a softmax head.

    BUG FIX: the original default `predictor=DenseBlock()` is evaluated
    once at definition time and therefore shared across all instances;
    build a fresh block per instance instead.
    """
    super().__init__(agent, **kwargs)
    self.entropy_weight = entropy_weight
    if predictor is None:
        predictor = DenseBlock()
    self.predictor = Sequential()
    self.predictor.add(
        predictor,
        DenseBlock(n_dims=self.action_space.n, n_hidden_layers=1,
                   transform=softmax))
def __init__(self, n_dims=128, **kwargs):
    """Assemble the convolutional encoder producing `n_dims` features.

    Raises ValueError when `n_dims` < 16, since the narrowest layer is
    `n_dims / 16` channels wide.
    """
    PersistentBlock.__init__(self, **kwargs)
    if n_dims < 16:
        raise ValueError('`n_dims` must be at least 16 (given: %d)' % n_dims)
    # Channel widths grow from n_dims/16 up to n_dims across the stack.
    layers = (
        BatchNorm(),
        Conv2D(int(n_dims / 16), 6, (4, 3)),
        Activation('relu'),
        Conv2D(int(n_dims / 8), 3),
        Activation('relu'),
        Conv2D(int(n_dims / 2), 3),
        BatchNorm(),
        MaxPool2D(),
        Activation('relu'),
        Conv2D(int(n_dims), 3),
        MaxPool2D(),
        Activation('relu'),
        Conv2D(int(n_dims), 3),
        MaxPool2D(),
        Activation('relu'),
        Flatten(),
    )
    self.encoder = Sequential()
    self.encoder.add(*layers)
def __init__(self, n_dims=16, n_hidden_units=16, n_hidden_layers=2,
             activation='relu', transform=(lambda x: x), **kwargs):
    """Flatten -> `n_hidden_layers` dense layers -> linear `n_dims` output.

    `transform` is applied elsewhere by the block; stored as-is here.
    """
    PersistentBlock.__init__(self, **kwargs)
    self.transform = transform
    hidden = [Dense(n_hidden_units, activation=activation)
              for _ in range(n_hidden_layers)]
    self.seq = Sequential()
    self.seq.add(Flatten(), *hidden, Dense(n_dims))
def main():
    """Train a small triplet-loss embedding net on MNIST and visualize it."""
    ctx = mx.cpu()
    batch_size = 1024
    random.seed(47)

    mnist_train = MNIST(train=True)  # load training set
    tr_data = mnist_train._data.reshape((-1, 28 * 28))  # data
    tr_label = mnist_train._label  # labels
    mnist_test = MNIST(train=False)  # load test set
    te_data = mnist_test._data.reshape((-1, 28 * 28))  # data
    te_label = mnist_test._label  # labels

    def transform(data_, label_):
        # Scale pixels to [0, 1] float32; cast labels to float32.
        return data_.astype(np.float32) / 255., label_.astype(np.float32)

    train_data = DataLoader(
        TripletDataset(rd=tr_data, rl=tr_label, transform=transform),
        batch_size, shuffle=True)
    test_data = DataLoader(
        TripletDataset(rd=te_data, rl=te_label, transform=transform),
        batch_size, shuffle=True)

    # Two-layer dense embedding network (256 -> 128 features).
    base_net = Sequential()
    with base_net.name_scope():
        base_net.add(Dense(256, activation='relu'))
        base_net.add(Dense(128, activation='relu'))
    base_net.collect_params().initialize(mx.init.Uniform(scale=0.1), ctx=ctx)

    triplet_loss = gluon.loss.TripletLoss()  # TripletLoss loss function
    trainer_triplet = gluon.Trainer(base_net.collect_params(), 'sgd',
                                    {'learning_rate': 0.05})

    for epoch in range(10):
        curr_loss = 0.0
        for i, (data, _) in enumerate(train_data):
            data = data.as_in_context(ctx)
            # Each sample stacks an (anchor, positive, negative) triplet.
            anc_ins, pos_ins, neg_ins = data[:, 0], data[:, 1], data[:, 2]
            with autograd.record():
                inter1 = base_net(anc_ins)
                inter2 = base_net(pos_ins)
                inter3 = base_net(neg_ins)
                loss = triplet_loss(inter1, inter2, inter3)  # Triplet Loss
            loss.backward()
            trainer_triplet.step(batch_size)
            # NOTE: only the last batch's mean loss survives the epoch.
            curr_loss = mx.nd.mean(loss).asscalar()
            # print('Epoch: %s, Batch: %s, Triplet Loss: %s' % (epoch, i, curr_loss))
        print('Epoch: %s, Triplet Loss: %s' % (epoch, curr_loss))
        evaluate_net(base_net, test_data, ctx=ctx)

    # Data visualization: project raw pixels and learned embeddings
    # with the TensorBoard projector.
    te_data, te_label = transform(te_data, te_label)
    tb_projector(te_data.asnumpy(), te_label,
                 os.path.join(ROOT_DIR, 'logs', 'origin'))
    te_res = base_net(te_data)
    tb_projector(te_res.asnumpy(), te_label,
                 os.path.join(ROOT_DIR, 'logs', 'triplet'))
class WithValueEstimator(AgentWrapper):
    """Agent wrapper that learns a value function and bootstraps returns.

    The predictor estimates the return from an observation; the episode's
    final reward is augmented with the predicted return of the last
    observation so the wrapped agent sees a bootstrapped reward tail.
    """

    def __init__(self, agent, predictor=None, **kwargs):
        super().__init__(agent, **kwargs)
        # BUG FIX: the original default `predictor=DenseBlock()` is
        # evaluated once at definition time and therefore shared across all
        # instances; build a fresh block per instance instead.
        if predictor is None:
            predictor = DenseBlock()
        self.predictor = Sequential()
        self.predictor.add(predictor, DenseBlock(
            n_dims=1,
            n_hidden_layers=1,
        ))

    def loss(self, obs, acts, rews, infos, **kwargs):
        """Wrapped loss on bootstrapped rewards + value-prediction loss."""
        # Squared error between observed discounted returns and predictions.
        return_pred_loss = (returns(rews, self.discount) - self.predictor(obs))**2
        # Bootstrap: fold the predicted remaining return into the last reward.
        predicted_rest_return = self.predictor(obs[-1].expand_dims(axis=0))[0]
        new_rews = concat(rews[:-1], rews[-1] + predicted_rest_return, dim=0)
        loss, stats = super().loss(obs, acts, new_rews, infos, **kwargs)
        return loss + return_pred_loss, dict(**stats,
                                             return_pred_loss=return_pred_loss)
class WithCuriousity(AgentWrapper):
    """Intrinsic-curiosity wrapper (ICM-style).

    An encoder embeds observations; a forward model predicts the next
    embedding from (embedding, action) and its error ("surprise") is mixed
    into the rewards as an intrinsic bonus; an inverse model predicts the
    action from consecutive embeddings to keep the encoding action-relevant.

    NOTE(review): the class name keeps the original spelling ("Curiousity")
    because callers elsewhere may reference it.
    """

    def __init__(self, agent, encoder=None, forward_model=None,
                 inverse_model=None, action_pred_weight=0.8,
                 curiosity_rews_weight=0.8, **kwargs):
        super().__init__(agent, **kwargs)
        self.action_pred_weight = float(action_pred_weight)
        self.curiosity_rews_weight = float(curiosity_rews_weight)
        # BUG FIX: the original `DenseBlock()` defaults are evaluated once
        # at definition time and therefore shared across all instances;
        # build fresh blocks per instance instead.
        self.encoder = DenseBlock() if encoder is None else encoder
        self.forward_model = (DenseBlock() if forward_model is None
                              else forward_model)
        if inverse_model is None:
            inverse_model = DenseBlock()
        self.inverse_model = Sequential()
        self.inverse_model.add(
            inverse_model,
            DenseBlock(n_dims=self.action_space.n, n_hidden_layers=1,
                       transform=softmax))
        self.softmax_loss = loss.SoftmaxCrossEntropyLoss()

    def loss(self, obs, acts, rews, infos, **kwargs):
        """Wrapped loss on curiosity-mixed rewards + ICM model losses."""
        encs = self.encoder(obs)
        one_hot_acts = one_hot(acts, self.action_space.n)
        # Forward model: predict enc[t+1] from (enc[t], act[t]).
        enc_preds = self.forward_model(concat(encs[:-1], one_hot_acts[:-1]))
        # Inverse model: predict act[t] from (enc[t], enc[t+1]).
        action_preds = self.inverse_model(concat(encs[:-1], encs[1:]))
        action_pred_loss = self.softmax_loss(action_preds, acts[:-1])
        # Surprise = forward-model error; used as the intrinsic reward.
        surprise = cosine_distance(enc_preds, encs[1:])
        extra_rews = concat(surprise, array([0]), dim=0)  # pad final step
        total_rews = \
            (1 - self.curiosity_rews_weight)*rews + \
            self.curiosity_rews_weight*extra_rews
        wrapped_loss, stats = super().loss(obs, acts, total_rews, infos,
                                           **kwargs)
        curiosity_loss = \
            (1-self.action_pred_weight)*surprise \
            + self.action_pred_weight*action_pred_loss
        return wrapped_loss + curiosity_loss.sum(), dict(
            **stats,
            surprise=surprise,
            action_prediction_loss=action_pred_loss)
def __init__(self, agent, encoder=None, forward_model=None,
             inverse_model=None, action_pred_weight=0.8,
             curiosity_rews_weight=0.8, **kwargs):
    """Wrap `agent` with ICM-style encoder / forward / inverse models.

    BUG FIX: the original `DenseBlock()` defaults are evaluated once at
    definition time and therefore shared across all instances; build
    fresh blocks per instance instead.
    """
    super().__init__(agent, **kwargs)
    self.action_pred_weight = float(action_pred_weight)
    self.curiosity_rews_weight = float(curiosity_rews_weight)
    self.encoder = DenseBlock() if encoder is None else encoder
    self.forward_model = (DenseBlock() if forward_model is None
                          else forward_model)
    if inverse_model is None:
        inverse_model = DenseBlock()
    self.inverse_model = Sequential()
    self.inverse_model.add(
        inverse_model,
        DenseBlock(n_dims=self.action_space.n, n_hidden_layers=1,
                   transform=softmax))
    self.softmax_loss = loss.SoftmaxCrossEntropyLoss()
def vgg_block(num_convs, num_channels):
    """Return a VGG block: `num_convs` 3x3 same-padding conv-relu layers
    followed by a 2x2 stride-2 max-pool."""
    block = Sequential()
    convs = [Conv2D(num_channels, kernel_size=3, padding=1, activation='relu')
             for _ in range(num_convs)]
    block.add(*convs, MaxPool2D(pool_size=2, strides=2))
    return block
class AtariImageEncoder(PersistentBlock):
    """Convolutional encoder turning Atari frames into `n_dims` features."""

    def __init__(self, n_dims=128, **kwargs):
        PersistentBlock.__init__(self, **kwargs)
        if n_dims < 16:
            raise ValueError('`n_dims` must be at least 16 (given: %d)' % n_dims)
        # Channel widths grow from n_dims/16 up to n_dims across the stack.
        self.encoder = Sequential()
        self.encoder.add(BatchNorm(),
                         Conv2D(int(n_dims / 16), 6, (4, 3)),
                         Activation('relu'),
                         Conv2D(int(n_dims / 8), 3),
                         Activation('relu'),
                         Conv2D(int(n_dims / 2), 3),
                         BatchNorm(),
                         MaxPool2D(),
                         Activation('relu'),
                         Conv2D(int(n_dims), 3),
                         MaxPool2D(),
                         Activation('relu'),
                         Conv2D(int(n_dims), 3),
                         MaxPool2D(),
                         Activation('relu'),
                         Flatten())

    def forward(self, img):
        """Encode a batch of images into feature vectors."""
        # NHWC -> NCHW for the convolution stack.
        nchw = img.transpose((0, 3, 1, 2))
        # Keep every other row/column: a cheap 2x spatial downscale.
        downscaled = nchw[:, :, ::2, ::2]
        # Roughly center and scale pixel values
        # (assumes 0-255 inputs -- TODO confirm with callers).
        normalized = (downscaled - 128) / 255
        return self.encoder(normalized)
import d2lzh as d2l
from mxnet import gluon, init, nd
from mxnet.gluon.nn import Sequential, Conv2D, Dense, MaxPool2D, Dropout

# LeNet-style classifier with dropout between the dense layers.
net = Sequential()
net.add(Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
        MaxPool2D(pool_size=2, strides=2),
        Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
        MaxPool2D(pool_size=2, strides=2),
        Dense(120, activation='sigmoid'),
        Dropout(0.05),
        Dense(84, activation='sigmoid'),
        Dropout(0.05),
        Dense(10))

batch_size = 256
train_iter, test_iter = d2l.load_data_mnist(batch_size=batch_size)

lr, num_epochs = 0.9, 20
ctx = d2l.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
# NOTE: removed the unused dummy input the original built here
# (`X = nd.random.uniform(shape=(1, 1, 28, 28))`) -- it was never used.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs)
def train(hyperparameters, channel_input_dirs, num_gpus, hosts):
    """SageMaker-style training entry point.

    Builds a small CNN classifier, trains it with Adam on pickled datasets
    found under the 'training' channel, prints per-epoch metrics, and
    returns the trained network.

    Parameters:
        hyperparameters: dict; reads "batch_size" (default 64) and
            "epochs" (default 3).
        channel_input_dirs: dict mapping channel name -> directory;
            'training' must contain train/data.p and validation/data.p.
        num_gpus: number of GPUs available (0 -> CPU).
        hosts: list of training hosts; >1 selects a distributed kvstore.
    """
    batch_size = hyperparameters.get("batch_size", 64)
    epochs = hyperparameters.get("epochs", 3)
    mx.random.seed(42)

    training_dir = channel_input_dirs['training']
    # Both pickles live under the training channel directory.
    # (Renamed the with-target: the original called it `pickle`, shadowing
    # the stdlib module name.)
    with open("{}/train/data.p".format(training_dir), "rb") as pickle_file:
        train_nd = load(pickle_file)
    with open("{}/validation/data.p".format(training_dir), "rb") as pickle_file:
        validation_nd = load(pickle_file)

    train_data = gluon.data.DataLoader(train_nd, batch_size, shuffle=True)
    validation_data = gluon.data.DataLoader(validation_nd, batch_size,
                                            shuffle=True)

    net = Sequential()
    # http://gluon.mxnet.io/chapter03_deep-neural-networks/plumbing.html
    # What's-the-deal-with-name_scope()?
    with net.name_scope():
        net.add(Conv2D(channels=32, kernel_size=(3, 3), padding=0,
                       activation="relu"))
        net.add(Conv2D(channels=32, kernel_size=(3, 3), padding=0,
                       activation="relu"))
        net.add(MaxPool2D(pool_size=(2, 2)))
        net.add(Dropout(.25))
        net.add(Flatten())
        net.add(Dense(8))

    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()
    # Xavier initialization, also known as Glorot.
    net.collect_params().initialize(Xavier(magnitude=2.24), ctx=ctx)
    loss = SoftmaxCrossEntropyLoss()

    # kvstore type for multi-GPU and distributed training.
    if len(hosts) == 1:
        kvstore = "device" if num_gpus > 0 else "local"
    else:
        # BUG FIX: the original string was "dist_device_sync'" with a stray
        # trailing apostrophe inside the literal -- not a valid kvstore type.
        kvstore = "dist_device_sync" if num_gpus > 0 else "dist_sync"
    trainer = Trainer(net.collect_params(), optimizer="adam", kvstore=kvstore)

    smoothing_constant = .01
    for e in range(epochs):
        moving_loss = 0
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data)
                loss_result = loss(output, label)
            loss_result.backward()
            trainer.step(batch_size)
            curr_loss = nd.mean(loss_result).asscalar()
            # Exponential moving average of the batch loss, seeded with the
            # very first batch's value.
            moving_loss = (curr_loss if ((i == 0) and (e == 0))
                           else (1 - smoothing_constant) * moving_loss +
                           smoothing_constant * curr_loss)
        validation_accuracy = measure_performance(net, ctx, validation_data)
        train_accuracy = measure_performance(net, ctx, train_data)
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
              (e, moving_loss, train_accuracy, validation_accuracy))
    return net
def create_model(self) -> Sequential:
    """Build the embedding -> BiLSTM -> dense classifier network."""
    embedding_size = 100
    model = Sequential()
    with model.name_scope():
        # Embedding maps (batch_size,) token ids to
        # (batch_size, embedding_size) vectors.
        model.add(
            Embedding(input_dim=self.vocab_size, output_dim=embedding_size))
        model.add(Dropout(0.2))
        # layout : str, default 'TNC'
        # T, N and C stand for sequence length, batch size, and feature
        # dimensions.  'NTC' lets the input be
        # (batch_size, sequence_length, embedding_size).
        model.add(LSTM(hidden_size=64, layout='NTC', bidirectional=True))
        model.add(Dense(len(self.labels)))
    return model