def create_convnet_cifar10_model(num_classes):
    """Build an (unapplied) convolutional classifier.

    The network is two convolutional stages (two 3x3/64 convolutions followed
    by a 3x3 stride-2 max pooling), two dense stages (256 then 128 units, each
    followed by 50% dropout) and a linear output layer.

    Args:
        num_classes: width of the final, activation-free ``Dense`` layer.

    Returns:
        The ``Sequential`` composition; the caller applies it to an input.
    """
    dense_widths = [256, 128]

    def conv_stage():
        # Two same-padded convolutions, then spatial down-sampling.
        return [
            Convolution2D((3, 3), 64),
            Convolution2D((3, 3), 64),
            MaxPooling((3, 3), strides=2),
        ]

    def dense_stage(idx):
        return [Dense(dense_widths[idx]), Dropout(0.5)]

    # The builders run inside this context, so every layer picks up
    # relu activation and padding unless it overrides them explicitly.
    with default_options(activation=relu, pad=True):
        return Sequential([
            For(range(2), conv_stage),
            For(range(2), dense_stage),
            Dense(num_classes, activation=None),
        ])
def create_vgg16(feature_var, num_classes, dropout=0.9):
    """Build the VGG-16 style network and apply it to ``feature_var``.

    Convolutions and their ReLU activations are created as separate, named
    layers so that the pre-activation output can be looked up by name for
    feature extraction.

    Args:
        feature_var: the input the network is applied to.
        num_classes: width of the final ``fc8`` layer.
        dropout: rate passed to the ``drop6`` and ``drop7`` layers.

    Returns:
        The network applied to ``feature_var``.
    """
    # (repeat count, filters, conv name template, relu name template, pool name)
    stage_specs = [
        (2, 64, 'conv1_{}', 'relu1_{}', 'pool1'),
        (2, 128, 'conv2_{}', 'relu2_{}', 'pool2'),
        (3, 256, 'conv3_{}', 'relu3_{}', 'pool3'),
        (3, 512, 'conv4_{}', 'relu4_{}', 'pool4'),
        (3, 512, 'conv5_{}', 'relu5_{}', 'pool5'),
    ]

    def conv_relu_pairs(repeats, filters, conv_fmt, relu_fmt):
        def build(idx):
            return [
                Convolution2D((3, 3), filters, name=conv_fmt.format(idx)),
                Activation(activation=relu, name=relu_fmt.format(idx)),
            ]
        return For(range(repeats), build)

    with default_options(activation=None, pad=True, bias=True):
        layers = []
        for repeats, filters, conv_fmt, relu_fmt, pool_name in stage_specs:
            layers.append(conv_relu_pairs(repeats, filters, conv_fmt, relu_fmt))
            layers.append(MaxPooling((2, 2), (2, 2), name=pool_name))

        # Classifier head.
        layers.extend([
            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(dropout, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(dropout, name='drop7'),
            Dense(num_classes, name='fc8'),
        ])
        return Sequential(layers)(feature_var)
def wgan_critic(h):
    """Apply the critic network to ``h`` and return its output.

    Six stride-2 3x3 convolutions with leaky-ReLU (alpha 0.2) are stacked;
    all but the first are layer-normalised and bias-free. A final unpadded
    4x4 convolution with a single filter produces the result.

    Args:
        h: the input the critic is applied to.

    Returns:
        The output of the final single-filter convolution.
    """
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False):
        # First stage: biased convolution, no normalisation.
        h = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(h), alpha=0.2)

        # Remaining stages: convolution -> layer normalisation -> leaky ReLU.
        for num_filters in (64, 128, 256, 512, 1024):
            normalised = LayerNormalization()(Convolution2D((3, 3), num_filters, strides=2)(h))
            h = C.leaky_relu(normalised, alpha=0.2)

        return Convolution2D((4, 4), 1, pad=False, strides=1, bias=True)(h)
def pix2pix_generator(h):
    """Apply the encoder/decoder generator with skip connections to ``h``.

    The encoder is eight 4x4 convolutions with leaky-ReLU (alpha 0.2); every
    stage but the first is batch-normalised. The decoder mirrors it with eight
    transposed convolutions (batch norm + 50% dropout), each spliced along
    axis 0 with the encoder output of the same level before a ReLU. A final
    transposed convolution with ``tanh`` produces 3 output maps.

    Reads the module-level ``img_height`` and ``img_width`` to compute the
    explicit ``output_shape`` of every transposed convolution.

    Args:
        h: the input the generator is applied to.

    Returns:
        The output of the final ``tanh`` transposed convolution.
    """
    # (filters, stride) for encoder stages 2..8; stage 1 is built separately.
    encoder_specs = [
        (128, 2), (256, 2), (512, 2), (512, 2), (512, 2), (512, 1), (512, 1),
    ]
    # (filters, stride, divisor of the image size) for decoder stages 8..1.
    decoder_specs = [
        (512, 1, 64), (512, 1, 64), (512, 1, 64), (512, 2, 32),
        (512, 2, 16), (256, 2, 8), (128, 2, 4), (64, 2, 2),
    ]

    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False,
                                  map_rank=1, use_cntk_engine=True):
        # Encoder: keep every stage's output for the skip connections.
        skips = [C.leaky_relu(Convolution2D((4, 4), 64, strides=2, bias=True)(h), alpha=0.2)]
        for num_filters, stride in encoder_specs:
            skips.append(C.leaky_relu(
                BatchNormalization()(Convolution2D((4, 4), num_filters, strides=stride)(skips[-1])),
                alpha=0.2))

        # Decoder: deepest encoder output first, pairing each stage with the
        # encoder output of the same level (8 with 8, ..., 1 with 1).
        x = skips[-1]
        for (num_filters, stride, divisor), skip in zip(decoder_specs, reversed(skips)):
            upsampled = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
                (4, 4), num_filters, strides=stride, pad=True,
                output_shape=(img_height // divisor, img_width // divisor))(x)))
            x = C.relu(C.splice(upsampled, skip, axis=0))

        return ConvolutionTranspose2D(
            (4, 4), 3, activation=C.tanh, strides=2, pad=True, bias=True,
            output_shape=(img_height, img_width))(x)
def create_symbol():
    """Build the classifier graph on the module-level ``features`` input.

    Two stages of (two 3x3 same-padded convolutions, 2x2 stride-2 max pooling,
    25% dropout) with 50 and 100 filters respectively, then a 512-unit dense
    layer with 50% dropout and a linear ``N_CLASSES``-wide output layer.

    Returns:
        The output of the final dense layer.
    """
    # Weights use Glorot-uniform initialisation; layers apply relu unless
    # they pass an explicit activation (only the output layer does).
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.relu):
        x = features
        for num_filters in (50, 100):
            x = Convolution2D(filter_shape=(3, 3), num_filters=num_filters, pad=True)(x)
            x = Convolution2D(filter_shape=(3, 3), num_filters=num_filters, pad=True)(x)
            x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)
            x = Dropout(0.25)(x)

        hidden = Dense(512)(x)
        hidden = Dropout(0.5)(hidden)
        return Dense(N_CLASSES, activation=None)(hidden)
def pix2pix_discriminator(y, x):
    """Apply the two-input discriminator and return its output.

    ``x`` and ``y`` each pass through their own biased stride-2 3x3
    convolution (32 filters, leaky-ReLU 0.2), are spliced along axis 0, run
    through three batch-normalised stride-2 convolutions, and end in a
    1x1 single-filter convolution without activation.

    Args:
        y: first input of the pair.
        x: second input of the pair.

    Returns:
        The output of the final 1x1 convolution.
    """
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False,
                                  map_rank=1, use_cntk_engine=True):
        # Separate stems for the two inputs (x's stem is created first).
        x = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(x), alpha=0.2)
        y = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(y), alpha=0.2)

        h = C.splice(x, y, axis=0)
        for num_filters in (128, 256, 512):
            normalised = BatchNormalization()(Convolution2D((3, 3), num_filters, strides=2)(h))
            h = C.leaky_relu(normalised, alpha=0.2)

        return Convolution2D((1, 1), 1, activation=None, bias=True)(h)
def test_layers_convolution_2d():
    """Check shape and value of an unpadded 3x3 convolution on a 3x3 input.

    With an all-ones 1x3x3 input and a single 3x3 filter, the layer produces
    one value, which must equal the sum of the filter weights.
    """
    in_channels, in_height, in_width = 1, 3, 3
    y = input((in_channels, in_height, in_width))
    ones_batch = np.ones([1, in_channels, in_height, in_width], dtype=np.float32)

    model = Convolution2D((3, 3), num_filters=1, activation=None, pad=False,
                          strides=1, name='foo')

    # The kernel covers the whole input, so the output collapses to 1x1x1.
    output_shape = model(y).foo.shape
    np.testing.assert_array_equal(
        output_shape, (1, 1, 1),
        "Error in convolution2D with stride = 1 and padding")

    res = model(y).eval({y: ones_batch})
    expected_res = np.sum(model.foo.W.value)
    np.testing.assert_array_almost_equal(
        res[0][0][0][0], expected_res, decimal=5,
        err_msg="Error in convolution2D computation with stride = 1 and zeropad = True")
def dcgan_discriminator(h):
    """Apply the discriminator network to ``h`` and return its output.

    Six stride-2 3x3 convolutions with leaky-ReLU (alpha 0.2) are stacked;
    all but the first are batch-normalised and bias-free. A final unpadded
    4x4 single-filter convolution with a sigmoid produces the result.

    Args:
        h: the input the discriminator is applied to.

    Returns:
        The output of the final sigmoid convolution.
    """
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False,
                                  map_rank=1, use_cntk_engine=True):
        # First stage: biased convolution, no normalisation.
        h = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(h), alpha=0.2)

        # Remaining stages: convolution -> batch normalisation -> leaky ReLU.
        for num_filters in (64, 128, 256, 512, 1024):
            normalised = BatchNormalization()(Convolution2D((3, 3), num_filters, strides=2)(h))
            h = C.leaky_relu(normalised, alpha=0.2)

        return Convolution2D((4, 4), 1, activation=C.sigmoid, pad=False,
                             bias=True, strides=1)(h)
def create_alexnet():
    """Build the AlexNet-style graph together with its loss and metrics.

    Reads the module-level ``num_channels``, ``image_height``, ``image_width``
    and ``num_classes``. A constant 114 is subtracted from the features before
    they enter the network.

    Returns:
        A dict with the input variables (``'feature'``, ``'label'``), the
        cross-entropy loss (``'ce'``), the top-1 and top-5 classification
        errors (``'pe'``, ``'pe5'``) and the network output (``'output'``).
    """
    # Input variables denoting the features and label data.
    feature_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # Remove the mean value before applying the model.
    mean_removed_features = minus(feature_var, constant(114), name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        # Convolution and ReLU are kept as separate named layers so the
        # pre-activation output can be addressed for feature extraction.
        layers = [
            Convolution2D((11, 11), 96, init=normal(0.01), pad=False, strides=(4, 4), name='conv1'),
            Activation(activation=relu, name='relu1'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm1'),
            MaxPooling((3, 3), (2, 2), name='pool1'),

            Convolution2D((5, 5), 192, init=normal(0.01), init_bias=0.1, name='conv2'),
            Activation(activation=relu, name='relu2'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm2'),
            MaxPooling((3, 3), (2, 2), name='pool2'),

            Convolution2D((3, 3), 384, init=normal(0.01), name='conv3'),
            Activation(activation=relu, name='relu3'),
            Convolution2D((3, 3), 384, init=normal(0.01), init_bias=0.1, name='conv4'),
            Activation(activation=relu, name='relu4'),
            Convolution2D((3, 3), 256, init=normal(0.01), init_bias=0.1, name='conv5'),
            Activation(activation=relu, name='relu5'),
            MaxPooling((3, 3), (2, 2), name='pool5'),

            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, init=normal(0.01), name='fc8'),
        ]
        z = Sequential(layers)(mean_removed_features)

    # Loss and metrics.
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z
    }
def __init__(self, input_shape, nb_actions, gamma=0.99, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000), learning_rate=0.00025, momentum=0.95, minibatch_size=32, memory_size=500000, train_after=200000, train_interval=4, target_update_interval=10000, monitor=True):
    """Set up the Q-network, its frozen target copy, the loss and the trainer.

    Args:
        input_shape: shape of the state fed to the network; also used for the
            history buffer, and ``input_shape[1:]`` is passed to the replay
            memory.
        nb_actions: width of the network output and of the ``actions`` input
            of the criterion.
        gamma: factor applied to the target network's maximum Q-value.
        explorer: exploration policy object stored on the agent.
        learning_rate: learning rate of the Adam learner (per minibatch).
        momentum: momentum of the Adam learner.
        minibatch_size: stored for use by the training code.
        memory_size: size passed to the replay memory.
        train_after: stored for use by the training code.
        train_interval: stored for use by the training code.
        target_update_interval: stored for use by the training code.
        monitor: when true, a TensorBoard progress writer logging to
            ``'metrics'`` is created and attached to the trainer.
    """
    # NOTE(review): the default `explorer` is instantiated once, when this
    # function is defined, so every agent built without an explicit explorer
    # shares the same object. Confirm the explorer keeps no per-agent state.
    self.input_shape = input_shape
    self.nb_actions = nb_actions
    self.gamma = gamma

    self._train_after = train_after
    self._train_interval = train_interval
    self._target_update_interval = target_update_interval

    self._explorer = explorer
    self._minibatch_size = minibatch_size
    self._history = History(input_shape)
    self._memory = ReplayMemory(memory_size, input_shape[1:], 4)
    self._num_actions_taken = 0

    # Metrics accumulator
    self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

    # Action Value model (used by agent to interact with the environment)
    with default_options(activation=relu, init=he_uniform()):
        self._action_value_net = Sequential([
            Convolution2D((8, 8), 16, strides=4),
            Convolution2D((4, 4), 32, strides=2),
            Convolution2D((3, 3), 32, strides=1),
            Dense(256, init=he_uniform(scale=0.01)),
            Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))
        ])
    self._action_value_net.update_signature(Tensor[input_shape])

    # Target model used to compute the target Q-values in training, updated
    # less frequently for increased stability.
    self._target_net = self._action_value_net.clone(CloneMethod.freeze)

    # Function computing Q-values targets as part of the computation graph
    @Function
    @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
    def compute_q_targets(post_states, rewards, terminals):
        # Where `terminals` is set the target is the reward alone; otherwise
        # the discounted maximum target-network Q-value is added to it.
        return element_select(
            terminals,
            rewards,
            gamma * reduce_max(self._target_net(post_states), axis=0) + rewards,
        )

    # Define the loss, using Huber Loss (more robust to outliers)
    @Function
    @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
               post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
    def criterion(pre_states, actions, post_states, rewards, terminals):
        # Compute the q_targets
        q_targets = compute_q_targets(post_states, rewards, terminals)

        # actions is a 1-hot encoding of the action done by the agent
        q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

        # Define training criterion as the Huber Loss function
        return huber_loss(q_targets, q_acted, 1.0)

    # Adam based SGD
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
    m_schedule = momentum_schedule(momentum)
    vm_schedule = momentum_schedule(0.999)

    l_sgd = adam(self._action_value_net.parameters, lr_schedule,
                 momentum=m_schedule, variance_momentum=vm_schedule)

    # Progress writer is optional; the trainer receives None when disabled.
    self._metrics_writer = TensorBoardProgressWriter(
        freq=1, log_dir='metrics', model=criterion) if monitor else None
    self._learner = l_sgd
    # The criterion doubles as the loss; no separate evaluation metric.
    self._trainer = Trainer(criterion, (criterion, None), l_sgd, self._metrics_writer)
def create_vgg19():
    """Build the VGG-19 style graph together with its loss and metrics.

    Reads the module-level ``num_channels``, ``image_height``, ``image_width``
    and ``num_classes``. A per-channel constant (104, 117, 124) is subtracted
    from the features before they enter the network.

    Returns:
        A dict with the input variables (``'feature'``, ``'label'``), the
        cross-entropy loss (``'ce'``), the top-1 and top-5 classification
        errors (``'pe'``, ``'pe5'``) and the network output (``'output'``).
    """
    # (repeat count, filters, conv name template, relu name template, pool name)
    stage_specs = [
        (2, 64, 'conv1_{}', 'relu1_{}', 'pool1'),
        (2, 128, 'conv2_{}', 'relu2_{}', 'pool2'),
        (4, 256, 'conv3_{}', 'relu3_{}', 'pool3'),
        (4, 512, 'conv4_{}', 'relu4_{}', 'pool4'),
        (4, 512, 'conv5_{}', 'relu5_{}', 'pool5'),
    ]

    def conv_relu_pairs(repeats, filters, conv_fmt, relu_fmt):
        def build(idx):
            return [
                Convolution2D((3, 3), filters, name=conv_fmt.format(idx)),
                Activation(activation=relu, name=relu_fmt.format(idx)),
            ]
        return For(range(repeats), build)

    # Input variables denoting the features and label data.
    # `input` is the module-level input-variable factory (presumably cntk's
    # `input`; verify against the file's imports). It must NOT be rebound
    # anywhere in this function: a local assignment to `input` would make the
    # name local for the whole body and these calls would raise
    # UnboundLocalError.
    feature_var = input((num_channels, image_height, image_width))
    label_var = input((num_classes))

    # Remove the mean value before applying the model.
    mean_removed_features = minus(
        feature_var, constant([[[104]], [[117]], [[124]]]), name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        # Convolution and ReLU are kept as separate named layers so the
        # pre-activation output can be addressed for feature extraction.
        layers = []
        for repeats, filters, conv_fmt, relu_fmt, pool_name in stage_specs:
            layers.append(conv_relu_pairs(repeats, filters, conv_fmt, relu_fmt))
            layers.append(MaxPooling((2, 2), (2, 2), name=pool_name))

        layers.extend([
            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, name='fc8'),
        ])
        z = Sequential(layers)(mean_removed_features)

    # Loss and metrics.
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z
    }
def build_model(self):
    """Create the input variables and the value / policy networks.

    Sets ``self.stacked_frames``, ``self.action``, ``self.R`` and
    ``self.v_calc`` (input variables), ``self.v`` / ``self.pi`` (the value and
    policy networks applied to ``self.stacked_frames``) and ``self.pms_v`` /
    ``self.pms_pi`` (their parameter lists).

    Reads ``self.STATE_WIDTH``, ``self.STATE_HEIGHT`` and ``self.num_actions``.
    Also switches CNTK's checked mode on globally.
    """
    cntk.debugging.set_checked_mode(True)

    # Input variables for training and evaluation.
    self.stacked_frames = cntk.input_variable(
        (1, self.STATE_WIDTH, self.STATE_HEIGHT), dtype=np.float32)
    self.action = cntk.input_variable(self.num_actions)
    self.R = cntk.input_variable(1, dtype=np.float32)
    # V(s) is fed to the policy loss as a plain input so that the parameters
    # of V stay fixed while pi is optimised.
    self.v_calc = cntk.input_variable(1, dtype=np.float32)

    # Value approximator.
    conv1_v = Convolution2D((8, 8), num_filters=16, pad=False, strides=4,
                            activation=cntk.relu, name='conv1_v')
    conv2_v = Convolution2D((4, 4), num_filters=32, pad=False, strides=2,
                            activation=cntk.relu, name='conv2_v')
    dense_v = Dense(256, activation=cntk.sigmoid, name='dense_v', init=cntk.xavier())
    v = Sequential([
        conv1_v,
        conv2_v,
        dense_v,
        Dense(1, activation=cntk.sigmoid, name='outdense_v', init=cntk.xavier()),
    ])

    # Policy approximator.
    conv1_pi = Convolution2D((8, 8), num_filters=16, pad=False, strides=4,
                             activation=cntk.relu, name='conv1_pi')
    conv2_pi = Convolution2D((4, 4), num_filters=32, pad=False, strides=2,
                             activation=cntk.relu, name='conv2_pi')
    dense_pi = Dense(256, activation=cntk.sigmoid, name='dense_pi', init=cntk.xavier())
    # The policy uses its own convolutions. Reusing conv1_v/conv2_v here would
    # put the value net's convolution weights into self.pms_pi, so optimising
    # pi would also move V, contrary to the note on self.v_calc above.
    pi = Sequential([
        conv1_pi,
        conv2_pi,
        dense_pi,
        Dense(self.num_actions, activation=cntk.softmax, name='outdense_pi',
              init=cntk.xavier()),
    ])

    self.pi = pi(self.stacked_frames)
    # List of cntk Parameter objects belonging to the policy network.
    self.pms_pi = self.pi.parameters

    self.v = v(self.stacked_frames)
    self.pms_v = self.v.parameters

    cntk.debugging.debug_model(v)
def main(env_name='KungFuMasterNoFrameskip-v0', train_freq=4, target_update_freq=10000, checkpoint_freq=100000, log_freq=1, batch_size=32, train_after=200000, max_timesteps=5000000, buffer_size=50000, vmin=-10, vmax=10, n=51, gamma=0.99, final_eps=0.1, final_eps_update=1000000, learning_rate=0.00025, momentum=0.95):
    """Train a ``CategoricalAgent`` on a gym environment.

    Until ``train_after`` steps have elapsed, actions are drawn uniformly at
    random and nothing is trained. Afterwards the agent acts with the scheduled
    epsilon, trains every ``train_freq`` steps on a replay-buffer sample,
    refreshes its target every ``target_update_freq`` steps and writes a
    checkpoint every ``checkpoint_freq`` steps. The model is saved under
    ``checkpoints/`` when the loop ends, including when it ends by exception.

    Args:
        env_name: gym environment id; also used in the saved model's file name.
        train_freq, target_update_freq, checkpoint_freq: step intervals for
            training, target updates and checkpoints.
        log_freq: episode interval for writing rewards/steps/epsilon.
        batch_size: number of transitions sampled per training step.
        train_after: number of initial steps taken at random without training.
        max_timesteps: total number of environment steps.
        buffer_size: size passed to the replay buffer.
        vmin, vmax, n, gamma: forwarded to ``CategoricalAgent``; ``n`` is also
            the second dimension of the output layer.
        final_eps, final_eps_update: arguments of the epsilon schedule.
        learning_rate, momentum: forwarded to the agent as ``lr`` and ``mm``.
    """
    env = wrap_env(gym.make(env_name))

    state_dim = (4, 84, 84)
    action_count = env.action_space.n

    with C.default_options(activation=C.relu, init=C.he_uniform()):
        model_func = Sequential([
            Convolution2D((8, 8), 32, strides=4, name='conv1'),
            Convolution2D((4, 4), 64, strides=2, name='conv2'),
            Convolution2D((3, 3), 64, strides=1, name='conv3'),
            Dense(512, name='dense1'),
            Dense((action_count, n), activation=None, name='out')
        ])

    agent = CategoricalAgent(state_dim, action_count, model_func, vmin, vmax, n, gamma,
                             lr=learning_rate, mm=momentum, use_tensorboard=True)
    logger = agent.writer

    epsilon_schedule = LinearSchedule(1.0, final_eps, final_eps_update)
    replay_buffer = ReplayBuffer(buffer_size)

    try:
        obs = env.reset()
        episode = 0
        rewards = 0
        steps = 0

        for t in range(max_timesteps):
            past_warmup = t > train_after

            # Take action: random during warm-up, epsilon-greedy afterwards.
            if past_warmup:
                action = agent.act(obs, epsilon=epsilon_schedule.value(t))
            else:
                action = np.random.choice(action_count)

            next_obs, reward, done, _ = env.step(action)

            # Store transition in replay buffer.
            replay_buffer.add(obs, action, reward, next_obs, float(done))

            obs = next_obs
            rewards += reward

            if past_warmup:
                if (t % train_freq) == 0:
                    # Minimize error in projected Bellman update on a batch
                    # sampled from the replay buffer.
                    experience = replay_buffer.sample(batch_size)
                    agent.train(*experience)  # experience is (s, a, r, s_, t) tuple
                    logger.write_value('loss', agent.trainer.previous_minibatch_loss_average, t)

                if (t % target_update_freq) == 0:
                    agent.update_target()

                if (t % checkpoint_freq) == 0:
                    agent.checkpoint('checkpoints/model_{}.chkpt'.format(t))

            if not done:
                continue

            episode += 1
            obs = env.reset()

            if episode % log_freq == 0:
                # `steps` holds the timestep recorded at the previous reset.
                steps = t - steps + 1

                logger.write_value('rewards', rewards, episode)
                logger.write_value('steps', steps, episode)
                logger.write_value('epsilon', epsilon_schedule.value(t), episode)
                logger.flush()

            rewards = 0
            steps = t

    finally:
        agent.save_model('checkpoints/{}.cdqn'.format(env_name))
from autcar import Trainer from cntk.layers import Dense, Sequential, Activation, Convolution2D, MaxPooling, Dropout, BatchNormalization from cntk import softmax, relu input_folder_path = "src/ml/data/autcar_training" output_folder_path = "src/ml/data/autcar_training_balanced" image_width = 224 image_height = 168 trainer = Trainer(deeplearning_framework="cntk", image_height=image_height, image_width=image_width) trainer.create_balanced_dataset(input_folder_path, output_folder_path=output_folder_path) model = Sequential([ Convolution2D(filter_shape=(5,5), num_filters=32, strides=(1,1), pad=True, name="first_conv"), BatchNormalization(map_rank=1), Activation(relu), MaxPooling(filter_shape=(3,3), strides=(2,2), name="first_max"), Convolution2D(filter_shape=(3,3), num_filters=48, strides=(1,1), pad=True, name="second_conv"), BatchNormalization(map_rank=1), Activation(relu), MaxPooling(filter_shape=(3,3), strides=(2,2), name="second_max"), Convolution2D(filter_shape=(3,3), num_filters=64, strides=(1,1), pad=True, name="third_conv"), BatchNormalization(map_rank=1), Activation(relu), MaxPooling(filter_shape=(3,3), strides=(2,2), name="third_max"), Convolution2D(filter_shape=(5,5), num_filters=32, strides=(1,1), pad=True, name="fourth_conv"), BatchNormalization(map_rank=1), Activation(relu), Dense(100, activation=relu), Dropout(0.1), Dense(12, activation=softmax)
def residual_block(h, num_filters):
    """Apply a two-convolution residual block to ``h``.

    The branch is conv -> instance norm -> ReLU -> conv -> instance norm,
    using bias-free, same-padded, stride-1 3x3 convolutions; its output is
    added to the unmodified input.

    Args:
        h: the block input (also the identity shortcut).
        num_filters: number of filters of both convolutions and the leading
            dimension of the instance-normalisation shape.

    Returns:
        The sum of the branch output and ``h``.
    """
    norm_shape = (num_filters, 1, 1)

    with C.layers.default_options(init=C.normal(0.02), pad=True, strides=1, bias=False):
        first_norm = InstanceNormalization(norm_shape)
        activated = C.relu(first_norm(Convolution2D((3, 3), num_filters)(h)))

        second_norm = InstanceNormalization(norm_shape)
        branch = second_norm(Convolution2D((3, 3), num_filters)(activated))

        # Identity shortcut.
        return branch + h
def __init__(self, state_dim, action_dim, gamma=0.99, learning_rate=1e-4, momentum=0.95):
    """Build the online network, the training graph and the trainer.

    The loss is a Huber loss (delta 1.0) on the difference between the
    Q-value of the taken action and a gradient-stopped target, weighted per
    sample by the ``is_weights`` input. Targets follow the double-DQN scheme:
    the online network picks the next action and ``self.target_model``
    evaluates it.

    Args:
        state_dim: shape of the state inputs and of the network signature.
        action_dim: width of the network output and of the ``actions`` input.
        gamma: factor applied to the next-state Q-value in the target.
        learning_rate: learning rate of the Adam learner (per minibatch).
        momentum: momentum of the Adam learner.
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.gamma = gamma

    with default_options(activation=relu, init=he_uniform()):
        # Convolution filter counts were halved to save on memory (no GPU).
        self.model = Sequential([
            Convolution2D((8, 8), 16, strides=4, name='conv1'),
            Convolution2D((4, 4), 32, strides=2, name='conv2'),
            Convolution2D((3, 3), 32, strides=1, name='conv3'),
            Dense(256, init=he_uniform(scale=0.01), name='dense1'),
            Dense(action_dim, activation=None, init=he_uniform(scale=0.01), name='actions')
        ])
    self.model.update_signature(Tensor[state_dim])

    # The target model starts out as a copy of the online model;
    # update_target() is expected to fill it in.
    self.target_model = None
    self.update_target()

    # Graph inputs.
    self.pre_states = input_variable(state_dim, name='pre_states')
    self.actions = input_variable(action_dim, name='actions')
    self.post_states = input_variable(state_dim, name='post_states')
    self.rewards = input_variable((), name='rewards')
    self.terminals = input_variable((), name='terminals')
    self.is_weights = input_variable((), name='is_weights')

    # Q-value of the action that was taken (actions multiplies as a mask).
    predicted_q = reduce_sum(self.model(self.pre_states) * self.actions, axis=0)

    # Double DQN target: the online network selects the next action, the
    # target network scores it. (Plain DQN would instead take
    # reduce_max over the target network's output.)
    greedy_action = argmax(self.model(self.post_states), axis=0)
    online_selection = one_hot(greedy_action, self.action_dim)
    next_q = reduce_sum(self.target_model(self.post_states) * online_selection, axis=0)
    # No bootstrapping where `terminals` is 1.
    next_q = (1.0 - self.terminals) * next_q
    target_q = stop_gradient(self.rewards + self.gamma * next_q)

    # Huber loss: quadratic below delta, linear above.
    delta = 1.0
    self.td_error = minus(predicted_q, target_q, name='td_error')
    abs_error = abs(self.td_error)
    within_delta = less(abs_error, delta)
    errors = element_select(within_delta,
                            square(self.td_error) * 0.5,
                            delta * (abs_error - 0.5 * delta))
    loss = errors * self.is_weights

    # Adam based SGD.
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
    m_schedule = momentum_schedule(momentum)
    vm_schedule = momentum_schedule(0.999)

    self._learner = adam(self.model.parameters, lr_schedule, m_schedule,
                         variance_momentum=vm_schedule)
    self.writer = TensorBoardProgressWriter(log_dir='metrics', model=self.model)
    self.trainer = Trainer(self.model, (loss, None), [self._learner], self.writer)