def criterion(self):
    """Build the training loss and the classification error.

    Reads ``self.length``, ``self.labels`` and, when
    ``self.use_reconstruction`` is truthy, ``self.features`` and
    ``self.training_model``.

    Returns:
        A ``(total_loss, error)`` tuple. ``total_loss`` is the margin loss,
        plus the scaled reconstruction error when reconstruction is enabled.
    """
    # Down-weighting factor for the "label is 0" half of the margin loss.
    lambda_val = 0.5
    batch_axis = ct.axis.Axis.default_batch_axis()

    # Margin loss: squared hinge terms around the 0.9 / 0.1 thresholds.
    upper_term = ct.square(ct.relu(0.9 - self.length))
    lower_term = ct.square(ct.relu(self.length - 0.1))
    upper_term = ct.reshape(upper_term, -1)
    lower_term = ct.reshape(lower_term, -1)
    per_class = (self.labels * upper_term
                 + lambda_val * (1 - self.labels) * lower_term)
    margin_loss = ct.reduce_mean(ct.reduce_sum(per_class, axis=0),
                                 axis=batch_axis)

    # Classification error of the softmax over lengths against the labels.
    predict = ct.softmax(self.length, axis=0)
    error = ct.classification_error(ct.reshape(predict, 10), self.labels)

    if not self.use_reconstruction:
        return margin_loss, error

    # Mean squared difference between the flattened model output and input.
    flat_features = ct.reshape(self.features, shape=(-1,))
    flat_decoded = ct.reshape(self.training_model, shape=(-1,))
    reconstruction_err = ct.reduce_mean(
        ct.square(flat_decoded - flat_features), axis=0)
    reconstruction_err = ct.reduce_mean(reconstruction_err, axis=batch_axis)

    total_loss = margin_loss + (0.0005*784) * reconstruction_err
    return total_loss, error
def test_relu():
    """Run ``assert_cntk_ngraph_array_equal`` on relu ops over constant inputs."""
    inputs = (
        [-2, -1., 0., 1., 2.],
        [0.],
        [-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1],
        [[1, 2, 3], [4, 5, 6]],
        [[-3, -2, -1], [1, 2, 3]],
    )
    for values in inputs:
        assert_cntk_ngraph_array_equal(C.relu(values))
def cyclegan_generator(h):
    """Build the generator graph on top of the input ``h``.

    The network is: three convolution + instance-normalization + relu stages
    (the last two with stride 2), nine ``residual_block`` stages with 256
    filters, two transposed-convolution + instance-normalization + relu
    stages that bring the output back to ``(img_height, img_width)``, and a
    final 7x7 convolution with tanh activation producing 3 channels.

    Args:
        h: input node the generator is applied to.

    Returns:
        The output node of the final tanh convolution.
    """
    with C.layers.default_options(init=C.normal(0.02), pad=True, strides=1, bias=False):
        # Downsampling stages.
        h = C.relu(InstanceNormalization((64, 1, 1))(Convolution2D((7, 7), 64)(h)))
        h = C.relu(InstanceNormalization((128, 1, 1))(Convolution2D((3, 3), 128, strides=2)(h)))
        h = C.relu(InstanceNormalization((256, 1, 1))(Convolution2D((3, 3), 256, strides=2)(h)))

        # Nine residual stages at 256 filters.
        for _ in range(9):
            h = residual_block(h, 256)

        # Upsampling stages. The normalization layer is applied to the
        # transposed-convolution output and relu to the normalized result,
        # mirroring the downsampling stages above (the previous form closed
        # the relu call around the layer itself and left an unmatched ")").
        h = C.relu(InstanceNormalization((128, 1, 1))(
            ConvolutionTranspose2D((3, 3), 128, strides=2,
                                   output_shape=(img_height // 2, img_width // 2))(h)))
        h = C.relu(InstanceNormalization((64, 1, 1))(
            ConvolutionTranspose2D((3, 3), 64, strides=2,
                                   output_shape=(img_height, img_width))(h)))

        h = Convolution2D((7, 7), 3, activation=C.tanh, bias=True)(h)

        return h
def test_conv_with_freedim_model(tmpdir):
    """Round-trip a conv/relu/max-pool model with free dimensions through ONNX.

    Builds two convolution + relu + 2x2 max-pooling stages on an input whose
    two trailing dimensions are ``C.FreeDimension``, saves the model as ONNX,
    reloads it, and checks that shape and evaluation results match.
    """
    out_dir = str(tmpdir)
    img = np.asarray(np.random.uniform(-1, 1, (3, 32, 32)), dtype=np.float32)

    x = C.input_variable((3, C.FreeDimension, C.FreeDimension))

    # Each stage: constant kernel filled with 0..N-1, convolution, relu, pool.
    root_node = x
    for kernel_shape in ((32, 3, 5, 5), (64, 32, 3, 3)):
        kernel_values = np.arange(np.prod(kernel_shape), dtype=np.float32)
        kernel = C.constant(value=kernel_values.reshape(kernel_shape))
        root_node = C.convolution(kernel, root_node, auto_padding=(False, True, True))
        root_node = C.relu(root_node)
        root_node = C.pooling(root_node, C.MAX_POOLING, (2, 2), (2, 2))

    onnx_path = os.path.join(out_dir, R'conv_with_freedim.onnx')
    root_node.save(onnx_path, format=C.ModelFormat.ONNX)

    loaded_node = C.Function.load(onnx_path, format=C.ModelFormat.ONNX)
    assert root_node.shape == loaded_node.shape

    loaded_input = loaded_node.arguments[0]
    assert np.allclose(loaded_node.eval({loaded_input: img}), root_node.eval({x: img}))

    # Additional test to ensure that loaded_node can be saved as both ONNX and CNTKv2 again.
    loaded_node.save(os.path.join(out_dir, R'conv_with_freedim2.onnx'),
                     format=C.ModelFormat.ONNX)
    loaded_node.save(os.path.join(out_dir, R'conv_with_freedim2.cntkmodel'),
                     format=C.ModelFormat.CNTKv2)
def cgan_generator(z, y):
    """Build the conditional generator from ``z`` and ``y``.

    ``z`` and ``y`` are spliced along axis 0, passed through two dense +
    batch-norm + relu stages, one transposed-convolution + batch-norm + relu
    stage (output 14x14), and a final sigmoid transposed convolution (output
    28x28). The result is reshaped to the module-level ``input_dim``.
    """
    with C.layers.default_options(init=C.normal(scale=0.02), bias=False,
                                  map_rank=1, use_cntk_engine=True):
        joined = C.splice(z, y, axis=0)

        norm = BatchNormalization()
        dense = Dense(1024)
        hidden = C.relu(norm(dense(joined)))

        norm = BatchNormalization()
        dense = Dense((128, 7, 7))
        hidden = C.relu(norm(dense(hidden)))

        norm = BatchNormalization()
        upsample = ConvolutionTranspose2D((5, 5), 128, strides=(2, 2), pad=True,
                                          output_shape=(14, 14))
        hidden = C.relu(norm(upsample(hidden)))

        to_image = ConvolutionTranspose2D((5, 5), 1, activation=C.sigmoid,
                                          strides=(2, 2), pad=True,
                                          output_shape=(28, 28))
        image = to_image(hidden)

        return C.reshape(image, input_dim)
def conv_bn_relu(input, filter_size, num_filters, strides=(1, 1), init=C.he_normal()):
    """Apply ``conv_bn`` to ``input`` and pass the result through relu.

    The trailing positional ``1`` is forwarded to ``conv_bn`` unchanged.
    """
    normalized = conv_bn(input, filter_size, num_filters, strides, init, 1)
    activated = C.relu(normalized)
    return activated
def test_relu_5():
    """Compare CNTK and ngraph results of relu on a 2x3 mixed-sign constant."""
    cntk_op = C.relu([[-3, -2, -1], [1, 2, 3]])
    expected = cntk_op.eval()

    imported_op, _ = CNTKImporter().import_model(cntk_op)
    transformer = ng.transformers.make_transformer()
    actual = transformer.computation(imported_op)()

    assert np.array_equal(expected, actual)
def vggblock(x, arrays, layer_map, name):
    """Build a convolution + bias + relu node and register it in ``layer_map``.

    Args:
        x: input node.
        arrays: sequence whose first item is the filter array and whose
            second item is the bias array.
        layer_map: mapping updated in place with ``name -> output node``.
        name: key under which the output node is stored.

    Returns:
        The relu output node.
    """
    filters, biases = arrays[0], arrays[1]
    kernel = C.constant(value=filters)
    # Reshape the bias to (-1, 1, 1) before adding it to the convolution output.
    bias = C.constant(value=np.reshape(biases, (-1, 1, 1)))
    conv = C.convolution(kernel, x, auto_padding=[False, True, True])
    activated = C.relu(conv + bias)
    layer_map[name] = activated
    return activated
def resnet_basic_inc(input, num_filters):
    """Build a residual block whose main and shortcut paths both use stride 2.

    The main path is two 3x3 ``convolution_bn`` stages (the second without
    activation); the shortcut is a 1x1 ``convolution_bn`` without activation.
    Their sum is passed through relu.
    """
    main_path = convolution_bn(input, (3,3), num_filters, strides=(2,2))
    main_path = convolution_bn(main_path, (3,3), num_filters, activation=None)
    shortcut = convolution_bn(input, (1,1), num_filters, strides=(2,2), activation=None)
    return C.relu(main_path + shortcut)
def resnet_basic_inc(input, num_filters):
    """Return relu(main + shortcut) for a stride-2 residual block.

    main: 3x3 ``convolution_bn`` with stride 2, then a 3x3 ``convolution_bn``
    with ``activation=None``. shortcut: 1x1 ``convolution_bn`` with stride 2
    and ``activation=None``.
    """
    first = convolution_bn(input, (3,3), num_filters, strides=(2,2))
    second = convolution_bn(first, (3,3), num_filters, activation=None)
    projected_input = convolution_bn(input, (1,1), num_filters,
                                     strides=(2,2), activation=None)
    summed = second + projected_input
    return C.relu(summed)
def test_relu_1():
    """Compare CNTK and ngraph results of relu on a 1-D mixed-sign constant."""
    cntk_op = C.relu([-2, -1., 0., 1., 2.])
    expected = cntk_op.eval()

    imported_op, _ = CNTKImporter().import_model(cntk_op)
    transformer = ng.transformers.make_transformer()
    actual = transformer.computation(imported_op)()

    assert np.array_equal(expected, actual)
def pix2pix_generator(h):
    """Build the encoder/decoder generator graph on top of ``h``.

    Encoder: eight 4x4 convolutions with leaky relu (alpha 0.2); all but the
    first are batch-normalized, and the last two use stride 1.
    Decoder: eight 4x4 transposed convolutions, each followed by batch
    normalization and dropout, spliced along axis 0 with the encoder output
    of the same stage, then relu.
    A final tanh transposed convolution produces 3 channels at
    ``(img_height, img_width)``.
    """
    # (num_filters, stride) for encoder stages 2..8.
    encoder_plan = ((128, 2), (256, 2), (512, 2), (512, 2),
                    (512, 2), (512, 1), (512, 1))
    # (num_filters, stride, output size divisor) for decoder stages 8..1.
    decoder_plan = ((512, 1, 64), (512, 1, 64), (512, 1, 64), (512, 2, 32),
                    (512, 2, 16), (256, 2, 8), (128, 2, 4), (64, 2, 2))

    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False,
                                  map_rank=1, use_cntk_engine=True):
        # First encoder stage has a bias and no batch normalization.
        first_conv = Convolution2D((4, 4), 64, strides=2, bias=True)
        encoded = [C.leaky_relu(first_conv(h), alpha=0.2)]
        for num_filters, stride in encoder_plan:
            norm = BatchNormalization()
            conv = Convolution2D((4, 4), num_filters, strides=stride)
            encoded.append(C.leaky_relu(norm(conv(encoded[-1])), alpha=0.2))

        # Walk the encoder outputs from deepest to shallowest as skip inputs.
        decoded = encoded[-1]
        for (num_filters, stride, divisor), skip in zip(decoder_plan, reversed(encoded)):
            dropout = Dropout(0.5)
            norm = BatchNormalization()
            deconv = ConvolutionTranspose2D(
                (4, 4), num_filters, strides=stride, pad=True,
                output_shape=(img_height // divisor, img_width // divisor))
            decoded = dropout(norm(deconv(decoded)))
            decoded = C.splice(decoded, skip, axis=0)
            decoded = C.relu(decoded)

        to_image = ConvolutionTranspose2D((4, 4), 3, activation=C.tanh, strides=2,
                                          pad=True, bias=True,
                                          output_shape=(img_height, img_width))
        h = to_image(decoded)

        return h
def test_conv_with_freedim_model(tmpdir, dtype, device_id):
    """Round-trip a free-dimension conv model through ONNX (currently skipped).

    The unconditional ``pytest.skip`` at the top means the remaining body is
    not executed until that skip is removed.
    """
    pytest.skip('Needs to be fixed after removal of batch axis change.')
    if device_id == -1 and dtype == np.float16:
        pytest.skip('Test only runs on GPU')
    device = cntk_device(device_id)
    out_dir = str(tmpdir)

    with C.default_options(dtype=dtype):
        img = np.asarray(np.random.uniform(-1, 1, (3, 32, 32)), dtype=dtype)

        x = C.input_variable((3, C.FreeDimension, C.FreeDimension))

        # Each stage: constant kernel filled with 0..N-1, convolution, relu, pool.
        root_node = x
        for kernel_shape in ((32, 3, 5, 5), (64, 32, 3, 3)):
            kernel_values = np.arange(np.prod(kernel_shape), dtype=dtype)
            kernel = C.constant(value=kernel_values.reshape(kernel_shape))
            root_node = C.convolution(kernel, root_node, auto_padding=(False, True, True))
            root_node = C.relu(root_node)
            root_node = C.pooling(root_node, C.MAX_POOLING, (2, 2), (2, 2))

        onnx_path = os.path.join(out_dir, R'conv_with_freedim.onnx')
        root_node.save(onnx_path, format=C.ModelFormat.ONNX)

        loaded_node = C.Function.load(onnx_path, format=C.ModelFormat.ONNX)
        assert root_node.shape == loaded_node.shape

        loaded_input = loaded_node.arguments[0]
        assert np.allclose(loaded_node.eval({loaded_input: img}, device=device),
                           root_node.eval({x: img}, device=device))

        # Additional test to ensure that loaded_node can be saved as both ONNX and CNTKv2 again.
        loaded_node.save(os.path.join(out_dir, R'conv_with_freedim2.onnx'),
                         format=C.ModelFormat.ONNX)
        loaded_node.save(os.path.join(out_dir, R'conv_with_freedim2.cntkmodel'),
                         format=C.ModelFormat.CNTKv2)
def func(x_var):
    """Wrap the gated mix of ``x_var`` and its relu update as a 'HighwayBlock'.

    Uses ``WT``, ``bT``, ``WU``, ``bU`` and ``name`` from the enclosing scope.
    """
    x = C.placeholder()
    gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
    candidate = C.relu(C.times(x, WU, name=name + '_U') + bU)
    # trans(x)*u(x)+(1-f(x))*x
    mixed = x + gate * (candidate - x)
    return C.as_block(mixed,
                      [(x, x_var)],
                      'HighwayBlock',
                      'HighwayBlock' + name)
def test_relu_3():
    """Compare CNTK and ngraph results of relu on non-positive inputs."""
    values = [-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0.]
    cntk_op = C.relu(values)
    expected = cntk_op.eval()

    imported_op, _ = CNTKImporter().import_model(cntk_op)
    transformer = ng.transformers.make_transformer()
    actual = transformer.computation(imported_op)()

    assert np.array_equal(expected, actual)
def resnet_bottleneck_inc(input, out_num_filters, inter_out_num_filters, stride1x1, stride3x3):
    """Build a bottleneck residual block with a strided, projected shortcut.

    Main path: 1x1 ``conv_bn_relu`` (stride ``stride1x1``), 3x3
    ``conv_bn_relu`` (stride ``stride3x3``), 1x1 ``conv_bn`` with
    ``bn_init_scale=0``. Shortcut: 1x1 ``conv_bn`` whose stride is the
    element-wise product of the two strides. Returns relu of their sum.
    """
    reduced = conv_bn_relu(input, (1, 1), inter_out_num_filters, strides=stride1x1)
    processed = conv_bn_relu(reduced, (3, 3), inter_out_num_filters, strides=stride3x3)
    expanded = conv_bn(processed, (1, 1), out_num_filters, bn_init_scale=0)

    # Shortcut must cover the combined stride of the main path.
    shortcut_stride = np.multiply(stride1x1, stride3x3)
    shortcut = conv_bn(input, (1, 1), out_num_filters, strides=shortcut_stride)

    return C.relu(expanded + shortcut)
def CNN(x):
    """Build the convolutional classifier graph on top of ``x``.

    Six stages of 5x5 convolution + batch norm + relu + 2x2 max pooling (the
    last four followed by dropout 0.3), a 3x3 stride-1 max pooling, two
    dense(256) + relu + dropout stages, and a final dense(2) layer without
    activation.
    """
    layers = C.layers
    # (num_filters, whether dropout follows the pooling) per conv stage.
    conv_stages = ((16, False), (16, False), (64, True),
                   (64, True), (256, True), (256, True))

    with layers.default_options(init=C.initializer.glorot_uniform()):
        for num_filters, has_dropout in conv_stages:
            x = layers.Convolution2D(filter_shape=(5, 5), num_filters=num_filters,
                                     activation=None)(x)
            x = layers.BatchNormalization(map_rank=1)(x)
            x = C.relu(x)
            x = layers.MaxPooling(filter_shape=(2, 2), strides=(2, 2))(x)
            if has_dropout:
                x = layers.Dropout(0.3)(x)

        x = layers.MaxPooling(filter_shape=(3, 3), strides=(1, 1))(x)

        for _ in range(2):
            x = layers.Dense(256, activation=None)(x)
            x = C.relu(x)
            x = layers.Dropout(0.3)(x)

        x = layers.Dense(2, activation=None)(x)

    return x
def func(x_var):
    """Create the gate/update parameters and wrap the mix as a 'HighwayBlock'.

    Uses ``dim``, ``name`` and the four initializers from the enclosing scope.
    """
    x = C.placeholder()

    # Transform-gate and update parameters, named after the block.
    WT = C.Parameter((dim, dim,), init=transform_weight_initializer, name=name+'_WT')
    bT = C.Parameter(dim, init=transform_bias_initializer, name=name+'_bT')
    WU = C.Parameter((dim, dim,), init=update_weight_initializer, name=name+'_WU')
    bU = C.Parameter(dim, init=update_bias_initializer, name=name+'_bU')

    gate = C.sigmoid(C.times(x, WT, name=name+'_T') + bT)
    candidate = C.relu(C.times(x, WU, name=name+'_U') + bU)
    mixed = x + gate * (candidate - x)

    return C.as_block(mixed,
                      [(x, x_var)],
                      'HighwayBlock',
                      'HighwayBlock'+name)
def test_conv_with_freedim_model(tmpdir, dtype, device_id):
    """Round-trip a free-dimension conv model through ONNX for ``dtype``.

    Skipped for float16 when ``device_id`` is -1. Builds two convolution +
    relu + 2x2 max-pooling stages, saves as ONNX, reloads, and checks that
    shape and evaluation results match on the selected device.
    """
    if device_id == -1 and dtype == np.float16:
        pytest.skip('Test only runs on GPU')
    device = cntk_device(device_id)
    out_dir = str(tmpdir)

    with C.default_options(dtype=dtype):
        img = np.asarray(np.random.uniform(-1, 1, (3, 32, 32)), dtype=dtype)

        x = C.input_variable((3, C.FreeDimension, C.FreeDimension))

        # Each stage: constant kernel filled with 0..N-1, convolution, relu, pool.
        root_node = x
        for kernel_shape in ((32, 3, 5, 5), (64, 32, 3, 3)):
            kernel_values = np.arange(np.prod(kernel_shape), dtype=dtype)
            kernel = C.constant(value=kernel_values.reshape(kernel_shape))
            root_node = C.convolution(kernel, root_node, auto_padding=(False, True, True))
            root_node = C.relu(root_node)
            root_node = C.pooling(root_node, C.MAX_POOLING, (2, 2), (2, 2))

        onnx_path = os.path.join(out_dir, R'conv_with_freedim.onnx')
        root_node.save(onnx_path, format=C.ModelFormat.ONNX)

        loaded_node = C.Function.load(onnx_path, format=C.ModelFormat.ONNX)
        assert root_node.shape == loaded_node.shape

        loaded_input = loaded_node.arguments[0]
        assert np.allclose(loaded_node.eval({loaded_input: img}, device=device),
                           root_node.eval({x: img}, device=device))

        # Additional test to ensure that loaded_node can be saved as both ONNX and CNTKv2 again.
        loaded_node.save(os.path.join(out_dir, R'conv_with_freedim2.onnx'),
                         format=C.ModelFormat.ONNX)
        loaded_node.save(os.path.join(out_dir, R'conv_with_freedim2.cntkmodel'),
                         format=C.ModelFormat.CNTKv2)
def get_AGR(model_path):
    """Load the two models under ``model_path`` and wrap each in relu.

    Returns:
        ``(AR_model, GR_model)``: relu applied to the functions loaded from
        ``lib-am.43.tc.bin`` and ``lib-gm.43.tc.bin`` respectively.
    """
    ar_path = model_path + '/' + 'lib-am.43.tc.bin'
    AR_model = ct.relu(ct.Function.load(ar_path))

    gr_path = model_path + '/' + 'lib-gm.43.tc.bin'
    GR_model = ct.relu(ct.Function.load(gr_path))

    return AR_model, GR_model
def test_Relu(tmpdir):
    """Run ``verify_no_input`` on a relu over a constant 1x5 input."""
    values = [[-1, -0.5, 0, 1, 2]]
    verify_no_input(C.relu(values), tmpdir, 'Relu_0')
def residual_block(h, num_filters):
    """Return ``h`` plus a two-convolution, instance-normalized branch of ``h``.

    The branch is 3x3 convolution + instance normalization + relu, followed
    by a second 3x3 convolution + instance normalization (no activation).
    """
    norm_shape = (num_filters, 1, 1)
    with C.layers.default_options(init=C.normal(0.02), pad=True, strides=1, bias=False):
        norm = InstanceNormalization(norm_shape)
        conv = Convolution2D((3, 3), num_filters)
        branch = C.relu(norm(conv(h)))

        norm = InstanceNormalization(norm_shape)
        conv = Convolution2D((3, 3), num_filters)
        branch = norm(conv(branch))

        return branch + h
def test_Relu(tmpdir, dtype):
    """Run ``verify_no_input`` on a relu over a constant 1x5 array of ``dtype``."""
    with C.default_options(dtype=dtype):
        values = np.array([[-1, -0.5, 0, 1, 2]], dtype=dtype)
        verify_no_input(C.relu(values), tmpdir, 'Relu_0')
def test_Relu(tmpdir, dtype):
    """Build relu over a fixed 1x5 array of ``dtype`` and pass it to ``verify_no_input``."""
    with C.default_options(dtype=dtype):
        relu_input = np.array([[-1, -0.5, 0, 1, 2]], dtype=dtype)
        relu_model = C.relu(relu_input)
        verify_no_input(relu_model, tmpdir, 'Relu_0')
def ddist(prediction, c_interval_center, c_interval_radius):
    '''
    Distance of the predictions from the edges of the intervals.

    Computed as relu(|prediction - center| - radius).
    '''
    offset_from_center = cntk.abs(prediction - c_interval_center)
    overshoot = offset_from_center - c_interval_radius
    return cntk.relu(overshoot)
def resnet_basic(input, num_filters):
    """Build a residual block: relu(two 3x3 ``convolution_bn`` stages + input).

    The second ``convolution_bn`` is created with ``activation=None``.
    """
    branch = convolution_bn(input, (3,3), num_filters)
    branch = convolution_bn(branch, (3,3), num_filters, activation=None)
    return C.relu(branch + input)
def resnet_bottleneck(input, out_num_filters, inter_out_num_filters):
    """Build a bottleneck residual block with an identity shortcut.

    Main path: 1x1 ``conv_bn_relu``, 3x3 ``conv_bn_relu``, then 1x1
    ``conv_bn`` with ``bn_init_scale=0``. Returns relu(main path + input).
    """
    reduced = conv_bn_relu(input, (1, 1), inter_out_num_filters)
    processed = conv_bn_relu(reduced, (3, 3), inter_out_num_filters)
    expanded = conv_bn(processed, (1, 1), out_num_filters, bn_init_scale=0)
    return C.relu(expanded + input)
def resnet_basic(input, num_filters):
    """Return relu(conv_bn(conv_bn(input)) + input) using 3x3 filters.

    The second ``convolution_bn`` stage has no activation.
    """
    first = convolution_bn(input, (3, 3), num_filters)
    second = convolution_bn(first, (3, 3), num_filters, activation=None)
    summed = second + input
    return C.relu(summed)
def dense(x):
    """Return relu(x * W_proj + b_proj) using the enclosing scope's parameters."""
    projected = C.times(x, W_proj)
    return C.relu(projected + b_proj)
def test_Relu(tmpdir):
    """Build relu over a fixed 1x5 input and pass it to ``verify_no_input``."""
    relu_input = [[-1, -0.5, 0, 1, 2]]
    relu_model = C.relu(relu_input)
    verify_no_input(relu_model, tmpdir, 'Relu_0')
#These parameters don't need to be changed, but you can try varying the hidden_size and update_frequency and see how learning is affected. env = gym.make('CartPole-v0') state_dim = env.observation_space.shape[0] # Dimension of state space action_count = env.action_space.n # Number of actions hidden_size = 128 # Number of hidden units update_frequency = 20 #Next we will define the policy network. # The policy network maps an observation to a probability of taking action 0 or 1. observations = C.sequence.input_variable(state_dim, np.float32, name="obs") W1 = C.parameter(shape=(state_dim, hidden_size), init=C.glorot_uniform(), name="W1") b1 = C.parameter(shape=hidden_size, name="b1") layer1 = C.relu(C.times(observations, W1) + b1) W2 = C.parameter(shape=(hidden_size, action_count), init=C.glorot_uniform(), name="W2") b2 = C.parameter(shape=action_count, name="b2") layer2 = C.times(layer1, W2) + b2 output = C.sigmoid(layer2, name="output") ''' Now you must define the loss function for training the policy network. - Recall that the desired loss function is: $\frac{1}{m}\sum_1^m \nabla_\theta \log \pi_\theta(a_t|s_t) R$. - Label is a variable corresponding to $a_t$, the action the policy selected. - output is the policy network that maps an observation to a probability of taking an action.
def dcgan_generator(h):
    """Build the transposed-convolution generator graph on top of ``h``.

    ``h`` is reshaped to ``(-1, 1, 1)`` and expanded to 4x4 with 1024
    filters. Five stride-2 5x5 transposed convolutions follow (512, 256,
    128, 64, 32 filters), each with batch norm + relu. A final stride-2
    transposed convolution with tanh produces 3 channels at
    ``(img_height, img_width)``.
    """
    # (num_filters, output size divisor) for the intermediate upsampling stages.
    upsample_plan = ((512, 32), (256, 16), (128, 8), (64, 4), (32, 2))

    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False,
                                  map_rank=1, use_cntk_engine=True):
        h = C.reshape(h, (-1, 1, 1))

        h = ConvolutionTranspose2D((4, 4), 1024, pad=False, strides=1,
                                   output_shape=(4, 4))(h)
        h = C.relu(BatchNormalization()(h))

        for num_filters, divisor in upsample_plan:
            h = ConvolutionTranspose2D(
                (5, 5), num_filters, strides=2,
                output_shape=(img_height // divisor, img_width // divisor))(h)
            h = C.relu(BatchNormalization()(h))

        h = ConvolutionTranspose2D((5, 5), 3, strides=2, bias=True,
                                   output_shape=(img_height, img_width))(h)
        h = C.tanh(h)

        return h
def policy_gradient():
    """Train a one-hidden-layer policy network with batched policy gradients.

    Plays episodes in the module-level ``env``. When an episode ends, the
    loss gradients with respect to ``W1`` and ``W2`` are computed from that
    episode's observations, fake labels and normalized discounted rewards,
    and added to a buffer. Every ``BATCH_SIZE_BASELINE`` episodes the
    buffered gradients are applied with SGD and the buffer is cleared.

    Training stops after ``TOTAL_EPISODES`` episodes, or earlier when the
    reward summed since the last update, divided by ``BATCH_SIZE_BASELINE``,
    exceeds ``REWARD_TARGET``. The probability function is then saved to
    ``'pg.mod'``.

    Relies on module-level names: ``env``, ``isFast``, ``STATE_COUNT``,
    ``ACTION_COUNT``, ``BATCH_SIZE_BASELINE``, ``REWARD_TARGET``,
    ``discount_rewards``, ``input``, ``learning_rate_schedule``, ``UnitType``.
    """
    import cntk as C

    TOTAL_EPISODES = 2000 if isFast else 10000

    H = 100  # number of hidden layer neurons

    # NOTE(review): `input` must resolve to a CNTK input factory imported at
    # module level, not the Python builtin; confirm against the file's imports.
    observations = input(STATE_COUNT, np.float32, name="obs")

    W1 = C.parameter(shape=(STATE_COUNT, H), init=C.glorot_uniform(), name="W1")
    b1 = C.parameter(shape=H, name="b1")
    layer1 = C.relu(C.times(observations, W1) + b1)

    W2 = C.parameter(shape=(H, ACTION_COUNT), init=C.glorot_uniform(), name="W2")
    b2 = C.parameter(shape=ACTION_COUNT, name="b2")
    score = C.times(layer1, W2) + b2
    # Until here it was similar to DQN

    probability = C.sigmoid(score, name="prob")
    input_y = input(1, np.float32, name="input_y")
    advantages = input(1, np.float32, name="advt")

    loss = -C.reduce_mean(C.log(C.square(input_y - probability) + 1e-4) * advantages,
                          axis=0, name='loss')

    lr = 1e-4
    lr_schedule = learning_rate_schedule(lr, UnitType.sample)
    # Only the weight matrices are handed to the learner; the biases are not updated.
    sgd = C.sgd([W1, W2], lr_schedule)

    def _new_grad_buffer():
        # One zero-filled accumulator per named parameter of the loss.
        return {var.name: np.zeros(shape=var.shape)
                for var in loss.parameters
                if var.name in ['W1', 'W2', 'b1', 'b2']}

    gradBuffer = _new_grad_buffer()

    xs, label, drs = [], [], []
    reward_sum = 0
    episode_number = 1

    observation = env.reset()
    # NOTE(review): assumes env.action_space supports item access with 'n';
    # confirm against the environment class used by this file.
    actionlist = list(range(env.action_space['n']))

    while episode_number <= TOTAL_EPISODES:
        x = np.reshape(observation, [1, STATE_COUNT]).astype(np.float32)

        # Run the policy network and sample an action from its normalized outputs.
        prob = probability.eval(arguments={observations: x})
        normalized_weights = (prob / np.sum(prob))[0][0]
        # Use the same `np` alias as the rest of the function.
        action = np.random.choice(actionlist, p=normalized_weights)

        xs.append(x)  # observation

        # grad that encourages the action that was taken to be taken
        y = 1 if action == 0 else 0  # a "fake label"
        label.append(y)

        # step the environment and get new measurements
        observation, reward, done, _ = env.step(action)
        reward_sum += float(reward)

        # Record reward (has to be done after we call step() to get reward for previous action)
        drs.append(float(reward))

        if done:
            # Stack together all inputs, fake labels and rewards for this episode
            epx = np.vstack(xs)
            epl = np.vstack(label).astype(np.float32)
            epr = np.vstack(drs).astype(np.float32)
            xs, label, drs = [], [], []  # reset array memory

            # Compute the discounted reward backwards through time.
            discounted_epr = discount_rewards(epr)
            # Size the rewards to be unit normal (helps control the gradient estimator variance)
            discounted_epr -= np.mean(discounted_epr)
            discounted_epr /= (np.std(discounted_epr) + 0.000000000001)

            # Forward pass
            arguments = {observations: epx, input_y: epl, advantages: discounted_epr}
            state, outputs_map = loss.forward(arguments, outputs=loss.outputs,
                                              keep_for_backward=loss.outputs)

            # Backward pass
            root_gradients = {v: np.ones_like(o) for v, o in outputs_map.items()}
            vargrads_map = loss.backward(state, root_gradients, variables={W1, W2})

            for var, grad in vargrads_map.items():
                gradBuffer[var.name] += grad

            # Wait for some batches to finish to reduce noise
            if episode_number % BATCH_SIZE_BASELINE == 0:
                grads = {W1: gradBuffer['W1'].astype(np.float32),
                         W2: gradBuffer['W2'].astype(np.float32)}
                sgd.update(grads, BATCH_SIZE_BASELINE)

                # reset the gradBuffer
                gradBuffer = _new_grad_buffer()

                print('Episode: %d. Average reward for episode %f.' % (episode_number, reward_sum / BATCH_SIZE_BASELINE))

                if reward_sum / BATCH_SIZE_BASELINE > REWARD_TARGET:
                    print('Task solved in: %d ' % episode_number)
                    break

                reward_sum = 0

            observation = env.reset()  # reset env
            episode_number += 1

    probability.save('pg.mod')