Ejemplo n.º 1
0
    def criterion(self):
        """Build the training loss and the classification-error metric.

        Reads ``self.length`` and ``self.labels``; when
        ``self.use_reconstruction`` is truthy it also reads ``self.features``
        and ``self.training_model``.

        Returns:
            A ``(total_loss, error)`` tuple. ``total_loss`` is the margin loss,
            plus a weighted reconstruction error when reconstruction is enabled.
        """
        # Weight of the term that is multiplied by (1 - labels).
        absent_weight = 0.5

        # Margin loss: squared hinge terms on the lengths, flattened before
        # being mixed with the labels.
        present_term = ct.square(ct.relu(0.9 - self.length))
        absent_term = ct.square(ct.relu(self.length - 0.1))
        present_term = ct.reshape(present_term, (-1))
        absent_term = ct.reshape(absent_term, (-1))
        per_class = self.labels * present_term + absent_weight * (1 - self.labels) * absent_term

        margin_loss = ct.reduce_sum(per_class, axis=0)
        margin_loss = ct.reduce_mean(margin_loss, axis=ct.axis.Axis.default_batch_axis())

        # Classification error of the softmaxed lengths against the labels.
        scores = ct.softmax(self.length, axis=0)
        error = ct.classification_error(ct.reshape(scores, (10)), self.labels)

        if not self.use_reconstruction:
            return margin_loss, error

        # Mean squared difference between the flattened model output and the
        # flattened input features, averaged over the batch axis as well.
        flat_features = ct.reshape(self.features, shape=(-1,))
        flat_output = ct.reshape(self.training_model, shape=(-1,))
        squared_diff = ct.square(flat_output - flat_features)
        reconstruction = ct.reduce_mean(squared_diff, axis=0)
        reconstruction = ct.reduce_mean(reconstruction, axis=ct.axis.Axis.default_batch_axis())

        return margin_loss + (0.0005*784) * reconstruction, error
def test_relu():
    """Run the CNTK/nGraph comparison helper on relu over several inputs."""
    cases = (
        [-2, -1., 0., 1., 2.],
        [0.],
        [-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1],
        [[1, 2, 3], [4, 5, 6]],
        [[-3, -2, -1], [1, 2, 3]],
    )
    for values in cases:
        assert_cntk_ngraph_array_equal(C.relu(values))
Ejemplo n.º 3
0
def cyclegan_generator(h):
    """Build the CycleGAN generator graph on top of ``h``.

    Structure: three convolution + instance-norm + relu stages (the last two
    with stride 2), nine residual blocks with 256 filters, two transposed
    convolutions with stride 2 back up to ``(img_height, img_width)``, and a
    final 7x7 convolution with ``tanh`` activation producing 3 maps.

    Relies on ``img_height`` / ``img_width`` and the layer helpers being
    defined elsewhere in the module.

    Args:
        h: input node the generator is applied to.

    Returns:
        The output node of the final ``tanh`` convolution.
    """
    with C.layers.default_options(init=C.normal(0.02), pad=True, strides=1, bias=False):
        h = C.relu(InstanceNormalization((64, 1, 1))(Convolution2D((7, 7), 64)(h)))
        h = C.relu(InstanceNormalization((128, 1, 1))(Convolution2D((3, 3), 128, strides=2)(h)))
        h = C.relu(InstanceNormalization((256, 1, 1))(Convolution2D((3, 3), 256, strides=2)(h)))

        # Nine identical residual blocks.
        for _ in range(9):
            h = residual_block(h, 256)

        # The normalization layer must be applied to the transposed
        # convolution's output *inside* the relu call, mirroring the encoder
        # stages above (the previous nesting had unbalanced parentheses).
        h = C.relu(InstanceNormalization((128, 1, 1))(
            ConvolutionTranspose2D((3, 3), 128, strides=2, output_shape=(img_height // 2, img_width // 2))(h)))
        h = C.relu(InstanceNormalization((64, 1, 1))(
            ConvolutionTranspose2D((3, 3), 64, strides=2, output_shape=(img_height, img_width))(h)))
        h = Convolution2D((7, 7), 3, activation=C.tanh, bias=True)(h)

        return h
Ejemplo n.º 4
0
def test_conv_with_freedim_model(tmpdir):
    """Round-trip a conv/relu/max-pool model with free spatial dims through ONNX.

    The model is saved as ONNX, reloaded, compared against the original on a
    random image, and the reloaded model is saved again as ONNX and CNTKv2.
    """
    def _conv_relu_pool(kernel_shape, operand):
        # Kernel values are consecutive floats laid out in `kernel_shape`.
        kernel = C.constant(
            value=np.arange(np.prod(kernel_shape), dtype=np.float32).reshape(kernel_shape))
        conv = C.convolution(kernel, operand, auto_padding=(False, True, True))
        return C.pooling(C.relu(conv), C.MAX_POOLING, (2, 2), (2, 2))

    img = np.asarray(np.random.uniform(-1, 1, (3, 32, 32)), dtype=np.float32)
    x = C.input_variable((3, C.FreeDimension, C.FreeDimension))

    first_stage = _conv_relu_pool((32, 3, 5, 5), x)
    root_node = _conv_relu_pool((64, 32, 3, 3), first_stage)

    out_dir = str(tmpdir)
    onnx_path = os.path.join(out_dir, R'conv_with_freedim.onnx')
    root_node.save(onnx_path, format=C.ModelFormat.ONNX)

    loaded_node = C.Function.load(onnx_path, format=C.ModelFormat.ONNX)
    assert root_node.shape == loaded_node.shape

    loaded_input = loaded_node.arguments[0]
    reloaded_out = loaded_node.eval({loaded_input: img})
    original_out = root_node.eval({x: img})
    assert np.allclose(reloaded_out, original_out)

    # The reloaded model must itself be savable as both ONNX and CNTKv2.
    loaded_node.save(os.path.join(out_dir, R'conv_with_freedim2.onnx'),
                     format=C.ModelFormat.ONNX)
    loaded_node.save(os.path.join(out_dir, R'conv_with_freedim2.cntkmodel'),
                     format=C.ModelFormat.CNTKv2)
Ejemplo n.º 5
0
def cgan_generator(z, y):
    """Build the conditional-GAN generator from ``z`` and ``y``.

    ``z`` and ``y`` are spliced along axis 0, passed through two dense +
    batch-norm + relu stages and two stride-2 transposed convolutions (the
    last with sigmoid activation), then reshaped to the module-level
    ``input_dim``.
    """
    with C.layers.default_options(init=C.normal(scale=0.02), bias=False, map_rank=1, use_cntk_engine=True):
        joint = C.splice(z, y, axis=0)

        dense1 = C.relu(BatchNormalization()(Dense(1024)(joint)))
        dense2 = C.relu(BatchNormalization()(Dense((128, 7, 7))(dense1)))
        upsampled = C.relu(BatchNormalization()(ConvolutionTranspose2D(
            (5, 5), 128, strides=(2, 2), pad=True, output_shape=(14, 14))(dense2)))
        image = ConvolutionTranspose2D(
            (5, 5), 1, activation=C.sigmoid, strides=(2, 2), pad=True, output_shape=(28, 28))(upsampled)

    return C.reshape(image, input_dim)
Ejemplo n.º 6
0
def conv_bn_relu(input,
                 filter_size,
                 num_filters,
                 strides=(1, 1),
                 init=C.he_normal()):
    """Apply ``conv_bn`` to ``input`` and rectify the result with relu.

    All arguments are forwarded positionally to ``conv_bn``, followed by a
    literal ``1`` as its sixth argument (meaning defined by ``conv_bn``,
    which is not visible here).
    """
    return C.relu(conv_bn(input, filter_size, num_filters, strides, init, 1))
Ejemplo n.º 7
0
def test_relu_5():
    """Compare CNTK's relu on a 2x3 input with the op imported via CNTKImporter."""
    cntk_op = C.relu([[-3, -2, -1], [1, 2, 3]])
    expected = cntk_op.eval()

    # Import the CNTK op and run it through an `ng` transformer.
    imported_op, _ = CNTKImporter().import_model(cntk_op)
    transformer = ng.transformers.make_transformer()
    actual = transformer.computation(imported_op)()

    assert np.array_equal(expected, actual)
Ejemplo n.º 8
0
def vggblock(x, arrays, layer_map, name):
    """Build a constant-weight convolution + bias + relu and register it.

    Args:
        x: input node.
        arrays: indexable whose item 0 is used as the convolution kernel and
            item 1 as the bias (reshaped to ``(-1, 1, 1)``).
        layer_map: mapping that receives the resulting node under ``name``.
        name: key used when storing the node in ``layer_map``.

    Returns:
        The relu output node.
    """
    weights, bias = arrays[0], arrays[1]
    kernel = C.constant(value=weights)
    offset = C.constant(value=np.reshape(bias, (-1, 1, 1)))
    conv = C.convolution(kernel, x, auto_padding=[False, True, True])
    activated = C.relu(conv + offset)
    layer_map[name] = activated
    return activated
Ejemplo n.º 9
0
    def resnet_basic_inc(input, num_filters):
        """Residual block whose main path and shortcut both use stride (2, 2).

        The main path is two 3x3 ``convolution_bn`` calls (the second with
        ``activation=None``); the shortcut is a 1x1 ``convolution_bn`` with
        ``activation=None``. Their sum is passed through relu.
        """
        main_path = convolution_bn(input, (3,3), num_filters, strides=(2,2))
        main_path = convolution_bn(main_path, (3,3), num_filters, activation=None)

        shortcut = convolution_bn(input, (1,1), num_filters, strides=(2,2), activation=None)

        return C.relu(main_path + shortcut)
Ejemplo n.º 10
0
    def resnet_basic_inc(input, num_filters):
        """Build a stride-(2, 2) residual block and return its relu output.

        Two stacked 3x3 ``convolution_bn`` calls form the main branch and a
        1x1 ``convolution_bn`` forms the shortcut; both the second main-branch
        call and the shortcut pass ``activation=None``.
        """
        first = convolution_bn(input, (3,3), num_filters, strides=(2,2))
        second = convolution_bn(first, (3,3), num_filters, activation=None)

        projected_input = convolution_bn(input, (1,1), num_filters, strides=(2,2), activation=None)

        summed = second + projected_input
        return C.relu(summed)
Ejemplo n.º 11
0
def test_relu_1():
    """Compare CNTK's relu on a 1-D input with the op imported via CNTKImporter."""
    cntk_op = C.relu([-2, -1., 0., 1., 2.])
    expected = cntk_op.eval()

    # Import the CNTK op and run it through an `ng` transformer.
    imported_op, _ = CNTKImporter().import_model(cntk_op)
    transformer = ng.transformers.make_transformer()
    actual = transformer.computation(imported_op)()

    assert np.array_equal(expected, actual)
Ejemplo n.º 12
0
def pix2pix_generator(h):
    """Build the pix2pix encoder/decoder generator with skip connections.

    Eight leaky-relu encoder stages are followed by eight decoder stages. Each
    decoder stage applies a transposed convolution, batch normalization and
    dropout, splices the result with the matching encoder output along axis 0
    and applies relu. A final transposed convolution with ``tanh`` produces 3
    maps of size ``(img_height, img_width)`` (module-level names).
    """
    # (num_filters, stride) for encoder stages 2..8; stage 1 has no batch norm.
    encoder_specs = ((128, 2), (256, 2), (512, 2), (512, 2), (512, 2), (512, 1), (512, 1))
    # (num_filters, stride, output-size divisor) for decoder stages 8..1.
    decoder_specs = ((512, 1, 64), (512, 1, 64), (512, 1, 64), (512, 2, 32),
                     (512, 2, 16), (256, 2, 8), (128, 2, 4), (64, 2, 2))

    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False, map_rank=1, use_cntk_engine=True):
        encoded = [C.leaky_relu(Convolution2D((4, 4), 64, strides=2, bias=True)(h), alpha=0.2)]
        for num_filters, stride in encoder_specs:
            encoded.append(C.leaky_relu(
                BatchNormalization()(Convolution2D((4, 4), num_filters, strides=stride)(encoded[-1])),
                alpha=0.2))

        # The first decoder stage consumes the deepest encoder output and is
        # also spliced with it; later stages pair with shallower outputs.
        decoded = encoded[-1]
        for (num_filters, stride, divisor), skip in zip(decoder_specs, reversed(encoded)):
            upsampled = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
                (4, 4), num_filters, strides=stride, pad=True,
                output_shape=(img_height // divisor, img_width // divisor))(decoded)))
            decoded = C.splice(upsampled, skip, axis=0)
            decoded = C.relu(decoded)

        return ConvolutionTranspose2D((4, 4), 3, activation=C.tanh, strides=2, pad=True, bias=True,
                                      output_shape=(img_height, img_width))(decoded)
def test_conv_with_freedim_model(tmpdir, dtype, device_id):
    """ONNX round-trip of a free-dimension conv model (currently skipped).

    The unconditional ``pytest.skip`` below disables the whole test; the rest
    of the body is kept so it can be re-enabled.
    """
    pytest.skip('Needs to be fixed after removal of batch axis change.')
    if device_id == -1 and dtype == np.float16:
        pytest.skip('Test only runs on GPU')
    device = cntk_device(device_id)

    def _stage(kernel_shape, operand):
        # conv (constant arange kernel) -> relu -> 2x2 max pooling, stride 2
        kernel_values = np.arange(np.prod(kernel_shape), dtype=dtype).reshape(kernel_shape)
        conv = C.convolution(C.constant(value=kernel_values),
                             operand,
                             auto_padding=(False, True, True))
        return C.pooling(C.relu(conv), C.MAX_POOLING, (2, 2), (2, 2))

    with C.default_options(dtype=dtype):
        img = np.asarray(np.random.uniform(-1, 1, (3, 32, 32)), dtype=dtype)

        x = C.input_variable((3, C.FreeDimension, C.FreeDimension))
        root_node = _stage((64, 32, 3, 3), _stage((32, 3, 5, 5), x))

        out_dir = str(tmpdir)
        onnx_path = os.path.join(out_dir, R'conv_with_freedim.onnx')
        root_node.save(onnx_path, format=C.ModelFormat.ONNX)

        loaded_node = C.Function.load(onnx_path, format=C.ModelFormat.ONNX)
        assert root_node.shape == loaded_node.shape

        loaded_input = loaded_node.arguments[0]
        reloaded_out = loaded_node.eval({loaded_input: img}, device=device)
        original_out = root_node.eval({x: img}, device=device)
        assert np.allclose(reloaded_out, original_out)

        # The reloaded model must itself be savable as both ONNX and CNTKv2.
        loaded_node.save(os.path.join(out_dir, R'conv_with_freedim2.onnx'),
                         format=C.ModelFormat.ONNX)
        loaded_node.save(os.path.join(out_dir, R'conv_with_freedim2.cntkmodel'),
                         format=C.ModelFormat.CNTKv2)
Ejemplo n.º 14
0
 def func(x_var):
     """Wrap a highway update of ``x_var`` in a 'HighwayBlock' block function.

     Uses ``WT``, ``bT``, ``WU``, ``bU`` and ``name`` from the enclosing scope.
     """
     x = C.placeholder()
     gate_input = C.times(x, WT, name=name + '_T') + bT
     gate = C.sigmoid(gate_input)
     update_input = C.times(x, WU, name=name + '_U') + bU
     candidate = C.relu(update_input)
     # x + gate * (candidate - x): the gate blends the input with the update.
     blended = x + gate * (candidate - x)
     return C.as_block(blended, [(x, x_var)], 'HighwayBlock', 'HighwayBlock' + name)
Ejemplo n.º 15
0
def test_relu_3():
    """Compare CNTK's relu on non-positive inputs with the op imported via CNTKImporter."""
    values = [-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0.]
    cntk_op = C.relu(values)
    expected = cntk_op.eval()

    # Import the CNTK op and run it through an `ng` transformer.
    imported_op, _ = CNTKImporter().import_model(cntk_op)
    transformer = ng.transformers.make_transformer()
    actual = transformer.computation(imported_op)()

    assert np.array_equal(expected, actual)
Ejemplo n.º 16
0
def resnet_bottleneck_inc(input, out_num_filters, inter_out_num_filters,
                          stride1x1, stride3x3):
    """Bottleneck residual block with a strided 1x1 projection shortcut.

    The main path is 1x1 -> 3x3 -> 1x1 (the last via ``conv_bn`` with
    ``bn_init_scale=0``). The shortcut is a 1x1 ``conv_bn`` whose stride is the
    element-wise product of ``stride1x1`` and ``stride3x3``. The sum of both
    paths is passed through relu.
    """
    reduced = conv_bn_relu(input, (1, 1), inter_out_num_filters, strides=stride1x1)
    spatial = conv_bn_relu(reduced, (3, 3), inter_out_num_filters, strides=stride3x3)
    expanded = conv_bn(spatial, (1, 1), out_num_filters, bn_init_scale=0)

    shortcut_stride = np.multiply(stride1x1, stride3x3)
    shortcut = conv_bn(input, (1, 1), out_num_filters, strides=shortcut_stride)

    return C.relu(expanded + shortcut)
def CNN(x):
    """Build a six-stage convolutional network ending in a 2-unit dense layer.

    Each convolutional stage is 5x5 convolution -> batch norm -> relu -> 2x2
    max pooling; stages three to six are followed by dropout (rate 0.3). A 3x3
    max pooling with stride 1, two dense(256) + relu + dropout stages and a
    final ``Dense(2)`` without activation complete the model.
    """
    # (num_filters, followed_by_dropout) for each convolutional stage, in order.
    conv_stages = (
        (16, False),
        (16, False),
        (64, True),
        (64, True),
        (256, True),
        (256, True),
    )

    with C.layers.default_options(init=C.initializer.glorot_uniform()):
        for num_filters, with_dropout in conv_stages:
            x = C.layers.Convolution2D(filter_shape=(5, 5),
                                       num_filters=num_filters,
                                       activation=None)(x)
            x = C.layers.BatchNormalization(map_rank=1)(x)
            x = C.relu(x)
            x = C.layers.MaxPooling(filter_shape=(2, 2), strides=(2, 2))(x)
            if with_dropout:
                x = C.layers.Dropout(0.3)(x)

        x = C.layers.MaxPooling(filter_shape=(3, 3), strides=(1, 1))(x)

        # Two identical fully connected stages.
        for _ in range(2):
            x = C.layers.Dense(256, activation=None)(x)
            x = C.relu(x)
            x = C.layers.Dropout(0.3)(x)

        x = C.layers.Dense(2, activation=None)(x)
    return x
Ejemplo n.º 18
0
 def func(x_var):
     """Create highway parameters and wrap the update of ``x_var`` in a block.

     ``dim``, ``name`` and the four initializers come from the enclosing scope.
     Parameters are created in the order WT, bT, WU, bU.
     """
     x = C.placeholder()

     WT = C.Parameter((dim,dim,), init=transform_weight_initializer, name=name+'_WT')
     bT = C.Parameter(dim, init=transform_bias_initializer, name=name+'_bT')
     WU = C.Parameter((dim,dim,), init=update_weight_initializer, name=name+'_WU')
     bU = C.Parameter(dim, init=update_bias_initializer, name=name+'_bU')

     gate_input = C.times(x, WT, name=name+'_T') + bT
     gate = C.sigmoid(gate_input)
     update_input = C.times(x, WU, name=name+'_U') + bU
     candidate = C.relu(update_input)

     # x + gate * (candidate - x): the gate blends the input with the update.
     blended = x + gate * (candidate - x)
     return C.as_block(blended, [(x, x_var)], 'HighwayBlock', 'HighwayBlock'+name)
Ejemplo n.º 19
0
def test_conv_with_freedim_model(tmpdir, dtype, device_id):
    """Round-trip a free-dimension conv model through ONNX for a given dtype/device.

    Skipped for float16 on the CPU device id (-1). The reloaded model is
    compared with the original on a random image and then saved again as
    ONNX and CNTKv2.
    """
    if device_id == -1 and dtype == np.float16:
        pytest.skip('Test only runs on GPU')
    device = cntk_device(device_id)

    def _conv_relu_pool(kernel_shape, operand):
        # Kernel values are consecutive numbers of `dtype` in `kernel_shape`.
        kernel = C.constant(value=np.arange(np.prod(kernel_shape), dtype=dtype).reshape(kernel_shape))
        conv = C.convolution(kernel, operand, auto_padding=(False, True, True))
        return C.pooling(C.relu(conv), C.MAX_POOLING, (2, 2), (2, 2))

    with C.default_options(dtype=dtype):
        img = np.asarray(np.random.uniform(-1, 1, (3, 32, 32)), dtype=dtype)

        x = C.input_variable((3, C.FreeDimension, C.FreeDimension))

        first_stage = _conv_relu_pool((32, 3, 5, 5), x)
        root_node = _conv_relu_pool((64, 32, 3, 3), first_stage)

        out_dir = str(tmpdir)
        onnx_path = os.path.join(out_dir, R'conv_with_freedim.onnx')
        root_node.save(onnx_path, format=C.ModelFormat.ONNX)

        loaded_node = C.Function.load(onnx_path, format=C.ModelFormat.ONNX)
        assert root_node.shape == loaded_node.shape

        loaded_input = loaded_node.arguments[0]
        reloaded_out = loaded_node.eval({loaded_input: img}, device=device)
        original_out = root_node.eval({x: img}, device=device)
        assert np.allclose(reloaded_out, original_out)

        # The reloaded model must itself be savable as both ONNX and CNTKv2.
        resaved_onnx = os.path.join(out_dir, R'conv_with_freedim2.onnx')
        loaded_node.save(resaved_onnx, format=C.ModelFormat.ONNX)

        resaved_cntk = os.path.join(out_dir, R'conv_with_freedim2.cntkmodel')
        loaded_node.save(resaved_cntk, format=C.ModelFormat.CNTKv2)
def get_AGR(model_path):
    """Load the two model files under ``model_path`` and apply relu to each.

    Returns:
        ``(AR_model, GR_model)``: relu applied to the functions loaded from
        'lib-am.43.tc.bin' and 'lib-gm.43.tc.bin' respectively.
    """
    def _load_rectified(file_name):
        # Path is built by plain concatenation with a '/' separator.
        return ct.relu(ct.Function.load(model_path + '/' + file_name))

    return _load_rectified('lib-am.43.tc.bin'), _load_rectified('lib-gm.43.tc.bin')
Ejemplo n.º 21
0
def test_Relu(tmpdir):
    """Build a relu over a constant 1x5 input and hand it to ``verify_no_input``."""
    data = [[-1, -0.5, 0, 1, 2]]
    # Feed the named input to the op instead of repeating the literal, so the
    # test data is defined in exactly one place.
    model = C.relu(data)
    verify_no_input(model, tmpdir, 'Relu_0')
Ejemplo n.º 22
0
def residual_block(h, num_filters):
    """Two 3x3 convolution + instance-norm stages added back onto ``h``.

    Only the first stage is followed by relu; the sum with ``h`` is returned
    without a further activation.
    """
    norm_shape = (num_filters, 1, 1)
    with C.layers.default_options(init=C.normal(0.02), pad=True, strides=1, bias=False):
        first = C.relu(InstanceNormalization(norm_shape)(Convolution2D((3, 3), num_filters)(h)))
        second = InstanceNormalization(norm_shape)(Convolution2D((3, 3), num_filters)(first))
        return second + h
Ejemplo n.º 23
0
def test_Relu(tmpdir, dtype):
    """Build a relu over a constant 1x5 array of ``dtype`` and verify it."""
    with C.default_options(dtype=dtype):
        values = np.array([[-1, -0.5, 0, 1, 2]], dtype=dtype)
        verify_no_input(C.relu(values), tmpdir, 'Relu_0')
Ejemplo n.º 24
0
def test_Relu(tmpdir, dtype):
    """Verify a relu model built from a constant 1x5 array under ``dtype``."""
    with C.default_options(dtype=dtype):
        relu_input = np.array([[-1, -0.5, 0, 1, 2]], dtype=dtype)
        relu_model = C.relu(relu_input)
        verify_no_input(relu_model, tmpdir, 'Relu_0')
Ejemplo n.º 25
0
def ddist(prediction, c_interval_center, c_interval_radius):
    """Return how far each prediction lies outside its interval.

    Computes ``relu(|prediction - center| - radius)``, which is zero whenever
    the prediction is within ``radius`` of the center.
    """
    offset_from_center = cntk.abs(prediction - c_interval_center)
    return cntk.relu(offset_from_center - c_interval_radius)
Ejemplo n.º 26
0
 def resnet_basic(input, num_filters):
     """Basic residual block: two 3x3 ``convolution_bn`` calls plus identity.

     The second call passes ``activation=None``; relu is applied after the
     result is added to ``input``.
     """
     first = convolution_bn(input, (3,3), num_filters)
     second = convolution_bn(first, (3,3), num_filters, activation=None)
     return C.relu(second + input)
Ejemplo n.º 27
0
def resnet_bottleneck(input, out_num_filters, inter_out_num_filters):
    """Bottleneck residual block with an identity shortcut.

    The main path is 1x1 -> 3x3 -> 1x1 (the last via ``conv_bn`` with
    ``bn_init_scale=0``); its output is added to ``input`` and passed through
    relu.
    """
    reduced = conv_bn_relu(input, (1, 1), inter_out_num_filters)
    spatial = conv_bn_relu(reduced, (3, 3), inter_out_num_filters)
    expanded = conv_bn(spatial, (1, 1), out_num_filters, bn_init_scale=0)
    return C.relu(expanded + input)
 def resnet_basic(input, num_filters):
     """Return relu(main_path + input) for a two-convolution residual block.

     The main path is two 3x3 ``convolution_bn`` calls, the second with
     ``activation=None``.
     """
     main_path = convolution_bn(input, (3, 3), num_filters)
     main_path = convolution_bn(main_path, (3, 3), num_filters, activation=None)
     summed = main_path + input
     return C.relu(summed)
Ejemplo n.º 29
0
 def dense(x):
     """Apply the projection ``x * W_proj + b_proj`` followed by relu.

     ``W_proj`` and ``b_proj`` come from the enclosing scope.
     """
     projected = C.times(x, W_proj)
     return C.relu(projected + b_proj)
Ejemplo n.º 30
0
def test_Relu(tmpdir):
    """Build a relu over a constant 1x5 input and hand it to ``verify_no_input``."""
    data = [[-1, -0.5, 0, 1, 2]]
    # Use the named input rather than a second copy of the same literal; the
    # local was previously assigned and never read.
    model = C.relu(data)
    verify_no_input(model, tmpdir, 'Relu_0')
Ejemplo n.º 31
0
# Environment and hyperparameters. These values don't need to be changed, but
# you can try varying hidden_size and update_frequency to see how learning is
# affected.
env = gym.make('CartPole-v0')

state_dim = env.observation_space.shape[0]  # Dimension of state space
action_count = env.action_space.n  # Number of actions
hidden_size = 128  # Number of hidden units
update_frequency = 20  # NOTE(review): not used in this section; presumably the update interval of the training loop - confirm

# Policy network: one relu hidden layer followed by a sigmoid output layer.
# It maps an observation to a probability of taking action 0 or 1.
observations = C.sequence.input_variable(state_dim, np.float32, name="obs")
# Hidden layer: relu(observations * W1 + b1). No explicit init is passed for b1.
W1 = C.parameter(shape=(state_dim, hidden_size),
                 init=C.glorot_uniform(),
                 name="W1")
b1 = C.parameter(shape=hidden_size, name="b1")
layer1 = C.relu(C.times(observations, W1) + b1)
# Output layer: sigmoid(layer1 * W2 + b2), with one output per action.
W2 = C.parameter(shape=(hidden_size, action_count),
                 init=C.glorot_uniform(),
                 name="W2")
b2 = C.parameter(shape=action_count, name="b2")
layer2 = C.times(layer1, W2) + b2
output = C.sigmoid(layer2, name="output")
'''
Now you must define the loss function for training the policy network. 

- Recall that the desired loss function is: $\frac{1}{m}\sum_{t=1}^{m} \nabla_\theta \log \pi_\theta(a_t|s_t) R$. 

- Label is a variable corresponding to $a_t$, the action the policy selected. 

- output is the policy network that maps an observation to a probability of taking an action.
Ejemplo n.º 32
0
def dcgan_generator(h):
    """Build the DCGAN generator graph on top of ``h``.

    ``h`` is reshaped to ``(-1, 1, 1)`` and expanded to a 4x4 map with 1024
    filters. Five stride-2 transposed convolutions (each followed by batch
    norm and relu) and a final stride-2 transposed convolution with ``tanh``
    then produce 3 maps of size ``(img_height, img_width)`` (module-level
    names).
    """
    # (num_filters, output-size divisor) for the intermediate upsampling stages.
    upsampling_stages = ((512, 32), (256, 16), (128, 8), (64, 4), (32, 2))

    with C.layers.default_options(init=C.normal(0.02),
                                  pad=True,
                                  bias=False,
                                  map_rank=1,
                                  use_cntk_engine=True):
        h = C.reshape(h, (-1, 1, 1))

        # Initial projection to a 4x4 map; the only stage without padding.
        h = ConvolutionTranspose2D((4, 4), 1024, pad=False, strides=1, output_shape=(4, 4))(h)
        h = C.relu(BatchNormalization()(h))

        for num_filters, divisor in upsampling_stages:
            h = ConvolutionTranspose2D(
                (5, 5),
                num_filters,
                strides=2,
                output_shape=(img_height // divisor, img_width // divisor))(h)
            h = C.relu(BatchNormalization()(h))

        # Output stage: the only layer with a bias, squashed by tanh.
        h = ConvolutionTranspose2D((5, 5), 3, strides=2, bias=True,
                                   output_shape=(img_height, img_width))(h)
        return C.tanh(h)
Ejemplo n.º 33
0
def policy_gradient():
    """Train a small policy network with policy gradients and save it.

    Builds a one-hidden-layer network (relu hidden layer, sigmoid output),
    plays episodes on the module-level ``env``, accumulates the gradients of
    the loss with respect to W1 and W2 after each finished episode, and
    applies them with SGD every ``BATCH_SIZE_BASELINE`` episodes. Training
    stops after ``TOTAL_EPISODES`` episodes or once the average reward exceeds
    ``REWARD_TARGET``. The ``probability`` function is finally saved to
    'pg.mod'.

    Relies on names defined outside this function: ``isFast``, ``STATE_COUNT``,
    ``ACTION_COUNT``, ``BATCH_SIZE_BASELINE``, ``REWARD_TARGET``, ``env``,
    ``discount_rewards``, ``learning_rate_schedule``, ``UnitType``, ``input``,
    ``np`` and ``numpy``.

    NOTE(review): ``input`` is presumably CNTK's input-variable factory
    imported at module level rather than the builtin - confirm.
    """
    import cntk as C

    TOTAL_EPISODES = 2000 if isFast else 10000

    H = 100 # number of hidden layer neurons

    observations = input(STATE_COUNT, np.float32, name="obs")

    # Hidden layer: relu(observations * W1 + b1)
    W1 = C.parameter(shape=(STATE_COUNT, H), init=C.glorot_uniform(), name="W1")
    b1 = C.parameter(shape=H, name="b1")
    layer1 = C.relu(C.times(observations, W1) + b1)

    # Output layer: one score per action
    W2 = C.parameter(shape=(H, ACTION_COUNT), init=C.glorot_uniform(), name="W2")
    b2 = C.parameter(shape=ACTION_COUNT, name="b2")
    score = C.times(layer1, W2) + b2
    # Until here it was similar to DQN

    probability = C.sigmoid(score, name="prob")
    input_y = input(1, np.float32, name="input_y")
    advantages = input(1, np.float32, name="advt")

    # Negative mean (over axis 0) of log((input_y - probability)^2 + 1e-4),
    # weighted by the advantages; the 1e-4 keeps the log argument positive.
    loss = -C.reduce_mean(C.log(C.square(input_y - probability) + 1e-4) * advantages, axis=0, name='loss')

    lr = 1e-4
    lr_schedule = learning_rate_schedule(lr, UnitType.sample)
    # NOTE(review): only W1 and W2 are given to the learner (and to the
    # backward pass below), so b1 and b2 are never updated - confirm intended.
    sgd = C.sgd([W1, W2], lr_schedule)

    # Per-parameter gradient accumulators, keyed by parameter name.
    gradBuffer = dict((var.name, np.zeros(shape=var.shape)) for var in loss.parameters if var.name in ['W1', 'W2', 'b1', 'b2'])

    # NOTE(review): `hs` and `running_reward` are never read in this function.
    xs, hs, label, drs = [], [], [], []
    running_reward = None
    reward_sum = 0
    episode_number = 1

    observation = env.reset()
    # NOTE(review): action_space is indexed like a mapping here, so `env` is
    # presumably a custom environment rather than a stock gym one - confirm.
    actionlist = [i for i in range(env.action_space['n']) ]
#%%
    while episode_number <= TOTAL_EPISODES:
        x = np.reshape(observation, [1, STATE_COUNT]).astype(np.float32)

        # Run the policy network and get an action to take.
        #prob = probability.eval(arguments={observations: x})[0][0][0]
        prob = probability.eval(arguments={observations: x})
        # Normalize the network outputs so they sum to 1 and can be used as
        # sampling weights over the actions.
        normalized_weights = (prob / np.sum(prob))[0][0]
        # NOTE(review): uses `numpy` while the rest of the function uses `np`;
        # both names must be imported at module level.
        action = numpy.random.choice(actionlist, p=normalized_weights)
        #action = 1 if np.random.uniform() < prob else 0

        xs.append(x)  # observation
        # grad that encourages the action that was taken to be taken

        y = 1 if action == 0 else 0  # a "fake label"
        label.append(y)

        # step the environment and get new measurements
        observation, reward, done, info = env.step(action)
        reward_sum += float(reward)

        # Record reward (has to be done after we call step() to get reward for previous action)
        drs.append(float(reward))

        if done:
            # Stack together all inputs, hidden states, action gradients, and rewards for this episode
            epx = np.vstack(xs)
            epl = np.vstack(label).astype(np.float32)
            epr = np.vstack(drs).astype(np.float32)
            xs, label, drs = [], [], []  # reset array memory

            # Compute the discounted reward backwards through time.
            discounted_epr = discount_rewards(epr)
            # Size the rewards to be unit normal (helps control the gradient estimator variance)
            discounted_epr -= np.mean(discounted_epr)
            discounted_epr /= (np.std(discounted_epr) + 0.000000000001)

            # Forward pass
            arguments = {observations: epx, input_y: epl, advantages: discounted_epr}
            state, outputs_map = loss.forward(arguments, outputs=loss.outputs,
                                              keep_for_backward=loss.outputs)

            # Backward pass: seed every output with a gradient of ones.
            root_gradients = {v: np.ones_like(o) for v, o in outputs_map.items()}
            vargrads_map = loss.backward(state, root_gradients, variables=set([W1, W2]))

            # Accumulate this episode's gradients into the running buffers.
            for var, grad in vargrads_map.items():
                gradBuffer[var.name] += grad

            # Wait for some batches to finish to reduce noise
            if episode_number % BATCH_SIZE_BASELINE == 0:
                grads = {W1: gradBuffer['W1'].astype(np.float32),
                         W2: gradBuffer['W2'].astype(np.float32)}
                updated = sgd.update(grads, BATCH_SIZE_BASELINE)

                # reset the gradBuffer
                gradBuffer = dict((var.name, np.zeros(shape=var.shape))
                                  for var in loss.parameters if var.name in ['W1', 'W2', 'b1', 'b2'])

                print('Episode: %d. Average reward for episode %f.' % (episode_number, reward_sum / BATCH_SIZE_BASELINE))

                if reward_sum / BATCH_SIZE_BASELINE > REWARD_TARGET:
                    print('Task solved in: %d ' % episode_number)
                    break

                # The reward total is only reset at batch boundaries, so it
                # accumulates over BATCH_SIZE_BASELINE episodes.
                reward_sum = 0
            observation = env.reset()  # reset env
            episode_number += 1
    probability.save('pg.mod')