Example #1
def create_convnet_cifar10_model(num_classes):
    with default_options(activation=relu, pad=True):
        return Sequential([
            For(range(2), lambda: [
                Convolution2D((3, 3), 64),
                Convolution2D((3, 3), 64),
                MaxPooling((3, 3), strides=2)
            ]),
            For(range(2), lambda i: [
                Dense([256, 128][i]),
                Dropout(0.5)
            ]),
            Dense(num_classes, activation=None)
        ])
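A minimal usage sketch (not part of the original snippet): the variable names, the CIFAR-10 input shape, and the class count below are illustrative assumptions.

import cntk as C
from cntk.layers import default_options, Sequential, For, Convolution2D, MaxPooling, Dense, Dropout
from cntk.ops import relu

# Assumed wiring: apply the layer stack to a CIFAR-10 sized input and attach a loss.
features = C.input_variable((3, 32, 32))   # (channels, height, width)
labels = C.input_variable(10)              # one-hot class labels
z = create_convnet_cifar10_model(10)(features)
loss = C.cross_entropy_with_softmax(z, labels)
metric = C.classification_error(z, labels)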
Example #2
File: models.py  Project: patykov/TCC
def create_vgg16(feature_var, num_classes, dropout=0.9):

    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature
            # extraction (usually before ReLU)
            For(
                range(2), lambda i: [
                    Convolution2D((3, 3), 64, name='conv1_{}'.format(i)),
                    Activation(activation=relu, name='relu1_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool1'),
            For(
                range(2), lambda i: [
                    Convolution2D((3, 3), 128, name='conv2_{}'.format(i)),
                    Activation(activation=relu, name='relu2_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool2'),
            For(
                range(3), lambda i: [
                    Convolution2D((3, 3), 256, name='conv3_{}'.format(i)),
                    Activation(activation=relu, name='relu3_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool3'),
            For(
                range(3), lambda i: [
                    Convolution2D((3, 3), 512, name='conv4_{}'.format(i)),
                    Activation(activation=relu, name='relu4_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool4'),
            For(
                range(3), lambda i: [
                    Convolution2D((3, 3), 512, name='conv5_{}'.format(i)),
                    Activation(activation=relu, name='relu5_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool5'),
            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(dropout, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(dropout, name='drop7'),
            Dense(num_classes, name='fc8')
        ])(feature_var)

    return z
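Because each convolution here is named, a pre-activation feature map can later be pulled out of the graph by name. The follow-up below is illustrative only; the 3x224x224 input shape and the 1000-class output are assumptions, not values taken from the original project.

import cntk as C

feature_var = C.input_variable((3, 224, 224))        # assumed VGG-style input
model = create_vgg16(feature_var, num_classes=1000)  # assumed class count
conv5_2 = model.find_by_name('conv5_2')              # pre-ReLU output of the last conv block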
Example #3
def wgan_critic(h):
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False):
        h = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(h),
                         alpha=0.2)

        h = C.leaky_relu(LayerNormalization()(Convolution2D((3, 3),
                                                            64,
                                                            strides=2)(h)),
                         alpha=0.2)
        h = C.leaky_relu(LayerNormalization()(Convolution2D((3, 3),
                                                            128,
                                                            strides=2)(h)),
                         alpha=0.2)
        h = C.leaky_relu(LayerNormalization()(Convolution2D((3, 3),
                                                            256,
                                                            strides=2)(h)),
                         alpha=0.2)
        h = C.leaky_relu(LayerNormalization()(Convolution2D((3, 3),
                                                            512,
                                                            strides=2)(h)),
                         alpha=0.2)
        h = C.leaky_relu(LayerNormalization()(Convolution2D((3, 3),
                                                            1024,
                                                            strides=2)(h)),
                         alpha=0.2)

        h = Convolution2D((4, 4), 1, pad=False, strides=1, bias=True)(h)
        return h
Example #4
def pix2pix_generator(h):
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False, map_rank=1, use_cntk_engine=True):
        h_enc1 = C.leaky_relu(Convolution2D((4, 4), 64, strides=2, bias=True)(h), alpha=0.2)
        h_enc2 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 128, strides=2)(h_enc1)), alpha=0.2)
        h_enc3 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 256, strides=2)(h_enc2)), alpha=0.2)
        h_enc4 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 512, strides=2)(h_enc3)), alpha=0.2)
        h_enc5 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 512, strides=2)(h_enc4)), alpha=0.2)
        h_enc6 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 512, strides=2)(h_enc5)), alpha=0.2)
        h_enc7 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 512, strides=1)(h_enc6)), alpha=0.2)
        h_enc8 = C.leaky_relu(BatchNormalization()(Convolution2D((4, 4), 512, strides=1)(h_enc7)), alpha=0.2)

        h_dec8 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 512, strides=1, pad=True, output_shape=(img_height // 64, img_width // 64))(h_enc8)))
        h_dec8 = C.splice(h_dec8, h_enc8, axis=0)
        h_dec8 = C.relu(h_dec8)

        h_dec7 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 512, strides=1, pad=True, output_shape=(img_height // 64, img_width // 64))(h_dec8)))
        h_dec7 = C.splice(h_dec7, h_enc7, axis=0)
        h_dec7 = C.relu(h_dec7)

        h_dec6 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 512, strides=1, pad=True, output_shape=(img_height // 64, img_width // 64))(h_dec7)))
        h_dec6 = C.splice(h_dec6, h_enc6, axis=0)
        h_dec6 = C.relu(h_dec6)

        h_dec5 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 512, strides=2, pad=True, output_shape=(img_height // 32, img_width // 32))(h_dec6)))
        h_dec5 = C.splice(h_dec5, h_enc5, axis=0)
        h_dec5 = C.relu(h_dec5)

        h_dec4 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 512, strides=2, pad=True, output_shape=(img_height // 16, img_width // 16))(h_dec5)))
        h_dec4 = C.splice(h_dec4, h_enc4, axis=0)
        h_dec4 = C.relu(h_dec4)

        h_dec3 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 256, strides=2, pad=True, output_shape=(img_height // 8, img_width // 8))(h_dec4)))
        h_dec3 = C.splice(h_dec3, h_enc3, axis=0)
        h_dec3 = C.relu(h_dec3)

        h_dec2 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 128, strides=2, pad=True, output_shape=(img_height // 4, img_width // 4))(h_dec3)))
        h_dec2 = C.splice(h_dec2, h_enc2, axis=0)
        h_dec2 = C.relu(h_dec2)

        h_dec1 = Dropout(0.5)(BatchNormalization()(ConvolutionTranspose2D(
            (4, 4), 64, strides=2, pad=True, output_shape=(img_height // 2, img_width // 2))(h_dec2)))
        h_dec1 = C.splice(h_dec1, h_enc1, axis=0)
        h_dec1 = C.relu(h_dec1)

        h = ConvolutionTranspose2D((4, 4), 3, activation=C.tanh, strides=2, pad=True, bias=True,
                                   output_shape=(img_height, img_width))(h_dec1)

        return h
Example #5
def create_symbol():
    # Weight initialiser from uniform distribution
    # Activation (unless stated otherwise) is relu; the output layer overrides it with None
    with cntk.layers.default_options(init=cntk.glorot_uniform(),
                                     activation=cntk.relu):
        x = Convolution2D(filter_shape=(3, 3), num_filters=50,
                          pad=True)(features)
        x = Convolution2D(filter_shape=(3, 3), num_filters=50, pad=True)(x)
        x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)
        x = Dropout(0.25)(x)

        x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)
        x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)
        x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)
        x = Dropout(0.25)(x)

        x = Dense(512)(x)
        x = Dropout(0.5)(x)
        x = Dense(N_CLASSES, activation=None)(x)
        return x
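The snippet above references features and N_CLASSES, which are defined elsewhere in its project. The sketch below shows one plausible set of surrounding definitions; the CIFAR-10 style shapes and the class count are assumptions chosen for illustration.

import cntk
from cntk.layers import Convolution2D, MaxPooling, Dense, Dropout

# Assumed definitions for the free names used by create_symbol(); shapes are illustrative.
N_CLASSES = 10
features = cntk.input_variable((3, 32, 32))
labels = cntk.input_variable(N_CLASSES)

sym = create_symbol()
loss = cntk.cross_entropy_with_softmax(sym, labels)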
Example #6
def pix2pix_discriminator(y, x):
    with C.layers.default_options(init=C.normal(0.02), pad=True, bias=False, map_rank=1, use_cntk_engine=True):
        x = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(x), alpha=0.2)
        y = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(y), alpha=0.2)

        h = C.splice(x, y, axis=0)

        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 128, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 256, strides=2)(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3), 512, strides=2)(h)), alpha=0.2)

        h = Convolution2D((1, 1), 1, activation=None, bias=True)(h)

        return h
Example #7
def test_layers_convolution_2d():
    inC, inH, inW = 1, 3, 3
    y = input((inC, inH, inW))

    dat = np.ones([1, inC, inH, inW], dtype=np.float32)

    model = Convolution2D((3, 3),
                          num_filters=1,
                          activation=None,
                          pad=False,
                          strides=1, name='foo')
    # output shape should be (1, 1, 1): 3x3 kernel over a 3x3 input, stride 1, no padding
    model_shape = model(y).foo.shape
    np.testing.assert_array_equal(model_shape, (1, 1, 1), \
        "Error in convolution2D with stride = 1 and no padding")

    res = model(y).eval({y: dat})

    expected_res = np.sum(model.foo.W.value)

    np.testing.assert_array_almost_equal(res[0][0][0][0], expected_res, decimal=5, \
        err_msg="Error in convolution2D computation with stride = 1 and pad = False")
Example #8
def dcgan_discriminator(h):
    with C.layers.default_options(init=C.normal(0.02),
                                  pad=True,
                                  bias=False,
                                  map_rank=1,
                                  use_cntk_engine=True):
        h = C.leaky_relu(Convolution2D((3, 3), 32, strides=2, bias=True)(h),
                         alpha=0.2)

        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3),
                                                            64,
                                                            strides=2)(h)),
                         alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3),
                                                            128,
                                                            strides=2)(h)),
                         alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3),
                                                            256,
                                                            strides=2)(h)),
                         alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3),
                                                            512,
                                                            strides=2)(h)),
                         alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((3, 3),
                                                            1024,
                                                            strides=2)(h)),
                         alpha=0.2)

        h = Convolution2D((4, 4),
                          1,
                          activation=C.sigmoid,
                          pad=False,
                          bias=True,
                          strides=1)(h)

        return h
Example #9
def create_alexnet():
    # Input variables denoting the features and label data
    feature_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # apply model to input
    # remove mean value
    mean_removed_features = minus(feature_var,
                                  constant(114),
                                  name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU)
            Convolution2D((11, 11),
                          96,
                          init=normal(0.01),
                          pad=False,
                          strides=(4, 4),
                          name='conv1'),
            Activation(activation=relu, name='relu1'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm1'),
            MaxPooling((3, 3), (2, 2), name='pool1'),
            Convolution2D((5, 5),
                          192,
                          init=normal(0.01),
                          init_bias=0.1,
                          name='conv2'),
            Activation(activation=relu, name='relu2'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm2'),
            MaxPooling((3, 3), (2, 2), name='pool2'),
            Convolution2D((3, 3), 384, init=normal(0.01), name='conv3'),
            Activation(activation=relu, name='relu3'),
            Convolution2D((3, 3),
                          384,
                          init=normal(0.01),
                          init_bias=0.1,
                          name='conv4'),
            Activation(activation=relu, name='relu4'),
            Convolution2D((3, 3),
                          256,
                          init=normal(0.01),
                          init_bias=0.1,
                          name='conv5'),
            Activation(activation=relu, name='relu5'),
            MaxPooling((3, 3), (2, 2), name='pool5'),
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, init=normal(0.01), name='fc8')
        ])(mean_removed_features)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z
    }
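A minimal training-wiring sketch for the dictionary returned above; the SGD learner, learning rate, and schedule below are placeholders rather than anything taken from the original script.

import cntk as C

network = create_alexnet()
lr = C.learning_rate_schedule(0.01, C.UnitType.minibatch)   # illustrative rate
learner = C.learners.sgd(network['output'].parameters, lr)
trainer = C.Trainer(network['output'], (network['ce'], network['pe']), [learner])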
Example #10
File: DQNcar.py  Project: zld3002/airsim
    def __init__(self,
                 input_shape,
                 nb_actions,
                 gamma=0.99,
                 explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000),
                 learning_rate=0.00025,
                 momentum=0.95,
                 minibatch_size=32,
                 memory_size=500000,
                 train_after=200000,
                 train_interval=4,
                 target_update_interval=10000,
                 monitor=True):
        self.input_shape = input_shape
        self.nb_actions = nb_actions
        self.gamma = gamma

        self._train_after = train_after
        self._train_interval = train_interval
        self._target_update_interval = target_update_interval

        self._explorer = explorer
        self._minibatch_size = minibatch_size
        self._history = History(input_shape)
        self._memory = ReplayMemory(memory_size, input_shape[1:], 4)
        self._num_actions_taken = 0

        # Metrics accumulator
        self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

        # Action Value model (used by agent to interact with the environment)
        with default_options(activation=relu, init=he_uniform()):
            self._action_value_net = Sequential([
                Convolution2D((8, 8), 16, strides=4),
                Convolution2D((4, 4), 32, strides=2),
                Convolution2D((3, 3), 32, strides=1),
                Dense(256, init=he_uniform(scale=0.01)),
                Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))
            ])
        self._action_value_net.update_signature(Tensor[input_shape])

        # Target model used to compute the target Q-values in training, updated
        # less frequently for increased stability.
        self._target_net = self._action_value_net.clone(CloneMethod.freeze)

        # Function computing Q-value targets as part of the computation graph
        @Function
        @Signature(post_states=Tensor[input_shape],
                   rewards=Tensor[()],
                   terminals=Tensor[()])
        def compute_q_targets(post_states, rewards, terminals):
            return element_select(
                terminals,
                rewards,
                gamma * reduce_max(self._target_net(post_states), axis=0) +
                rewards,
            )

        # Define the loss, using Huber Loss (more robust to outliers)
        @Function
        @Signature(pre_states=Tensor[input_shape],
                   actions=Tensor[nb_actions],
                   post_states=Tensor[input_shape],
                   rewards=Tensor[()],
                   terminals=Tensor[()])
        def criterion(pre_states, actions, post_states, rewards, terminals):
            # Compute the q_targets
            q_targets = compute_q_targets(post_states, rewards, terminals)

            # actions is a 1-hot encoding of the action done by the agent
            q_acted = reduce_sum(self._action_value_net(pre_states) * actions,
                                 axis=0)

            # Define training criterion as the Huber Loss function
            return huber_loss(q_targets, q_acted, 1.0)

        # Adam based SGD
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._action_value_net.parameters,
                     lr_schedule,
                     momentum=m_schedule,
                     variance_momentum=vm_schedule)

        self._metrics_writer = TensorBoardProgressWriter(
            freq=1, log_dir='metrics', model=criterion) if monitor else None
        self._learner = l_sgd
        self._trainer = Trainer(criterion, (criterion, None), l_sgd,
                                self._metrics_writer)
Example #11
def create_vgg19():

    # Input variables denoting the features and label data
    feature_var = input((num_channels, image_height, image_width))
    label_var = input((num_classes))

    # apply model to input
    # remove mean value
    input = minus(feature_var,
                  constant([[[104]], [[117]], [[124]]]),
                  name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU)
            For(
                range(2), lambda i: [
                    Convolution2D((3, 3), 64, name='conv1_{}'.format(i)),
                    Activation(activation=relu, name='relu1_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool1'),
            For(
                range(2), lambda i: [
                    Convolution2D((3, 3), 128, name='conv2_{}'.format(i)),
                    Activation(activation=relu, name='relu2_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool2'),
            For(
                range(4), lambda i: [
                    Convolution2D((3, 3), 256, name='conv3_{}'.format(i)),
                    Activation(activation=relu, name='relu3_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool3'),
            For(
                range(4), lambda i: [
                    Convolution2D((3, 3), 512, name='conv4_{}'.format(i)),
                    Activation(activation=relu, name='relu4_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool4'),
            For(
                range(4), lambda i: [
                    Convolution2D((3, 3), 512, name='conv5_{}'.format(i)),
                    Activation(activation=relu, name='relu5_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool5'),
            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, name='fc8')
        ])(input)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z
    }
Example #12
    def build_model(self):

        cntk.debugging.set_checked_mode(True)

        # Defining the input variables for training and evaluation.
        self.stacked_frames = cntk.input_variable(
            (1, self.STATE_WIDTH, self.STATE_HEIGHT), dtype=np.float32)
        #self.stacked_frames = cntk.input_variable((1, 84, 84), dtype=np.float32)
        self.action = cntk.input_variable(self.num_actions)
        self.R = cntk.input_variable(1, dtype=np.float32)
        self.v_calc = cntk.input_variable(
            1, dtype=np.float32
        )  # In the loss of pi, the parameters of V(s) should be fixed.

        # Creating the value approximator extension.
        conv1_v = Convolution2D((8, 8),
                                num_filters=16,
                                pad=False,
                                strides=4,
                                activation=cntk.relu,
                                name='conv1_v')
        conv2_v = Convolution2D((4, 4),
                                num_filters=32,
                                pad=False,
                                strides=2,
                                activation=cntk.relu,
                                name='conv2_v')
        dense_v = Dense(256,
                        activation=cntk.sigmoid,
                        name='dense_v',
                        init=cntk.xavier())
        v = Sequential([
            conv1_v, conv2_v, dense_v,
            Dense(1,
                  activation=cntk.sigmoid,
                  name='outdense_v',
                  init=cntk.xavier())
        ])

        # Creating the policy approximator extension.
        conv1_pi = Convolution2D((8, 8),
                                 num_filters=16,
                                 pad=False,
                                 strides=4,
                                 activation=cntk.relu,
                                 name='conv1_pi')
        conv2_pi = Convolution2D((4, 4),
                                 num_filters=32,
                                 pad=False,
                                 strides=2,
                                 activation=cntk.relu,
                                 name='conv2_pi')
        dense_pi = Dense(256,
                         activation=cntk.sigmoid,
                         name='dense_pi',
                         init=cntk.xavier())
        pi = Sequential([
            conv1_pi, conv2_pi, dense_pi,
            Dense(self.num_actions,
                  activation=cntk.softmax,
                  name='outdense_pi',
                  init=cntk.xavier())
        ])

        self.pi = pi(self.stacked_frames)
        self.pms_pi = self.pi.parameters  # List of cntk Parameter types (contains the function's parameters)
        self.v = v(self.stacked_frames)
        self.pms_v = self.v.parameters

        cntk.debugging.debug_model(v)
Example #13
def main(env_name='KungFuMasterNoFrameskip-v0',
         train_freq=4,
         target_update_freq=10000,
         checkpoint_freq=100000,
         log_freq=1,
         batch_size=32,
         train_after=200000,
         max_timesteps=5000000,
         buffer_size=50000,
         vmin=-10,
         vmax=10,
         n=51,
         gamma=0.99,
         final_eps=0.1,
         final_eps_update=1000000,
         learning_rate=0.00025,
         momentum=0.95):
    env = gym.make(env_name)
    env = wrap_env(env)

    state_dim = (4, 84, 84)
    action_count = env.action_space.n

    with C.layers.default_options(activation=C.relu, init=C.he_uniform()):
        model_func = Sequential([
            Convolution2D((8, 8), 32, strides=4, name='conv1'),
            Convolution2D((4, 4), 64, strides=2, name='conv2'),
            Convolution2D((3, 3), 64, strides=1, name='conv3'),
            Dense(512, name='dense1'),
            Dense((action_count, n), activation=None, name='out')
        ])

    agent = CategoricalAgent(state_dim, action_count, model_func, vmin, vmax, n, gamma,
                             lr=learning_rate, mm=momentum, use_tensorboard=True)
    logger = agent.writer

    epsilon_schedule = LinearSchedule(1.0, final_eps, final_eps_update)
    replay_buffer = ReplayBuffer(buffer_size)

    try:
        obs = env.reset()
        episode = 0
        rewards = 0
        steps = 0

        for t in range(max_timesteps):
            # Take action
            if t > train_after:
                action = agent.act(obs, epsilon=epsilon_schedule.value(t))
            else:
                action = np.random.choice(action_count)
            obs_, reward, done, _ = env.step(action)

            # Store transition in replay buffer
            replay_buffer.add(obs, action, reward, obs_, float(done))

            obs = obs_
            rewards += reward

            if t > train_after and (t % train_freq) == 0:
                # Minimize error in projected Bellman update on a batch sampled from replay buffer
                experience = replay_buffer.sample(batch_size)
                agent.train(*experience)  # experience is (s, a, r, s_, t) tuple
                logger.write_value('loss', agent.trainer.previous_minibatch_loss_average, t)

            if t > train_after and (t % target_update_freq) == 0:
                agent.update_target()

            if t > train_after and (t % checkpoint_freq) == 0:
                agent.checkpoint('checkpoints/model_{}.chkpt'.format(t))

            if done:
                episode += 1
                obs = env.reset()

                if episode % log_freq == 0:
                    steps = t - steps + 1

                    logger.write_value('rewards', rewards, episode)
                    logger.write_value('steps', steps, episode)
                    logger.write_value('epsilon', epsilon_schedule.value(t), episode)
                    logger.flush()

                rewards = 0
                steps = t

    finally:
        agent.save_model('checkpoints/{}.cdqn'.format(env_name))
Example #14
from autcar import Trainer
from cntk.layers import Dense, Sequential, Activation, Convolution2D, MaxPooling, Dropout, BatchNormalization
from cntk import softmax, relu

input_folder_path = "src/ml/data/autcar_training"
output_folder_path = "src/ml/data/autcar_training_balanced"
image_width = 224
image_height = 168

trainer = Trainer(deeplearning_framework="cntk", image_height=image_height, image_width=image_width)
trainer.create_balanced_dataset(input_folder_path, output_folder_path=output_folder_path)

model = Sequential([
    Convolution2D(filter_shape=(5,5), num_filters=32, strides=(1,1), pad=True, name="first_conv"),
    BatchNormalization(map_rank=1),
    Activation(relu),
    MaxPooling(filter_shape=(3,3), strides=(2,2), name="first_max"),
    Convolution2D(filter_shape=(3,3), num_filters=48, strides=(1,1), pad=True, name="second_conv"),
    BatchNormalization(map_rank=1),
    Activation(relu),
    MaxPooling(filter_shape=(3,3), strides=(2,2), name="second_max"),
    Convolution2D(filter_shape=(3,3), num_filters=64, strides=(1,1), pad=True, name="third_conv"),
    BatchNormalization(map_rank=1),
    Activation(relu),
    MaxPooling(filter_shape=(3,3), strides=(2,2), name="third_max"),
    Convolution2D(filter_shape=(5,5), num_filters=32, strides=(1,1), pad=True, name="fourth_conv"),
    BatchNormalization(map_rank=1),
    Activation(relu),
    Dense(100, activation=relu),
    Dropout(0.1),
    Dense(12, activation=softmax)
])
Example #15
def residual_block(h, num_filters):
    with C.layers.default_options(init=C.normal(0.02), pad=True, strides=1, bias=False):
        h1 = C.relu(InstanceNormalization((num_filters, 1, 1))(Convolution2D((3, 3), num_filters)(h)))
        h2 = InstanceNormalization((num_filters, 1, 1))(Convolution2D((3, 3), num_filters)(h1))
        return h2 + h
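Blocks like this are typically stacked back to back; the helper below is an illustrative sketch (not in the source) that chains a few of them.

def residual_tower(h, num_filters, num_blocks=3):
    # Chain several residual blocks; num_blocks is an assumed, illustrative depth.
    for _ in range(num_blocks):
        h = residual_block(h, num_filters)
    return h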
Example #16
    def __init__(self,
                 state_dim,
                 action_dim,
                 gamma=0.99,
                 learning_rate=1e-4,
                 momentum=0.95):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma

        with default_options(activation=relu, init=he_uniform()):
            # Convolution filter counts were halved to save on memory, no gpu :(
            self.model = Sequential([
                Convolution2D((8, 8), 16, strides=4, name='conv1'),
                Convolution2D((4, 4), 32, strides=2, name='conv2'),
                Convolution2D((3, 3), 32, strides=1, name='conv3'),
                Dense(256, init=he_uniform(scale=0.01), name='dense1'),
                Dense(action_dim,
                      activation=None,
                      init=he_uniform(scale=0.01),
                      name='actions')
            ])
            self.model.update_signature(Tensor[state_dim])

        # Create the target model as a copy of the online model
        self.target_model = None
        self.update_target()

        self.pre_states = input_variable(state_dim, name='pre_states')
        self.actions = input_variable(action_dim, name='actions')
        self.post_states = input_variable(state_dim, name='post_states')
        self.rewards = input_variable((), name='rewards')
        self.terminals = input_variable((), name='terminals')
        self.is_weights = input_variable((), name='is_weights')

        predicted_q = reduce_sum(self.model(self.pre_states) * self.actions,
                                 axis=0)

        # DQN - calculate target q values
        # post_q = reduce_max(self.target_model(self.post_states), axis=0)

        # DDQN - calculate target q values
        online_selection = one_hot(
            argmax(self.model(self.post_states), axis=0), self.action_dim)
        post_q = reduce_sum(self.target_model(self.post_states) *
                            online_selection,
                            axis=0)

        post_q = (1.0 - self.terminals) * post_q
        target_q = stop_gradient(self.rewards + self.gamma * post_q)

        # Huber loss
        delta = 1.0
        self.td_error = minus(predicted_q, target_q, name='td_error')
        abs_error = abs(self.td_error)
        errors = element_select(less(abs_error, delta),
                                square(self.td_error) * 0.5,
                                delta * (abs_error - 0.5 * delta))
        loss = errors * self.is_weights

        # Adam based SGD
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)

        self._learner = adam(self.model.parameters,
                             lr_schedule,
                             m_schedule,
                             variance_momentum=vm_schedule)
        self.writer = TensorBoardProgressWriter(log_dir='metrics',
                                                model=self.model)
        self.trainer = Trainer(self.model, (loss, None), [self._learner],
                               self.writer)