Example #1
 def _build_model(self):
     with default_options(init=he_uniform(), activation=relu, bias=True):
         model = Sequential([
             Convolution((4, 4), 64, strides=(2, 2), name='conv1'),
             Convolution((3, 3), 64, strides=(1, 1), name='conv2'),
             Dense(512, name='dense1', init=he_normal(0.01)),
             Dense(self._nb_actions, activation=None, init=he_normal(0.01), name='qvalues')
         ])
         return model
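
The default_options context manager supplies init, activation, and bias to every layer inside it unless a layer overrides them, as the 'qvalues' layer does with activation=None. As a sketch (not part of the original), the first convolution is equivalent to writing those defaults out explicitly:

from cntk.initializer import he_uniform
from cntk.layers import Convolution
from cntk.ops import relu

# conv1 with the context-manager defaults spelled out by hand
conv1 = Convolution((4, 4), 64, strides=(2, 2), init=he_uniform(),
                    activation=relu, bias=True, name='conv1')
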
Example #2
    def __init__(self, input_shape, nb_actions,
                 gamma=0.99, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000),
                 learning_rate=0.00025, momentum=0.95, minibatch_size=32,
                 memory_size=500000, train_after=10000, train_interval=4,
                 target_update_interval=10000, monitor=True):
        self.input_shape = input_shape
        self.nb_actions = nb_actions
        self.gamma = gamma
        self._train_after = train_after
        self._train_interval = train_interval
        self._target_update_interval = target_update_interval
        self._explorer = explorer
        self._minibatch_size = minibatch_size
        self._history = History(input_shape)
        self._memory = RepMem(memory_size, input_shape[1:], 4)
        self._num_actions_taken = 0
        # Metrics accumulators
        self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

        # Action-value model (used by the agent to interact with the environment)
        with default_options(activation=relu, init=he_uniform()):
            self._action_value_net = Sequential([
                Dense(input_shape, init=he_uniform(scale=0.01)),
                Dense(input_shape),
                Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))])

        self._action_value_net.update_signature(Tensor[input_shape])

        # Target model used to compute target Q-values during training; updated
        # less frequently than the online model for stability
        self._target_net = self._action_value_net.clone(CloneMethod.freeze)


        # Function computing the Q-value targets as part of the computation graph
        @Function
        @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def compute_q_targets(post_states, rewards, terminals):
            return element_select(
                terminals,
                rewards,
                gamma * reduce_max(self._target_net(post_states), axis=0) + rewards,
            )

        # Training criterion: Huber loss (more robust to outliers)
        @Function
        @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
                   post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def criterion(pre_states, actions, post_states, rewards, terminals):
            q_targets = compute_q_targets(post_states, rewards, terminals)

            q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

            return huber_loss(q_targets, q_acted, 1.0)

        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._action_value_net.parameters, lr_schedule,
                     momentum=m_schedule, variance_momentum=vm_schedule)

        self._metrics_writer = TensorBoardProgressWriter(freq=1, log_dir='metrics', model=criterion) if monitor else None
        self._learner = l_sgd
        self._trainer = Trainer(criterion, (criterion, None), l_sgd, self._metrics_writer)
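
Because criterion is declared with @Signature, the trainer can be fed by argument name. A minimal sketch of one training step, assuming the replay memory exposes a minibatch(size) method returning arrays in the signature's order, that actions are stored as integer indices, and that Value is imported from cntk.core:

    def _train_step(self):
        pre_states, actions, post_states, rewards, terminals = \
            self._memory.minibatch(self._minibatch_size)
        # map the sampled batch onto the criterion's named arguments
        self._trainer.train_minibatch(
            self._trainer.loss_function.argument_map(
                pre_states=pre_states,
                actions=Value.one_hot(actions.reshape(-1, 1).tolist(), self.nb_actions),
                post_states=post_states,
                rewards=rewards,
                terminals=terminals))
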
Example #3
 def _build_model(self):
     with default_options(init=he_uniform(), activation=relu, bias=True):
         model = Sequential([
             Convolution((8, 8), 32, strides=(4, 4)),
             Convolution((4, 4), 64, strides=(2, 2)),
             Convolution((3, 3), 64, strides=(1, 1)),
             Dense(512, init=he_normal(0.01)),
             Dense(self._nb_actions, activation=None, init=he_normal(0.01))
         ])
         return model
Example #4
 def _build_model(self):
     with default_options(init=he_uniform(), activation=relu, bias=True):
         model = Sequential([
             Convolution((8, 8), 32, strides=(4, 4)),
             Convolution((4, 4), 64, strides=(2, 2)),
             Convolution((3, 3), 64, strides=(1, 1)),
             Dense(512, init=he_normal(0.01)),
             Dense(self._nb_actions, activation=None, init=he_normal(0.01))
         ])
         return model
Example #5
    def create_model(self):

        mean_removed_features = minus(self.input,
                                      constant(114),
                                      name='mean_removed_input')

        with default_options(activation=None, pad=True, bias=True):
            self.model = Sequential([
                Convolution2D((11, 11),
                              96,
                              init=normal(0.01),
                              pad=False,
                              name='conv1'),
                Activation(activation=relu, name='relu1'),
                self.__local_response_normalization(1.0,
                                                    2,
                                                    0.0001,
                                                    0.75,
                                                    name='norm1'),
                MaxPooling((3, 3), (2, 2), name='pool1'),
                Convolution2D((5, 5),
                              192,
                              init=normal(0.01),
                              init_bias=0.1,
                              name='conv2'),
                Activation(activation=relu, name='relu2'),
                self.__local_response_normalization(1.0,
                                                    2,
                                                    0.0001,
                                                    0.75,
                                                    name='norm2'),
                MaxPooling((3, 3), (2, 2), name='pool2'),
                Convolution2D((3, 3), 384, init=normal(0.01), name='conv3'),
                Activation(activation=relu, name='relu3'),
                Convolution2D((3, 3),
                              384,
                              init=normal(0.01),
                              init_bias=0.1,
                              name='conv4'),
                Activation(activation=relu, name='relu4'),
                Convolution2D((3, 3),
                              256,
                              init=normal(0.01),
                              init_bias=0.1,
                              name='conv5'),
                Activation(activation=relu, name='relu5'),
                MaxPooling((3, 3), (2, 2), name='pool5'),
                Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'),
                Activation(activation=relu, name='relu6'),
                Dropout(0.5, name='drop6'),
                Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'),
                Activation(activation=relu, name='relu7'),
                Dropout(0.5, name='drop7'),
                Dense(self.number_labels, init=normal(0.01), name='fc8')
            ])(mean_removed_features)
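
The __local_response_normalization helper is referenced above but not shown. A sketch of how such an inter-channel LRN can be composed from primitive ops, following the pattern in CNTK's AlexNet sample (this composition is an assumption, not the original helper):

import cntk as C

def local_response_normalization(k, n, alpha, beta, name=''):
    x = C.placeholder(name='lrn_arg')
    x2 = C.square(x)
    # insert a fake singleton axis so the convolution sums across channels
    x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
    # averaging kernel over 2n+1 adjacent channels, scaled by alpha
    W = C.constant(alpha / (2 * n + 1), (1, 2 * n + 1, 1, 1), name='W')
    y = C.convolution(W, x2s)
    # drop the fake singleton axis again
    b = C.reshape(y, C.InferredDimension, 0, 2)
    # divide the input by (k + scaled channel average) ** beta
    den = C.exp(beta * C.log(k + b))
    return C.element_divide(x, den, name=name)
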
Example #6
def create_alexnet():

    # Input variables denoting the features and label data
    feature_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # apply model to input
    # remove mean value
    mean_removed_features = minus(feature_var, constant(114), name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU)
            Convolution2D((11,11), 96, init=normal(0.01), pad=False, strides=(4,4), name='conv1'),
            Activation(activation=relu, name='relu1'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm1'),
            MaxPooling((3,3), (2,2), name='pool1'),

            Convolution2D((5,5), 192, init=normal(0.01), init_bias=0.1, name='conv2'),
            Activation(activation=relu, name='relu2'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm2'),
            MaxPooling((3,3), (2,2), name='pool2'),

            Convolution2D((3,3), 384, init=normal(0.01), name='conv3'),
            Activation(activation=relu, name='relu3'),
            Convolution2D((3,3), 384, init=normal(0.01), init_bias=0.1, name='conv4'),
            Activation(activation=relu, name='relu4'),
            Convolution2D((3,3), 256, init=normal(0.01), init_bias=0.1, name='conv5'),
            Activation(activation=relu, name='relu5'),
            MaxPooling((3,3), (2,2), name='pool5'),

            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, init=normal(0.01), name='fc8')
            ])(mean_removed_features)

    # loss and metric
    ce  = cross_entropy_with_softmax(z, label_var)
    pe  = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce' : ce,
        'pe' : pe,
        'pe5': pe5,
        'output': z
    }
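
A sketch of wiring the returned dictionary into a trainer; the learner type and hyperparameters below are illustrative assumptions, not part of the original:

import cntk as C

network = create_alexnet()
lr_schedule = C.learning_rate_schedule(0.01, C.UnitType.minibatch)
mm_schedule = C.momentum_schedule(0.9)
learner = C.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule)
trainer = C.Trainer(network['output'], (network['ce'], network['pe']), [learner])
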
Example #7
def create_alexnet():

    # Input variables denoting the features and label data
    feature_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # apply model to input
    # remove mean value 
    mean_removed_features = minus(feature_var, constant(114), name='mean_removed_input')
    
    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU) 
            Convolution2D((11,11), 96, init=normal(0.01), pad=False, strides=(4,4), name='conv1'),
            Activation(activation=relu, name='relu1'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm1'),
            MaxPooling((3,3), (2,2), name='pool1'),

            Convolution2D((5,5), 192, init=normal(0.01), init_bias=0.1, name='conv2'), 
            Activation(activation=relu, name='relu2'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm2'),
            MaxPooling((3,3), (2,2), name='pool2'),

            Convolution2D((3,3), 384, init=normal(0.01), name='conv3'), 
            Activation(activation=relu, name='relu3'),
            Convolution2D((3,3), 384, init=normal(0.01), init_bias=0.1, name='conv4'), 
            Activation(activation=relu, name='relu4'),
            Convolution2D((3,3), 256, init=normal(0.01), init_bias=0.1, name='conv5'), 
            Activation(activation=relu, name='relu5'), 
            MaxPooling((3,3), (2,2), name='pool5'), 

            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, init=normal(0.01), name='fc8')
            ])(mean_removed_features)

    # loss and metric
    ce  = cross_entropy_with_softmax(z, label_var)
    pe  = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce' : ce,
        'pe' : pe,
        'pe5': pe5,
        'output': z
    }
Example #8
 def _build_model(self):
     with default_options(init=he_uniform(), activation=relu, bias=True):
         model = Sequential([
             Convolution((4, 4), 64, strides=(2, 2), name='conv1'),
             Convolution((3, 3), 64, strides=(1, 1), name='conv2'),
             Dense(512, name='dense1', init=he_normal(0.01)),
             Dense(self._nb_actions,
                   activation=None,
                   init=he_normal(0.01),
                   name='qvalues')
         ])
         return model
Example #9
def create_convnet_cifar10_model(num_classes):
    with default_options(activation=relu, pad=True):
        return Sequential([
            For(
                range(2), lambda: [
                    Convolution2D((3, 3), 64),
                    Convolution2D((3, 3), 64),
                    MaxPooling((3, 3), strides=2)
                ]),
            For(range(2), lambda i: [Dense([256, 128][i]),
                                     Dropout(0.5)]),
            Dense(num_classes, activation=None)
        ])
Example #10
def create_convnet_cifar10_model(num_classes):
    with default_options(activation=relu, pad=True):
        return Sequential([
            For(range(2), lambda : [
                Convolution2D((3,3), 64), 
                Convolution2D((3,3), 64), 
                MaxPooling((3,3), strides=2)
            ]), 
            For(range(2), lambda i: [
                Dense([256,128][i]), 
                Dropout(0.5)
            ]), 
            Dense(num_classes, activation=None)
        ])
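
For repeats the constructors returned by the lambda; a zero-argument lambda builds identical groups, while a one-argument lambda receives the iteration index (here selecting the layer width). The dense tail above therefore expands to roughly:

Sequential([
    Dense(256), Dropout(0.5),
    Dense(128), Dropout(0.5),
])
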
Example #11
def create_model(feature_dimensions, classes):
    with default_options(activation=relu, init=glorot_uniform()):
        model = Sequential([
            For(
                range(3), lambda i: [
                    Convolution((5, 5), [32, 32, 64][i], pad=True),
                    BatchNormalization(map_rank=1),
                    MaxPooling((3, 3), strides=(2, 2))
                ]),
            Dense(64),
            BatchNormalization(map_rank=1),
            Dense(len(classes), activation=None)
        ])

    return model(feature_dimensions)
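
Despite its name, feature_dimensions is applied to the model on the last line, so it is an input variable rather than a shape. A usage sketch with an assumed CIFAR-10-sized input and illustrative class names:

import cntk as C

features = C.input_variable((3, 32, 32))          # assumed input geometry
class_names = ['airplane', 'automobile', 'bird']  # illustrative labels
z = create_model(features, class_names)
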
Example #12
def create_vgg16(feature_var, num_classes, dropout=0.9):

    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature
            # extraction (usually before ReLU)
            For(
                range(2), lambda i: [
                    Convolution2D((3, 3), 64, name='conv1_{}'.format(i)),
                    Activation(activation=relu, name='relu1_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool1'),
            For(
                range(2), lambda i: [
                    Convolution2D((3, 3), 128, name='conv2_{}'.format(i)),
                    Activation(activation=relu, name='relu2_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool2'),
            For(
                range(3), lambda i: [
                    Convolution2D((3, 3), 256, name='conv3_{}'.format(i)),
                    Activation(activation=relu, name='relu3_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool3'),
            For(
                range(3), lambda i: [
                    Convolution2D((3, 3), 512, name='conv4_{}'.format(i)),
                    Activation(activation=relu, name='relu4_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool4'),
            For(
                range(3), lambda i: [
                    Convolution2D((3, 3), 512, name='conv5_{}'.format(i)),
                    Activation(activation=relu, name='relu5_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool5'),
            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(dropout, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(dropout, name='drop7'),
            Dense(num_classes, name='fc8')
        ])(feature_var)

    return z
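
Separating Convolution and Activation and naming every layer keeps the pre-ReLU outputs addressable for feature extraction. A sketch, assuming z was built by create_vgg16 (find_by_name is CNTK's standard graph lookup):

import cntk as C

fc6 = z.find_by_name('fc6')            # the pre-ReLU 'fc6' layer
feature_extractor = C.combine([fc6])   # evaluable sub-graph ending at fc6
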
Example #13
def create_advanced_model(input, out_dims):
    
    with default_options(activation=relu):
        model = Sequential([
            For(range(2), lambda i: [  # lambda with one parameter
                Convolution((3,3), [32,64][i], pad=True),  # depth depends on i
                Convolution((5,5), [32,64][i], pad=True),
                Convolution((9,9), [32,64][i], pad=True),            
                MaxPooling((3,3), strides=(2,2))
            ]),
            For(range(2), lambda : [   # lambda without parameter
                Dense(512),
                Dropout(0.5)
            ]),
            Dense(out_dims, activation=None)
        ])
    output_layer = model(input)
    
    return output_layer
Example #14
def create_network(num_convolution_layers):
    """ Create network

    """
    # Input variables denoting the features and label data
    input_var = cntk.input_variable(
        (_NUM_CHANNELS, _IMAGE_HEIGHT, _IMAGE_WIDTH))
    label_var = cntk.input_variable((_NUM_CLASSES))

    # create model, and configure learning parameters
    # Instantiate the feedforward classification model
    input_removemean = minus(input_var, constant(128))
    scaled_input = element_times(constant(0.00390625), input_removemean)

    print('Creating NN model')
    with layers.default_options(activation=relu, pad=True):
        model = layers.Sequential([
            layers.For(
                range(num_convolution_layers), lambda: [
                    layers.Convolution2D((3, 3), 64),
                    layers.Convolution2D((3, 3), 64),
                    layers.MaxPooling((3, 3), (2, 2))
                ]),
            layers.For(
                range(2),
                lambda i: [layers.Dense([256, 128][i]),
                           layers.Dropout(0.5)]),
            layers.Dense(_NUM_CLASSES, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cross_entropy_with_softmax(model, label_var)
    pe = classification_error(model, label_var)

    return {
        'name': 'convnet',
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'output': model
    }
Example #15
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 2
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)

    # z = Sequential([
    #     Dense(hidden_layers_dim, activation=relu),
    #     Dense(hidden_layers_dim, activation=relu),
    #     Dense(num_output_classes)])(scaled_input)

    with default_options(activation=relu, init=C.glorot_uniform()):
        z = Sequential([
            For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim)),
            Dense(num_output_classes, activation=None)
        ])(scaled_input)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    # setup the data
    path = abs_path + "\Train-28x28_cntk_text.txt"

    reader_train = MinibatchSource(
        CTFDeserializer(
            path,
            StreamDefs(features=StreamDef(field='features', shape=input_dim),
                       labels=StreamDef(field='labels',
                                        shape=num_output_classes))))

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    progress_writers = [
        ProgressPrinter(tag='Training', num_epochs=num_sweeps_to_train_with)
    ]

    # Instantiate the trainer object to drive the model training
    lr = learning_rate_schedule(1, UnitType.sample)
    trainer = Trainer(z, (ce, pe), [adadelta(z.parameters, lr)],
                      progress_writers)

    training_session(trainer=trainer,
                     mb_source=reader_train,
                     mb_size=minibatch_size,
                     model_inputs_to_streams=input_map,
                     max_samples=num_samples_per_sweep *
                     num_sweeps_to_train_with,
                     progress_frequency=num_samples_per_sweep).train()

    # Load test data
    path = abs_path + "\Test-28x28_cntk_text.txt"

    reader_test = MinibatchSource(
        CTFDeserializer(
            path,
            StreamDefs(features=StreamDef(field='features', shape=input_dim),
                       labels=StreamDef(field='labels',
                                        shape=num_output_classes))))

    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size,
                                        input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
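
CTFDeserializer reads CNTK's text format: one sample per line, with each named stream introduced by a pipe and matched to the StreamDefs above. For this MNIST setup, a line of Train-28x28_cntk_text.txt carries a 10-way one-hot label and 784 pixel values, roughly (pixel values illustrative):

|labels 0 0 0 0 0 1 0 0 0 0 |features 0 0 0 ... 128 255 97 ... 0 0
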
Example #16
def create_vgg19():

    # Input variables denoting the features and label data
    feature_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # apply model to input
    # remove mean value 
    mean_removed_features = minus(feature_var, constant([[[104]], [[117]], [[124]]]), name='mean_removed_input')
    
    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU) 
            For(range(2), lambda i: [
                Convolution2D((3,3), 64, name='conv1_{}'.format(i)), 
                Activation(activation=relu, name='relu1_{}'.format(i)), 
            ]),
            MaxPooling((2,2), (2,2), name='pool1'),

            For(range(2), lambda i: [
                Convolution2D((3,3), 128, name='conv2_{}'.format(i)), 
                Activation(activation=relu, name='relu2_{}'.format(i)), 
            ]),
            MaxPooling((2,2), (2,2), name='pool2'),

            For(range(4), lambda i: [
                Convolution2D((3,3), 256, name='conv3_{}'.format(i)), 
                Activation(activation=relu, name='relu3_{}'.format(i)), 
            ]),
            MaxPooling((2,2), (2,2), name='pool3'),

            For(range(4), lambda i: [
                Convolution2D((3,3), 512, name='conv4_{}'.format(i)), 
                Activation(activation=relu, name='relu4_{}'.format(i)), 
            ]),
            MaxPooling((2,2), (2,2), name='pool4'),

            For(range(4), lambda i: [
                Convolution2D((3,3), 512, name='conv5_{}'.format(i)), 
                Activation(activation=relu, name='relu5_{}'.format(i)), 
            ]),
            MaxPooling((2,2), (2,2), name='pool5'),

            Dense(4096, name='fc6'), 
            Activation(activation=relu, name='relu6'), 
            Dropout(0.5, name='drop6'), 
            Dense(4096, name='fc7'), 
            Activation(activation=relu, name='relu7'), 
            Dropout(0.5, name='drop7'),
            Dense(num_classes, name='fc8')
            ])(mean_removed_features)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce' : ce,
        'pe' : pe,
        'pe5': pe5, 
        'output': z
    }
Example #17
    def __init__(self,
                 state_dim,
                 action_dim,
                 gamma=0.99,
                 learning_rate=1e-4,
                 momentum=0.95):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma

        with default_options(activation=relu, init=he_uniform()):
            # Convolution filter counts were halved to save on memory, no gpu :(
            self.model = Sequential([
                Convolution2D((8, 8), 16, strides=4, name='conv1'),
                Convolution2D((4, 4), 32, strides=2, name='conv2'),
                Convolution2D((3, 3), 32, strides=1, name='conv3'),
                Dense(256, init=he_uniform(scale=0.01), name='dense1'),
                Dense(action_dim,
                      activation=None,
                      init=he_uniform(scale=0.01),
                      name='actions')
            ])
            self.model.update_signature(Tensor[state_dim])

        # Create the target model as a copy of the online model
        self.target_model = None
        self.update_target()

        self.pre_states = input_variable(state_dim, name='pre_states')
        self.actions = input_variable(action_dim, name='actions')
        self.post_states = input_variable(state_dim, name='post_states')
        self.rewards = input_variable((), name='rewards')
        self.terminals = input_variable((), name='terminals')
        self.is_weights = input_variable((), name='is_weights')

        predicted_q = reduce_sum(self.model(self.pre_states) * self.actions,
                                 axis=0)

        # DQN - calculate target q values
        # post_q = reduce_max(self.target_model(self.post_states), axis=0)

        # DDQN - calculate target q values
        online_selection = one_hot(
            argmax(self.model(self.post_states), axis=0), self.action_dim)
        post_q = reduce_sum(self.target_model(self.post_states) *
                            online_selection,
                            axis=0)

        post_q = (1.0 - self.terminals) * post_q
        target_q = stop_gradient(self.rewards + self.gamma * post_q)

        # Huber loss
        delta = 1.0
        self.td_error = minus(predicted_q, target_q, name='td_error')
        abs_error = abs(self.td_error)
        errors = element_select(less(abs_error, delta),
                                square(self.td_error) * 0.5,
                                delta * (abs_error - 0.5 * delta))
        loss = errors * self.is_weights

        # Adam based SGD
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)

        self._learner = adam(self.model.parameters,
                             lr_schedule,
                             m_schedule,
                             variance_momentum=vm_schedule)
        self.writer = TensorBoardProgressWriter(log_dir='metrics',
                                                model=self.model)
        self.trainer = Trainer(self.model, (loss, None), [self._learner],
                               self.writer)
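
update_target is called above but not shown; a minimal sketch, assuming it simply re-clones the online network with frozen parameters, matching the CloneMethod.freeze pattern used in the other examples:

    def update_target(self):
        # frozen copy of the online model; its parameters receive no gradients
        self.target_model = self.model.clone(CloneMethod.freeze)
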
Example #18
def net_model(feature):
    with default_options(init=cntk.glorot_uniform()):
        layers = Dense(output_classes, activation=None)(feature)
        return layers
Example #19
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 2
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)

    # z = Sequential([
    #     Dense(hidden_layers_dim, activation=relu),
    #     Dense(hidden_layers_dim, activation=relu),
    #     Dense(num_output_classes)])(scaled_input)

    with default_options(activation=relu, init=C.glorot_uniform()):
        z = Sequential([For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim)),
            Dense(num_output_classes, activation=None)])(scaled_input)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    # setup the data
    path = abs_path + "\Train-28x28_cntk_text.txt"

    reader_train = MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='features', shape=input_dim),
        labels=StreamDef(field='labels', shape=num_output_classes))))

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    progress_writers = [ProgressPrinter(
        tag='Training',
        num_epochs=num_sweeps_to_train_with)]

    # Instantiate the trainer object to drive the model training
    lr = learning_rate_schedule(1, UnitType.sample)
    trainer = Trainer(z, (ce, pe), [adadelta(z.parameters, lr)], progress_writers)

    training_session(
        trainer=trainer,
        mb_source=reader_train,
        mb_size=minibatch_size,
        model_inputs_to_streams=input_map,
        max_samples=num_samples_per_sweep * num_sweeps_to_train_with,
        progress_frequency=num_samples_per_sweep
    ).train()

    # Load test data
    path = abs_path + "\Test-28x28_cntk_text.txt"

    reader_test = MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='features', shape=input_dim),
        labels=StreamDef(field='labels', shape=num_output_classes))))

    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
Example #20
    def __init__(self, input_shape, nb_actions,
                 gamma=0.99, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000),
                 learning_rate=0.00025, momentum=0.95, minibatch_size=32,
                 memory_size=500000, train_after=10000, train_interval=4, target_update_interval=10000,
                 monitor=True):
        self.input_shape = input_shape
        self.nb_actions = nb_actions
        self.gamma = gamma

        self._train_after = train_after
        self._train_interval = train_interval
        self._target_update_interval = target_update_interval

        self._explorer = explorer
        self._minibatch_size = minibatch_size
        self._history = History(input_shape)
        self._memory = ReplayMemory(memory_size, input_shape[1:], 4)
        self._num_actions_taken = 0

        # Metrics accumulator
        self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

        # Action Value model (used by agent to interact with the environment)
        with default_options(activation=relu, init=he_uniform()):
            self._action_value_net = Sequential([
                Convolution2D((8, 8), 16, strides=4),
                Convolution2D((4, 4), 32, strides=2),
                Convolution2D((3, 3), 32, strides=1),
                Dense(256, init=he_uniform(scale=0.01)),
                Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))
            ])
        self._action_value_net.update_signature(Tensor[input_shape])

        # Target model used to compute the target Q-values in training, updated
        # less frequently for increased stability.
        self._target_net = self._action_value_net.clone(CloneMethod.freeze)

        # Function computing Q-values targets as part of the computation graph
        @Function
        @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def compute_q_targets(post_states, rewards, terminals):
            return element_select(
                terminals,
                rewards,
                gamma * reduce_max(self._target_net(post_states), axis=0) + rewards,
            )

        # Define the loss, using Huber Loss (more robust to outliers)
        @Function
        @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
                   post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def criterion(pre_states, actions, post_states, rewards, terminals):
            # Compute the q_targets
            q_targets = compute_q_targets(post_states, rewards, terminals)

            # actions is a 1-hot encoding of the action done by the agent
            q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

            # Define training criterion as the Huber Loss function
            return huber_loss(q_targets, q_acted, 1.0)

        # Adam based SGD
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._action_value_net.parameters, lr_schedule,
                     momentum=m_schedule, variance_momentum=vm_schedule)

        self._metrics_writer = TensorBoardProgressWriter(freq=1, log_dir='metrics', model=criterion) if monitor else None
        self._learner = l_sgd
        self._trainer = Trainer(criterion, (criterion, None), l_sgd, self._metrics_writer)
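
The frozen target network stays useful only if it is refreshed every target_update_interval steps; a sketch of that bookkeeping (the counter and call site are assumptions):

        # inside the agent's step/observe logic
        if self._num_actions_taken % self._target_update_interval == 0:
            self._target_net = self._action_value_net.clone(CloneMethod.freeze)
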
Example #21
def do_demo():
    # create NN, train, test, predict
    input_dim = 4
    hidden_dim = 2
    output_dim = 3

    train_file = "trainData_cntk.txt"
    test_file = "testData_cntk.txt"

    input_Var = C.ops.input(input_dim, np.float32)
    label_Var = C.ops.input(output_dim, np.float32)

    print("Creating a 4-2-3 tanh softmax NN for Iris data ")
    with default_options(init=glorot_uniform()):
        hLayer = C.layers.Dense(hidden_dim,
                                activation=C.ops.tanh,
                                name='hidLayer')(input_Var)
        oLayer = Dense(output_dim, activation=C.ops.softmax,
                       name='outLayer')(hLayer)
    nnet = oLayer

    print("Creating a cross entropy mini-batch Trainer \n")
    # note: cross_entropy_with_softmax applies softmax internally, so the
    # softmax activation on the output layer above makes it apply twice
    ce = C.cross_entropy_with_softmax(nnet, label_Var)
    pe = C.classification_error(nnet, label_Var)

    fixed_lr = 0.05
    lr_per_batch = learning_rate_schedule(fixed_lr, UnitType.minibatch)
    learner = C.sgd(nnet.parameters, lr_per_batch)
    trainer = C.Trainer(nnet, (ce, pe), [learner])

    max_iter = 5000  # maximum training iterations
    batch_size = 5  # mini-batch size
    progress_freq = 1000  # display the error every n mini-batches

    reader_train = create_reader(train_file, True, input_dim, output_dim)
    my_input_map = {
        input_Var: reader_train.streams.features,
        label_Var: reader_train.streams.labels
    }
    pp = ProgressPrinter(progress_freq)

    print("Starting training \n")
    for i in range(0, max_iter):
        currBatch = reader_train.next_minibatch(batch_size,
                                                input_map=my_input_map)
        trainer.train_minibatch(currBatch)
        pp.update_with_trainer(trainer)
    print("\nTraining complete")

    # ----------------------------------

    print("\nEvaluating test data \n")
    reader_test = create_reader(test_file, False, input_dim, output_dim)
    numTestItems = 30
    # rebuild the input map against the test reader's streams
    test_input_map = {
        input_Var: reader_test.streams.features,
        label_Var: reader_test.streams.labels
    }
    allTest = reader_test.next_minibatch(numTestItems, input_map=test_input_map)
    test_error = trainer.test_minibatch(allTest)
    print("Classification error on the 30 test items = %f" % test_error)

    # ----------------------------------

    # Predict the species of an unknown iris flower
    unknown = np.array([[6.9, 3.1, 4.6, 1.3]], dtype=np.float32)
    print(
        "\nPredicting iris species for the input features:")
    my_print(unknown[0], 1)  # 1 decimal place

    predicted = nnet.eval({input_Var: unknown})
    print("Prediction is: ")
    my_print(predicted[0], 3)  # 3 decimal places

    # ---------------------------------

    print("\nTrained model input-to-hidden weights:\n")
    print(hLayer.hidLayer.W.value)
    print("\nTrained model hidden node biases:\n")
    print(hLayer.hidLayer.b.value)

    print("\nTrained model hidden-to-output weights:\n")
    print(oLayer.outLayer.W.value)
    print("\nTrained model output node biases:\n")
    print(oLayer.outLayer.b.value)

    save_weights("weights.txt", hLayer.hidLayer.W.value,
                 hLayer.hidLayer.b.value, oLayer.outLayer.W.value,
                 oLayer.outLayer.b.value)

    return 0  # success
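
create_reader is called above but not defined in this snippet; a minimal sketch, assuming the Iris files are in CNTK text format with 'features' and 'labels' streams (INFINITELY_REPEAT comes from cntk.io):

def create_reader(path, is_training, input_dim, output_dim):
    # randomize and repeat indefinitely while training; single pass for testing
    return MinibatchSource(
        CTFDeserializer(path, StreamDefs(
            features=StreamDef(field='features', shape=input_dim),
            labels=StreamDef(field='labels', shape=output_dim))),
        randomize=is_training,
        max_sweeps=INFINITELY_REPEAT if is_training else 1)
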
Example #22
    def __init__(self, input_shape, nb_actions,
                 gamma=0.95, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 100000),
                 learning_rate=0.01, momentum=0.8, minibatch_size=16,
                 memory_size=15000, train_after=100, train_interval=100, target_update_interval=500,
                 monitor=True):
        self.input_shape = input_shape
        self.nb_actions = nb_actions
        self.gamma = gamma

        self._train_after = train_after
        self._train_interval = train_interval
        self._target_update_interval = target_update_interval

        self._explorer = explorer
        self._minibatch_size = minibatch_size
        self._history = History(input_shape)
        self._memory = ReplayMemory(memory_size, input_shape[1:], 4)
        self._num_actions_taken = 0
        self._num_trains = 0

        # Metrics accumulator
        self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

        '''
        # Action Value model (used by agent to interact with the environment)
        with default_options(activation=relu, init=he_uniform()):
            self._action_value_net = Sequential([
                Convolution2D((8, 8), 16, strides=4),
                Convolution2D((4, 4), 32, strides=2),
                Convolution2D((3, 3), 32, strides=1),
                Dense(256, init=he_uniform(scale=0.01)),
                Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))
            ])
        ''' 
        with default_options(activation=relu, init=he_uniform()):
            self._action_value_net = Sequential([
                Dense(7, init=he_uniform(scale=0.01)),
                Dense(8, init=he_uniform(scale=0.01)),
                #Dense(16, init=he_uniform(scale=0.01)),
                #Dense(32, init=he_uniform(scale=0.01)),
                Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))
            ])
        
        self._action_value_net.update_signature(Tensor[input_shape])

        # Target model used to compute the target Q-values in training, updated
        # less frequently for increased stability.
        self._target_net = self._action_value_net.clone(CloneMethod.freeze)

        # Function computing Q-values targets as part of the computation graph
        @Function
        @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def compute_q_targets(post_states, rewards, terminals):
            return element_select(
                terminals,
                rewards,
                gamma * reduce_max(self._target_net(post_states), axis=0) + rewards,
            )

        # Define the loss, using Huber Loss (more robust to outliers)
        @Function
        @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
                   post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def criterion(pre_states, actions, post_states, rewards, terminals):
            # Compute the q_targets
            q_targets = compute_q_targets(post_states, rewards, terminals)

            # actions is a 1-hot encoding of the action done by the agent
            q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

            # Define training criterion as the Huber Loss function
            return huber_loss(q_targets, q_acted, 1.0)

        # Adam based SGD
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._action_value_net.parameters, lr_schedule,
                     momentum=m_schedule, variance_momentum=vm_schedule)

        self._metrics_writer = TensorBoardProgressWriter(freq=1, log_dir='metrics', model=criterion) if monitor else None
        self._learner = l_sgd
        self._trainer = Trainer(criterion, (criterion, None), l_sgd, self._metrics_writer)
Example #23
def create_vgg19():

    # Input variables denoting the features and label data
    feature_var = input((num_channels, image_height, image_width))
    label_var = input((num_classes))

    # apply model to input
    # remove mean value
    mean_removed_features = minus(feature_var,
                                  constant([[[104]], [[117]], [[124]]]),
                                  name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU)
            For(
                range(2), lambda i: [
                    Convolution2D((3, 3), 64, name='conv1_{}'.format(i)),
                    Activation(activation=relu, name='relu1_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool1'),
            For(
                range(2), lambda i: [
                    Convolution2D((3, 3), 128, name='conv2_{}'.format(i)),
                    Activation(activation=relu, name='relu2_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool2'),
            For(
                range(4), lambda i: [
                    Convolution2D((3, 3), 256, name='conv3_{}'.format(i)),
                    Activation(activation=relu, name='relu3_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool3'),
            For(
                range(4), lambda i: [
                    Convolution2D((3, 3), 512, name='conv4_{}'.format(i)),
                    Activation(activation=relu, name='relu4_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool4'),
            For(
                range(4), lambda i: [
                    Convolution2D((3, 3), 512, name='conv5_{}'.format(i)),
                    Activation(activation=relu, name='relu5_{}'.format(i)),
                ]),
            MaxPooling((2, 2), (2, 2), name='pool5'),
            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, name='fc8')
        ])(mean_removed_features)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z
    }