Beispiel #1
0
def test_lookup():
    """Check that named Dense layers in a Sequential are reachable as attributes.

    A two-layer model is built with constant initializers (1 and 2), its
    input signature is fixed to ``(2,)``, and each layer's weight matrix is
    compared against the matching constant-filled array.
    """
    layers = [Dense(3, init=1, name='first'), Dense(2, init=2, name='second')]
    model = Sequential(layers)
    model.update_signature((2,))

    failure_note = 'Error in lookup of Dense parameters'
    first_weights = model.first.W.value
    second_weights = model.second.W.value
    np.testing.assert_array_equal(first_weights, np.ones((2, 3)), err_msg=failure_note)
    np.testing.assert_array_equal(second_weights, np.ones((3, 2)) * 2, err_msg=failure_note)
Beispiel #2
0
def create_model(input_dim):
    """Build the two-stream (row / column) network and its input variables.

    Each stream is embedded, stabilized and passed through dropout; the two
    streams are spliced along the last axis, run through a stack of
    ``lightlstm`` layers, split back into two halves of width ``opt.nhid``
    and projected to ``input_dim`` by separate Dense heads.

    Args:
        input_dim: shape of the row/column inputs and labels, and the output
            width of both prediction heads.

    Returns:
        dict with the input variables (``'row'``, ``'col'``), the label
        variables (``'row_label'``, ``'col_label'``) and ``'model'``, the
        combination of both prediction heads.
    """
    row = sequence.input_variable(shape=input_dim)
    col = sequence.input_variable(shape=input_dim)

    row_embedder = Sequential([Embedding(opt.embed), Stabilizer(), Dropout(opt.dropout)])
    row_hidden = row_embedder(row)
    col_embedder = Sequential([Embedding(opt.embed), Stabilizer(), Dropout(opt.dropout)])
    col_hidden = col_embedder(col)

    # Joint recurrent stack over the concatenated streams.
    joined = C.splice(row_hidden, col_hidden, axis=-1)
    joined = lightlstm(opt.embed, opt.nhid)(joined)
    joined = For(range(opt.layer - 1), lambda: lightlstm(opt.nhid, opt.nhid))(joined)

    # Split the last axis back into the row half and the column half.
    row_state = C.slice(joined, -1, opt.nhid * 0, opt.nhid * 1)
    col_state = C.slice(joined, -1, opt.nhid * 1, opt.nhid * 2)

    row_head = Sequential([Dropout(opt.dropout), Dense(input_dim)])
    row_predict = row_head(row_state)
    col_head = Sequential([Dropout(opt.dropout), Dense(input_dim)])
    col_predict = col_head(col_state)

    # Label inputs for the two heads.
    row_label = sequence.input_variable(shape=input_dim)
    col_label = sequence.input_variable(shape=input_dim)

    outputs = {
        'row': row,
        'col': col,
        'row_label': row_label,
        'col_label': col_label,
        'model': C.combine([row_predict, col_predict]),
    }
    return outputs
Beispiel #3
0
def test_lookup():
    """Verify attribute-style lookup of named Dense layers inside a Sequential.

    The model has two constant-initialized layers; after the input signature
    is set to ``(2,)`` the weight matrices must equal arrays filled with the
    respective initializer value.
    """
    model = Sequential([
        Dense(3, init=1, name='first'),
        Dense(2, init=2, name='second'),
    ])
    model.update_signature((2,))

    observed_first = model.first.W.value
    observed_second = model.second.W.value

    expected_first = np.ones((2, 3))
    expected_second = np.ones((3, 2)) * 2
    np.testing.assert_array_equal(observed_first, expected_first,
                                  err_msg='Error in lookup of Dense parameters')
    np.testing.assert_array_equal(observed_second, expected_second,
                                  err_msg='Error in lookup of Dense parameters')
Beispiel #4
0
def create_model(input_sequence, label_sequence, vocab_dim, hidden_dim):
    """Build the recurrent language model plus its softmax criterion.

    An embedding followed by ``num_layers`` stabilized LSTM recurrences maps
    the input sequence to a latent vector, which is then fed either to the
    sampled softmax or to the full softmax depending on the module-level
    ``use_sampled_softmax`` flag.

    Args:
        input_sequence: input the recurrent network is applied to.
        label_sequence: labels handed to the softmax criterion.
        vocab_dim: vocabulary size passed to the softmax helper.
        hidden_dim: width of the embedding and of every LSTM layer.

    Returns:
        The ``(z, ce, errs)`` triple produced by the selected softmax helper.
    """
    def recurrent_layer():
        return Sequential([
            Stabilizer(),
            Recurrence(LSTM(hidden_dim), go_backwards=False),
        ])

    # Network that computes the latent representation for the next token.
    encoder = Sequential([
        C.layers.Embedding(hidden_dim),
        For(range(num_layers), recurrent_layer),
    ])
    latent_vector = encoder(input_sequence)

    if not use_sampled_softmax:
        z, ce, errs = cross_entropy_with_full_softmax(
            latent_vector, label_sequence, vocab_dim, hidden_dim)
        return z, ce, errs

    # Sampled softmax: smooth the token frequencies with exponent ``alpha``
    # and expose them as a (1, vocab_dim) constant.
    raw_weights = load_sampling_weights(token_frequencies_file_path)
    smoothed = np.float32(np.power(raw_weights, alpha))
    sampling_weights = C.reshape(C.Constant(smoothed), shape=(1, vocab_dim))
    z, ce, errs = cross_entropy_with_sampled_softmax(
        latent_vector, label_sequence, vocab_dim, hidden_dim,
        softmax_sample_size, sampling_weights)
    return z, ce, errs
Beispiel #5
0
    def __init__(self, input_shape, nb_actions,
                 gamma=0.99, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000),
                 learning_rate=0.00025, momentum=0.95, minibatch_size=32,
                 memory_size=500000, train_after=10000, train_interval=4,
                 target_update_interval=10000, monitor=True):
        """Set up the action-value network, its frozen target copy and the trainer.

        Args:
            input_shape: shape used for the network input signature, the
                History buffer and (minus its first element) the replay memory.
            nb_actions: width of the final Dense layer and of the ``actions``
                input of the criterion.
            gamma: factor applied to the target network's maximum output when
                computing Q targets.
            explorer: exploration policy stored on the instance.
                NOTE(review): the default instance is created once, when the
                function is defined, and is therefore shared by every agent
                that does not pass its own explorer -- confirm this is intended.
            learning_rate: per-minibatch learning rate for the Adam learner.
            momentum: momentum for the Adam learner.
            minibatch_size: stored on the instance; not used in this method.
            memory_size: first argument handed to ``RepMem``.
            train_after: stored on the instance; not used in this method.
            train_interval: stored on the instance; not used in this method.
            target_update_interval: stored on the instance; not used in this
                method.
            monitor: when truthy, a TensorBoardProgressWriter logging to
                ``'metrics'`` is created and passed to the trainer.
        """
        self.input_shape = input_shape
        self.nb_actions = nb_actions
        self.gamma = gamma
        self._train_after = train_after
        self._train_interval = train_interval
        self._target_update_interval = target_update_interval
        self._explorer = explorer
        self._minibatch_size = minibatch_size
        self._history = History(input_shape)
        # NOTE(review): the literal 4 is passed straight to RepMem; its meaning
        # is not visible here (presumably a history length) -- confirm.
        self._memory = RepMem(memory_size, input_shape[1:], 4)
        self._num_actions_taken = 0
        self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

        # Layers created in this scope default to relu activation and
        # he_uniform initialization unless they override them explicitly.
        # NOTE(review): the hidden Dense layers use ``input_shape`` as their
        # output shape -- confirm this is the intended layer width.
        with default_options(activation=relu, init=he_uniform()):
            self._action_value_net = Sequential([
                Dense(input_shape, init=he_uniform(scale=0.01)),
                Dense(input_shape),
                Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))])

        self._action_value_net.update_signature(Tensor[input_shape])

        # Target network: a clone of the action-value network made with
        # CloneMethod.freeze.
        self._target_net = self._action_value_net.clone(CloneMethod.freeze)


        # Q target: ``rewards`` where ``terminals`` selects the first branch,
        # otherwise gamma * max over axis 0 of the target net output + rewards.
        @Function
        @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def compute_q_targets(post_states, rewards, terminals):
            return element_select(
                terminals,
                rewards,
                gamma * reduce_max(self._target_net(post_states), axis=0) + rewards,
            )

        # Training criterion: Huber loss between the Q targets and the
        # network output weighted by ``actions`` and summed over axis 0.
        @Function
        @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
                   post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def criterion(pre_states, actions, post_states, rewards, terminals):
            q_targets = compute_q_targets(post_states, rewards, terminals)

            # presumably ``actions`` is one-hot, so this picks the Q value of
            # the action that was taken -- verify against the caller
            q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

            return huber_loss(q_targets, q_acted, 1.0)

        # Adam learner over the action-value network's parameters.
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._action_value_net.parameters, lr_schedule,
                     momentum=m_schedule, variance_momentum=vm_schedule)

        self._metrics_writer = TensorBoardProgressWriter(freq=1, log_dir='metrics', model=criterion) if monitor else None
        self._learner = l_sgd
        # The criterion serves as both model and loss; no separate metric.
        self._trainer = Trainer(criterion, (criterion, None), l_sgd, self._metrics_writer)
Beispiel #6
0
 def model(self, x):
     """Apply three tanh Dense layers, two bidirectional LSTM layers and a
     final Dense projection of width ``self.dim_y`` to ``x``.

     Returns the output of the final Dense layer.
     """
     dense_width = 500
     lstm_width = 250

     for _ in range(3):
         x = Dense(dense_width, activation=cntk.tanh)(x)

     # Each recurrent stage runs a forward and a backward LSTM on the same
     # input and splices their outputs.
     for _ in range(2):
         forward = Recurrence(LSTM(lstm_width))
         backward = Recurrence(LSTM(lstm_width), go_backwards=True)
         x = Sequential([(forward, backward), cntk.splice])(x)

     return Dense(self.dim_y)(x)
Beispiel #7
0
def create_model(output_dim):
    """Return a stack of stabilized LSTM recurrences followed by a Dense layer.

    The number of recurrent layers and their width come from the
    module-level ``num_layers`` and ``hidden_dim``.

    Args:
        output_dim: output width of the final Dense layer.
    """
    def stabilized_lstm():
        return Sequential([
            Stabilizer(),
            Recurrence(LSTM(hidden_dim), go_backwards=False),
        ])

    recurrent_stack = For(range(num_layers), stabilized_lstm)
    projection = Dense(output_dim)
    return Sequential([recurrent_stack, projection])
Beispiel #8
0
def create_model():
    '''
    Build the model to train: NUMBER_LAYERS LSTM recurrences, reduction to
    the last sequence element, then a Dense layer with NUM_OUTPUT_CLASSES
    outputs.
    :return: the composed Sequential model
    '''
    recurrent_stack = For(
        range(NUMBER_LAYERS),
        lambda: Sequential([Recurrence(LSTM(HIDDEN_LAYER_DIMENSIONS))]))
    classifier_head = Dense(NUM_OUTPUT_CLASSES)
    return Sequential([recurrent_stack, sequence.last, classifier_head])
Beispiel #9
0
def create_network():
    """Build the frame-difference classification network.

    The input is sliced along axis 0 into two windows offset by one, their
    difference is fed through ``resnet_model`` and a Dense output layer, and
    softmax cross-entropy / classification error are attached.

    Returns:
        dict with keys ``'input'``, ``'target'``, ``'model'``, ``'loss'`` and
        ``'metric'``.
    """
    # Input and target variables.
    input_var = cntk.input_variable(
        (sequence_length, frame_height, frame_width), name='input_var')
    target_var = cntk.input_variable(
        (num_classes, ), is_sparse=True, name='target_var')

    # Difference between consecutive entries along axis 0.
    # NOTE(review): the bounds 19/20 are hard-coded; presumably
    # sequence_length is 20 -- confirm.
    earlier = cntk.slice(input_var, axis=0, begin_index=0, end_index=19)
    later = cntk.slice(input_var, axis=0, begin_index=1, end_index=20)
    frame_delta = later - earlier

    pipeline = Sequential([
        resnet_model(cntk.placeholder()),
        Label('resnet'),
        Dense(num_classes, name='output'),
    ])
    model = pipeline(frame_delta)

    loss = cntk.cross_entropy_with_softmax(model, target_var)
    metric = cntk.classification_error(model, target_var)
    return {
        'input': input_var,
        'target': target_var,
        'model': model,
        'loss': loss,
        'metric': metric,
    }
Beispiel #10
0
def LSTM_sequence_classifier_net(input, num_output_classes, embedding_dim,
                                 LSTM_dim, cell_dim):
    """Apply an embedding -> LSTM -> last-step -> Dense classifier to ``input``.

    Args:
        input: the value the classifier is applied to.
        num_output_classes: output width of the final Dense layer.
        embedding_dim: width of the Embedding layer.
        LSTM_dim: first argument of the LSTM constructor.
        cell_dim: second argument of the LSTM constructor.

    Returns:
        The classifier applied to ``input``.
    """
    stages = [
        Embedding(embedding_dim),
        Recurrence(LSTM(LSTM_dim, cell_dim)),
        sequence.last,
        Dense(num_output_classes),
    ]
    return Sequential(stages)(input)
def test_depth_first_search_blocks(depth, prefix_count):
    """Check how many graph nodes depth_first_search finds at a given depth.

    Args:
        depth: ``depth`` argument forwarded to ``depth_first_search``.
        prefix_count: mapping from a string prefix to the number of found
            nodes whose ``str()`` must start with that prefix; the counts
            must add up to the total number of nodes found.
    """
    from cntk.layers import Sequential, Convolution, MaxPooling, Dense
    from cntk.default_options import default_options

    def Blocked_Dense(dim, activation=None):
        # Dense layer wrapped in an extra block named 'blocked_dense'.
        inner = Dense(dim, activation=activation)

        @C.layers.BlockFunction('blocked_dense', 'blocked_dense')
        def func(x):
            return inner(x)

        return func

    with default_options(activation=C.relu):
        pipeline_layers = [
            Convolution((5, 5), 32, pad=True),
            MaxPooling((3, 3), strides=(2, 2)),
            Dense(10, activation=None),
            Blocked_Dense(10),
        ]
        image_to_vec = Sequential(pipeline_layers)

    image = C.input_variable(shape=(3, 256, 256), name='image')
    graph_root = image_to_vec(image)

    visited = C.logging.graph.depth_first_search(
        graph_root, lambda x: True, depth=depth)
    labels = [str(node) for node in visited]

    assert len(visited) == sum(prefix_count.values())
    for prefix, count in prefix_count.items():
        matching = [label for label in labels if label.startswith(prefix)]
        assert len(matching) == count
Beispiel #12
0
def test_htk_deserializers():
    """Smoke-test the HTK feature/MLF deserializers by training three minibatches.

    The test passes if building the reader, the three-layer LSTM model and
    the trainer, and then training on three minibatches, raises nothing.

    The working directory is switched to ``data_path`` for the duration of
    the test (the data files are named relative to it) and is restored to
    ``abs_path`` in a ``finally`` block, so a failure in the body does not
    leak the changed working directory into later tests.
    """
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    os.chdir(data_path)
    try:
        fd = HTKFeatureDeserializer(
            StreamDefs(amazing_features=StreamDef(
                shape=feature_dim, context=(context, context), scp=features_file)))

        ld = HTKMLFDeserializer(
            label_mapping_file,
            StreamDefs(
                awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

        reader = MinibatchSource([fd, ld])

        # Each feature sample holds the frame plus `context` frames on
        # either side.
        features = C.input_variable(((2 * context + 1) * feature_dim))
        labels = C.input_variable((num_classes))

        model = Sequential(
            [For(range(3), lambda: Recurrence(LSTM(256))),
             Dense(num_classes)])
        z = model(features)
        ce = C.cross_entropy_with_softmax(z, labels)
        errs = C.classification_error(z, labels)

        learner = C.adam_sgd(z.parameters,
                             lr=C.learning_rate_schedule(lr, C.UnitType.sample,
                                                         epoch_size),
                             momentum=C.momentum_as_time_constant_schedule(1000),
                             low_memory=True,
                             gradient_clipping_threshold_per_sample=15,
                             gradient_clipping_with_truncation=True)
        progress_printer = C.ProgressPrinter(freq=0)
        trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

        input_map = {
            features: reader.streams.amazing_features,
            labels: reader.streams.awesome_labels
        }

        # just run and verify it doesn't crash
        for _ in range(3):
            mb_data = reader.next_minibatch(mbsize, input_map=input_map)
            trainer.train_minibatch(mb_data)
    finally:
        # Always undo the chdir, even when the body raised.
        os.chdir(abs_path)
Beispiel #13
0
    def create_model(self):
        """Build the AlexNet-style network on ``self.input`` and store it in
        ``self.model``.

        The constant 114 is subtracted from the input first. Convolutions and
        their ReLU activations are kept as separate, named layers.
        """
        centered_input = minus(self.input, constant(114), name='mean_removed_input')
        lrn = self.__local_response_normalization

        with default_options(activation=None, pad=True, bias=True):
            # NOTE(review): conv1 passes no ``strides`` argument -- confirm
            # the default stride is intended for the 11x11 convolution.
            feature_layers = [
                Convolution2D((11, 11), 96, init=normal(0.01), pad=False, name='conv1'),
                Activation(activation=relu, name='relu1'),
                lrn(1.0, 2, 0.0001, 0.75, name='norm1'),
                MaxPooling((3, 3), (2, 2), name='pool1'),
                Convolution2D((5, 5), 192, init=normal(0.01), init_bias=0.1, name='conv2'),
                Activation(activation=relu, name='relu2'),
                lrn(1.0, 2, 0.0001, 0.75, name='norm2'),
                MaxPooling((3, 3), (2, 2), name='pool2'),
                Convolution2D((3, 3), 384, init=normal(0.01), name='conv3'),
                Activation(activation=relu, name='relu3'),
                Convolution2D((3, 3), 384, init=normal(0.01), init_bias=0.1, name='conv4'),
                Activation(activation=relu, name='relu4'),
                Convolution2D((3, 3), 256, init=normal(0.01), init_bias=0.1, name='conv5'),
                Activation(activation=relu, name='relu5'),
                MaxPooling((3, 3), (2, 2), name='pool5'),
            ]
            classifier_layers = [
                Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'),
                Activation(activation=relu, name='relu6'),
                Dropout(0.5, name='drop6'),
                Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'),
                Activation(activation=relu, name='relu7'),
                Dropout(0.5, name='drop7'),
                Dense(self.number_labels, init=normal(0.01), name='fc8'),
            ]
            network = Sequential(feature_layers + classifier_layers)
            self.model = network(centered_input)
Beispiel #14
0
def create_alexnet():
    """Build the AlexNet model together with its loss and error metrics.

    Returns:
        dict with the feature and label input variables (``'feature'``,
        ``'label'``), the cross-entropy loss (``'ce'``), the top-1 and top-5
        classification errors (``'pe'``, ``'pe5'``) and the network output
        (``'output'``).
    """
    # Input variables for the features and the labels.
    feature_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable(num_classes)

    # Subtract the constant 114 from the input before feeding the network.
    centered = minus(feature_var, constant(114), name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        # Convolution and ReLU are separate layers so the pre-activation
        # output has its own name for feature extraction.
        feature_layers = [
            Convolution2D((11, 11), 96, init=normal(0.01), pad=False,
                          strides=(4, 4), name='conv1'),
            Activation(activation=relu, name='relu1'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm1'),
            MaxPooling((3, 3), (2, 2), name='pool1'),

            Convolution2D((5, 5), 192, init=normal(0.01), init_bias=0.1,
                          name='conv2'),
            Activation(activation=relu, name='relu2'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm2'),
            MaxPooling((3, 3), (2, 2), name='pool2'),

            Convolution2D((3, 3), 384, init=normal(0.01), name='conv3'),
            Activation(activation=relu, name='relu3'),
            Convolution2D((3, 3), 384, init=normal(0.01), init_bias=0.1,
                          name='conv4'),
            Activation(activation=relu, name='relu4'),
            Convolution2D((3, 3), 256, init=normal(0.01), init_bias=0.1,
                          name='conv5'),
            Activation(activation=relu, name='relu5'),
            MaxPooling((3, 3), (2, 2), name='pool5'),
        ]
        classifier_layers = [
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, init=normal(0.01), name='fc8'),
        ]
        z = Sequential(feature_layers + classifier_layers)(centered)

    # Loss and metrics.
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    result = {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z,
    }
    return result
def create_model(input_dim, output_dim, hidden_dim, feature_input):
    """Apply a two-layer feed-forward network to ``feature_input``.

    Args:
        input_dim: not used by this function.
        output_dim: output width of the second Dense layer.
        hidden_dim: output width of the first (tanh) Dense layer.
        feature_input: the value the network is applied to.

    Returns:
        The network output for ``feature_input``.
    """
    hidden_layer = Dense(hidden_dim, tanh)
    output_layer = Dense(output_dim)
    return Sequential([hidden_layer, output_layer])(feature_input)
Beispiel #16
0
def ffnet(learner, trainer=None):
    """Train a small feed-forward classifier on random data for 100 minibatches.

    Args:
        learner: callable that receives the model parameters and returns a
            learner; only used when no ``trainer`` is supplied.
        trainer: optional existing trainer to continue training with. When
            given, the feature and label inputs are taken from the first two
            arguments of its loss function.

    Returns:
        ``(last_avg_error, avg_error, trainer)``: the accumulated training
        loss divided by the trainer's total sample count, the result of
        testing on one fresh minibatch, and the trainer that was used.
    """
    inputs, outputs = 5, 3
    hidden_dimension = 3

    if trainer is not None:
        features = trainer.loss_function.arguments[0]
        label = trainer.loss_function.arguments[1]
    else:
        # Input variables for the features and the labels.
        features = C.input_variable(inputs, np.float32)
        label = C.input_variable(outputs, np.float32)

        # Feed-forward classification model.
        hidden_layer = Dense(hidden_dimension,
                             activation=C.sigmoid,
                             init=C.glorot_uniform(seed=98052))
        output_layer = Dense(outputs, init=C.glorot_uniform(seed=98052))
        z = Sequential([hidden_layer, output_layer])(features)

        ce = C.cross_entropy_with_softmax(z, label)
        pe = C.classification_error(z, label)

        # Trainer that drives the model training.
        progress_printer = ProgressPrinter(0)
        trainer = C.Trainer(z, (ce, pe), [learner(z.parameters)],
                            [progress_printer])

    minibatch_size = 25
    num_minibatches_to_train = 100

    aggregate_loss = 0.0
    for _ in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(
            minibatch_size, inputs, outputs)
        # Map the model's input variables to this minibatch's data.
        trainer.train_minibatch({features: train_features, label: labels})
        seen = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * seen

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(
        minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch(
        {features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error, trainer
Beispiel #17
0
def clone_conv_layers(base_model):
    """Clone the convolutional part of ``base_model``.

    * ``globalvars['train_conv']`` falsy: the whole range from the feature
      node to the last conv node is cloned frozen.
    * training starts at the feature node: the whole range is cloned
      trainable.
    * otherwise: the part up to the start-train node is cloned frozen, the
      remainder trainable, and both are chained.

    Returns:
        The cloned convolutional layers.
    """
    if not globalvars['train_conv']:
        return clone_model(base_model, [feature_node_name],
                           [last_conv_node_name], CloneMethod.freeze)

    if feature_node_name == start_train_conv_node_name:
        return clone_model(base_model, [feature_node_name],
                           [last_conv_node_name], CloneMethod.clone)

    frozen_part = clone_model(base_model, [feature_node_name],
                              [start_train_conv_node_name], CloneMethod.freeze)
    trainable_part = clone_model(base_model, [start_train_conv_node_name],
                                 [last_conv_node_name], CloneMethod.clone)
    return Sequential([frozen_part, trainable_part])
def create_convnet_cifar10_model(num_classes):
    """Return a convolutional classifier model.

    Two stages of (3x3 conv, 3x3 conv, 3x3 max-pool with stride 2) are
    followed by Dense+Dropout stages of width 256 and 128 and a final
    linear Dense layer with ``num_classes`` outputs. Layers default to
    relu activation and padding.
    """
    dense_widths = [256, 128]

    def conv_stage():
        return [
            Convolution2D((3, 3), 64),
            Convolution2D((3, 3), 64),
            MaxPooling((3, 3), strides=2),
        ]

    def dense_stage(i):
        return [Dense(dense_widths[i]), Dropout(0.5)]

    with default_options(activation=relu, pad=True):
        conv_stack = For(range(2), conv_stage)
        dense_stack = For(range(2), dense_stage)
        output_layer = Dense(num_classes, activation=None)
        return Sequential([conv_stack, dense_stack, output_layer])
def test_sequential_constructor(input_data):
    """Check that Sequential chains plain functions in the given order.

    The chain abs -> sqrt -> square -> cos is applied to ``input_data`` and
    compared (to 4 decimals) with the same chain evaluated in NumPy; the
    output shape must equal the input shape.
    """
    source = C.input_variable(len(input_data))
    samples = np.asarray(input_data, np.float32)

    chain = Sequential([abs, sqrt, square, cos])(source)
    assert chain.shape == source.shape

    actual = chain(samples)
    expected = np.cos(np.square(np.sqrt(np.abs(samples))))
    np.testing.assert_array_almost_equal(actual[0], expected, decimal=4)
Beispiel #20
0
def ffnet():
    """Train a small feed-forward classifier with SGD plus Gaussian noise injection.

    The model is trained on 100 random minibatches of 25 samples and then
    tested on one fresh minibatch.

    Returns:
        ``(last_avg_error, avg_error)``: the accumulated training loss
        divided by the total number of samples seen, and the result of
        testing on the unseen minibatch.
    """
    inputs, outputs = 3, 3
    hidden_dimension = 3

    # Input variables for the features and the labels.
    features = C.input(inputs, np.float32)
    label = C.input(outputs, np.float32)

    # Feed-forward classification model.
    hidden_layer = Dense(hidden_dimension, activation=C.sigmoid)
    output_layer = Dense(outputs)
    z = Sequential([hidden_layer, output_layer])(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Trainer that drives the model training.
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    progress_printer = ProgressPrinter(0)
    noisy_sgd = sgd(z.parameters,
                    lr=lr_per_minibatch,
                    gaussian_noise_injection_std_dev=0.01)
    trainer = C.Trainer(z, (ce, pe), [noisy_sgd], [progress_printer])

    minibatch_size = 25
    num_minibatches_to_train = 100

    aggregate_loss = 0.0
    for _ in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(
            minibatch_size, inputs, outputs)
        # Map the model's input variables to this minibatch's data.
        trainer.train_minibatch({features: train_features, label: labels})
        seen = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * seen

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(
        minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch(
        {features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
Beispiel #21
0
    def __init__(self, n_in, n_out, init_lr, momentum):
        """Create the layers, the network output and the trainer.

        Args:
            n_in: input width (converted with ``int``).
            n_out: output width (converted with ``int``).
            init_lr: per-sample learning rate for the momentum-SGD learner.
            momentum: momentum for the learner.
        """
        self.param1 = 512
        self.param2 = 256

        self.n_in = int(n_in)
        self.n_out = int(n_out)
        self.input = C.sequence.input_variable(shape=(self.n_in, ))
        self.label = C.sequence.input_variable(shape=(self.n_out, ))

        def tanh_dense():
            return Dense(self.param1, activation=C.tanh)

        def bidirectional_lstm():
            # Forward and backward LSTM over the same input, outputs spliced.
            forward = Recurrence(LSTM(self.param2))
            backward = Recurrence(LSTM(self.param2), go_backwards=True)
            return Sequential([(forward, backward), C.splice])

        self.three_dnn = Sequential([tanh_dense() for _ in range(3)])
        self.rnn_layer1 = bidirectional_lstm()
        self.rnn_layer2 = bidirectional_lstm()
        self.final_dnn = Dense(self.n_out)

        # ``self.model`` is defined elsewhere in the class.
        self.output = self.model(self.input)

        # The same function serves as training loss and evaluation metric.
        self.loss = loss_fun(self.output, self.label)
        self.eval_err = loss_fun(self.output, self.label)

        self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
        self.mom_s = C.momentum_schedule(momentum)
        self.learner = C.momentum_sgd(
            self.output.parameters, lr=self.lr_s, momentum=self.mom_s)
        criteria = (self.loss, self.eval_err)
        self.trainer = C.Trainer(self.output, criteria, [self.learner])
def create_model(feature_dimensions, classes):
    """Apply a three-stage convolutional classifier to ``feature_dimensions``.

    Each stage is a 5x5 convolution (32, 32 and 64 filters), batch
    normalization and 3x3 max-pooling with stride 2; a Dense(64) layer,
    batch normalization and a linear Dense layer with ``len(classes)``
    outputs follow.

    Returns:
        The model applied to ``feature_dimensions``.
    """
    filter_counts = [32, 32, 64]

    with default_options(activation=relu, init=glorot_uniform()):
        conv_stack = For(range(3), lambda i: [
            Convolution((5, 5), filter_counts[i], pad=True),
            BatchNormalization(map_rank=1),
            MaxPooling((3, 3), strides=(2, 2)),
        ])
        network = Sequential([
            conv_stack,
            Dense(64),
            BatchNormalization(map_rank=1),
            Dense(len(classes), activation=None),
        ])

    return network(feature_dimensions)
Beispiel #23
0
def clone_conv_layers(base_model, cfg):
    """Clone the convolutional part of ``base_model`` according to ``cfg``.

    * ``cfg.TRAIN_CONV_LAYERS`` falsy: the range from the feature node to the
      last conv node is cloned frozen.
    * training starts at the feature node: the range is cloned trainable.
    * otherwise: the part up to the start-train node is cloned frozen, the
      remainder trainable, and both are chained.

    Args:
        base_model: model to clone from.
        cfg: configuration providing the node names under ``cfg["MODEL"]``
            and the ``TRAIN_CONV_LAYERS`` flag.

    Returns:
        The cloned convolutional layers.
    """
    model_cfg = cfg["MODEL"]
    feature_node_name = model_cfg.FEATURE_NODE_NAME
    start_train_conv_node_name = model_cfg.START_TRAIN_CONV_NODE_NAME
    last_conv_node_name = model_cfg.LAST_CONV_NODE_NAME

    if not cfg.TRAIN_CONV_LAYERS:
        return clone_model(base_model, [feature_node_name],
                           [last_conv_node_name], CloneMethod.freeze)

    if feature_node_name == start_train_conv_node_name:
        return clone_model(base_model, [feature_node_name],
                           [last_conv_node_name], CloneMethod.clone)

    frozen_part = clone_model(base_model, [feature_node_name],
                              [start_train_conv_node_name], CloneMethod.freeze)
    trainable_part = clone_model(base_model, [start_train_conv_node_name],
                                 [last_conv_node_name], CloneMethod.clone)
    return Sequential([frozen_part, trainable_part])
Beispiel #24
0
def ffnet(optimizer, num_minibatches_to_train, learning_rate_func, lr_args,
          learner_kwargs):
    """Train a small feed-forward classifier with a configurable learner.

    Args:
        optimizer: learner factory called with the model parameters and the
            learning rate.
        num_minibatches_to_train: number of random minibatches to train on.
        learning_rate_func: called as ``learning_rate_func(0.125, *lr_args)``
            to build the learning rate.
        lr_args: extra positional arguments for ``learning_rate_func``.
        learner_kwargs: extra keyword arguments, forwarded only when
            ``optimizer`` is ``sgd``.

    Returns:
        The parameters of the trained model.
    """
    inputs, outputs = 2, 2
    hidden_dimension = 50

    # Input variables for the features and the labels.
    features = C.input_variable(inputs, np.float32)
    label = C.input_variable(outputs, np.float32)

    # Feed-forward classification model.
    hidden_layer = Dense(hidden_dimension,
                         activation=C.sigmoid,
                         init=C.glorot_uniform(seed=SEED))
    output_layer = Dense(outputs, init=C.glorot_uniform(seed=SEED))
    z = Sequential([hidden_layer, output_layer])(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Trainer that drives the model training.
    lr = learning_rate_func(0.125, *lr_args)
    progress_printer = ProgressPrinter(0)
    if optimizer != sgd:
        learner = optimizer(z.parameters, lr)
    else:
        learner = sgd(z.parameters, lr, **learner_kwargs)

    trainer = C.Trainer(z, (ce, pe), [learner], progress_printer)

    minibatch_size = 25
    for _ in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(
            minibatch_size, inputs, outputs)
        # Map the model's input variables to this minibatch's data.
        trainer.train_minibatch({features: train_features, label: labels})

    test_features, test_labels = generate_random_data(
        minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch(
        {features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return z.parameters
Beispiel #25
0
def create_vgg16(feature_var, num_classes, dropout=0.9):
    """Apply a VGG16-style network to ``feature_var``.

    Five convolutional stages (2, 2, 3, 3 and 3 3x3 convolutions with 64,
    128, 256, 512 and 512 filters), each followed by 2x2 max-pooling, feed
    two Dense(4096) layers with ReLU and dropout and a final Dense layer.

    Args:
        feature_var: the value the network is applied to.
        num_classes: output width of the final Dense layer.
        dropout: rate passed to both Dropout layers.

    Returns:
        The network output for ``feature_var``.
    """
    kernel = (3, 3)
    window = (2, 2)

    with default_options(activation=None, pad=True, bias=True):
        # Convolution and ReLU are separate layers so the pre-activation
        # output has its own name for feature extraction.
        network = Sequential([
            For(range(2), lambda i: [
                Convolution2D(kernel, 64, name='conv1_{}'.format(i)),
                Activation(activation=relu, name='relu1_{}'.format(i)),
            ]),
            MaxPooling(window, window, name='pool1'),
            For(range(2), lambda i: [
                Convolution2D(kernel, 128, name='conv2_{}'.format(i)),
                Activation(activation=relu, name='relu2_{}'.format(i)),
            ]),
            MaxPooling(window, window, name='pool2'),
            For(range(3), lambda i: [
                Convolution2D(kernel, 256, name='conv3_{}'.format(i)),
                Activation(activation=relu, name='relu3_{}'.format(i)),
            ]),
            MaxPooling(window, window, name='pool3'),
            For(range(3), lambda i: [
                Convolution2D(kernel, 512, name='conv4_{}'.format(i)),
                Activation(activation=relu, name='relu4_{}'.format(i)),
            ]),
            MaxPooling(window, window, name='pool4'),
            For(range(3), lambda i: [
                Convolution2D(kernel, 512, name='conv5_{}'.format(i)),
                Activation(activation=relu, name='relu5_{}'.format(i)),
            ]),
            MaxPooling(window, window, name='pool5'),
            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(dropout, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(dropout, name='drop7'),
            Dense(num_classes, name='fc8'),
        ])
        z = network(feature_var)

    return z
Beispiel #26
0
def ffnet():
    """Train a two-hidden-layer feed-forward classifier on random data.

    The model is trained with SGD on 1024 random minibatches of 25 samples,
    the training progress is summarized, and the model is tested on one
    fresh minibatch.

    Returns:
        The result of ``trainer.test_minibatch`` on the unseen minibatch.
    """
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables for the features and the labels.
    feature = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    hidden_stack = For(range(num_hidden_layers),
                       lambda i: Dense(hidden_layers_dim, activation=sigmoid))
    output_layer = Dense(num_output_classes)
    netout = Sequential([hidden_stack, output_layer])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # Trainer that drives the model training.
    lr_per_minibatch = learning_parameter_schedule(0.5)
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    minibatch_size = 25
    for _ in range(1024):
        features, labels = generate_random_data(
            minibatch_size, input_dim, num_output_classes)
        # Map the model's input variables to this minibatch's data.
        trainer.train_minibatch({feature: features, label: labels})

    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(
        minibatch_size, input_dim, num_output_classes)
    return trainer.test_minibatch(
        {feature: test_features, label: test_labels})
def create_recurrent_network():
    """Build a 3-layer LSTM sequence classifier and its criteria.

    Reads the module-level names ``context``, ``feature_dim`` and
    ``num_classes``. The feature input has dimension
    ``(2 * context + 1) * feature_dim``.

    Returns:
        dict with keys ``'feature'`` and ``'label'`` (the two sequence
        input variables), ``'ce'`` (cross entropy with softmax),
        ``'errs'`` (classification error) and ``'output'`` (the network
        applied to the feature input).
    """
    window = 2 * context + 1

    # Sequence inputs for the stacked feature frames and the labels.
    feature_input = sequence.input_variable((window * feature_dim))
    label_input = sequence.input_variable((num_classes))

    def lstm_layer():
        # Takes no argument, so For calls it without a layer index.
        return Recurrence(LSTM(256))

    network = Sequential([For(range(3), lstm_layer), Dense(num_classes)])
    output = network(feature_input)

    loss = cross_entropy_with_softmax(output, label_input)
    error = classification_error(output, label_input)

    result = {
        'feature': feature_input,
        'label': label_input,
        'ce': loss,
        'errs': error,
        'output': output,
    }
    return result
Beispiel #28
0
def create_advanced_model(input, out_dims):
    """Apply a conv / dense stack to ``input`` and return the result.

    The stack is two convolutional stages (depth 32, then 64), each with
    3x3, 5x5 and 9x9 padded convolutions followed by 3x3 max pooling with
    stride 2; then two ``Dense(512)`` + ``Dropout(0.5)`` stages; then a
    final ``Dense(out_dims)`` with no activation. All other layers are
    built under ``default_options(activation=relu)``.

    Args:
        input: Value the assembled model is applied to. (The name shadows
            the builtin but is part of the public signature.)
        out_dims: Output dimension of the final dense layer.

    Returns:
        The result of calling the assembled model on ``input``.
    """
    stage_depths = (32, 64)
    kernel_sizes = (3, 5, 9)

    def conv_stage(i):
        # One-argument constructor: For passes the stage index, which
        # selects the convolution depth for this stage.
        depth = stage_depths[i]
        stage = [Convolution((k, k), depth, pad=True) for k in kernel_sizes]
        stage.append(MaxPooling((3, 3), strides=(2, 2)))
        return stage

    def dense_stage():
        # Zero-argument constructor: every dense stage is identical.
        return [Dense(512), Dropout(0.5)]

    # The constructors are invoked by For inside this context, so the
    # layers they create pick up the relu default.
    with default_options(activation=relu):
        model = Sequential([
            For(range(2), conv_stage),
            For(range(2), dense_stage),
            Dense(out_dims, activation=None),
        ])

    return model(input)
Beispiel #29
0
def test_depth_first_search_blocks(depth, prefix_count):
    """Check how many nodes ``depth_first_search`` finds at a given depth.

    Builds a small Convolution -> MaxPooling -> Dense model on a
    (3, 256, 256) input named ``'image'``, searches its graph with a
    predicate that accepts every node, and compares the results against
    the expected counts.

    Args:
        depth: Passed through as the ``depth`` argument of
            ``depth_first_search``.
        prefix_count: Mapping from a string prefix to the number of found
            nodes whose ``str()`` must start with that prefix. The counts
            must also add up to the total number of nodes found.
    """
    from cntk.layers import Sequential, Convolution, MaxPooling, Dense
    from cntk.default_options import default_options

    with default_options(activation=relu):
        layers = [
            Convolution((5, 5), 32, pad=True),
            MaxPooling((3, 3), strides=(2, 2)),
            Dense(10, activation=None),
        ]
        image_to_vec = Sequential(layers)

    image = input(shape=(3, 256, 256), name='image')
    graph_root = image_to_vec(image)

    nodes = depth_first_search(graph_root, lambda x: True, depth=depth)
    node_names = [str(node) for node in nodes]

    # Every found node must be accounted for by exactly one prefix bucket.
    expected_total = sum(prefix_count.values())
    assert len(nodes) == expected_total

    for prefix, expected in prefix_count.items():
        matching = [name for name in node_names if name.startswith(prefix)]
        assert len(matching) == expected
Beispiel #30
0
def ffnet(optimizer):
    """Train a one-hidden-layer classifier with the given learner factory.

    Args:
        optimizer: Callable invoked as ``optimizer(parameters, lr_schedule)``
            whose result is handed to ``C.Trainer`` as the only learner.

    Returns:
        ``z.parameters`` of the trained model.

    Side effects:
        Prints the value returned by ``trainer.test_minibatch`` for one
        extra minibatch generated after training.
    """
    n_inputs = 2
    n_outputs = 2
    hidden_dimension = 50
    minibatch_size = 25
    num_minibatches_to_train = 63

    # Input variables for the feature vectors and their labels.
    features = C.input_variable((n_inputs), np.float32)
    label = C.input_variable((n_outputs), np.float32)

    # Both layers use the same fixed seed for their initializer.
    hidden = Dense(hidden_dimension, activation=C.sigmoid,
                   init=C.glorot_uniform(seed=98052))
    output = Dense(n_outputs, init=C.glorot_uniform(seed=98052))
    z = Sequential([hidden, output])(features)

    loss = C.cross_entropy_with_softmax(z, label)
    metric = C.classification_error(z, label)

    schedule = learning_rate_schedule(0.125, UnitType.minibatch)
    learner = optimizer(z.parameters, schedule)
    trainer = C.Trainer(z, (loss, metric), [learner])

    pp = ProgressPrinter(0)
    for _ in range(num_minibatches_to_train):
        batch_x, batch_y = generate_random_data(minibatch_size, n_inputs, n_outputs)
        # Bind the model's input variables to this minibatch's data.
        trainer.train_minibatch({features: batch_x, label: batch_y})
        pp.update_with_trainer(trainer)

    # The value is not used; the call is kept so the sequence of
    # ProgressPrinter calls is unchanged.
    pp.avg_loss_since_start()

    held_out_x, held_out_y = generate_random_data(minibatch_size, n_inputs, n_outputs)
    avg_error = trainer.test_minibatch({features: held_out_x, label: held_out_y})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return z.parameters
Beispiel #31
0
def create_network():
    """Build the ResNet + bidirectional-LSTM classifier and its criteria.

    Reads the module-level names ``num_channels``, ``frame_height``,
    ``frame_width``, ``hidden_dim`` and ``num_classes``. The model is
    built and applied under
    ``default_options(enable_self_stabilization=True)``.

    Returns:
        dict with keys ``'input'`` (sequence input variable),
        ``'target'`` (sparse target variable), ``'model'`` (the network
        applied to the input), ``'loss'`` (cross entropy with softmax)
        and ``'metric'`` (classification error).
    """
    frame_shape = (num_channels, frame_height, frame_width)
    input_var = cntk.sequence.input_variable(frame_shape, name='input_var')
    target_var = cntk.input_variable((num_classes,), is_sparse=True, name='target_var')

    with cntk.layers.default_options(enable_self_stabilization=True):
        stack = [
            resnet_model(cntk.placeholder()),
            Label('resnet'),
            Dense(hidden_dim, name='cnn_fc'),
            cntk.layers.Stabilizer(),
            # Forward and backward LSTMs each get half of hidden_dim.
            bidirectional_recurrence(LSTM(hidden_dim // 2), LSTM(hidden_dim // 2)),
            cntk.sequence.last,
            BatchNormalization(),
            Dense(num_classes),
        ]
        model = Sequential(stack)(input_var)

    loss = cntk.cross_entropy_with_softmax(model, target_var)
    metric = cntk.classification_error(model, target_var)

    network = {
        'input': input_var,
        'target': target_var,
        'model': model,
        'loss': loss,
        'metric': metric,
    }
    return network
Beispiel #32
0
class DeepQAgent(object):
    """
    Implementation of Deep Q Neural Network agent like in:
        Nature 518. "Human-level control through deep reinforcement learning" (Mnih & al. 2015)

    The agent owns an action-value network, a frozen clone of it used to
    compute Q-value targets, a replay memory, and a trainer that minimises
    the Huber loss between target and predicted Q-values.
    """
    def __init__(self, input_shape, nb_actions,
                 gamma=0.99, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000),
                 learning_rate=0.00025, momentum=0.95, minibatch_size=32,
                 memory_size=500000, train_after=10000, train_interval=4, target_update_interval=10000,
                 monitor=True):
        """Build the networks, the training criterion and the trainer.

        Args:
            input_shape: Shape of one network input; also handed to
                ``History``. ``input_shape[1:]`` is handed to
                ``ReplayMemory`` (presumably the first axis is the number
                of stacked frames -- TODO confirm).
            nb_actions: Number of outputs of the action-value network and
                width of the one-hot ``actions`` input of the criterion.
            gamma: Factor applied to the maximum target-network Q-value
                when computing the Q-value targets.
            explorer: Object queried with ``is_exploring(step)`` and called
                with ``nb_actions`` to pick an exploratory action.
                NOTE: the default instance is created once, when the class
                is defined, and is shared by every agent that relies on it.
            learning_rate: Per-minibatch learning rate given to ``adam``.
            momentum: Momentum given to ``adam``.
            minibatch_size: Number of samples requested from the replay
                memory for each training step.
            memory_size: Size argument handed to ``ReplayMemory``.
            train_after: Number of actions taken before training starts.
            train_interval: Train only on steps divisible by this value.
            target_update_interval: Re-clone the target network and write
                a checkpoint on training steps divisible by this value.
            monitor: When true, create a ``TensorBoardProgressWriter``
                logging to ``'metrics'``; otherwise no metrics are written.
        """
        self.input_shape = input_shape
        self.nb_actions = nb_actions
        self.gamma = gamma

        self._train_after = train_after
        self._train_interval = train_interval
        self._target_update_interval = target_update_interval

        self._explorer = explorer
        self._minibatch_size = minibatch_size
        self._history = History(input_shape)
        self._memory = ReplayMemory(memory_size, input_shape[1:], 4)
        self._num_actions_taken = 0

        # Metrics accumulator
        self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

        # Action Value model (used by agent to interact with the environment)
        with default_options(activation=relu, init=he_uniform()):
            self._action_value_net = Sequential([
                Convolution2D((8, 8), 16, strides=4),
                Convolution2D((4, 4), 32, strides=2),
                Convolution2D((3, 3), 32, strides=1),
                Dense(256, init=he_uniform(scale=0.01)),
                Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))
            ])
        self._action_value_net.update_signature(Tensor[input_shape])

        # Target model used to compute the target Q-values in training, updated
        # less frequently for increased stability.
        self._target_net = self._action_value_net.clone(CloneMethod.freeze)

        # Function computing Q-values targets as part of the computation graph
        @Function
        @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def compute_q_targets(post_states, rewards, terminals):
            # Terminal transitions use the reward alone; all others add the
            # discounted maximum Q-value of the target network.
            return element_select(
                terminals,
                rewards,
                gamma * reduce_max(self._target_net(post_states), axis=0) + rewards,
            )

        # Define the loss, using Huber Loss (more robust to outliers)
        @Function
        @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
                   post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def criterion(pre_states, actions, post_states, rewards, terminals):
            # Compute the q_targets
            q_targets = compute_q_targets(post_states, rewards, terminals)

            # actions is a 1-hot encoding of the action done by the agent
            q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

            # Define training criterion as the Huber Loss function
            return huber_loss(q_targets, q_acted, 1.0)

        # Adam based SGD
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._action_value_net.parameters, lr_schedule,
                     momentum=m_schedule, variance_momentum=vm_schedule)

        self._metrics_writer = TensorBoardProgressWriter(freq=1, log_dir='metrics', model=criterion) if monitor else None
        self._learner = l_sgd
        self._trainer = Trainer(criterion, (criterion, None), l_sgd, self._metrics_writer)

    def act(self, state):
        """Select the next action to perform for the current environment state.

        The state is appended to the short-term history first. The action
        then comes either from the explorer or from the arg-max of the
        action-value network evaluated on the history. When the network is
        used, the mean and standard deviation of its Q-values are recorded
        for the per-episode metrics.

        Args:
            state: The current environment state.

        Returns:
            The chosen action: the explorer's return value, or the index
            of the largest Q-value.
        """
        # Append the state to the short term memory (ie. History)
        self._history.append(state)

        # If policy requires agent to explore, sample random action
        if self._explorer.is_exploring(self._num_actions_taken):
            action = self._explorer(self.nb_actions)
        else:
            # Use the network to output the best action
            env_with_history = self._history.value
            q_values = self._action_value_net.eval(
                # Append batch axis with only one sample to evaluate
                env_with_history.reshape((1,) + env_with_history.shape)
            )

            self._episode_q_means.append(np.mean(q_values))
            self._episode_q_stddev.append(np.std(q_values))

            # Return the value maximizing the expected reward
            action = q_values.argmax()

        # Keep track of interval action counter
        self._num_actions_taken += 1
        return action

    def observe(self, old_state, action, reward, done):
        """Record the outcome of the action chosen by ``act`` on ``old_state``.

        The reward is added to the episode buffer. When ``done`` is true the
        episode metrics are written (if monitoring is enabled), the metric
        buffers are cleared and the short-term history is reset. The
        transition is always appended to the replay memory.

        Args:
            old_state: Previous environment state.
            action: Action done by the agent.
            reward: Reward for doing this action in ``old_state``.
            done: Whether the action has terminated the environment.
        """
        self._episode_rewards.append(reward)

        # If done, reset short term memory (ie. History)
        if done:
            # Plot the metrics through Tensorboard and reset buffers
            if self._metrics_writer is not None:
                self._plot_metrics()
            self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

            # Reset the short term memory
            self._history.reset()

        # Append to long term memory
        self._memory.append(old_state, action, reward, done)

    def train(self):
        """Run one training step if the current action count calls for it.

        Nothing happens until ``train_after`` actions have been taken, and
        afterwards only on steps divisible by ``train_interval``. A training
        step samples a minibatch from the replay memory and feeds it to the
        trainer. On steps that are also divisible by
        ``target_update_interval`` the target network is re-cloned from the
        action-value network and a checkpoint is saved as
        ``models/model<step>`` (using the platform's path separator).

        The target expectation is computed through the Target Network, which
        is a frozen copy of the Action Value Network refreshed at regular
        intervals for increased training stability.
        """
        # Local import keeps this block self-contained.
        import os

        agent_step = self._num_actions_taken

        if agent_step >= self._train_after:
            if (agent_step % self._train_interval) == 0:
                pre_states, actions, post_states, rewards, terminals = self._memory.minibatch(self._minibatch_size)

                self._trainer.train_minibatch(
                    self._trainer.loss_function.argument_map(
                        pre_states=pre_states,
                        actions=Value.one_hot(actions.reshape(-1, 1).tolist(), self.nb_actions),
                        post_states=post_states,
                        rewards=rewards,
                        terminals=terminals
                    )
                )

                # Update the Target Network if needed
                if (agent_step % self._target_update_interval) == 0:
                    # NOTE(review): the criterion graph was built in __init__
                    # against the clone that existed then; confirm that
                    # rebinding this attribute actually changes the network
                    # used to compute the Q-value targets.
                    self._target_net = self._action_value_net.clone(CloneMethod.freeze)

                    # Build the path portably. The previous literal
                    # "models\model%d" relied on the invalid escape "\m" and
                    # only named a sub-directory on Windows.
                    checkpoint_dir = "models"
                    if not os.path.isdir(checkpoint_dir):
                        os.makedirs(checkpoint_dir)
                    filename = os.path.join(checkpoint_dir, "model%d" % agent_step)
                    self._trainer.save_checkpoint(filename)

    def _plot_metrics(self):
        """Write the accumulated per-episode metrics to the metrics writer.

        Writes the mean of the recorded Q-value means and the mean of the
        recorded Q-value standard deviations (each only if any were
        recorded), and always the sum of the episode rewards, all tagged
        with the current action count. Must only be called when a metrics
        writer exists.
        """
        if self._episode_q_means:
            # float() replaces np.asscalar, which newer NumPy no longer has.
            mean_q = float(np.mean(self._episode_q_means))
            self._metrics_writer.write_value('Mean Q per ep.', mean_q, self._num_actions_taken)

        if self._episode_q_stddev:
            std_q = float(np.mean(self._episode_q_stddev))
            self._metrics_writer.write_value('Mean Std Q per ep.', std_q, self._num_actions_taken)

        self._metrics_writer.write_value('Sum rewards per ep.', sum(self._episode_rewards), self._num_actions_taken)
Beispiel #33
0
    def __init__(self, input_shape, nb_actions,
                 gamma=0.99, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000),
                 learning_rate=0.00025, momentum=0.95, minibatch_size=32,
                 memory_size=500000, train_after=10000, train_interval=4, target_update_interval=10000,
                 monitor=True):
        """Set up the networks, the Huber-loss criterion and the trainer.

        Args:
            input_shape: Shape of one network input; also handed to
                ``History``. ``input_shape[1:]`` goes to ``ReplayMemory``.
            nb_actions: Number of outputs of the action-value network.
            gamma: Factor applied to the maximum target-network Q-value.
            explorer: Exploration policy object stored for later use.
            learning_rate: Per-minibatch learning rate given to ``adam``.
            momentum: Momentum given to ``adam``.
            minibatch_size: Stored minibatch size.
            memory_size: Size argument handed to ``ReplayMemory``.
            train_after: Stored training start threshold.
            train_interval: Stored training interval.
            target_update_interval: Stored target-network update interval.
            monitor: When true, a ``TensorBoardProgressWriter`` logging to
                ``'metrics'`` is created; otherwise the writer is ``None``.
        """
        # Plain configuration values.
        self.input_shape = input_shape
        self.nb_actions = nb_actions
        self.gamma = gamma
        self._explorer = explorer
        self._minibatch_size = minibatch_size
        self._train_after = train_after
        self._train_interval = train_interval
        self._target_update_interval = target_update_interval

        # Short-term history, long-term replay memory and the step counter.
        self._history = History(input_shape)
        self._memory = ReplayMemory(memory_size, input_shape[1:], 4)
        self._num_actions_taken = 0

        # Per-episode metric buffers.
        self._episode_rewards = []
        self._episode_q_means = []
        self._episode_q_stddev = []

        # Network the agent uses to act in the environment.
        with default_options(activation=relu, init=he_uniform()):
            q_layers = [
                Convolution2D((8, 8), 16, strides=4),
                Convolution2D((4, 4), 32, strides=2),
                Convolution2D((3, 3), 32, strides=1),
                Dense(256, init=he_uniform(scale=0.01)),
                Dense(nb_actions, activation=None, init=he_uniform(scale=0.01)),
            ]
            self._action_value_net = Sequential(q_layers)
        self._action_value_net.update_signature(Tensor[input_shape])

        # Frozen clone used for computing the Q-value targets.
        self._target_net = self._action_value_net.clone(CloneMethod.freeze)

        # Q-value targets, expressed as part of the computation graph.
        @Function
        @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def compute_q_targets(post_states, rewards, terminals):
            best_next_q = reduce_max(self._target_net(post_states), axis=0)
            bootstrapped = gamma * best_next_q + rewards
            # Terminal transitions take the reward alone.
            return element_select(terminals, rewards, bootstrapped)

        # Training criterion: Huber loss between targets and acted Q-values.
        @Function
        @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
                   post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
        def criterion(pre_states, actions, post_states, rewards, terminals):
            q_targets = compute_q_targets(post_states, rewards, terminals)

            # actions is one-hot, so the product keeps only the Q-value of
            # the action that was actually taken.
            predicted_q = self._action_value_net(pre_states)
            q_acted = reduce_sum(predicted_q * actions, axis=0)

            return huber_loss(q_targets, q_acted, 1.0)

        # Adam learner over the action-value network's parameters.
        optimizer = adam(
            self._action_value_net.parameters,
            learning_rate_schedule(learning_rate, UnitType.minibatch),
            momentum=momentum_schedule(momentum),
            variance_momentum=momentum_schedule(0.999),
        )

        if monitor:
            self._metrics_writer = TensorBoardProgressWriter(freq=1, log_dir='metrics', model=criterion)
        else:
            self._metrics_writer = None
        self._learner = optimizer
        self._trainer = Trainer(criterion, (criterion, None), optimizer, self._metrics_writer)