Example #1
    def criterion(self):

        # hyperparameters
        lambda_val = 0.5

        # Margin loss
        left = ct.square(ct.relu(0.9 - self.length))
        right = ct.square(ct.relu(self.length - 0.1))
        left = ct.reshape(left, (-1,))
        right = ct.reshape(right, (-1,))
        lc = self.labels * left + lambda_val * (1 - self.labels) * right

        margin_loss = ct.reduce_sum(lc, axis=0)
        margin_loss = ct.reduce_mean(margin_loss, axis=ct.axis.Axis.default_batch_axis())

        # classification_error
        predict = ct.softmax(self.length, axis=0)
        error = ct.classification_error(ct.reshape(predict, (10,)), self.labels)

        total_loss = margin_loss
        reconstruction_err = 0

        if self.use_reconstruction:
            features = ct.reshape(self.features, shape=(-1,))
            encoder = ct.reshape(self.training_model, shape=(-1,))
            squared = ct.square(encoder - features)
            reconstruction_err = ct.reduce_mean(squared, axis=0)
            reconstruction_err = ct.reduce_mean(reconstruction_err, axis=ct.axis.Axis.default_batch_axis())
            total_loss = margin_loss + (0.0005*784) * reconstruction_err

        return total_loss, error
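For reference, the margin loss above is the CapsNet formulation L_k = T_k * max(0, 0.9 - ||v_k||)^2 + lambda * (1 - T_k) * max(0, ||v_k|| - 0.1)^2, summed over classes and averaged over the batch. A minimal NumPy sketch of the same computation (the names length and labels stand in for the class members above):

import numpy as np

def margin_loss_np(length, labels, lambda_val=0.5):
    # length: (batch, num_classes) capsule lengths; labels: one-hot, same shape
    left = np.square(np.maximum(0.0, 0.9 - length))
    right = np.square(np.maximum(0.0, length - 0.1))
    lc = labels * left + lambda_val * (1 - labels) * right
    return lc.sum(axis=1).mean()  # sum over classes, mean over the batch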
Example #2
def instance_normalization(x):
    # epsilon, scale, and bias are free variables bound in the enclosing scope
    mean = C.reduce_mean(x, axis=(1, 2))
    x0 = x - mean
    std = C.sqrt(C.reduce_mean(x0 * x0, axis=(1, 2)))
    if epsilon != 0:
        std += epsilon
    x_hat = x0 / std
    return x_hat * C.reshape(scale, (-1, 1, 1)) + C.reshape(bias, (-1, 1, 1))
def total_variation_loss(x):
    xx = C.reshape(x, (1,)+x.shape)
    delta = np.array([-1, 1], dtype=np.float32)
    kh = C.constant(value=delta.reshape(1, 1, 1, 1, 2))
    kv = C.constant(value=delta.reshape(1, 1, 1, 2, 1))
    dh = C.convolution(kh, xx, auto_padding=[False])
    dv = C.convolution(kv, xx, auto_padding=[False])
    avg = 0.5 * (C.reduce_mean(C.square(dv)) + C.reduce_mean(C.square(dh)))
    return avg
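Because epsilon, scale, and bias are free variables, the first snippet is not runnable on its own. A hedged sketch of a wrapper that binds them (make_instance_normalization and num_channels are illustrative names, not from the source):

import cntk as C

def make_instance_normalization(num_channels, epsilon=1e-5):
    # Bind the free variables the snippet above relies on.
    scale = C.parameter((num_channels,), init=1.0)
    bias = C.parameter((num_channels,), init=0.0)
    def instance_normalization(x):
        mean = C.reduce_mean(x, axis=(1, 2))
        x0 = x - mean
        std = C.sqrt(C.reduce_mean(x0 * x0, axis=(1, 2))) + epsilon
        return x0 / std * C.reshape(scale, (-1, 1, 1)) + C.reshape(bias, (-1, 1, 1))
    return instance_normalization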
Example #4
def test_gather_op(device_id, precision):
    a_data = [AA([[0],[1]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[3],[4]], dtype=PRECISION_TO_TYPE[precision])]
    a = C.input_variable((2,1))
    r_data = np.arange(12).reshape(6,2).astype('f')
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a:a_data})
    expected = np.asarray([[[[0., 1.]],[[2., 3.]]],[[[6., 7.]],[[8.,9.]]]])
    assert np.array_equal(res, expected)

    grads = C.gather(r, a).grad({a:a_data}, [r])
    expected_grad = np.asarray([[1,1],[1,1],[0,0],[1,1],[1,1],[0,0]], dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    # gather with indices from a learnable parameter (no gradients should be
    # passed through the indices -- zeros should be passed instead)
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params * a)).grad({a:a_data}, [r, indices_params])
    assert np.array_equal(grads[r], expected_grad)
    assert np.array_equal(grads[indices_params], np.asarray([0.0], dtype=np.float32))


    b_data = [AA([[0,2],[1,3]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[2,4],[3,5]], dtype=PRECISION_TO_TYPE[precision])]
    b = C.input_variable((2,2))
    res2 = C.gather(r, b).eval({b:b_data})

    expected2 = np.asarray([[[[0., 1.],[4.,5.]],[[2., 3.],[6., 7.]]],[[[4., 5.],[8.,9.]],[[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)

    # The following small model tests the memory-reuse issue of the gather node.
    x = C.input((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # Need the unpack node to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])
    indices = np.asarray([[[1, 2, 1, 2]]])
    input = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    label = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: input, y: label})
    # the 2nd and 3rd rows should be updated by gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # the other three rows should stay at 1
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
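The first gather call above is equivalent to an axis-0 take on the parameter array; a quick NumPy cross-check of the expected values:

import numpy as np

r_data = np.arange(12).reshape(6, 2).astype(np.float32)
indices = np.array([[[0], [1]], [[3], [4]]])   # two sequences of shape (2, 1)
expected = np.take(r_data, indices, axis=0)    # rows 0, 1, 3, 4 of r_data
assert np.array_equal(expected[0, 0, 0], [0., 1.])
assert np.array_equal(expected[1, 1, 0], [8., 9.])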
Example #6
def __cntk_cov2__(m):
    m = C.reshape(m, -1)
    m = C.unpack_batch(m)            # (batch, features)

    m = C.transpose(m, [1, 0])       # (features, batch)

    # number of observations, derived symbolically from the batch axis
    count = C.reduce_sum(C.reduce_mean(C.ones_like(m), axis=0))

    fact = 1.0 / (count - 1)         # unbiased normalization
    m -= C.reduce_mean(m, axis=1)    # center each variable
    mt = C.transpose(m, [1, 0])
    return fact * C.squeeze(m @ mt)
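Since count resolves to the batch size, the result should agree with NumPy's unbiased covariance over the batch; a rough NumPy analogue for comparison:

import numpy as np

samples = np.random.randn(100, 5).astype(np.float32)  # 100 observations, 5 variables
cov_np = np.cov(samples, rowvar=False)                # unbiased (5, 5) covariance, as above
assert cov_np.shape == (5, 5)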
Example #7
 def create_model(self):
     hidden_layers = self._hidden_layers
     with C.layers.default_options(init=C.layers.glorot_uniform(),
                                   activation=C.ops.relu):
         h = self._input
         for i in range(self._num_hidden_layers):
             h = C.layers.Dense(hidden_layers[i])(h)
         model = C.layers.Dense(self._output_size, activation=None)(h)
         loss = C.reduce_mean(C.square(model - self._output), axis=0)
         meas = C.reduce_mean(C.square(model - self._output), axis=0)
         learner = C.adadelta(model.parameters, self._lr_schedule)
         trainer = C.Trainer(model, (loss, meas), learner)
         return model, loss, learner, trainer
Example #8
 def create_model(self):
     hidden_layers = self._hidden_layers
     with cntk.layers.default_options(init=cntk.layers.glorot_uniform(),
                                      activation=cntk.ops.relu):
         h = self._input
         for i in range(self._num_hidden_layers):
             h = cntk.layers.Dense(hidden_layers[i],
                                   activation=cntk.ops.relu)(h)
         model = cntk.layers.Dense(self._output_size, activation=None)(h)
         loss = cntk.reduce_mean(cntk.square(model - self._output), axis=0)
         meas = cntk.reduce_mean(cntk.square(model - self._output), axis=0)
         learner = cntk.adadelta(model.parameters,
                                 self._lr_schedule,
                                 l2_regularization_weight=0.01)
         trainer = cntk.Trainer(model, (loss, meas), learner)
         return model, loss, learner, trainer
Example #9
def implementing_1d_convnet_cntk():
    max_features = 10000  # number of words to consider as features
    max_len = 500  # cut texts after this number of words (among top max_features most common words)
    x_train, y_train, x_test, y_test = load_data(max_features, max_len)

    model = build_model_cntk(max_features, max_len)
    x = cntk.input_variable(shape=(max_len, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
Example #10
def learning_word_embeddings_with_the_embedding_layer_cntk():
    x_train, y_train, x_test, y_test = load_from_files()

    max_features = 10000
    maxlen = 20
    embedding_dim = 8

    x = cntk.input_variable(shape=(maxlen, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model = cntk.one_hot(x, num_classes=max_features, sparse_output=True)
    model = cntk.layers.Embedding(embedding_dim)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 30
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
Example #11
def test_normal_diff_along_batch(arg0, arg1, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    N = 1000
    B = 10.0 / np.sqrt(N)
    x = C.sequence.input_variable(1, dtype=dt)
    x0 = np.zeros((N, 2, 1), dtype=dt)
    z = cr.normal_like(x, arg0, arg1, seed=98052)
    diff = C.sequence.first(z) - C.sequence.last(z)
    mean = C.reduce_mean(diff, axis=C.Axis.all_axes())
    var = C.reduce_mean(diff * diff, axis=C.Axis.all_axes())
    expr = C.combine([mean, var])
    values = expr.eval({x: x0}, device=dev)
    assert np.abs(values[mean.output]) < B
    assert np.abs(values[var.output] - 2 * arg1 * arg1) < np.sqrt(2) * arg1 * B
Example #12
def create_word2vec_cbow_model(word_one_hot, context_one_hots, negative_one_hots):
	# shared_embedding_layer = Embedding(G.embedding_dimension, uniform(scale=1.0/2.0/G.embedding_dimension))
	shared_embedding_layer = Embedding(G.embedding_dimension)

	word_embedding = shared_embedding_layer(word_one_hot)
	context_embeddings = [shared_embedding_layer(x) for x in context_one_hots]
	negative_embeddings = [shared_embedding_layer(x) for x in negative_one_hots]

	print(word_embedding.shape)
	word_embedding_reshaped = C.reshape(word_embedding, shape=(1, G.embedding_dimension))
	print(word_embedding_reshaped.shape)

	context_embeddings_all = C.reshape(C.splice(*context_embeddings), shape=(context_size, G.embedding_dimension))
	negative_embeddings_all = C.reshape(C.splice(*negative_embeddings), shape=(G.negative, G.embedding_dimension))
	print(context_embeddings_all.shape)
	print(negative_embeddings_all.shape)
	cbow = C.reshape(C.reduce_mean(context_embeddings_all, 0), shape=(G.embedding_dimension))
	print(cbow.shape)

	# word_context_product = C.times_transpose(word_embedding_reshaped, cbow)
	word_context_product = C.times_transpose(word_embedding, cbow)
	print(word_context_product.shape)
	negative_context_product = C.reshape(C.times_transpose(negative_embeddings_all, cbow), shape=(G.negative))
	print(negative_context_product.shape)

	word_negative_context_product = C.splice(word_context_product, negative_context_product)
	print(word_negative_context_product.shape)
	# return model and shared embedding layer
	return word_negative_context_product, shared_embedding_layer
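The spliced output holds one positive score followed by G.negative negative scores, so a natural negative-sampling target pairs it with a one-hot-like label. A sketch continuing the snippet's names (this downstream loss is an assumption, not part of the source):

import numpy as np
import cntk as C

target = np.array([1] + [0] * G.negative, dtype=np.float32)
loss = C.binary_cross_entropy(C.sigmoid(word_negative_context_product), target)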
Example #13
def test_ReduceMean(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.array(
            [[[5, 1], [20, 2]], [[30, 1], [40, 2]], [[55, 1], [60, 2]]],
            dtype=dtype)
        model = C.reduce_mean(data, 0)
        verify_no_input(model, tmpdir, 'ReduceMean_0')
Example #14
def dice_coefficient(x, y):
    # average of per-channel dice coefficient
    # a global dice coefficient doesn't work, as the class with the larger region dominates the metric
    # https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
    intersection = C.reduce_sum(x * y, axis=(1,2))

    return C.reduce_mean(2.0 * intersection / (C.reduce_sum(x, axis=(1,2)) + C.reduce_sum(y, axis=(1,2)) + 1.0))
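A NumPy sketch of the same per-channel Dice average, including the +1.0 smoothing term in the denominator:

import numpy as np

def dice_np(x, y):
    # x, y: (channels, height, width) soft masks
    inter = (x * y).sum(axis=(1, 2))
    return np.mean(2.0 * inter / (x.sum(axis=(1, 2)) + y.sum(axis=(1, 2)) + 1.0))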
Example #15
    def __get_trainer_loss(self, model, action_count):
        q_target = C.sequence.input_variable(action_count, np.float32)

        # loss='mse'
        loss = C.reduce_mean(C.square(model - q_target), axis=0)
        meas = C.reduce_mean(C.square(model - q_target), axis=0)

        # optimizer
        lr_schedule = C.learning_rate_schedule(LEARNING_RATE,
                                               C.UnitType.minibatch)
        learner = C.sgd(model.parameters,
                        lr_schedule,
                        gradient_clipping_threshold_per_sample=10)
        trainer = C.Trainer(model, (loss, meas), learner)

        return trainer, loss
Example #18
def layer_normalization(inputs: C.Function,
                        name='layer_normalization') -> C.Function:
    X = C.placeholder(
        inputs.shape,
        (C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()),
        name=name + '_ph')

    mu = C.reduce_mean(X, name='mu')
    sigma = C.sqrt(C.reduce_mean(C.square(X - mu)), name='sigma')

    result = (X - mu) / sigma

    #region scale + bias
    scale = C.parameter(inputs.shape, init=1, name='scale')
    bias = C.parameter(inputs.shape, init=0, name='bias')
    result = result * scale + bias
    #endregion

    block = C.as_block(result, [(X, X)], name)

    return block(inputs)
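The block computes the standard layer-norm formula (x - mu) / sigma * scale + bias, with mu and sigma taken over all static axes; an equivalent NumPy sketch for a single sample:

import numpy as np

def layer_norm_np(x, scale=1.0, bias=0.0):
    mu = x.mean()
    sigma = np.sqrt(np.mean(np.square(x - mu)))
    return (x - mu) / sigma * scale + bias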
Example #19
def __cntk_cov__(m, rowvar: bool = False):
    if len(m.shape) > 2:
        raise ValueError('m has more than 2 dimensions')
    if len(m.shape) < 2:
        m = C.reshape(m, (1, -1))
    if not rowvar and m.shape[0] != 1:
        m = C.transpose(m, [1, 0])

    fact = 1.0 / (m.shape[1] - 1)
    m -= C.reduce_mean(m, axis=1)
    mt = C.transpose(m, [1, 0])
    return fact * C.squeeze(m @ mt)
Example #20
def criteria(label, output, block_size, c_classes, weights):
    ''' Define the loss function and metric '''
    probs = cntk.softmax(output, axis=0)
    log_probs = cntk.log(probs)
    ce = cntk.times(weights,
                    -cntk.element_times(log_probs, label),
                    output_rank=2)
    mean_ce = cntk.reduce_mean(ce)
    _, w, h = label.shape
    pe = cntk.classification_error(probs, label, axis=0) - \
        cntk.reduce_sum(cntk.slice(label, 0, 0, 1)) / cntk.reduce_sum(label)
    return (mean_ce, pe)
Example #21
    def Loss(self):
        # Evaluate old actions and values:
        logprobs, state_value, dist_entropy = self.policy.evaluate()

        # Find the ratio (pi_theta / pi_theta_old) -- importance sampling
        c_old_logprobs = C.input_variable(logprobs.shape, name='old_log_probs')
        ratios = C.exp(logprobs - C.stop_gradient(c_old_logprobs))

        c_rewards = C.input_variable(1, name='rewards')
        advantages = c_rewards - C.stop_gradient(state_value)

        # Finding Surrogate Loss:
        surr1 = ratios * advantages
        surr2 = C.clip(ratios, 1 - self.eps_clip,
                       1 + self.eps_clip) * advantages
        neglog_loss = -C.element_min(surr1, surr2)
        entropy_loss = -0.01 * dist_entropy
        actor_loss = C.reduce_mean(neglog_loss + entropy_loss)
        critic_loss = 0.5 * C.reduce_mean(C.square(state_value - c_rewards))
        loss = actor_loss + critic_loss

        chunk = {
            'neglog_loss': neglog_loss,
            'entropy_loss': entropy_loss,
            'actor_loss': actor_loss,
            'critic_loss': critic_loss
        }

        trainer = C.Trainer(
            loss, (loss, None),
            C.adam(loss.parameters,
                   C.learning_parameter_schedule_per_sample(self.lr),
                   C.momentum_schedule_per_sample(self.betas[0]),
                   variance_momentum=C.momentum_schedule_per_sample(
                       self.betas[1])))
        # trainer = C.Trainer(loss, (loss, None), C.adam(loss.parameters, C.learning_parameter_schedule(10), C.momentum_schedule(0.9), variance_momentum=C.momentum_schedule(0.999))) # higher learning rate

        return loss, chunk, trainer
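The actor term follows the PPO clipped surrogate -E[min(r * A, clip(r, 1 - eps, 1 + eps) * A)]; a minimal NumPy rendering (eps_clip=0.2 is a typical default, where the source uses self.eps_clip):

import numpy as np

def ppo_actor_loss_np(ratios, advantages, eps_clip=0.2):
    surr1 = ratios * advantages
    surr2 = np.clip(ratios, 1 - eps_clip, 1 + eps_clip) * advantages
    return -np.minimum(surr1, surr2).mean()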
Example #22
def multivariate_kl_divergence(input_layer):
    _dim = input_layer.shape[0]

    out_value = C.unpack_batch(input_layer)
    _mu1 = C.transpose(C.reduce_mean(out_value, axis=0), [1, 0])
    _sigma1 = C.cov2(input_layer)

    _mu2 = C.zeros_like(_mu1)
    _sigma2 = C.Constant(np.eye(_dim))
    _sigma2_inv = _sigma2  # identity matrix

    return 0.5 * (C.log(C.det(_sigma2) / C.det(_sigma1)) - _dim +
                  C.trace(_sigma2_inv @ _sigma1) + C.transpose(
                      (_mu2 - _mu1), [1, 0]) @ _sigma2_inv @ (_mu2 - _mu1))
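The return value is the closed-form KL divergence KL(N(mu1, sigma1) || N(mu2, sigma2)), specialized here to mu2 = 0 and sigma2 = I; a NumPy check for the general case:

import numpy as np

def kl_mvn_np(mu1, sigma1, mu2, sigma2):
    d = mu1.shape[0]
    sigma2_inv = np.linalg.inv(sigma2)
    diff = mu2 - mu1
    return 0.5 * (np.log(np.linalg.det(sigma2) / np.linalg.det(sigma1)) - d
                  + np.trace(sigma2_inv @ sigma1) + diff @ sigma2_inv @ diff)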
Example #23
def test_data_resize():
    batch_size = 8
    w = C.parameter(shape=(3, 2), name='w1')
    x = C.input_variable(shape=[3], name='x')
    y = C.softmax(C.times(x, w))
    y = C.unpack_batch(y)
    y = C.reshape(y, [batch_size * 2])
    loss = C.reduce_mean(-C.log(y))

    learning_rate = 0.01
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(y.parameters, lr_schedule, gradient_clipping_threshold_per_sample=1.0)
    trainer = C.Trainer(y, (loss), [learner])

    features = np.random.randn(batch_size, 3)
    trainer.train_minibatch({x: features})
Example #25
    def _create(self, hidden):
        observation = C.input_variable(STATE_COUNT, name="s")
        q_target = C.input_variable(ACTION_COUNT, name="q")

        model = C.layers.Dense(hidden, activation=C.relu)(observation)
        model = C.layers.Dense(ACTION_COUNT)(model)

        # loss='mse'
        loss = C.reduce_mean(C.square(model - q_target)) #, axis=0)

        # optimizer
        lr = 0.00025
        lr_schedule = C.learning_parameter_schedule(lr)
        learner = C.sgd(model.parameters, lr_schedule, gradient_clipping_threshold_per_sample=10)
        trainer = C.Trainer(model, (loss, None), learner)

        return model, trainer, loss
Example #26
    def mae(self, z, l):
        ''' Small helper function implementing MAE.
        Used as an error metric during optimization.
        (So far only used within all subclasses based on neural networks.)

        Parameters
        ----------
        z: vector<float>
            prediction
        l: vector<float>
            label

        Returns
        -------
        errors:
            mae
        '''
        return C.reduce_mean(C.abs(z - l))
Example #27
def std_normalized_l2_loss(output, target):
    std_inv = np.array([
        6.6864805402, 5.2904440280, 3.7165409939, 4.1421640454, 8.1537399389,
        7.0312877415, 2.6712380967, 2.6372177876, 8.4253649884, 6.7482162880,
        9.0849960354, 10.2624412692, 3.1325531319, 3.1091179819, 2.7337937590,
        2.7336441031, 4.3542467871, 5.4896293687, 6.2003761588, 3.1290341469,
        5.7677042738, 11.5460919611, 9.9926451700, 5.4259818848, 20.5060642486,
        4.7692101480, 3.1681517575, 3.8582905289, 3.4222250436, 4.6828286809,
        3.0070785113, 2.8936539301, 4.0649030157, 25.3068458731, 6.0030623160,
        3.1151977458, 7.7773542649, 6.2057372469, 9.9494258692, 4.6865422850,
        5.3300697628, 2.7722027974, 4.0658663003, 18.1101618617, 3.5390113731,
        2.7794520068
    ],
                       dtype=np.float32)
    weights = C.constant(value=std_inv)  #.reshape((1, label_dim)))
    dif = output - target
    ret = C.reduce_mean(C.square(C.element_times(dif, weights)))
    return ret
Example #28
def build_SRResNet_graph(lr_image_shape, hr_image_shape, net):
    inp_dynamic_axes = [C.Axis.default_batch_axis()]
    real_X = C.input(
        lr_image_shape, dynamic_axes=inp_dynamic_axes, name="real_X")
    real_Y = C.input(
        hr_image_shape, dynamic_axes=inp_dynamic_axes, name="real_Y")

    real_X_scaled = real_X/255
    real_Y_scaled = real_Y/255

    genG = net(real_X_scaled)

    G_loss = C.reduce_mean(C.square(real_Y_scaled - genG))

    G_optim = C.adam(G_loss.parameters,
                     lr=C.learning_rate_schedule(
                         [(1, 0.01), (1, 0.001), (98, 0.0001)], C.UnitType.minibatch, 10000),
                     momentum=C.momentum_schedule(0.9), gradient_clipping_threshold_per_sample=1.0)

    G_G_trainer = C.Trainer(genG, (G_loss, None), G_optim)

    return (real_X, real_Y, genG, real_X_scaled, real_Y_scaled, G_optim, G_G_trainer)
Example #29
    def _create_model(self, input_dim, output_dim, hidden_dims):
        c_in = C.input_variable(input_dim, name='state')
        model = c_in

        for h in hidden_dims:
            model = C.layers.Dense(h, activation=C.relu)(model)
        model = C.layers.Dense(output_dim, activation=C.softmax)(model)

        c_action_prob = model
        c_action_onehot = C.input_variable(output_dim, name='action_onehot')
        c_reward = C.input_variable(1, name='reward')
        action_prob = C.reduce_sum(c_action_prob * c_action_onehot)
        log_action_prob = C.log(action_prob)
        loss = -log_action_prob * c_reward
        loss = C.reduce_mean(loss)

        lr = 1e-2
        lr_schedule = C.learning_parameter_schedule(lr)
        learner = C.adam(model.parameters, lr_schedule,
                         C.momentum_schedule(0.9))
        trainer = C.Trainer(model, (loss, None), learner)

        return model, loss, trainer
Example #30
def use_glove_word_embeddings_cntk(preload_weights=False):
    tokenizer, x_train, y_train, x_val, y_val = from_raw_text_to_word_embeddings()

    x = cntk.input_variable(shape=(Constants.maxlen, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model = cntk.one_hot(x,
                         num_classes=Constants.max_words,
                         sparse_output=True)
    if preload_weights is True:
        embedding_matrix = compute_embedding_matrix(tokenizer)
        assert (Constants.embedding_dim
                == embedding_matrix.shape[0]) or (Constants.embedding_dim
                                                  == embedding_matrix.shape[1])
        model = cntk.layers.Embedding(weights=embedding_matrix)(model)
    else:
        model = cntk.layers.Embedding(Constants.embedding_dim)(model)
    model = cntk.layers.Dense(32, activation=cntk.relu)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
Example #31
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None,
                       model_dir=None, log_dir=None, tensorboard_logdir=None, gen_heartbeat=False):

    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0]*80+[0.1]*40+[0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1]*1+[1.0]*80+[0.1]*40+[0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    minibatch_size = 128
    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # progress writers
    progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight = l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z) ; print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += trainer.previous_minibatch_sample_count         # count samples processed so far

        trainer.summarize_training_progress()

        # Log the mean of each parameter tensor, so that we can confirm that the parameters do change.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler() # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size     = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch the next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # Evaluate the model on it.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer/metric_denom
Example #33
def test_ReduceMean(tmpdir):
    data = np.array(
        [[[5, 1], [20, 2]], [[30, 1], [40, 2]], [[55, 1], [60, 2]]],
        dtype=np.float32)
    model = C.reduce_mean(data, 0)
    verify_no_input(model, tmpdir, 'ReduceMean_0')
Example #34
def crossentropy(y, t):
    prob = C.squeeze(C.reduce_sum(y * t, axis=0), 0)
    return -C.reduce_mean(C.unpack_batch(C.log(prob)))
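An equivalent NumPy formulation, for reference:

import numpy as np

def crossentropy_np(y, t):
    # y: (batch, classes) predicted probabilities; t: one-hot targets
    prob = (y * t).sum(axis=1)
    return -np.mean(np.log(prob))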