Example #1
def round(arg, name=''):
    '''
    The output of this operation is the element-wise value rounded to the nearest integer.
    In case of a tie, where an element has an exact fractional part of 0.5,
    this operation uses the "round half up" tie-breaking strategy.
    This differs from NumPy's rounding, which rounds half to even.

    Example:
        >>> C.eval(C.round([0.2, 1.3, 4., 5.5, 0.0]))
        [array([[ 0.,  1.,  4.,  6.,  0.]])]

        >>> C.eval(C.round([[0.6, 3.3], [1.9, 5.6]]))
        [array([[[ 1.,  3.],
                 [ 2.,  6.]]])]

        >>> C.eval(C.round([-5.5, -4.2, -3., -0.7, 0]))
        [array([[-5., -4., -3., -1.,  0.]])]

        >>> C.eval(C.round([[-0.6, -4.3], [1.9, -3.2]]))
        [array([[[-1., -4.],
                 [ 2., -3.]]])]

    Args:
        arg: input tensor
        name (str): the name of the node in the network (optional)
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import round
    arg = sanitize_input(arg, get_data_type(arg))
    return round(arg, name).output()
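
For comparison, the half-up behaviour described above can be reproduced in plain NumPy with floor(x + 0.5). The snippet below is only an illustrative sketch of the difference, not part of the CNTK API:

import numpy as np

x = np.array([0.5, 2.5, -5.5])
print(np.floor(x + 0.5))  # [ 1.  3. -5.]  round half up, matches cntk round
print(np.round(x))        # [ 0.  2. -6.]  numpy rounds half to even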
Example #2
def learning_word_embeddings_with_the_embedding_layer_cntk():
    x_train, y_train, x_test, y_test = load_from_files()

    max_features = 10000
    maxlen = 20
    embedding_dim = 8

    x = cntk.input_variable(shape=(maxlen, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model = cntk.one_hot(x, num_classes=max_features, sparse_output=True)
    model = cntk.layers.Embedding(embedding_dim)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 30
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
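
The cntk_train helper called above is not shown in this listing. A minimal sketch of what such a loop could look like, assuming in-memory NumPy arrays and the (x, y, ..., trainer, evaluator) signature used throughout these examples (names and details are assumptions):

def cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator):
    # Hypothetical sketch: plain minibatch loop over the training arrays.
    num_samples = x_train.shape[0]
    for epoch in range(max_epochs):
        for start in range(0, num_samples, batch_size):
            x_batch = x_train[start:start + batch_size].astype(np.float32)
            # reshape labels to match the (1,)-shaped label variable used above
            y_batch = y_train[start:start + batch_size].reshape(-1, 1).astype(np.float32)
            trainer.train_minibatch({x: x_batch, y: y_batch})
        trainer.summarize_training_progress()
    # a held-out set could be scored the same way with evaluator.test_minibatch(...)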
Example #3
def implementing_1d_convnet_cntk():
    max_features = 10000  # number of words to consider as features
    max_len = 500  # cut texts after this number of words (among top max_features most common words)
    x_train, y_train, x_test, y_test = load_data(max_features, max_len)

    model = build_model_cntk(max_features, max_len)
    x = cntk.input_variable(shape=(max_len, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
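
The accuracy graph built above (round the sigmoid output, compare with the label, average) can be checked in isolation on a toy batch; a small sketch, independent of the IMDB data:

import numpy as np
import cntk

p = cntk.input_variable(shape=(1, ), dtype=np.float32)  # predicted probabilities
t = cntk.input_variable(shape=(1, ), dtype=np.float32)  # binary targets
accuracy = cntk.reduce_mean(cntk.equal(cntk.round(p), t), axis=0)

preds = np.array([[0.2], [0.7], [0.9]], dtype=np.float32)
labels = np.array([[0.], [1.], [0.]], dtype=np.float32)
print(accuracy.eval({p: preds, t: labels}))  # per-sample correctness: 1, 1, 0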
Example #4
def hierarchical_softmax_layer(input_var, label_index, label_dim, label_classes=None):
    '''
    A two-layer hierarchical softmax function.

    Args:
        input_var: Variable with shape: [#,*](dim_x)
        label_index: index of label's category:  [#,*](1)
        label_dim: number of the label categories
        label_classes: number of classes of the label categories
    Returns:
        output_prob: the probability of the given label [#,*](1)
        class_probs: the probability of all the label classes [#,*](label_classes)
        all_probs: a list with the output probabilities for every label class
    '''
    input_dim = input_var.shape[0]

    if not label_classes:
        label_classes = int(np.ceil(np.sqrt(float(label_dim))))

    n_outputs_per_class = int(np.ceil(label_dim / label_classes))

    target_class = C.floor((label_index + 0.5) / n_outputs_per_class)
    target_output_in_class = C.round(label_index - target_class * n_outputs_per_class)

    w1 = parameter(shape=(input_dim, label_classes), init=C.glorot_normal(), name='hsoftmax_w1')
    b1 = parameter(shape=(label_classes), init=C.glorot_normal(), name='hsoftmax_b1')
    w2s = parameter(shape=(label_classes, input_dim, n_outputs_per_class,), init=C.glorot_normal(), name='hsoftmax_w2s')
    b2s = parameter(shape=(label_classes, n_outputs_per_class,), init=C.glorot_normal(), name='hsoftmax_b2s')

    class_probs = softmax(b1 + times(input_var, w1))

    # TODO: fix the bug in backprop for sparse, and use sparse embedding to accelerate
    target_class_one_hot = C.one_hot(target_class, num_classes=label_classes, sparse_output=False)
    w2 = C.reshape(C.times(target_class_one_hot, w2s, output_rank=2), [input_dim, -1])
    b2 = C.reshape(times(target_class_one_hot, b2s, output_rank=1), [-1])
    probs_in_class = softmax(b2 + times(input_var, w2))

    prob_in_class = C.times_transpose(C.one_hot(target_output_in_class, num_classes=n_outputs_per_class, sparse_output=False), probs_in_class)
    class_prob = C.times_transpose(C.one_hot(target_class, num_classes=label_classes, sparse_output=False), class_probs)
    output_prob = prob_in_class * class_prob

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(label_classes):
        ci = C.constant(i)
        ci_one_hot = C.one_hot(ci, num_classes=label_classes, sparse_output=False)
        w2a = C.times(ci_one_hot, w2s, output_rank=2)
        b2a = C.times(ci_one_hot, b2s, output_rank=1)
        probs_in_classa = C.softmax(b2a + times(input_var, w2a))
        class_proba = C.times_transpose(ci_one_hot, class_probs)
        output_proba = probs_in_classa * class_proba
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
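
A possible way to wire this layer into a model, with illustrative sizes only (the 300-dimensional input and 10000-word vocabulary are assumptions, not values taken from the examples above):

import numpy as np
import cntk as C

dim_x, vocab_size = 300, 10000
input_var = C.sequence.input_variable(shape=(dim_x, ))  # [#,*](dim_x)
label_index = C.sequence.input_variable(shape=(1, ))    # [#,*](1)

output_prob, class_probs, all_probs = hierarchical_softmax_layer(
    input_var, label_index, label_dim=vocab_size)
loss = -C.log(output_prob)  # negative log-likelihood of the target word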
Example #5
def use_glove_word_embeddings_cntk(preload_weights=False):
    tokenizer, x_train, y_train, x_val, y_val = from_raw_text_to_word_embeddings(
    )

    x = cntk.input_variable(shape=(Constants.maxlen, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model = cntk.one_hot(x,
                         num_classes=Constants.max_words,
                         sparse_output=True)
    if preload_weights is True:
        embedding_matrix = compute_embedding_matrix(tokenizer)
        assert (Constants.embedding_dim
                == embedding_matrix.shape[0]) or (Constants.embedding_dim
                                                  == embedding_matrix.shape[1])
        model = cntk.layers.Embedding(weights=embedding_matrix)(model)
    else:
        model = cntk.layers.Embedding(Constants.embedding_dim)(model)
    model = cntk.layers.Dense(32, activation=cntk.relu)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
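
compute_embedding_matrix is not reproduced in this listing. One way to build it from pre-trained GloVe vectors, assuming the standard glove.6B text format, the Keras Tokenizer's word_index, and the Constants used above (the file name and dimensions are assumptions):

def compute_embedding_matrix(tokenizer):
    # Hypothetical sketch: parse the GloVe file and copy known word vectors
    # into a (max_words, embedding_dim) matrix; unknown words stay zero.
    embeddings_index = {}
    with open('glove.6B.100d.txt', encoding='utf-8') as f:  # assumed path/dimension
        for line in f:
            values = line.split()
            embeddings_index[values[0]] = np.asarray(values[1:], dtype=np.float32)

    embedding_matrix = np.zeros((Constants.max_words, Constants.embedding_dim),
                                dtype=np.float32)
    for word, i in tokenizer.word_index.items():
        if i < Constants.max_words:
            vector = embeddings_index.get(word)
            if vector is not None:
                embedding_matrix[i] = vector
    return embedding_matrix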
Example #6
def run_experiment_cntk():
    if os.path.isfile('x_train_imdb.bin'):
        print('Loading from .bin files')
        x_train, y_train, x_test, y_test = load_from_files(x_shape=(25000,
                                                                    500),
                                                           y_shape=(25000, ))
    else:
        print('Loading data...')
        (x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
            num_words=Constants.max_words)
        print(len(x_train), 'train sequences')
        print(len(x_test), 'test sequences')

        print('Pad sequences (samples x time)')
        x_train = keras.preprocessing.sequence.pad_sequences(
            x_train, maxlen=Constants.maxlen)
        x_test = keras.preprocessing.sequence.pad_sequences(
            x_test, maxlen=Constants.maxlen)
        print('x_train shape:', x_train.shape)
        print('x_test shape:', x_test.shape)
        print('Saving to .bin files')
        save_to_files(x_train, y_train, x_test, y_test)

    x = cntk.sequence.input_variable(shape=(), dtype=np.float32)
    y = cntk.input_variable(shape=(), dtype=np.float32)
    x_placeholder = cntk.placeholder(shape=(),
                                     dynamic_axes=[
                                         cntk.Axis.default_batch_axis(),
                                         cntk.Axis.default_dynamic_axis()
                                     ])

    model = cntk.one_hot(x_placeholder,
                         num_classes=Constants.max_words,
                         sparse_output=True)
    model = cntk.layers.Embedding(Constants.embedding_dim)(model)
    model = cntk.layers.Recurrence(cntk.layers.LSTM(32))(model)
    model = cntk.sequence.last(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    model.save('ch6-2.cntk.model')
    model = None
    model = cntk.load_model('ch6-2.cntk.model')
    model.replace_placeholders({model.placeholders[0]: x})

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements,
                                         axis=cntk.Axis.all_static_axes())

    max_epochs = 10
    batch_size = 128
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.01),
                        cntk.learning_parameter_schedule_per_sample(0.9))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
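
save_to_files and load_from_files are also not shown. A sketch of one possible pair of helpers, assuming raw float32 dumps whose names follow the 'x_train_imdb.bin' check at the top of run_experiment_cntk (the other file names are assumptions):

def save_to_files(x_train, y_train, x_test, y_test):
    # Hypothetical sketch: flat float32 binary dumps, shapes restored on load.
    x_train.astype(np.float32).tofile('x_train_imdb.bin')
    y_train.astype(np.float32).tofile('y_train_imdb.bin')
    x_test.astype(np.float32).tofile('x_test_imdb.bin')
    y_test.astype(np.float32).tofile('y_test_imdb.bin')

def load_from_files(x_shape=(25000, 500), y_shape=(25000, )):
    x_train = np.fromfile('x_train_imdb.bin', dtype=np.float32).reshape(x_shape)
    y_train = np.fromfile('y_train_imdb.bin', dtype=np.float32).reshape(y_shape)
    x_test = np.fromfile('x_test_imdb.bin', dtype=np.float32).reshape(x_shape)
    y_test = np.fromfile('y_test_imdb.bin', dtype=np.float32).reshape(y_shape)
    return x_train, y_train, x_test, y_test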