def make_weights(
        input_placeholder,
        hidden_size,
        weight_initializer,
        bias_initializer,
        init_state=False):
    """
    Build per-gate weights, biases and (optionally) initial-state tensors.

    Arguments:
        input_placeholder: op whose ``axes`` provide the input feature axes
            (all but the last two axes) and the batch axis
        hidden_size: length of the hidden axis and of the output feature axis
        weight_initializer: callable taking an axes object; called once per
            gate for the input weights, then once per gate for the recurrent
            weights
        bias_initializer: callable taking the hidden axis; called once per gate
        init_state: initial states are only created when this is exactly
            ``True``

    Returns:
        tuple ``(W_in, W_rec, b, init_state, init_state_value)``. The first
        three are dicts keyed by gate name ('i', 'f', 'o', 'g'). The last two
        are dicts keyed by 'h' and 'c', or ``None`` when no initial state was
        requested.
    """
    gate_names = ['i', 'f', 'o', 'g']

    # input axis + any extra axes of length 1
    feature_axes_in = tuple(input_placeholder.axes)[:-2]
    feature_axes_out = ng.make_axes([ng.make_axis(hidden_size)])
    batch_axis = input_placeholder.axes.batch_axis()
    hidden_axis = ng.make_axis(hidden_size)

    axes_in = ng.make_axes(hidden_axis) + feature_axes_in
    axes_rec = ng.make_axes(hidden_axis) + feature_axes_out

    # Keep the initializer call order: all input weights, then all recurrent
    # weights, then all biases.
    W_in = {}
    for gate in gate_names:
        W_in[gate] = weight_initializer(axes_in)
    W_rec = {}
    for gate in gate_names:
        W_rec[gate] = weight_initializer(axes_rec)
    b = {}
    for gate in gate_names:
        b[gate] = bias_initializer(hidden_axis)

    if init_state is not True:
        return W_in, W_rec, b, None, None

    state_axes = ng.make_axes([hidden_axis, batch_axis])
    state_placeholders = {}
    for state_name in ['h', 'c']:
        state_placeholders[state_name] = ng.placeholder(state_axes)
    state_values = {}
    for state_name in ['h', 'c']:
        state_values[state_name] = rng.uniform(-1, 1, state_axes)

    return W_in, W_rec, b, state_placeholders, state_values
def test_lut(lut_args):
    """
    Test lookup-table forward propagation and the lookup-table update op.

    Arguments:
        lut_args: sequence unpacked as
            (vocab_size, embed_dim, bsz, seq_len, mem_size)

    The ngraph results are compared against ``lut_fprop_ref`` and
    ``lut_update_ref`` with an absolute tolerance of 1e-5.
    """
    # index that is passed as pad_idx to both the op and the update reference
    pad_idx = 0
    with ExecutorFactory() as ex:

        vocab_size, embed_dim, bsz, seq_len, mem_size = lut_args

        V = ng.make_axis(vocab_size)
        F = ng.make_axis(embed_dim)
        M = ng.make_axis(mem_size)

        # the shared batch / recurrent axes get their lengths from the args
        ax.N.length = bsz
        ax.REC.length = seq_len

        # Multi-axis input to LUT
        ax_idx = ng.make_axes([M, ax.REC, ax.N])
        ax_lut = ng.make_axes([V, F])

        lut = ng.placeholder(ax_lut)
        idx = ng.placeholder(ax_idx)
        # the lookup itself runs on the flattened indices
        idx_flat = ng.flatten(idx)
        ax_out = idx_flat.axes | ng.make_axes([F])

        # fprop
        lut_out_ng = ng.lookuptable(lut, idx_flat, ax_out, pad_idx=pad_idx)
        fprop_fun = ex.executor(lut_out_ng, lut, idx)

        # bprop
        update_error = ng.placeholder(ax_out)
        update_out_ng = lookuptable_update(update_error, lut, idx, lut_out_ng)
        update_fun = ex.executor(update_out_ng, update_error, lut, idx)

        # provide actual inputs and execute the graph
        lut_value = rng.uniform(-1, 1, lut.axes)
        idx_value = rng.random_integers(0, vocab_size - 1, idx.axes)
        fprop_lut = fprop_fun(lut_value, idx_value).copy()

        # compare fprop
        fprop_ref = lut_fprop_ref(lut_value, idx_value)
        ng.testing.assert_allclose(fprop_lut, fprop_ref, rtol=0.0, atol=1.0e-5)

        # provide actual delta and execute the update op
        update_value = rng.uniform(-1, 1, update_error.axes)
        update_lut = update_fun(update_value, lut_value, idx_value).copy()

        # compare bprop (update)
        update_ref = lut_update_ref(
            update_value,
            lut_value,
            idx_value,
            pad_idx=pad_idx)
        ng.testing.assert_allclose(
            update_lut, update_ref, rtol=0.0, atol=1.0e-5)
def make_placeholder(input_size, sequence_length, batch_size, extra_axes=0):
    """
    Create an input placeholder together with a random value for it.

    Arguments:
        input_size: length given to the 'features' axis
        sequence_length: length given to the 'REC_REP' axis
        batch_size: length given to the 'N' axis
        extra_axes: number of length-1 axes ('features_0', 'features_1', ...)
            placed in front of the three main axes

    Returns:
        tuple ``(input_placeholder, input_value)`` where the value is drawn
        from ``RandomTensorGenerator().uniform(-0.01, 0.01, ...)`` over the
        same axes as the placeholder
    """
    core_axes = ng.make_axes([
        ng.make_axis(name='features'),
        ng.make_axis(name='REC_REP'),
        ng.make_axis(name='N'),
    ])
    core_axes.set_shape((input_size, sequence_length, batch_size))

    # optional leading axes of length 1
    unit_axes = []
    for i in range(extra_axes):
        unit_axes.append(ng.make_axis(length=1, name='features_' + str(i)))
    all_axes = ng.make_axes(unit_axes) + core_axes

    placeholder = ng.placeholder(all_axes)
    generator = RandomTensorGenerator()
    value = generator.uniform(-0.01, 0.01, all_axes)

    return placeholder, value
    def __call__(self, in_obj, **kwargs):
        """
        Look up the table rows selected by ``in_obj``.

        Arguments:
            in_obj (Tensor): object that provides the lookup indices; it is
                flattened before the lookup is performed
            **kwargs: accepted but not used by this method

        Returns:
            the unflattened lookup result with its roles mapped through
            ``self.axes_map``
        """
        flat_indices = ng.flatten(in_obj)
        index_axes = flat_indices.axes

        # label lut_v_axis as shadow axis for initializers ... once #1158 is
        # in, shadow axis will do more than just determine fan in/out for
        # initializers.
        vocab_axis = ng.make_axis(self.vocab_size).named('V')
        self.lut_v_axis = vocab_axis
        self.axes_map = shadow_axes_map([vocab_axis])
        self.lut_v_axis = list(self.axes_map.values())[0]

        self.lut_f_axis = ng.make_axis(self.embed_dim).named('F')

        self.w_axes = ng.make_axes([self.lut_v_axis, self.lut_f_axis])
        self.lut_o_axes = index_axes | ng.make_axes([self.lut_f_axis])
        self.o_axes = ng.make_axes([self.lut_f_axis]) | index_axes[0].axes

        # The table variable is only created while the layer reports itself
        # as not initialized.
        if not self.initialized:
            initial_table = self.lut_init(
                self.w_axes, self.lut_v_axis, self.pad_idx)
            weight_meta = {"label": LABELS["weight"]}
            table = ng.variable(
                axes=self.w_axes,
                initial_value=initial_table,
                metadata=weight_meta,
            )
            self.W = table.named('LutW')

        looked_up = ng.lookuptable(
            self.W,
            flat_indices,
            self.lut_o_axes,
            update=self.update,
            pad_idx=self.pad_idx)
        restored = ng.unflatten(looked_up)
        return ng.map_roles(restored, self.axes_map)
Esempio n. 5
0
def get_output_dict(train, max_question):
    """
    Populate the data dictionary with converted arrays and axis names, as
    required by the ArrayIterator object in ngraph.

    Arguments:
        train: dict of dicts; each entry used here has a 'data' item that is
            replaced by a numpy array without its last element, and receives
            an 'axes' item
        max_question: length of the 'REC2' axis created below

    Returns:
        the same ``train`` dict, modified in place
    """
    # Integer entries: drop the last element and convert to int32.
    int_keys = ('para', 'question', 'para_len', 'question_len',
                'question_mask', 'para_mask')
    for key in int_keys:
        entry = train[key]
        entry['data'] = np.array(list(entry['data'][:-1]), dtype=np.int32)

    answer = train['answer']
    answer['data'] = np.array(answer['data'][:-1], dtype=np.int32)
    dropout = train['dropout_val']
    dropout['data'] = np.array(dropout['data'][:-1], dtype=np.float32)

    # These axes are created but not referenced again in this function.
    rec2_axis = ng.make_axis(length=max_question, name='REC2')

    span_axis = ng.make_axis(length=2, name='span')
    unit_axis = ng.make_axis(length=1, name='dummy_axis')

    # Axis names attached to every entry, in the original assignment order.
    # NOTE(review): the 'dropout_val' value is a plain str, not a 1-tuple
    # (the original spelling was ('batch') without a trailing comma) --
    # confirm against the consumer before changing it.
    axis_names = [
        ('para', ('batch', 'REC')),
        ('question', ('batch', 'REC2')),
        ('para_len', ('batch', 'dummy_axis', 'REC')),
        ('question_len', ('batch', 'dummy_axis', 'REC2')),
        ('answer', ('batch', 'span')),
        ('question_mask', ('batch', 'dummy_axis', 'REC2')),
        ('para_mask', ('batch', 'dummy_axis', 'REC')),
        ('dropout_val', 'batch'),
    ]
    for key, names in axis_names:
        train[key]['axes'] = names

    return train
Esempio n. 6
0
    def __init__(self, num_iterations, batch_size, emb_size, nhops,
                 story_length, memory_size, vocab_size, vocab_axis, use_v_luts):
        """
        Store the hyper-parameters, create the named axes and build the
        embedding lookup tables.

        Arguments:
            num_iterations: stored as-is
            batch_size: length of the 'N' axis
            emb_size: length of the 'F' axis and embedding size of the tables
            nhops: number of per-hop tables created when use_v_luts is truthy
            story_length: length of 'sentence_axis' and of the 'REC' axis
            memory_size: length of 'memory_axis'
            vocab_size: vocabulary size of the lookup tables
            vocab_axis: stored as-is
            use_v_luts: when truthy, one extra lookup table per hop is created
        """
        self.num_iterations = num_iterations
        self.batch_size = batch_size
        self.emb_size = emb_size
        self.nhops = nhops
        self.story_length = story_length
        self.memory_size = memory_size
        self.vocab_size = vocab_size
        self.use_v_luts = use_v_luts

        # Create graph
        # Make axes
        new_axis = ng.make_axis
        self.batch_axis = new_axis(length=batch_size, name='N')
        self.sentence_axis = new_axis(length=story_length,
                                      name='sentence_axis')
        self.sentence_rec_axis = new_axis(length=story_length, name='REC')
        self.memory_axis = new_axis(length=memory_size, name='memory_axis')

        # second axis named 'REC', this one with length 1
        self.val_len_axis = new_axis(length=1, name='REC')

        self.embedding_axis = new_axis(length=emb_size, name='F')

        self.vocab_axis = vocab_axis

        # weight initialization
        self.init = GaussianInit(mean=0.0, std=0.1)
        # Create constant position encoding tensor to multiply elementwise
        # with embedded words
        self.pos_enc = position_encoding(self.sentence_rec_axis,
                                         self.embedding_axis)

        # Weight sharing
        self.LUT_A = ModifiedLookupTable(
            self.vocab_size, self.emb_size, self.init,
            update=True, pad_idx=0, name='LUT_A')
        if use_v_luts:
            hop_tables = []
            for _ in range(self.nhops):
                hop_tables.append(ModifiedLookupTable(
                    self.vocab_size, self.emb_size, self.init,
                    update=True, pad_idx=0))
            self.LUTs_C = hop_tables
Esempio n. 7
0
def train_and_evaluate(reader_train, reader_test, max_epochs, model_func):
    """
    Train a CNTK model and its ngraph import side by side, then evaluate both.

    The function relies on the module-level names ``num_channels``,
    ``image_height``, ``image_width`` and ``num_classes``.

    Arguments:
        reader_train: minibatch source for training; must provide
            ``streams.features``, ``streams.labels`` and ``next_minibatch``
        reader_test: minibatch source for evaluation, same interface
        max_epochs: number of epochs; multiplied by an epoch size of 50000
            samples to get the number of training minibatches
        model_func: callable ``(normalized_input, num_classes)`` returning the
            CNTK model

    Returns:
        ``C.softmax(cntk_model)``

    The averaged test results of both frameworks are printed.
    """
    # ======================================================================================
    # Creating
    # ======================================================================================
    input_var = C.input((num_channels, image_height, image_width))
    feature_scale = 1.0 / 256.0
    input_var_norm = C.element_times(feature_scale, input_var)

    cntk_model = model_func(input_var_norm, num_classes)

    label_var = C.input((num_classes))
    loss = C.cross_entropy_with_softmax(cntk_model, label_var)
    error = C.classification_error(cntk_model, label_var)

    minibatch_size = 64
    learning_rate = 0.01
    momentum = 0.9

    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.momentum_sgd(cntk_model.parameters, lr_schedule,
                             C.momentum_schedule(momentum))
    trainer = C.Trainer(cntk_model, (loss, error), [learner])

    # Import the CNTK graph into ngraph and add a label placeholder to it.
    ng_model, ng_placeholders = CNTKImporter(
        batch_size=minibatch_size).import_model(cntk_model)
    ng_labels = ng.placeholder(
        [ng.make_axis(num_classes),
         ng.make_axis(minibatch_size, 'N')])
    ng_placeholders.append(ng_labels)

    transformer = ng.transformers.make_transformer()

    ng_loss = create_loss_and_learner(ng_model, ng_labels, learning_rate,
                                      momentum)
    training_fun = transformer.computation(ng_loss, *ng_placeholders)

    ng_error = classification_error(ng_model, ng_labels)
    test_fun = transformer.computation(ng_error, *ng_placeholders)

    # ======================================================================================
    # Training
    # ======================================================================================
    epoch_size = 50000
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    num_minibatches_to_train = (epoch_size * max_epochs) / minibatch_size
    for _ in range(0, int(num_minibatches_to_train)):
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)

        # Move the leading axis to the end before feeding the same
        # minibatch to the ngraph computation.
        features_batch = np.moveaxis(np.squeeze(data[input_var].asarray()), 0,
                                     -1)
        labels_batch = np.moveaxis(
            data[label_var].data.data.slice_view(
                [0, 0, 0], [minibatch_size, num_classes]).asarray().todense(),
            0, -1)
        training_fun(features_batch, labels_batch)

    # ======================================================================================
    # Evaluation
    # ======================================================================================
    cntk_results = 0.0
    ng_results = 0.0
    epoch_size = 10000
    input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels
    }

    num_minibatches_to_test = epoch_size // minibatch_size
    for _ in range(num_minibatches_to_test):
        data = reader_test.next_minibatch(minibatch_size, input_map=input_map)
        cntk_results += trainer.test_minibatch(data)

        features_batch = np.moveaxis(np.squeeze(data[input_var].asarray()), 0,
                                     -1)
        # Use the same extents as the training loop instead of the literal
        # [64, 10], so the slice follows minibatch_size and num_classes.
        labels_batch = np.moveaxis(
            data[label_var].data.data.slice_view(
                [0, 0, 0], [minibatch_size, num_classes]).asarray().todense(),
            0, -1)
        ng_results += test_fun(features_batch, labels_batch)

    print("CNTK results: {0:.2f}%".format(
        (cntk_results * 100.0) / num_minibatches_to_test))
    print("ngraph results: {0:.2f}%".format(
        (ng_results * 100.0) / num_minibatches_to_test))
    print("")

    return C.softmax(cntk_model)
Esempio n. 8
0
    def __init__(
        self,
        cands,
        num_cands,
        max_cand_len,
        memory_size,
        max_utt_len,
        vocab_size,
        emb_size,
        batch_size,
        use_match_type=False,
        kb_ents_to_type=None,
        kb_ents_to_cand_idxs=None,
        match_type_idxs=None,
        nhops=3,
        eps=1e-6,
        init=GaussianInit(
            mean=0.0,
            std=0.1)):
        """
        Store the configuration, create the named axes, the two lookup tables
        and the projection matrix.

        Arguments:
            cands: candidate answers; turned into a constant over
                (cand_axis, cand_rec_axis) when use_match_type is falsy
            num_cands: length of 'cand_axis'
            max_cand_len: length of the candidate 'REC' axis
            memory_size: length of 'memory_axis'
            max_utt_len: length of the sentence 'REC' axis
            vocab_size: vocabulary size of both lookup tables
            emb_size: length of the 'F' and 'F_proj' axes and embedding size
                of both lookup tables
            batch_size: length of the 'N' axis
            use_match_type: stored; when truthy ``cands_mat`` is not created
                in the part of the method visible here
            kb_ents_to_type: stored as-is
            kb_ents_to_cand_idxs: stored as-is
            match_type_idxs: stored as-is
            nhops: stored as-is
            eps: stored as-is
            init: initializer used for the lookup tables and as the initial
                value of ``R_proj``. NOTE(review): the default instance is
                created once at definition time and shared by every call
                that relies on the default.
        """
        super(MemN2N_Dialog, self).__init__()

        self.cands = cands
        self.memory_size = memory_size
        self.max_utt_len = max_utt_len
        self.vocab_size = vocab_size
        self.num_cands = num_cands
        self.max_cand_len = max_cand_len
        self.batch_size = batch_size
        self.use_match_type = use_match_type
        self.kb_ents_to_type = kb_ents_to_type
        self.kb_ents_to_cand_idxs = kb_ents_to_cand_idxs
        self.match_type_idxs = match_type_idxs
        self.nhops = nhops
        self.eps = eps
        self.init = init

        # Make axes
        self.batch_axis = ng.make_axis(length=batch_size, name='N')
        self.sentence_rec_axis = ng.make_axis(length=max_utt_len, name='REC')
        self.memory_axis = ng.make_axis(length=memory_size, name='memory_axis')
        self.embedding_axis = ng.make_axis(length=emb_size, name='F')
        self.embedding_axis_proj = ng.make_axis(length=emb_size, name='F_proj')
        self.cand_axis = ng.make_axis(length=num_cands, name='cand_axis')
        # second axis named 'REC', with the candidate length
        self.cand_rec_axis = ng.make_axis(length=max_cand_len, name='REC')

        # Weight sharing of A's across all hops input and output
        self.LUT_A = ModifiedLookupTable(
            vocab_size, emb_size, init, update=True, pad_idx=0)
        # Use lookuptable W to embed the candidate answers
        self.LUT_W = ModifiedLookupTable(
            vocab_size, emb_size, init, update=True, pad_idx=0)

        # Initialize projection matrix between internal model states
        self.R_proj = ng.variable(
            axes=[
                self.embedding_axis,
                self.embedding_axis_proj],
            initial_value=init)

        if not self.use_match_type:
            # Initialize constant matrix of all candidate answers
            self.cands_mat = ng.constant(
                self.cands, axes=[
                    self.cand_axis, self.cand_rec_axis])
Esempio n. 9
0
def test_linear_accepts_axes_axis():
    """Linear must be constructible when ``axes`` is a single Axis object."""
    single_axis = ng.make_axis(1)
    constant_init = UniformInit(1.0, 1.0)
    Linear(axes=single_axis, init=constant_init)
Esempio n. 10
0
def check_stacked_lstm(seq_len,
                       input_size,
                       hidden_size,
                       batch_size,
                       init_func,
                       return_seq=True,
                       backward=False,
                       reset_cells=False,
                       num_iter=2):
    """
    Compare two stacked ngraph LSTM layers against two stacked RefLSTM layers.

    The first layer has ``hidden_size`` units, the second ``hidden_size + 1``.
    The weights of the ngraph layers are copied into the reference layers and
    the outputs of the second layer are compared for ``num_iter`` random
    inputs, using the module-level ``rtol`` / ``atol``.

    Arguments:
        seq_len: length of the 'REC' axis
        input_size: length of the 'Feature' axis
        hidden_size: hidden size of the first layer
        batch_size: length of the 'N' axis
        init_func: initializer passed to both LSTM layers
        return_seq: passed to the layers as ``return_sequence``
        backward: passed to the layers as ``backward``
        reset_cells: passed to the layers; when False the reference states are
            carried over between iterations and the last hidden state is
            checked against ``lstm_ng_2.h_init``
        num_iter: number of random inputs that are propagated
    """

    Cin = ng.make_axis(input_size, name='Feature')
    REC = ng.make_axis(seq_len, name='REC')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng_1 = LSTM(hidden_size,
                         init_func,
                         activation=Tanh(),
                         gate_activation=Logistic(),
                         reset_cells=reset_cells,
                         return_sequence=return_seq,
                         backward=backward)
        lstm_ng_2 = LSTM(hidden_size + 1,
                         init_func,
                         activation=Tanh(),
                         gate_activation=Logistic(),
                         reset_cells=reset_cells,
                         return_sequence=return_seq,
                         backward=backward)

        # second layer consumes the output of the first one
        out_ng_1 = lstm_ng_1(inp_ng)
        out_ng_2 = lstm_ng_2(out_ng_1)

        fprop_neon_fun_2 = ex.executor(out_ng_2, inp_ng)

        # executors that read the per-gate weights and biases of both layers
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_1_fun = copier_T(
            ex.executor(list(lstm_ng_1.W_input[k] for k in gates)))
        Whh_neon_1_fun = copier_T(
            ex.executor(list(lstm_ng_1.W_recur[k] for k in gates)))
        bh_neon_1_fun = copier(ex.executor(list(lstm_ng_1.b[k]
                                                for k in gates)))
        Wxh_neon_2_fun = copier_T(
            ex.executor(list(lstm_ng_2.W_input[k] for k in gates)))
        Whh_neon_2_fun = copier_T(
            ex.executor(list(lstm_ng_2.W_recur[k] for k in gates)))
        bh_neon_2_fun = copier(ex.executor(list(lstm_ng_2.b[k]
                                                for k in gates)))

        h_init_fun = ex.executor(lstm_ng_2.h_init)

        # fprop on random inputs for multiple iterations
        fprop_neon_2_list = []
        input_value_list = []

        for i in range(num_iter):
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon_2 = fprop_neon_fun_2(input_value).copy()

            # comparing outputs
            # NOTE(review): keeps index 0 of the last axis only -- presumably
            # the first batch element; confirm against the output layout
            if return_seq is True:
                fprop_neon_2 = fprop_neon_2[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_2_list.append(fprop_neon_2)

            if reset_cells is False:
                # look at the last hidden states
                h_init_neon = fprop_neon_2[:, -1].reshape(-1, 1)
                h_init_ng = h_init_fun()
                ng.testing.assert_allclose(h_init_neon,
                                           h_init_ng,
                                           rtol=rtol,
                                           atol=atol)

        # after the rnn graph has been executed, can get the W values. Get copies so
        # shared values don't confuse derivatives
        # concatenate weights to i, f, o, g together (in this order)
        # (this second 'gates' assignment is not read again below)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_1 = np.concatenate(Wxh_neon_1_fun(), 1)
        Whh_neon_1 = np.concatenate(Whh_neon_1_fun(), 1)
        bh_neon_1 = np.concatenate(bh_neon_1_fun())
        Wxh_neon_2 = np.concatenate(Wxh_neon_2_fun(), 1)
        Whh_neon_2 = np.concatenate(Whh_neon_2_fun(), 1)
        bh_neon_2 = np.concatenate(bh_neon_2_fun())

        # reference numpy LSTM
        lstm_ref_1 = RefLSTM()
        lstm_ref_2 = RefLSTM()
        WLSTM_1 = lstm_ref_1.init(input_size, hidden_size)
        WLSTM_2 = lstm_ref_2.init(hidden_size, hidden_size + 1)

        # make ref weights and biases the same with neon model
        # row 0 holds the bias, then the input weights, then the recurrent ones
        WLSTM_1[0, :] = bh_neon_1
        WLSTM_1[1:input_size + 1, :] = Wxh_neon_1
        WLSTM_1[input_size + 1:] = Whh_neon_1
        WLSTM_2[0, :] = bh_neon_2
        WLSTM_2[1:hidden_size + 1, :] = Wxh_neon_2
        WLSTM_2[hidden_size + 1:] = Whh_neon_2

        # transpose input X and do fprop
        fprop_ref_2_list = []
        c0_1 = h0_1 = None
        c0_2 = h0_2 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref_1, cprev_1, hprev_1,
             batch_cache) = lstm_ref_1.forward(inp_ref, WLSTM_1, c0_1, h0_1)
            (Hout_ref_2, cprev_2, hprev_2,
             batch_cache) = lstm_ref_2.forward(Hout_ref_1, WLSTM_2, c0_2, h0_2)

            # carry the reference states over when the cells are not reset
            if reset_cells is False:
                c0_1 = cprev_1
                h0_1 = hprev_1
                c0_2 = cprev_2
                h0_2 = hprev_2

            # the output needs transpose as well
            Hout_ref_2 = Hout_ref_2.reshape(seq_len * batch_size,
                                            hidden_size + 1).T

            fprop_ref_2_list.append(Hout_ref_2)

        # compare every iteration's ngraph output with the reference output
        for i in range(num_iter):
            ng.testing.assert_allclose(fprop_neon_2_list[i],
                                       fprop_ref_2_list[i],
                                       rtol=rtol,
                                       atol=atol)
Esempio n. 11
0
def A():
    """Return a newly created axis of length 2."""
    axis = ng.make_axis(2)
    return axis
Esempio n. 12
0
def C():
    """Return a newly created axis of length 200."""
    axis = ng.make_axis(length=200)
    return axis
Esempio n. 13
0
def M():
    """Return a newly created axis of length 3."""
    axis = ng.make_axis(length=3)
    return axis
Esempio n. 14
0
def test_expand_dims(transformer_factory):
    """
    Check ng.ExpandDims forward values and derivatives.

    For every test tensor, every new-axis length from 1 to
    ``max_new_axis_length`` and every insert position, the op result is
    compared with a zero-stride numpy view of the input, and the symbolic
    derivative is compared with the numeric one. ``delta``, ``rtol`` and
    ``atol`` are names defined outside this function.
    """
    C = ng.make_axis(name='C')
    D = ng.make_axis(name='D')
    N = ng.make_axis(name='N')

    max_new_axis_length = 4

    # one 2x2 tensor and one scalar
    tests = [{
        'tensor': [[2, 5], [13, 5]],
        'tensor_axes': (N, D),
        'tensor_axes_lengths': (2, 2),
        'new_axis': C,
    }, {
        'tensor': 2,
        'tensor_axes': (),
        'tensor_axes_lengths': (),
        'new_axis': D
    }]

    for test in tests:
        for new_axis_length in range(1, max_new_axis_length + 1):
            tensor_axes = test['tensor_axes']
            tensor_axes_lengths = test['tensor_axes_lengths']

            # dim is the position at which the new axis is inserted
            for dim in range(len(tensor_axes) + 1):
                ex = ExecutorFactory()
                # axis lengths are (re)assigned for every case because the
                # axis objects are shared between the test entries
                for axis, length in zip(tensor_axes, tensor_axes_lengths):
                    axis.length = length

                new_axis = test['new_axis']
                new_axis.length = new_axis_length

                tensor_np = np.array(test['tensor'], dtype=np.float32)
                tensor = ng.placeholder(tensor_axes)

                expanded = ng.ExpandDims(tensor, new_axis, dim)
                expander_fun = ex.executor(expanded, tensor)

                num_deriv_fun = ex.numeric_derivative(expanded, tensor, delta)
                sym_deriv_fun = ex.derivative(expanded, tensor)

                # expected value: a view on tensor_np whose new dimension has
                # stride 0, i.e. the data is repeated along it
                expanded_shape = tensor_np.shape[:dim] \
                    + (new_axis.length,) + tensor_np.shape[dim:]
                expanded_strides = tensor_np.strides[:dim] \
                    + (0,) + tensor_np.strides[dim:]
                expanded_np = np.ndarray(buffer=tensor_np,
                                         shape=expanded_shape,
                                         strides=expanded_strides,
                                         dtype=tensor_np.dtype)

                expanded_result = expander_fun(tensor_np)
                assert np.array_equal(expanded_np, expanded_result)

                # Test backpropagation
                numeric_deriv = num_deriv_fun(tensor_np)
                sym_deriv = sym_deriv_fun(tensor_np)
                assert np.allclose(numeric_deriv,
                                   sym_deriv,
                                   rtol=rtol,
                                   atol=atol)
Esempio n. 15
0
def test_tensor_description_init(transformer_factory):
    """TensorDescription must raise ValueError for an axis without a length."""
    # TensorDescription axes require lengths
    with pytest.raises(ValueError):
        lengthless_axes = ng.make_axes(ng.make_axis())
        TensorDescription(lengthless_axes)
def make_axes(lengths):
    """Create one axis per entry of ``lengths`` and bundle them with ng.make_axes."""
    axis_list = []
    for length in lengths:
        axis_list.append(ng.make_axis(length))
    return ng.make_axes(axis_list)
Esempio n. 17
0
def test_padding(transformer_factory):
    """
    Check ng.pad forward values and derivatives.

    Each test entry gives a tensor, its axes, a per-axis padding spec and
    optional axes for the padded result. The op result is compared with
    ``np.pad(..., mode='constant')`` and the symbolic derivative with the
    numeric one. ``delta``, ``rtol`` and ``atol`` are names defined outside
    this function.
    """
    C = ng.make_axis(name='C')
    D = ng.make_axis(name='D')
    M = ng.make_axis(name='M')
    N = ng.make_axis(name='N')

    # second entry mixes a (before, after) pair with a single int and passes
    # no explicit padded axes
    tests = [{
        'tensor': [[1, 3], [2, 5]],
        'tensor_axes': (C, D),
        'padding': [(0, 1), (1, 0)],
        'padded_axes': (M, N),
        'axes_lengths': {
            C: 2,
            D: 2,
            M: 3,
            N: 3
        }
    }, {
        'tensor': [[1, 4, 5], [1, 4, 6]],
        'tensor_axes': (C, D),
        'padding': [(0, 1), 1],
        'padded_axes': None,
        'axes_lengths': {
            C: 2,
            D: 3
        }
    }]

    for test in tests:
        ex = ExecutorFactory()
        # axis objects are shared between entries, so set lengths per entry
        for axis, length in test['axes_lengths'].items():
            axis.length = length
        tensor_axes = test['tensor_axes']
        tensor_np = np.array(test['tensor'], dtype='float32')
        tensor = ng.placeholder(tensor_axes)
        padding = test['padding']
        padded_axes = test['padded_axes']
        padded = ng.pad(tensor, padding, padded_axes)
        computed_val_fun = ex.executor(padded, tensor)

        # Test backpropagation
        numeric_deriv_fun = ex.numeric_derivative(padded, tensor, delta)
        sym_deriv_fun = ex.derivative(padded, tensor)

        def to_tuple(p):
            """
            Normalize one padding entry for np.pad.

            Arguments:
              p: an int or an already paired padding entry

            Returns:
              ``(p, p)`` when p is an int, otherwise p unchanged
            """
            return (p, p) if isinstance(p, int) else p

        np_padding = tuple(to_tuple(p) for p in padding)
        expected_val = np.pad(tensor_np, np_padding, mode='constant')

        computed_val = computed_val_fun(tensor_np)
        assert np.array_equal(expected_val, computed_val)

        numeric_deriv = numeric_deriv_fun(tensor_np)
        sym_deriv = sym_deriv_fun(tensor_np)

        assert np.allclose(numeric_deriv, sym_deriv, rtol=rtol, atol=atol)
Esempio n. 18
0
def train_and_test(data_dir):
    """
    Train a CNTK model and its ngraph import on the same minibatches, then
    print the average test error of both and save the CNTK model.

    Arguments:
        data_dir: directory containing "Train-28x28_cntk_text.txt" and
            "Test-28x28_cntk_text.txt"

    The softmax of the CNTK model is saved as "MNIST.dnn" under the path held
    by the name ``MNIST``, which is defined outside this function.
    """
    train_file = os.path.join(data_dir, "Train-28x28_cntk_text.txt")
    test_file = os.path.join(data_dir, "Test-28x28_cntk_text.txt")

    input_dim = 784
    output_dim = 10

    input_var = C.input(input_dim)
    label_var = C.input(output_dim)

    # the input is divided by 256.0 before it reaches the model
    cntk_model = create_model(input_var / 256.0, 2, 400, output_dim)

    cntk_loss = C.cross_entropy_with_softmax(cntk_model, label_var)
    cntk_error = C.classification_error(cntk_model, label_var)

    learning_rate = 0.2
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(cntk_model.parameters, lr_schedule)
    trainer = C.Trainer(cntk_model, (cntk_loss, cntk_error), [learner])

    batch_size = 64

    # ngraph import begin ==================================================================
    ng_model, ng_placeholders = CNTKImporter(
        batch_size=batch_size).import_model(cntk_model)

    # label placeholder is appended so it becomes the last computation input
    ng_labels = ng.placeholder(
        [ng.make_axis(output_dim),
         ng.make_axis(batch_size, 'N')])
    ng_placeholders.append(ng_labels)

    transformer = ng.transformers.make_transformer()

    ng_loss = cross_entropy_with_softmax(ng_model, ng_labels)
    parallel_update = CommonSGDOptimizer(learning_rate).minimize(
        ng_loss, ng_loss.variables())
    training_fun = transformer.computation([ng_loss, parallel_update],
                                           *ng_placeholders)

    ng_error = classification_error(ng_model, ng_labels)
    test_fun = transformer.computation(ng_error, *ng_placeholders)
    # ngraph import end ====================================================================

    reader_train = create_reader(train_file, True, input_dim, output_dim)
    train_input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    num_samples = 60000
    num_epochs = 10
    num_minibatches_to_train = (num_samples * num_epochs) / batch_size
    for _ in range(0, int(num_minibatches_to_train)):
        data = reader_train.next_minibatch(batch_size,
                                           input_map=train_input_map)
        trainer.train_minibatch(data)

        # ngraph train
        # squeeze the arrays and move their leading axis to the end
        features_batch = np.moveaxis(np.squeeze(data[input_var].asarray()), 0,
                                     -1)
        labels_batch = np.moveaxis(np.squeeze(data[label_var].asarray()), 0,
                                   -1)
        training_fun(features_batch, labels_batch)

    reader_test = create_reader(test_file, False, input_dim, output_dim)
    test_input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels
    }

    cntk_result = 0.0
    # from here on ng_error is the running sum of test results; the op of the
    # same name is no longer needed because test_fun already wraps it
    ng_error = 0.0
    num_samples = 10000
    num_minibatches_to_test = num_samples // batch_size
    for _ in range(num_minibatches_to_test):
        data = reader_test.next_minibatch(batch_size, input_map=test_input_map)
        cntk_result += trainer.test_minibatch(data)

        # ngraph test
        features_batch = np.moveaxis(np.squeeze(data[input_var].asarray()), 0,
                                     -1)
        labels_batch = np.moveaxis(np.squeeze(data[label_var].asarray()), 0,
                                   -1)
        ng_error += test_fun(features_batch, labels_batch)

    print("Average CNTK test error: {0:.2f}%".format(cntk_result * 100 /
                                                     num_minibatches_to_test))
    print("Average ngraph test error: {0:.2f}%".format(
        ng_error * 100 / num_minibatches_to_test))

    C.softmax(cntk_model).save(os.path.join(MNIST, "MNIST.dnn"))
Esempio n. 19
0
def shape_to_axes(shape):
    """
    Turn a shape into axes.

    Returns a list with one axis per entry of ``shape``; for a falsy shape
    (for example an empty tuple or None) a single axis created without a
    length is returned instead of a list.
    """
    # axis 0 is batch in Caffe2 layouts: NCHW and NHWC
    if not shape:
        return ng.make_axis()
    axes = []
    for size in shape:
        axes.append(ng.make_axis(size))
    return axes
Esempio n. 20
0
def test_convolution(transformer_factory):
    """
    Test the convolution forward path, input gradient and filter gradient.

    The ngraph convolution (followed by a sigmoid / binary cross-entropy
    cost) is executed on random inputs and compared against a neon
    ``Convolution`` layer that is fed the same values.
    """
    # problem sizes: batch, channels in/out, depth, height/width, filter size
    N = 128
    C, K = 3, 8
    D, T = 1, 1
    H = W = 32
    R = S = 2

    padding = dict(pad_d=0, pad_h=0, pad_w=0)
    strides = dict(str_d=1, str_h=1, str_w=1)
    # conv_params is the union of the padding and stride settings
    conv_params = padding.copy()
    conv_params.update(strides)

    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_f = ng.make_axes([ax.C, ax.T, ax.R, ax.S, ax.K])
    ax_i.set_shape((C, D, H, W, N))
    ax_f.set_shape((C, T, R, S, K))
    # output axes: channel axis sized from the filter's Channelout axis,
    # three spatial axes from spatial_axis(), and the batch axis
    ax_o = ng.make_axes([
        ng.make_axis(ax_f.role_axes(ar.Channelout)[0].length,
                     name='C',
                     roles=[ar.Channel]),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_d'],
                     strides['str_d'],
                     role=ar.Depth),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_h'],
                     strides['str_h'],
                     role=ar.Height),
        spatial_axis(ax_i,
                     ax_f,
                     padding['pad_w'],
                     strides['str_w'],
                     role=ar.Width), ax.N
    ])

    # NOTE(review): these two placeholders are rebound a few lines below
    # before they are used in the graph, so this pair appears redundant
    inputs = ng.placeholder(axes=ax_i)
    filters = ng.placeholder(axes=ax_f)

    # randomly initialize
    input_value = rng.uniform(-1, 1, ax_i)
    filter_value = rng.uniform(-1, 1, ax_f)

    assert input_value.shape == ax_i.lengths
    assert filter_value.shape == ax_f.lengths

    inputs = ng.placeholder(ax_i)
    filters = ng.placeholder(ax_f)

    output = ng.convolution(conv_params, inputs, filters, axes=ax_o)
    targets = ng.placeholder(axes=output.axes)

    # scalar error: summed cost divided by the batch size
    costs = ng.cross_entropy_binary(ng.sigmoid(output), targets)
    error = ng.sum(costs, out_axes=()) / ng.batch_size(costs)
    d_inputs = ng.deriv(error, inputs)
    d_filters = ng.deriv(error, filters)

    targets_value = rng.uniform(.1, 0.9, output.axes)

    conv_executor = executor([output, error, d_inputs, d_filters], inputs,
                             filters, targets)
    result_ng, err_ng, gradI_ng, gradF_ng = conv_executor(
        input_value, filter_value, targets_value)

    # Now compute reference values via NEON
    NervanaObject.be.bsz = N
    neon_layer = Convolution(fshape=(R, S, K),
                             padding=padding,
                             strides=strides)

    # neon works on 2-D arrays, so the values are reshaped before upload
    inp = neon_layer.be.array(input_value.reshape(C * H * W * D, N))
    neon_layer.W = neon_layer.be.array(filter_value.reshape(C * R * S * T, K))
    neon_layer.dW = neon_layer.be.empty_like(neon_layer.W)
    neon_layer.configure((C, H, W))
    neon_layer.prev_layer = True
    neon_layer.allocate()
    neon_layer.set_deltas(DummyDeltaBuffers())

    result_ne = neon_layer.fprop(inp).get().reshape(output.axes.lengths)

    # error fed to neon's bprop: (sigmoid(output) - targets) / N
    act_result_ne = 1. / (1.0 + np.exp(-result_ne))
    err = neon_layer.be.array(
        (act_result_ne - targets_value).reshape(-1, N) / float(N))
    gradI_ne = neon_layer.bprop(err).get().reshape(ax_i.lengths)
    gradF_ne = neon_layer.dW.get().reshape(ax_f.lengths)

    # Compare fprop
    np.testing.assert_allclose(result_ng, result_ne, rtol=0, atol=1e-6)

    # Compare bprop
    np.testing.assert_allclose(gradI_ng, gradI_ne, rtol=0, atol=1e-6)

    # Compare update
    np.testing.assert_allclose(gradF_ng, gradF_ne, rtol=0, atol=1e-4)
Esempio n. 21
0
def test_tensor_dot_tensor(transformer_factory):
    """
    Check ng.dot on a table of small tensors.

    For every case the forward result is compared exactly against a
    hand-computed value, and the symbolic derivative with respect to each
    operand is compared against a numeric derivative.

    In the cases below, the axes written as ``X - 1`` on the first operand
    are the ones that get summed against axis ``X`` of the second operand;
    the case without any such axis yields an outer product.
    """
    C = ng.make_axis().named('C')
    D = ng.make_axis().named('D')
    H = ng.make_axis().named('H')
    N = ng.make_axis().named('N')

    # perturbation for the numeric derivative / tolerance of the comparison
    step = 1e-3
    tol = 1e-2

    # (lhs values, lhs axes, rhs values, rhs axes, expected dot, axis lengths)
    cases = [
        (
            [[1, 2], [4, 5], [3, 4]], (C, D - 1),
            [2, 5], (D,),
            [12, 33, 26],
            {C: 3, D: 2},
        ),
        (
            [[1, 4, 3], [2, 5, 4]], (D - 1, C),
            [2, 5], (D,),
            [12, 33, 26],
            {C: 3, D: 2},
        ),
        (
            [[[1, 4], [2, 5]], [[7, 12], [13, 2]]], (N, D - 1, C - 1),
            [[[3, 6], [7, 2]], [[9, 8], [10, 4]]], (H, D, C),
            [[51, 81], [188, 297]],
            {N: 2, D: 2, C: 2, H: 2},
        ),
        (
            [1, 2], (C,),
            [7, 11, 13], (D,),
            [[7, 11, 13], [14, 22, 26]],
            {C: 2, D: 3},
        ),
        (
            [[1, 4], [6, 2]], (C - 1, D - 1),
            [[1, 4], [6, 2]], (C, D),
            57,
            {C: 2, D: 2},
        ),
    ]

    for lhs_data, lhs_axes, rhs_data, rhs_axes, expected, lengths in cases:
        # the axes are shared between cases, so resize them for this one
        for axis, length in lengths.items():
            axis.length = length

        lhs = ng.placeholder(lhs_axes)
        lhs_val = np.array(lhs_data, dtype=np.float32)

        rhs = ng.placeholder(rhs_axes)
        rhs_val = np.array(rhs_data, dtype=np.float32)

        expected_val = np.array(expected, dtype=np.float32)

        with ExecutorFactory() as ex:
            product = ng.dot(lhs, rhs)
            forward = ex.executor(product, lhs, rhs)

            d_lhs_numeric = ex.numeric_derivative(product, lhs, step, rhs)
            d_lhs_symbolic = ex.derivative(product, lhs, rhs)

            d_rhs_numeric = ex.numeric_derivative(product, rhs, step, lhs)
            d_rhs_symbolic = ex.derivative(product, rhs, lhs)

            # forward value must match the hand-computed result exactly
            np.testing.assert_equal(forward(lhs_val, rhs_val), expected_val)

            # gradient w.r.t. the first operand: numeric vs. symbolic
            numeric = d_lhs_numeric(lhs_val, rhs_val)
            symbolic = d_lhs_symbolic(lhs_val, rhs_val)
            ng.testing.assert_allclose(numeric, symbolic, rtol=tol, atol=tol)

            # gradient w.r.t. the second operand: numeric vs. symbolic
            numeric = d_rhs_numeric(rhs_val, lhs_val)
            symbolic = d_rhs_symbolic(rhs_val, lhs_val)
            ng.testing.assert_allclose(numeric, symbolic, rtol=tol, atol=tol)
Esempio n. 22
0
def test_slice(transformer_factory):
    """
    Check ng.Slice values and gradients on a table of 2-D tensors.

    Every case slices a small tensor (integer indices, full slices, ranges
    and reversed slices), compares the result with the expected array, and
    compares the symbolic derivative with a numeric one.

    The perturbation ``delta`` and the tolerances ``rtol`` / ``atol`` are
    not defined here; they are taken from the enclosing module.
    """

    C = ng.make_axis(name='C')
    D = ng.make_axis(name='D')

    tests = [{
        'tensor': [[1, 3], [2, 5]],
        'tensor_axes': (C, D),
        'slice': [0, 1],
        'sliced_axes': (),
        'axes_lengths': {
            C: 2,
            D: 2
        },
        'expected': 3
    }, {
        'tensor': [[1, 3], [2, 5]],
        'tensor_axes': (C, D),
        'slice': [slice(None), 0],
        'sliced_axes': (C, ),
        'axes_lengths': {
            C: 2,
            D: 2
        },
        'expected': [1, 2]
    }, {
        'tensor': [[1, 3], [2, 5]],
        'tensor_axes': (C, D),
        'slice': [1, slice(None)],
        'sliced_axes': (D, ),
        'axes_lengths': {
            C: 2,
            D: 2
        },
        'expected': [2, 5]
    }, {
        'tensor': [[1, 4, 5], [2, 5, 6]],
        'tensor_axes': (C, D),
        'slice': [1, slice(1, 3)],
        'sliced_axes': None,
        'axes_lengths': {
            C: 2,
            D: 3
        },
        'expected': [5, 6]
    }, {
        'tensor': [[1, 4, 5], [2, 5, 6]],
        'tensor_axes': (C, D),
        'slice': [1, slice(None, None, -1)],
        'sliced_axes': None,
        'axes_lengths': {
            C: 2,
            D: 3
        },
        'expected': [6, 5, 2]
    }, {
        'tensor': [[1, 4, 5], [2, 5, 6]],
        'tensor_axes': (C, D),
        'slice': [slice(None, None, -1),
                  slice(None, None, -1)],
        'sliced_axes': None,
        'axes_lengths': {
            C: 2,
            D: 3
        },
        'expected': [[6, 5, 2], [5, 4, 1]]
    }]

    for test in tests:
        # Use the factory as a context manager, like the other tests in this
        # file, so that it is properly entered and released for every case
        # instead of being left open.
        with ExecutorFactory() as ex:
            # the axes are shared between cases, so resize them for this one
            for axis, length in test['axes_lengths'].items():
                axis.length = length
            tensor_axes = test['tensor_axes']

            tensor_np = np.array(test['tensor'], dtype='float32')
            tensor = ng.placeholder(tensor_axes)
            expected = np.array(test['expected'], dtype='float32')

            s = test['slice']
            s_axes = test['sliced_axes']

            sliced = ng.Slice(tensor, s, s_axes)
            sliced_val_fun = ex.executor(sliced, tensor)

            num_deriv_fun = ex.numeric_derivative(sliced, tensor, delta)
            # Test backpropagation
            sym_deriv_fun = ex.derivative(sliced, tensor)

            sliced_val = sliced_val_fun(tensor_np)
            assert np.array_equal(sliced_val, expected)

            numeric_deriv = num_deriv_fun(tensor_np)
            sym_deriv = sym_deriv_fun(tensor_np)

            assert np.allclose(numeric_deriv, sym_deriv, rtol=rtol, atol=atol)
Esempio n. 23
0
def test_dot_sum_backprop(transformer_factory):
    """
    Check values and gradients of a dot product and of its full sum.

    ``d = dot(x, y)`` and ``s = sum(d)`` are evaluated on fixed inputs and
    compared with NumPy; the symbolic derivatives of both ``d`` and ``s``
    with respect to ``x`` and ``y`` are compared with numeric derivatives.
    """
    delta = 1e-3
    rtol = atol = 1e-2

    C = ng.make_axis(length=2).named('C')
    N = ng.make_axis(length=3, name='N')

    x_axes = ng.make_axes((C - 1, N))
    y_axes = ng.make_axes((C,))

    # allocate through the random generator first, then overwrite with the
    # fixed values that the test actually uses
    x_np = np.random.random(x_axes.lengths).astype('float32')
    y_np = np.random.random(y_axes.lengths).astype('float32')
    x_np[...] = [[1.0, 0.0, 1.0], [2.0, 0.0, 3.0]]
    y_np[...] = [-1.0, 1.0]

    x = ng.placeholder(x_axes)
    y = ng.placeholder(y_axes)
    d = ng.dot(x, y)
    s = ng.sum(d, out_axes=())

    def assert_close(left, right):
        ng.testing.assert_allclose(left, right, rtol=rtol, atol=atol)

    def check_gradient(fun_pair, wrt_value, other_value):
        # numeric derivative first, then the symbolic one
        numeric_fun, symbolic_fun = fun_pair
        numeric = numeric_fun(wrt_value, other_value)
        symbolic = symbolic_fun(wrt_value, other_value)
        assert_close(numeric, symbolic)

    with ExecutorFactory() as ex:
        sum_fun = ex.executor(s, x, y)
        dot_fun = ex.executor(d, x, y)

        # (numeric, symbolic) derivative functions, in the order
        # dd/dx, dd/dy, ds/dx, ds/dy
        dd_dx, dd_dy, ds_dx, ds_dy = [
            (ex.numeric_derivative(out, wrt, delta, other),
             ex.derivative(out, wrt, other))
            for out in (d, s)
            for wrt, other in ((x, y), (y, x))
        ]

        # forward value of the dot product against NumPy
        d_np = x_np.T.dot(y_np)
        assert_close(d_np, dot_fun(x_np, y_np))

        check_gradient(dd_dx, x_np, y_np)
        check_gradient(dd_dy, y_np, x_np)

        # forward value of the summed dot product against NumPy
        s_np = np.sum(d_np)
        assert_close(sum_fun(x_np, y_np), s_np)

        # derivatives of the sum w.r.t. both tensors: symbolic vs. numeric
        check_gradient(ds_dx, x_np, y_np)
        check_gradient(ds_dy, y_np, x_np)
Esempio n. 24
0
def test_conv(transformer_factory):
    """
    Compare ng.convolution fprop, bprop and update against a reference.

    A single 3x3 convolution (no padding, unit stride and dilation) is run
    on random input, filter and error tensors; the three results are then
    compared with those returned by ``reference_conv``.

    TODO: make this more interesting
    """
    N, C, K = 64, 32, 32
    D, H, W = 1, 32, 32
    T, R, S = 1, 3, 3

    pad_d, pad_h, pad_w = 0, 0, 0
    str_d, str_h, str_w = 1, 1, 1
    dil_d, dil_h, dil_w = 1, 1, 1

    # output spatial extents
    M = output_dim(D, T, pad_d, str_d)
    P = output_dim(H, R, pad_h, str_h)
    Q = output_dim(W, S, pad_w, str_w)

    # merge padding, strides and dilation into one parameter dict
    padding = dict(pad_d=pad_d, pad_h=pad_h, pad_w=pad_w)
    strides = dict(str_d=str_d, str_h=str_h, str_w=str_w)
    dilation = dict(dil_d=dil_d, dil_h=dil_h, dil_w=dil_w)
    conv_params = padding.copy()
    conv_params.update(strides)
    conv_params.update(dilation)

    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_f = ng.make_axes([ax.C, ax.T, ax.R, ax.S, ax.K])
    ax_i.set_shape((C, D, H, W, N))
    ax_f.set_shape((C, T, R, S, K))

    ax_o = ng.make_axes([
        ng.make_axis(name='C', roles=[ar.features_input]),
        ng.make_axis(name='D', roles=[ar.features_0]),
        ng.make_axis(name='H', roles=[ar.features_1]),
        ng.make_axis(name='W', roles=[ar.features_2]), ax.N
    ])

    # every output axis except the trailing batch axis gets its length here
    ax_o[:-1].set_shape((K, M, P, Q))

    # randomly initialize
    input_value = rng.uniform(-0.5, 0.5, ax_i)
    filter_value = rng.uniform(-0.5, 0.5, ax_f)
    error_value = rng.uniform(-0.5, 0.5, ax_o)

    assert input_value.shape == ax_i.lengths
    assert filter_value.shape == ax_f.lengths

    # graph inputs (created once; an earlier duplicate pair was unused)
    inputs = ng.placeholder(ax_i)
    filters = ng.placeholder(ax_f)
    errors = ng.placeholder(ax_o)

    output = ng.convolution(conv_params, inputs, filters, axes=ax_o)
    bprop_out = bprop_conv(errors, inputs, filters, output)
    updat_out = update_conv(errors, inputs, filters, output)

    with executor([output, bprop_out, updat_out], inputs, filters,
                  errors) as conv_executor:
        result_ng, gradI_ng, gradF_ng = conv_executor(input_value,
                                                      filter_value,
                                                      error_value)

    # Compute reference with NumPy
    result_np, gradI_np, gradF_np = reference_conv(C, N, K, D, H, W, T, R, S,
                                                   M, P, Q, pad_d, pad_h,
                                                   pad_w, str_d, str_h, str_w,
                                                   input_value, filter_value,
                                                   error_value)

    # Compare fprop
    assert np.allclose(result_ng, result_np, rtol=0, atol=0.5)

    # Compare bprop
    assert np.allclose(gradI_ng, gradI_np, rtol=0, atol=0.5)

    # Compare update
    assert np.allclose(gradF_ng, gradF_np, rtol=0, atol=2)
Esempio n. 25
0
def N():
    """Return a newly created axis of length 1."""
    unit_axis = ng.make_axis(length=1)
    return unit_axis
Esempio n. 26
0
def test_scatter_gather_node_axes():
    """
    Check the axes and per-device slices of the gather/scatter nodes.

    For every case the four communication nodes are built for a given set
    of axes, a parallel axis and a tuple of device ids; each node must
    report the axes it was given, and Gather_Recv / Scatter_Send must
    report the expected slices.
    """
    ax_A = ng.make_axis(64)
    ax_B = ng.make_axis(128)
    ax_C = ng.make_axis(255)

    def split_slices(bounds, position, rank):
        # One slice list per (start, stop) pair: the split dimension gets
        # slice(start, stop, 1), every other dimension gets slice(None).
        return [[slice(lo, hi, 1) if dim == position else slice(None)
                 for dim in range(rank)]
                for lo, hi in bounds]

    # (axes, parallel axis, expected slices, device ids)
    cases = [
        (ng.make_axes([ax_A]),
         ax_A,
         split_slices([(0, 32), (32, 64)], 0, 1),
         (0, 1)),
        (ng.make_axes([ax_A, ax_B]),
         ax_A,
         split_slices([(0, 21), (21, 42), (42, 64)], 0, 2),
         (0, 1, 2)),
        (ng.make_axes([ax_A, ax_B, ax_C]),
         ax_A,
         split_slices(
             [(0, 12), (12, 24), (24, 36), (36, 48), (48, 64)], 0, 3),
         (0, 1, 2, 3, 4)),
        (ng.make_axes([ax_A, ax_B, ax_C]),
         ax_C,
         split_slices([(0, 127), (127, 255)], 2, 3),
         (0, 1)),
    ]

    for axes, parallel_axis, expected_slices, devices in cases:
        gather_send_node = Gather_Send(
            from_node=ng.placeholder(()),
            axes=axes,
            queue=None,
            device=None,
            device_id=None)
        assert axes == gather_send_node.axes

        gather_recv_node = Gather_Recv(
            axes=axes,
            dtype=np.float32,
            parallel_axis=parallel_axis,
            queues=None,
            send_node=gather_send_node,
            device=None,
            device_id=None,
            from_id=devices)
        assert axes == gather_recv_node.axes
        assert expected_slices == gather_recv_node.slices

        scatter_send_node = Scatter_Send(
            from_node=ng.placeholder(()),
            axes=axes,
            parallel_axis=parallel_axis,
            queues=None,
            device=None,
            device_id=None,
            to_id=devices)
        assert axes == scatter_send_node.axes
        assert expected_slices == scatter_send_node.slices

        scatter_recv_node = Scatter_Recv(
            axes=axes,
            dtype=np.float32,
            queue=None,
            send_node=scatter_send_node,
            device=None,
            device_id=None)
        assert axes == scatter_recv_node.axes
Esempio n. 27
0
def check_lstm(seq_len,
               input_size,
               hidden_size,
               batch_size,
               init_func,
               return_seq=True,
               backward=False,
               reset_cells=False,
               num_iter=2):
    """
    Compare the forward pass of the LSTM layer with the reference RefLSTM.

    The layer is run ``num_iter`` times on random inputs; its weights and
    biases are then copied into the reference implementation, which is run
    on the same inputs, and the outputs of both are compared.

    The tolerances ``rtol`` / ``atol`` are not defined here; they are taken
    from the enclosing module.

    Arguments:
        seq_len: length of the recurrent axis.
        input_size: length of the input feature axis.
        hidden_size: number of hidden units of the LSTM.
        batch_size: length of the batch axis.
        init_func: initializer handed to the LSTM layer.
        return_seq: forwarded to the layer as ``return_sequence``.
        backward: forwarded to the layer as ``backward``.
        reset_cells: forwarded to the layer; when False the reference also
            carries its cell and hidden state from one iteration to the next.
        num_iter: number of forward passes to run and compare.
    """
    feature_ax = ng.make_axis(input_size, name='Feature')
    time_ax = ng.make_axis(seq_len, name='REC')
    batch_ax = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([feature_ax, time_ax, batch_ax])

        lstm_ng = LSTM(
            hidden_size,
            init_func,
            activation=Tanh(),
            gate_activation=Logistic(),
            reset_cells=reset_cells,
            return_sequence=return_seq,
            backward=backward)

        out_ng = lstm_ng(inp_ng)

        run_fprop = copier(ex.executor((out_ng, lstm_ng.h_init), inp_ng))

        # per-gate parameters are read back in the order i, f, o, g
        gate_names = ['i', 'f', 'o', 'g']
        read_w_input = copier_T(
            ex.executor([lstm_ng.W_input[gate] for gate in gate_names]))
        read_w_recur = copier_T(
            ex.executor([lstm_ng.W_recur[gate] for gate in gate_names]))
        read_bias = copier(
            ex.executor([lstm_ng.b[gate] for gate in gate_names]))

        layer_outputs = []
        fed_inputs = []

        for _ in range(num_iter):
            # fprop on random inputs
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            layer_out, h_init_value = run_fprop(input_value)

            if return_seq is True:
                layer_out = layer_out[:, :, 0]

            fed_inputs.append(input_value)
            layer_outputs.append(layer_out)

            if reset_cells is False:
                # the last hidden state must equal the stored initial state
                ng.testing.assert_allclose(
                    layer_out[:, -1].reshape(-1, 1),
                    h_init_value,
                    rtol=rtol,
                    atol=atol)

        # The weights are fetched only after the graph has been executed;
        # copies are used so shared values don't confuse derivatives.
        w_input_values = read_w_input()
        w_recur_values = read_w_recur()
        bias_values = read_bias()

        # reference numpy LSTM
        lstm_ref = RefLSTM()
        WLSTM = lstm_ref.init(input_size, hidden_size)

        # fill the reference parameter matrix from the layer: biases in row
        # 0, then the input weights, then the recurrent weights
        WLSTM[0, :] = np.concatenate(bias_values)
        WLSTM[1:input_size + 1, :] = np.concatenate(w_input_values, 1)
        WLSTM[input_size + 1:] = np.concatenate(w_recur_values, 1)

        # run the reference on transposed copies of the same inputs
        reference_outputs = []
        c0 = h0 = None
        for input_value in fed_inputs:
            inp_ref = input_value.copy().transpose([1, 2, 0])
            Hout_ref, cprev, hprev, _cache = lstm_ref.forward(
                inp_ref, WLSTM, c0, h0)
            if reset_cells is False:
                c0, h0 = cprev, hprev

            # the output needs transpose as well
            reference_outputs.append(
                Hout_ref.reshape(seq_len * batch_size, hidden_size).T)

        for layer_out, reference_out in zip(layer_outputs, reference_outputs):
            ng.testing.assert_allclose(
                layer_out, reference_out, rtol=rtol, atol=atol)
Esempio n. 28
0
def feature_axis(input_size):
    """Return an axis named 'Fin' whose length is ``input_size``."""
    axis = ng.make_axis(input_size, name='Fin')
    return axis
Esempio n. 29
0
def test_linear_invalid_batch_axes():
    """Expect ValueError when Linear is built with an axis named 'N'."""
    with pytest.raises(ValueError):
        batch_named_axis = ng.make_axis(1, name='N')
        initializer = UniformInit(1.0, 1.0)
        Linear(axes=batch_named_axis, init=initializer)
Esempio n. 30
0
 def make_placeholders(self):
     """
     Build one placeholder per entry of ``self.data_arrays``.

     Sets the length of the shared batch axis ``ax.N`` to
     ``self.batch_size`` and gives every placeholder the axes
     (N, time), where the time axis has length ``self.time_steps``.

     Returns:
         dict mapping every key of ``self.data_arrays`` to its placeholder.
     """
     ax.N.length = self.batch_size
     time_axis = ng.make_axis(length=self.time_steps).named('time')
     batch_time_axes = ng.make_axes([ax.N, time_axis])

     placeholders = {}
     for key in self.data_arrays.keys():
         placeholders[key] = ng.placeholder(batch_time_axes)
     return placeholders
Esempio n. 31
0
def test_linear_invalid_shadow_axes():
    """Expect ValueError when Linear is built with a shadow axis."""
    with pytest.raises(ValueError):
        shadow_axis = make_shadow_axis(ng.make_axis(1, name='A'))
        initializer = UniformInit(1.0, 1.0)
        Linear(axes=shadow_axis, init=initializer)
Esempio n. 32
0
# Training graph: multi-class cross entropy between the predicted
# probabilities and the one-hot encoded target text along ax.Y.
# NOTE(review): usebits=True presumably reports the loss in bits -- confirm.
loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                              usebits=True)
# Reduce over all axes to a single value, used only for reporting.
mean_cost = ng.mean(loss, out_axes=[])
# The optimizer is applied to the unreduced loss, not to mean_cost.
updates = optimizer(loss)

# Outputs requested from the training and the loss-evaluation computations.
train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss)

# inference graph
with Layer.inference_mode_on():
    enc_out_inference = enc(one_hot_enc_out)

    # Create decoder placeholders
    axes = one_hot_dec_out.axes
    # Replace the recurrent axis with a length-1 axis named "REC".
    # NOTE(review): presumably so the decoder can be run one step at a time
    # during inference -- confirm against the decoding loop.
    axes = axes - axes.recurrent_axis() + ng.make_axis(length=1, name="REC")
    decoder_input_inference = ng.placeholder(axes, name="input")
    # The decoder state placeholder has the same axes as the encoder output.
    decoder_state_inference = ng.placeholder(enc_out_inference.axes,
                                             name="state")
    dec_out_inference = dec(decoder_input_inference,
                            init_state=decoder_state_inference)
    inference_out = linear(dec_out_inference)

# Encoder and decoder are exposed as two separate computations; the decoder
# takes its input and its state as arguments and returns both the projected
# output and the raw decoder output.
encoder_computation = ng.computation(enc_out_inference, inputs["inp_txt"])
decoder_computation = ng.computation([inference_out, dec_out_inference],
                                     decoder_input_inference,
                                     decoder_state_inference)

######################
# Train Loop
Esempio n. 33
0
# Initialize Answer Pointer Cell
# (weights W_in_a / W_rec_a and bias b_a are defined outside this fragment)
answer_init = AnswerPointer_withAttention(
    params_dict,
    hidden_size,
    init=W_in_a,
    init_h2h=W_rec_a,
    bias_init=b_a,
    activation=Tanh(),
    gate_activation=Logistic(),
    reset_cells=True)


# Make Required Axes
# Axis with length of batch size
N = ng.make_axis(length=params_dict['batch_size'], name='N')
# Axis with length of max question
REC = ng.make_axis(length=max_question, name='REC')
# Axis with length of hidden unit size
F = ng.make_axis(length=hidden_size, name='F')
# Axis with length of embedding size
# NOTE(review): the embedding size 300 is hard-coded here -- confirm it
# matches the embeddings that are actually loaded.
F_embed = ng.make_axis(length=300, name='F_embed')
# Axis with length 1
dummy_axis = ng.make_axis(length=1, name='dummy_axis')
# Axis with length of answer span
span = ng.make_axis(length=2, name='span')


# Set up drop out layer
# The keep value is read from index 0 along the batch axis of the
# 'dropout_val' input.
dropout_val = ng.slice_along_axis(inputs['dropout_val'], N, 0)
dropout_1 = Dropout_Modified(keep=dropout_val)
Esempio n. 34
0
def test_conv_flatten_deriv(transformer_factory, n4_hw12_c3_5x5):
    """
    Test deriv of conv followed by flatten

    An all-ones input is convolved with an all-ones filter, the output is
    reordered, sliced and flattened, and the sum of the result is used as
    the cost. The forward value and the symbolic gradients of the cost
    with respect to the filter and the input are checked (the gradients
    against numeric derivatives).
    """
    cf = ConvParams(**n4_hw12_c3_5x5)

    # Filter axes picked from cf.ax_f in the order [2], [3], [0], [-1]
    # (presumably R, S, C, K, judging by the variable name).
    axes_rsck = ng.make_axes([cf.ax_f[2], cf.ax_f[3], cf.ax_f[0], cf.ax_f[-1]])
    # Fresh axes with the same lengths and the original names suffixed 'p'.
    axes_rsck_prime = ng.make_axes([
        ng.make_axis(name=ax.name + 'p', length=ax.length) for ax in axes_rsck
    ])
    # Output axes picked from cf.ax_o in the order [-1], [1], [2], [3], [0].
    axes_nmpqk = ng.make_axes(
        [cf.ax_o[-1], cf.ax_o[1], cf.ax_o[2], cf.ax_o[3], cf.ax_o[0]])

    # broadcast input / filter axes
    input_var = ng.variable(cf.ax_i).named('input')
    input_val = np.ones(input_var.axes.lengths)

    # The trainable filter lives on the primed axes; it is cast to the
    # original axes, given the missing cf.ax_f[1] axis at position 0 and
    # reordered to cf.ax_f before being fed to the convolution.
    filter_rsck_prime = ng.variable(axes_rsck_prime).named('filter')
    filter_var = filter_rsck_prime
    filter_rsck = ng.cast_axes(filter_rsck_prime, axes_rsck).named('frsck')
    filter_trsck = ng.expand_dims(filter_rsck, cf.ax_f[1], 0).named('ftrsck')
    filter_ctrsk = ng.axes_with_order(filter_trsck,
                                      axes=cf.ax_f).named('ctrsk')

    # convolution
    output_kmpqn = ng.convolution(cf.conv_params,
                                  input_var,
                                  filter_ctrsk,
                                  axes=cf.ax_o)
    output_nmpqk = ng.axes_with_order(output_kmpqn, axes=axes_nmpqk)

    # slice away the oD
    out_slicing = [slice(None), 0, slice(None), slice(None), slice(None)]
    output_npqk = ng.tensor_slice(output_nmpqk, out_slicing)

    output = ng.flatten_at(output_npqk, idx=1)

    # cost and grad
    cost = ng.sum(output, out_axes=())

    filter_val = np.ones(filter_var.axes.lengths)

    with ExecutorFactory() as factory:

        conv_comp = factory.executor(output, filter_var, input_var)
        # numeric derivatives use a perturbation of 1.0
        grad_filter_num_comp = factory.numeric_derivative(
            cost, filter_var, 1.0, input_var)
        grad_filter_sym_comp = factory.derivative(cost, filter_var, input_var)

        grad_input_num_comp = factory.numeric_derivative(
            cost, input_var, 1.0, filter_var)
        grad_input_sym_comp = factory.derivative(cost, input_var, filter_var)

        # With all-ones input and filter, every output element is expected
        # to equal the product of all filter axis lengths except the last.
        conv_val = conv_comp(filter_val, input_val)
        conv_val_num = np.empty_like(conv_val)
        conv_val_num.fill(np.prod(cf.ax_f.lengths[:-1]))
        assert ng.testing.allclose(conv_val, conv_val_num)

        # gradient w.r.t. the filter: numeric vs. symbolic
        grad_filter_num_val = grad_filter_num_comp(filter_val, input_val)
        grad_filter_sym_val = grad_filter_sym_comp(filter_val, input_val)
        assert ng.testing.allclose(grad_filter_num_val, grad_filter_sym_val)

        # gradient w.r.t. the input: numeric vs. symbolic
        grad_input_num_val = grad_input_num_comp(input_val, filter_val)
        grad_input_sym_val = grad_input_sym_comp(input_val, filter_val)
        assert ng.testing.allclose(grad_input_num_val, grad_input_sym_val)
Esempio n. 35
0
    def _expand_input_axes(self, inputs):
        """
        Expand a convolution input with 2 to 5 axes to the five axes
        [C, D, H, W, N].

        Missing axes are created with length 1. Whether the last axis of
        the input is treated as the batch axis depends on the input having
        a batch axis (``inputs.axes.batch_axis()``); when it has none, a
        length-1 axis named 'N' is created.

        Arguments:
            inputs: Convolution input tensor.

        Returns:
            ``inputs`` broadcast to the axes [C, D, H, W, N].

        Raises:
            ValueError: if the input does not have 2 to 5 axes.
        """
        axes = inputs.axes
        dim = len(axes)
        batch = axes.batch_axis()

        if dim == 5:
            # already complete
            C, D, H, W, N = axes
        elif dim == 4:
            if batch:
                # depth is missing
                C, H, W, N = axes
                D = ng.make_axis(1)
            else:
                # batch is missing
                C, D, H, W = axes
                N = ng.make_axis(1, 'N')
        elif dim == 3:
            if batch:
                # channels and depth are missing
                H, W, N = axes
                C = ng.make_axis(1)
                D = ng.make_axis(1)
            else:
                # depth and batch are missing
                C, H, W = axes
                D = ng.make_axis(1)
                N = ng.make_axis(1, 'N')
        elif dim == 2:
            if batch:
                # only height and batch are present
                H, N = axes
                C = ng.make_axis(1)
                D = ng.make_axis(1)
                W = ng.make_axis(1)
            else:
                # only height and width are present
                H, W = axes
                C = ng.make_axis(1)
                D = ng.make_axis(1)
                N = ng.make_axis(1, 'N')
        else:
            raise ValueError("Convolution input must have 2 to 5 axes.")

        return ng.broadcast(inputs, [C, D, H, W, N])
Esempio n. 36
0
def batch_axis(request):
    """Return an axis named 'N' whose length is ``request.param``."""
    length = request.param
    return ng.make_axis(length, name='N')
Esempio n. 37
0
    def _make_out_axes(self, shape):
        """
        Make output convolution axes.

        Arguments:
            shape: CNTK convolution output shape.

        Returns:
            List of dynamic output axes.
        """
        dim = len(shape)
        if dim == 4:
            M = ng.make_axis(shape[1])
            oH = ng.make_axis(shape[2])
            oW = ng.make_axis(shape[3])
        elif dim == 3:
            M = ng.make_axis(1)
            oH = ng.make_axis(shape[1])
            oW = ng.make_axis(shape[2])
        elif dim == 2:
            M = ng.make_axis(1)
            oH = ng.make_axis(shape[1])
            oW = ng.make_axis(1)
        elif dim == 1:
            M = ng.make_axis(1)
            oH = ng.make_axis(1)
            oW = ng.make_axis(1)

        return [M, oH, oW]
Esempio n. 38
0
# Load the WikiMovies data as configured on the command line.
wikimovies = WIKIMOVIES(args.data_dir,
                        subset=args.subset,
                        reparse=args.reparse,
                        mem_source=args.mem_mode)

# One pass over the training queries, in whole batches (the remainder of
# the integer division is dropped).
ndata = wikimovies.data_dict['train']['query']['data'].shape[0]
num_iterations = ndata // args.batch_size

train_set = ArrayIterator(wikimovies.data_dict['train'],
                          batch_size=args.batch_size,
                          total_iterations=num_iterations)
test_set = ArrayIterator(wikimovies.data_dict['test'],
                         batch_size=args.batch_size)
# Graph placeholders are derived from the training iterator.
inputs = train_set.make_placeholders()
vocab_axis = ng.make_axis(length=wikimovies.vocab_size, name='vocab_axis')

memn2n = KVMemN2N(num_iterations, args.batch_size, args.emb_size, args.nhops,
                  wikimovies.story_length, wikimovies.memory_size,
                  wikimovies.vocab_size, vocab_axis, args.use_v_luts)
# Compute answer predictions
a_pred, _ = memn2n(inputs)

# Multi-class cross entropy between the predictions and the one-hot encoded
# answers along the vocabulary axis.
loss = ng.cross_entropy_multi(a_pred,
                              ng.one_hot(inputs['answer'], axis=vocab_axis),
                              usebits=True)

# NOTE(review): despite its name, mean_cost is the sum of the loss over all
# axes, not the mean -- confirm whether this is intended.
mean_cost = ng.sum(loss, out_axes=[])

optimizer = Adam(learning_rate=args.lr)