Example #1
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}
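# Since ((1 / 250.)**(1 / 3.))**3 == 1 / 250., applying gamma at each of the three
# milestones above decays the learning rate by a total factor of 250 (assuming the
# 'schedule' policy multiplies the rate by gamma at every listed iteration).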
optimizer = GradientDescentMomentum(lr_schedule,
                                    0.0,
                                    wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
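# ng.sequential evaluates its ops in order and returns the last value, so batch_cost
# is intended to apply the optimizer update and then report the mean training loss.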
train_computation = ng.computation(batch_cost, "all")

with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)

    if args.no_progress_bar:
        ncols = 0
    else:
        ncols = 100

    tpbar = tqdm(unit="batches", ncols=ncols, total=args.num_iterations)
    interval_cost = 0.0

    for step, data in enumerate(train_set):
        data['iteration'] = step
        feed_dict = {inputs[k]: data[k] for k in inputs.keys()}
        output = train_function(feed_dict=feed_dict)
Example #2
                     labels=inputs['answer'], drop=dropout_val)

# Inference Mode for validation dataset:
with Layer.inference_mode_on():
    eval_outputs = dict(logits=ng.stack(logits_concat, span, 1),
                        labels=inputs['answer'], drop=drop_pointer)


# Now bind the computations we are interested in
print('generating transformer')
eval_frequency = 20
val_frequency = np.ceil(len(train['para']['data']) / params_dict['batch_size'])
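# val_frequency amounts to one pass over the training data, i.e. the number of
# batches per epoch.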
train_error_frequency = 1000

# Create Transformer
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
valid_computation = make_bound_computation(transformer, eval_outputs, inputs)


'''
TODO: Include a feature to save and load weights
'''

# Ensure the batch size is greater than 0
assert params_dict['batch_size'] > 0

# Start iterating through the training set
epoch_no = 0

for idx, data in enumerate(train_set):
Example #3
    def __enter__(self):
        self.transformer = ngt.make_transformer()
        return self
Example #4
    Preprocess(functor=cifar_mean_subtract),
    Affine(nout=200, weight_init=UniformInit(-0.1, 0.1), activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1), activation=Softmax())
])

optimizer = GradientDescentMomentum(0.1, 0.9)
output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label'])
loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
loss_computation = make_bound_computation(transformer, loss_outputs, inputs)

cbs = make_default_callbacks(output_file=args.output_file,
                             frequency=args.iter_interval,
                             train_computation=train_computation,
                             total_iterations=args.num_iterations,
                             eval_set=valid_set,
                             loss_computation=loss_computation,
                             use_progress_bar=args.progress_bar)

loop_train(train_set, train_computation, cbs)
Example #5
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ----------------------------------------------------------------------------

from __future__ import print_function
import tensorflow as tf
import ngraph.transformers as ngt
from ngraph.frontends.tensorflow.tf_importer.importer import TFImporter

# tensorflow ops
x = tf.constant(1.)
y = tf.constant(2.)
f = x + y

# import
importer = TFImporter()
importer.import_graph_def(tf.get_default_graph().as_graph_def())

# get handle
f_ng = importer.get_op_handle(f)

# execute
f_result = ngt.make_transformer().computation(f_ng)()
print(f_result)
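# With x = 1. and y = 2., the imported computation should print 3.0.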
Example #6
def linear_regression(iter_num, lrate, gamma, step_size, noise_scale):
    # data multiplier
    m = 3
    # batch_len and data
    xs_np = np.array(
        [[0, 0], [1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [-1.0, -1.0]], dtype='f')
    ys_np = np.array([[0.5 * m], [2.5 * m], [4.5 * m], [6.5 * m], [-1.5 * m]],
                     dtype='f')
    batch_len = len(ys_np)

    # with these values, the target weight and bias below should be approximated
    # after training:
    target_b = 0.5 * m
    target_w = np.array([1.0, 1.0]) * m

    # noise amplitude and noise generation
    noise_l = np.array(noise_scale * np.random.randn(batch_len), dtype='f')
    noise = [[i] for i in noise_l]

    # caffe2 init network
    init_net = core.Net("init")
    ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.)
    ITER = init_net.ConstantFill([],
                                 "ITER",
                                 shape=[1],
                                 value=0,
                                 dtype=core.DataType.INT32)

    # parameters to be learned: the weight W (1x2) is randomly initialized and the
    # bias B is a scalar; the input X is 2x1, so the prediction is a scalar
    W = init_net.UniformFill([], "W", shape=[1, 2], min=-1., max=1.)
    B = init_net.ConstantFill([], "B", shape=[1], value=0.0)
    print('Created init net.')

    # caffe2 train net
    train_net = core.Net("train")

    # define the external inputs: X, the ground truth, and a noisy version of the truth
    workspace.FeedBlob('X', xs_np)
    workspace.FeedBlob('Y_gt', ys_np)
    workspace.FeedBlob('Y_noise', ys_np + noise)
    train_net.AddExternalInput("X")
    train_net.AddExternalInput("Y_noise")
    train_net.AddExternalInput("Y_gt")

    # now, for the normal linear regression prediction, this is all we need.
    Y_pred = train_net.FC(["X", W, B], "Y_pred")

    # when computing the loss, we refer to the noisy version of the truth:
    dist = train_net.SquaredL2Distance(["Y_noise", Y_pred], "dist")
    loss = dist.AveragedLoss([], ["loss"])

    # Create the Caffe2 initialization and training nets so that the underlying objects
    # exist and their handles can be obtained by the importer
    workspace.CreateNet(init_net)
    workspace.CreateNet(train_net)

    # import the caffe2 network into ngraph
    print("\n\n---------------------ngraph behaviour:")
    importer = C2Importer()
    importer.parse_net_def(net_def=train_net.Proto(),
                           init_net_def=init_net.Proto(),
                           c2_workspace=workspace)

    # Get handles to the various objects we are interested in for the ngraph computation
    y_gt_ng, x_ng, w_ng, b_ng, y_pred_ng, dist_ng, loss_ng =  \
        importer.get_op_handle(['Y_noise', 'X', 'W', 'B', 'Y_pred', 'dist', 'loss'])

    # set the learning rate for ngraph to match the one used for caffe2 below
    lr_params = {
        'name': 'step',
        'base_lr': lrate,
        'gamma': gamma,
        'step': step_size
    }
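    # Under the 'step' policy the effective rate is roughly
    # base_lr * gamma ** (iter // step), matching what the Caffe2 LearningRate op
    # below is configured to do.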

    SGD = util.CommonSGDOptimizer(lr_params)
    parallel_update = SGD.minimize(loss_ng, [w_ng, b_ng])
    transformer = ngt.make_transformer()
    update_fun = transformer.computation(
        [loss_ng, w_ng, b_ng, parallel_update], x_ng, y_gt_ng,
        SGD.get_iter_buffer())

    true_iter = [0]
    # ngraph actual computation
    for i in range(iter_num // batch_len):
        for xs, ys in zip(xs_np, ys_np + noise):
            loss_val, w_val, b_val, _ = update_fun(xs, ys, i)
            # print("N it: %s W: %s, B: %s loss %s " % (i, w_val, b_val, loss_val))
            true_iter[0] += 1

    print("Ngraph loss %s " % (loss_val))

    # end of ngraph part

    # caffe2 backward pass and computation to compare results with ngraph
    gradient_map = train_net.AddGradientOperators([loss])

    # Increment the iteration by one.
    train_net.Iter(ITER, ITER)

    # Caffe2 backward pass and computation
    # Get gradients for all the computations above and do the weighted sum
    LR = train_net.LearningRate(ITER,
                                "LR",
                                base_lr=-lrate,
                                policy="step",
                                stepsize=step_size,
                                gamma=gamma)
    train_net.WeightedSum([W, ONE, gradient_map[W], LR], W)
    train_net.WeightedSum([B, ONE, gradient_map[B], LR], B)
    workspace.RunNetOnce(init_net)
    workspace.CreateNet(train_net)

    for i in range(iter_num):
        workspace.RunNet(train_net.Proto().name)
        # print("During training, loss is: {}".format(workspace.FetchBlob("loss")))

    print("Caffe2 loss is: {}".format(workspace.FetchBlob("loss")))
    # end of caffe2 part

    # printing out results
    print(
        "Done {} iterations over the batch data, with noise coefficient set to {}"
        .format(iter_num, noise_scale))
    print("Caffe2 after training, W is: {}".format(workspace.FetchBlob("W")))
    print("Caffe2 after training, B is: {}".format(workspace.FetchBlob("B")))
    print("Ngraph after training, W is: {}".format(w_val))
    print("Ngraph after training, B is: {}".format(b_val))
    print("Target W was: {}".format(target_w))
    print("Target B was: {}".format(target_b))

    assert (workspace.FetchBlob("loss") < 0.01)
    assert (loss_val < 0.01)
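
# A minimal, hypothetical invocation of linear_regression; the values below are
# illustrative only (they do not come from the original script), and whether the
# final asserts pass depends on the hyperparameters chosen.
linear_regression(iter_num=2000, lrate=0.05, gamma=0.9, step_size=500,
                  noise_scale=0.01)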
Example #7
# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, attention_inference = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(
        a_pred_inference, inputs['answer'], usebits=True)

interactive_outputs = dict(
    test_preds=a_pred_inference,
    attention=attention_inference)
eval_outputs = dict(test_cross_ent_loss=eval_loss, test_preds=a_pred_inference)

# Train Loop
with closing(ngt.make_transformer()) as transformer:
    # bind the computations
    train_computation = make_bound_computation(
        transformer, train_outputs, inputs)
    loss_computation = make_bound_computation(
        transformer, eval_outputs, inputs)
    interactive_computation = make_bound_computation(
        transformer, interactive_outputs, inputs)

    weight_saver.setup_save(transformer=transformer, computation=train_outputs)

    if args.restore and os.path.exists(weights_save_path):
        print("Loading weights from {}".format(weights_save_path))
        weight_saver.setup_restore(
            transformer=transformer,
            computation=train_outputs,
Example #8
def mnist_mlp(args):
    mnist = input_data.read_data_sets(args.data_dir, one_hot=False)

    train_x, train_y = mnist.train.next_batch(args.batch)
    # we have to feed the blobs with some data to give them a valid shape,
    # because ngraph will import this shape
    workspace.FeedBlob('train_x', train_x)
    # currently caffe2 accepts only int32 data type
    workspace.FeedBlob('train_y', train_y.astype('int32'))

    init_net = core.Net('init')
    main_net = core.Net('main')

    # number of units for each fully-connected layer (input, two hidden layers, output)
    fc_size = [784, 512, 128, 10]
    init_net.UniformFill([],
                         'fc_w1',
                         shape=[fc_size[1], fc_size[0]],
                         min=-.5,
                         max=.5)
    init_net.UniformFill([],
                         'fc_w2',
                         shape=[fc_size[2], fc_size[1]],
                         min=-.5,
                         max=.5)
    init_net.UniformFill([],
                         'fc_w3',
                         shape=[fc_size[3], fc_size[2]],
                         min=-.5,
                         max=.5)
    init_net.UniformFill([], 'fc_b1', shape=[fc_size[1]], min=-.5, max=.5)
    init_net.UniformFill([], 'fc_b2', shape=[fc_size[2]], min=-.5, max=.5)
    init_net.UniformFill([], 'fc_b3', shape=[fc_size[3]], min=-.5, max=.5)

    main_net.FC(['train_x', 'fc_w1', 'fc_b1'], 'FC1')
    main_net.Relu('FC1', 'activ1')
    main_net.FC(['activ1', 'fc_w2', 'fc_b2'], 'FC2')
    main_net.Relu('FC2', 'activ2')
    main_net.FC(['activ2', 'fc_w3', 'fc_b3'], 'FC3')
    main_net.Softmax('FC3', 'softmax')
    main_net.LabelCrossEntropy(['softmax', 'train_y'], 'xent')
    main_net.AveragedLoss('xent', 'loss')

    # Ngraph part
    if ng_on:
        print('>>>>>>>>>>>>>> Ngraph')
        # import graph_def
        importer = C2Importer()
        importer.parse_net_def(net_def=main_net.Proto(),
                               init_net_def=init_net.Proto(),
                               c2_workspace=workspace)

        # get handle of ngraph ops
        x_train_ng, y_train_ng, loss_ng, \
            fc_w1_ng, fc_w2_ng, fc_w3_ng, fc_b1_ng, fc_b2_ng, fc_b3_ng = importer.get_op_handle(
                ['train_x', 'train_y', 'loss',
                 'fc_w1', 'fc_w2', 'fc_w3', 'fc_b1', 'fc_b2', 'fc_b3'])

        # set the learning rate for ngraph
        # to match the one used for caffe2 below
        alpha = ng.placeholder(axes=(), initial_value=[args.lrate])

        # transformer and computations
        parallel_update = util.CommonSGDOptimizer(args.lrate) \
            .minimize(loss_ng, [fc_w1_ng, fc_w2_ng, fc_w3_ng, fc_b1_ng, fc_b2_ng, fc_b3_ng])
        transformer = ngt.make_transformer()
        update_fun = transformer.computation([loss_ng, parallel_update], alpha,
                                             x_train_ng, y_train_ng)

        # train
        # ngraph actual computation
        for i in range(args.max_iter):
            train_x, train_y = mnist.train.next_batch(args.batch)
            lr = args.lrate * (1 + args.gamma * i)**(-args.power)
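            # this is the 'inv' policy, base_lr * (1 + gamma * iter) ** (-power),
            # mirroring the Caffe2 LearningRate op configured further below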
            loss_val, _ = update_fun(lr, train_x, train_y)
            if args.verbose and i % log_interval == 0:
                print('iter %s, loss %s ' % (i, loss_val))
    # ======================================
    if c2_on:
        mnist = input_data.read_data_sets(args.data_dir, one_hot=False)
        print('>>>>>>>>>>>>>> Caffe')
        # caffe2 backward pass and computation to compare results with ngraph
        init_net.ConstantFill([], 'ONE', shape=[1], value=1.)
        init_net.ConstantFill([],
                              'ITER',
                              shape=[1],
                              value=0,
                              dtype=core.DataType.INT32)
        gradient_map = main_net.AddGradientOperators(['loss'])

        # Increment the iteration by one.
        main_net.Iter('ITER', 'ITER')

        # Caffe2 backward pass and computation
        # Get gradients for all the computations above and do the weighted sum
        main_net.LearningRate('ITER',
                              'LR',
                              base_lr=-args.lrate,
                              policy='inv',
                              power=args.power,
                              gamma=args.gamma)

        main_net.WeightedSum(['fc_w1', 'ONE', gradient_map['fc_w1'], 'LR'],
                             'fc_w1')
        main_net.WeightedSum(['fc_w2', 'ONE', gradient_map['fc_w2'], 'LR'],
                             'fc_w2')
        main_net.WeightedSum(['fc_w3', 'ONE', gradient_map['fc_w3'], 'LR'],
                             'fc_w3')
        main_net.WeightedSum(['fc_b1', 'ONE', gradient_map['fc_b1'], 'LR'],
                             'fc_b1')
        main_net.WeightedSum(['fc_b2', 'ONE', gradient_map['fc_b2'], 'LR'],
                             'fc_b2')
        main_net.WeightedSum(['fc_b3', 'ONE', gradient_map['fc_b3'], 'LR'],
                             'fc_b3')
        workspace.RunNetOnce(init_net)
        workspace.CreateNet(main_net)

        for i in range(args.max_iter):
            train_x, train_y = mnist.train.next_batch(args.batch)
            workspace.FeedBlob('train_x', train_x)
            workspace.FeedBlob('train_y', train_y.astype('int32'))
            workspace.RunNet(main_net.Proto().name)
            if args.verbose and i % log_interval == 0:
                print('Iter: {}, C2 loss is: {}'.format(
                    i, workspace.FetchBlob('loss')))
        # end of caffe2 part

    if ng_on:
        print('Ngraph loss is: %s' % loss_val)
    if c2_on:
        print('Caffe2 loss is: {}'.format(workspace.FetchBlob('loss')))
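
# A hypothetical driver for mnist_mlp; the attribute names mirror what the function
# reads (data_dir, batch, lrate, gamma, power, max_iter, verbose), but the values are
# illustrative only.  It also assumes the module-level ng_on, c2_on and log_interval
# flags used inside the function are defined, as in the original script.
from argparse import Namespace

mnist_mlp(Namespace(data_dir='/tmp/mnist', batch=128, lrate=0.01, gamma=0.0001,
                    power=0.75, max_iter=1000, verbose=True))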
Example #9
# ----------------------------------------------------------------------------
# Copyright 2016 Nervana Systems Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ----------------------------------------------------------------------------

from __future__ import print_function
import ngraph.transformers as ngt
from ngraph.frontends.caffe.cf_importer.importer import parse_prototxt

model = "sum.prototxt"
# import graph from the prototxt
op_map = parse_prototxt(model, verbose=True)
# get the op handle for any layer
op = op_map.get("D")
# execute the op handle
res = ngt.make_transformer().computation(op)()
print("Result is:", res)
# EOF
Example #10
def test_empty_finalize():
    """Evaluating an empty NumPyTransformer shouldn't raise any exceptions."""
    ngt.make_transformer().initialize()
Example #11
def train_network(model, train_set, valid_set, batch_size, epochs, log_file):
    '''
    Trains the predefined network and saves the progress in the log file
    given in the arguments.

    model(object): Defines the model in Neon
    train_set(object): Defines the training set
    valid_set(object): Defines the validation set
    batch_size(int): Minibatch size
    epochs(int): Number of training epochs
    log_file(string): File name to store training logs for plotting

    '''

    # Form placeholders for inputs to the network
    # Iterations needed for learning rate schedule
    inputs = train_set.make_placeholders(include_iteration=True)

    # Convert labels into one-hot vectors
    one_hot_label = ng.one_hot(inputs['label'], axis=ax.Y)

    learning_rate_policy = {
        'name': 'schedule',
        'schedule': list(np.arange(2, epochs, 2)),
        'gamma': 0.6,
        'base_lr': 0.001
    }
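    # 'schedule' policy: the rate starts at base_lr and (as commonly implemented) is
    # multiplied by gamma each time inputs['iteration'] reaches one of the milestones
    # above, i.e. every second epoch here, since data['iteration'] is set to the
    # epoch number in the training loop below.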

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.9,
                                        wdecay=0.005,
                                        iteration=inputs['iteration'])

    # Define graph for training
    train_prob = model(inputs['video'])
    train_loss = ng.cross_entropy_multi(train_prob, one_hot_label)
    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])

    with closing(ngt.make_transformer()) as transformer:

        # Define graph for calculating validation set error and misclassification rate
        # Use inference mode for validation to avoid dropout in forward pass
        with Layer.inference_mode_on():
            inference_prob = model(inputs['video'])
            errors = ng.not_equal(ng.argmax(inference_prob), inputs['label'])
            eval_loss = ng.cross_entropy_multi(inference_prob, one_hot_label)
            eval_outputs = {'cross_ent_loss': eval_loss, 'misclass': errors}

            eval_computation = make_bound_computation(transformer,
                                                      eval_outputs, inputs)

        train_outputs = {'batch_cost': batch_cost}
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)

        interval_cost = 0.0

        # Train in epochs
        logs = {'train': [], 'validation': [], 'misclass': []}
        for epoch in trange(epochs, desc='Epochs'):

            # Setup the training bar
            numBatches = train_set.ndata // batch_size
            tpbar = tqdm(unit='batches',
                         ncols=100,
                         total=numBatches,
                         leave=False)

            train_set.reset()
            valid_set.reset()

            train_log = []
            for step, data in enumerate(train_set):
                data = dict(data)
                data['iteration'] = epoch  # learning schedule based on epochs
                output = train_computation(data)
                train_log.append(float(output['batch_cost']))

                tpbar.update(1)
                tpbar.set_description("Training {:0.4f}".format(
                    float(output['batch_cost'])))
                interval_cost += float(output['batch_cost'])
            tqdm.write("Epoch {epch}  complete. "
                       "Avg Train Cost {cost:0.4f}".format(epch=epoch,
                                                           cost=interval_cost /
                                                           step))
            interval_cost = 0.0
            tpbar.close()
            validation_loss = run_validation(valid_set, eval_computation)
            tqdm.write("Avg losses: {}".format(validation_loss))
            logs['train'].append(train_log)
            logs['validation'].append(validation_loss['cross_ent_loss'])
            logs['misclass'].append(validation_loss['misclass'])

            # Save log data and plot at the end of each epoch
            with open(log_file, 'wb') as f:
                pickle.dump(logs, f)
            plot_logs(logs=logs)
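
# A hypothetical call to train_network; `model`, `train_set` and `valid_set` are
# assumed to be constructed elsewhere in the original script, and the values below
# are illustrative only.
train_network(model, train_set, valid_set,
              batch_size=32, epochs=10, log_file='training_log.pkl')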