Example #1
def build_regressor_computations():
    train_preds = predictions(encoder, affine_layer, inputs['X'])
    train_loss = ng.squared_L2(train_preds - inputs['y'])

    # Cost calculation
    batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    with Layer.inference_mode_on():
        eval_preds = predictions(encoder, affine_layer, inputs['X'])
        eval_loss = ng.mean(ng.squared_L2(eval_preds - inputs['y']), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")

    return train_computation, loss_computation
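
A minimal sketch of how the two returned computations would typically be bound to a transformer, following the pattern used in Examples #22 and #26 on this page (the surrounding setup of encoder, affine_layer, and inputs is assumed to exist):

from contextlib import closing
import ngraph.transformers as ngt

# Hypothetical driver for the function above; names follow this page's idioms.
train_computation, loss_computation = build_regressor_computations()
with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
    loss_function = transformer.add_computation(loss_computation)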
Example #2
def build_seq2seq_computations():
    # Training loss, optimizer
    train_decoded = recurrent_model.encode_and_decode(encoder, decoder,
                                                      inputs['X'], previous)
    train_loss = ng.squared_L2(target - train_decoded)
    batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    # Evaluation loss
    with Layer.inference_mode_on():
        eval_decoded = recurrent_model.encode_and_generate(encoder, decoder, inputs['X'], in_axes)
        eval_loss = ng.mean(ng.squared_L2(target - eval_decoded), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")
    return train_computation, loss_computation
Example #3
    def __call__(self, in_obj):

        in_axes = in_obj.axes
        if in_axes.channel_axis() is None:
            red_axes = ng.make_axes(in_axes.recurrent_axis()) + in_axes.batch_axes()
        else:
            red_axes = in_axes - in_axes.channel_axis()

        out_axes = in_axes - red_axes

        in_obj = ng.flatten(in_obj, out_axes | red_axes.flatten(force=True))
        if self.gamma is None:
            self.gvar = self.gvar or ng.persistent_tensor(axes=out_axes, initial_value=1.0)
            self.gmean = self.gmean or ng.persistent_tensor(axes=out_axes, initial_value=0.0)
            self.gamma = ng.variable(axes=out_axes,
                                     initial_value=self.init_gamma,
                                     scope=self.scope).named('gamma')
            self.beta = ng.variable(axes=out_axes,
                                    initial_value=self.init_beta,
                                    scope=self.scope).named('beta')

        xmean = ng.mean(in_obj, out_axes=out_axes)
        xvar = ng.variance(in_obj, out_axes=out_axes)

        if Layer.inference_mode:
            return ng.unflatten(self.gamma * ((in_obj - self.gmean) *
                                ng.reciprocal(ng.sqrt(self.gvar + self.eps))) + self.beta)
        else:
            return ng.sequential([
                ng.assign(self.gmean, self.gmean * self.rho + xmean * (1.0 - self.rho)),
                ng.assign(self.gvar, self.gvar * self.rho + xvar * (1.0 - self.rho)),
                ng.unflatten(self.gamma * ((in_obj - xmean) *
                             ng.reciprocal(ng.sqrt(xvar + self.eps))) + self.beta)
            ])
Example #4
    def train_outputs(self, in_obj):
        in_axes = in_obj.axes.sample_axes()
        red_axes = ng.make_axes()
        if len(in_axes.role_axes(ar.features_input)) != 0:
            red_axes += in_axes.sample_axes() - in_axes.role_axes(
                ar.features_input)
        red_axes += in_obj.axes.batch_axes()
        out_axes = in_axes - red_axes

        self.gamma = self.gamma or ng.variable(
            axes=out_axes, initial_value=1.0).named('gamma')
        self.beta = self.beta or ng.variable(axes=out_axes,
                                             initial_value=0.0).named('beta')
        self.gvar = self.gvar or ng.persistent_tensor(axes=out_axes,
                                                      initial_value=1.0)
        self.gmean = self.gmean or ng.persistent_tensor(axes=out_axes,
                                                        initial_value=0.0)

        xmean = ng.mean(in_obj, reduction_axes=red_axes)
        xvar = ng.variance(in_obj, reduction_axes=red_axes)
        return ng.sequential([
            ng.assign(self.gmean,
                      self.gmean * self.rho + xmean * (1.0 - self.rho)),
            ng.assign(self.gvar,
                      self.gvar * self.rho + xvar * (1.0 - self.rho)),
            self.gamma * (in_obj - xmean) / ng.sqrt(xvar + self.eps) +
            self.beta
        ])
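
The paired ng.assign calls in Examples #3 and #4 maintain exponential moving averages of the batch statistics. A NumPy sketch of that update, with assumed values for the decay rho and an illustrative batch:

import numpy as np

rho, eps = 0.9, 1e-5                        # assumed decay and stability constants
gmean, gvar = 0.0, 1.0                      # persistent global statistics
x = np.random.randn(128)                    # one batch of activations
xmean, xvar = x.mean(), x.var()
gmean = gmean * rho + xmean * (1.0 - rho)   # mirrors ng.assign(self.gmean, ...)
gvar = gvar * rho + xvar * (1.0 - rho)      # mirrors ng.assign(self.gvar, ...)
normed = (x - xmean) / np.sqrt(xvar + eps)  # training-time normalization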
Example #5
def create_loss_and_learner(model,
                            labels,
                            learning_rate,
                            momentum_coef=0.0,
                            wdecay=0.0,
                            nesterov=False,
                            gradient_clip_norm=None,
                            gradient_clip_value=None):
    """
    Auxiliary function to create a loss function (cross entropy and softmax)
    and a trainer using stochastic gradient descent with momentum.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array
        learning_rate - learning rate for trainer
        momentum_coef - coefficient of momentum (default 0.0)
        wdecay - amount of weight decay (default 0.0)
        nesterov - use nesterov accelerated gradient (default False)
        gradient_clip_norm - target gradient norm (default None)
        gradient_clip_value - value to element-wise clip gradients (default None)

    Returns:
        Loss function (mean for batch)
    """
    if model.axes.lengths != labels.axes.lengths:
        labels = ng.Transpose(labels)
    assert model.axes.lengths == labels.axes.lengths
    model = ng.cast_axes(model, axes=labels.axes)

    loss = ng.cross_entropy_multi(ng.softmax(model), labels)
    optimizer = GradientDescentMomentum(learning_rate, momentum_coef, wdecay,
                                        gradient_clip_norm,
                                        gradient_clip_value, nesterov)
    return ng.sequential([optimizer(loss), ng.mean(loss, out_axes=())])
Example #6
    def _build_module(self, input_layer):
        # Dueling Network
        # state value tower - V
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        state_value = neon.Sequential([
            neon.Affine(nout=256,
                        activation=neon.Rectlin(),
                        weight_init=self.weights_init,
                        bias_init=self.biases_init),
            neon.Affine(nout=1,
                        weight_init=self.weights_init,
                        bias_init=self.biases_init)
        ])(input_layer)

        # action advantage tower - A
        action_advantage_unnormalized = neon.Sequential([
            neon.Affine(nout=256,
                        activation=neon.Rectlin(),
                        weight_init=self.weights_init,
                        bias_init=self.biases_init),
            neon.Affine(axes=output_axis,
                        weight_init=self.weights_init,
                        bias_init=self.biases_init)
        ])(input_layer)
        action_advantage = action_advantage_unnormalized - ng.mean(
            action_advantage_unnormalized)

        repeated_state_value = ng.expand_dims(
            ng.slice_along_axis(state_value, state_value.axes[0], 0),
            output_axis, 0)

        # merge to state-action value function Q
        self.output = repeated_state_value + action_advantage
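
A NumPy check of the dueling merge above: the advantage stream is mean-centered before being added to the broadcast state value (values are illustrative):

import numpy as np

V = 1.5                          # scalar state value for one state
A = np.array([0.2, -0.1, 0.5])   # unnormalized per-action advantages
Q = V + (A - A.mean())           # mean-centering resolves the V/A ambiguity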
Example #7
    def _build_module(self, input_layer):
        # This is almost exactly the same as the Dueling Network, but we predict the future measurements for each action

        multistep_measurements_size = self.measurements_size[0] * self.num_predicted_steps_ahead

        # actions expectation tower (expectation stream) - E
        with name_scope("expectation_stream"):
            expectation_stream = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init)
            ])(input_layer)

        # action fine differences tower (action stream) - A
        with name_scope("action_stream"):
            action_stream_unnormalized = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=self.num_actions * multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Reshape((self.num_actions, multistep_measurements_size))
            ])(input_layer)
            action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)

        # note: output_axis is assumed to be defined earlier in the class,
        # analogous to the ng.make_axis call in Example #6
        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)

        # merge to future measurements predictions
        self.output = repeated_expectation_stream + action_stream
Example #8
def test_sequential_side(M):
    x1_np = 2
    x2_np = 3
    b_np = 1
    x_np = np.array([1, 2, 3], dtype=np.float32)

    x = ng.variable([M], initial_value=x_np)
    x1 = ng.persistent_tensor(axes=(), initial_value=x1_np)
    x2 = ng.persistent_tensor(axes=(), initial_value=x2_np)
    x1_vo = ng.value_of(x1)
    x2_vo = ng.value_of(x2)
    b = ng.persistent_tensor(axes=(), initial_value=b_np)

    y = ng.sequential([
        x1_vo, x2_vo,
        ng.assign(x1,
                  ng.sum(x, out_axes=()) + x1 * b + (1 - b)),
        ng.assign(x2,
                  ng.mean(x, out_axes=()) + x2 * b + (1 - b)), x * 2
    ])

    with ExecutorFactory() as ex:
        main_effect = ex.executor((y, x1_vo, x2_vo, x1, x2))
        current_values = ex.executor((x1, x2))

        # Run main path #1
        y_val, x1_init_val, x2_init_val, x1_final_val, x2_final_val = main_effect()
        y_np = x_np * 2

        assert np.allclose(y_val, y_np)
        assert np.allclose(x1_init_val, x1_np)
        assert np.allclose(x2_init_val, x2_np)
        x1_np = np.sum(x_np) + x1_np * b_np + (1 - b_np)
        x2_np = np.mean(x_np) + x2_np * b_np + (1 - b_np)
        assert np.allclose(x1_final_val, x1_np)
        assert np.allclose(x2_final_val, x2_np)

        x1_val, x2_val = current_values()
        assert np.allclose(x1_val, x1_np)
        assert np.allclose(x2_val, x2_np)

        # Run main path #2 (Should be the same as before)
        y_val, x1_init_val, x2_init_val, x1_final_val, x2_final_val = main_effect()
        y_np = x_np * 2

        assert np.allclose(y_val, y_np)
        assert np.allclose(x1_init_val, x1_np)
        assert np.allclose(x2_init_val, x2_np)
        x1_np = np.sum(x_np) + x1_np * b_np + (1 - b_np)
        x2_np = np.mean(x_np) + x2_np * b_np + (1 - b_np)
        assert np.allclose(x1_final_val, x1_np)
        assert np.allclose(x2_final_val, x2_np)
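
Since b_np == 1, the expected side-effect values after the first run can be verified by hand; a small NumPy check mirroring the test's own bookkeeping:

import numpy as np

x_np = np.array([1, 2, 3], dtype=np.float32)
assert np.sum(x_np) + 2 * 1 + (1 - 1) == 8.0    # x1 after run #1: 6 + 2
assert np.mean(x_np) + 3 * 1 + (1 - 1) == 5.0   # x2 after run #1: 2 + 3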
Example #9
    def ReduceElements(self, cntk_op, inputs):
        """
        Returns a reduction operation (max, min, mean, sum, prod) or a calculation which matches
        CNTK's LogSum reduction (`reduce_log_sum_exp` function).

        Arguments:
            cntk_op: CNTK operation to be imported.
            inputs: List of inputs to this node.

        Returns:
            An ngraph Op.
        """
        assert len(inputs) == 1

        reduction_op_name = cntk_op.attributes.get('reductionOpName')
        # CNTK API defines a reductionKeepDimensions flag, but we currently don't use it
        # keep_dimensions = cntk_op.attributes.get('reductionKeepDimensions', False)

        cntk_op_attribute_axes = []
        if cntk_op.attributes.get('axisVec'):
            cntk_op_attribute_axes.extend(cntk_op.attributes.get('axisVec'))
        elif cntk_op.attributes.get('axis'):
            cntk_op_attribute_axes.append(cntk_op.attributes.get('axis'))

        # CNTK axes are numbered in reverse order: the last axis is labeled 0, the previous 1, etc.
        reduction_axes_indexes = [len(inputs[0].axes) - 1 - i
                                  for (_, _, i) in cntk_op_attribute_axes]
        reduction_ng_axes_list = [axis for (i, axis) in enumerate(inputs[0].axes)
                                  if i in reduction_axes_indexes]
        reduction_ng_axes = ng.Axes(axes=reduction_ng_axes_list)

        if reduction_op_name == 'Max':
            return ng.max(inputs[0], reduction_axes=reduction_ng_axes).named(cntk_op.uid)

        if reduction_op_name == 'Min':
            return ng.min(inputs[0], reduction_axes=reduction_ng_axes).named(cntk_op.uid)

        if reduction_op_name == 'Mean':
            return ng.mean(inputs[0], reduction_axes=reduction_ng_axes).named(cntk_op.uid)

        if reduction_op_name == 'Sum':
            return ng.sum(inputs[0], reduction_axes=reduction_ng_axes).named(cntk_op.uid)

        if reduction_op_name == 'Prod':
            return ng.prod(inputs[0], reduction_axes=reduction_ng_axes).named(cntk_op.uid)

        if reduction_op_name == 'LogSum':
            return ng.log(ng.sum(ng.exp(inputs[0]), reduction_axes=reduction_ng_axes))\
                .named(cntk_op.uid)

        raise NotImplementedError('CNTKImporter: ReduceElements does not support operation %s'
                                  % reduction_op_name)
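
The axis-index arithmetic above can be illustrated in isolation: CNTK labels the last tensor axis 0, so a CNTK index i maps to ngraph index rank - 1 - i (rank 3 chosen for illustration):

rank = 3
for cntk_index in range(rank):
    ng_index = rank - 1 - cntk_index
    print(cntk_index, "->", ng_index)   # prints 0 -> 2, 1 -> 1, 2 -> 0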
Example #10
    def AveragedLoss(self, c2_op, inputs):
        """
        Computes average loss for the batch.

        Arguments:
            c2_op: OperatorDef object, the caffe2 node to convert.
            inputs: List of ngraph Ops as inputs to this node.

        Returns:
            An ngraph Op corresponding to the caffe2 node.
        """
        x = inputs[0]
        reduction_axes = x.axes.batch_axes() if x.axes.batch_axes() else ng.make_axes(x.axes[0])
        return ng.mean(x, reduction_axes=reduction_axes)
Example #11
def run_mini_ds2_benchmark(args, **kwargs):
    device_id = kwargs.get('device_id')

    inputs, train_set, eval_set = generate_ds2_data(args.max_length,
                                                    args.str_w, args.nout,
                                                    args.nbands,
                                                    args.batch_size,
                                                    args.num_iterations)

    model_out = get_mini_ds2(inputs, args.nfilters, args.filter_width,
                             args.str_w, args.nbands, args.depth,
                             args.hidden_size, args.batch_norm,
                             args.hetr_device, device_id)

    if args.bprop:
        with ng.metadata(device=args.hetr_device,
                         device_id=device_id,
                         parallel=ax.N):
            loss = ng.ctc(model_out, ng.flatten(inputs["char_map"]),
                          inputs["audio_length"], inputs["trans_length"])

            optimizer = GradientDescentMomentum(learning_rate=2e-5,
                                                momentum_coef=0.99,
                                                gradient_clip_norm=400,
                                                nesterov=args.nesterov)

            updates = optimizer(loss)
            mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])
            bprop_computation_op = ng.computation(mean_cost, "all")

        benchmark = Benchmark(bprop_computation_op, train_set, inputs,
                              args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark.time(args.num_iterations,
                           args.skip_iter,
                           'ds2_bprop',
                           args.visualize,
                           preprocess=True))
    else:
        fprop_computation_op = ng.computation(model_out, "all")

        benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                    args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark_fprop.time(args.num_iterations,
                                 args.skip_iter,
                                 'ds2_fprop',
                                 args.visualize,
                                 preprocess=True))
Example #12
def test_mean(transformer_factory, input_tensor):
    inputs = input_tensor
    targets = ng.placeholder(inputs.axes)

    inp_stat = ng.mean(inputs, reduction_axes=inputs.axes.batch_axes())
    err = ng.sum(inp_stat - targets, out_axes=())
    with executor(err, inputs, targets) as comp_func:

        input_value = rng.uniform(-1, 1, inputs.axes)
        target_value = rng.uniform(-1, 1, targets.axes)
        ng_f_res = comp_func(input_value, target_value)

        np_f_res = np.sum(np.mean(input_value, axis=1, keepdims=True) - target_value)

        ng.testing.assert_allclose(np_f_res, ng_f_res, atol=1e-4, rtol=1e-4)
Example #13
def run_resnet_benchmark(dataset,
                         num_iterations,
                         n_skip,
                         batch_size,
                         device_id,
                         transformer_type,
                         device,
                         bprop=True,
                         batch_norm=False,
                         visualize=False,
                         stage_depth=1):
    inputs, data, train_set = get_fake_data(dataset, batch_size,
                                            num_iterations)

    # Running forward propagation
    model_out = get_mini_resnet(inputs,
                                dataset,
                                device,
                                device_id,
                                batch_norm=batch_norm,
                                stage_depth=stage_depth)

    # Running back propagation
    if bprop:
        with ng.metadata(device=device, device_id=device_id, parallel=ax.N):
            optimizer = GradientDescentMomentum(0.01, 0.9)
            train_loss = ng.cross_entropy_multi(
                model_out, ng.one_hot(inputs['label'], axis=ax.Y))

            batch_cost = ng.sequential(
                [optimizer(train_loss),
                 ng.mean(train_loss, out_axes=())])
            batch_cost_computation_op = ng.computation(batch_cost, "all")
        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_bprop',
                           visualize, 'device_id'))
    else:
        fprop_computation_op = ng.computation(model_out, 'all')
        benchmark = Benchmark(fprop_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_fprop',
                           visualize))
Example #14
def test_mean(transformer_factory):
    ax = ng.name_scope('x')
    ax.N = ng.make_axis(128, batch=True)
    ax.Y = ng.make_axis(100)

    inputs = ng.placeholder([ax.Y, ax.N])
    targets = ng.placeholder([ax.Y, ax.N])

    inp_stat = ng.mean(inputs, reduction_axes=inputs.axes.batch_axes())
    err = ng.sum(inp_stat - targets, out_axes=())
    comp_func = executor(err, inputs, targets)

    input_value = rng.uniform(-1, 1, inputs.axes)
    target_value = rng.uniform(-1, 1, targets.axes)
    ng_f_res = comp_func(input_value, target_value)

    np_f_res = np.sum(np.mean(input_value, axis=1, keepdims=True) - target_value)

    np.testing.assert_allclose(np_f_res, ng_f_res, atol=1e-4, rtol=1e-4)
Example #15
def cross_entropy_with_softmax(model, labels):
    """
    Auxiliary function to add cross entropy and softmax (loss function)
    to the imported model for training.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array

    Returns:
        Loss function (mean for batch)
    """
    if model.axes.lengths != labels.axes.lengths:
        model = ng.Transpose(model)
    assert model.axes.lengths == labels.axes.lengths
    model = ng.cast_axes(model, axes=labels.axes)

    loss = ng.cross_entropy_multi(ng.softmax(model), labels)
    return ng.mean(loss, out_axes=())
Example #16
def classification_error(model, labels):
    """
    Auxiliary function to add a classification error function to
    the imported model for testing.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array

    Returns:
        Classification error function (mean for batch)
    """
    try:
        errors = ng.not_equal(
            ng.argmax(model, out_axes=[labels.axes.batch_axis()]),
            ng.argmax(labels, out_axes=[labels.axes.batch_axis()]))
    except ValueError:
        errors = ng.not_equal(ng.argmax(model), ng.argmax(labels))

    return ng.mean(errors, out_axes=())
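
A NumPy analogue of the misclassification rate computed above, using a tiny assumed batch with axes ordered class-by-batch:

import numpy as np

model = np.array([[0.9, 0.2],     # predicted scores, axes (Y, N)
                  [0.1, 0.8]])
labels = np.array([[1.0, 0.0],    # one-hot targets
                   [0.0, 1.0]])
errors = np.argmax(model, axis=0) != np.argmax(labels, axis=0)
print(errors.mean())              # 0.0: both samples classified correctly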
Example #17
    def CrossEntropyWithSoftmax(self, cntk_op, inputs):
        """
        Computes the softmax cross entropy between the inputs[0] and inputs[1].

        Arguments:
            cntk_op: CNTK operation to be imported.
            inputs: List of inputs to this node.

        Returns:
            An ngraph Op.
        """
        cast_0, cast_1 = squeeze_axes(inputs)

        if cast_0.axes.lengths != cast_1.axes.lengths:
            cast_0 = ng.Transpose(cast_0)
        assert cast_0.axes.lengths == cast_1.axes.lengths

        cast_0 = ng.cast_axes(cast_0, axes=cast_1.axes)
        loss = ng.cross_entropy_multi(ng.softmax(cast_0), cast_1)

        return ng.mean(loss, out_axes=()).named(cntk_op.uid)
Example #18
def run_cifar_benchmark(n_iter=10,
                        n_skip=5,
                        batch_size=4,
                        transformer_type='cpu'):
    inputs, data, train_set = get_fake_cifar(batch_size, n_iter)
    model = get_mini_resnet(inputs)
    optimizer = GradientDescentMomentum(0.01, 0.9)

    train_loss = ng.cross_entropy_multi(model(inputs['image']),
                                        ng.one_hot(inputs['label'], axis=ax.Y))

    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])
    batch_cost_computation_op = ng.computation(batch_cost, "all")

    feed_dict = fill_feed_dict(train_set, inputs)
    benchmarks = dict()
    benchmarks['cifar_msra_fprop'] = run_benchmark(batch_cost_computation_op,
                                                   transformer_type, feed_dict,
                                                   n_skip, n_iter)
    print_benchmark_results(benchmarks)
Example #19
def test_multi_computations(hetr_device):
    if hetr_device == 'gpu':
        pytest.xfail("enable after gpu exgraph")
    axes_x = ng.make_axes([ax_A, ax_B])
    x = ng.placeholder(axes=axes_x)
    y = ng.placeholder(())
    with ng.metadata(device_id=('0', '1'), parallel=ax_A):
        f = x**2
        out = y - ng.mean(f, out_axes=())

    np_x = np.random.randint(10, size=axes_x.lengths)
    np_y = np.random.randint(10)
    with closing(ngt.make_transformer_factory('hetr',
                                              device=hetr_device)()) as t:
        comp = t.computation(out, x, y)
        another_comp = t.computation(f, x)

        res_comp = comp(np_x, np_y)
        res_another_comp = another_comp(np_x)
        ref_comp = np_y - np.mean(np_x**2)
        np.testing.assert_array_equal(res_comp, ref_comp)
        np.testing.assert_array_equal(res_another_comp, np_x**2)
Example #20
def run_resnet_benchmark(dataset, n_iter, n_skip, batch_size, device_id,
                         transformer_type, device, bprop=False, visualize=False):
    inputs, data, train_set = get_fake_data(dataset, batch_size, n_iter)
    model_out = get_mini_resnet(inputs, dataset, device_id)

    # Running forward propagation
    fprop_computation_op = ng.computation(model_out, 'all')
    benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs, transformer_type, device)
    Benchmark.print_benchmark_results(benchmark_fprop.time(n_iter, n_skip,
                                                           dataset + '_msra_fprop', visualize))

    # Running back propagation
    if bprop:
        optimizer = GradientDescentMomentum(0.01, 0.9)
        train_loss = ng.cross_entropy_multi(model_out,
                                            ng.one_hot(inputs['label'], axis=ax.Y))

        batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
        batch_cost_computation_op = ng.computation(batch_cost, "all")

        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(benchmark.time(n_iter, n_skip,
                                          dataset + '_msra_bprop', visualize))
Example #21
if args.modeltype == "TCN":
    optimizer = Adam(learning_rate=args.lr,
                     gradient_clip_value=args.grad_clip_value)
else:
    optimizer = GradientDescentMomentum(
        learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (binary cross entropy, since each musical key on the piano is encoded as a binary value)
fwd_prop = model(inputs['X'])
fwd_prop = ng.axes_with_order(fwd_prop, out_axes)
train_loss = ng.cross_entropy_binary(fwd_prop, inputs['y'])

with Layer.inference_mode_on():
    preds = model(inputs['X'])
    preds = ng.axes_with_order(preds, out_axes)
eval_loss = ng.mean(ng.cross_entropy_binary(preds, inputs['y']), out_axes=())
eval_computation = ng.computation([eval_loss], "all")
predict_computation = ng.computation([preds], "all")

# Cost calculation
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

trainer = TimeseriesTrainer(optimizer,
                            train_computation,
                            eval_computation,
                            predict_computation,
                            inputs,
                            model_graph=[model],
Example #22
        'name': 'schedule',
        'schedule': [32000, 48000],
        'gamma': 0.1,
        'base_lr': 0.1
    }

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.9,
                                        wdecay=0.0001,
                                        iteration=inputs['iteration'])
    label_indices = inputs['label']
    train_loss = ng.cross_entropy_multi(resnet(inputs['image']),
                                        ng.one_hot(label_indices, axis=ax.Y))
    batch_cost = ng.sequential(
        [optimizer(train_loss),
         ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    with Layer.inference_mode_on():
        inference_prob = resnet(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              label_indices)
        eval_loss = ng.cross_entropy_multi(
            inference_prob, ng.one_hot(label_indices, axis=ax.Y))
        eval_loss_names = ['cross_ent_loss', 'misclass']
        eval_computation = ng.computation([eval_loss, errors], "all")

    # Now bind the computations we are interested in
    transformer = ngt.make_transformer()
    train_function = transformer.add_computation(train_computation)
    eval_function = transformer.add_computation(eval_computation)
Example #23
# Input specification
ax.C.length, ax.H.length, ax.W.length = train_set.shapes['image']
ax.D.length = 1
ax.N.length = args.batch_size
ax.Y.length = 10

# placeholders with descriptive names
inputs = dict(image=ng.placeholder([ax.C, ax.H, ax.W, ax.N]),
              label=ng.placeholder([ax.N]))

optimizer = GradientDescentMomentum(0.01, 0.9)
output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label'])
loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
loss_computation = make_bound_computation(transformer, loss_outputs, inputs)

cbs = make_default_callbacks(output_file=args.output_file,
                             frequency=args.iter_interval,
                             train_computation=train_computation,
                             total_iterations=args.num_iterations,
                             eval_set=valid_set,
Example #24
                               ng.one_hot(label1, axis=ax.Y), usebits=False)

loss2 = ng.cross_entropy_multi(logits2,
                               ng.one_hot(label2, axis=ax.Y), usebits=False)

# Total Loss
train_loss = loss1 + loss2

# Set optimizer (no learning rate scheduler used)
optimizer = Adam(learning_rate=2e-3)


print('compiling the graph')
# Cost set up
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])

# Predicted class is the one with the maximum probability
# Required outputs: batch cost, train probability, misclassification rate
train_outputs = dict(batch_cost=batch_cost, inps=inputs['answer'],
                     logits=ng.stack(logits_concat, span, 1),
                     labels=inputs['answer'], drop=dropout_val)

# Inference Mode for validation dataset:
with Layer.inference_mode_on():
    eval_outputs = dict(logits=ng.stack(logits_concat, span, 1),
                        labels=inputs['answer'], drop=drop_pointer)


# Now bind the computations we are interested in
print('generating transformer')
Example #25
    def __call__(self,
                 in_obj,
                 channel_axes="C",
                 spatial_axes=("D", "H", "W"),
                 **kwargs):
        """
        Arguments:
            in_obj (Op): Input op
            channel_axes (str): name of the expected channel axis type - defaults to "C"
            spatial_axes (tuple): names of expected depth, height and width axis types - defaults
                                  to "D", "H", and "W"
        """
        if isinstance(spatial_axes, dict):
            spatial_axes = tuple(
                spatial_axes.get(name, name) for name in ("D", "H", "W"))
        elif isinstance(spatial_axes, tuple):
            if len(spatial_axes) < 3:
                raise ValueError(
                    "spatial_axes must have length 3 (e.g. ('D', 'H', 'W'))")
            spatial_axes = tuple(
                name if name else default
                for name, default in zip(spatial_axes, ("D", "H", "W")))

        orig_axes = in_obj.axes
        in_obj = reorder_spatial_axes(in_obj, channel_axes, spatial_axes)
        channel_axes = in_obj.axes.get_by_names(channel_axes)
        spatial_axes = in_obj.axes.get_by_names(*spatial_axes)

        filter_axes = self._filter_axes(channel_axes, spatial_axes)

        # mark 'K' as a shadow axis for the initializers.
        axes_map = shadow_axes_map(filter_axes.find_by_name('K'))
        filter_axes = ng.make_axes([
            axis if axis.name != 'K' else list(axes_map.keys())[0]
            for axis in filter_axes
        ])

        if not self.initialized:
            if not self.weight_norm:
                self.W = ng.variable(axes=filter_axes,
                                     initial_value=self.init,
                                     metadata={
                                         "label": LABELS["weight"]
                                     }).named("W")
            else:
                self.v = ng.variable(axes=filter_axes,
                                     initial_value=self.init,
                                     metadata={
                                         "label": LABELS["weight"]
                                     }).named("v")
                out_axes = ng.make_axes(
                    [filter_axes.get_by_names("K__NG_SHADOW")])
                v_norm = ng.mean(ng.square(self.v), out_axes=out_axes)
                self.g = ng.variable(axes=out_axes,
                                     initial_value=self.init,
                                     metadata={
                                         "label": LABELS["weight"]
                                     }).named("g")
                self.W = self.g * self.v * ng.reciprocal(
                    ng.sqrt(v_norm + 1e-3))
        else:
            if filter_axes != self.W.axes:
                raise ValueError(
                    ("{layer_name} layer has already been initialized with an "
                     "input object which has resulted in filter axes: "
                     "{existing_filter_axes}. This new input object has axes: "
                     "{input_axes}, which implies the need for filter axes: "
                     "{new_filter_axes} which are different than the existing "
                     "filter axes.").format(
                         layer_name=self.name,
                         existing_filter_axes=self.W.axes,
                         input_axes=in_obj.axes,
                         new_filter_axes=filter_axes,
                     ))

        output = ng.map_roles(
            self._conv_op(in_obj, channel_axes, spatial_axes), axes_map)
        # Reorder the output to match the input order
        output_axis_order = ng.make_axes(
            [output.axes.find_by_name(ax.name)[0] for ax in orig_axes])
        # Remove introduced axes. If their length is > 1, then perhaps they should be kept
        slices = [
            0 if (ax not in orig_axes) and ax.length == 1 else slice(None)
            for ax in output.axes
        ]
        output = ng.tensor_slice(output, slices)
        # New axes with length > 1 may have been introduced. Add them to the end.
        output_axis_order = output_axis_order | output.axes
        return ng.axes_with_order(output, output_axis_order)
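
The weight-normalization branch above reparameterizes the filters as W = g * v / sqrt(mean(v**2) + eps), averaging over every axis except the output-channel axis K. A NumPy sketch with an assumed filter shape:

import numpy as np

v = np.random.randn(64, 3, 3, 3)                # direction tensor; K = 64 assumed
g = np.ones(64)                                 # learned per-channel scale
v_norm = np.mean(np.square(v), axis=(1, 2, 3))  # mean over all non-K axes
W = (g / np.sqrt(v_norm + 1e-3))[:, None, None, None] * v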
Example #26
    layer_0 = LookupTable(50, 100, init, update=False)
else:
    layer_0 = Preprocess(functor=expand_onehot)

# model initialization
seq1 = Sequential([layer_0,
                   rlayer,
                   Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))])

optimizer = RMSProp()

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                   usebits=True)
eval_outputs = dict(cross_ent_loss=eval_loss)

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
    loss_computation = make_bound_computation(transformer, eval_outputs, inputs)

    cbs = make_default_callbacks(output_file=args.output_file,
Example #27
 def __init__(self):
     self.ng_computation = lambda Y, T: ng.mean(ng.square(Y - T),
                                                out_axes=()) / 2.
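
A NumPy equivalent of this half mean-squared-error loss, with illustrative inputs:

import numpy as np

Y = np.array([1.0, 2.0, 3.0])            # predictions
T = np.array([1.0, 1.0, 1.0])            # targets
loss = np.mean(np.square(Y - T)) / 2.0   # (0 + 1 + 4) / 3 / 2 = 0.8333...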
Example #28
# image placeholder
C = ng.make_axis(name='C', length=1)
D = ng.make_axis(name='D', length=1)
H = ng.make_axis(name='H', length=28)
W = ng.make_axis(name='W', length=28)
image_axes = ng.make_axes([C, D, H, W, N])
image = ng.placeholder(axes=image_axes)

# build network graph
generated = generator(z)
D1 = discriminator(image)
D2 = discriminator(generated)

loss_d = -ng.log(D1) - ng.log(1 - D2)
mean_cost_d = ng.mean(loss_d, out_axes=[])
loss_g = -ng.log(D2)
mean_cost_g = ng.mean(loss_g, out_axes=[])

optimizer_d = make_optimizer(name='discriminator_optimizer')
optimizer_g = make_optimizer(name='generator_optimizer')
updates_d = optimizer_d(loss_d, subgraph=discriminator)
updates_g = optimizer_g(loss_g, subgraph=generator)

# compile computations
generator_train_inputs = {'noise': z}
discriminator_train_inputs = {'image': image, 'noise': z}

generator_train_outputs = {
    'batch_cost': mean_cost_g,
    'updates': updates_g,
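
loss_d and loss_g above are the standard discriminator loss and the non-saturating generator loss; a scalar NumPy sketch with assumed discriminator outputs:

import numpy as np

D1, D2 = 0.9, 0.2                       # assumed D(real) and D(generated)
loss_d = -np.log(D1) - np.log(1 - D2)   # discriminator: score real high, fake low
loss_g = -np.log(D2)                    # generator: push D(generated) toward 1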
Example #29
    loss = ng.ctc(output, ng.flatten(inputs["char_map"]),
                  ng.flatten(inputs["audio_length"]),
                  ng.flatten(inputs["char_map_length"]))

    optimizer = GradientDescentMomentum(
        args.lr,
        momentum_coef=args.momentum,
        gradient_clip_norm=args.gradient_clip_norm,
        nesterov=args.nesterov)

    start = time.time()
    updates = optimizer(loss)
    stop = time.time()
    logger.debug("Optimizer graph creation took {} seconds".format(stop -
                                                                   start))
    mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])

    # Create computation and initialize the transformer to allocate weights
    train_computation = ng.computation([mean_cost, output], "all")
    if inference is True:
        with Layer.inference_mode_on():
            eval_output = ds2(inputs["audio"],
                              spatial_axes={
                                  "H": "frequency",
                                  "W": "time"
                              })
        eval_computation = ng.computation(eval_output, "all")

    # Now bind the computations we are interested in
    with closing(ngt.make_transformer()) as transformer:
        train_function = transformer.add_computation(train_computation)
Example #30
gradient_penalty = ng.square(grad_norm - 1)

if args.loss_type == "WGAN-GP":
    gp = args.gp_scale * gradient_penalty
    weight_clipping = None

elif args.loss_type == "WGAN":  # standard WGAN with no gradient penalty
    gp = None
    weight_clipping = args.w_clip

if gp:
    loss_d = D1 - D2 + gp
else:
    loss_d = D1 - D2

mean_cost_d = ng.mean(loss_d, out_axes=[])

loss_g = D2
mean_cost_g = ng.mean(loss_g, out_axes=[])

mean_grad_norm = ng.mean(grad_norm, out_axes=[])

optimizer_d = make_optimizer(name='discriminator_optimizer', weight_clip_value=weight_clipping)
optimizer_g = make_optimizer(name='generator_optimizer')
updates_d = optimizer_d(loss_d, subgraph=discriminator)
updates_g = optimizer_g(loss_g, subgraph=generator)

# noise and data generators
train_set = DataGenerator((data_dim, args.batch_size), 0, data_type=args.data_type)
noise_gen = NormalNoise((noise_dim, args.batch_size), 0)
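
The gradient_penalty term at the top of this example drives the critic's gradient norm toward 1, as in WGAN-GP; a NumPy sketch with an illustrative gradient:

import numpy as np

grad = np.array([0.6, 0.8])                   # gradient w.r.t. an interpolated sample
grad_norm = np.linalg.norm(grad)              # equals 1.0 for these values
gradient_penalty = np.square(grad_norm - 1)   # 0.0 when the norm is exactly 1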
Example #31
y_onehot = ng.one_hot(inputs['label'], axis=ax.Y)
train_prob_main = inception.seq2(inception.seq1(inputs['image']))
train_prob_main = ng.map_roles(train_prob_main, {"C": ax.Y.name})
train_loss_main = ng.cross_entropy_multi(train_prob_main,
                                         y_onehot,
                                         enable_softmax_opt=False)

train_prob_aux = inception.seq_aux(inception.seq1(inputs['image']))
train_prob_aux = ng.map_roles(train_prob_aux, {"C": ax.Y.name})
train_loss_aux = ng.cross_entropy_multi(train_prob_aux,
                                        y_onehot,
                                        enable_softmax_opt=False)

batch_cost = ng.sequential([
    optimizer(train_loss_main + 0.4 * train_loss_aux),
    ng.mean(train_loss_main, out_axes=())
])

train_computation = ng.computation([batch_cost], 'all')

# Build the computations for inference (evaluation)
with Layer.inference_mode_on():
    inference_prob = inception.seq2(inception.seq1(inputs['image']))
    slices = [
        0 if cx.name in ("H", "W") else slice(None)
        for cx in inference_prob.axes
    ]
    inference_prob = ng.tensor_slice(inference_prob, slices)
    inference_prob = ng.map_roles(inference_prob, {"C": "Y"})
    errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                          inputs['label'])