Example no. 1
def define_recurrent_layers(out_axes=None,
                            celltype='RNN',
                            recurrent_units=[32],
                            init=GlorotInit(),
                            return_sequence=True):
    layers = []
    for e, i in enumerate(recurrent_units):
        # intermediate layers must return full sequences; only the last layer honors return_sequence
        layer_return_sequence = e < len(recurrent_units) - 1 or return_sequence
        if celltype == 'RNN':
            layers.append(
                Recurrent(nout=i,
                          init=init,
                          backward=False,
                          activation=Tanh(),
                          return_sequence=layer_return_sequence))
        elif celltype == 'LSTM':
            layers.append(
                LSTM(nout=i,
                     init=init,
                     backward=False,
                     activation=Tanh(),
                     gate_activation=Logistic(),
                     return_sequence=layer_return_sequence))
    if out_axes is not None:
        affine_layer = Affine(weight_init=init,
                              bias_init=init,
                              activation=Identity(),
                              axes=out_axes)
        layers.append(affine_layer)
    return layers
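A minimal usage sketch of this helper, assuming the layer and initializer classes come from ngraph.frontends.neon as in the other examples (the output axes value is illustrative, not taken from this excerpt):

# sketch: build a two-layer LSTM stack followed by an Affine projection onto ax.Y
layers = define_recurrent_layers(out_axes=(ax.Y,),
                                 celltype='LSTM',
                                 recurrent_units=[64, 64],
                                 return_sequence=True)
model = Sequential(layers)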
Example no. 2
ax.Y.length = time_steps

# create iterator and placeholders for training data
train_set = TSPSequentialArrayIterator(data_arrays=tsp_data['train'],
                                       nfeatures=num_features,
                                       batch_size=args.batch_size,
                                       time_steps=time_steps,
                                       total_iterations=args.num_iterations)
inputs = train_set.make_placeholders()

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# build computational graph
enc = LSTM(args.hs, init, activation=Tanh(), reset_cells=True,
           gate_activation=Logistic(), return_sequence=True)
dec = LSTM(args.hs, init, activation=Tanh(), reset_cells=True,
           gate_activation=Logistic(), return_sequence=True)

if args.emb is True:
    # encoder input embedding
    hidden_feature_axis = ng.make_axis(length=args.hs, name='hidden_feature_axis')
    feature_axis = ng.make_axis(length=num_features, name='feature_axis')

    W_emb = ng.variable(axes=[hidden_feature_axis, feature_axis], initial_value=init)
    emb_enc_inputs = ng.dot(W_emb, inputs['inp_txt'])

    # decoder input embedding
    emb_dec_input = []
    ax.N.length = args.batch_size
    for i in range(ax.N.length):
Example no. 3
inputs = train_set.make_placeholders()
ax.Y.length = len(tree_bank_data.vocab)


def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.layer_type == "lstm":
    rlayer1 = LSTM(hidden_size,
                   init,
                   activation=Tanh(),
                   gate_activation=Logistic(),
                   return_sequence=True)
    rlayer2 = LSTM(hidden_size,
                   init,
                   activation=Tanh(),
                   gate_activation=Logistic(),
                   return_sequence=True)

# model initialization
seq1 = Sequential([
    Preprocess(functor=expand_onehot), rlayer1, rlayer2,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp(gradient_clip_value=gradient_clip_value)
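A hedged sketch of how this model might be wired to a loss and the optimizer, following the call pattern of Example no. 11 below (the placeholder keys 'inp_txt'/'tgt_txt' and the loss choice are assumptions, not part of this excerpt):

# sketch: attach a cross-entropy loss and the optimizer updates
train_prob = seq1(inputs['inp_txt'])   # Preprocess inside seq1 applies expand_onehot
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y))
updates = optimizer(train_loss)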
Example no. 4
                filter_init,
                strides=1,
                padding=1,
                activation=lrelu,
                batch_norm=True),
    Convolution((1, 1, 16),
                filter_init,
                strides=1,
                padding=0,
                activation=lrelu,
                batch_norm=True),
    Convolution((7, 7, 1),
                filter_init,
                strides=1,
                padding=0,
                activation=Logistic(),
                batch_norm=False)
]
discriminator = Sequential(conv_layers, name="Discriminator")

# noise placeholder
N = ng.make_axis(name='N', length=args.batch_size)
noise_ax_names = 'CDHW'
noise_axes = ng.make_axes([
    ng.make_axis(name=nm, length=l) for nm, l in zip(noise_ax_names, noise_dim)
])
z_ax = noise_axes + N
z = ng.placeholder(axes=z_ax)

# image placeholder
C = ng.make_axis(name='C', length=1)
Example no. 5
np.random.seed(args.rng_seed)

# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size, total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                   Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())])

optimizer = GradientDescentMomentum(0.1, 0.9)

output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label'])
loss = ng.cross_entropy_binary(output_prob, ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
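The bound computation can then be driven by the data iterator; a minimal loop in the style of Example no. 11 below:

# sketch: run the bound training computation for the requested number of iterations
for step in range(args.num_iterations):
    out = train_computation(next(train_set))
    print(step, float(out['batch_cost']))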
Example no. 6
def check_lstm(seq_len, input_size, hidden_size,
               batch_size, init_func, return_seq=True, backward=False,
               reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng = LSTM(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=reset_cells, return_sequence=return_seq,
                       backward=backward)

        out_ng = lstm_ng.train_outputs(inp_ng)

        fprop_neon_fun = ex.executor(out_ng, inp_ng)

        fprop_neon_list = []
        input_value_list = []

        for i in range(num_iter):
            # fprop on random inputs
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon = fprop_neon_fun(input_value).copy()

            if return_seq is True:
                fprop_neon = fprop_neon[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_list.append(fprop_neon)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon[:, -1].reshape(-1, 1),
                                           lstm_ng.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, we can read out the W values. Take copies so
        # that shared values don't confuse the derivatives.
        # Concatenate the weights for the gates i, f, o, g (in this order).
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon = [lstm_ng.W_input[k].value.get(None).copy().T for k in gates]
        Whh_neon = [lstm_ng.W_recur[k].value.get(None).copy().T for k in gates]
        bh_neon = [lstm_ng.b[k].value.get(None).copy() for k in gates]

        # reference numpy LSTM
        lstm_ref = RefLSTM()
        WLSTM = lstm_ref.init(input_size, hidden_size)

        # make the reference weights and biases match the neon model
        WLSTM[0, :] = np.concatenate(bh_neon)
        WLSTM[1:input_size + 1, :] = np.concatenate(Wxh_neon, 1)
        WLSTM[input_size + 1:] = np.concatenate(Whh_neon, 1)

        # transpose input X and do fprop
        fprop_ref_list = []
        c0 = h0 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref, cprev, hprev, batch_cache) = lstm_ref.forward(inp_ref,
                                                                     WLSTM,
                                                                     c0, h0)
            if reset_cells is False:
                c0 = cprev
                h0 = hprev

            # the output needs transpose as well
            Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
            fprop_ref_list.append(Hout_ref)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_list[i],
                                       fprop_ref_list[i], rtol=rtol, atol=atol)
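A hypothetical invocation of this check (the module-level rng, rtol, atol and RefLSTM helpers are assumed to come from the surrounding test file; the sizes below are illustrative):

# sketch: exercise the check on a small configuration
check_lstm(seq_len=5, input_size=3, hidden_size=4,
           batch_size=2, init_func=GaussianInit(0.0, 0.1),
           return_seq=True, reset_cells=True)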
Example no. 7
def check_stacked_lstm(seq_len, input_size, hidden_size,
                       batch_size, init_func, return_seq=True, backward=False,
                       reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng_1 = LSTM(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic(),
                         reset_cells=reset_cells, return_sequence=return_seq,
                         backward=backward)
        lstm_ng_2 = LSTM(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic(),
                         reset_cells=reset_cells, return_sequence=return_seq,
                         backward=backward)

        out_ng_1 = lstm_ng_1.train_outputs(inp_ng)
        out_ng_2 = lstm_ng_2.train_outputs(out_ng_1)

        fprop_neon_fun_2 = ex.executor(out_ng_2, inp_ng)

        # fprop on random inputs for multiple iterations
        fprop_neon_2_list = []
        input_value_list = []

        for i in range(num_iter):
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon_2 = fprop_neon_fun_2(input_value).copy()

            # comparing outputs
            if return_seq is True:
                fprop_neon_2 = fprop_neon_2[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_2_list.append(fprop_neon_2)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon_2[:, -1].reshape(-1, 1),
                                           lstm_ng_2.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, we can read out the W values. Take copies so
        # that shared values don't confuse the derivatives.
        # Concatenate the weights for the gates i, f, o, g (in this order).
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_1 = \
            np.concatenate([lstm_ng_1.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_1 = \
            np.concatenate([lstm_ng_1.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_1 = \
            np.concatenate([lstm_ng_1.b[k].value.get(None).copy() for k in gates])
        Wxh_neon_2 = \
            np.concatenate([lstm_ng_2.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_2 = \
            np.concatenate([lstm_ng_2.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_2 = \
            np.concatenate([lstm_ng_2.b[k].value.get(None).copy() for k in gates])

        # reference numpy LSTM
        lstm_ref_1 = RefLSTM()
        lstm_ref_2 = RefLSTM()
        WLSTM_1 = lstm_ref_1.init(input_size, hidden_size)
        WLSTM_2 = lstm_ref_2.init(hidden_size, hidden_size)

        # make the reference weights and biases the same as in the neon model
        WLSTM_1[0, :] = bh_neon_1
        WLSTM_1[1:input_size + 1, :] = Wxh_neon_1
        WLSTM_1[input_size + 1:] = Whh_neon_1
        WLSTM_2[0, :] = bh_neon_2
        WLSTM_2[1:hidden_size + 1, :] = Wxh_neon_2
        WLSTM_2[hidden_size + 1:] = Whh_neon_2

        # transpose input X and do fprop
        fprop_ref_2_list = []
        c0_1 = h0_1 = None
        c0_2 = h0_2 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref_1, cprev_1, hprev_1, batch_cache) = lstm_ref_1.forward(inp_ref, WLSTM_1,
                                                                             c0_1, h0_1)
            (Hout_ref_2, cprev_2, hprev_2, batch_cache) = lstm_ref_2.forward(Hout_ref_1, WLSTM_2,
                                                                             c0_2, h0_2)

            if reset_cells is False:
                c0_1 = cprev_1
                h0_1 = hprev_1
                c0_2 = cprev_2
                h0_2 = hprev_2

            # the output needs transpose as well
            Hout_ref_2 = Hout_ref_2.reshape(seq_len * batch_size, hidden_size).T

            fprop_ref_2_list.append(Hout_ref_2)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_2_list[i],
                                       fprop_ref_2_list[i], rtol=rtol, atol=atol)
Example no. 8
feature_axis = ng.make_axis(length=n_features, name="F")
out_axis = ng.make_axis(length=n_features, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, time_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))

# define model
if args.modeltype == "TCN":
    affine_layer = Affine(axes=out_axis,
                          weight_init=GaussianInit(0, 0.01),
                          activation=Logistic())
    model = Sequential(
        [lambda op: ng.map_roles(op, {
            'F': 'C',
            'REC': 'W'
        })] +
        tcn(n_features, hidden_sizes, kernel_size=kernel_size,
            dropout=dropout).layers +
        [lambda op: ng.map_roles(op, {
            'C': 'F',
            'W': 'REC'
        })] + [affine_layer])
elif args.modeltype == "LSTM":
    model = Sequential(
        recurrent_model.define_model(out_axis,
                                     celltype=args.modeltype,
Example no. 9
generator_layers = [
    affine_layer(h_dim, Rectlin(), name='g0'),
    affine_layer(1, Identity(), name='g1')
]
generator = Sequential(generator_layers)

# discriminator
discriminator_layers = [
    affine_layer(2 * h_dim, Tanh(), name='d0'),
    affine_layer(2 * h_dim, Tanh(), name='d1')
]
if minibatch_discrimination:
    raise NotImplementedError
else:
    discriminator_layers.append(affine_layer(2 * h_dim, Tanh(), name='d2'))
discriminator_layers.append(affine_layer(1, Logistic(), name='d3'))
discriminator = Sequential(discriminator_layers)

# TODO discriminator pre-training

# dataloader
np.random.seed(1)
toy_gan_data = ToyGAN(batch_size, num_iterations)
train_data = toy_gan_data.load_data()
train_set = ArrayIterator(train_data, batch_size, num_iterations)
# reset seed for weights
np.random.seed(2)

# build network graph
inputs = train_set.make_placeholders()
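A rough sketch of how these two networks are typically combined into the standard GAN objective, mirroring the DCGAN branch of Example no. 10 below (the noise axes and the data placeholder key are assumptions, not taken from this excerpt):

# sketch: standard GAN losses, following Example no. 10
z = ng.placeholder(axes=noise_axes)        # assumed noise placeholder
generated = generator(z)
D_real = discriminator(inputs['data'])     # assumed data placeholder key
D_fake = discriminator(generated)
loss_d = -ng.log(D_real) - ng.log(1 - D_fake)   # discriminator loss
loss_g = -ng.log(D_fake)                        # generator loss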
Example no. 10
z_ax = noise_axes + N
z = ng.placeholder(axes=z_ax)

# image placeholder
C = ng.make_axis(name='C', length=1)
D = ng.make_axis(name='D', length=1)
H = ng.make_axis(name='H', length=28)
W = ng.make_axis(name='W', length=28)
image_axes = ng.make_axes([C, D, H, W, N])
image = ng.placeholder(axes=image_axes)

# DCGAN
if args.loss_type == "DCGAN":

    generator = make_generator(bn=True)
    discriminator = make_discriminator(bn=True, disc_activation=Logistic())

    # build network graph
    generated = generator(z)
    D1 = discriminator(image)
    D2 = discriminator(generated)

    weight_clip_value = None  # no weight clipping
    gp_scale = None  # no gradient penalty

    loss_d = -ng.log(D1) - ng.log(1 - D2)
    loss_g = -ng.log(D2)

# Wasserstein GAN
elif args.loss_type == "WGAN":
Example no. 11
def train_mnist_mlp(transformer_name,
                    data_dir=None,
                    rng_seed=12,
                    batch_size=128,
                    train_iter=10,
                    eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to the graph regardless of transformer;
    # it is ignored in the non-HeTr case
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data,
                              batch_size,
                              total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([
            Preprocess(functor=lambda x: x / 255.),
            Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
            Affine(axes=ax.Y,
                   weight_init=GaussianInit(),
                   activation=Logistic())
        ])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(
            train_prob, ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential(
            [optimizer(train_loss),
             ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(
            inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(
            ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)
        loss_computation = make_bound_computation(transformer, eval_outputs,
                                                  inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

        return train_costs, ce_loss
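A minimal invocation of this helper (the values below are simply the defaults from the signature; 'cpu' is the simpler of the two allowed transformer names):

# sketch: run a short training/evaluation cycle on the CPU transformer
train_costs, ce_loss = train_mnist_mlp('cpu', batch_size=128,
                                       train_iter=10, eval_iter=10)
print(train_costs[-1], ce_loss[-1])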