def create_network():
    '''Define 3D convolutional network'''

    # Define initializers for the weights and biases
    g1 = GaussianInit(mean=0., var=0.01)
    g5 = GaussianInit(mean=0., var=0.005)
    c0 = ConstantInit(val=0.)
    c1 = ConstantInit(val=1.)

    ax.Y.length = 101

    padding = {'D': 1, 'H': 1, 'W': 1, 'C': 0}
    strides = {'D': 2, 'H': 2, 'W': 2, 'C': 1}

    layers = [
        Convolution((3, 3, 3, 64), padding=padding, filter_init=g1,
                    bias_init=c0, activation=Rectlin()),
        Pooling((1, 2, 2), strides={'D': 1, 'H': 2, 'W': 2, 'C': 1}),
        Convolution((3, 3, 3, 128), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, weight_init=g5, bias_init=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, weight_init=g5, bias_init=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(axes=ax.Y, weight_init=g1, bias_init=c0, activation=Softmax())
    ]
    return Sequential(layers)
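# Minimal usage sketch for create_network(), assuming the neon-frontend names
# used above are imported and that 'inputs' is a dict of placeholders whose
# video tensor carries the C/D/H/W/N axes the 3D convolutions expect; the
# 'video' key is a hypothetical placeholder name, not from the original code.
model = create_network()
train_prob = model(inputs['video'])  # builds the fprop graph for one batch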
def test_ref_compare_rand(refgruargs):
    # run comparison with reference code
    # for Gaussian random init
    seq_len, input_size, hidden_size, batch_size = refgruargs
    check_rnn(seq_len, input_size, hidden_size, batch_size,
              GaussianInit(0.0, 1.0))
    check_rnn(seq_len, input_size, hidden_size, batch_size,
              GaussianInit(0.0, 1.0), return_seq=False)
def test_ref_stacked(transformer_factory, reflstmargs):
    if transformer_factory.name == 'hetr':
        pytest.xfail("Hetr is expected to fail with code that checks side-effects")
    seq_len, input_size, hidden_size, batch_size, num_iter, reset_cells = reflstmargs
    check_stacked_lstm(seq_len, input_size, hidden_size, batch_size,
                       GaussianInit(0.0, 0.1),
                       reset_cells=reset_cells, num_iter=num_iter)
def test_ref_compare_rand(transformer_factory, reflstmargs):
    if transformer_factory.name == 'hetr':
        pytest.xfail("Hetr is expected to fail with code that checks side-effects")
    # run comparison with reference code
    # for Gaussian random init
    seq_len, input_size, hidden_size, batch_size, num_iter, reset_cells = reflstmargs
    check_lstm(seq_len, input_size, hidden_size, batch_size,
               GaussianInit(0.0, 0.1),
               reset_cells=reset_cells, num_iter=num_iter)
def test_ref_stacked(transformer_factory, reflstmargs):
    seq_len, input_size, hidden_size, batch_size, num_iter, reset_cells = reflstmargs
    check_stacked_lstm(seq_len, input_size, hidden_size, batch_size,
                       GaussianInit(0.0, 0.1),
                       reset_cells=reset_cells, num_iter=num_iter)
def __init__(self, nfilters, filter_width, str_w, nbands, depth, hidden_size,
             batch_norm=False, batch_norm_affine=False, batch_norm_conv=False,
             to_ctc=True):
    self.to_ctc = to_ctc

    # Initializers
    gauss = GaussianInit(0.01)
    glorot = GlorotInit()

    # 1D Convolution layer
    padding = dict(pad_h=0, pad_w=filter_width // 2, pad_d=0)
    strides = dict(str_h=1, str_w=str_w, str_d=1)
    dilation = dict(dil_d=1, dil_h=1, dil_w=1)
    conv_layer = Convolution((nbands, filter_width, nfilters),
                             gauss,
                             bias_init=ConstantInit(0),
                             padding=padding,
                             strides=strides,
                             dilation=dilation,
                             activation=Rectlin(),
                             batch_norm=batch_norm_conv)

    # Add BiRNN layers
    deep_birnn = DeepBiRNN(depth, hidden_size, glorot, Rectlinclip(),
                           batch_norm=batch_norm)

    # Add a single affine layer
    fc = Affine(nout=hidden_size, weight_init=glorot,
                activation=Rectlinclip(), batch_norm=batch_norm_affine)

    # Add the final affine layer.
    # Softmax output is computed within the CTC cost function,
    # so no activation is needed here.
    if self.to_ctc is False:
        activation = Softmax()
    else:
        activation = None
    final = Affine(axes=ax.Y, weight_init=glorot, activation=activation)

    layers = [conv_layer, deep_birnn, fc, final]
    super(Deepspeech, self).__init__(layers=layers)
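# Usage sketch for the Deepspeech constructor above. All argument values are
# illustrative assumptions, not values from the original code, and the
# 'audio' placeholder key is hypothetical.
model = Deepspeech(nfilters=256, filter_width=11, str_w=2, nbands=13,
                   depth=3, hidden_size=256, batch_norm=True)
# output = model(inputs['audio'])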
def dilated_causal_conv_layer(kernel_size, n_filters, stride, dilation,
                              init=GaussianInit(0, 0.01)):
    # define dilated causal convolution layer
    conv_layer = DilatedCausalConv(filter_shape=(kernel_size, n_filters),
                                   filter_init=init,
                                   strides=stride,
                                   dilation=dilation,
                                   padding='causal',
                                   batch_norm=False)
    return [conv_layer]
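# Usage sketch: stacking dilated causal conv layers with exponentially
# increasing dilation, the usual TCN pattern. The layer count, kernel size,
# and filter count are illustrative assumptions.
layers = []
for level in range(4):
    layers += dilated_causal_conv_layer(kernel_size=2, n_filters=64,
                                        stride=1, dilation=2 ** level)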
def test_ref_compare_rand(transformer_factory, reflstmargs):
    # run comparison with reference code
    # for Gaussian random init
    seq_len, input_size, hidden_size, batch_size, num_iter, reset_cells = reflstmargs
    check_lstm(seq_len, input_size, hidden_size, batch_size,
               GaussianInit(0.0, 0.1),
               reset_cells=reset_cells, num_iter=num_iter)
def make_layers(use_large, vocab_size):
    if use_large:
        init = GaussianInit(0., 0.02)
    else:
        init = GaussianInit(0., 0.05)

    layers = []
    layers.append(make_embedding_layer(vocab_size))
    layers.append(lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'}))

    kernel_sizes = [7, 7, 3, 3, 3, 3]
    pool_layer_idxs = [0, 1, 5]
    conv_nout = 1024 if use_large else 256
    fc_nout = 2048 if use_large else 1024
    for i in range(6):
        conv_layer = Convolution(**conv_params(kernel_sizes[i], conv_nout, init))
        layers.append(conv_layer)
        if i in pool_layer_idxs:
            pool_layer = Pooling(pool_shape=(3,), strides=3)
            layers.append(pool_layer)

    layers.append(Affine(nout=fc_nout, weight_init=init,
                         bias_init=ConstantInit(0.), activation=Rectlin()))
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=fc_nout, weight_init=init,
                         bias_init=ConstantInit(0.), activation=Rectlin()))
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(axes=(ax.Y,), weight_init=init,
                         bias_init=ConstantInit(0.), activation=Softmax()))
    return layers
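# Usage sketch for make_layers(), assuming Sequential is imported as in the
# other snippets; the vocab_size value is an illustrative assumption.
layers = make_layers(use_large=False, vocab_size=69)
model = Sequential(layers)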
def __init__(self, num_iterations, batch_size, emb_size, nhops,
             story_length, memory_size, vocab_size, vocab_axis, use_v_luts):
    self.num_iterations = num_iterations
    self.batch_size = batch_size
    self.emb_size = emb_size
    self.nhops = nhops
    self.story_length = story_length
    self.memory_size = memory_size
    self.vocab_size = vocab_size
    self.use_v_luts = use_v_luts

    # Create graph
    # Make axes
    self.batch_axis = ng.make_axis(length=batch_size, name='N')
    self.sentence_axis = ng.make_axis(length=story_length, name='sentence_axis')
    self.sentence_rec_axis = ng.make_axis(length=story_length, name='REC')
    self.memory_axis = ng.make_axis(length=memory_size, name='memory_axis')
    self.val_len_axis = ng.make_axis(length=1, name='REC')
    self.embedding_axis = ng.make_axis(length=emb_size, name='F')
    self.vocab_axis = vocab_axis

    # weight initialization
    self.init = GaussianInit(mean=0.0, std=0.1)

    # Create constant position encoding tensor to multiply
    # elementwise with embedded words
    self.pos_enc = position_encoding(self.sentence_rec_axis, self.embedding_axis)

    # Weight sharing
    self.LUT_A = ModifiedLookupTable(self.vocab_size, self.emb_size, self.init,
                                     update=True, pad_idx=0, name='LUT_A')
    if use_v_luts:
        self.LUTs_C = [ModifiedLookupTable(self.vocab_size, self.emb_size,
                                           self.init, update=True, pad_idx=0)
                       for n in range(self.nhops)]
def test_conv1d(transformer_factory, filter_width, num_filters, strides,
                padding, time_steps, feature_dimension, batch_size):

    dilation = 1  # reference conv does not support dilation

    F = ng.make_axis(name='F', length=feature_dimension)
    REC = ng.make_axis(name='REC', length=time_steps)
    N = ng.make_axis(name='N', length=batch_size)
    in_axes = ng.make_axes([F, REC, N])

    inputs = ng.placeholder(axes=in_axes)
    input_vals = np.random.randn(*in_axes.lengths)

    filter_init = GaussianInit()

    conv1d = Convolution((filter_width, num_filters), filter_init,
                         strides=strides, padding=padding, dilation=dilation,
                         bias_init=None, activation=Rectlin(), batch_norm=None)

    result_op = conv1d(inputs, channel_axes='F', spatial_axes={'W': 'REC'})

    with closing(ngt.make_transformer()) as transformer:
        result_comp = transformer.add_computation(ng.computation(result_op, inputs))
        filter_vals = transformer.add_computation(ng.computation(conv1d.conv.W))()

        result_ng = result_comp(input_vals)
        result_np = np.squeeze(reference_conv1d(input_vals, filter_vals,
                                                lambda x: np.maximum(0, x)))
        ng.testing.assert_allclose(result_ng, result_np)
        'axes': ('batch',)
    }
}

train_set = ArrayIterator(train_data, batch_size=args.batch_size,
                          total_iterations=args.num_iterations)
inputs = train_set.make_placeholders(include_iteration=True)
ax.Y.length = 1000  # number of outputs of last layer.

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# Setup model
seq1 = Sequential([
    Convolution((3, 3, 64), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Pool2D(2, strides=2),
    Convolution((3, 3, 128), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Pool2D(2, strides=2),
    Convolution((3, 3, 256), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
def train_mnist_mlp(transformer_name, data_dir=None, rng_seed=12,
                    batch_size=128, train_iter=10, eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to graph regardless of transformer,
    # but it is ignored for non-HeTr case
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data, batch_size, total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([
            Preprocess(functor=lambda x: x / 255.),
            Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
            Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())
        ])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(train_prob,
                                             ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential([optimizer(train_loss),
                                    ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(inference_prob,
                                            ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer, train_outputs, inputs)
        loss_computation = make_bound_computation(transformer, eval_outputs, inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

    return train_costs, ce_loss
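# Usage sketch: the same function drives both the single-device and HeTr
# transformers, so with a fixed rng seed the returned costs can be compared
# across backends. The seed value is illustrative.
cpu_costs, cpu_ce = train_mnist_mlp('cpu', rng_seed=12)
hetr_costs, hetr_ce = train_mnist_mlp('hetr', rng_seed=12)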
args = parser.parse_args()

np.random.seed(args.rng_seed)

# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                   Affine(nout=100, weight_init=GaussianInit(),
                          activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=GaussianInit(),
                          activation=Logistic())])

optimizer = GradientDescentMomentum(0.1, 0.9)
output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label'])
loss = ng.cross_entropy_binary(output_prob,
                               ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
args.batch_size = 32

# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size)

# noise source
noise_dim = (2, 1, 3, 3)
noise_generator = Noise(train_set.ndata,
                        shape=noise_dim + (args.batch_size,),
                        seed=args.seed)

# generator network
g_scope = 'generator'
filter_init = GaussianInit(var=0.05)
relu = Rectlin(slope=0)

deconv_layers = [
    Deconvolution((1, 1, 16), filter_init, strides=1, padding=0,
                  activation=relu, batch_norm=True),
    Deconvolution((3, 3, 192), filter_init, strides=1, padding=0,
                  activation=relu, batch_norm=True,
memn2n = MemN2N_Dialog(babi.cands,
                       babi.num_cands,
                       babi.max_cand_len,
                       babi.memory_size,
                       babi.max_utt_len,
                       babi.vocab_size,
                       args.emb_size,
                       args.batch_size,
                       use_match_type=args.use_match_type,
                       kb_ents_to_type=babi.kb_ents_to_type,
                       kb_ents_to_cand_idxs=babi.kb_ents_to_cand_idxs,
                       match_type_idxs=babi.match_type_idxs,
                       nhops=args.nhops,
                       eps=args.eps,
                       init=GaussianInit(mean=0.0, std=0.1))

a_pred, attention = memn2n(inputs)

# specify loss function, calculate loss and update weights
loss = ng.cross_entropy_multi(a_pred, inputs['answer'], usebits=True)
mean_cost = ng.sum(loss, out_axes=[])

optimizer = Adam(learning_rate=0.001)
updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)
def __init__(self, cands, num_cands, max_cand_len, memory_size, max_utt_len,
             vocab_size, emb_size, batch_size, use_match_type=False,
             kb_ents_to_type=None, kb_ents_to_cand_idxs=None,
             match_type_idxs=None, nhops=3, eps=1e-6,
             init=GaussianInit(mean=0.0, std=0.1)):
    super(MemN2N_Dialog, self).__init__()

    self.cands = cands
    self.memory_size = memory_size
    self.max_utt_len = max_utt_len
    self.vocab_size = vocab_size
    self.num_cands = num_cands
    self.max_cand_len = max_cand_len
    self.batch_size = batch_size
    self.use_match_type = use_match_type
    self.kb_ents_to_type = kb_ents_to_type
    self.kb_ents_to_cand_idxs = kb_ents_to_cand_idxs
    self.match_type_idxs = match_type_idxs
    self.nhops = nhops
    self.eps = eps
    self.init = init

    # Make axes
    self.batch_axis = ng.make_axis(length=batch_size, name='N')
    self.sentence_rec_axis = ng.make_axis(length=max_utt_len, name='REC')
    self.memory_axis = ng.make_axis(length=memory_size, name='memory_axis')
    self.embedding_axis = ng.make_axis(length=emb_size, name='F')
    self.embedding_axis_proj = ng.make_axis(length=emb_size, name='F_proj')
    self.cand_axis = ng.make_axis(length=num_cands, name='cand_axis')
    self.cand_rec_axis = ng.make_axis(length=max_cand_len, name='REC')

    # Weight sharing of A's across all hops' input and output
    self.LUT_A = ModifiedLookupTable(vocab_size, emb_size, init,
                                     update=True, pad_idx=0)
    # Use lookuptable W to embed the candidate answers
    self.LUT_W = ModifiedLookupTable(vocab_size, emb_size, init,
                                     update=True, pad_idx=0)

    # Initialize projection matrix between internal model states
    self.R_proj = ng.variable(axes=[self.embedding_axis, self.embedding_axis_proj],
                              initial_value=init)

    if not self.use_match_type:
        # Initialize constant matrix of all candidate answers
        self.cands_mat = ng.constant(self.cands,
                                     axes=[self.cand_axis, self.cand_rec_axis])
y_train = np.ones(shape=(args.batch_size), dtype=np.int32)
train_data = {'image': {'data': X_train,
                        'axes': ('batch', 'C', 'height', 'width')},
              'label': {'data': y_train,
                        'axes': ('batch',)}}
train_set = ArrayIterator(train_data, batch_size=args.batch_size,
                          total_iterations=args.num_iterations)
inputs = train_set.make_placeholders(include_iteration=True)
ax.Y.length = 1000  # number of outputs of last layer.

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# Setup model
seq1 = Sequential([Convolution((11, 11, 64), filter_init=GaussianInit(var=0.01),
                               bias_init=init, activation=Rectlin(),
                               padding=3, strides=4),
                   Pool2D(3, strides=2),
                   Convolution((5, 5, 192), filter_init=GaussianInit(var=0.01),
                               bias_init=init, activation=Rectlin(), padding=2),
                   Pool2D(3, strides=2),
                   Convolution((3, 3, 384), filter_init=GaussianInit(var=0.03),
                               bias_init=init, activation=Rectlin(), padding=1),
                   Convolution((3, 3, 256), filter_init=GaussianInit(var=0.03),
                               bias_init=init, activation=Rectlin(), padding=1),
                   Convolution((3, 3, 256), filter_init=GaussianInit(var=0.03),
                               bias_init=init,
c0_2 = h0_2 = None
for i in range(num_iter):
    input_value = input_value_list[i]
    inp_ref = input_value.copy().transpose([1, 2, 0])
    (Hout_ref_1, cprev_1, hprev_1, batch_cache) = lstm_ref_1.forward(inp_ref,
                                                                     WLSTM_1,
                                                                     c0_1, h0_1)
    (Hout_ref_2, cprev_2, hprev_2, batch_cache) = lstm_ref_2.forward(Hout_ref_1,
                                                                     WLSTM_2,
                                                                     c0_2, h0_2)
    if reset_cells is False:
        c0_1 = cprev_1
        h0_1 = hprev_1
        c0_2 = cprev_2
        h0_2 = hprev_2

    # the output needs transpose as well
    Hout_ref_2 = Hout_ref_2.reshape(seq_len * batch_size, hidden_size).T
    fprop_ref_2_list.append(Hout_ref_2)

for i in range(num_iter):
    assert ng.testing.allclose(fprop_neon_2_list[i],
                               fprop_ref_2_list[i],
                               rtol=rtol, atol=atol)


if __name__ == '__main__':
    seq_len, input_size, hidden_size, batch_size = (8, 5, 16, 1)
    init = GaussianInit(0.0, 1)
    check_lstm(seq_len, input_size, hidden_size, batch_size, init,
               reset_cells=False)
    check_stacked_lstm(seq_len, input_size, hidden_size, batch_size, init,
                       reset_cells=False)
time_axis = ng.make_axis(length=seq_len, name="REC")
feature_axis = ng.make_axis(length=n_features, name="F")
out_axis = ng.make_axis(length=n_features, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, time_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))

# define model
if args.modeltype == "TCN":
    affine_layer = Affine(axes=out_axis,
                          weight_init=GaussianInit(0, 0.01),
                          activation=Logistic())

    model = Sequential([lambda op: ng.map_roles(op, {'F': 'C', 'REC': 'W'})] +
                       tcn(n_features, hidden_sizes,
                           kernel_size=kernel_size,
                           dropout=dropout).layers +
                       [lambda op: ng.map_roles(op, {'C': 'F', 'W': 'REC'})] +
                       [affine_layer])
elif args.modeltype == "LSTM":
    model = Sequential(
        recurrent_model.define_model(out_axis,
def affine_layer(h_dim, activation, name):
    return Affine(nout=h_dim,
                  activation=activation,
                  weight_init=GaussianInit(std=1.0),
                  bias_init=ConstantInit(val=0.0),
                  name=name)
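# Usage sketch: composing a small MLP from the helper above; the layer sizes
# and names are illustrative assumptions.
hidden = affine_layer(64, Rectlin(), name='hidden')
output = affine_layer(1, Logistic(), name='output')
model = Sequential([hidden, output])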
        'axes': ('N',)
    }
}

train_set = ArrayIterator(train_data, batch_size=args.batch_size,
                          total_iterations=args.num_iterations)
inputs = train_set.make_placeholders(include_iteration=True)
ax.Y.length = 1000  # number of outputs of last layer.

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# Setup model
seq1 = Sequential([
    Convolution((3, 3, 64), filter_init=GaussianInit(std=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Pooling((2, 2), strides=2),
    Convolution((3, 3, 128), filter_init=GaussianInit(std=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Pooling((2, 2), strides=2),
    Convolution((3, 3, 256), filter_init=GaussianInit(std=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
deltas_ref = deltas.copy().T.reshape(seq_len, batch_size,
                                     hidden_size).swapaxes(1, 2)
inp_ref = input_value.transpose([1, 0, 2])

# reference numpy RNN
rnn_ref = RefRecurrent(input_size, hidden_size)
rnn_ref.Wxh[:] = Wxh_neon
rnn_ref.Whh[:] = Whh_neon
rnn_ref.bh[:] = bh_neon.reshape(rnn_ref.bh.shape)

(dWxh_ref, dWhh_ref, db_ref,
 h_ref_list, dh_ref_list, d_out_ref) = rnn_ref.lossFun(inp_ref,
                                                       deltas_ref,
                                                       init_states=init_state_value)

# comparing outputs
if return_seq is False:
    h_ref_list = h_ref_list[:, -1].reshape(-1, 1)
else:
    fprop_neon = fprop_neon[:, :, 0]

np.testing.assert_allclose(fprop_neon, h_ref_list, rtol=0.0, atol=1.0e-5)
return


if __name__ == '__main__':
    seq_len, input_size, hidden_size, batch_size = (3, 3, 6, 1)
    init = GaussianInit(0.0, 0.1)
    check_rnn(seq_len, input_size, hidden_size, batch_size, init, False)
def residual_block(in_channels, out_channels, kernel_size, dilation,
                   dropout=0.2, stride=1):
    # define two temporal blocks
    tb = []
    for i in range(2):
        tb += temporal_block(out_channels, kernel_size, stride, dilation,
                             dropout=dropout)
    main_path = Sequential(tb)

    # sidepath
    if in_channels != out_channels:
        side_path = Sequential([Convolution(filter_shape=(1, out_channels),
                                            filter_init=GaussianInit(0, 0.01),
                                            strides=1,
                                            dilation=1,
                                            padding='same',
                                            batch_norm=False)])
    else:
        side_path = None

    # combine both
    return ResidualModule(main_path, side_path)
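# Usage sketch: chaining residual blocks with doubling dilation, as in a
# typical TCN stack; the channel counts, kernel size, and depth are
# illustrative assumptions.
blocks = [residual_block(64, 64, kernel_size=2, dilation=2 ** i)
          for i in range(3)]
model = Sequential(blocks)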