def __init__(self, stage_depth):
    """Build the residual network's layer list and pass it to the base class.

    Arguments:
        stage_depth: how many residual modules each of the three stages
            (16, 32 and 64 feature maps) contains.
    """
    # One feature-map count per residual module: 16, 32, 64, each repeated
    # ``stage_depth`` times.
    nfms = [2 ** (stage + 4) for stage in range(3) for _ in range(stage_depth)]
    print(nfms)

    layers = [
        Preprocess(functor=cifar_mean_subtract),
        Convolution(**conv_params(3, 16)),
        f_module(nfms[0], first=True),
    ]

    # A module uses stride 2 exactly when its feature-map count differs from
    # the preceding module's count; otherwise stride 1.
    previous = nfms[0]
    for nfm in nfms[1:]:
        layers.append(f_module(nfm, strides=1 if nfm == previous else 2))
        previous = nfm

    # Network head: batch norm, ReLU, 8x8 average pooling, softmax classifier.
    layers.extend([
        BatchNorm(),
        Activation(Rectlin()),
        Pooling((8, 8), pool_type='avg'),
        Affine(axes=ax.Y, weight_init=KaimingInit(), activation=Softmax()),
    ])
    super(residual_network, self).__init__(layers=layers)
def __init__(self, inputs, stage_depth, batch_norm=True, activation=True,
             preprocess=True):
    """Build the layer list of the network and store it on ``self.layers``.

    Arguments:
        inputs: mapping whose ``'image'`` entry exposes
            ``axes.batch_axes()``; the first batch axis is used as the
            ``parallel`` metadata value.
        stage_depth: number of residual modules per stage (three stages with
            16, 32 and 64 feature maps).
        batch_norm: forwarded to the convolution and affine layers, and
            controls whether a ``BatchNorm`` layer is added before the head.
        activation: whether a ``Rectlin`` activation layer is added before
            the pooling layer.
        preprocess: whether the CIFAR mean-subtraction layer is prepended.
    """
    nfms = [
        2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)
    ]
    # Stride 2 wherever the feature-map count changes between consecutive
    # modules, stride 1 otherwise.
    strides = [
        1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
    ]

    layers = []
    if preprocess:
        # Fix: the original rebound ``layers`` to the Preprocess object
        # itself, replacing the list that every later ``.append`` relies on.
        layers.append(Preprocess(functor=cifar_mean_subtract))

    parallel_axis = inputs['image'].axes.batch_axes()
    # NOTE(review): the source had lost its indentation; the extent of this
    # ``with`` block (conv + residual modules) is reconstructed -- confirm.
    with ng.metadata(device_id=('1', '2'), parallel=parallel_axis[0]):
        layers.append(
            Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
        layers.append(f_module(nfms[0], first=True))

        for nfm, stride in zip(nfms[1:], strides):
            layers.append(f_module(nfm, strides=stride))

    if batch_norm:
        layers.append(BatchNorm())
    if activation:
        layers.append(Activation(Rectlin()))
    layers.append(Pool2D(8, strides=2, op='avg'))
    layers.append(
        Affine(axes=ax.Y,
               weight_init=KaimingInit(),
               batch_norm=batch_norm,
               activation=Softmax()))
    self.layers = layers
######################
# Model specification


def cifar_mean_subtract(x):
    """Subtract the constant mean tensor from ``x``, divide by 255 and
    insert the ``ax.D`` axis at position 1."""
    mean_tensor = ng.persistent_tensor(
        axes=x.axes[0],
        initial_value=np.array([[104., 119., 127.]]))
    scaled = (x - mean_tensor) / 255.
    return ng.expand_dims(scaled, ax.D, 1)


# A single uniform initializer instance is shared by every layer below.
init_uni = UniformInit(-0.1, 0.1)

# Two conv/pool stages followed by a hidden affine layer and a softmax output.
seq1 = Sequential([
    Preprocess(functor=cifar_mean_subtract),
    Convolution((5, 5, 16), filter_init=init_uni, activation=Rectlin()),
    Pool2D(2, strides=2),
    Convolution((5, 5, 32), filter_init=init_uni, activation=Rectlin()),
    Pool2D(2, strides=2),
    Affine(nout=500, weight_init=init_uni, activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax()),
])

######################
# Input specification
# Axis lengths come from the training set's image shape, the batch size
# argument, and fixed values for the depth and output axes.
ax.C.length, ax.H.length, ax.W.length = train_set.shapes['image']
ax.D.length = 1
ax.N.length = args.batch_size
ax.Y.length = 10
inputs = train_set.make_placeholders()
# The output axis has one entry per vocabulary item.
ax.Y.length = len(tree_bank_data.vocab)


def expand_onehot(x):
    """Return ``x`` one-hot encoded along the ``ax.Y`` axis."""
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# First layer: a lookup table when requested, otherwise a one-hot expansion
# of the input.
layer_0 = (
    LookupTable(50, 100, init, update=True, pad_idx=0)
    if args.use_lut
    else Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))
)

# Recurrent layer selected by the ``layer_type`` argument.
if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size,
                   init,
                   activation=Tanh(),
                   return_sequence=True,
                   sum_out=True)

# model initialization
seq1 = Sequential([
    layer_0,
    rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, )),
])
def __init__(self, net_type, resnet_size, bottleneck, num_resnet_mods):
    """Assemble the ResNet layer list for the requested dataset.

    Arguments:
        net_type: ``'cifar10'`` or ``'i1k'``; any other value raises
            ``NameError``.
        resnet_size: passed to ``num_i1k_resmods`` for the i1k network
            (not read for cifar10).
        bottleneck: forwarded to ``get_mp_sp`` for the i1k network
            (not read for cifar10).
        num_resnet_mods: residual modules per filter size for cifar10; for
            i1k it is replaced by the result of ``num_i1k_resmods``.
    """
    if net_type == 'cifar10':
        # Filter counts of the three stages.
        num_fils = [16, 32, 64]
        layers = [
            # Mean subtraction followed by the first 3x3 convolution.
            Preprocess(functor=cifar10_mean_subtract),
            Convolution(**conv_params(3, 16)),
        ]
        # True until the very first residual module has been built.
        first_resmod = True
        for filters in num_fils:
            for resmods in range(num_resnet_mods):
                if resmods != 0:
                    # Later modules of a stage: strides=1, direct connection.
                    main_path, side_path = self.get_mp_sp(filters, net_type)
                elif first_resmod:
                    # Very first module: strides=1, convolution side path.
                    main_path, side_path = self.get_mp_sp(
                        filters, net_type, direct=False)
                    first_resmod = False
                else:
                    # First module of a later stage: strides=2,
                    # convolution side path.
                    main_path, side_path = self.get_mp_sp(
                        filters, net_type, direct=False, strides=2)
                layers.append(ResidualModule(main_path, side_path))
                layers.append(Activation(Rectlin()))
        # Average pooling --> fully connected --> softmax.
        layers.append(Pooling([8, 8], pool_type='avg'))
        layers.append(
            Affine(axes=ax.Y, weight_init=KaimingInit(), batch_norm=True))
        layers.append(Activation(Softmax()))
    elif net_type == "i1k":
        # Filter counts of the four stages.
        num_fils = [64, 128, 256, 512]
        # Number of residual modules to instantiate at each stage.
        num_resnet_mods = num_i1k_resmods(resnet_size)
        layers = [
            # Mean subtraction, 7x7 stride-2 convolution, 3x3 max pooling.
            Preprocess(functor=i1k_mean_subtract),
            Convolution((7, 7, 64),
                        strides=2,
                        padding=3,
                        batch_norm=True,
                        activation=Rectlin(),
                        filter_init=KaimingInit()),
            Pooling([3, 3], strides=2, pool_type='max', padding=1),
        ]
        # True until the very first residual module (strides=1) is built.
        first_resmod = True
        for fil, filters in enumerate(num_fils):
            for resmods in range(num_resnet_mods[fil]):
                if resmods != 0:
                    # Later modules of a stage: strides=1, direct connection.
                    main_path, side_path = self.get_mp_sp(
                        filters, net_type, bottleneck=bottleneck)
                elif first_resmod:
                    # Very first module: strides=1, convolution side path.
                    main_path, side_path = self.get_mp_sp(
                        filters, net_type, direct=False,
                        bottleneck=bottleneck)
                    first_resmod = False
                else:
                    # First module of a later stage: strides=2,
                    # convolution side path.
                    main_path, side_path = self.get_mp_sp(
                        filters, net_type, direct=False,
                        bottleneck=bottleneck, strides=2)
                layers.append(ResidualModule(main_path, side_path))
                layers.append(Activation(Rectlin()))
        # Average pooling --> fully connected --> softmax.
        layers.append(Pooling([7, 7], pool_type='avg'))
        layers.append(
            Affine(axes=ax.Y, weight_init=KaimingInit(), batch_norm=True))
        layers.append(Activation(Softmax()))
    else:
        raise NameError(
            "Incorrect dataset. Should be --dataset cifar10 or --dataset i1k"
        )
    super(BuildResnet, self).__init__(layers=layers)
def __init__(self, inputs, dataset, stage_depth, batch_norm=False,
             activation=False, preprocess=False):
    """Build the mini residual network's layers and pass them to the base class.

    Arguments:
        inputs: accepted for interface compatibility; not read here.
        dataset: ``'cifar10'`` (10 outputs) or ``'i1k'`` (1000 outputs).
        stage_depth: number of residual modules per stage (three stages with
            16, 32 and 64 feature maps).
        batch_norm: forwarded to the convolution, residual-module and affine
            layers, and controls whether a ``BatchNorm`` layer is added.
        activation: whether a ``Rectlin`` activation layer is added before
            the pooling layer.
        preprocess: whether the CIFAR mean-subtraction layer is prepended
            (only applied when ``dataset == 'cifar10'``).

    Raises:
        ValueError: if ``dataset`` is neither ``'cifar10'`` nor ``'i1k'``.
    """
    nfms = [
        2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)
    ]
    # Stride 2 wherever the feature-map count changes between consecutive
    # modules, stride 1 otherwise.
    strides = [
        1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
    ]

    layers = []
    if preprocess and dataset == 'cifar10':
        # Fix: the original rebound ``layers`` to the Preprocess object
        # itself, replacing the list that every later ``.append`` relies on.
        layers.append(Preprocess(functor=cifar_mean_subtract))
    layers.append(Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
    layers.append(f_module(nfms[0], first=True, batch_norm=batch_norm))

    for nfm, stride in zip(nfms[1:], strides):
        layers.append(f_module(nfm, strides=stride, batch_norm=batch_norm))

    if batch_norm:
        layers.append(BatchNorm())
    if activation:
        layers.append(Activation(Rectlin()))
    layers.append(Pool2D(8, strides=2, op='avg'))

    # The classifier is identical for both datasets; only the length of the
    # output axis differs.
    if dataset == 'cifar10':
        ax.Y.length = 10
    elif dataset == 'i1k':
        ax.Y.length = 1000
    else:
        raise ValueError("Incorrect dataset provided")
    layers.append(
        Affine(axes=ax.Y,
               weight_init=KaimingInit(),
               batch_norm=batch_norm,
               activation=Softmax()))
    super(mini_residual_network, self).__init__(layers=layers)
if args.layer_type == "lstm": rlayer1 = LSTM(hidden_size, init, activation=Tanh(), gate_activation=Logistic(), return_sequence=True) rlayer2 = LSTM(hidden_size, init, activation=Tanh(), gate_activation=Logistic(), return_sequence=True) # model initialization seq1 = Sequential([ Preprocess(functor=expand_onehot), rlayer1, rlayer2, Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, )) ]) optimizer = RMSProp(gradient_clip_value=gradient_clip_value) train_prob = seq1(inputs['inp_txt']) train_loss = ng.cross_entropy_multi(train_prob, ng.one_hot(inputs['tgt_txt'], axis=ax.Y), usebits=True) batch_cost = ng.sequential( [optimizer(train_loss), ng.mean(train_loss, out_axes=())]) train_outputs = dict(batch_cost=batch_cost) with Layer.inference_mode_on():
inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification


def cifar_mean_subtract(x):
    """Subtract the constant mean tensor (laid out on the axis named 'C')
    from ``x`` and divide the result by 255."""
    channel_axes = x.axes.find_by_name('C')
    mean_tensor = ng.persistent_tensor(
        axes=channel_axes,
        initial_value=np.array([104., 119., 127.]))
    return (x - mean_tensor) / 255.


# Preprocessing, one hidden affine layer, and a softmax output layer.
seq1 = Sequential([
    Preprocess(functor=cifar_mean_subtract),
    Affine(nout=200,
           weight_init=UniformInit(-0.1, 0.1),
           activation=Rectlin()),
    Affine(axes=ax.Y,
           weight_init=UniformInit(-0.1, 0.1),
           activation=Softmax()),
])

optimizer = GradientDescentMomentum(0.1, 0.9)

# Training graph: the optimizer update is sequenced before the mean loss.
train_prob = seq1(inputs['image'])
train_targets = ng.one_hot(inputs['label'], axis=ax.Y)
train_loss = ng.cross_entropy_multi(train_prob, train_targets)
batch_cost = ng.sequential([
    optimizer(train_loss),
    ng.mean(train_loss, out_axes=()),
])
train_outputs = {'batch_cost': batch_cost}

# Evaluation graph: the model is applied again under inference mode.
with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                      inputs['label'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = {'cross_ent_loss': eval_loss, 'misclass_pct': errors}
decoder_input[:] = 0 decoder_input[index] = 1 tokens.append(index) return np.squeeze(np.array(tokens)).T def expand_onehot(x): return ng.one_hot(x, axis=ax.Y) # weight initialization init = UniformInit(low=-0.08, high=0.08) # model initialization one_hot_enc = Preprocess(functor=expand_onehot) enc = Recurrent(hidden_size, init, activation=Tanh(), reset_cells=True, return_sequence=False) one_hot_dec = Preprocess(functor=expand_onehot) dec = Recurrent(hidden_size, init, activation=Tanh(), reset_cells=True, return_sequence=True) linear = Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y)) optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3,
parser = NgraphArgparser(description='Train simple mlp on mnist dataset')
args = parser.parse_args()

# Seed numpy's RNG from the command-line argument.
np.random.seed(args.rng_seed)

# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data,
                          args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
seq1 = Sequential([
    # Divide the raw input by 255 before the affine layers.
    Preprocess(functor=lambda x: x / 255.),
    Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic()),
])

optimizer = GradientDescentMomentum(0.1, 0.9)

output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]),
                      inputs['label'])
loss = ng.cross_entropy_binary(output_prob,
                               ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

# Outputs requested from the training and the evaluation computations.
train_outputs = {'batch_cost': mean_cost, 'updates': updates}
loss_outputs = {'cross_ent_loss': loss, 'misclass_pct': errors}

# Now bind the computations we are interested in
def expand_onehot(x):
    """Return ``x`` one-hot encoded along the ``ax.Y`` axis."""
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# Two stacked LSTM layers with identical settings, each returning the full
# sequence.
if args.layer_type == "lstm":
    rlayer1 = LSTM(hidden_size,
                   init,
                   activation=Tanh(),
                   gate_activation=Logistic(),
                   return_sequence=True)
    rlayer2 = LSTM(hidden_size,
                   init,
                   activation=Tanh(),
                   gate_activation=Logistic(),
                   return_sequence=True)

# model initialization
seq1 = Sequential([
    Preprocess(functor=expand_onehot),
    rlayer1,
    rlayer2,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,)),
])

optimizer = RMSProp(gradient_clip_value=gradient_clip_value)

# Training graph: the optimizer update is sequenced before the mean loss.
train_prob = seq1(inputs['inp_txt'])
train_targets = ng.one_hot(inputs['tgt_txt'], axis=ax.Y)
train_loss = ng.cross_entropy_multi(train_prob, train_targets, usebits=True)
batch_cost = ng.sequential([
    optimizer(train_loss),
    ng.mean(train_loss, out_axes=()),
])
train_outputs = {'batch_cost': batch_cost}

# The model is applied a second time under inference mode.
with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
# Load MNIST and wrap both splits in batch iterators; only the training
# iterator is bounded by the requested number of iterations.
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data,
                          args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification

# A single Xavier initializer instance is shared by every layer below.
init_xav = XavierInit()

# Two conv/pool stages followed by a hidden affine layer and a softmax output.
seq1 = Sequential([
    Preprocess(functor=lambda x: x / 255.),
    Convolution((5, 5, 16), filter_init=init_xav, activation=Rectlin()),
    Pooling((2, 2), strides=2),
    Convolution((5, 5, 32), filter_init=init_xav, activation=Rectlin()),
    Pooling((2, 2), strides=2),
    Affine(nout=500, weight_init=init_xav, activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=init_xav, activation=Softmax()),
])

optimizer = GradientDescentMomentum(0.01, 0.9)

# Training graph: the optimizer update is sequenced before the mean loss.
train_prob = seq1(inputs['image'])
train_targets = ng.one_hot(inputs['label'], axis=ax.Y)
train_loss = ng.cross_entropy_binary(train_prob, train_targets)
batch_cost = ng.sequential([
    optimizer(train_loss),
    ng.mean(train_loss, out_axes=()),
])
inputs = train_set.make_placeholders() ax.Y.length = len(tree_bank_data.vocab) def expand_onehot(x): # Assign the recurrent role and property to the axis named 'time' x.axes.find_by_short_name('time')[0].add_role(ar.time) x.axes.find_by_short_name('time')[0].is_recurrent = True return ng.one_hot(x, axis=ax.Y) # weight initialization init = UniformInit(low=-0.08, high=0.08) # model initialization one_hot_enc = Preprocess(functor=expand_onehot) enc = Recurrent(hidden_size, init, activation=Tanh(), reset_cells=True, return_sequence=False) one_hot_dec = Preprocess(functor=expand_onehot) dec = Recurrent(hidden_size, init, activation=Tanh(), reset_cells=True, return_sequence=True) linear = Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y)) optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3,
# +1 is for unknown token out_axis = ng.make_axis(length=len(shakes.vocab) + 1, name="out_feature_axis") in_axes = ng.make_axes([batch_axis, time_axis]) out_axes = ng.make_axes([batch_axis, time_axis]) # Build placeholders for the created axes inputs = { 'X': ng.placeholder(in_axes), 'y': ng.placeholder(out_axes), 'iteration': ng.placeholder(axes=()) } # Network Definition if (use_embedding is False): seq1 = Sequential([ Preprocess(functor=expand_onehot), LSTM(nout=recurrent_units, init=init_uni, backward=False, reset_cells=True, activation=Logistic(), gate_activation=Tanh(), return_sequence=True), Affine(weight_init=init_uni, bias_init=init_uni, activation=Softmax(), axes=out_axis) ]) else: embedding_dim = 8 seq1 = Sequential([
def train_mnist_mlp(transformer_name, data_dir=None, rng_seed=12,
                    batch_size=128, train_iter=10, eval_iter=10):
    """Train a small MLP on MNIST and collect training and evaluation losses.

    Arguments:
        transformer_name: ``'cpu'`` or ``'hetr'``.
        data_dir: path handed to the ``MNIST`` loader.
        rng_seed: integer seed for numpy's RNG.
        batch_size: batch size of both data iterators.
        train_iter: number of training steps to run.
        eval_iter: number of evaluation steps to run.

    Returns:
        ``(train_costs, ce_loss)``: a list with the float batch cost of each
        training step, and a list with the mean cross-entropy loss of each
        evaluation step.
    """
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to graph regardless of transformer,
    # but it is ignored for non-HeTr case
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data, batch_size,
                              total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    # NOTE(review): the source had lost its indentation; the extent of this
    # ``with`` block (whole graph construction) is reconstructed -- confirm.
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([
            Preprocess(functor=lambda x: x / 255.),
            Affine(nout=100, weight_init=GaussianInit(),
                   activation=Rectlin()),
            Affine(axes=ax.Y, weight_init=GaussianInit(),
                   activation=Logistic()),
        ])

        # Training graph: optimizer update sequenced before the mean loss.
        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(
            train_prob, ng.one_hot(inputs['label'], axis=ax.Y))
        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential([
            optimizer(train_loss),
            ng.mean(train_loss, out_axes=()),
        ])
        train_outputs = {'batch_cost': batch_cost}

        # Evaluation graph: the model is applied again under inference mode.
        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(
            inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = {'cross_ent_loss': eval_loss, 'misclass_pct': errors}

    # Runtime
    factory = ngt.make_transformer_factory(transformer_name)
    with closing(factory()) as transformer:
        train_computation = make_bound_computation(transformer,
                                                   train_outputs, inputs)
        loss_computation = make_bound_computation(transformer,
                                                  eval_outputs, inputs)

        # All training steps run first, then all evaluation steps.
        train_costs = [
            float(train_computation(next(train_set))['batch_cost'])
            for _ in range(train_iter)
        ]
        ce_loss = [
            np.mean(loss_computation(next(valid_set))['cross_ent_loss'])
            for _ in range(eval_iter)
        ]

        return train_costs, ce_loss
# download penn treebank
# set shift_target to be False, since it is going to predict the same sequence
tree_bank_data = PTB(path=args.data_dir, shift_target=False)
ptb_data = tree_bank_data.load_data()

# Training iterator over the 'train' split, configured with a reversed
# target and the previous target included.
train_set = SequentialArrayIterator(
    ptb_data['train'],
    batch_size=args.batch_size,
    time_steps=time_steps,
    total_iterations=args.num_iterations,
    reverse_target=True,
    get_prev_target=True,
)

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# model initialization
# Encoder: one-hot expansion followed by a recurrent layer that does not
# return the full sequence.
one_hot_enc = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))
enc = Recurrent(hidden_size,
                init,
                activation=Tanh(),
                reset_cells=True,
                return_sequence=False)

# Decoder: one-hot expansion followed by a recurrent layer that returns the
# full sequence.
one_hot_dec = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))
dec = Recurrent(hidden_size,
                init,
                activation=Tanh(),
                reset_cells=True,
                return_sequence=True)

# Softmax output layer over the (ax.Y, ax.REC) axes.
linear = Affine(init,
                activation=Softmax(),
                bias_init=init,
                axes=(ax.Y, ax.REC))