                    help='Weight clipping value for WGAN')
parser.add_argument('--plot_dir', type=str, default='MNIST_Plots',
                    help='Directory name to save the results')
args = parser.parse_args()

if not os.path.isdir(args.plot_dir):
    os.makedirs(args.plot_dir)

# define noise dimension
noise_dim = (2, 1, 3, 3)

# common layer parameters
filter_init = KaimingInit()
relu = Rectlin(slope=0)
lrelu = Rectlin(slope=0.2)


# helper function
def make_optimizer(name=None, weight_clip_value=None):
    optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9, epsilon=1e-8,
                     weight_clip_value=weight_clip_value)
    return optimizer
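# A sketch of the assumed downstream usage of this helper: in the WGAN setup only
# the discriminator (critic) gets the weight-clip value, the generator does not.
# `args.w_clip` is a hypothetical flag name standing in for the weight-clipping
# argument whose help string appears above.
optimizer_d = make_optimizer(name='discriminator_optimizer',
                             weight_clip_value=args.w_clip)
optimizer_g = make_optimizer(name='generator_optimizer')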
# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                   Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_binary(train_prob,
                                     ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
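# The excerpt stops before the runtime section. A minimal sketch of how
# `train_outputs` would be bound and driven, mirroring the runtime code in the
# HeTr example later in this collection (assumes the usual imports:
# `from contextlib import closing` and `import ngraph.transformers as ngt`):
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
    for step in range(args.num_iterations):
        out = train_computation(next(train_set))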
def __call__(self, in_obj):
    branch_1_output = self.branch_1(in_obj)
    branch_2_output = self.branch_2[0](in_obj)
    branch_2_output = self.branch_2[1](branch_2_output)
    branch_3_output = self.branch_3[0](in_obj)
    branch_3_output = self.branch_3[1](branch_3_output)
    branch_4_output = self.branch_4[0](in_obj)
    branch_4_output = self.branch_4[1](branch_4_output)

    outputs = [branch_1_output, branch_2_output,
               branch_3_output, branch_4_output]
    # This does the equivalent of neon's merge-broadcast
    return ng.concat_along_axis(outputs, branch_1_output.axes.channel_axis())


seq1 = Sequential([Convolution((7, 7, 64), padding=3, strides=2, activation=Rectlin(),
                               bias_init=bias_init, filter_init=XavierInit()),
                   Pool2D(fshape=3, padding=1, strides=2, op='max'),
                   Convolution((1, 1, 64), activation=Rectlin(),
                               bias_init=bias_init, filter_init=XavierInit()),
                   Convolution((3, 3, 192), activation=Rectlin(),
                               bias_init=bias_init, filter_init=XavierInit(), padding=1),
                   Pool2D(fshape=3, padding=1, strides=2, op='max'),
                   Inception([(64,), (96, 128), (16, 32), (32,)]),
                   Inception([(128,), (128, 192), (32, 96), (64,)]),
                   Pool2D(fshape=3, padding=1, strides=2, op='max'),
                   Inception([(192,), (96, 208), (16, 48), (64,)]),
                   Inception([(160,), (112, 224), (24, 64), (64,)]),
                   Inception([(128,), (128, 256), (24, 64), (64,)]),
                   Inception([(112,), (144, 288), (32, 64), (64,)]),
                        'axes': ('batch', 'C', 'height', 'width')},
              'label': {'data': y_train, 'axes': ('batch',)}}
train_set = ArrayIterator(train_data, batch_size=args.batch_size,
                          total_iterations=args.num_iterations)
inputs = train_set.make_placeholders(include_iteration=True)
ax.Y.length = 1000  # number of outputs of last layer.

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# Setup model
seq1 = Sequential([Convolution((11, 11, 64), filter_init=GaussianInit(var=0.01),
                               bias_init=init, activation=Rectlin(),
                               padding=3, strides=4),
                   Pool2D(3, strides=2),
                   Convolution((5, 5, 192), filter_init=GaussianInit(var=0.01),
                               bias_init=init, activation=Rectlin(), padding=2),
                   Pool2D(3, strides=2),
                   Convolution((3, 3, 384), filter_init=GaussianInit(var=0.03),
                               bias_init=init, activation=Rectlin(), padding=1),
                   Convolution((3, 3, 256), filter_init=GaussianInit(var=0.03),
                               bias_init=init, activation=Rectlin(), padding=1),
                   Convolution((3, 3, 256), filter_init=GaussianInit(var=0.03),
                               bias_init=init, activation=Rectlin(), padding=1),
                   Pool2D(3, strides=2),
feature_axis = ng.make_axis(length=feature_dim, name="F")
out_axis = ng.make_axis(length=output_dim, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes), y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))
preds_inputs = dict(X=inputs['X'])

# define model
n_hidden = list(map(int, args.n_hidden.split(",")))
filter_shape = list(map(int, args.filter_shape.split(",")))
if args.modeltype in ["RNN", "LSTM"]:
    seq1 = Sequential(recurrent_model.define_model(out_axis,
                                                   celltype=args.modeltype,
                                                   recurrent_units=n_hidden,
                                                   return_sequence=args.predict_seq).layers +
                      [Rectlin()])
elif args.modeltype == "CNN":
    seq1 = convolutional_model.define_model(out_axis,
                                            filter_shapes=filter_shape,
                                            n_filters=n_hidden)
    layers_modified = [lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'})] + \
        seq1.layers + [Rectlin()]
    seq1 = Sequential(layers_modified)

# Optimizer
optimizer = RMSProp(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (squared L2 loss)
fwd_prop = seq1(inputs['X'])
train_loss = ng.squared_L2(fwd_prop - inputs['y'])

# Cost calculation
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")
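# `ng.computation(..., "all")` builds a computation whose parameters are every
# placeholder reachable from `batch_cost`. A minimal sketch of compiling and
# driving it, assuming the usual imports (`from contextlib import closing`,
# `import ngraph.transformers as ngt`), a `train_set` iterator yielding dicts
# keyed like `inputs`, and the feed_dict calling convention used elsewhere in
# these examples:
with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
    for data in train_set:
        feed_dict = {inputs[k]: data[k] for k in inputs.keys()}
        cost = train_function(feed_dict=feed_dict)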
def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(axes=[x.axes.channel_axis()],
                                    initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.


init_uni = UniformInit(-0.1, 0.1)

seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Convolution((5, 5, 16), filter_init=init_uni, activation=Rectlin(),
                               batch_norm=args.use_batch_norm),
                   Pooling((2, 2), strides=2),
                   Convolution((5, 5, 32), filter_init=init_uni, activation=Rectlin(),
                               batch_norm=args.use_batch_norm),
                   Pooling((2, 2), strides=2),
                   Affine(nout=500, weight_init=init_uni, activation=Rectlin(),
                          batch_norm=args.use_batch_norm),
                   Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())])

optimizer = GradientDescentMomentum(0.01, 0.9)
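# The loss/cost wiring is not shown in this excerpt. It would follow the same
# pattern as the neighboring CIFAR-10/MNIST examples (Softmax output, so
# multiclass cross-entropy), assuming `inputs` and `ax.Y` are set up as they
# are in those examples:
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])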
inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification


def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(axes=x.axes.find_by_name('C'),
                                    initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.


seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Affine(nout=200, weight_init=UniformInit(-0.1, 0.1), activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1), activation=Softmax())])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                      inputs['label'])
train_set = ArrayIterator(train_data, args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
init_xav = XavierInit()

seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                   Convolution((5, 5, 16), filter_init=init_xav, activation=Rectlin()),
                   Pool2D(2, strides=2),
                   Convolution((5, 5, 32), filter_init=init_xav, activation=Rectlin()),
                   Pool2D(2, strides=2),
                   Affine(nout=500, weight_init=init_xav, activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=init_xav, activation=Softmax())])

optimizer = GradientDescentMomentum(0.01, 0.9)
train_prob = seq1(inputs['image'])
# multiclass cross-entropy to match the Softmax output layer
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)
def train_mnist_mlp(transformer_name, data_dir=None, rng_seed=12,
                    batch_size=128, train_iter=10, eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to graph regardless of transformer,
    # but it is ignored for non-HeTr case
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data, batch_size, total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                           Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
                           Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(train_prob,
                                             ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential([optimizer(train_loss),
                                    ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])

        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(inference_prob,
                                            ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer, train_outputs, inputs)
        loss_computation = make_bound_computation(transformer, eval_outputs, inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

    return train_costs, ce_loss
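# A sketch of the intended comparison: with the shared rng seed, the same model
# trained on the single-device 'cpu' transformer and on 'hetr' (two devices,
# data-parallel along the batch axis ax.N) should produce comparable costs.
# Iteration counts are the defaults above.
cpu_costs, cpu_ce = train_mnist_mlp('cpu')
hetr_costs, hetr_ce = train_mnist_mlp('hetr')
print("final batch cost: cpu=%.4f hetr=%.4f" % (cpu_costs[-1], hetr_costs[-1]))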
parser = NgraphArgparser(description='MLP GAN example')
args = parser.parse_args()

# model parameters
h_dim = 4
minibatch_discrimination = False
num_iterations = 600
batch_size = 12
num_examples = num_iterations * batch_size

# generator
g_scope = 'generator'
with Layer.variable_scope(g_scope):
    generator_layers = [affine_layer(h_dim, Rectlin(), name='g0'),
                        affine_layer(1, Identity(), name='g1')]
    generator = Sequential(generator_layers)

# discriminator
d_scope = 'discriminator'
with Layer.variable_scope(d_scope):
    discriminator_layers = [affine_layer(2 * h_dim, Tanh(), name='d0'),
                            affine_layer(2 * h_dim, Tanh(), name='d1')]
    if minibatch_discrimination:
        raise NotImplementedError
    else:
        discriminator_layers.append(affine_layer(2 * h_dim, Tanh(), name='d2'))
def __init__(self):
    super(AffineLayer, self).__init__()
    self.layer = Affine(ConstantInit(0.0), nout=10,
                        bias_init=ConstantInit(0.0), activation=Rectlin())