def __init__(self, inputs, dataset, stage_depth,
             batch_norm=False, activation=False, preprocess=False):
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    layers = []
    if preprocess and dataset == 'cifar10':
        layers = [Preprocess(functor=cifar_mean_subtract)]
    layers.append(Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
    layers.append(f_module(nfms[0], first=True, batch_norm=batch_norm))

    for nfm, stride in zip(nfms[1:], strides):
        layers.append(f_module(nfm, strides=stride, batch_norm=batch_norm))

    if batch_norm:
        layers.append(BatchNorm())
    if activation:
        layers.append(Activation(Rectlin()))
    layers.append(Pool2D(8, strides=2, op='avg'))
    if dataset == 'cifar10':
        ax.Y.length = 10
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=batch_norm, activation=Softmax()))
    elif dataset == 'i1k':
        ax.Y.length = 1000
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=batch_norm, activation=Softmax()))
    else:
        raise ValueError("Incorrect dataset provided")
    super(mini_residual_network, self).__init__(layers=layers)
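# Worked example of the filter/stride arithmetic above (plain Python, no
# ngraph needed). For stage_depth=2:
#   sorted(list(range(3)) * 2)  ->  [0, 0, 1, 1, 2, 2]
# so the network widens 16 -> 32 -> 64, using stride 2 exactly where the
# filter count doubles.
stage_depth = 2
nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]
assert nfms == [16, 16, 32, 32, 64, 64]
assert strides == [1, 2, 1, 2, 1]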
def create_network():
    '''
    Define a 3D convolutional network.
    '''
    # Definitions for weight initialization
    g1 = GaussianInit(mean=0., var=0.01)
    g5 = GaussianInit(mean=0., var=0.005)
    c0 = ConstantInit(val=0.)
    c1 = ConstantInit(val=1.)

    # Output axis: number of target classes (e.g. the 101 actions of UCF-101)
    ax.Y.length = 101

    padding = {'D': 1, 'H': 1, 'W': 1, 'C': 0}
    strides = {'D': 2, 'H': 2, 'W': 2, 'C': 1}

    layers = [
        Convolution((3, 3, 3, 64), padding=padding, filter_init=g1,
                    bias_init=c0, activation=Rectlin()),
        Pooling((1, 2, 2), strides={'D': 1, 'H': 2, 'W': 2, 'C': 1}),
        Convolution((3, 3, 3, 128), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, weight_init=g5, bias_init=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, weight_init=g5, bias_init=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(axes=ax.Y, weight_init=g1, bias_init=c0, activation=Softmax())
    ]
    return Sequential(layers)
def make_generator(out_axis):
    # Assumes module-level `dim`, `w_init`, and `b_init` definitions.
    generator = [
        Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
        Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
        Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
        Affine(axes=out_axis, weight_init=w_init, bias_init=b_init, activation=None)
    ]
    return Sequential(generator, name="Generator")
def make_discriminator():
    # The final layer emits a single unactivated score per example.
    discriminator = [
        Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
        Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
        Affine(nout=dim, weight_init=w_init, bias_init=b_init, activation=Rectlin()),
        Affine(nout=1, weight_init=w_init, bias_init=b_init, activation=None)
    ]
    return Sequential(discriminator, name="Discriminator")
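# A minimal usage sketch for the two factories above. The axis names and the
# `noise` placeholder are illustrative assumptions, not part of the source;
# `dim`, `w_init`, and `b_init` must already be defined at module level.
latent_axis = ng.make_axis(length=100, name="F")
data_axis = ng.make_axis(length=2, name="Fo")
noise = ng.placeholder(ng.make_axes([latent_axis, ax.N]))
generated = make_generator(out_axis=data_axis)(noise)   # generated sample
score = make_discriminator()(generated)                 # unactivated score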
def __init__(self, nfilters, filter_width, str_w, nbands, depth, hidden_size,
             batch_norm=False, batch_norm_affine=False, batch_norm_conv=False,
             to_ctc=True):
    self.to_ctc = to_ctc

    # Initializers
    gauss = GaussianInit(0.01)
    glorot = GlorotInit()

    # 1D convolution layer
    padding = dict(pad_h=0, pad_w=filter_width // 2, pad_d=0)
    strides = dict(str_h=1, str_w=str_w, str_d=1)
    dilation = dict(dil_d=1, dil_h=1, dil_w=1)
    conv_layer = Convolution((nbands, filter_width, nfilters),
                             gauss,
                             bias_init=ConstantInit(0),
                             padding=padding,
                             strides=strides,
                             dilation=dilation,
                             activation=Rectlin(),
                             batch_norm=batch_norm_conv)

    # Add the BiRNN layers
    deep_birnn = DeepBiRNN(depth, hidden_size, glorot, Rectlinclip(),
                           batch_norm=batch_norm)

    # Add a single affine layer
    fc = Affine(nout=hidden_size, weight_init=glorot,
                activation=Rectlinclip(), batch_norm=batch_norm_affine)

    # Add the final affine layer. When training against CTC, the softmax is
    # computed inside the CTC cost function, so no activation is needed here.
    if self.to_ctc is False:
        activation = Softmax()
    else:
        activation = None
    final = Affine(axes=ax.Y, weight_init=glorot, activation=activation)

    layers = [conv_layer, deep_birnn, fc, final]
    super(Deepspeech, self).__init__(layers=layers)
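# A hypothetical instantiation of the class above; every hyperparameter value
# below is an illustrative assumption, not taken from the source.
model = Deepspeech(nfilters=256, filter_width=11, str_w=3, nbands=13,
                   depth=3, hidden_size=256, batch_norm=True)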
def define_recurrent_layers(out_axes=None,
                            celltype='RNN',
                            recurrent_units=[32],
                            init=GlorotInit(),
                            return_sequence=True):
    layers = []
    for e, i in enumerate(recurrent_units):
        # All but the last recurrent layer must return the full sequence;
        # the last layer honors the return_sequence flag.
        layer_return_sequence = e < len(recurrent_units) - 1 or return_sequence
        if celltype == 'RNN':
            layers.append(Recurrent(nout=i,
                                    init=init,
                                    backward=False,
                                    activation=Tanh(),
                                    return_sequence=layer_return_sequence))
        elif celltype == 'LSTM':
            layers.append(LSTM(nout=i,
                               init=init,
                               backward=False,
                               activation=Tanh(),
                               gate_activation=Logistic(),
                               return_sequence=layer_return_sequence))
    if out_axes is not None:
        affine_layer = Affine(weight_init=init,
                              bias_init=init,
                              activation=Identity(),
                              axes=out_axes)
        layers.append(affine_layer)
    return layers
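# Usage sketch: a two-layer LSTM stack capped with a linear readout
# (`out_axis` is assumed to be an existing ng.Axis in scope).
model = Sequential(define_recurrent_layers(out_axes=(out_axis,),
                                           celltype='LSTM',
                                           recurrent_units=[64, 32],
                                           return_sequence=False))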
def __init__(self, inputs, stage_depth,
             batch_norm=True, activation=True, preprocess=True):
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    layers = []
    if preprocess:
        layers = [Preprocess(functor=cifar_mean_subtract)]
    parallel_axis = inputs['image'].axes.batch_axes()
    with ng.metadata(device_id=('1', '2'), parallel=parallel_axis[0]):
        layers.append(Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
        layers.append(f_module(nfms[0], first=True))
        for nfm, stride in zip(nfms[1:], strides):
            layers.append(f_module(nfm, strides=stride))

    if batch_norm:
        layers.append(BatchNorm())
    if activation:
        layers.append(Activation(Rectlin()))
    layers.append(Pool2D(8, strides=2, op='avg'))
    layers.append(Affine(axes=ax.Y,
                         weight_init=KaimingInit(),
                         batch_norm=batch_norm,
                         activation=Softmax()))
    self.layers = layers
def __init__(self, number_embeddings_features, tokens_in_embeddings,
             deep_parameters, deep_activation_fn, drop_out_rate=0.0):
    super(WideDeepClassifier, self).__init__(name="WideAndDeep")

    # Embeddings: one lookup table per categorical feature
    self.luts = []
    for e in range(len(number_embeddings_features)):
        init_uniform = UniformInit(0, 1)
        # pad_idx has to be initialized to 0 explicitly.
        lut = LookupTable(tokens_in_embeddings[e],
                          number_embeddings_features[e],
                          init_uniform,
                          pad_idx=0,
                          update=True)
        self.luts.append(lut)

    # Model specification
    init_xavier = XavierInit()
    layers = []
    for i in range(len(deep_parameters)):
        layers.append(Affine(nout=deep_parameters[i],
                             weight_init=init_xavier,
                             activation=deep_activation_fn))
        if drop_out_rate > 0.0:
            # Note: Dropout takes a *keep* probability, so drop_out_rate is
            # used here as the fraction of units retained.
            layers.append(Dropout(keep=drop_out_rate))
    layers.append(Affine(axes=tuple(), weight_init=init_xavier))

    self.deep_layers = Sequential(layers)
    self.linear_layer = Affine(axes=tuple(), weight_init=init_xavier)
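# Hypothetical instantiation: two categorical features with 8- and 4-dim
# embeddings over vocabularies of 1000 and 50 tokens, and a 64 -> 32 deep
# stack. All values are illustrative assumptions, not from the source.
model = WideDeepClassifier(number_embeddings_features=[8, 4],
                           tokens_in_embeddings=[1000, 50],
                           deep_parameters=[64, 32],
                           deep_activation_fn=Rectlin(),
                           drop_out_rate=0.8)   # keep probability, see note above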
def make_layers(use_large, vocab_size):
    if use_large:
        init = GaussianInit(0., 0.02)
    else:
        init = GaussianInit(0., 0.05)

    layers = []
    layers.append(make_embedding_layer(vocab_size))
    # Re-map the recurrent/feature roles onto spatial/channel roles so the
    # 1D convolutions below can consume the embedded sequence.
    layers.append(lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'}))

    kernel_sizes = [7, 7, 3, 3, 3, 3]
    pool_layer_idxs = [0, 1, 5]
    conv_nout = 1024 if use_large else 256
    fc_nout = 2048 if use_large else 1024
    for i in range(6):
        conv_layer = Convolution(**conv_params(kernel_sizes[i], conv_nout, init))
        layers.append(conv_layer)
        if i in pool_layer_idxs:
            pool_layer = Pooling(pool_shape=(3,), strides=3)
            layers.append(pool_layer)
    layers.append(Affine(nout=fc_nout, weight_init=init,
                         bias_init=ConstantInit(0.), activation=Rectlin()))
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=fc_nout, weight_init=init,
                         bias_init=ConstantInit(0.), activation=Rectlin()))
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(axes=(ax.Y,), weight_init=init,
                         bias_init=ConstantInit(0.), activation=Softmax()))
    return layers
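# Usage sketch: build the small variant as a Sequential model, assuming a
# character vocabulary `vocab` is in scope.
model = Sequential(make_layers(use_large=False, vocab_size=len(vocab)))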
def define_model(out_axis, filter_shapes=[5], n_filters=[32], init=KaimingInit()):
    assert len(filter_shapes) == len(n_filters)

    layers = []
    for e, (f, n) in enumerate(zip(filter_shapes, n_filters)):
        layers.append(Convolution(filter_shape=(f, n),
                                  filter_init=init,
                                  strides=1,
                                  padding="valid",
                                  dilation=1,
                                  activation=Rectlin(),
                                  batch_norm=True))

    affine_layer = Affine(weight_init=init, bias_init=init,
                          activation=Identity(), axes=out_axis)
    model = Sequential(layers + [affine_layer])
    return model
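# Usage sketch: a two-block convolutional model with 5- and 3-wide filters
# (`out_axis` is assumed to be an existing ng.Axis; the values are
# illustrative).
model = define_model(out_axis, filter_shapes=[5, 3], n_filters=[32, 64])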
def make_discriminator(bn=True, disc_activation=None, bias_init=None):
    conv_layers = [
        Convolution((4, 4, 128), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=False, bias_init=bias_init)
    ]
    conv_layers.append(Convolution((4, 4, 256), filter_init, strides=2,
                                   padding=1, activation=lrelu,
                                   batch_norm=bn, bias_init=bias_init))
    conv_layers.append(Convolution((4, 4, 512), filter_init, strides=2,
                                   padding=1, activation=lrelu,
                                   batch_norm=bn, bias_init=bias_init))
    conv_layers.append(Convolution((4, 4, 1024), filter_init, strides=2,
                                   padding=1, activation=lrelu,
                                   batch_norm=bn, bias_init=bias_init))
    # Collapse the feature map to a single scalar score per example.
    conv_layers.append(Affine(weight_init=filter_init,
                              activation=None,
                              batch_norm=False,
                              axes=ng.make_axes({"C": 1, "H": 1, "W": 1})))
    return Sequential(conv_layers, name="Discriminator")
def __init__(self, stage_depth):
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    layers = [Preprocess(functor=cifar_mean_subtract),
              Convolution(**conv_params(3, 16)),
              f_module(nfms[0], first=True)]
    for nfm, stride in zip(nfms[1:], strides):
        layers.append(f_module(nfm, strides=stride))

    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling((8, 8), pool_type='avg'))
    layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                         activation=Softmax()))
    super(residual_network, self).__init__(layers=layers)
def make_generator(bn=True, bias_init=None):
    # Project the latent vector up to a 1024 x 4 x 4 volume, then upsample
    # with strided deconvolutions to a 3-channel image in [-1, 1].
    deconv_layers = [
        Affine(weight_init=filter_init, activation=None, batch_norm=False,
               axes=ng.make_axes({"C": 1024, "H": 4, "W": 4})),
        Deconvolution((4, 4, 512), filter_init, strides=2, padding=1,
                      activation=relu, batch_norm=bn, bias_init=bias_init),
        Deconvolution((4, 4, 256), filter_init, strides=2, padding=1,
                      activation=relu, batch_norm=bn, bias_init=bias_init),
        Deconvolution((4, 4, 128), filter_init, strides=2, padding=1,
                      activation=relu, batch_norm=bn, bias_init=bias_init)
    ]
    deconv_layers.append(Deconvolution((4, 4, 3), filter_init, strides=2,
                                       padding=1, activation=Tanh(),
                                       batch_norm=False, bias_init=bias_init))
    return Sequential(deconv_layers, name="Generator")
def __init__(self, net_type, resnet_size, bottleneck, num_resnet_mods):
    # For the CIFAR10 dataset
    if net_type == 'cifar10':
        # Number of filters at each stage
        num_fils = [16, 32, 64]

        # Network layers
        layers = [
            # Subtracting the mean as suggested in the paper
            Preprocess(functor=cifar10_mean_subtract),
            # First conv with 3x3 filters and stride 1
            Convolution(**conv_params(3, 16))
        ]
        first_resmod = True  # Indicates the first residual module
        # Loop over the 3 filter sizes.
        for fil in range(3):
            # Lay out n residual modules so that we have 2n layers.
            for resmods in range(num_resnet_mods):
                if resmods == 0:
                    if first_resmod:
                        # Strides=1 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                        first_resmod = False
                    else:
                        # Strides=2 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False, strides=2)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                else:
                    # Strides=1 and direct connection
                    main_path, side_path = self.get_mp_sp(
                        num_fils[fil], net_type)
                    layers.append(ResidualModule(main_path, side_path))
                    layers.append(Activation(Rectlin()))
        # Average pooling --> fully connected --> softmax.
        layers.append(Pooling([8, 8], pool_type='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=True))
        layers.append(Activation(Softmax()))
    # For the I1K dataset
    elif net_type == "i1k":
        # Number of filters at each stage
        num_fils = [64, 128, 256, 512]
        # Number of residual modules to instantiate at each level
        num_resnet_mods = num_i1k_resmods(resnet_size)

        # Network layers
        layers = [
            # Subtracting the mean
            Preprocess(functor=i1k_mean_subtract),
            # First conv layer
            Convolution((7, 7, 64), strides=2, padding=3, batch_norm=True,
                        activation=Rectlin(), filter_init=KaimingInit()),
            # Max pooling
            Pooling([3, 3], strides=2, pool_type='max', padding=1)
        ]
        first_resmod = True  # The first residual module uses strides of 1
        # Loop over the 4 filter sizes.
        for fil in range(4):
            # Lay out residual modules as given by the num_resnet_mods list.
            for resmods in range(num_resnet_mods[fil]):
                if resmods == 0:
                    if first_resmod:
                        # Strides=1 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False,
                            bottleneck=bottleneck)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                        first_resmod = False
                    else:
                        # Strides=2 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False,
                            bottleneck=bottleneck, strides=2)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                else:
                    # Strides=1 and direct connection
                    main_path, side_path = self.get_mp_sp(
                        num_fils[fil], net_type, bottleneck=bottleneck)
                    layers.append(ResidualModule(main_path, side_path))
                    layers.append(Activation(Rectlin()))
        # Average pooling --> fully connected --> softmax.
        layers.append(Pooling([7, 7], pool_type='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=True))
        layers.append(Activation(Softmax()))
    else:
        raise ValueError(
            "Incorrect dataset. Should be --dataset cifar10 or --dataset i1k")
    super(BuildResnet, self).__init__(layers=layers)
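# Hypothetical CIFAR-10 instantiation. With n residual modules per stage
# (each contributing 2 layers, per the comment above), 3 stages, a stem conv,
# and the final affine, the depth follows the 6n + 2 convention of the ResNet
# paper; n = 9 gives ResNet-56. The call below is illustrative, not from the
# source.
model = BuildResnet('cifar10', resnet_size=56, bottleneck=False,
                    num_resnet_mods=9)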
init_uni = UniformInit(-0.1, 0.1)

seq1 = Sequential([
    Preprocess(functor=cifar_mean_subtract),
    Convolution((5, 5, 16), filter_init=init_uni, activation=Rectlin(),
                batch_norm=args.use_batch_norm),
    Pool2D(2, strides=2),
    Convolution((5, 5, 32), filter_init=init_uni, activation=Rectlin(),
                batch_norm=args.use_batch_norm),
    Pool2D(2, strides=2),
    Affine(nout=500, weight_init=init_uni, activation=Rectlin(),
           batch_norm=args.use_batch_norm),
    Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())
])

optimizer = GradientDescentMomentum(0.01, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
def cifar_mean_subtract(x):
    # Subtract the per-channel (BGR) means, then scale by 1/255.
    bgr_mean = ng.persistent_tensor(axes=x.axes.find_by_name('C'),
                                    initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.

seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Affine(nout=200, weight_init=UniformInit(-0.1, 0.1),
                          activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1),
                          activation=Softmax())])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                      inputs['label'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

# Now bind the computations we are interested in
def __init__(self, params_dict, nout, init, init_h2h=None, bias_init=None,
             activation=None, gate_activation=None, batch_norm=False,
             reset_cells=True, **kwargs):
    super(MatchLSTMCell_withAttention, self).__init__(**kwargs)
    self.init = params_dict['init']
    max_question = params_dict['max_question']
    max_para = params_dict['max_para']
    hidden_size = nout

    # Axes
    # Axis for the length of the hidden units
    self.hidden_rows = ng.make_axis(length=hidden_size, name='hidden_rows')
    # Axis for the length of the hidden units
    self.F = ng.make_axis(length=hidden_size, name='F')
    # Axis for the maximum question length
    self.hidden_cols_ques = ng.make_axis(length=max_question,
                                         name='hidden_cols_ques')
    # Axis with the length of the embedding size
    self.embed_axis = ng.make_axis(length=params_dict['embed_size'],
                                   name='embed_axis')
    # Recurrent axis for the maximum question length
    self.REC = ng.make_axis(length=max_question, name='REC')
    # Axis with length 1
    self.dummy_axis = ng.make_axis(length=1, name='dummy_axis')
    # Axis for the batch size
    self.N = ng.make_axis(length=params_dict['batch_size'], name='N')
    # Axis for the output of the match-LSTM cell
    self.lstm_feature = ng.make_axis(length=2 * hidden_size,
                                     name='lstm_feature')
    # Length of the final classification layer (maximum length of the
    # paragraph)
    self.ax = params_dict['ax']
    self.ax.Y.length = max_para

    # Variables to be learnt during training (part of the attention network);
    # naming convention taken from the paper
    self.W_p = ng.variable(axes=[self.hidden_rows, self.F],
                           initial_value=self.init)
    self.W_q = ng.variable(axes=[self.hidden_rows, self.F],
                           initial_value=self.init)
    self.W_r = ng.variable(axes=[self.hidden_rows, self.F],
                           initial_value=self.init)
    self.b_p = ng.variable(axes=self.hidden_rows, initial_value=self.init)
    self.w_lr = ng.variable(axes=[self.hidden_rows], initial_value=self.init)

    # Constants for creating masks and initial hidden states
    self.e_q = ng.constant(axes=[self.dummy_axis, self.hidden_cols_ques],
                           const=np.ones([1, max_question]))
    self.e_q2 = ng.constant(axes=[self.F, self.dummy_axis], const=1)
    self.h_r_old = ng.constant(axes=[self.F, self.N], const=0)

    # Define constants for implementing the stacking operation (the default
    # stack op seems to be slow).
    L1 = np.vstack((np.eye(hidden_size),
                    np.zeros([hidden_size, hidden_size])))
    L2 = np.vstack((np.zeros([hidden_size, hidden_size]),
                    np.eye(hidden_size)))
    self.ZX = ng.constant(const=L1, axes=[self.lstm_feature, self.F])
    self.ZY = ng.constant(const=L2, axes=[self.lstm_feature, self.F])

    # LSTM cell initialization (code from the standard LSTM cell in ngraph)
    self.nout = nout
    self.init = init
    self.init_h2h = init_h2h if init_h2h is not None else init
    self.bias_init = bias_init
    self.activation = activation
    if gate_activation is not None:
        self.gate_activation = gate_activation
    else:
        self.gate_activation = self.activation
    self.batch_norm = batch_norm
    self.reset_cells = reset_cells
    self.i2h = {}
    self.h2h = {}
    self.gate_transform = {}
    self.gate_output = {}
    for gate in self._gate_names:
        self.h2h[gate] = Linear(nout=self.nout, init=self.init_h2h[gate])
        self.i2h[gate] = Affine(axes=self.h2h[gate].axes,
                                weight_init=self.init[gate],
                                bias_init=self.bias_init[gate],
                                batch_norm=self.batch_norm)
        if gate == 'g':
            self.gate_transform[gate] = self.activation
        else:
            self.gate_transform[gate] = self.gate_activation
    self.out_axes = None
def __init__(self, params_dict, nout, init, init_h2h=None, bias_init=None,
             activation=None, gate_activation=None, batch_norm=False,
             reset_cells=True, **kwargs):
    super(AnswerPointer_withAttention, self).__init__(**kwargs)
    self.init_axes = params_dict['init']
    max_question = params_dict['max_question']
    max_para = params_dict['max_para']
    hidden_size = nout

    # Axes
    # Axis for the length of the hidden units
    self.hidden_rows = ng.make_axis(length=hidden_size, name='hidden_rows')
    # Axis for the length of max_para
    self.hidden_cols_para = ng.make_axis(length=max_para,
                                         name='hidden_cols_para')
    # Axis for the length of the hidden unit size
    self.F = ng.make_axis(length=hidden_size, name='F')
    # Axis for the length of max_question
    self.REC = ng.make_axis(length=max_question, name='REC')
    # Axis with length 1
    self.dummy_axis = ng.make_axis(length=1, name='dummy_axis')
    # Axis with the length of the batch size
    self.N = ng.make_axis(length=params_dict['batch_size'], name='N')
    # Axis with twice the length of the hidden size
    self.lstm_feature_new = ng.make_axis(length=2 * hidden_size,
                                         name='lstm_feature')

    self.ax = params_dict['ax']
    # Length of the final classification layer (maximum length of the
    # paragraph)
    self.ax.Y.length = max_para

    # Variables
    self.V_answer = ng.variable(axes=[self.hidden_rows, self.lstm_feature_new],
                                initial_value=self.init_axes)
    self.W_a = ng.variable(axes=[self.hidden_rows, self.F],
                           initial_value=self.init_axes)
    self.b_a = ng.variable(axes=self.hidden_rows,
                           initial_value=self.init_axes)
    self.e_q = ng.constant(axes=[self.dummy_axis, self.hidden_cols_para],
                           const=np.ones([1, max_para]))
    self.e_q2 = ng.constant(axes=[self.lstm_feature_new, self.dummy_axis],
                            const=1)
    self.v_lr = ng.variable(axes=[self.hidden_rows],
                            initial_value=self.init_axes)
    self.W_RNNx = ng.variable(axes=[self.hidden_rows, self.F],
                              initial_value=self.init_axes)
    self.W_RNNh = ng.variable(axes=[self.hidden_rows, self.F],
                              initial_value=self.init_axes)

    # LSTM cell initialization
    self.nout = nout
    self.init = init
    self.init_h2h = init_h2h if init_h2h is not None else init
    self.bias_init = bias_init
    self.activation = activation
    if gate_activation is not None:
        self.gate_activation = gate_activation
    else:
        self.gate_activation = self.activation
    self.batch_norm = batch_norm
    self.reset_cells = reset_cells
    self.i2h = {}
    self.h2h = {}
    self.gate_transform = {}
    self.gate_output = {}
    for gate in self._gate_names:
        self.h2h[gate] = Linear(nout=self.nout, init=self.init_h2h[gate])
        self.i2h[gate] = Affine(axes=self.h2h[gate].axes,
                                weight_init=self.init[gate],
                                bias_init=self.bias_init[gate],
                                batch_norm=self.batch_norm)
        if gate == 'g':
            self.gate_transform[gate] = self.activation
        else:
            self.gate_transform[gate] = self.gate_activation
    self.out_axes = None
def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(axes=x.axes[0],
                                    initial_value=np.array([[104., 119., 127.]]))
    y = ng.expand_dims((x - bgr_mean) / 255., ax.D, 1)
    return y

init_uni = UniformInit(-0.1, 0.1)

seq1 = Sequential([
    Preprocess(functor=cifar_mean_subtract),
    Convolution((5, 5, 16), filter_init=init_uni, activation=Rectlin()),
    Pool2D(2, strides=2),
    Convolution((5, 5, 32), filter_init=init_uni, activation=Rectlin()),
    Pool2D(2, strides=2),
    Affine(nout=500, weight_init=init_uni, activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())
])

######################
# Input specification
ax.C.length, ax.H.length, ax.W.length = train_set.shapes['image']
ax.D.length = 1
ax.N.length = args.batch_size
ax.Y.length = 10

# Placeholders with descriptive names
inputs = dict(image=ng.placeholder([ax.C, ax.H, ax.W, ax.N]),
              label=ng.placeholder([ax.N]))

optimizer = GradientDescentMomentum(0.01, 0.9)
init = UniformInit(low=-0.08, high=0.08)

# Model initialization
one_hot_enc = Preprocess(functor=expand_onehot)
enc = Recurrent(hidden_size, init, activation=Tanh(),
                reset_cells=True, return_sequence=False)
one_hot_dec = Preprocess(functor=expand_onehot)
dec = Recurrent(hidden_size, init, activation=Tanh(),
                reset_cells=True, return_sequence=True)
linear = Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))

optimizer = RMSProp(decay_rate=0.95,
                    learning_rate=2e-3,
                    epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

# Build the network graph: encode the input text, then seed the decoder's
# initial state with the encoder's final state.
one_hot_enc_out = one_hot_enc.train_outputs(inputs['inp_txt'])
one_hot_dec_out = one_hot_dec.train_outputs(inputs['prev_tgt'])
enc_out = enc.train_outputs(one_hot_enc_out)
dec_out = dec.train_outputs(one_hot_dec_out, init_state=enc_out)
output_prob = linear.train_outputs(dec_out)

loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
time_axis = ng.make_axis(length=seq_len, name="REC")
feature_axis = ng.make_axis(length=n_features, name="F")
out_axis = ng.make_axis(length=n_features, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, time_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))

# Define the model
if args.modeltype == "TCN":
    affine_layer = Affine(axes=out_axis,
                          weight_init=GaussianInit(0, 0.01),
                          activation=Logistic())
    # Map the recurrent/feature roles onto spatial/channel roles around the
    # convolutional TCN stack, then map them back for the affine readout.
    model = Sequential([lambda op: ng.map_roles(op, {'F': 'C', 'REC': 'W'})] +
                       tcn(n_features, hidden_sizes,
                           kernel_size=kernel_size, dropout=dropout).layers +
                       [lambda op: ng.map_roles(op, {'C': 'F', 'W': 'REC'})] +
                       [affine_layer])
elif args.modeltype == "LSTM":
    model = Sequential(
        recurrent_model.define_model(out_axis,
                                      time_steps=time_steps,
                                      total_iterations=args.num_iterations)
valid_set = SequentialArrayIterator(ptb_data['valid'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps)

# Weight initialization
init = UniformInit(low=-0.08, high=0.08)

# Model initialization
seq1 = Sequential([
    Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y)),
    Recurrent(hidden_size, init, activation=Tanh()),
    Affine(weight_init=init, activation=Softmax(),
           bias_init=init, axes=(ax.Y, ax.REC))
])

# Bind axes lengths:
ax.Y.length = len(tree_bank_data.vocab)
ax.REC.length = time_steps
ax.N.length = args.batch_size

# Placeholders with descriptive names
inputs = dict(inp_txt=ng.placeholder([ax.REC, ax.N]),
              tgt_txt=ng.placeholder([ax.REC, ax.N]))

optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6)

output_prob = seq1.train_outputs(inputs['inp_txt'])
loss = ng.cross_entropy_multi(output_prob,
    'iteration': ng.placeholder(axes=())
}

# Network definition
if not use_embedding:
    seq1 = Sequential([
        Preprocess(functor=expand_onehot),
        LSTM(nout=recurrent_units, init=init_uni, backward=False,
             reset_cells=True, activation=Logistic(),
             gate_activation=Tanh(), return_sequence=True),
        Affine(weight_init=init_uni, bias_init=init_uni,
               activation=Softmax(), axes=out_axis)
    ])
else:
    embedding_dim = 8
    seq1 = Sequential([
        LookupTable(len(shakes.vocab) + 1, embedding_dim, init_uni,
                    update=True),
        LSTM(nout=recurrent_units, init=init_uni, backward=False,
             reset_cells=True, activation=Logistic(),
             gate_activation=Tanh(),
def affine_layer(h_dim, activation, name):
    return Affine(nout=h_dim,
                  activation=activation,
                  weight_init=GaussianInit(std=1.0),
                  bias_init=ConstantInit(val=0.0),
                  name=name)
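# Usage sketch: a hidden layer and an unactivated readout built from the
# helper above (the sizes and names are illustrative assumptions).
hidden = affine_layer(h_dim=64, activation=Rectlin(), name="hidden")
readout = affine_layer(h_dim=1, activation=None, name="readout")
model = Sequential([hidden, readout])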
                activation=Rectlin(),
                padding=1),
    Pool2D(2, strides=2),
    Convolution((3, 3, 512), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Convolution((3, 3, 512), filter_init=GaussianInit(var=0.01),
                bias_init=init, activation=Rectlin(), padding=1),
    Pool2D(2, strides=2),
    Affine(nout=4096, weight_init=GaussianInit(var=0.01),
           bias_init=init, activation=Rectlin()),
    Affine(nout=4096, weight_init=GaussianInit(var=0.01),
           bias_init=init, activation=Rectlin()),
    Affine(axes=ax.Y, weight_init=GaussianInit(var=0.01),
           bias_init=init, activation=Softmax())
])

# Learning rate change based on schedule from learning_rate_policies.py
lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
# Teacher forcing: the decoder sees the targets shifted right by one step,
# with zeros at t=0.
previous_steps = [ng.constant(0., [batch_axis, feature_axis])] + \
                 [target_steps[i] for i in range(seq_len - 1)]
previous = ng.stack(previous_steps, time_axis)

# Define the model
encoder_recurrent_units = list(map(int, args.n_hidden.split(",")))
if args.bottleneck:
    decoder_recurrent_units = encoder_recurrent_units[::-1]
else:
    decoder_recurrent_units = encoder_recurrent_units
encoder = recurrent_model.RecurrentEncoder(celltype=args.modeltype,
                                           recurrent_units=encoder_recurrent_units,
                                           bottleneck=args.bottleneck)
decoder = recurrent_model.RecurrentDecoder(out_axes=(feature_axis,),
                                           celltype=args.modeltype,
                                           recurrent_units=decoder_recurrent_units)
affine_layer = Affine(weight_init=init_uni, bias_init=init_uni,
                      activation=Identity(), axes=[out_axis])

# Optimizer
optimizer = RMSProp(gradient_clip_value=args.grad_clip_value,
                    learning_rate=args.lr)


def predictions(encoder, affine_layer, inputs):
    encoded = encoder(inputs, combine=True)
    preds = affine_layer(encoded)
    preds = ng.axes_with_order(preds, rul_axes)
    return preds


def build_seq2seq_computations():
    # Training loss, optimizer
    train_decoded = recurrent_model.encode_and_decode(encoder, decoder,
else:
    layer_0 = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size, init, activation=Tanh(),
                   return_sequence=True, sum_out=True)

# Model initialization
seq1 = Sequential([
    layer_0,
    rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))
])

optimizer = RMSProp()

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
args = parser.parse_args()

np.random.seed(args.rng_seed)

# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                   Affine(nout=100, weight_init=GaussianInit(),
                          activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=GaussianInit(),
                          activation=Logistic())])

optimizer = GradientDescentMomentum(0.1, 0.9)
output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]),
                      inputs['label'])
loss = ng.cross_entropy_binary(output_prob,
                               ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
            padding=1),
    Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'),
    Inception([(64,), (96, 128), (16, 32), (32,)]),
    Inception([(128,), (128, 192), (32, 96), (64,)]),
    Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'),
    Inception([(192,), (96, 208), (16, 48), (64,)]),
    Inception([(160,), (112, 224), (24, 64), (64,)]),
    Inception([(128,), (128, 256), (24, 64), (64,)]),
    Inception([(112,), (144, 288), (32, 64), (64,)]),
    Inception([(256,), (160, 320), (32, 128), (128,)]),
    Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'),
    Inception([(256,), (160, 320), (32, 128), (128,)]),
    Inception([(384,), (192, 384), (48, 128), (128,)]),
    Pooling(pool_shape=(7, 7), strides=1, pool_type="avg"),
    Affine(axes=ax.Y, weight_init=XavierInit(), bias_init=bias_init,
           activation=Softmax())
])

# Learning rate schedule: start at 0.01 and multiply by gamma at each point
# listed in 'schedule'.
lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}

optimizer = GradientDescentMomentum(lr_schedule,
                                    0.0,
                                    wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
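# Worked check of the schedule arithmetic above (plain Python, no ngraph
# needed), assuming the 'schedule' policy semantics described in the comments
# elsewhere in these examples: the rate is multiplied by gamma at each listed
# point. Since gamma**3 = 1/250, after all three points the rate has dropped
# from 0.01 to 0.01 / 250 = 4e-5.
gamma = (1 / 250.)**(1 / 3.)

def lr_at(step, base_lr=0.01, schedule=(22, 44, 65)):
    return base_lr * gamma ** sum(step >= s for s in schedule)

assert abs(lr_at(0) - 0.01) < 1e-12
assert abs(lr_at(70) - 0.01 / 250.) < 1e-12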
inputs = {
    'X': ng.placeholder(in_axes),
    'y': ng.placeholder(out_axes),
    'iteration': ng.placeholder(axes=())
}

# Network definition
seq1 = Sequential([
    LSTM(nout=recurrent_units, init=init_uni, backward=False,
         activation=Logistic(), gate_activation=Tanh(),
         return_sequence=predict_seq),
    Affine(weight_init=init_uni, bias_init=init_uni,
           activation=Identity(), axes=out_axis)
])

# Optimizer
# The following policy sets the initial learning rate to 0.05 (base_lr).
# At iteration (num_iterations // 5), the learning rate is multiplied by
# gamma (new lr = .005).
# At iteration (num_iterations // 2), it is reduced by gamma again
# (new lr = .0005).
schedule = [num_iterations // 5, num_iterations // 2]
learning_rate_policy = {
    'name': 'schedule',
    'schedule': schedule,
    'gamma': 0.1,
    'base_lr': 0.05
}
optimizer = Adam(learning_rate=learning_rate_policy,