def __init__(self, inshape, hidshape, noutputs):
    ninput = np.prod(inshape)
    nhid = np.prod(hidshape)
    nparams = (ninput + 1) * nhid + (nhid * noutputs)  # TODO:
    self.params = np.empty(nparams)
    self._grad = np.empty(nparams)
    inhidwts = ninput * nhid
    hidoutwts = nhid * noutputs
    self.layers = [
        LinearLayer(inshape, hidshape,
                    params=self.params[0:inhidwts],
                    grad=self._grad[0:inhidwts]),
        LogisticLayer(hidshape,
                      params=self.params[inhidwts:(inhidwts + nhid)],
                      grad=self._grad[inhidwts:(inhidwts + nhid)]),
        LinearLayer(hidshape, noutputs,
                    params=self.params[(inhidwts + nhid):],
                    grad=self._grad[(inhidwts + nhid):]),
        SoftmaxLayer()
    ]
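# Sizing check for the flat parameter buffer above, with made-up shapes
# (inshape, hidshape, and noutputs here are illustrative, not from the original).
# The "+ 1" block appears to hold the LogisticLayer's per-unit parameters,
# which sit between the two weight slices.
import numpy as np

inshape, hidshape, noutputs = (28, 28), (100,), 10
ninput, nhid = np.prod(inshape), np.prod(hidshape)   # 784, 100
nparams = (ninput + 1) * nhid + nhid * noutputs      # 78400 + 100 + 1000
assert nparams == 78400 + 100 + 1000 == 79500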
def get_active_subnet(self, in_features, preserve_weight=True):
    sub_layer = LinearLayer(in_features, self.out_features, self.bias,
                            dropout_rate=self.dropout_rate)
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer

    sub_layer.linear.weight.data.copy_(
        self.linear.linear.weight.data[:self.out_features, :in_features])
    if self.bias:
        sub_layer.linear.bias.data.copy_(
            self.linear.linear.bias.data[:self.out_features])
    return sub_layer
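# A minimal torch sketch (shapes made up) of the weight-slicing rule used by
# get_active_subnet above: the sub-layer copies the top-left
# [out_features, in_features] block of the parent weight and the first
# out_features bias entries. This is an illustration, not the library code.
import torch

parent_weight = torch.randn(128, 256)   # [out_features, max_in_features]
parent_bias = torch.randn(128)
in_features, out_features = 64, 128

sub_weight = parent_weight[:out_features, :in_features].clone()
sub_bias = parent_bias[:out_features].clone()
assert sub_weight.shape == (128, 64) and sub_bias.shape == (128,)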
def test_LinearLayer():
    """
    Test LinearLayer by comparing the network's computation to basic matrix
    multiplication. Here we generate a set of random inputs and weights, and
    compare the output of the feedforward path as well as the computed weight
    gradient between our own implementation and PyTorch's native one.
    """
    # Generate input data.
    images = torch.randn(2, 10)

    # Generate initial weights and masks.
    weights_mask = torch.randn(10, 10)
    weights_mask[weights_mask < 0] = 0
    weights_mask[weights_mask > 0] = 1
    initial_weights = torch.randn(10, 10) * weights_mask
    assert initial_weights.grad is None

    ############## our method ################
    weights_ours = initial_weights.clone()
    weights_ours.requires_grad = True

    # Create the layer and compute the output.
    layer = LinearLayer(weights_ours, weights_mask)
    output_ours = layer.forward(images)
    loss_ours = ((output_ours - 1) ** 2).mean()
    loss_ours.backward()
    grad_ours = weights_ours.grad

    ############## pytorch's ################
    weights_py = initial_weights.clone()
    weights_py.requires_grad = True

    # Create output without using our own layer implementation.
    def make_backward_hook(weight_mask):
        """Helper function to create a backward hook for masking gradients."""
        return lambda grad: grad * weight_mask

    weights_py.register_hook(make_backward_hook(weights_mask))
    output_py = torch.mm(images, weights_py.t())
    loss_py = ((output_py - 1) ** 2).mean()
    loss_py.backward()
    grad_py = weights_py.grad

    ############# compare ################
    assert torch.all(torch.eq(output_ours, output_py))
    assert torch.all(torch.eq(loss_ours, loss_py))
    assert torch.all(torch.eq(grad_ours, grad_py))
def __init__(self, width_stages, n_cell_stages, stride_stages, dropout=0):
    super(NASNet, self).__init__()
    self.width_stages = width_stages
    self.n_cell_stages = n_cell_stages
    self.stride_stages = stride_stages

    in_channels = 32
    first_cell_width = 16

    # first conv layer
    self.first_conv = ConvLayer(3, in_channels, 3, 2, 1, 1, False, False,
                                True, 'relu6', 0, 'weight_bn_act')

    # first block
    first_block_config = {
        "name": "MobileInvertedResidualBlock",
        "mobile_inverted_conv": {
            "name": "MBInvertedConvLayer",
            "in_channels": in_channels,
            "out_channels": first_cell_width,
            "kernel_size": 3,
            "stride": 1,
            "expand_ratio": 1
        },
        "shortcut": None
    }
    self.first_block = MobileInvertedResidualBlock.build_from_config(
        first_block_config)
    in_channels = first_cell_width

    # blocks
    self.blocks = nn.ModuleList()
    for width, n_cell, s in zip(self.width_stages, self.n_cell_stages,
                                self.stride_stages):
        for i in range(n_cell):
            if i == 0:
                stride = s
            else:
                stride = 1
            block = WSMobileInvertedResidualBlock(in_channels, width, stride)
            in_channels = width
            self.blocks.append(block)

    self.feature_mix_layer = ConvLayer(in_channels, 1280, 1, 1, 1, 1, False,
                                       False, True, 'relu6', 0, 'weight_bn_act')
    self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = LinearLayer(1280, 1000, True, False, None, dropout,
                                  'weight_bn_act')
def test_numeric_gradient(self):
    l = LinearLayer(2, 3, 'random')
    weights = l.W.get()
    x = np.random.rand(2)
    grad = l.numeric_gradient(x)
    # The numeric Jacobian of a linear layer equals its weight matrix
    # (excluding the bias column).
    assert_almost_equal(grad, weights[:, 0:-1])

    in_delta = np.random.rand(3)
    for i, d in enumerate(in_delta):
        aux = np.zeros(in_delta.size)
        aux[i] = in_delta[i]
        delta = l.backward(aux)
        gradient = l.numeric_gradient(x)
        assert_almost_equal(in_delta[i] * gradient[i, :], delta, decimal=5)
def make_neural_network(incoming, hidden_number=None):
    input_shape = lasagne.layers.get_output_shape(incoming)
    input_feature_number = input_shape[1]
    max_size = input_shape[2]
    last_layer = incoming
    res = []
    last_input_number = input_feature_number
    for id, now_hidden_number in enumerate(hidden_number):
        now_layer = LinearLayer(incoming=last_layer,
                                max_size=max_size,
                                deepth=last_input_number,
                                num_output=now_hidden_number)
        now_layer = lasagne.layers.BatchNormLayer(now_layer)
        if id == len(hidden_number) - 1:
            now_layer = lasagne.layers.NonlinearityLayer(
                incoming=now_layer,
                nonlinearity=lasagne.nonlinearities.sigmoid)
        else:
            now_layer = lasagne.layers.NonlinearityLayer(
                incoming=now_layer,
                nonlinearity=lasagne.nonlinearities.rectify)
        last_layer = now_layer
        last_input_number = now_hidden_number
        res.append(now_layer)
    return res
def test_LinearFunctionFA():
    """Make sure that LinearLayerFA with symmetric weights behaves in the same
    way as LinearLayer."""
    num_inputs = 10

    # Generate weights and masks.
    weights_mask = torch.randn(10, 10)
    weights_mask[weights_mask < 0] = 0
    weights_mask[weights_mask > 0] = 1
    weights_ff = torch.randn(10, 10) * weights_mask
    weights_fb = weights_ff.clone()
    weights_ff.requires_grad = True
    weights_ff_initial = weights_ff.clone().detach()

    # Create the layers and their optimizers.
    layerFA = LinearLayerFA(weights_ff, weights_fb, weights_mask)
    optFA = torch.optim.SGD([layerFA.weight_matrix], lr=0.01)
    layer = LinearLayer(weights_ff, weights_mask)
    opt = torch.optim.SGD([layer.weight_matrix], lr=0.01)

    for i in range(num_inputs):
        opt.zero_grad()
        optFA.zero_grad()

        # Generate input data.
        images = torch.randn(2, 10)

        output = layer.forward(images)
        outputFA = layerFA.forward(images)
        loss = ((output - 1) ** 2).mean()
        lossFA = ((outputFA - 1) ** 2).mean()
        loss.backward()
        lossFA.backward()
        opt.step()
        optFA.step()

        assert torch.all(torch.eq(output, outputFA))
        assert loss == lossFA
        assert torch.all(torch.eq(layerFA.weight_matrix, layer.weight_matrix))
        assert torch.all(torch.eq(layerFA.weight_matrix.grad,
                                  layer.weight_matrix.grad))
def __init__(self, num_neurons: List[int]):
    self.layers = []
    for index, neuron in enumerate(num_neurons[:-1]):
        print(neuron, num_neurons[index + 1])
        layer = LinearLayer((neuron, num_neurons[index + 1]))
        self.layers.append(layer)
        layer = SigmoidLayer()
        self.layers.append(layer)
    self.loss_layer = None
def __init__(self, input_size, output_size):
    self.input_size = input_size
    self.output_size = output_size
    self.n1 = SumGroup(
        MulGroup(
            Sequential(LinearLayer(input_size + output_size, output_size),
                       SigmoidLayer),
            GenericLayer),
        MulGroup(
            Sequential(LinearLayer(input_size + output_size, output_size),
                       SigmoidLayer),
            Sequential(LinearLayer(input_size + output_size, output_size),
                       TanhLayer)))
    self.n2 = MulGroup(
        Sequential(GenericLayer, TanhLayer),
        Sequential(LinearLayer(input_size + output_size, output_size),
                   SigmoidLayer))
    self.ct = np.zeros(output_size)
    self.ht = np.zeros(output_size)
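# Hedged reading of the graph built above: n1 and n2 look like a standard LSTM
# cell update (biases folded into the LinearLayers). The numpy sketch below
# illustrates that interpretation only; the sizes and the sigmoid helper are
# made up, not taken from the original code.
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

input_size, output_size = 3, 2                      # hypothetical sizes
x_t = np.random.rand(input_size)
h_prev, c_prev = np.zeros(output_size), np.zeros(output_size)
xh = np.concatenate([x_t, h_prev])                  # LinearLayer input: [x_t, h_{t-1}]
Wf, Wi, Wc, Wo = (np.random.randn(output_size, input_size + output_size)
                  for _ in range(4))

# n1: new cell state = forget-gated old cell + input-gated candidate
c_t = _sigmoid(Wf @ xh) * c_prev + _sigmoid(Wi @ xh) * np.tanh(Wc @ xh)
# n2: new hidden state = output gate applied to tanh of the cell state
h_t = np.tanh(c_t) * _sigmoid(Wo @ xh)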
def __init__(self, widths=[2, 2, 3, 2], lr=0.05, loss=L2Loss()):
    sigmoid = Sigmoid()
    self._layers = []
    self._lr = lr
    for n_in, n_out in zip(widths[:-1], widths[1:]):
        linearLayer = LinearLayer(n_in, n_out, bias=True,
                                  scale=1 / np.sqrt(n_in))
        self._layers.append(ActivatedLayer(linearLayer, sigmoid))
    self._loss = loss
def test_update(self):
    l = LinearLayer(2, 6, 'ones')
    y = l.forward(np.array([2.0, 2.0]))
    dJdW = l.dJdW_gradient(np.array([1.0, 2.0, 3.0, 4.0, 1.0, 1.0]))
    self.assertEqual(l.W.get().shape, (6, 3))
    self.assertEqual(dJdW.shape, (6, 3))

    l = LinearLayer(2, 3, 'ones')
    y = l.forward(np.array([2.0, 2.0]))
    dJdW = l.dJdW_gradient(np.array([1.0, 2.0, 3.0]))
    self.assertEqual(l.W.get().shape, (3, 3))
    self.assertEqual(dJdW.shape, (3, 3))
    # dJdW is the outer product of the incoming delta [1, 2, 3] and the
    # bias-augmented input [2, 2, 1].
    assert_array_equal(
        dJdW,
        np.matrix([[2.0, 2.0, 1.0], [4.0, 4.0, 2.0], [6.0, 6.0, 3.0]]))
def test_LinearLayer(self):
    l1 = LinearLayer(5, 6, 'ones')
    n = Sequential([l1])
    y = n.forward(np.array([2.0, 1.0, 2.0, 3.0, 4.0]))
    self.assertEqual(y.shape, (6, ))
    assert_array_equal(y, np.array([13.0, 13.0, 13.0, 13.0, 13.0, 13.0]))

    l2 = LinearLayer(6, 2, 'ones')
    n.add(l2)
    y = n.forward(np.array([2.0, 1.0, 2.0, 3.0, 4.0]))
    self.assertEqual(y.shape, (2, ))
    assert_array_equal(y, np.array([79.0, 79.0]))

    d = n.backward(np.array([2.0, 3.0]))
    self.assertEqual(d.shape, (5, ))
    assert_array_equal(d, np.array([30., 30., 30., 30., 30.]))
def main():
    mnist_path = os.path.join(os.getcwd(), "MNIST")
    (train_images, train_labels), (test_images, test_labels) = load_data(mnist_path)

    layers = [
        LinearLayer(32, 28**2, xavier), SigmoidLayer(),
        LinearLayer(32, 32, xavier), SigmoidLayer(),
        LinearLayer(10, 32, xavier), SigmoidLayer()
    ]
    net = NeuralNet(layers)
    np.seterr(over='ignore')

    train(net, train_images, train_labels, flatten_mnist_input,
          mnist_label_as_one_hot, epoch_count=1000, batch_size=1)

    confusion_matrix = DataFrame(np.zeros((10, 10)),
                                 index=range(10), columns=range(10))
    evaluator = test(net, test_images, test_labels, confusion_matrix,
                     flatten_mnist_input, highest_output_neuron,
                     mnist_label_as_one_hot, title="POST-TRAIN")
    evaluator.plot()
def test_neuron_one_input(self):
    xv = np.array([0.5, 0.1, 0.5])
    x = Input(['x'], 'x')
    Wv = np.array([[0.1, 0.1, 0.2], [0.5, 0.2, 0.2]])
    W = MWeight(3, 2, weights=Wv)
    bv = np.array([0.3, 0.1])
    b = VWeight(2, weights=bv)
    net = ComputationalGraphLayer(Sigmoid(W.dot(x) + b))
    out = net.forward(xv)
    self.assertEqual(out.shape, (2, ))
    check_out = 1.0 / (1.0 + np.exp(-Wv.dot(xv) - bv))
    assert_almost_equal(out, check_out)

    dJdy = net.backward(np.array([1.0, 1.0]))
    self.assertEqual(dJdy.shape, (3, ))
    assert_almost_equal(dJdy, np.sum(net.numeric_gradient(xv), 0))
    assert_almost_equal(dJdy, (check_out * (1 - check_out)).dot(Wv))

    # The same network expressed as a LinearLayer (bias folded into the last
    # weight column) followed by a sigmoid.
    net2 = Sequential(
        LinearLayer(3, 2, weights=np.hstack([Wv, bv.reshape(2, 1)])),
        SigmoidLayer)
    out2 = net2.forward(xv)
    assert_almost_equal(out, out2)
    dJdy2 = net2.backward(np.array([1.0, 1.0]))
    assert_almost_equal(dJdy, dJdy2)
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0.1,
             base_stage_width=None, width_mult_list=1.0, ks_list=3,
             expand_ratio_list=6, depth_list=4):
    self.width_mult_list = int2list(width_mult_list, 1)
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)
    self.base_stage_width = base_stage_width

    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]

    final_expand_width = [
        make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8)
        for _ in self.width_mult_list
    ]
    self.final_expand_width = final_expand_width

    last_channel = [
        make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8)
        for _ in self.width_mult_list
    ]
    self.last_channel = last_channel

    # stride_stages = [1, 2, 2, 2, 1, 2]
    stride_stages = [1, 2, 2, 2, 1, 1]
    act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
    se_stages = [False, False, True, False, True, True]

    if depth_list is None:
        n_block_list = [1, 2, 3, 4, 2, 3]
        self.depth_list = [4, 4]
        print('Use MobileNetV3 Depth Setting')
    else:
        n_block_list = [1] + [max(self.depth_list)] * 5

    width_list = []
    for base_width in base_stage_width[:-2]:
        width = [
            make_divisible(base_width * width_mult, 8)
            for width_mult in self.width_mult_list
        ]
        width_list.append(width)

    input_channel = width_list[0]
    # first conv layer
    # if width_mult_list has only one elem
    if len(set(input_channel)) == 1:
        first_conv = ConvLayer(3, max(input_channel), kernel_size=3,
                               stride=2, act_func='h_swish')
        first_block_conv = MBInvertedConvLayer(
            in_channels=max(input_channel),
            out_channels=max(input_channel),
            kernel_size=3,
            stride=stride_stages[0],
            expand_ratio=1,
            act_func=act_stages[0],
            use_se=se_stages[0],
        )
    else:
        first_conv = DynamicConvLayer(
            in_channel_list=int2list(3, len(input_channel)),
            out_channel_list=input_channel,
            kernel_size=3,
            stride=2,
            act_func='h_swish',
        )
        first_block_conv = DynamicMBConvLayer(
            in_channel_list=input_channel,
            out_channel_list=input_channel,
            kernel_size_list=3,
            expand_ratio_list=1,
            stride=stride_stages[0],
            act_func=act_stages[0],
            use_se=se_stages[0],
        )
    first_block = MobileInvertedResidualBlock(
        first_block_conv, IdentityLayer(input_channel, input_channel))

    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1
    feature_dim = input_channel

    for width, n_block, s, act_func, use_se in zip(width_list[1:],
                                                   n_block_list[1:],
                                                   stride_stages[1:],
                                                   act_stages[1:],
                                                   se_stages[1:]):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=output_channel,
                kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func=act_func,
                use_se=use_se,
            )
            if stride == 1 and feature_dim == output_channel:
                shortcut = IdentityLayer(feature_dim, feature_dim)
            else:
                shortcut = None
            blocks.append(
                MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel

    # final expand layer, feature mix layer & classifier
    if len(final_expand_width) == 1:
        final_expand_layer = ConvLayer(max(feature_dim),
                                       max(final_expand_width),
                                       kernel_size=1, act_func='h_swish')
        feature_mix_layer = ConvLayer(
            max(final_expand_width),
            max(last_channel),
            kernel_size=1,
            bias=False,
            use_bn=False,
            act_func='h_swish',
        )
    else:
        final_expand_layer = DynamicConvLayer(
            in_channel_list=feature_dim,
            out_channel_list=final_expand_width,
            kernel_size=1,
            act_func='h_swish')
        feature_mix_layer = DynamicConvLayer(
            in_channel_list=final_expand_width,
            out_channel_list=last_channel,
            kernel_size=1,
            use_bn=False,
            act_func='h_swish',
        )

    if len(set(last_channel)) == 1:
        classifier = LinearLayer(max(last_channel), n_classes,
                                 dropout_rate=dropout_rate)
    else:
        classifier = DynamicLinearLayer(in_features_list=last_channel,
                                        out_features=n_classes,
                                        bias=True,
                                        dropout_rate=dropout_rate)

    super(OFAMobileNetV3, self).__init__(first_conv, blocks,
                                         final_expand_layer,
                                         feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [
        len(block_idx) for block_idx in self.block_group_info
    ]
for (img, target) in test:
    # print str(np.argmax(model.forward(test_data[ind])))+' '+str(np.argmax(test_targets[ind]))
    if np.argmax(model.forward(img)) != np.argmax(target):
        err += 1
print((1.0 - err / float(len(test))) * 100.0)

if load_net:
    print("Load Network")
    model = StoreNetwork.load(name_net)
else:
    print("New Network")
    # Two layer network
    model = Sequential([
        NormalizationLayer(0, 255, -0.1, 0.1),
        LinearLayer(784, 10, weights='norm_random'),
        # TanhLayer,
        # LinearLayer(50, 10, weights='norm_random'),
        # TanhLayer,
        # NormalizationLayer(0,10,0,1),
        # SigmoidLayer()
    ])

# display = ShowTraining(epochs_num = epochs)
trainer = Trainer(show_training=False)  # , show_function = display.show)

J_list, dJdy_list, J_test = trainer.learn(
    model=model,
    train=train,
    test=test,
def test_backward(self):
    l = LinearLayer(2, 6, 'ones')
    d = l.backward(np.array([1.0, 2.0, 3.0, 4.0, 1.0, 1.0]))
    self.assertEqual(d.shape, (2, ))
    # With all-ones weights, each input receives the sum of the incoming
    # deltas: 1 + 2 + 3 + 4 + 1 + 1 = 12.
    assert_array_equal(d, np.array([12.0, 12.0]))
def test_forward(self):
    l = LinearLayer(2, 6, 'ones')
    y = l.forward(np.array([2.0, 5.0]))
    assert_array_equal(y, np.array([8.0, 8.0, 8.0, 8.0, 8.0, 8.0]))
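# Quick numpy check of the expected value above: with an all-ones (6, 3) weight
# matrix and the input augmented by a bias unit, every output is 2 + 5 + 1 = 8.
# The bias-augmentation is inferred from the expected values in these tests,
# not from LinearLayer's documentation.
import numpy as np

W = np.ones((6, 3))
x_aug = np.array([2.0, 5.0, 1.0])   # input plus bias unit
assert np.allclose(W @ x_aug, np.full(6, 8.0))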
def test_dim(self):
    l = LinearLayer(5, 6)
    y = l.forward(np.random.rand(5))
    self.assertEqual(y.shape, (6, ))
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0.1,
             base_stage_width=None, width_mult_list=1.0, ks_list=3,
             expand_ratio_list=6, depth_list=4):
    """
    Args:
        n_classes: number of output classes
        bn_param: BatchNorm parameters (momentum, eps)
        dropout_rate: dropout probability (which layers is it applied in?)
        width_mult_list: per-layer width multipliers (not a global scaling
            of the base network width)
        ks_list: candidate kernel sizes
        expand_ratio_list: expansion factors for the network width / channel count
        depth_list: network depth, i.e. how often layers are repeated / stacked
    """
    # int2list turns a list, tuple, or int into a list
    self.width_mult_list = int2list(width_mult_list, 1)
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)
    self.base_stage_width = base_stage_width

    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]

    # make_divisible rounds the conv channel count to the nearest multiple of 8
    final_expand_width = [
        make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8)
        for _ in self.width_mult_list
    ]
    last_channel = [
        make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8)
        for _ in self.width_mult_list
    ]

    # strides (controlling downsampling), activation functions, and SE flags
    # ('se' is squeeze-and-excitation, not self-attention) per stage
    stride_stages = [1, 2, 2, 2, 1, 2]
    act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
    se_stages = [False, False, True, False, True, True]

    # depth configuration: apart from the first conv, the other five stages can expand
    if depth_list is None:
        n_block_list = [1, 2, 3, 4, 2, 3]
        self.depth_list = [4, 4]
        print('Use MobileNetV3 Depth Setting')
    else:
        n_block_list = [1] + [max(self.depth_list)] * 5

    # width / channel-count configuration
    width_list = []
    for base_width in base_stage_width[:-2]:
        width = [
            make_divisible(base_width * width_mult, 8)
            for width_mult in self.width_mult_list
        ]
        width_list.append(width)

    # width_list doesn't do quite what I imagined; I thought it was the
    # expansion factor of the initial channels
    input_channel = width_list[0]

    # first conv layer
    if len(set(input_channel)) == 1:
        first_conv = ConvLayer(3, max(input_channel), kernel_size=3,
                               stride=2, act_func='h_swish')
        first_block_conv = MBInvertedConvLayer(
            in_channels=max(input_channel),
            out_channels=max(input_channel),
            kernel_size=3,
            stride=stride_stages[0],
            expand_ratio=1,
            act_func=act_stages[0],
            use_se=se_stages[0],
        )
    else:
        first_conv = DynamicConvLayer(
            in_channel_list=int2list(3, len(input_channel)),
            out_channel_list=input_channel,
            kernel_size=3,
            stride=2,
            act_func='h_swish',
        )
        first_block_conv = DynamicMBConvLayer(
            in_channel_list=input_channel,
            out_channel_list=input_channel,
            kernel_size_list=3,
            expand_ratio_list=1,
            stride=stride_stages[0],
            act_func=act_stages[0],
            use_se=se_stages[0],
        )
    first_block = MobileInvertedResidualBlock(
        first_block_conv, IdentityLayer(input_channel, input_channel))

    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1
    feature_dim = input_channel

    for width, n_block, s, act_func, use_se in zip(width_list[1:],
                                                   n_block_list[1:],
                                                   stride_stages[1:],
                                                   act_stages[1:],
                                                   se_stages[1:]):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=output_channel,
                kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func=act_func,
                use_se=use_se,
            )
            if stride == 1 and feature_dim == output_channel:
                shortcut = IdentityLayer(feature_dim, feature_dim)
            else:
                shortcut = None
            blocks.append(
                MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel

    # final expand layer, feature mix layer & classifier
    if len(final_expand_width) == 1:
        final_expand_layer = ConvLayer(max(feature_dim),
                                       max(final_expand_width),
                                       kernel_size=1, act_func='h_swish')
        feature_mix_layer = ConvLayer(
            max(final_expand_width),
            max(last_channel),
            kernel_size=1,
            bias=False,
            use_bn=False,
            act_func='h_swish',
        )
    else:
        final_expand_layer = DynamicConvLayer(
            in_channel_list=feature_dim,
            out_channel_list=final_expand_width,
            kernel_size=1,
            act_func='h_swish')
        feature_mix_layer = DynamicConvLayer(
            in_channel_list=final_expand_width,
            out_channel_list=last_channel,
            kernel_size=1,
            use_bn=False,
            act_func='h_swish',
        )

    if len(set(last_channel)) == 1:
        classifier = LinearLayer(max(last_channel), n_classes,
                                 dropout_rate=dropout_rate)
    else:
        classifier = DynamicLinearLayer(in_features_list=last_channel,
                                        out_features=n_classes,
                                        bias=True,
                                        dropout_rate=dropout_rate)

    super(OFAMobileNetV3, self).__init__(first_conv, blocks,
                                         final_expand_layer,
                                         feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [
        len(block_idx) for block_idx in self.block_group_info
    ]
# np.array([-1.0,-1.0,-1.0,-1.0,-1.0]),
# np.array([1.0,1.0,1.0,1.0,1.0])
# )
# norm = NormalizationLayer(
#     np.array([0.0,0.0,0.0,-3.0]),
#     np.array([5.0,5.0,5.0,3.0]),
#     np.array([-1.0,-1.0,-1.0,-1.0]),
#     np.array([1.0,1.0,1.0,1.0])
# )
norm = NormalizationLayer(np.array([0.0, 0.0]), np.array([5.0, 5.0]),
                          np.array([0.0, 0.0]), np.array([1.0, 1.0]))

W1 = utils.SharedWeights('gaussian', 2 + 1, 2)
W2 = utils.SharedWeights('gaussian', 2 + 1, 3)
Q = Sequential(
    norm,
    LinearLayer(2, 2, weights=W1),
    TanhLayer,
    LinearLayer(2, 3, weights=W2),
    # TanhLayer
)

W3 = utils.SharedWeights('gaussian', 2 + 1, 2)
W4 = utils.SharedWeights('gaussian', 2 + 1, 3)
# W3 = utils.SharedWeights(np.array([[10.0,-10.0,0.0],[-10.0,10.0,0.0]]),2+1,2)
# W2 = utils.SharedWeights('gaussian',2+1,2)
Q_hat = Sequential(
    norm,
    LinearLayer(2, 2, weights=W3),
    ReluLayer,
    LinearLayer(2, 3, weights=W4),
    # TanhLayer
)
def _createFinalLayer(self, ni, nu):
    return LinearLayer(ni, nu)
else:
    # norm = NormalizationLayer(
    #     np.array([0.0,0.0,-10.0,-10.0]),
    #     np.array([5.0,5.0,10.0,10.0]),
    #     np.array([-1.0,-1.0,-1.0,-1.0]),
    #     np.array([1.0,1.0,1.0,1.0])
    # )
    norm = NormalizationLayer(np.array([0.0, 0.0]), np.array([5.0, 5.0]),
                              np.array([-1.0, -1.0]), np.array([1.0, 1.0]))
    n = Sequential(
        norm,
        # LinearLayer(2,5,weights='gaussian'),
        # TanhLayer,
        # AddGaussian(1),
        LinearLayer(2, 4, weights='gaussian'),
        RandomGaussianLayer(1),
        SoftMaxLayer)
    agent = GenericAgent(n, 4, 40, 5.0)
    agent.set_training_options(
        Trainer(),
        NegativeLogLikelihoodLoss(),
        GradientDescentMomentum(learning_rate=0.1, momentum=0.7)
        # GradientDescent(learning_rate=0.2)
    )

start = np.array([3.5, 3.5])
obstacles = [
    # np.array([2.5,2.5,1.0])
]
def create_torch_layers(self, device=None):
    """
    Create torch layers in self.torch_layers (with weight matrices and masks)
    based on self.connections and self.weights. Each layer has one weight
    matrix/weight mask. This function also deletes self.weights (the list of
    connection strengths), because it might get out of sync with the weight
    matrices once training starts. To get self.weights back (and get rid of
    torch layers), call delete_torch_layers.

    Args:
        device (str or torch.device, optional): The device to put weight
            matrices and masks on.
    """
    if self.torch_layers is not None:
        raise RuntimeError(
            'Torch layers already exist. If you want to re-create them '
            '(e.g. on a different device), call delete_torch_layers before')

    # Create weight matrices and masks.
    self.torch_layers = []
    for i in range(1, len(self.neurons_in_layer)):  # no torch layer for input neurons
        # Find all neurons connecting to this layer.
        neurons_connecting_to_layer = set()  # avoid duplicates
        for from_neuron, to_neuron in self.connections:
            to_layer = self.find_layer(to_neuron)
            if to_layer == i:
                neurons_connecting_to_layer.add(from_neuron)
        neurons_connecting_to_layer = sorted(list(neurons_connecting_to_layer))

        weight_matrix, weight_mask = self.create_weight_matrix(
            self.connections,
            self.weights,
            from_neurons=neurons_connecting_to_layer,
            to_neurons=self.neurons_in_layer[i],
            device=device)

        if not self.train_only_outputs or i == len(self.neurons_in_layer) - 1:
            weight_matrix.requires_grad = True

        if self.use_random_feedback:
            # feedback alignment
            backward_weight_matrix = torch.randn_like(weight_matrix,
                                                      requires_grad=False)
            backward_weight_matrix *= weight_mask
            self.torch_layers.append(
                LinearLayerFA(weight_matrix,
                              backward_weight_matrix,
                              weight_mask,
                              from_neurons=neurons_connecting_to_layer,
                              to_neurons=self.neurons_in_layer[i]))
        else:
            # normal backpropagation
            self.torch_layers.append(
                LinearLayer(weight_matrix,
                            weight_mask,
                            from_neurons=neurons_connecting_to_layer,
                            to_neurons=self.neurons_in_layer[i]))

    # Delete self.weights so that it doesn't get out of sync with weight
    # matrices during training.
    self.weights = None
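# Minimal sketch (not the class's create_weight_matrix) of how a sparse
# connection list can become a dense weight matrix plus a 0/1 mask, which is
# what the LinearLayer / LinearLayerFA constructors above consume. Neuron ids
# and connections here are made up.
import torch

connections = [(0, 2), (1, 2), (1, 3)]   # (from_neuron, to_neuron)
from_neurons = [0, 1]                     # neurons feeding the layer
to_neurons = [2, 3]                       # neurons inside the layer

weight_mask = torch.zeros(len(to_neurons), len(from_neurons))
for f, t in connections:
    weight_mask[to_neurons.index(t), from_neurons.index(f)] = 1.0

weight_matrix = torch.randn(len(to_neurons), len(from_neurons)) * weight_mask
weight_matrix.requires_grad_()   # trainable; absent connections stay zero via the mask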
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-3), dropout_rate=0.1,
             base_stage_width=None, width_mult_list=1.0, ks_list=3,
             expand_ratio_list=6, depth_list=4):
    self.width_mult_list = int2list(width_mult_list, 1)
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)
    self.base_stage_width = base_stage_width

    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    if base_stage_width == 'google':
        base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
    else:
        # ProxylessNAS Stage Width
        base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

    input_channel = [
        make_divisible(base_stage_width[0] * width_mult, 8)
        for width_mult in self.width_mult_list
    ]
    first_block_width = [
        make_divisible(base_stage_width[1] * width_mult, 8)
        for width_mult in self.width_mult_list
    ]
    last_channel = [
        make_divisible(base_stage_width[-1] * width_mult, 8)
        if width_mult > 1.0 else base_stage_width[-1]
        for width_mult in self.width_mult_list
    ]

    # first conv layer
    if len(input_channel) == 1:
        first_conv = ConvLayer(3, max(input_channel), kernel_size=3, stride=2,
                               use_bn=True, act_func='relu6',
                               ops_order='weight_bn_act')
    else:
        first_conv = DynamicConvLayer(
            in_channel_list=int2list(3, len(input_channel)),
            out_channel_list=input_channel,
            kernel_size=3,
            stride=2,
            act_func='relu6')

    # first block
    if len(first_block_width) == 1:
        first_block_conv = MBInvertedConvLayer(
            in_channels=max(input_channel),
            out_channels=max(first_block_width),
            kernel_size=3,
            stride=1,
            expand_ratio=1,
            act_func='relu6',
        )
    else:
        first_block_conv = DynamicMBConvLayer(
            in_channel_list=input_channel,
            out_channel_list=first_block_width,
            kernel_size_list=3,
            expand_ratio_list=1,
            stride=1,
            act_func='relu6',
        )
    first_block = MobileInvertedResidualBlock(first_block_conv, None)
    input_channel = first_block_width

    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1

    stride_stages = [2, 2, 2, 1, 2, 1]
    if depth_list is None:
        n_block_list = [2, 3, 4, 3, 3, 1]
        self.depth_list = [4, 4]
        print('Use MobileNetV2 Depth Setting')
    else:
        n_block_list = [max(self.depth_list)] * 5 + [1]

    width_list = []
    for base_width in base_stage_width[2:-1]:
        width = [
            make_divisible(base_width * width_mult, 8)
            for width_mult in self.width_mult_list
        ]
        width_list.append(width)

    for width, n_block, s in zip(width_list, n_block_list, stride_stages):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=int2list(input_channel, 1),
                out_channel_list=int2list(output_channel, 1),
                kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func='relu6',
            )
            if stride == 1 and input_channel == output_channel:
                shortcut = IdentityLayer(input_channel, input_channel)
            else:
                shortcut = None
            mb_inverted_block = MobileInvertedResidualBlock(
                mobile_inverted_conv, shortcut)
            blocks.append(mb_inverted_block)
            input_channel = output_channel

    # 1x1_conv before global average pooling
    if len(last_channel) == 1:
        feature_mix_layer = ConvLayer(
            max(input_channel),
            max(last_channel),
            kernel_size=1,
            use_bn=True,
            act_func='relu6',
        )
        classifier = LinearLayer(max(last_channel), n_classes,
                                 dropout_rate=dropout_rate)
    else:
        feature_mix_layer = DynamicConvLayer(
            in_channel_list=input_channel,
            out_channel_list=last_channel,
            kernel_size=1,
            stride=1,
            act_func='relu6',
        )
        classifier = DynamicLinearLayer(in_features_list=last_channel,
                                        out_features=n_classes,
                                        bias=True,
                                        dropout_rate=dropout_rate)

    super(OFAProxylessNASNets, self).__init__(first_conv, blocks,
                                              feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [
        len(block_idx) for block_idx in self.block_group_info
    ]
# np.array([-1.0,-1.0,-1.0,-1.0,-1.0]),
# np.array([1.0,1.0,1.0,1.0,1.0])
# )
# norm = NormalizationLayer(
#     np.array([0.0,0.0,0.0,-3.0]),
#     np.array([5.0,5.0,5.0,3.0]),
#     np.array([-1.0,-1.0,-1.0,-1.0]),
#     np.array([1.0,1.0,1.0,1.0])
# )
norm = NormalizationLayer(np.array([0.0, 0.0]), np.array([5.0, 5.0]),
                          np.array([-1.0, -1.0]), np.array([1.0, 1.0]))

W1 = utils.SharedWeights('gaussian', 2 + 1, 4)
# W2 = utils.SharedWeights('gaussian',3+1,2)
n = Sequential(
    norm,
    LinearLayer(2, 4, weights=W1),
    # TanhLayer,
    # AddGaussian(1),
    # LinearLayer(3,2,weights=W2),
    RandomGaussianLayer(1),
    SoftMaxLayer)

agent = GenericAgent(n, 4, 25, 0.0)
agent.set_training_options(
    Trainer(show_training=True),
    NegativeLogLikelihoodLoss(),
    GradientDescentMomentum(learning_rate=0.1, momentum=0.5)
    # GradientDescent(learning_rate=0.2)
)
class NASNet(BasicUnit):

    def __init__(self, width_stages, n_cell_stages, stride_stages, dropout=0):
        super(NASNet, self).__init__()
        self.width_stages = width_stages
        self.n_cell_stages = n_cell_stages
        self.stride_stages = stride_stages

        in_channels = 32
        first_cell_width = 16

        # first conv layer
        self.first_conv = ConvLayer(3, in_channels, 3, 2, 1, 1, False, False,
                                    True, 'relu6', 0, 'weight_bn_act')

        # first block
        first_block_config = {
            "name": "MobileInvertedResidualBlock",
            "mobile_inverted_conv": {
                "name": "MBInvertedConvLayer",
                "in_channels": in_channels,
                "out_channels": first_cell_width,
                "kernel_size": 3,
                "stride": 1,
                "expand_ratio": 1
            },
            "shortcut": None
        }
        self.first_block = MobileInvertedResidualBlock.build_from_config(
            first_block_config)
        in_channels = first_cell_width

        # blocks
        self.blocks = nn.ModuleList()
        for width, n_cell, s in zip(self.width_stages, self.n_cell_stages,
                                    self.stride_stages):
            for i in range(n_cell):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                block = WSMobileInvertedResidualBlock(in_channels, width, stride)
                in_channels = width
                self.blocks.append(block)

        self.feature_mix_layer = ConvLayer(in_channels, 1280, 1, 1, 1, 1, False,
                                           False, True, 'relu6', 0,
                                           'weight_bn_act')
        self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = LinearLayer(1280, 1000, True, False, None, dropout,
                                      'weight_bn_act')

    def forward(self, x, arch, bn_train=False):
        if bn_train:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm1d):
                    m.train()
        x = self.first_conv(x)
        x = self.first_block(x)
        for i, block in enumerate(self.blocks):
            x = block(x, arch[i])
        # x = self.last_block(x)
        if self.feature_mix_layer:
            x = self.feature_mix_layer(x)
        x = self.global_avg_pooling(x)
        x = x.view(x.size(0), -1)  # flatten
        x = self.classifier(x)
        return x

    def get_flops(self, x):
        flop, x = self.first_conv.get_flops(x)
        for block in self.blocks:
            delta_flop, x = block.get_flops(x)
            flop += delta_flop
        if self.feature_mix_layer:
            delta_flop, x = self.feature_mix_layer.get_flops(x)
            flop += delta_flop
        x = self.global_avg_pooling(x)
        x = x.view(x.size(0), -1)  # flatten
        delta_flop, x = self.classifier.get_flops(x)
        flop += delta_flop
        return flop, x

    def set_bn_param(self, bn_momentum, bn_eps):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.momentum = bn_momentum
                m.eps = bn_eps
        return

    def get_bn_param(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                return {
                    'momentum': m.momentum,
                    'eps': m.eps,
                }
        return None

    def init_model(self, model_init, init_div_groups=True):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if model_init == 'he_fout':
                    n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                    if init_div_groups:
                        n /= m.groups
                    m.weight.data.normal_(0, math.sqrt(2. / n))
                elif model_init == 'he_fin':
                    n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
                    if init_div_groups:
                        n /= m.groups
                    m.weight.data.normal_(0, math.sqrt(2. / n))
                else:
                    raise NotImplementedError
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def weight_parameters(self):
        return self.parameters()

    @staticmethod
    def _make_divisible(v, divisor, min_val=None):
        """
        This function is taken from the original tf repo.
        It ensures that all layers have a channel number that is divisible by 8.
        It can be seen here:
        https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
        :param v:
        :param divisor:
        :param min_val:
        :return:
        """
        if min_val is None:
            min_val = divisor
        new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
        # Make sure that round down does not go down by more than 10%.
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v
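# Quick worked check of _make_divisible (illustrative values): 30 rounds up to
# 32 and 22 rounds up to 24; 17 rounds down to 16, which is still >= 0.9 * 17,
# so the 10% correction does not trigger.
assert NASNet._make_divisible(30, 8) == 32
assert NASNet._make_divisible(22, 8) == 24
assert NASNet._make_divisible(17, 8) == 16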
if load:
    lstm = GenericLayer.load('lstm.net')
else:
    l = LSTMNet(vocab_size, hidden_size,
                Wi=Wi, Wf=Wf, Wc=Wc, Wo=Wo,
                bi=bi, bf=bf, bc=bc, bo=bo)
    lstm = Sequential(
        l,
        LinearLayer(hidden_size, vocab_size),
    )

sm = SoftMaxLayer()

# lstm.on_message('init_nodes',20)
#
# x = to_one_hot_vect(char_to_ix['b'],vocab_size)
# print len(x)
# for i in range(20):
#     print lstm.forward(x,update = True)
#
# print lstm.backward(x)

epochs = 100
    targets = [to_one_hot_vect(target, num_classes) for target in targets]
    train = zip(
        np.array(data[:n * 9 // 10]).astype(np.float),
        np.array(targets[:n * 9 // 10]).astype(np.float))
    test = zip(
        np.array(data[n // 10:]).astype(np.float),
        np.array(targets[n // 10:]).astype(np.float))
    return train, test


train, test = gen_data()

model = Sequential([
    LinearLayer(2, 20, weights='random'),
    TanhLayer(),
    # SigmoidLayer(),
    # HeavisideLayer(),
    # LinearLayer(10, 20, weights='random'),
    # SigmoidLayer(),
    LinearLayer(20, num_classes, weights='random', L1=0.001),
    # ReluLayer(),
    # SigmoidLayer()
    SoftMaxLayer()
])
# model = Sequential([
#     LinearLayer(2, 5, weights='random'),
#     SigmoidLayer(),
#     #LinearLayer(3, 3, weights='random'),
def __init__(self, numpy_rng=numpy.random.RandomState(2**30),
             theano_rng=None, n_ins=601, n_outs=259,
             l1_reg=None, l2_reg=None,
             hidden_layers_sizes=[256, 256, 256, 256, 256],
             hidden_activation='tanh', output_activation='sigmoid'):

    print("DNN Initialisation")
    # logger = logging.getLogger("DNN initialization")

    self.sigmoid_layers = []
    self.params = []
    self.delta_params = []
    self.n_layers = len(hidden_layers_sizes)
    self.n_ins = n_ins
    self.n_outs = n_outs
    # self.speaker_ID = []
    self.output_activation = output_activation

    self.l1_reg = l1_reg
    self.l2_reg = l2_reg

    # vctk_class = Code_01.VCTK_feat_collection()

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy.random.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    self.y = T.matrix('y')

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.tanh)  # T.nnet.sigmoid
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)
        self.delta_params.extend(sigmoid_layer.delta_params)

    # add final layer
    if self.output_activation == 'linear':
        self.final_layer = LinearLayer(rng=numpy_rng,
                                       input=self.sigmoid_layers[-1].output,
                                       n_in=hidden_layers_sizes[-1],
                                       n_out=n_outs)
    elif self.output_activation == 'sigmoid':
        self.final_layer = SigmoidLayer(rng=numpy_rng,
                                        input=self.sigmoid_layers[-1].output,
                                        n_in=hidden_layers_sizes[-1],
                                        n_out=n_outs,
                                        activation=T.nnet.sigmoid)
    else:
        print("This output activation function: %s is not supported right now!"
              % (self.output_activation))
        sys.exit(1)

    self.params.extend(self.final_layer.params)
    self.delta_params.extend(self.final_layer.delta_params)

    ### MSE
    self.finetune_cost = T.mean(
        T.sum((self.final_layer.output - self.y)
              * (self.final_layer.output - self.y), axis=1))
    self.errors = T.mean(
        T.sum((self.final_layer.output - self.y)
              * (self.final_layer.output - self.y), axis=1))

    ### L1-norm
    if self.l1_reg is not None:
        for i in range(self.n_layers):
            W = self.params[i * 2]
            self.finetune_cost += self.l1_reg * (abs(W).sum())

    ### L2-norm
    if self.l2_reg is not None:
        for i in range(self.n_layers):
            W = self.params[i * 2]
            self.finetune_cost += self.l2_reg * T.sqr(W).sum()
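# A small numpy sketch (values made up) of the finetune cost defined above:
# per-sample squared error summed over outputs and averaged over the batch,
# plus optional L1 / L2 penalties on the hidden-layer weight matrices.
import numpy as np

pred = np.array([[0.2, 0.8], [0.6, 0.4]])
target = np.array([[0.0, 1.0], [1.0, 0.0]])
hidden_weights = [np.random.randn(4, 3), np.random.randn(3, 3)]   # stand-ins for the W params
l1_reg, l2_reg = 1e-5, 1e-4

cost = np.mean(np.sum((pred - target) ** 2, axis=1))
cost += l1_reg * sum(np.abs(W).sum() for W in hidden_weights)
cost += l2_reg * sum((W ** 2).sum() for W in hidden_weights)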