def __init__(self, layers):
    super().__init__()
    self.layers = layers
    inp = 1
    self.convs = []
    self.deconvs = []
    self.gap = M.pooling.AvgPool2d(3)
    for layer in layers:
        self.convs.append(conv_bn_relu_pool(inp, layer, 3, padding=1))
        inp = layer
    inp += 10
    for layer in layers[::-1]:  # + [1,]:
        self.deconvs.append(
            transpose_conv_bn_relu(inp, layer, 5, padding=1, stride=2, relu=layer != 1))
        inp = layer
    self.predict_layer = M.conv_bn.ConvBn2d(inp, 1, 1)
    self.fc_mean = M.Linear(layers[-1], layers[-1])
    self.fc_var = M.Linear(layers[-1], layers[-1])
def __init__(self):
    self.mid_dim = 14
    self.num_class = 2
    super().__init__()
    self.fc0 = M.Linear(self.num_class, self.mid_dim, bias=True)
    self.fc1 = M.Linear(self.mid_dim, self.mid_dim, bias=True)
    self.fc2 = M.Linear(self.mid_dim, self.num_class, bias=True)
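# Hedged sketch: a forward pass matching the three Linear layers above. The method name
# "forward", the ReLU activations, and megengine.functional imported as F are assumptions,
# not part of the original snippet.
def forward(self, x):
    x = F.relu(self.fc0(x))   # (N, num_class) -> (N, mid_dim)
    x = F.relu(self.fc1(x))   # (N, mid_dim)  -> (N, mid_dim)
    return self.fc2(x)        # (N, mid_dim)  -> (N, num_class) logits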
def __init__(self, channel_num):
    super(CARBBlock, self).__init__()
    self.conv1 = M.Sequential(
        M.Conv2d(channel_num, channel_num, kernel_size=3, padding=1, stride=1),
        M.ReLU(),
        M.Conv2d(channel_num, channel_num, kernel_size=3, padding=1, stride=1),
    )
    # self.global_average_pooling = nn.AdaptiveAvgPool2d((1,1))  # B,C,H,W -> B,C,1,1
    self.linear = M.Sequential(
        M.Linear(channel_num, channel_num // 2),
        M.ReLU(),
        M.Linear(channel_num // 2, channel_num),
        M.Sigmoid())
    self.conv2 = M.Conv2d(channel_num * 2, channel_num, kernel_size=1, padding=0, stride=1)
    self.lrelu = M.LeakyReLU()
def test_grad_twice():
    # model define
    model = M.Sequential(M.Linear(10, 20), M.Linear(20, 10), M.Linear(10, 5))
    model.train()

    named_param = dict(list(model.named_parameters(requires_grad=True)))
    named_module = dict(list(model.named_children()))
    name_keys = list(named_param.keys())
    params = list(named_param.values())

    loss_fn = F.cross_entropy_with_softmax
    optimizer = optim.SGD(params, lr=0.003)

    # forward once
    optimizer.zero_grad()
    x1 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y1 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')
    loss = loss_fn(model(x1), y1)
    grads = F.grad(loss, params, use_virtual_grad=False, return_zero_for_nodep=False)
    fast_weights = [p - 0.5 * g for g, p in zip(grads, params)]

    # manual update params
    replace_parameter(named_module, dict(zip(name_keys, fast_weights)))

    # forward twice
    x2 = meg.tensor(np.random.randn(5, 10), dtype='float32')
    y2 = meg.tensor(np.random.randint(0, 5, (5)), dtype='int32')
    loss2 = loss_fn(model(x2), y2)

    # got error
    replace_parameter(named_module, named_param)
    optimizer.backward(loss2)
    optimizer.step()
def __init__(self, cfg):
    super().__init__()
    self.cfg = cfg
    self.box_coder = layers.BoxCoder(cfg.rcnn_reg_mean, cfg.rcnn_reg_std)

    # roi head
    self.in_features = cfg.rcnn_in_features
    self.stride = cfg.rcnn_stride
    self.pooling_method = cfg.pooling_method
    self.pooling_size = cfg.pooling_size

    self.fc1 = M.Linear(256 * self.pooling_size[0] * self.pooling_size[1], 1024)
    self.fc2 = M.Linear(1024, 1024)
    for l in [self.fc1, self.fc2]:
        M.init.normal_(l.weight, std=0.01)
        M.init.fill_(l.bias, 0)

    # box predictor
    self.pred_cls = M.Linear(1024, cfg.num_classes + 1)
    self.pred_delta = M.Linear(1024, cfg.num_classes * 4)
    M.init.normal_(self.pred_cls.weight, std=0.01)
    M.init.normal_(self.pred_delta.weight, std=0.001)
    for l in [self.pred_cls, self.pred_delta]:
        M.init.fill_(l.bias, 0)
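# Hedged sketch: how the RCNN head above is typically applied to pooled RoI features.
# The argument name "pool_features", its (R, 256, ph, pw) layout, the method name "forward",
# and megengine.functional imported as F are assumptions for illustration.
def forward(self, pool_features):
    flat = F.flatten(pool_features, 1)    # (R, 256 * pooling_size[0] * pooling_size[1])
    x = F.relu(self.fc1(flat))
    x = F.relu(self.fc2(x))
    pred_logits = self.pred_cls(x)        # (R, num_classes + 1) classification scores
    pred_deltas = self.pred_delta(x)      # (R, num_classes * 4) box regression deltas
    return pred_logits, pred_deltas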
def __init__(self):
    super().__init__()
    self.quant = Float.QuantStub()
    self.linear = Float.Sequential(Float.Linear(3, 3), Float.Linear(3, 3))
    self.dequant = Float.DequantStub()
    self.linear[0].bias[...] = Parameter(np.random.rand(3))
    self.linear[1].bias[...] = Parameter(np.random.rand(3))
def __init__(self, in_channels, num_classes):
    super(InceptionAux, self).__init__()
    self.avgpool = M.AvgPool2d(5, padding=3)
    self.conv = BasicConv2d(in_channels, 128, kernel_size=1)
    self.fc1 = M.Linear(2048, 1024)
    self.fc2 = M.Linear(1024, num_classes)
def __init__(self, in_ch=3, num_classes=1000):
    '''
    The AlexNet.
    args:
        in_ch: int, the number of channels of the inputs
        num_classes: int, the number of classes to predict
    reference:
        "One weird trick for parallelizing convolutional neural networks" <https://arxiv.org/abs/1404.5997>
    '''
    super(AlexNet, self).__init__()
    # the part to extract features
    self.features = M.Sequential(
        M.Conv2d(in_ch, 64, kernel_size=11, stride=4, padding=11 // 4),
        M.ReLU(),
        M.MaxPool2d(kernel_size=3, stride=2),
        M.Conv2d(64, 192, kernel_size=5, padding=2),
        M.ReLU(),
        M.MaxPool2d(kernel_size=3, stride=2),
        M.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
        M.ReLU(),
        M.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
        M.ReLU(),
        M.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
        M.ReLU(),
        M.MaxPool2d(kernel_size=3, stride=2),
    )
    # global avg pooling
    self.avgpool = M.AdaptiveAvgPool2d((6, 6))
    # classify part
    self.classifier = M.Sequential(
        M.Dropout(),
        M.Linear(256 * 6 * 6, 4096),
        M.ReLU(),
        M.Dropout(),
        M.Linear(4096, 4096),
        M.ReLU(),
        M.Linear(4096, num_classes))
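# Hedged sketch of the matching AlexNet forward pass (features -> pooling -> classifier);
# the method name "forward" and megengine.functional imported as F are assumptions.
def forward(self, x):
    x = self.features(x)        # (N, in_ch, H, W) -> (N, 256, h, w)
    x = self.avgpool(x)         # -> (N, 256, 6, 6)
    x = F.flatten(x, 1)         # -> (N, 256 * 6 * 6)
    return self.classifier(x)   # -> (N, num_classes)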
def __init__(self, l1):
    super(TwolayerFC2, self).__init__()
    self.fc1 = M.Linear(2, l1)
    self.fc1.weight = W1
    self.fc1.bias = B1
    self.fc2 = M.Linear(l1, 2)
    self.fc2.weight = W2
    self.fc2.bias = B2
def __init__(self, mode="normal"): super().__init__() self.data = np.random.random((10, 100)).astype(np.float32) self.data1 = np.random.random((10, 10, 10)).astype(np.float32) self.linear = M.Linear(100, 200, bias=False) self.linear_bias = M.Linear(200, 200, bias=True) self.linear_bias.bias = mge.Parameter( np.random.random(self.linear_bias.bias.shape).astype(np.float32)) self.mode = mode
def __init__(self, i, value_embedding, key_embedding):
    self.key_embedding = key_embedding
    super(TransformerBlock, self).__init__()
    self.position_encoding = M.Linear(L, key_embedding)
    self.init_map = M.Linear(i, key_embedding)
    self.value_mapping = M.Linear(key_embedding, value_embedding)
    self.key_mapping = M.Linear(key_embedding, key_embedding)
    self.query_mapping = M.Linear(key_embedding, key_embedding)
    self.norm = M.BatchNorm1d(key_embedding)
def __init__(self):
    super().__init__()
    # roi head
    self.fc1 = M.Linear(256 * 7 * 7, 1024)
    self.fc2 = M.Linear(1024, 1024)
    self.n = config.num_classes
    self.cls = M.Linear(1024, self.n)
    self.bbox = M.Linear(1024, 4 * self.n)
    self._init_weights()
def __init__(self):
    self.mid_dim = 14
    self.num_class = 2
    super().__init__()
    self.fc0 = M.Linear(self.num_class, self.mid_dim, bias=True)
    self.bn0 = M.BatchNorm1d(self.mid_dim)
    self.fc1 = M.Linear(self.mid_dim, self.mid_dim, bias=True)
    self.bn1 = M.BatchNorm1d(self.mid_dim)
    self.fc2 = M.Linear(self.mid_dim, self.num_class, bias=True)
    self.data = np.random.random((12, 2)).astype(np.float32)
def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
    super(ChannelGate, self).__init__()
    gate_channels = [gate_channel]
    gate_channels += [gate_channel // reduction_ratio] * num_layers
    gate_channels += [gate_channel]
    self.gate_c = M.Sequential(
        Flatten(),
        M.Linear(gate_channels[0], gate_channels[1]),
        M.BatchNorm1d(gate_channels[1]),
        M.ReLU(),
        M.Linear(gate_channels[-2], gate_channels[-1]))
def __init__(self, converter="normal"): self.converter = converter self.mid_dim = 14 self.num_class = 2 super().__init__() self.fc0 = M.Linear(self.num_class, self.mid_dim, bias=True) self.bn0 = M.BatchNorm1d(self.mid_dim) self.fc1 = M.Linear(self.mid_dim, self.mid_dim, bias=True) self.bn1 = M.BatchNorm1d(self.mid_dim) self.fc2 = M.Linear(self.mid_dim, self.num_class, bias=True) self.data = np.arange(24).reshape(12, 2).astype(np.float32)
def __init__(self):
    super().__init__()
    self.conv0 = M.Conv2d(1, 20, kernel_size=5, bias=False)
    self.bn0 = M.BatchNorm2d(20)
    self.relu0 = M.ReLU()
    self.pool0 = M.MaxPool2d(2)
    self.conv1 = M.Conv2d(20, 20, kernel_size=5, bias=False)
    self.bn1 = M.BatchNorm2d(20)
    self.relu1 = M.ReLU()
    self.pool1 = M.MaxPool2d(2)
    self.fc0 = M.Linear(500, 64, bias=True)
    self.relu2 = M.ReLU()
    self.fc1 = M.Linear(64, 10, bias=True)
def __init__(self):
    super().__init__()
    # roi head
    self.fc1 = M.Linear(256 * 7 * 7, 1024)
    self.fc2 = M.Linear(1024, 1024)
    for l in [self.fc1, self.fc2]:
        M.init.msra_uniform_(l.weight, a=1)
        M.init.fill_(l.bias, 0)
    # box predictor
    self.pred_cls = M.Linear(1024, config.num_classes)
    self.pred_delta = M.Linear(1024, config.num_classes * 4)
    M.init.normal_(self.pred_cls.weight, std=0.01)
    M.init.normal_(self.pred_delta.weight, std=0.001)
    for l in [self.pred_cls, self.pred_delta]:
        M.init.fill_(l.bias, 0)
def worker():
    rank = dist.get_rank()
    size = dist.get_world_size()
    x = mge.tensor(np.random.randn(1, rank * 2 + 2), dtype=np.float32)
    m = M.Linear(rank * 2 + 2, rank * 2 + 4)
    gm = GradManager().attach(m.parameters())
    opt = optim.SGD(m.parameters(), 1e-3, momentum=0.9)

    def train_func(x):
        with gm:
            if rank != 0:
                x = dist.functional.remote_recv(
                    rank - 1, shape=(1, rank * 2 + 2), dtype=np.float32)
            y = m(x)
            if rank != size - 1:
                dist.functional.remote_send(y, dest_rank=rank + 1)
                gm.backward()
            else:
                y = y.mean()
                gm.backward(y)
            opt.step().clear_grad()

    train_funcs = [
        train_func,
        trace(symbolic=False)(train_func),
        trace(symbolic=True)(train_func),
    ]
    for func in train_funcs:
        for i in range(3):
            func(x)
def __init__(self):
    super().__init__()
    self.classifier = None
    if dist.get_rank() == 0:
        self.features = M.Sequential(
            M.ConvBn2d(3, 64, 7, stride=2, padding=3, bias=False),
            M.MaxPool2d(kernel_size=3, stride=2, padding=1),
            BasicBlock(64, 64, 1),
            BasicBlock(64, 64, 1),
        )
    elif dist.get_rank() == 1:
        self.features = M.Sequential(
            BasicBlock(64, 128, 2),
            BasicBlock(128, 128, 1),
        )
    elif dist.get_rank() == 2:
        self.features = M.Sequential(
            BasicBlock(128, 256, 2),
            BasicBlock(256, 256, 1),
        )
    elif dist.get_rank() == 3:
        self.features = M.Sequential(
            BasicBlock(256, 512, 2),
            BasicBlock(512, 512, 1),
        )
        self.classifier = M.Linear(512, 1000)
def test_linear():
    normal_net = Float.Linear(3, 3, bias=True)
    normal_net.eval()

    qat_net = QAT.Linear(3, 3, bias=True)
    qat_net.eval()
    disable_observer(qat_net)

    propagate_qconfig(qat_net, min_max_fakequant_qconfig)
    init_qat_net(qat_net)

    x = mge.tensor(np.random.normal(size=(3, 3)).astype("float32"))
    x = fake_quant(x, inp_scale)
    x.q_dict["scale"] = inp_scale
    x_int8 = quant(x, inp_scale)

    weight = np.random.normal(size=(3, 3)).astype("float32")
    bias = np.random.normal(size=(3,)).astype("float32")
    normal_net.weight.set_value(fake_quant(weight, weight_scale))
    normal_net.bias.set_value(fake_quant(bias, inp_scale * weight_scale))
    qat_net.weight.set_value(weight)
    qat_net.bias.set_value(bias)

    q_net = Q.Linear.from_qat_module(qat_net)
    q_net.eval()

    normal_out = fake_quant(normal_net(x), act_scale)
    qat_out = qat_net(x)
    q_out = q_net(x_int8).numpy() * act_scale
    np.testing.assert_allclose(qat_out, normal_out)
    np.testing.assert_allclose(q_out, normal_out.numpy())
def __init__(self):
    super().__init__()
    # single-channel input, two blocks of 5x5 conv + ReLU + pooling
    self.conv1 = M.Conv2d(1, 6, 5)
    self.relu1 = M.ReLU()
    self.pool1 = M.MaxPool2d(2, 2)
    self.conv2 = M.Conv2d(6, 16, 5)
    self.relu2 = M.ReLU()
    self.pool2 = M.MaxPool2d(2, 2)
    # two fully connected layers + ReLU
    self.fc1 = M.Linear(16 * 5 * 5, 120)
    self.relu3 = M.ReLU()
    self.fc2 = M.Linear(120, 84)
    self.relu4 = M.ReLU()
    # classifier
    self.classifier = M.Linear(84, 10)
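# Hedged sketch: a LeNet-style forward pass chaining the layers above; the method name
# "forward", the 32x32 input size (implied by fc1's 16 * 5 * 5 input), and
# megengine.functional imported as F are assumptions.
def forward(self, x):
    x = self.pool1(self.relu1(self.conv1(x)))   # (N, 1, 32, 32) -> (N, 6, 14, 14)
    x = self.pool2(self.relu2(self.conv2(x)))   # -> (N, 16, 5, 5)
    x = F.flatten(x, 1)                         # -> (N, 16 * 5 * 5)
    x = self.relu3(self.fc1(x))
    x = self.relu4(self.fc2(x))
    return self.classifier(x)                   # -> (N, 10) logits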
def __init__(self):
    super().__init__()
    E = 32
    self.t1 = TransformerBlock(28, E, E)
    self.t2 = TransformerBlock(E, E, E)
    self.t3 = TransformerBlock(E, E, E)
    self.t4 = TransformerBlock(E, E, E)
    self.fc = M.Linear(E, 28)
def __init__(self, name):
    super().__init__(name=name)
    self.quant = M.QuantStub()
    self.linear = M.Linear(3, 3, bias=True)
    self.dequant = M.DequantStub()
    self.linear.weight.name = "user-weight"
    self.linear.bias.name = "user-bias"
def __init__(self, iou_thresh, nheads, stage):
    super().__init__()
    assert iou_thresh >= 0.5 and nheads > 0
    self.iou_thresh = iou_thresh
    self.nheads = nheads
    self.n = config.num_classes
    self.name = 'cascade_stage_{}'.format(stage)

    self.fc1 = M.Linear(256 * 7 * 7, 1024)
    self.fc2 = M.Linear(1024, 1024)
    self.relu = M.ReLU()

    self.n = config.num_classes
    self.p = M.Linear(1024, 5 * self.n * nheads)
    self._init_weights()
def __init__(self, name):
    super().__init__()
    self.stage_name = name

    # roi head
    self.fc1 = M.Linear(256 * 7 * 7, 1024)
    self.fc2 = M.Linear(1024, 1024)
    for l in [self.fc1, self.fc2]:
        M.init.msra_uniform_(l.weight, a=1)
        M.init.fill_(l.bias, 0)

    # box predictor
    self.pred_cls = M.Linear(1024, 2)
    self.pred_delta = M.Linear(1024, 4)
    for l in [self.pred_cls]:
        M.init.normal_(l.weight, std=0.01)
        M.init.normal_(l.bias, 0)
    for l in [self.pred_delta]:
        M.init.normal_(l.weight, std=0.001)
        M.init.normal_(l.bias, 0)
def __init__(self):
    super().__init__()
    self.conv1 = M.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = M.BatchNorm2d(64)
    self.avgpool = M.AvgPool2d(kernel_size=5, stride=5, padding=0)
    self.fc = M.Linear(64, 10)
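# Hedged sketch: one plausible forward pass for the small conv/bn/pool/fc stack above;
# the method name "forward", the ReLU between bn1 and the pooling, and
# megengine.functional imported as F are assumptions.
def forward(self, x):
    x = F.relu(self.bn1(self.conv1(x)))
    x = self.avgpool(x)
    x = F.flatten(x, 1)      # collapse the remaining spatial dims before the classifier
    return self.fc(x)        # (N, 10) logits; assumes the pooled feature map is 1x1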
def __init__(self, input_dim, features):
    super().__init__()
    layers = []
    out_dim = input_dim
    num_layers = len(features)
    for i, feat in enumerate(features):
        layers.append(M.Linear(out_dim, feat))
        if i != num_layers - 1:
            layers.append(M.ReLU())
        out_dim = feat
    self.layers = M.Sequential(*layers)
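# Hedged usage sketch for the MLP builder above. The class name "MLP" and the aliases
# megengine as mge / numpy as np are assumptions for illustration.
net = MLP(input_dim=16, features=[64, 64, 2])   # Linear-ReLU-Linear-ReLU-Linear
dummy = mge.tensor(np.random.randn(4, 16).astype("float32"))
out = net.layers(dummy)                         # shape (4, 2); no ReLU after the last Linear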
def __init__(self, in_channels, num_classes, conv_block=None):
    super(InceptionAux, self).__init__()
    if conv_block is None:
        conv_block = BasicConv2d
    self.avgpool1 = M.AvgPool2d(5, 3)
    self.conv0 = conv_block(in_channels, 128, kernel_size=1)
    self.conv1 = conv_block(128, 768, kernel_size=5)
    self.avgpool = M.AvgPool2d(1)
    self.conv1.stddev = 0.01
    self.fc = M.Linear(768, num_classes)
    self.fc.stddev = 0.001
def __init__(self, cfg, num_classes=1000, in_channels=3, init_weights=True, batch_norm=False):
    '''
    VGGNet from paper
    "Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>
    '''
    super(VGG, self).__init__()
    self.features = self._make_layers(in_channels, cfg, batch_norm)
    self.avgpool = M.AdaptiveAvgPool2d((7, 7))
    self.classifier = M.Sequential(
        M.Linear(512 * 7 * 7, 4096),
        M.ReLU(),
        M.Dropout(),
        M.Linear(4096, 4096),
        M.ReLU(),
        M.Dropout(),
        M.Linear(4096, num_classes))
    if init_weights:
        self._init_weights()
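# Hedged sketch of the usual VGG forward pass (features -> adaptive pooling -> classifier);
# the method name "forward" and megengine.functional imported as F are assumptions not shown
# in the snippet.
def forward(self, x):
    x = self.features(x)        # conv stages built by self._make_layers
    x = self.avgpool(x)         # -> (N, 512, 7, 7)
    x = F.flatten(x, 1)         # -> (N, 512 * 7 * 7)
    return self.classifier(x)   # -> (N, num_classes)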
def __init__(self, block, num_blocks, num_classes=10):
    super(ResNet, self).__init__()
    self.in_planes = 16

    self.conv1 = M.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = M.BatchNorm2d(16)
    self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
    self.linear = M.Linear(64, num_classes)

    self.apply(_weights_init)
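# Hedged sketch of the CIFAR-style ResNet forward pass implied by the layers above; the
# method name "forward", the 32x32 input size, and megengine.functional imported as F
# are assumptions.
def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))   # (N, 3, 32, 32) -> (N, 16, 32, 32)
    out = self.layer1(out)                  # -> (N, 16, 32, 32)
    out = self.layer2(out)                  # -> (N, 32, 16, 16)
    out = self.layer3(out)                  # -> (N, 64, 8, 8)
    out = F.avg_pool2d(out, 8)              # global average over the assumed 8x8 map
    out = F.flatten(out, 1)                 # -> (N, 64)
    return self.linear(out)                 # -> (N, num_classes)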