def test_forward(input, kernel_size=3, padding=1, stride=2, dilation=1):
    # input is a tuple of (feature map, sorting weights s), e.g.
    # (Variable(torch.FloatTensor(torch.randn(1, 1, 5, 5)), requires_grad=True),
    #  Variable(torch.FloatTensor(torch.randn(1, 9)), requires_grad=True))
    F = RRSVM.RRSVM_F(kernel_size, padding, stride, dilation=dilation, return_indices=True)

    # Analytical output from the RRSVM autograd function.
    analytical, analytical_indices = F(*input)
    analytical = analytical.data.numpy()
    analytical_indices = analytical_indices.data.numpy()

    # Reference output computed by the plain numpy implementation.
    numerical, numerical_indices = get_numerical_output(
        *input, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation)

    atol = 1e-5
    rtol = 1e-3
    if not (np.absolute(numerical - analytical) <= (atol + rtol * np.absolute(numerical))).all():
        print("Output Failed Forward Test")
    else:
        print("Output Passed Forward Test")

    if not (np.absolute(analytical_indices - numerical_indices)
            <= (atol + rtol * np.absolute(numerical_indices))).all():
        print("Indices Failed Forward Test")
    else:
        print("Indices Passed Forward Test")
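# A minimal sketch of how test_forward might be invoked, assuming the RRSVM extension
# and get_numerical_output are importable in this test module; the shapes follow the
# commented-out example above (one 1x5x5 feature map and a 9-dim sorting weight for a
# 3x3 kernel).
import torch
import numpy as np
from torch.autograd import Variable

if __name__ == '__main__':
    input = (Variable(torch.randn(1, 1, 5, 5), requires_grad=True),
             Variable(torch.randn(1, 9), requires_grad=True))
    test_forward(input, kernel_size=3, padding=1, stride=2, dilation=1)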
def _make_layers(self, cfg):
    layers = []
    in_channels = 3
    for x in cfg:
        if x == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)]
        elif x == 'A':
            layers += [nn.AvgPool2d(kernel_size=2, stride=2)]
        elif x == 'O':
            layers += [RRSVM.RRSVM_Module(in_channels, kernel_size=2, stride=2,
                                          return_indices=True,
                                          p_constraint=self.p_constraint)]
        else:
            layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                       nn.BatchNorm2d(x),
                       nn.ReLU(inplace=True)]
            in_channels = x
    layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
    return nn.Sequential(*layers)
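# A hypothetical cfg list, shown only to illustrate the tokens _make_layers understands:
# integers add Conv-BN-ReLU blocks, 'M' adds max pooling, 'A' average pooling, and 'O'
# an RRSVM pooling stage. The actual configurations used by the repository may differ.
example_cfg = [64, 64, 'O', 128, 128, 'O', 256, 256, 256, 'O', 512, 512, 512, 'O']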
def __init__(self, in_planes, out_planes, dropRate=0.0, useRRSVM=False):
    super(TransitionBlock, self).__init__()
    self.bn1 = nn.BatchNorm2d(in_planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1,
                           padding=0, bias=False)
    self.droprate = dropRate
    if useRRSVM:
        self.pool = RRSVM.RRSVM_Module(in_channels=out_planes, kernel_size=2)
    else:
        self.pool = nn.AvgPool2d(kernel_size=2)
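# A minimal sketch of the corresponding forward method, assuming the standard DenseNet
# transition ordering (BN -> ReLU -> 1x1 conv -> optional dropout -> pool) and that
# torch.nn.functional is imported as F; the repository's actual forward may differ.
def forward(self, x):
    out = self.conv1(self.relu(self.bn1(x)))
    if self.droprate > 0:
        out = F.dropout(out, p=self.droprate, training=self.training)
    return self.pool(out)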
def test_forward(input, kernel_size=3, padding=1, stride=2, dilation=1):
    # input = (Variable(torch.FloatTensor(torch.randn(1, 1, 5, 5)), requires_grad=True),
    #          Variable(torch.FloatTensor(torch.randn(1, 9)), requires_grad=True),)
    F = RRSVM.RRSVM_F(kernel_size, padding, stride, dilation=dilation, return_indices=True)

    # if torch.cuda.is_available():
    #     input = [i.cuda() for i in input]
    # else:
    #     print("Cuda device not detected on this device")
    #     sys.exit(-1)

    analytical, analytical_indices = F(*input)
    analytical = analytical.cpu().data.numpy()
    analytical_indices = analytical_indices.cpu().data.numpy()

    atol = 1e-5
    rtol = 1e-3

    # The numpy reference runs on the CPU, so move the inputs back before comparing.
    if torch.cuda.is_available():
        input = [i.cpu() for i in input]
    numerical, numerical_indices = get_numerical_output(
        *input, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation)

    flag = True
    if not (np.absolute(numerical - analytical) <= (atol + rtol * np.absolute(numerical))).all():
        print("Update Output Error")
        flag = False
    else:
        print("Update Output Passed")
    # relative_loss = (numerical - analytical) / (numerical + 1e-6)
    # print("Max Diff: {:.04f}".format(np.abs(relative_loss).max()))

    input_np = input[0].data.cpu().numpy()
    if check_forward_indices(input_np, numerical_indices, analytical_indices,
                             kernel_size, padding, stride, dilation):
        print("Passed, Indices Pass Forward Test")
    else:
        print("Failed, Indices Fail Forward Test")
        flag = False
    return flag
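# A minimal sketch of running the same check against the CUDA path, assuming the RRSVM
# extension was built with GPU support; it mirrors the commented-out .cuda() block above
# and falls back to the CPU path when no device is available.
import torch
from torch.autograd import Variable

if __name__ == '__main__':
    input = (Variable(torch.randn(1, 1, 5, 5), requires_grad=True),
             Variable(torch.randn(1, 9), requires_grad=True))
    if torch.cuda.is_available():
        input = tuple(i.cuda() for i in input)
    test_forward(input, kernel_size=3, padding=1, stride=2, dilation=1)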
def __init__(self, n_classes=10, useRRSVM=False):
    super(GoogLeNet, self).__init__()
    self.pre_layers = nn.Sequential(
        nn.Conv2d(3, 192, kernel_size=3, padding=1),
        nn.BatchNorm2d(192),
        nn.ReLU(True),
    )
    self.useRRSVM = useRRSVM

    self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
    self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

    self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
    self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
    self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
    self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
    self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

    self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
    self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

    if not self.useRRSVM:
        self.pool1 = nn.MaxPool2d(3, stride=2, padding=1)
        self.pool2 = nn.MaxPool2d(3, stride=2, padding=1)
        self.pool3 = nn.AvgPool2d(8, stride=1)
    else:
        self.pool1 = RRSVM.RRSVM_Module(480, kernel_size=2, stride=2, return_indices=False)
        self.pool2 = RRSVM.RRSVM_Module(832, kernel_size=2, stride=2, return_indices=False)
        self.pool3 = RRSVM.RRSVM_Module(1024, kernel_size=8, stride=1, return_indices=False)

    self.linear = nn.Linear(1024, n_classes)
def test_gradient(input, kernel_size=3, padding=0, stride=1):
    F = RRSVM.RRSVM_F(kernel_size=kernel_size, padding=padding, stride=stride, dilation=1)
    test = gradcheck(lambda i, s: F(i, s), inputs=input, eps=1e-3, atol=1e-3, rtol=1e-3)
    if test:
        print("Gradient Check Passed!")
    else:
        print("Gradient Check Failed!")
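# A minimal sketch of a gradcheck invocation, using the same (feature map, s) input
# pair as test_forward. Double precision usually makes torch.autograd.gradcheck more
# reliable, but this assumes the RRSVM kernels accept DoubleTensor inputs; if they do
# not, FloatTensor with the looser tolerances above is the fallback.
import torch
from torch.autograd import Variable

if __name__ == '__main__':
    input = (Variable(torch.randn(1, 1, 5, 5).double(), requires_grad=True),
             Variable(torch.randn(1, 9).double(), requires_grad=True))
    test_gradient(input, kernel_size=3, padding=0, stride=1)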
def __init__(self, depth, n_classes, useRRSVM=False, growth_rate=12,
             reduction=0.5, bottleneck=True, dropRate=0.0):
    super(DenseNet3, self).__init__()
    in_planes = 2 * growth_rate
    n = (depth - 4) // 3  # layers per dense block; integer division
    if bottleneck:
        n = n // 2
        block = BottleneckBlock
    else:
        block = BasicBlock
    # 1st conv before any dense block
    self.conv1 = nn.Conv2d(3, in_planes, kernel_size=3, stride=1, padding=1, bias=False)
    # 1st block
    self.block1 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
    in_planes = int(in_planes + n * growth_rate)
    self.trans1 = TransitionBlock(in_planes, int(math.floor(in_planes * reduction)),
                                  dropRate=dropRate, useRRSVM=useRRSVM)
    in_planes = int(math.floor(in_planes * reduction))
    # 2nd block
    self.block2 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
    in_planes = int(in_planes + n * growth_rate)
    self.trans2 = TransitionBlock(in_planes, int(math.floor(in_planes * reduction)),
                                  dropRate=dropRate, useRRSVM=useRRSVM)
    in_planes = int(math.floor(in_planes * reduction))
    # 3rd block
    self.block3 = DenseBlock(n, in_planes, growth_rate, block, dropRate)
    in_planes = int(in_planes + n * growth_rate)
    # global pooling and classifier
    self.bn1 = nn.BatchNorm2d(in_planes)
    self.relu = nn.ReLU(inplace=True)
    if useRRSVM:
        self.pool = RRSVM.RRSVM_Module(in_channels=in_planes, kernel_size=8)
    else:
        self.pool = nn.AvgPool2d(kernel_size=8)
    self.fc = nn.Linear(in_planes, n_classes)
    self.in_planes = in_planes

    # Kaiming-style init for convolutions, unit/zero init for BN, zero bias for the classifier.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            m.bias.data.zero_()
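# A plausible forward method for DenseNet3, offered as a sketch: it follows the usual
# DenseNet-BC ordering implied by the modules defined above, but the repository's
# actual forward may differ in detail.
def forward(self, x):
    out = self.conv1(x)
    out = self.trans1(self.block1(out))
    out = self.trans2(self.block2(out))
    out = self.block3(out)
    out = self.relu(self.bn1(out))
    out = self.pool(out)
    out = out.view(-1, self.in_planes)
    return self.fc(out)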
def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
    super(Inception, self).__init__()
    # 1x1 conv branch
    self.b1 = nn.Sequential(
        nn.Conv2d(in_planes, n1x1, kernel_size=1),
        nn.BatchNorm2d(n1x1),
        nn.ReLU(True),
    )
    # 1x1 conv -> 3x3 conv branch
    self.b2 = nn.Sequential(
        nn.Conv2d(in_planes, n3x3red, kernel_size=1),
        nn.BatchNorm2d(n3x3red),
        nn.ReLU(True),
        nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
        nn.BatchNorm2d(n3x3),
        nn.ReLU(True),
    )
    # 1x1 conv -> 5x5 conv branch
    self.b3 = nn.Sequential(
        nn.Conv2d(in_planes, n5x5red, kernel_size=1),
        nn.BatchNorm2d(n5x5red),
        nn.ReLU(True),
        nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
        nn.BatchNorm2d(n5x5),
        nn.ReLU(True),
        nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
        nn.BatchNorm2d(n5x5),
        nn.ReLU(True),
    )
    # 3x3 pool -> 1x1 conv branch
    self.b4 = nn.Sequential(
        # nn.MaxPool2d(3, stride=1, padding=1),
        RRSVM.RRSVM_Module(in_channels=in_planes, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(in_planes, pool_planes, kernel_size=1),
        nn.BatchNorm2d(pool_planes),
        nn.ReLU(True),
    )
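# A minimal sketch of the Inception forward method: the four branches are assumed to be
# concatenated along the channel dimension, as in the standard GoogLeNet building block.
def forward(self, x):
    y1 = self.b1(x)
    y2 = self.b2(x)
    y3 = self.b3(x)
    y4 = self.b4(x)
    return torch.cat([y1, y2, y3, y4], 1)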
def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10, useRRSVM=True):
    super(ResNeXt, self).__init__()
    self.cardinality = cardinality
    self.bottleneck_width = bottleneck_width
    self.in_planes = 64

    self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.layer1 = self._make_layer(num_blocks[0], 1)
    self.layer2 = self._make_layer(num_blocks[1], 2)
    self.layer3 = self._make_layer(num_blocks[2], 2)
    # self.layer4 = self._make_layer(num_blocks[3], 2)
    self.linear = nn.Linear(cardinality * bottleneck_width * 8, num_classes)

    self.useRRSVM = useRRSVM
    if self.useRRSVM:
        self.pool = RRSVM.RRSVM_Module(in_channels=1024, kernel_size=8)
    else:
        self.pool = nn.AvgPool2d(kernel_size=8)
def test_gradient(input, kernel_size=3, padding=0, stride=1):
    F = RRSVM.RRSVM_F(kernel_size=kernel_size, padding=padding, stride=stride, dilation=1)
    # if torch.cuda.is_available():
    #     input = [i.cuda() for i in input]
    # else:
    #     print("Cuda device not detected on this device")
    #     sys.exit(-1)
    test = gradcheck(lambda i, s: F(i, s), inputs=input, eps=1e-3, atol=1e-3, rtol=1e-3)
    if test:
        print("Gradient Check Passed!")
    else:
        print("Gradient Check Failed!")
    return test
def __init__(self, cfg, useRRSVM=True):
    super(DPN, self).__init__()
    in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
    num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']

    self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.last_planes = 64
    self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
    self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
    self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
    self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
    self.linear = nn.Linear(out_planes[3] + (num_blocks[3] + 1) * dense_depth[3], 10)

    if useRRSVM:
        self.pool = RRSVM.RRSVM_Module(in_channels=2560, kernel_size=4)
    else:
        self.pool = nn.AvgPool2d(kernel_size=4)
    n_classes=n_classes, useRRSVM=True)

print('Number of model parameters: {}'.format(
    sum([p.data.nelement() for p in model.parameters()])))

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                      lr=args.lr, momentum=0.9, weight_decay=1e-4)  # was 5e-4 before

p_constraint = False
if args.positive_constraint:
    p_constraint = True
    positive_clipper = RRSVM.RRSVM_PositiveClipper()

use_cuda = torch.cuda.is_available() and (args.gpu_id is not None or args.multiGpu)
if use_cuda:
    if args.multiGpu:
        if args.gpu_id is None:  # using all the GPUs
            device_count = torch.cuda.device_count()
            print("Using ALL {:d} GPUs".format(device_count))
            model = nn.DataParallel(model, device_ids=[i for i in range(device_count)]).cuda()
        else:
            print("Using GPUs: {:s}".format(args.gpu_id))
            device_ids = [int(x) for x in args.gpu_id]
            model = nn.DataParallel(model, device_ids=device_ids).cuda()
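# A hypothetical sketch of how the positive constraint might be enforced during training:
# apply the clipper to the model after each optimizer step. The actual interface of
# RRSVM_PositiveClipper in this repository may differ, so treat model.apply(...) and the
# helper below as assumptions rather than the documented API.
def train_one_batch(model, inputs, targets):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    if p_constraint:
        model.apply(positive_clipper)  # assumed: projects RRSVM sorting weights to be non-negative
    return loss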