def test_module(batch_size, in_capsules, out_capsules, in_length, out_length,
                routing_type, share_weight, num_iterations, squash):
    # with shared transform weights the layer does not need the input capsule count
    num_in_capsules = None if share_weight else in_capsules
    module = CapsuleLinear(out_capsules, in_length, out_length, num_in_capsules,
                           share_weight, routing_type, num_iterations, squash)
    x = torch.randn(batch_size, in_capsules, in_length)
    y_cpu, prob_cpu = module(x)
    y_cuda, prob_cuda = module.to('cuda')(x.to('cuda'))
    # the CPU and CUDA code paths must produce the same outputs and routing probabilities
    assert torch.allclose(y_cuda.cpu(), y_cpu)
    assert torch.allclose(prob_cuda.cpu(), prob_cpu)
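# The arguments above are presumably supplied by pytest parametrization, which is
# elided here. A minimal sketch of one way to drive the check; every concrete value
# below is an assumption, not the project's actual grid.
import pytest
import torch

@pytest.mark.parametrize('batch_size', [1, 8])
@pytest.mark.parametrize('share_weight', [True, False])
@pytest.mark.parametrize('routing_type', ['k_means', 'dynamic'])
def test_module_small(batch_size, share_weight, routing_type):
    # reuse the CPU/CUDA consistency check above on a fixed small geometry
    test_module(batch_size, in_capsules=16, out_capsules=10, in_length=8, out_length=4,
                routing_type=routing_type, share_weight=share_weight,
                num_iterations=3, squash=True)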
def __init__(self, vocab_size, embedding_size, num_codebook, num_codeword, hidden_size,
             in_length, out_length, num_class, routing_type, embedding_type,
             classifier_type, num_iterations, num_repeat, dropout):
    super().__init__()
    self.in_length, self.out_length = in_length, out_length
    self.hidden_size, self.classifier_type = hidden_size, classifier_type
    self.embedding_type = embedding_type
    # 'cwc' uses weighted compositional codes, 'cc' unweighted codes with repeats,
    # and anything else falls back to a plain embedding table
    if embedding_type == 'cwc':
        self.embedding = CompositionalEmbedding(vocab_size, embedding_size, num_codebook,
                                                num_codeword, weighted=True)
    elif embedding_type == 'cc':
        self.embedding = CompositionalEmbedding(vocab_size, embedding_size, num_codebook,
                                                num_codeword, num_repeat, weighted=False)
    else:
        self.embedding = nn.Embedding(vocab_size, embedding_size)
    self.features = nn.GRU(embedding_size, self.hidden_size, num_layers=2, dropout=dropout,
                           batch_first=True, bidirectional=True)
    # both routing variants share the same capsule head, so routing_type passes through
    if classifier_type == 'capsule' and routing_type in ('k_means', 'dynamic'):
        self.classifier = CapsuleLinear(out_capsules=num_class, in_length=self.in_length,
                                        out_length=self.out_length,
                                        in_capsules=self.hidden_size // self.in_length,
                                        share_weight=False, routing_type=routing_type,
                                        num_iterations=num_iterations, bias=False)
    else:
        self.classifier = nn.Linear(in_features=self.hidden_size, out_features=num_class,
                                    bias=False)
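# The capsule classifier above expects hidden_size // in_length input capsules, which
# pins down how the GRU output has to be regrouped. The model's forward pass is not
# shown, so the following is only a plausible sketch of that regrouping; all sizes
# here are assumed for illustration.
import torch
from torch import nn

batch_size, seq_len, vocab_size = 4, 32, 1000
hidden_size, in_length = 128, 8

embedding = nn.Embedding(vocab_size, 64)
features = nn.GRU(64, hidden_size, num_layers=2, batch_first=True, bidirectional=True)

tokens = torch.randint(0, vocab_size, (batch_size, seq_len))
out, _ = features(embedding(tokens))  # (batch, seq, 2 * hidden_size)
# one plausible grouping: merge the two directions at the last step, then split the
# hidden_size-dim vector into hidden_size // in_length capsules of length in_length
last = out[:, -1, :hidden_size] + out[:, -1, hidden_size:]
capsules = last.view(batch_size, hidden_size // in_length, in_length)
print(capsules.shape)  # torch.Size([4, 16, 8])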
def test_module(batch_size, in_capsules, out_capsules, in_length, out_length,
                routing_type, kwargs, share_weight, num_iterations):
    num_in_capsules = None if share_weight else in_capsules
    module = CapsuleLinear(out_capsules, in_length, out_length, num_in_capsules,
                           share_weight, routing_type, num_iterations, **kwargs)
    x = torch.randn(batch_size, in_capsules, in_length)
    y_cpu = module(x)
    y_cuda = module.to('cuda')(x.to('cuda'))
    assert y_cuda.view(-1).tolist() == approx(y_cpu.view(-1).tolist(), abs=1e-5)
def __init__(self, data_type='MNIST', net_mode='Capsule', capsule_type='ps',
             routing_type='k_means', num_iterations=3, **kwargs):
    super(MixNet, self).__init__()
    self.net_mode = net_mode
    # grayscale datasets get a 1-channel stem, everything else a 3-channel one
    in_channels = 1 if data_type in ('MNIST', 'FashionMNIST') else 3
    self.conv1 = nn.Sequential(nn.Conv2d(in_channels, 16, kernel_size=3, padding=1, bias=False))
    layers = []
    basic_model = resnet(use_layer_4=True) if data_type == 'STL10' else resnet()
    for name, module in basic_model.named_children():
        # drop the backbone's stem conv and fully connected head
        if name == 'conv1' or isinstance(module, nn.Linear):
            continue
        # the 'ps' capsule variant keeps the spatial map, so skip global pooling too
        if self.net_mode == 'Capsule' and capsule_type == 'ps' and isinstance(module, nn.AdaptiveAvgPool2d):
            continue
        layers.append(module)
    self.features = nn.Sequential(*layers)
    if self.net_mode == 'Capsule':
        if capsule_type == 'ps':
            self.classifier = CapsuleLinear(out_capsules=10, in_length=32, out_length=8,
                                            routing_type=routing_type,
                                            num_iterations=num_iterations, **kwargs)
        else:
            self.classifier = CapsuleLinear(out_capsules=10, in_length=32, out_length=8,
                                            in_capsules=32, share_weight=False,
                                            routing_type=routing_type,
                                            num_iterations=num_iterations, **kwargs)
    else:
        self.classifier = nn.Sequential(nn.Linear(in_features=1024, out_features=256),
                                        nn.ReLU(),
                                        nn.Linear(in_features=256, out_features=10))
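# Assuming MixNet also defines the usual forward chaining conv1, features and
# classifier (not shown here), a smoke test might look like this; the input shape
# and the expected output are assumptions based on the 1-channel MNIST stem and
# the 10-class head.
import torch

model = MixNet(data_type='MNIST', net_mode='Capsule', capsule_type='ps')
images = torch.randn(2, 1, 28, 28)
out = model(images)
# with the capsule head this should be 10 output capsules of length 8 per image,
# roughly (2, 10, 8); the plain classifier would instead yield (2, 10) logits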
def __init__(self, vocab_size, num_class, routing_type, num_iterations):
    super().__init__()
    self.embedding_size = 64
    self.hidden_size = 128
    self.embedding = CompositionalEmbedding(num_embeddings=vocab_size,
                                            embedding_dim=self.embedding_size, num_codebook=8)
    self.features = nn.GRU(self.embedding_size, self.hidden_size, num_layers=2, dropout=0.5,
                           batch_first=True, bidirectional=True)
    if routing_type == 'k_means':
        # no routing_type is passed here, so this relies on the layer's default
        self.classifier = CapsuleLinear(out_capsules=num_class, in_length=8, out_length=16,
                                        in_capsules=16, share_weight=False,
                                        num_iterations=num_iterations, similarity='cosine')
    else:
        self.classifier = CapsuleLinear(out_capsules=num_class, in_length=8, out_length=16,
                                        in_capsules=16, share_weight=False,
                                        routing_type='dynamic', num_iterations=num_iterations)
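# Note the arithmetic behind this configuration: in_capsules * in_length = 16 * 8
# = 128 = hidden_size, so the bidirectional GRU's 256-dim step output presumably has
# to be reduced back to hidden_size before being reshaped into capsules; the grouping
# sketch after the first text model above applies here unchanged.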
def test_ri_optim(milestones, addition):
    models = [
        nn.Conv2d(1, 3, 3),
        CapsuleLinear(10, 8, 16, num_iterations=1),
        CapsuleConv2d(8, 16, 3, 4, 8),
        nn.Sequential(nn.Conv2d(1, 20, 5), CapsuleLinear(10, 8, 16)),
        nn.ModuleList([
            nn.Sequential(nn.Conv2d(1, 5, 3), CapsuleLinear(10, 8, 16, num_iterations=2)),
            nn.Sequential(CapsuleLinear(10, 8, 16), CapsuleConv2d(8, 16, 3, 4, 8)),
            CapsuleLinear(10, 8, 16)
        ])
    ]
    for model in models:
        schedule = MultiStepRI(model, milestones, addition, verbose=True)
        for epoch in range(20):
            schedule.step()
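# Only the scheduler's call pattern is shown above. By analogy with
# torch.optim.lr_scheduler.MultiStepLR, MultiStepRI presumably raises each capsule
# layer's num_iterations by `addition` at every milestone epoch. A sketch of that
# assumed behavior, not the library's actual implementation:
class MultiStepRISketch:
    """Assumed semantics of MultiStepRI; the name and duck-typing are guesses."""

    def __init__(self, model, milestones, addition, verbose=False):
        self.model, self.milestones = model, set(milestones)
        self.addition, self.verbose = addition, verbose
        self.epoch = 0

    def step(self):
        self.epoch += 1
        if self.epoch in self.milestones:
            for m in self.model.modules():
                # treat any module exposing num_iterations as a capsule layer
                if hasattr(m, 'num_iterations'):
                    m.num_iterations += self.addition
                    if self.verbose:
                        print(f'epoch {self.epoch}: num_iterations -> {m.num_iterations}')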
def test_dropout_optim(milestones, addition):
    models = [
        nn.Conv2d(1, 3, 3),
        CapsuleLinear(10, 8, 16, dropout=0.1),
        CapsuleConv2d(8, 16, 3, 4, 8),
        nn.Sequential(nn.Conv2d(1, 20, 5), nn.ReLU(), CapsuleLinear(10, 8, 16)),
        nn.ModuleList([
            nn.Sequential(nn.Conv2d(1, 5, 3), nn.ReLU(), CapsuleLinear(10, 8, 16, dropout=0.2)),
            nn.Sequential(CapsuleLinear(10, 8, 16), CapsuleConv2d(8, 16, 3, 4, 8)),
            CapsuleLinear(10, 8, 16)
        ])
    ]
    for model in models:
        schedule = MultiStepDropout(model, milestones, addition, verbose=True)
        for epoch in range(10):
            schedule.step()
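# MultiStepDropout presumably follows the same pattern, bumping each capsule layer's
# dropout attribute instead of its routing iterations, e.g. (values assumed):
# schedule = MultiStepDropout(model, milestones=[3, 6], addition=0.05, verbose=True)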