Example #1
    def __init__(self, num_class=101, 
                 network='resnet50', 
                 dropout=0.5, 
                 use_dropout=True, 
                 use_l2_norm=False,
                 use_final_bn=False):
        super(LinearClassifier, self).__init__()
        self.network = network
        self.num_class = num_class
        self.dropout = dropout
        self.use_dropout = use_dropout
        self.use_l2_norm = use_l2_norm
        self.use_final_bn = use_final_bn
        
        message = 'Classifier to %d classes with %s backbone;' % (num_class, network)
        if use_dropout: message += ' + dropout %f' % dropout
        if use_l2_norm: message += ' + L2Norm'
        if use_final_bn: message += ' + final BN'
        print(message)

        self.backbone, self.param = select_backbone(network)
        
        if use_final_bn:
            self.final_bn = nn.BatchNorm1d(self.param['feature_size'])
            self.final_bn.weight.data.fill_(1)
            self.final_bn.bias.data.zero_()
        
        if use_dropout:
            self.final_fc = nn.Sequential(
                nn.Dropout(dropout),
                nn.Linear(self.param['feature_size'], self.num_class))
        else:
            self.final_fc = nn.Sequential(
                nn.Linear(self.param['feature_size'], self.num_class))
        self._initialize_weights(self.final_fc)
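A minimal usage sketch for this linear-probe head, assuming LinearClassifier and select_backbone are importable from the surrounding project (the import path below is hypothetical):

from model.classifier import LinearClassifier  # hypothetical import path

# Linear probe over a ResNet-50 backbone, matching the options printed above.
model = LinearClassifier(num_class=101, network='resnet50',
                         dropout=0.5, use_dropout=True, use_final_bn=True)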
Example #2
    def __init__(self, network='s3d', dim=128, K=2048, m=0.999, T=0.07):
        '''
        network: backbone architecture passed to select_backbone (default: 's3d')
        dim: feature dimension (default: 128)
        K: queue size; number of negative keys (default: 2048)
        m: MoCo momentum for updating the key encoder (default: 0.999)
        T: softmax temperature (default: 0.07)
        '''
        super(InfoNCE, self).__init__()

        self.dim = dim 
        self.K = K
        self.m = m
        self.T = T

        # create the encoders (including non-linear projection head: 2 FC layers)
        backbone, self.param = select_backbone(network)
        feature_size = self.param['feature_size']
        self.encoder_q = nn.Sequential(
                            backbone, 
                            nn.AdaptiveAvgPool3d((1,1,1)),
                            nn.Conv3d(feature_size, feature_size, kernel_size=1, bias=True),
                            nn.ReLU(),
                            nn.Conv3d(feature_size, dim, kernel_size=1, bias=True))

        backbone, _ = select_backbone(network)
        self.encoder_k = nn.Sequential(
                            backbone, 
                            nn.AdaptiveAvgPool3d((1,1,1)),
                            nn.Conv3d(feature_size, feature_size, kernel_size=1, bias=True),
                            nn.ReLU(),
                            nn.Conv3d(feature_size, dim, kernel_size=1, bias=True))

        for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()):
            param_k.data.copy_(param_q.data)  # initialize
            param_k.requires_grad = False  # not update by gradient

        # create the queue
        self.register_buffer("queue", torch.randn(dim, K))
        self.queue = nn.functional.normalize(self.queue, dim=0)
        self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long))
Example #3
    def __init__(self, net='s3d', dropout=0.5, num_class=400):
        super(Classifier, self).__init__()
        self.backbone, self.param = select_backbone(net)
        feature_size = self.param['feature_size']

        self.AvgPool = nn.AdaptiveAvgPool3d(output_size=(1, 1, 1))
        self.Dropout = nn.Dropout3d(dropout)
        self.Conv = nn.Conv3d(feature_size,
                              num_class,
                              kernel_size=1,
                              stride=1,
                              bias=True)

        nn.init.normal_(self.Conv.weight.data, mean=0, std=0.01)
        nn.init.constant_(self.Conv.bias.data, 0.0)
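The 1x1x1 Conv3d after global average pooling acts as the final fully connected layer. A plausible forward pass for this head (the actual forward method is not shown in the example) would be:

    def forward(self, x):
        # x: (B, C, T, H, W) video clip
        feat = self.backbone(x)                  # (B, feature_size, t, h, w)
        feat = self.AvgPool(feat)                # (B, feature_size, 1, 1, 1)
        feat = self.Dropout(feat)
        logits = self.Conv(feat)                 # (B, num_class, 1, 1, 1)
        return logits.view(logits.size(0), -1)   # (B, num_class)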
Example #4
    def __init__(self,
                 network='s3d',
                 dim=128,
                 K=2048,
                 m=0.999,
                 T=0.07,
                 topk=5,
                 reverse=False):
        '''
        dim: feature dimension (default: 128)
        K: queue size; number of negative keys (default: 2048)
        m: MoCo momentum for updating the key encoder (default: 0.999)
        T: softmax temperature (default: 0.07)
        topk: number of nearest neighbours from the second-view queue used as extra positives (default: 5)
        reverse: swap the roles of the two input views (default: False)
        '''
        super(CoCLR, self).__init__(network, dim, K, m, T)

        self.topk = topk

        # create another encoder, for the second view of the data
        backbone, _ = select_backbone(network)
        feature_size = self.param['feature_size']
        self.sampler = nn.Sequential(
            backbone, nn.AdaptiveAvgPool3d((1, 1, 1)),
            nn.Conv3d(feature_size, feature_size, kernel_size=1, bias=True),
            nn.ReLU(), nn.Conv3d(feature_size, dim, kernel_size=1, bias=True))
        for param_s in self.sampler.parameters():
            param_s.requires_grad = False  # not update by gradient

        # create another queue, for the second view of the data
        self.register_buffer("queue_second", torch.randn(dim, K))
        self.queue_second = nn.functional.normalize(self.queue_second, dim=0)

        # for handling sibling videos, e.g. for UCF101 dataset
        self.register_buffer("queue_vname",
                             torch.ones(K, dtype=torch.long) * -1)
        # for monitoring purpose only
        self.register_buffer("queue_label",
                             torch.ones(K, dtype=torch.long) * -1)

        self.queue_is_full = False
        self.reverse = reverse
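The topk parameter together with queue_second implements the co-training idea: features of the second view pick the k most similar queue entries, which are then treated as extra positives. A self-contained illustration of that selection step (all tensor names below are made up for the example):

import torch
import torch.nn.functional as F

dim, K, topk = 128, 2048, 5
queue_second = F.normalize(torch.randn(dim, K), dim=0)     # stand-in for self.queue_second
k_second = F.normalize(torch.randn(8, dim), dim=1)         # second-view features for a batch of 8

sim = torch.einsum('nc,ck->nk', k_second, queue_second)    # (8, K) cosine similarities
_, topk_idx = torch.topk(sim, topk, dim=1)                 # per-sample indices of the 5 closest queue entries
# In CoCLR, indices like these mark additional positives in the contrastive loss mask.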
Example #5
def show_num_params(net_name):
    model, _ = select_backbone(net_name)
    num_param = sum(p.numel() for p in model.parameters())
    return '%s\t%d' % (net_name, num_param)
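A quick usage example; it assumes 's3d' is one of the backbone names accepted by select_backbone, as in the snippets above:

# Prints the backbone name and its total parameter count, tab-separated.
print(show_num_params('s3d'))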