def __init__(self, output_scale, noise_size=120, num_classes=0, out_channels=3, base_channels=96, block_depth=2, input_scale=4, with_shared_embedding=True, shared_dim=128, sn_eps=1e-6, init_type='ortho', concat_noise=True, act_cfg=dict(type='ReLU', inplace=False), upsample_cfg=dict(type='nearest', scale_factor=2), with_spectral_norm=True, auto_sync_bn=True, blocks_cfg=dict(type='BigGANDeepGenResBlock'), arch_cfg=None, out_norm_cfg=dict(type='BN'), pretrained=None, rgb2bgr=False): super().__init__() self.noise_size = noise_size self.num_classes = num_classes self.shared_dim = shared_dim self.with_shared_embedding = with_shared_embedding self.output_scale = output_scale self.arch = arch_cfg if arch_cfg else self._get_default_arch_cfg( self.output_scale, base_channels) self.input_scale = input_scale self.concat_noise = concat_noise self.blocks_cfg = deepcopy(blocks_cfg) self.upsample_cfg = deepcopy(upsample_cfg) self.block_depth = block_depth self.rgb2bgr = rgb2bgr # Validity Check # If 'num_classes' equals to zero, we shall set 'with_shared_embedding' # to False. if num_classes == 0: assert not self.with_shared_embedding assert not self.concat_noise elif not self.with_shared_embedding: # If not `with_shared_embedding`, we will use `nn.Embedding` to # replace the original `Linear` layer in conditional BN. # Meanwhile, we do not adopt split noises. assert not self.concat_noise # First linear layer if self.concat_noise: self.noise2feat = nn.Linear( self.noise_size + self.shared_dim, self.arch['in_channels'][0] * (self.input_scale**2)) else: self.noise2feat = nn.Linear( self.noise_size, self.arch['in_channels'][0] * (self.input_scale**2)) if with_spectral_norm: self.noise2feat = spectral_norm(self.noise2feat, eps=sn_eps) # If using 'shared_embedding', we will get an unified embedding of # label for all blocks. If not, we just pass the label to each # block. if with_shared_embedding: self.shared_embedding = nn.Embedding(num_classes, shared_dim) else: self.shared_embedding = nn.Identity() if num_classes > 0: if self.concat_noise: self.dim_after_concat = ( self.shared_dim + self.noise_size if self.with_shared_embedding else self.num_classes) else: self.dim_after_concat = ( self.shared_dim if self.with_shared_embedding else self.num_classes) else: self.dim_after_concat = 0 self.blocks_cfg.update( dict( dim_after_concat=self.dim_after_concat, act_cfg=act_cfg, sn_eps=sn_eps, input_is_label=(num_classes > 0) and (not with_shared_embedding), with_spectral_norm=with_spectral_norm, auto_sync_bn=auto_sync_bn)) self.conv_blocks = nn.ModuleList() for index, out_ch in enumerate(self.arch['out_channels']): for depth in range(self.block_depth): # change args to adapt to current block block_cfg_ = deepcopy(self.blocks_cfg) block_cfg_.update( dict( in_channels=self.arch['in_channels'][index], out_channels=out_ch if depth == (self.block_depth - 1) else self.arch['in_channels'][index], upsample_cfg=self.upsample_cfg if self.arch['upsample'][index] and depth == (self.block_depth - 1) else None)) self.conv_blocks.append(build_module(block_cfg_)) if self.arch['attention'][index]: self.conv_blocks.append( SelfAttentionBlock( out_ch, with_spectral_norm=with_spectral_norm, sn_eps=sn_eps)) self.output_layer = SNConvModule( self.arch['out_channels'][-1], out_channels, kernel_size=3, padding=1, with_spectral_norm=with_spectral_norm, spectral_norm_cfg=dict(eps=sn_eps), act_cfg=act_cfg, norm_cfg=out_norm_cfg, bias=True, order=('norm', 'act', 'conv')) self.init_weights(pretrained=pretrained, init_type=init_type)
def reset_classifier(self, num_classes, global_pool=''):
    self.num_classes = num_classes
    self.head = nn.Linear(
        self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
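# A minimal usage sketch (not from the original source): with a timm-style model that
# exposes reset_classifier(), passing num_classes=0 swaps the Linear head for nn.Identity,
# so forward() returns pooled features instead of class logits.
import torch
import timm  # assumed available; any model exposing reset_classifier() works

model = timm.create_model('vit_base_patch16_224', pretrained=False)
model.reset_classifier(0)  # head becomes nn.Identity()
features = model(torch.randn(1, 3, 224, 224))  # embeddings, not logits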
def __init__(self, in_channels=512, out_channels=17, num_stages=1, num_deconv_layers=3, num_deconv_filters=(256, 256, 256), num_deconv_kernels=(4, 4, 4), extra=None, loss_keypoint=None, train_cfg=None, test_cfg=None): super().__init__() self.in_channels = in_channels self.num_stages = num_stages self.loss = build_loss(loss_keypoint) self.train_cfg = {} if train_cfg is None else train_cfg self.test_cfg = {} if test_cfg is None else test_cfg self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap') if extra is not None and not isinstance(extra, dict): raise TypeError('extra should be dict or None.') # build multi-stage deconv layers self.multi_deconv_layers = nn.ModuleList([]) for _ in range(self.num_stages): if num_deconv_layers > 0: deconv_layers = self._make_deconv_layer( num_deconv_layers, num_deconv_filters, num_deconv_kernels, ) elif num_deconv_layers == 0: deconv_layers = nn.Identity() else: raise ValueError( f'num_deconv_layers ({num_deconv_layers}) should >= 0.') self.multi_deconv_layers.append(deconv_layers) identity_final_layer = False if extra is not None and 'final_conv_kernel' in extra: assert extra['final_conv_kernel'] in [0, 1, 3] if extra['final_conv_kernel'] == 3: padding = 1 elif extra['final_conv_kernel'] == 1: padding = 0 else: # 0 for Identity mapping. identity_final_layer = True kernel_size = extra['final_conv_kernel'] else: kernel_size = 1 padding = 0 # build multi-stage final layers self.multi_final_layers = nn.ModuleList([]) for i in range(self.num_stages): if identity_final_layer: final_layer = nn.Identity() else: final_layer = build_conv_layer( cfg=dict(type='Conv2d'), in_channels=num_deconv_filters[-1] if num_deconv_layers > 0 else in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=padding) self.multi_final_layers.append(final_layer)
def __init__(self, cid, nchannels, nactions): super(PNNColumn, self).__init__() nhidden = 256 self.cid = cid # 6 layers neural network self.nlayers = 6 # init normal nn, lateral connection, adapter layer and alpha self.w = nn.ModuleList() self.u = nn.ModuleList() self.v = nn.ModuleList() self.alpha = nn.ModuleList() # normal neural network self.w.append( nn.Conv2d(nchannels, 32, kernel_size=3, stride=2, padding=1)) self.w.extend([ nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1) for _ in range(self.nlayers - 3) ]) conv_out_size = self._get_conv_out((nchannels, 84, 84)) self.w.append(nn.Linear(conv_out_size, nhidden)) # w[-2] is the critic layer and w[-1] is the actor layer self.w.append( nn.ModuleList( [nn.Linear(nhidden, 1), nn.Linear(nhidden, nactions)])) # only add lateral connections and adapter layers if not first column # v[col][layer][(nnList on that layer)] for i in range(self.cid): self.v.append(nn.ModuleList()) # adapter layer self.v[i].append(nn.Identity()) self.v[i].extend([ nn.Conv2d(32, 1, kernel_size=1) for _ in range(self.nlayers - 3) ]) self.v[i].append(nn.Linear(conv_out_size, conv_out_size)) self.v[i].append( nn.ModuleList( [nn.Linear(nhidden, nhidden), nn.Linear(nhidden, nhidden)])) # alpha self.alpha.append(nn.ParameterList()) self.alpha[i].append( nn.Parameter(torch.Tensor(1), requires_grad=False)) self.alpha[i].extend([ nn.Parameter( torch.Tensor(np.array(np.random.choice([1e0, 1e-1, 1e-2])))) for _ in range(self.nlayers) ]) # lateral connection self.u.append(nn.ModuleList()) self.u[i].append(nn.Identity()) self.u[i].extend([ nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1) for _ in range(self.nlayers - 3) ]) self.u[i].append(nn.Linear(conv_out_size, nhidden)) self.u[i].append( nn.ModuleList( [nn.Linear(nhidden, 1), nn.Linear(nhidden, nactions)])) # init weights self._reset_parameters() self.w[-1][0].weight.data = self._normalized(self.w[-1][0].weight.data) self.w[-1][1].weight.data = self._normalized(self.w[-1][1].weight.data, 1e-2) for i in range(self.cid): self.v[i][-1][0].weight.data = self._normalized( self.v[i][-1][0].weight.data) self.v[i][-1][1].weight.data = self._normalized( self.v[i][-1][1].weight.data, 1e-2) self.u[i][-1][0].weight.data = self._normalized( self.u[i][-1][0].weight.data) self.u[i][-1][1].weight.data = self._normalized( self.u[i][-1][1].weight.data, 1e-2)
def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12,
             num_heads=12, mlp_ratio=4., qkv_bias=True, qk_scale=None, drop_rate=0., attn_drop_rate=0.,
             drop_path_rate=0., hybrid_backbone=None, norm_layer=nn.LayerNorm):
    super().__init__()
    self.num_classes = num_classes
    self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
    norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
    if hybrid_backbone is not None:
        self.patch_embed = HybridEmbed(hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim)
    else:
        self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
    num_patches = self.patch_embed.num_patches

    self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
    self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
    self.pos_drop = nn.Dropout(p=drop_rate)

    self.dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]  # stochastic depth decay rule
    self.blocks = nn.ModuleList([
        Block(
            dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=self.dpr[i], norm_layer=norm_layer,
        ) for i in range(depth)
    ])
    self.norm = norm_layer(embed_dim)

    # NOTE as per official impl, we could have a pre-logits representation dense layer + tanh here
    # self.repr = nn.Linear(embed_dim, representation_size)
    # self.repr_act = nn.Tanh()

    # Classifier head
    self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()

    trunc_normal_(self.pos_embed, std=.02)
    trunc_normal_(self.cls_token, std=.02)
    self.apply(self._init_weights)
def __init__(self, in_channels, out_channels, activation='relu'):
    super().__init__()
    self.in_channels, self.out_channels, self.activation = in_channels, out_channels, activation
    self.blocks = nn.Identity()
    self.activate = activation_func(activation)
    self.shortcut = nn.Identity()
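# Hedged sketch of how the two nn.Identity placeholders above are typically used: a
# subclass overrides self.blocks and self.shortcut, while the generic residual forward
# stays unchanged. ToyResidual is a made-up name for illustration only.
import torch
import torch.nn as nn

class ToyResidual(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.blocks = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.shortcut = nn.Identity()  # no projection needed when shapes already match
        self.activate = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.activate(self.blocks(x) + self.shortcut(x))

out = ToyResidual(8)(torch.randn(2, 8, 16, 16))  # shape preserved: (2, 8, 16, 16)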
def __init__(self, embedding_model, mixup_layer, n_class):
    super().__init__()
    self.mix_model = TMix(embedding_model, mixup_layer=mixup_layer)
    self.classifier = create_sentence_classifier(embedding_model.embed_dim, n_class)
    self.sentence_h = nn.Identity()
def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512], num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm, depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], num_stages=4): super().__init__() self.num_classes = num_classes self.depths = depths self.num_stages = num_stages dpr = [ x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) ] # stochastic depth decay rule cur = 0 for i in range(num_stages): patch_embed = PatchEmbed( img_size=img_size if i == 0 else img_size // (2**(i + 1)), patch_size=patch_size if i == 0 else 2, in_chans=in_chans if i == 0 else embed_dims[i - 1], embed_dim=embed_dims[i]) num_patches = patch_embed.num_patches if i != num_stages - 1 else patch_embed.num_patches + 1 pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dims[i])) pos_drop = nn.Dropout(p=drop_rate) block = nn.ModuleList([ Block(dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j], norm_layer=norm_layer, sr_ratio=sr_ratios[i]) for j in range(depths[i]) ]) cur += depths[i] setattr(self, f"patch_embed{i + 1}", patch_embed) setattr(self, f"pos_embed{i + 1}", pos_embed) setattr(self, f"pos_drop{i + 1}", pos_drop) setattr(self, f"block{i + 1}", block) self.norm = norm_layer(embed_dims[3]) # cls_token self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims[3])) # classification head self.head = nn.Linear( embed_dims[3], num_classes) if num_classes > 0 else nn.Identity() # init weights for i in range(num_stages): pos_embed = getattr(self, f"pos_embed{i + 1}") trunc_normal_(pos_embed, std=.02) trunc_normal_(self.cls_token, std=.02) self.apply(self._init_weights)
            encoder_params: Params of encoder module.
            pooling_params: Params of the pooling layer.
            head_params: 'Head' module params.

        Returns:
            Model.
        """
        encoder: nn.Module = nn.Identity()
        if (encoder_params_ := copy.deepcopy(encoder_params)) is not None:
            encoder_fn = MODULE.get(encoder_params_.pop("module"))
            encoder = encoder_fn(**encoder_params_)

        pool: nn.Module = nn.Identity()
        if (pooling_params_ := copy.deepcopy(pooling_params)) is not None:
            pool_fn = MODULE.get(pooling_params_.pop("module"))
            pool = pool_fn(**pooling_params_)

        head: nn.Module = nn.Identity()
        if (head_params_ := copy.deepcopy(head_params)) is not None:
            head_fn = MODULE.get(head_params_.pop("module"))
            head = head_fn(**head_params_)

        net = cls(encoder=encoder, pool=pool, head=head)
        utils.net_init_(net)

        return net


__all__ = ["VGGConv"]
def __init__(self, config):
    super().__init__(config)

    self.num_labels = config.num_labels
    self.swin = SwinModel(config)

    # Classifier head
    self.classifier = (
        nn.Linear(self.swin.num_features, config.num_labels) if config.num_labels > 0 else nn.Identity()
    )

    # Initialize weights and apply final processing
    self.post_init()
def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512], num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm, depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1]): super().__init__() self.num_classes = num_classes self.depths = depths # patch_embed self.patch_embed1 = PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dims[0]) self.patch_embed2 = GridDown(img_size=img_size // 4, patch_size=2, in_chans=embed_dims[0], embed_dim=embed_dims[1]) self.patch_embed3 = GridDown(img_size=img_size // 8, patch_size=2, in_chans=embed_dims[1], embed_dim=embed_dims[2]) self.patch_embed4 = GridDown(img_size=img_size // 16, patch_size=2, in_chans=embed_dims[2], embed_dim=embed_dims[3]) # pos_embed self.pos_embed1 = nn.Parameter( torch.zeros(1, self.patch_embed1.num_patches, embed_dims[0])) self.pos_drop1 = nn.Dropout(p=drop_rate) self.pos_embed2 = nn.Parameter( torch.zeros(1, self.patch_embed2.num_patches, embed_dims[1])) self.pos_drop2 = nn.Dropout(p=drop_rate) self.pos_embed3 = nn.Parameter( torch.zeros(1, self.patch_embed3.num_patches, embed_dims[2])) self.pos_drop3 = nn.Dropout(p=drop_rate) self.pos_embed4 = nn.Parameter( torch.zeros(1, self.patch_embed4.num_patches + 1, embed_dims[3])) self.pos_drop4 = nn.Dropout(p=drop_rate) # transformer encoder dpr = [ x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) ] # stochastic depth decay rule cur = 0 self.block1 = nn.ModuleList([ Block(dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, sr_ratio=sr_ratios[0]) for i in range(depths[0]) ]) cur += depths[0] self.block2 = nn.ModuleList([ Block(dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, sr_ratio=sr_ratios[1]) for i in range(depths[1]) ]) cur += depths[1] self.block3 = nn.ModuleList([ Block(dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, sr_ratio=sr_ratios[2]) for i in range(depths[2]) ]) cur += depths[2] self.block4 = nn.ModuleList([ Block(dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, sr_ratio=sr_ratios[3]) for i in range(depths[3]) ]) self.norm = norm_layer(embed_dims[3]) # cls_token self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims[3])) # classification head self.head = nn.Linear( embed_dims[3], num_classes) if num_classes > 0 else nn.Identity() # init weights trunc_normal_(self.pos_embed1, std=.02) trunc_normal_(self.pos_embed2, std=.02) trunc_normal_(self.pos_embed3, std=.02) trunc_normal_(self.pos_embed4, std=.02) trunc_normal_(self.cls_token, std=.02) self.apply(self._init_weights)
def __init__(self, config, dim, input_resolution, num_heads, shift_size=0):
    super().__init__()
    self.chunk_size_feed_forward = config.chunk_size_feed_forward
    self.shift_size = shift_size
    self.window_size = config.window_size
    self.input_resolution = input_resolution
    if min(self.input_resolution) <= self.window_size:
        # if window size is larger than input resolution, we don't partition windows
        self.shift_size = 0
        self.window_size = min(self.input_resolution)
    self.layernorm_before = nn.LayerNorm(dim, eps=config.layer_norm_eps)
    self.attention = SwinAttention(config, dim, num_heads)
    self.drop_path = SwinDropPath(config.drop_path_rate) if config.drop_path_rate > 0.0 else nn.Identity()
    self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps)
    self.intermediate = SwinIntermediate(config, dim)
    self.output = SwinOutput(config, dim)

    if self.shift_size > 0:
        # calculate attention mask for SW-MSA
        height, width = self.input_resolution
        img_mask = torch.zeros((1, height, width, 1))
        height_slices = (
            slice(0, -self.window_size),
            slice(-self.window_size, -self.shift_size),
            slice(-self.shift_size, None),
        )
        width_slices = (
            slice(0, -self.window_size),
            slice(-self.window_size, -self.shift_size),
            slice(-self.shift_size, None),
        )
        count = 0
        for height_slice in height_slices:
            for width_slice in width_slices:
                img_mask[:, height_slice, width_slice, :] = count
                count += 1

        mask_windows = window_partition(img_mask, self.window_size)
        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
    else:
        attn_mask = None

    self.attn_mask = attn_mask
import torch
import torch.nn as nn

from horch.models.modules import Conv2d, get_activation, get_norm_layer

OPS = {
    'none': lambda C, stride: Zero(stride),
    'avg_pool_3x3': lambda C, stride: nn.AvgPool2d(
        3, stride=stride, padding=1, count_include_pad=False),
    'max_pool_3x3': lambda C, stride: nn.MaxPool2d(3, stride=stride, padding=1),
    'skip_connect': lambda C, stride: nn.Identity() if stride == 1 else FactorizedReduce(C, C),
    'sep_conv_3x3': lambda C, stride: SepConv(C, C, 3, stride, 1),
    'sep_conv_5x5': lambda C, stride: SepConv(C, C, 5, stride, 2),
    'sep_conv_7x7': lambda C, stride: SepConv(C, C, 7, stride, 3),
    'nor_conv_1x1': lambda C, stride: ReLUConvBN(C, C, 1, stride),
    'nor_conv_3x3': lambda C, stride: ReLUConvBN(C, C, 3, stride),
    'dil_conv_3x3': lambda C, stride: DilConv(C, C, 3, stride, 2),
    'dil_conv_5x5': lambda C, stride: DilConv(C, C, 5, stride, 4),
    'conv_7x1_1x7': lambda C, stride: nn.Sequential(
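# Illustrative lookups, assuming the OPS table above is in scope: each entry is a factory
# taking (channels, stride), and 'skip_connect' degenerates to nn.Identity at stride 1.
pool_op = OPS['max_pool_3x3'](64, 2)   # nn.MaxPool2d(3, stride=2, padding=1)
skip_op = OPS['skip_connect'](64, 1)   # nn.Identity()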
def __init__(self, problem_type, num_net_outputs=None, quantile_levels=None, train_dataset=None, architecture_desc=None, device=None, **kwargs): if (architecture_desc is None) and (train_dataset is None): raise ValueError( "train_dataset cannot = None if architecture_desc=None") super().__init__() self.problem_type = problem_type if self.problem_type == QUANTILE: self.register_buffer( 'quantile_levels', torch.Tensor(quantile_levels).float().reshape(1, -1)) self.device = torch.device('cpu') if device is None else device if architecture_desc is None: params = self._set_params(**kwargs) # adpatively specify network architecture based on training dataset self.from_logits = False self.has_vector_features = train_dataset.has_vector_features() self.has_embed_features = train_dataset.num_embed_features() > 0 if self.has_embed_features: num_categs_per_feature = train_dataset.getNumCategoriesEmbeddings( ) embed_dims = get_embed_sizes(train_dataset, params, num_categs_per_feature) if self.has_vector_features: vector_dims = train_dataset.data_list[ train_dataset.vectordata_index].shape[-1] else: # ignore train_dataset, params, etc. Recreate architecture based on description: self.architecture_desc = architecture_desc self.has_vector_features = architecture_desc['has_vector_features'] self.has_embed_features = architecture_desc['has_embed_features'] self.from_logits = architecture_desc['from_logits'] params = architecture_desc['params'] if self.has_embed_features: num_categs_per_feature = architecture_desc[ 'num_categs_per_feature'] embed_dims = architecture_desc['embed_dims'] if self.has_vector_features: vector_dims = architecture_desc['vector_dims'] # init input size input_size = 0 # define embedding layer: if self.has_embed_features: self.embed_blocks = nn.ModuleList() for i in range(len(num_categs_per_feature)): self.embed_blocks.append( nn.Embedding(num_embeddings=num_categs_per_feature[i], embedding_dim=embed_dims[i])) input_size += embed_dims[i] # update input size if self.has_vector_features: input_size += vector_dims # activation act_fn = nn.Identity() if params['activation'] == 'elu': act_fn = nn.ELU() elif params['activation'] == 'relu': act_fn = nn.ReLU() elif params['activation'] == 'tanh': act_fn = nn.Tanh() layers = [] if params['use_batchnorm']: layers.append(nn.BatchNorm1d(input_size, track_running_stats=False)) layers.append(nn.Linear(input_size, params['hidden_size'])) layers.append(act_fn) for _ in range(params['num_layers'] - 1): if params['use_batchnorm']: layers.append( nn.BatchNorm1d(params['hidden_size'], track_running_stats=False)) layers.append(nn.Dropout(params['dropout_prob'])) layers.append( nn.Linear(params['hidden_size'], params['hidden_size'])) layers.append(act_fn) layers.append(nn.Linear(params['hidden_size'], num_net_outputs)) self.main_block = nn.Sequential(*layers) if self.problem_type in [REGRESSION, QUANTILE]: # set range for output y_range = params[ 'y_range'] # Used specifically for regression. = None for classification. 
self.y_constraint = None # determines if Y-predictions should be constrained if y_range is not None: if y_range[0] == -np.inf and y_range[1] == np.inf: self.y_constraint = None # do not worry about Y-range in this case elif y_range[0] >= 0 and y_range[1] == np.inf: self.y_constraint = 'nonnegative' elif y_range[0] == -np.inf and y_range[1] <= 0: self.y_constraint = 'nonpositive' else: self.y_constraint = 'bounded' self.y_lower = y_range[0] self.y_upper = y_range[1] self.y_span = self.y_upper - self.y_lower if self.problem_type == QUANTILE: self.alpha = params['alpha'] # for huber loss if self.problem_type == SOFTCLASS: self.log_softmax = torch.nn.LogSoftmax(dim=1) if self.problem_type in [BINARY, MULTICLASS, SOFTCLASS]: self.softmax = torch.nn.Softmax(dim=1) if architecture_desc is None: # Save Architecture description self.architecture_desc = { 'has_vector_features': self.has_vector_features, 'has_embed_features': self.has_embed_features, 'params': params, 'num_net_outputs': num_net_outputs, 'from_logits': self.from_logits } if self.has_embed_features: self.architecture_desc[ 'num_categs_per_feature'] = num_categs_per_feature self.architecture_desc['embed_dims'] = embed_dims if self.has_vector_features: self.architecture_desc['vector_dims'] = vector_dims
def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm, global_pool=None, block_layers=LayerScale_Block, block_layers_token=LayerScale_Block_CA, Patch_layer=PatchEmbed, act_layer=nn.GELU, Attention_block=Attention_talking_head, Mlp_block=Mlp, init_scale=1e-4, Attention_block_token_only=Class_Attention, Mlp_block_token_only=Mlp, depth_token_only=2, mlp_ratio_clstk=4.0): super().__init__() self.num_classes = num_classes self.num_features = self.embed_dim = embed_dim self.patch_embed = Patch_layer(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) num_patches = self.patch_embed.num_patches self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) self.pos_drop = nn.Dropout(p=drop_rate) dpr = [drop_path_rate for i in range(depth)] self.blocks = nn.ModuleList([ block_layers(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, act_layer=act_layer, Attention_block=Attention_block, Mlp_block=Mlp_block, init_values=init_scale) for i in range(depth) ]) self.blocks_token_only = nn.ModuleList([ block_layers_token(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio_clstk, qkv_bias=qkv_bias, qk_scale=qk_scale, drop=0.0, attn_drop=0.0, drop_path=0.0, norm_layer=norm_layer, act_layer=act_layer, Attention_block=Attention_block_token_only, Mlp_block=Mlp_block_token_only, init_values=init_scale) for i in range(depth_token_only) ]) self.norm = norm_layer(embed_dim) self.feature_info = [ dict(num_chs=embed_dim, reduction=0, module='head') ] self.head = nn.Linear( embed_dim, num_classes) if num_classes > 0 else nn.Identity() trunc_normal_(self.pos_embed, std=.02) trunc_normal_(self.cls_token, std=.02) self.apply(self._init_weights)
class VGGConv(nn.Module): """VGG-like neural network for image classification. Args: encoder: Image encoder module, usually used for the extraction of embeddings from input signals. pool: Pooling layer, used to reduce embeddings from the encoder. head: Classification head, usually consists of Fully Connected layers. """ def __init__( self, encoder: nn.Module, pool: nn.Module, head: nn.Module, ) -> None: super().__init__() self.encoder = encoder self.pool = pool self.head = head def forward(self, x: torch.Tensor) -> torch.Tensor: """Forward call. Args: x: Batch of images. Returns: Batch of logits. """ x = self.pool(self.encoder(x)) x = x.view(x.shape[0], -1) x = self.head(x) return x @classmethod def get_from_params( cls, encoder_params: Optional[dict] = None, pooling_params: Optional[dict] = None, head_params: Optional[dict] = None, ) -> "VGGConv": """Create model based on it config. Args: encoder_params: Params of encoder module. pooling_params: Params of the pooling layer. head_params: 'Head' module params. Returns: Model. """ encoder: nn.Module = nn.Identity() if (encoder_params_ := copy.deepcopy(encoder_params)) is not None: encoder_fn = MODULE.get(encoder_params_.pop("module")) encoder = encoder_fn(**encoder_params_) pool: nn.Module = nn.Identity() if (pooling_params_ := copy.deepcopy(pooling_params)) is not None: pool_fn = MODULE.get(pooling_params_.pop("module")) pool = pool_fn(**pooling_params_)
def activation_func(activation):
    return nn.ModuleDict(
        [['relu', nn.ReLU(inplace=True)],
         ['leaky_relu', nn.LeakyReLU(negative_slope=0.01, inplace=True)],
         ['selu', nn.SELU(inplace=True)],
         ['none', nn.Identity()]])[activation]
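# Quick check of the lookup above (assuming activation_func is importable): every call
# builds a fresh ModuleDict and indexes it, so each caller receives its own module.
act = activation_func('leaky_relu')   # nn.LeakyReLU(negative_slope=0.01, inplace=True)
noop = activation_func('none')        # nn.Identity(), handy for "no activation" configs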
def __init__( self, out_features: int, model_config: ModelConfig, met_config: MetricLearningConfig, pooling_config: PoolingConfig, train_df: pd.DataFrame = pd.DataFrame(), ): super(ShopeeImgNet4, self).__init__() self.model_config = model_config self.pooling_config = pooling_config self.met_config = met_config channel_size = model_config.channel_size if "efficientnet-" in model_config.model_arch: self.backbone = (EfficientNet.from_pretrained( model_config.model_arch) if model_config.pretrained else EfficientNet.from_name(model_config.model_arch)) else: self.backbone = timm.create_model( model_config.model_arch, pretrained=model_config.pretrained) if ("resnext" in model_config.model_arch or "resnet" in model_config.model_arch or "xception" in model_config.model_arch or "resnest" in model_config.model_arch): final_in_features = self.backbone.fc.in_features self.backbone.fc = nn.Identity() elif "efficientnet-" in model_config.model_arch: final_in_features = self.backbone._fc.in_features self.backbone._dropout = nn.Identity() self.backbone._fc = nn.Identity() self.backbone._swish = nn.Identity() elif "vit" in model_config.model_arch: final_in_features = self.backbone.head.in_features self.backbone.head = nn.Identity() elif "nfnet" in model_config.model_arch: final_in_features = self.backbone.head.fc.in_features self.backbone.head.global_pool = nn.Identity() self.backbone.head.fc = nn.Identity() else: final_in_features = self.backbone.classifier.in_features self.backbone.classifier = nn.Identity() if ("efficientnet-" not in model_config.model_arch and "nfnet" not in model_config.model_arch): self.backbone.global_pool = nn.Identity() if pooling_config.name.lower() == "gem": self.pooling = GeM(**pooling_config.params) else: self.pooling = PoolingFactory.get_pooling(pooling_config) self.dropout = nn.Dropout(p=model_config.dropout) self.bn1 = nn.BatchNorm1d(final_in_features) self.fc = nn.Linear(final_in_features, channel_size) self.bn2 = nn.BatchNorm1d(channel_size) if met_config.name == "ArcAdaptiveMarginProduct": self.margin = MetricLearningFactory.get_metric_learning_product( met_config, in_features=channel_size, out_features=out_features, train_df=train_df, ) else: self.margin = MetricLearningFactory.get_metric_learning_product( met_config, in_features=channel_size, out_features=out_features, ) self._init_params()
def __init__(self, path=None, features=256, non_negative=True, yolo_cfg='', augment=False, image_size=None, device='cpu'): """Init. Args: path (str, optional): Path to saved model. Defaults to None. features (int, optional): Number of features. Defaults to 256. backbone (str, optional): Backbone network for encoder. Defaults to resnet50 """ print("Loading weights: ", path) super(MidasYoloNet, self).__init__() use_pretrained = False if path is None else True self.pretrained, self.scratch = blocks._make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) # Midas Decoder part self.scratch.refinenet4 = layers.FeatureFusionBlock(features) self.scratch.refinenet3 = layers.FeatureFusionBlock(features) self.scratch.refinenet2 = layers.FeatureFusionBlock(features) self.scratch.refinenet1 = layers.FeatureFusionBlock(features) self.scratch.output_conv = nn.Sequential( nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), layers.Interpolate(scale_factor=2, mode="bilinear"), nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), nn.ReLU(True), nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), nn.ReLU(True) if non_negative else nn.Identity(), ) self.yolo_head = nn.Sequential( nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False), # 208 x 208 nn.BatchNorm2d(32, momentum=0.03, eps=0.0001), nn.LeakyReLU(negative_slope=0.1, inplace=True), nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, bias=False), # 104 x 104 nn.BatchNorm2d(64, momentum=0.03, eps=0.0001), nn.LeakyReLU(negative_slope=0.1, inplace=True), nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False), # 52 x 52 nn.BatchNorm2d(128, momentum=0.03, eps=0.0001), nn.LeakyReLU(negative_slope=0.1, inplace=True), nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1, bias=False), # 26 x 26 nn.BatchNorm2d(256, momentum=0.03, eps=0.0001), nn.LeakyReLU(negative_slope=0.1, inplace=True), nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1, bias=False), # 13 x 13 nn.BatchNorm2d(512, momentum=0.03, eps=0.0001), nn.LeakyReLU(negative_slope=0.1, inplace=True), nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1, bias=False), # 13 x 13 nn.BatchNorm2d(1024, momentum=0.03, eps=0.0001), nn.LeakyReLU(negative_slope=0.1, inplace=True), nn.Conv2d(1024, 2048, kernel_size=3, stride=1, padding=1, bias=False), # 13 x 13 nn.BatchNorm2d(2048, momentum=0.03, eps=0.0001), nn.LeakyReLU(negative_slope=0.1, inplace=True) ) # Concat midas output with yolo head output self.yolo_connect = nn.Sequential( nn.Conv2d(2048+256, 2048, kernel_size=3, stride=1, padding=1, bias=False), # 13 x 13 nn.BatchNorm2d(2048, momentum=0.03, eps=0.0001), nn.LeakyReLU(negative_slope=0.1, inplace=True) ) self.yolo_decoder = yolo3_net.Darknet(cfg=yolo_cfg, img_size=image_size) # self.yolo_decoder = yolo3_net.YoloDecoder(img_size=image_size, features=features) self.yolo_layers = self.yolo_decoder.yolo_layers # self.module_list = self.yolo_decoder.module_list # ToDo Smita: fix this, its a hack # Add planercnn model. Can directly add, as its backbone is also Resnet101, same as midas planercnn_config = Config(None) self.planercnn_decoder = planercnn_net.MaskRCNN(planercnn_config, device)
def __init__(self, in_chs, out_chs=None, stride=1, dilation=1, first_dilation=None, alpha=1.0, beta=1.0, bottle_ratio=0.25, group_size=None, ch_div=1, reg=True, extra_conv=False, skipinit=False, attn_layer=None, attn_gain=2.0, act_layer=None, conv_layer=None, drop_path_rate=0.): super().__init__() first_dilation = first_dilation or dilation out_chs = out_chs or in_chs # RegNet variants scale bottleneck from in_chs, otherwise scale from out_chs like ResNet mid_chs = make_divisible( in_chs * bottle_ratio if reg else out_chs * bottle_ratio, ch_div) groups = 1 if not group_size else mid_chs // group_size if group_size and group_size % ch_div == 0: mid_chs = group_size * groups # correct mid_chs if group_size divisible by ch_div, otherwise error self.alpha = alpha self.beta = beta self.attn_gain = attn_gain if in_chs != out_chs or stride != 1 or dilation != first_dilation: self.downsample = DownsampleAvg(in_chs, out_chs, stride=stride, dilation=dilation, first_dilation=first_dilation, conv_layer=conv_layer) else: self.downsample = None self.act1 = act_layer() self.conv1 = conv_layer(in_chs, mid_chs, 1) self.act2 = act_layer(inplace=True) self.conv2 = conv_layer(mid_chs, mid_chs, 3, stride=stride, dilation=first_dilation, groups=groups) if extra_conv: self.act2b = act_layer(inplace=True) self.conv2b = conv_layer(mid_chs, mid_chs, 3, stride=1, dilation=dilation, groups=groups) else: self.act2b = None self.conv2b = None if reg and attn_layer is not None: self.attn = attn_layer( mid_chs) # RegNet blocks apply attn btw conv2 & 3 else: self.attn = None self.act3 = act_layer() self.conv3 = conv_layer(mid_chs, out_chs, 1) if not reg and attn_layer is not None: self.attn_last = attn_layer( out_chs) # ResNet blocks apply attn after conv3 else: self.attn_last = None self.drop_path = DropPath( drop_path_rate) if drop_path_rate > 0 else nn.Identity() self.skipinit_gain = nn.Parameter( torch.tensor(0.)) if skipinit else None
def __init__(self, in_channels, out_channels):
    super().__init__()

    should_skip = in_channels == out_channels
    self.convolution = nn.Conv2d(in_channels, out_channels, kernel_size=1) if not should_skip else nn.Identity()
    self.fusion = GLPNSelectiveFeatureFusion(out_channels)
    self.upsample = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
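# Minimal sketch of the channel-matching pattern above (make_projection is a hypothetical
# helper name): use nn.Identity when in/out channels already agree, otherwise a 1x1 projection.
import torch.nn as nn

def make_projection(in_channels, out_channels):
    if in_channels == out_channels:
        return nn.Identity()
    return nn.Conv2d(in_channels, out_channels, kernel_size=1)

proj_same = make_projection(64, 64)    # nn.Identity()
proj_diff = make_projection(64, 128)   # 1x1 Conv2d projection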
def __init__(self, cfg: NfCfg, num_classes=1000, in_chans=3, global_pool='avg', output_stride=32, drop_rate=0., drop_path_rate=0.): super().__init__() self.num_classes = num_classes self.drop_rate = drop_rate assert cfg.act_layer in _nonlin_gamma, f"Please add non-linearity constants for activation ({cfg.act_layer})." conv_layer = ScaledStdConv2dSame if cfg.same_padding else ScaledStdConv2d if cfg.gamma_in_act: act_layer = act_with_gamma(cfg.act_layer, gamma=_nonlin_gamma[cfg.act_layer]) conv_layer = partial(conv_layer, eps=1e-4) # DM weights better with higher eps else: act_layer = get_act_layer(cfg.act_layer) conv_layer = partial(conv_layer, gamma=_nonlin_gamma[cfg.act_layer]) attn_layer = partial(get_attn(cfg.attn_layer), ** cfg.attn_kwargs) if cfg.attn_layer else None stem_chs = make_divisible( (cfg.stem_chs or cfg.channels[0]) * cfg.width_factor, cfg.ch_div) self.stem, stem_stride, stem_feat = create_stem(in_chans, stem_chs, cfg.stem_type, conv_layer=conv_layer, act_layer=act_layer) self.feature_info = [stem_feat] if stem_stride == 4 else [] drop_path_rates = [ x.tolist() for x in torch.linspace( 0, drop_path_rate, sum(cfg.depths)).split(cfg.depths) ] prev_chs = stem_chs net_stride = stem_stride dilation = 1 expected_var = 1.0 stages = [] for stage_idx, stage_depth in enumerate(cfg.depths): stride = 1 if stage_idx == 0 and stem_stride > 2 else 2 if stride == 2: self.feature_info += [ dict(num_chs=prev_chs, reduction=net_stride, module=f'stages.{stage_idx}.0.act1') ] if net_stride >= output_stride and stride > 1: dilation *= stride stride = 1 net_stride *= stride first_dilation = 1 if dilation in (1, 2) else 2 blocks = [] for block_idx in range(cfg.depths[stage_idx]): first_block = block_idx == 0 and stage_idx == 0 out_chs = make_divisible( cfg.channels[stage_idx] * cfg.width_factor, cfg.ch_div) blocks += [ NormFreeBlock( in_chs=prev_chs, out_chs=out_chs, alpha=cfg.alpha, beta=1. / expected_var**0.5, stride=stride if block_idx == 0 else 1, dilation=dilation, first_dilation=first_dilation, group_size=cfg.group_size, bottle_ratio=1. if cfg.reg and first_block else cfg.bottle_ratio, ch_div=cfg.ch_div, reg=cfg.reg, extra_conv=cfg.extra_conv, skipinit=cfg.skipinit, attn_layer=attn_layer, attn_gain=cfg.attn_gain, act_layer=act_layer, conv_layer=conv_layer, drop_path_rate=drop_path_rates[stage_idx][block_idx], ) ] if block_idx == 0: expected_var = 1. # expected var is reset after first block of each stage expected_var += cfg.alpha**2 # Even if reset occurs, increment expected variance first_dilation = dilation prev_chs = out_chs stages += [nn.Sequential(*blocks)] self.stages = nn.Sequential(*stages) if cfg.num_features: # The paper NFRegNet models have an EfficientNet-like final head convolution. 
self.num_features = make_divisible( cfg.width_factor * cfg.num_features, cfg.ch_div) self.final_conv = conv_layer(prev_chs, self.num_features, 1) else: self.num_features = prev_chs self.final_conv = nn.Identity() self.final_act = act_layer(inplace=cfg.num_features > 0) self.feature_info += [ dict(num_chs=self.num_features, reduction=net_stride, module='final_act') ] self.head = ClassifierHead(self.num_features, num_classes, pool_type=global_pool, drop_rate=self.drop_rate) for n, m in self.named_modules(): if 'fc' in n and isinstance(m, nn.Linear): if cfg.zero_init_fc: nn.init.zeros_(m.weight) else: nn.init.normal_(m.weight, 0., .01) if m.bias is not None: nn.init.zeros_(m.bias) elif isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='linear') if m.bias is not None: nn.init.zeros_(m.bias)
def __init__( self, dimensions: int, in_channels: int, out_channels: int, strides: Union[Sequence[int], int] = 1, kernel_size: Union[Sequence[int], int] = 3, subunits: int = 2, act: Optional[Union[Tuple, str]] = Act.PRELU, norm: Union[Tuple, str] = Norm.INSTANCE, dropout: Optional[Union[Tuple, str, float]] = None, dropout_dim: int = 1, dilation: Union[Sequence[int], int] = 1, bias: bool = True, last_conv_only: bool = False, padding: Optional[Union[Sequence[int], int]] = None, ) -> None: super().__init__() self.dimensions = dimensions self.in_channels = in_channels self.out_channels = out_channels self.conv = nn.Sequential() self.residual = nn.Identity() if not padding: padding = same_padding(kernel_size, dilation) schannels = in_channels sstrides = strides subunits = max(1, subunits) for su in range(subunits): conv_only = last_conv_only and su == (subunits - 1) unit = Convolution( dimensions, schannels, out_channels, strides=sstrides, kernel_size=kernel_size, act=act, norm=norm, dropout=dropout, dropout_dim=dropout_dim, dilation=dilation, bias=bias, conv_only=conv_only, padding=padding, ) self.conv.add_module(f"unit{su:d}", unit) # after first loop set channels and strides to what they should be for subsequent units schannels = out_channels sstrides = 1 # apply convolution to input to change number of output channels and size to match that coming from self.conv if np.prod(strides) != 1 or in_channels != out_channels: rkernel_size = kernel_size rpadding = padding if np.prod(strides) == 1: # if only adapting number of channels a 1x1 kernel is used with no padding rkernel_size = 1 rpadding = 0 conv_type = Conv[Conv.CONV, dimensions] self.residual = conv_type(in_channels, out_channels, rkernel_size, strides, rpadding, bias=bias)
def __init__(self, in_channels, out_channels, kernel_size=3, upsampling=1):
    conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2)
    upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity()
    super().__init__(conv2d, upsampling)
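# Hedged usage sketch, assuming the constructor above belongs to an nn.Sequential-based
# segmentation head; SegmentationHead is a placeholder name for that class. With
# upsampling=1 the second stage stays nn.Identity; larger values upsample bilinearly.
import torch

head = SegmentationHead(in_channels=256, out_channels=1, kernel_size=3, upsampling=4)
logits = head(torch.randn(2, 256, 64, 64))  # (2, 1, 256, 256) after 4x upsampling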
def __init__(self, args, img_size=32, patch_size=None, in_chans=3, num_classes=1, embed_dim=None, depth=7, num_heads=4, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., hybrid_backbone=None, norm_layer=nn.LayerNorm): super().__init__() self.num_classes = num_classes self.num_features = embed_dim = self.embed_dim = args.df_dim # num_features for consistency with other models depth = args.d_depth self.args = args patch_size = args.patch_size self.patch_embed = nn.Conv2d(3, embed_dim, kernel_size=patch_size, stride=patch_size, padding=0) num_patches = (args.img_size // patch_size)**2 self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) self.pos_embed = nn.Parameter( torch.zeros(1, num_patches + 1, embed_dim)) self.pos_drop = nn.Dropout(p=drop_rate) dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth) ] # stochastic depth decay rule self.blocks = nn.ModuleList([ Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer) for i in range(depth) ]) self.norm = norm_layer(embed_dim) # NOTE as per official impl, we could have a pre-logits representation dense layer + tanh here #self.repr = nn.Linear(embed_dim, representation_size) #self.repr_act = nn.Tanh() # Classifier head self.head = nn.Linear( embed_dim, num_classes) if num_classes > 0 else nn.Identity() trunc_normal_(self.pos_embed, std=.02) trunc_normal_(self.cls_token, std=.02) self.apply(self._init_weights)
def main(args: argparse.Namespace): logger = CompleteLogger(args.log, args.phase) print(args) if args.seed is not None: random.seed(args.seed) torch.manual_seed(args.seed) cudnn.deterministic = True warnings.warn('You have chosen to seed training. ' 'This will turn on the CUDNN deterministic setting, ' 'which can slow down your training considerably! ' 'You may see unexpected behavior when restarting ' 'from checkpoints.') cudnn.benchmark = True # Data loading code train_transform = utils.get_train_transform(args.train_resizing, not args.no_hflip, args.color_jitter) val_transform = utils.get_val_transform(args.val_resizing) print("train_transform: ", train_transform) print("val_transform: ", val_transform) train_dataset, val_dataset, num_classes = utils.get_dataset( args.data, args.root, train_transform, val_transform, args.sample_rate, args.num_samples_per_classes) train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, drop_last=False) val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers) print("training dataset size: {} test dataset size: {}".format( len(train_dataset), len(val_dataset))) # create model print("=> using pre-trained model '{}'".format(args.arch)) backbone = utils.get_model(args.arch, args.pretrained) pool_layer = nn.Identity() if args.no_pool else None classifier = Classifier(backbone, num_classes, head_source=backbone.copy_head(), pool_layer=pool_layer, finetune=args.finetune).to(device) kd = KnowledgeDistillationLoss(args.T) source_classifier = nn.Sequential(classifier.backbone, classifier.pool_layer, classifier.head_source) pretrain_labels = collect_pretrain_labels(train_loader, source_classifier, device) train_dataset = CombineDataset( [train_dataset, TensorDataset(pretrain_labels)]) train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, drop_last=True) train_iter = ForeverDataIterator(train_loader) # define optimizer and lr scheduler optimizer = SGD(classifier.get_parameters(args.lr), momentum=args.momentum, weight_decay=args.wd, nesterov=True) lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, args.lr_decay_epochs, gamma=args.lr_gamma) # resume from the best checkpoint if args.phase == 'test': checkpoint = torch.load(logger.get_checkpoint_path('best'), map_location='cpu') classifier.load_state_dict(checkpoint) acc1 = utils.validate(val_loader, classifier, args, device) print(acc1) return # start training best_acc1 = 0.0 for epoch in range(args.epochs): # train for one epoch train(train_iter, classifier, kd, optimizer, epoch, args) lr_scheduler.step() # evaluate on validation set acc1 = utils.validate(val_loader, classifier, args, device) # remember best acc@1 and save checkpoint torch.save(classifier.state_dict(), logger.get_checkpoint_path('latest')) if acc1 > best_acc1: shutil.copy(logger.get_checkpoint_path('latest'), logger.get_checkpoint_path('best')) best_acc1 = max(acc1, best_acc1) print("best_acc1 = {:3.1f}".format(best_acc1)) logger.close()
dataiter = iter(train_loader)
(images1, images2), labels = next(dataiter)  # use next(); the .next() method was removed from DataLoader iterators

imshow(torchvision.utils.make_grid(images1))
imshow(torchvision.utils.make_grid(images2))
print(' '.join('%5s' % train_loader.dataset.classes[labels[j]]
               for j in range(len(labels))))

import torch.nn as nn
from torchvision import models

resnet = eval(f'models.{backbone_name}()')
# resnet = eval(f"{backbone_name}()")
resnet.output_dim = resnet.fc.in_features
resnet.fc = nn.Identity()

# if backbone_name == 'resnet18':
#     resnet = models.resnet18(pretrained=False)
# elif backbone_name == 'resnet50':
#     resnet = models.resnet50(pretrained=False)
# else:
#     raise NotImplementedError("Backbone is not implemented!")

import copy
import math
from torch.nn import functional


class MLP(nn.Module):
    def __init__(self, input_dim):
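# Hedged, self-contained version of the feature-extractor pattern above: replacing
# resnet.fc with nn.Identity makes the model emit backbone embeddings of size output_dim.
import torch
import torch.nn as nn
from torchvision import models

resnet = models.resnet18()
resnet.output_dim = resnet.fc.in_features  # 512 for resnet18
resnet.fc = nn.Identity()
embeddings = resnet(torch.randn(4, 3, 224, 224))  # shape: (4, 512)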
def __init__(self, config):
    super().__init__(config)

    self.num_labels = config.num_labels
    self.deit = DeiTModel(config, add_pooling_layer=False)

    # Classifier heads
    self.cls_classifier = (
        nn.Linear(config.hidden_size, config.num_labels) if config.num_labels > 0 else nn.Identity()
    )
    self.distillation_classifier = (
        nn.Linear(config.hidden_size, config.num_labels) if config.num_labels > 0 else nn.Identity()
    )

    self.init_weights()
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
    super(Conv, self).__init__()
    self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
    self.bn = nn.BatchNorm2d(c2)
    self.act = nn.Hardswish() if act else nn.Identity()
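# Hedged illustration of the act flag above; Conv and autopad are assumed to come from
# the surrounding YOLOv5-style repo and are not redefined here.
conv_with_act = Conv(3, 32, k=3, s=2, act=True)    # conv -> BN -> Hardswish
conv_linear = Conv(3, 32, k=3, s=2, act=False)     # conv -> BN -> nn.Identity (no activation)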
def __init__(self, dim, depth, heads = 8, dim_head = 64, mlp_mult = 4, local_patch_size = 7, global_k = 7, dropout = 0., has_local = True):
    super().__init__()
    self.layers = nn.ModuleList([])
    for _ in range(depth):
        self.layers.append(nn.ModuleList([
            Residual(PreNorm(dim, LocalAttention(dim, heads = heads, dim_head = dim_head, dropout = dropout, patch_size = local_patch_size))) if has_local else nn.Identity(),
            Residual(PreNorm(dim, FeedForward(dim, mlp_mult, dropout = dropout))) if has_local else nn.Identity(),
            Residual(PreNorm(dim, GlobalAttention(dim, heads = heads, dim_head = dim_head, dropout = dropout, k = global_k))),
            Residual(PreNorm(dim, FeedForward(dim, mlp_mult, dropout = dropout)))
        ]))