def _make_dense_block(
    self,
    num_layers,
    in_planes,
    block_idx,
    growth_rate=32,
    expansion=4,
    use_se=False,
    se_reduction_ratio=16,
):
    assert is_pos_int(in_planes)
    assert is_pos_int(growth_rate)
    assert is_pos_int(expansion)

    # create a block of dense layers at same resolution:
    layers = OrderedDict()
    for idx in range(num_layers):
        layers[f"block{block_idx}-{idx}"] = _DenseLayer(
            in_planes + idx * growth_rate,
            growth_rate=growth_rate,
            expansion=expansion,
            use_se=use_se,
            se_reduction_ratio=se_reduction_ratio,
        )
    return nn.Sequential(layers)
def __init__(
    self,
    convolutional_block,
    in_planes,
    out_planes,
    stride=1,
    mid_planes_and_cardinality=None,
    reduction=4,
    final_bn_relu=True,
):
    # assertions on inputs:
    assert is_pos_int(in_planes) and is_pos_int(out_planes)
    assert is_pos_int(stride) and is_pos_int(reduction)

    # set object fields:
    super(GenericLayer, self).__init__()
    self.convolutional_block = convolutional_block
    self.final_bn_relu = final_bn_relu

    # final batchnorm and relu layer:
    if final_bn_relu:
        self.bn = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=INPLACE)

    # define down-sampling layer (if direct residual impossible):
    self.downsample = None
    if stride != 1 or in_planes != out_planes:
        self.downsample = nn.Sequential(
            conv1x1(in_planes, out_planes, stride=stride),
            nn.BatchNorm2d(out_planes),
        )
def __init__(
    self,
    in_planes,
    out_planes,
    stride=1,
    mid_planes_and_cardinality=None,
    reduction=4,
    final_bn_relu=True,
):
    # assertions on inputs:
    assert is_pos_int(in_planes) and is_pos_int(out_planes)
    assert is_pos_int(stride) and is_pos_int(reduction)

    # define convolutional block:
    convolutional_block = nn.Sequential(
        conv3x3(in_planes, out_planes, stride=stride),
        nn.BatchNorm2d(out_planes),
        nn.ReLU(inplace=INPLACE),
        conv3x3(out_planes, out_planes),
    )

    # call constructor of generic layer:
    super(BasicLayer, self).__init__(
        convolutional_block,
        in_planes,
        out_planes,
        stride=stride,
        reduction=reduction,
        final_bn_relu=final_bn_relu,
    )
def _make_dense_block(
    self,
    num_layers,
    in_planes,
    block_idx,
    growth_rate=32,
    expansion=4,
    use_se=False,
    se_reduction_ratio=16,
):
    assert is_pos_int(in_planes)
    assert is_pos_int(growth_rate)
    assert is_pos_int(expansion)

    # create a block of dense layers at same resolution:
    layers = []
    for idx in range(num_layers):
        layers.append(
            self.build_attachable_block(
                f"block{block_idx}-{idx}",
                _DenseLayer(
                    in_planes + idx * growth_rate,
                    growth_rate=growth_rate,
                    expansion=expansion,
                    use_se=use_se,
                    se_reduction_ratio=se_reduction_ratio,
                ),
            )
        )
    return nn.Sequential(*layers)
def __init__(
    self,
    dataset: Sequence,
    split: Optional[str],
    batchsize_per_replica: int,
    shuffle: bool,
    transform: Optional[Union[ClassyTransform, Callable]],
    num_samples: Optional[int],
) -> None:
    """
    Constructor for a ClassyDataset.

    Args:
        dataset: The underlying dataset to wrap
        split: When set, split of dataset to use ("train", "test")
        batchsize_per_replica: Positive integer indicating batch size for each
            replica
        shuffle: Whether to shuffle between epochs
        transform: When set, transform to be applied to each sample
        num_samples: When set, this restricts the number of samples provided by
            the dataset
    """
    # Asserts:
    assert is_pos_int(
        batchsize_per_replica
    ), "batchsize_per_replica must be a positive int"
    assert isinstance(shuffle, bool), "shuffle must be a boolean"
    assert num_samples is None or is_pos_int(
        num_samples
    ), "num_samples must be a positive int or None"

    # Assignments:
    self.split = split
    self.batchsize_per_replica = batchsize_per_replica
    self.shuffle = shuffle
    self.transform = transform
    self.num_samples = num_samples
    self.dataset = dataset
def __init__(self, in_planes, growth_rate=32, expansion=4):
    # assertions:
    assert is_pos_int(in_planes)
    assert is_pos_int(growth_rate)
    assert is_pos_int(expansion)

    # add all layers to the layer:
    super(_DenseLayer, self).__init__()
    intermediate = expansion * growth_rate
    self.add_module("norm-1", nn.BatchNorm2d(in_planes))
    self.add_module("relu-1", nn.ReLU(inplace=INPLACE))
    self.add_module(
        "conv-1",
        nn.Conv2d(in_planes, intermediate, kernel_size=1, stride=1, bias=False),
    )
    self.add_module("norm-2", nn.BatchNorm2d(intermediate))
    self.add_module("relu-2", nn.ReLU(inplace=INPLACE))
    self.add_module(
        "conv-2",
        nn.Conv2d(
            intermediate,
            growth_rate,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        ),
    )
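# Shape-check sketch (assumption, not part of the original source: as in
# standard DenseNet implementations, _DenseLayer.forward concatenates its
# input with the newly computed features, so each layer adds `growth_rate`
# channels; the sizes below are illustrative):
import torch

layer = _DenseLayer(in_planes=64, growth_rate=32, expansion=4)
x = torch.randn(2, 64, 56, 56)
out = layer(x)
assert out.shape[1] == 64 + 32  # input channels plus growth_rate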
def __init__(
    self,
    unique_id: str,
    num_classes: int,
    in_plane: int,
    zero_init_bias: bool = False,
):
    """Constructor for FullyConnectedHead

    Args:
        unique_id: A unique identifier for the head. Multiple instances of
            the same head might be attached to a model, and unique_id is
            used to refer to them.
        num_classes: Number of classes for the head. If None, then the fully
            connected layer is not applied.
        in_plane: Input size for the fully connected layer.
        zero_init_bias: Zero initialize the bias of the fully connected layer.
    """
    super().__init__(unique_id, num_classes)
    assert num_classes is None or is_pos_int(num_classes)
    assert is_pos_int(in_plane)
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = None if num_classes is None else nn.Linear(in_plane, num_classes)

    if zero_init_bias:
        self.fc.bias.data.zero_()
def check_generic_args(args):
    """
    Perform assertions on generic command-line arguments.
    """

    # check types and values:
    assert is_pos_int(args.num_workers), "incorrect number of workers"
    assert is_pos_int(args.visdom_port), "incorrect visdom port"

    # create checkpoint folder if it does not exist:
    if args.checkpoint_folder != "" and not os.path.exists(args.checkpoint_folder):
        os.makedirs(args.checkpoint_folder, exist_ok=True)
        assert os.path.exists(args.checkpoint_folder), (
            "could not create folder %s" % args.checkpoint_folder
        )

    # when in debugging mode, enter debugger upon error:
    if args.debug:
        import sys

        from classy_vision.generic.debug import debug_info

        sys.excepthook = debug_info

    # check visdom server name:
    if args.visdom_server != "":
        if args.visdom_server.startswith("https://"):
            print("WARNING: Visdom does not work over HTTPS.")
            args.visdom_server = args.visdom_server[8:]
        if not args.visdom_server.startswith("http://"):
            args.visdom_server = "http://%s" % args.visdom_server

    # return input arguments:
    return args
def __init__(
    self,
    unique_id: str,
    num_classes: Optional[int],
    in_plane: int,
    conv_planes: Optional[int] = None,
    activation: Optional[nn.Module] = None,
    zero_init_bias: bool = False,
    normalize_inputs: Optional[str] = None,
):
    """Constructor for FullyConnectedHead

    Args:
        unique_id: A unique identifier for the head. Multiple instances of
            the same head might be attached to a model, and unique_id is
            used to refer to them.
        num_classes: Number of classes for the head. If None, then the fully
            connected layer is not applied.
        in_plane: Input size for the fully connected layer.
        conv_planes: If specified, applies a 1x1 convolutional layer to the
            input before passing it to the average pooling layer. The
            convolution is also followed by a BatchNorm and an activation.
        activation: The activation to be applied after the convolutional layer.
            Unused if `conv_planes` is not specified.
        zero_init_bias: Zero initialize the bias
        normalize_inputs: If specified, normalize the inputs after performing
            average pooling using the specified method. Supports "l2"
            normalization.
    """
    super().__init__(unique_id, num_classes)
    assert num_classes is None or is_pos_int(num_classes)
    assert is_pos_int(in_plane)
    if conv_planes is not None and activation is None:
        raise TypeError("activation cannot be None if conv_planes is specified")
    if normalize_inputs is not None and normalize_inputs != NORMALIZE_L2:
        raise ValueError(
            f"Unsupported value for normalize_inputs: {normalize_inputs}"
        )
    self.conv = (
        nn.Conv2d(in_plane, conv_planes, kernel_size=1, bias=False)
        if conv_planes
        else None
    )
    self.bn = nn.BatchNorm2d(conv_planes) if conv_planes else None
    self.activation = activation
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = (
        None
        if num_classes is None
        else nn.Linear(
            in_plane if conv_planes is None else conv_planes, num_classes
        )
    )
    self.normalize_inputs = normalize_inputs
    if zero_init_bias:
        self.fc.bias.data.zero_()
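# Illustrative construction sketch (hypothetical values, and assuming the
# NORMALIZE_L2 constant equals the "l2" string mentioned in the docstring):
# a head that first projects the trunk output to 256 channels with a 1x1
# conv, BatchNorm and ReLU, L2-normalizes the pooled features, and then
# classifies into 1000 classes.
head = FullyConnectedHead(
    unique_id="default_head",
    num_classes=1000,
    in_plane=2048,
    conv_planes=256,
    activation=nn.ReLU(inplace=True),
    normalize_inputs="l2",
)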
def __init__(
    self,
    in_planes,
    out_planes,
    stride=1,
    mid_planes_and_cardinality=None,
    reduction=4,
    final_bn_relu=True,
    use_se=False,
    se_reduction_ratio=16,
):
    # assertions on inputs:
    assert is_pos_int(in_planes) and is_pos_int(out_planes)
    assert (is_pos_int(stride) or is_pos_int_tuple(stride)) and is_pos_int(reduction)

    # define convolutional layers:
    bottleneck_planes = int(math.ceil(out_planes / reduction))
    cardinality = 1
    if mid_planes_and_cardinality is not None:
        mid_planes, cardinality = mid_planes_and_cardinality
        bottleneck_planes = mid_planes * cardinality

    convolutional_block = nn.Sequential(
        conv1x1(in_planes, bottleneck_planes),
        nn.BatchNorm2d(bottleneck_planes),
        nn.ReLU(inplace=INPLACE),
        conv3x3(
            bottleneck_planes, bottleneck_planes, stride=stride, groups=cardinality
        ),
        nn.BatchNorm2d(bottleneck_planes),
        nn.ReLU(inplace=INPLACE),
        conv1x1(bottleneck_planes, out_planes),
    )

    # call constructor of generic layer:
    super(BottleneckLayer, self).__init__(
        convolutional_block,
        in_planes,
        out_planes,
        stride=stride,
        reduction=reduction,
        final_bn_relu=final_bn_relu,
        use_se=use_se,
        se_reduction_ratio=se_reduction_ratio,
    )
def __init__(self, topk, target_is_one_hot=True, num_classes=None):
    """
    args:
        topk: list of int `k` values.
        target_is_one_hot: boolean, if class labels are one-hot encoded.
        num_classes: int, number of classes.
    """
    assert isinstance(topk, list), "topk must be a list"
    assert len(topk) > 0, "topk list should have at least one element"
    assert all(is_pos_int(x) for x in topk), "each value in topk must be >= 1"
    if not target_is_one_hot:
        assert (
            type(num_classes) == int and num_classes > 0
        ), "num_classes must be positive integer"

    self._topk = topk
    self._target_is_one_hot = target_is_one_hot
    self._num_classes = num_classes

    # _total_* variables store running, in-sync totals for the
    # metrics. These should not be communicated / summed.
    self._total_correct_predictions_k = None
    self._total_correct_targets = None

    # _curr_* variables store counts since the last sync. Only
    # these should be summed across workers and they are reset
    # after each communication
    self._curr_correct_predictions_k = None
    self._curr_correct_targets = None

    # Initialize all values properly
    self.reset()
def from_config(cls, config: Dict[str, Any]) -> "MultiStepParamScheduler": """Instantiates a MultiStepParamScheduler from a configuration. Args: config: A configuration for a MultiStepParamScheduler. See :func:`__init__` for parameters expected in the config. Returns: A MultiStepParamScheduler instance. """ assert ( "values" in config and isinstance(config["values"], list) and len(config["values"]) > 0 ), "Non-Equi Step scheduler requires a list of at least one param value" assert is_pos_int(config["num_epochs"]), "Num epochs must be a positive integer" assert config["num_epochs"] >= len( config["values"] ), "Num epochs must be greater than param schedule" milestones = config.get("milestones", None) if "milestones" in config: assert ( isinstance(config["milestones"], list) and len(config["milestones"]) == len(config["values"]) - 1 ), "Non-Equi Step scheduler requires a list of %d epochs" % ( len(config["values"]) - 1 ) return cls( values=config["values"], num_epochs=config["num_epochs"], milestones=milestones, update_interval=UpdateInterval.from_config(config, UpdateInterval.EPOCH), )
def __init__(self, topk):
    """
    args:
        topk: list of int `k` values.
    """
    super().__init__()

    assert isinstance(topk, list), "topk must be a list"
    assert len(topk) > 0, "topk list should have at least one element"
    assert all(is_pos_int(x) for x in topk), "each value in topk must be >= 1"

    self._topk = topk

    # _total_* variables store running, in-sync totals for the
    # metrics. These should not be communicated / summed.
    self._total_correct_predictions_k = None
    self._total_sample_count = None

    # _curr_* variables store counts since the last sync. Only
    # these should be summed across workers and they are reset
    # after each communication
    self._curr_correct_predictions_k = None
    self._curr_sample_count = None

    # Initialize all values properly
    self.reset()
def __init__(self, num_layers, in_planes, growth_rate=32, expansion=4):
    # assertions:
    assert is_pos_int(in_planes)
    assert is_pos_int(growth_rate)
    assert is_pos_int(expansion)

    # create block of dense layers at same resolution:
    super(_DenseBlock, self).__init__()
    for idx in range(num_layers):
        layer = _DenseLayer(
            in_planes + idx * growth_rate,
            growth_rate=growth_rate,
            expansion=expansion,
        )
        self.add_module("denselayer-%d" % (idx + 1), layer)
def __init__(self, num_meters: int, topk_values: List[int], meter_names: List[str]):
    super().__init__()

    assert is_pos_int(num_meters), "num_meters must be positive"
    assert isinstance(topk_values, list), "topk_values must be a list"
    assert len(topk_values) > 0, "topk_values list should have at least one element"
    assert all(
        is_pos_int(x) for x in topk_values
    ), "each value in topk_values must be >= 1"

    self._num_meters = num_meters
    self._topk_values = topk_values
    self._meters = [
        AccuracyMeter(self._topk_values) for _ in range(self._num_meters)
    ]
    self._meter_names = meter_names
    self.reset()
def __init__(self, in_planes, out_planes, reduction=2):
    # assertions:
    assert is_pos_int(in_planes)
    assert is_pos_int(out_planes)
    assert is_pos_int(reduction)

    # create layers for pooling:
    super(_Transition, self).__init__()
    self.add_module("pool-norm", nn.BatchNorm2d(in_planes))
    self.add_module("pool-relu", nn.ReLU(inplace=INPLACE))
    self.add_module(
        "pool-conv",
        nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, bias=False),
    )
    self.add_module(
        "pool-pool", nn.AvgPool2d(kernel_size=reduction, stride=reduction)
    )
def __init__(
    self,
    convolutional_block,
    in_planes,
    out_planes,
    stride=1,
    mid_planes_and_cardinality=None,
    reduction=4,
    final_bn_relu=True,
    use_se=False,
    se_reduction_ratio=16,
):
    # assertions on inputs:
    assert is_pos_int(in_planes) and is_pos_int(out_planes)
    assert (is_pos_int(stride) or is_pos_int_tuple(stride)) and is_pos_int(reduction)

    # set object fields:
    super(GenericLayer, self).__init__()
    self.convolutional_block = convolutional_block
    self.final_bn_relu = final_bn_relu

    # final batchnorm and relu layer:
    if final_bn_relu:
        self.bn = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=INPLACE)

    # define down-sampling layer (if direct residual impossible):
    self.downsample = None
    if (stride != 1 and stride != (1, 1)) or in_planes != out_planes:
        self.downsample = nn.Sequential(
            conv1x1(in_planes, out_planes, stride=stride),
            nn.BatchNorm2d(out_planes),
        )

    self.se = (
        SqueezeAndExcitationLayer(out_planes, reduction_ratio=se_reduction_ratio)
        if use_se
        else None
    )
def __init__(self, meters_config: AttrDict):
    self.meters_config = meters_config
    num_meters = self.meters_config["num_meters"]
    meter_names = self.meters_config["meter_names"]
    assert is_pos_int(num_meters), "num_meters must be positive"

    self._num_meters = num_meters
    self._meters = [
        MeanAPMeter.from_config(meters_config) for _ in range(self._num_meters)
    ]
    self._meter_names = meter_names
    self.reset()
def __init__(self, topk, clips_per_video_train, clips_per_video_test):
    """
    Args:
        topk: list of int `k` values.
        clips_per_video_train: No. of clips sampled per video at train time
        clips_per_video_test: No. of clips sampled per video at test time
    """
    super().__init__(clips_per_video_train, clips_per_video_test)

    assert isinstance(topk, Sequence), "topk must be a sequence"
    assert len(topk) > 0, "topk list should have at least one element"
    assert all(is_pos_int(x) for x in topk), "each value in topk must be >= 1"

    self._accuracy_meter = AccuracyMeter(topk)
def from_config(cls, config: Dict[str, Any]) -> "FullyConvolutionalLinearHead": """Instantiates a FullyConvolutionalLinearHead from a configuration. Args: config: A configuration for a FullyConvolutionalLinearHead. See :func:`__init__` for parameters expected in the config. Returns: A FullyConvolutionalLinearHead instance. """ required_args = ["in_plane", "num_classes"] for arg in required_args: assert arg in config, "argument %s is required" % arg config.update( {"activation_func": config.get("activation_func", "softmax")}) config.update({"use_dropout": config.get("use_dropout", False)}) pool_size = config.get("pool_size", None) if pool_size is not None: assert isinstance(pool_size, Sequence) and len(pool_size) == 3 for pool_size_dim in pool_size: assert is_pos_int(pool_size_dim) assert is_pos_int(config["in_plane"]) assert is_pos_int(config["num_classes"]) num_classes = config.get("num_classes", None) in_plane = config["in_plane"] return cls( config["unique_id"], num_classes, in_plane, pool_size, config["activation_func"], config["use_dropout"], config.get("dropout_ratio", 0.5), )
def __init__(
    self,
    in_planes,
    out_planes,
    stride=1,
    mid_planes_and_cardinality=None,
    reduction=1,
    final_bn_relu=True,
    use_se=False,
    se_reduction_ratio=16,
):
    # assertions on inputs:
    assert is_pos_int(in_planes) and is_pos_int(out_planes)
    assert (is_pos_int(stride) or is_pos_int_tuple(stride)) and is_pos_int(reduction)

    # define convolutional block:
    convolutional_block = nn.Sequential(
        conv3x3(in_planes, out_planes, stride=stride),
        nn.BatchNorm2d(out_planes),
        nn.ReLU(inplace=INPLACE),
        conv3x3(out_planes, out_planes),
    )

    # call constructor of generic layer:
    super().__init__(
        convolutional_block,
        in_planes,
        out_planes,
        stride=stride,
        reduction=reduction,
        final_bn_relu=final_bn_relu,
        use_se=use_se,
        se_reduction_ratio=se_reduction_ratio,
    )
def __init__(self, loss_config: AttrDict):
    super(NCELossWithMemory, self).__init__()

    self.loss_config = loss_config
    memory_params = self.loss_config.memory_params
    memory_params.memory_size = self.loss_config.num_train_samples
    assert is_pos_int(
        memory_params.memory_size
    ), f"Memory size must be positive: {memory_params.memory_size}"

    assert self.loss_config.loss_type in [
        "nce",
        "cross_entropy",
    ], f"Supported types are nce/cross_entropy. Found {self.loss_config.loss_type}"

    self.loss_type = self.loss_config.loss_type
    self.update_memory_on_forward = memory_params.update_mem_on_forward
    self.update_memory_emb_index = self.loss_config.update_mem_with_emb_index
    if self.update_memory_on_forward is False:
        # we have multiple embeddings used in NCE
        # but we update memory with only one of them
        assert self.update_memory_emb_index >= 0

    # first setup the NCEAverage method to get the scores of the output wrt
    # memory bank negatives
    self.nce_average = NCEAverage(
        memory_params=memory_params,
        negative_sampling_params=self.loss_config.negative_sampling_params,
        T=self.loss_config.temperature,
        Z=self.loss_config.norm_constant,
        loss_type=self.loss_type,
    )

    if self.loss_type == "nce":
        # setup the actual NCE loss
        self.nce_criterion = NCECriterion(self.loss_config.num_train_samples)
    elif self.loss_type == "cross_entropy":
        # cross-entropy loss. Also called InfoNCE
        self.xe_criterion = nn.CrossEntropyLoss()

    # other constants
    self.normalize_embedding = self.loss_config.norm_embedding
    self.loss_weights = self.loss_config.loss_weights
    self.init_sync_memory = False
    self.ignore_index = self.loss_config.get("ignore_index", -1)
def __init__(
    self,
    num_blocks,
    num_classes,
    init_planes,
    growth_rate,
    expansion,
    small_input,
    final_bn_relu,
    use_se=False,
    se_reduction_ratio=16,
):
    """
    Implementation of a standard densely connected network (DenseNet).

    Contains the following attachable blocks:
        block{block_idx}-{idx}: This is the output of each dense block,
            indexed by the block index and the index of the dense layer
        transition-{idx}: This is the output of the transition layers
        trunk_output: The final output of the `DenseNet`. This is where a
            `fully_connected` head is normally attached.

    Args:
        small_input: set to `True` for 32x32 sized image inputs.
        final_bn_relu: set to `False` to exclude the final batchnorm and
            ReLU layers. These settings are useful when training Siamese
            networks.
        use_se: Enable squeeze and excitation
        se_reduction_ratio: The reduction ratio to apply in the excitation
            stage. Only used if `use_se` is `True`.
    """
    super().__init__()

    # assertions:
    assert isinstance(num_blocks, Sequence)
    assert all(is_pos_int(b) for b in num_blocks)
    assert num_classes is None or is_pos_int(num_classes)
    assert is_pos_int(init_planes)
    assert is_pos_int(growth_rate)
    assert is_pos_int(expansion)
    assert type(small_input) == bool

    # initial convolutional block:
    self._num_classes = num_classes
    self.num_blocks = num_blocks
    self.small_input = small_input
    if self.small_input:
        self.initial_block = nn.Sequential(
            nn.Conv2d(3, init_planes, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(init_planes),
            nn.ReLU(inplace=INPLACE),
        )
    else:
        self.initial_block = nn.Sequential(
            nn.Conv2d(3, init_planes, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(init_planes),
            nn.ReLU(inplace=INPLACE),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

    # loop over spatial resolutions:
    num_planes = init_planes
    blocks = nn.Sequential()
    for idx, num_layers in enumerate(num_blocks):
        # add dense block
        block = self._make_dense_block(
            num_layers,
            num_planes,
            idx,
            growth_rate=growth_rate,
            expansion=expansion,
            use_se=use_se,
            se_reduction_ratio=se_reduction_ratio,
        )
        blocks.add_module(f"block_{idx}", block)
        num_planes = num_planes + num_layers * growth_rate

        # add transition layer:
        if idx != len(num_blocks) - 1:
            trans = _Transition(num_planes, num_planes // 2)
            blocks.add_module(f"transition-{idx}", trans)
            num_planes = num_planes // 2

    blocks.add_module(
        "trunk_output", self._make_trunk_output_block(num_planes, final_bn_relu)
    )
    self.features = blocks

    # initialize weights of convolutional and batchnorm layers:
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2.0 / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            m.bias.data.zero_()
def __init__(
    self,
    num_blocks,
    init_planes: int = 64,
    reduction: int = 4,
    small_input: bool = False,
    zero_init_bn_residuals: bool = False,
    base_width_and_cardinality: Optional[Union[Tuple, List]] = None,
    basic_layer: bool = False,
    final_bn_relu: bool = True,
    use_se: bool = False,
    se_reduction_ratio: int = 16,
):
    """
    Implementation of `ResNeXt <https://arxiv.org/pdf/1611.05431.pdf>`_.

    Args:
        small_input: set to `True` for 32x32 sized image inputs.
        final_bn_relu: set to `False` to exclude the final batchnorm and
            ReLU layers. These settings are useful when training Siamese
            networks.
        use_se: Enable squeeze and excitation
        se_reduction_ratio: The reduction ratio to apply in the excitation
            stage. Only used if `use_se` is `True`.
    """
    super().__init__()

    # assertions on inputs:
    assert type(num_blocks) == list
    assert all(is_pos_int(n) for n in num_blocks)
    assert is_pos_int(init_planes) and is_pos_int(reduction)
    assert type(small_input) == bool
    assert type(zero_init_bn_residuals) == bool, (
        "zero_init_bn_residuals must be a boolean, set to true if gamma of last "
        "BN of residual block should be initialized to 0.0, false for 1.0"
    )
    assert base_width_and_cardinality is None or (
        isinstance(base_width_and_cardinality, (tuple, list))
        and len(base_width_and_cardinality) == 2
        and is_pos_int(base_width_and_cardinality[0])
        and is_pos_int(base_width_and_cardinality[1])
    )
    assert isinstance(use_se, bool), "use_se has to be a boolean"

    # initial convolutional block:
    self.num_blocks = num_blocks
    self.small_input = small_input
    self._make_initial_block(small_input, init_planes, basic_layer)

    # compute number of planes at each spatial resolution:
    out_planes = [init_planes * 2 ** i * reduction for i in range(len(num_blocks))]
    in_planes = [init_planes] + out_planes[:-1]

    # create subnetworks for each spatial resolution:
    blocks = []
    for idx in range(len(out_planes)):
        mid_planes_and_cardinality = None
        if base_width_and_cardinality is not None:
            w, c = base_width_and_cardinality
            mid_planes_and_cardinality = (w * 2 ** idx, c)
        new_block = self._make_resolution_block(
            in_planes[idx],
            out_planes[idx],
            idx,
            num_blocks[idx],  # num layers
            stride=1 if idx == 0 else 2,
            mid_planes_and_cardinality=mid_planes_and_cardinality,
            reduction=reduction,
            final_bn_relu=final_bn_relu or (idx != (len(out_planes) - 1)),
            use_se=use_se,
            se_reduction_ratio=se_reduction_ratio,
        )
        blocks.append(new_block)
    self.blocks = nn.Sequential(*blocks)

    self.out_planes = out_planes[-1]
    self._num_classes = out_planes

    # initialize weights:
    self._initialize_weights(zero_init_bn_residuals)
def __init__(
    self,
    num_blocks,
    init_planes: int = 64,
    reduction: int = 4,
    small_input: bool = False,
    zero_init_bn_residuals: bool = False,
    base_width_and_cardinality: Optional[Union[Tuple, List]] = None,
    basic_layer: bool = False,
    final_bn_relu: bool = True,
    bn_weight_decay: Optional[bool] = False,
    use_se: bool = False,
    se_reduction_ratio: int = 16,
):
    """
    Implementation of `ResNeXt <https://arxiv.org/pdf/1611.05431.pdf>`_.

    Args:
        small_input: set to `True` for 32x32 sized image inputs.
        final_bn_relu: set to `False` to exclude the final batchnorm and
            ReLU layers. These settings are useful when training Siamese
            networks.
        use_se: Enable squeeze and excitation
        se_reduction_ratio: The reduction ratio to apply in the excitation
            stage. Only used if `use_se` is `True`.
    """
    super().__init__()

    # assertions on inputs:
    assert type(num_blocks) == list
    assert all(is_pos_int(n) for n in num_blocks)
    assert is_pos_int(init_planes) and is_pos_int(reduction)
    assert type(small_input) == bool
    assert type(bn_weight_decay) == bool
    assert type(zero_init_bn_residuals) == bool, (
        "zero_init_bn_residuals must be a boolean, set to true if gamma of last "
        "BN of residual block should be initialized to 0.0, false for 1.0"
    )
    assert base_width_and_cardinality is None or (
        isinstance(base_width_and_cardinality, (tuple, list))
        and len(base_width_and_cardinality) == 2
        and is_pos_int(base_width_and_cardinality[0])
        and is_pos_int(base_width_and_cardinality[1])
    )
    assert isinstance(use_se, bool), "use_se has to be a boolean"

    # Chooses whether to apply weight decay to batch norm
    # parameters. This improves results in some situations,
    # e.g. ResNeXt models trained / evaluated using the Imagenet
    # dataset, but can cause worse performance in other scenarios
    self.bn_weight_decay = bn_weight_decay

    # initial convolutional block:
    self.num_blocks = num_blocks
    self.small_input = small_input
    self._make_initial_block(small_input, init_planes, basic_layer)

    # compute number of planes at each spatial resolution:
    out_planes = [init_planes * 2 ** i * reduction for i in range(len(num_blocks))]
    in_planes = [init_planes] + out_planes[:-1]

    # create subnetworks for each spatial resolution:
    blocks = []
    for idx in range(len(out_planes)):
        mid_planes_and_cardinality = None
        if base_width_and_cardinality is not None:
            w, c = base_width_and_cardinality
            mid_planes_and_cardinality = (w * 2 ** idx, c)
        new_block = self._make_resolution_block(
            in_planes[idx],
            out_planes[idx],
            idx,
            num_blocks[idx],  # num layers
            stride=1 if idx == 0 else 2,
            mid_planes_and_cardinality=mid_planes_and_cardinality,
            reduction=reduction,
            final_bn_relu=final_bn_relu or (idx != (len(out_planes) - 1)),
            use_se=use_se,
            se_reduction_ratio=se_reduction_ratio,
        )
        blocks.append(nn.Sequential(*new_block))
    self.blocks = nn.Sequential(*blocks)

    self.out_planes = out_planes[-1]
    self._num_classes = out_planes

    # initialize weights:
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Init BatchNorm gamma to 0.0 for last BN layer, it gets 0.2-0.3% higher
    # final val top1 for larger batch sizes.
    if zero_init_bn_residuals:
        for m in self.modules():
            if isinstance(m, GenericLayer):
                if hasattr(m, "bn"):
                    nn.init.constant_(m.bn.weight, 0)
def _parse_config(config):
    ret_config = {}
    required_args = [
        "input_planes",
        "clip_crop_size",
        "skip_transformation_type",
        "residual_transformation_type",
        "frames_per_clip",
        "num_blocks",
    ]
    for arg in required_args:
        assert arg in config, "resnext3d model requires argument %s" % arg
        ret_config[arg] = config[arg]

    # Default setting for model stem, which is considered as stage 0. Stage
    # index starts from 0 as implemented in ResStageBase._block_name() method.
    # stem_planes: No. of output channels of conv op in stem
    # stem_temporal_kernel: temporal size of conv op in stem
    # stem_spatial_kernel: spatial size of conv op in stem
    # stem_maxpool: by default, spatial maxpool op is disabled in stem
    ret_config.update(
        {
            "input_key": config.get("input_key", None),
            "stem_name": config.get("stem_name", "resnext3d_stem"),
            "stem_planes": config.get("stem_planes", 64),
            "stem_temporal_kernel": config.get("stem_temporal_kernel", 3),
            "stem_spatial_kernel": config.get("stem_spatial_kernel", 7),
            "stem_maxpool": config.get("stem_maxpool", False),
        }
    )

    # Default setting for model stages 1, 2, 3 and 4
    # stage_planes: No. of output channels of 1st conv op in stage 1
    # stage_temporal_kernel_basis: Basis of temporal kernel sizes for each of
    #   the stages.
    # temporal_conv_1x1: if True, do temporal convolution in the first
    #   1x1 Conv3d. Otherwise, do it in the second 3x3 Conv3d (default setting)
    # stage_temporal_stride: temporal stride for each stage
    # stage_spatial_stride: spatial stride for each stage
    # num_groups: No. of groups in 2nd (group) conv in the residual transformation
    # width_per_group: No. of channels per group in 2nd (group) conv in the
    #   residual transformation
    ret_config.update(
        {
            "stage_planes": config.get("stage_planes", 256),
            "stage_temporal_kernel_basis": config.get(
                "stage_temporal_kernel_basis", [[3], [3], [3], [3]]
            ),
            "temporal_conv_1x1": config.get(
                "temporal_conv_1x1", [False, False, False, False]
            ),
            "stage_temporal_stride": config.get("stage_temporal_stride", [1, 2, 2, 2]),
            "stage_spatial_stride": config.get("stage_spatial_stride", [1, 2, 2, 2]),
            "num_groups": config.get("num_groups", 1),
            "width_per_group": config.get("width_per_group", 64),
        }
    )

    # Default setting for model parameter initialization
    ret_config.update(
        {
            "zero_init_residual_transform": config.get(
                "zero_init_residual_transform", False
            )
        }
    )

    assert is_pos_int_list(ret_config["num_blocks"])
    assert is_pos_int(ret_config["stem_planes"])
    assert is_pos_int(ret_config["stem_temporal_kernel"])
    assert is_pos_int(ret_config["stem_spatial_kernel"])
    assert type(ret_config["stem_maxpool"]) == bool
    assert is_pos_int(ret_config["stage_planes"])
    assert type(ret_config["stage_temporal_kernel_basis"]) == list
    assert all(is_pos_int_list(l) for l in ret_config["stage_temporal_kernel_basis"])
    assert type(ret_config["temporal_conv_1x1"]) == list
    assert is_pos_int_list(ret_config["stage_temporal_stride"])
    assert is_pos_int_list(ret_config["stage_spatial_stride"])
    assert is_pos_int(ret_config["num_groups"])
    assert is_pos_int(ret_config["width_per_group"])

    return ret_config
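# Example configuration sketch covering only the required keys; the
# transformation-type strings and other values here are placeholders, and
# everything not listed falls back to the defaults filled in by _parse_config.
config = {
    "input_planes": 3,
    "clip_crop_size": 112,
    "skip_transformation_type": "postactivated_shortcut",
    "residual_transformation_type": "postactivated_bottleneck_transformation",
    "frames_per_clip": 8,
    "num_blocks": [3, 4, 6, 3],
}
parsed = _parse_config(config)  # returns the config with defaults merged in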
def image_map(
    mapcoord: Union[np.ndarray, torch.Tensor],
    dataset: Union[
        torch.utils.data.dataloader.DataLoader, torch.utils.data.dataset.Dataset
    ],
    mapsize: int = 5000,
    imsize: int = 32,
    unnormalize: Optional[Callable] = None,
    snap_to_grid: bool = False,
) -> torch.ByteTensor:
    """Constructs a 2D map of images.

    The 2D coordinates for each of the images are specified in `mapcoord`, the
    corresponding images are in `dataset`. Optional arguments set the size of
    the map images, the size of the images themselves, the unnormalization
    transform, and whether or not to snap images to a grid.
    """
    # assertions:
    if type(mapcoord) == np.ndarray:
        mapcoord = torch.from_numpy(mapcoord)
    assert torch.is_tensor(mapcoord)
    if isinstance(dataset, torch.utils.data.dataloader.DataLoader):
        dataset = dataset.dataset
    assert isinstance(dataset, torch.utils.data.dataset.Dataset)
    assert is_pos_int(mapsize)
    assert is_pos_int(imsize)
    if unnormalize is not None:
        assert callable(unnormalize)

    # initialize some variables:
    import torchvision.transforms.functional as F

    background = 255
    mapim = torch.ByteTensor(3, mapsize, mapsize).fill_(background)

    # normalize map coordinates:
    mapc = mapcoord.add(-(mapcoord.min()))
    mapc.div_(mapc.max())

    # loop over images:
    for idx in range(len(dataset)):

        # compute grid location:
        if snap_to_grid:
            y = 1 + int(math.floor(mapc[idx][0] * (mapsize - imsize - 2)))
            x = 1 + int(math.floor(mapc[idx][1] * (mapsize - imsize - 2)))
        else:
            y = 1 + int(math.floor(mapc[idx][0] * (math.floor(mapsize - imsize) - 2)))
            x = 1 + int(math.floor(mapc[idx][1] * (math.floor(mapsize - imsize) - 2)))

        # check whether we can overwrite this location:
        overwrite = not snap_to_grid
        if not overwrite:
            segment = mapim.narrow(1, y, imsize).narrow(2, x, imsize)
            overwrite = segment.eq(background).all()

        # draw image:
        if overwrite:

            # load, unnormalize, and resize image:
            image = dataset[idx][0]
            if unnormalize is not None:
                image = unnormalize(image)
            resized_im = F.to_tensor(
                F.resize(F.to_pil_image(image), imsize, Image.BILINEAR)
            )

            # place image:
            segment = mapim.narrow(1, y, imsize).narrow(2, x, imsize)
            segment.copy_(resized_im.mul_(255.0).byte())

    # return map:
    return mapim
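# Illustrative usage sketch (hypothetical dataset and coordinates): render a
# dataset as a 2D image map, using random 2D coordinates as a stand-in for a
# real embedding (e.g. t-SNE output).
import numpy as np
import torchvision

dataset = torchvision.datasets.CIFAR10(
    root="/tmp/cifar10",
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
coords = np.random.rand(len(dataset), 2)
grid = image_map(coords, dataset, mapsize=2000, imsize=32, snap_to_grid=True)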
def __init__(
    self,
    num_blocks,
    init_planes,
    reduction,
    small_input,
    zero_init_bn_residuals,
    base_width_and_cardinality,
    basic_layer,
    final_bn_relu,
):
    """
    Implementation of `ResNeXt <https://arxiv.org/pdf/1611.05431.pdf>`_.

    Set ``small_input`` to `True` for 32x32 sized image inputs.

    Set ``final_bn_relu`` to `False` to exclude the final batchnorm and ReLU
    layers. These settings are useful when training Siamese networks.
    """
    super().__init__()

    # assertions on inputs:
    assert type(num_blocks) == list
    assert all(is_pos_int(n) for n in num_blocks)
    assert is_pos_int(init_planes) and is_pos_int(reduction)
    assert type(small_input) == bool
    assert type(zero_init_bn_residuals) == bool, (
        "zero_init_bn_residuals must be a boolean, set to true if gamma of last "
        "BN of residual block should be initialized to 0.0, false for 1.0"
    )
    assert base_width_and_cardinality is None or (
        isinstance(base_width_and_cardinality, (tuple, list))
        and len(base_width_and_cardinality) == 2
        and is_pos_int(base_width_and_cardinality[0])
        and is_pos_int(base_width_and_cardinality[1])
    )

    # we apply weight decay to batch norm if the model is a ResNeXt and we
    # don't if it is a ResNet
    self.bn_weight_decay = base_width_and_cardinality is not None

    # initial convolutional block:
    self.num_blocks = num_blocks
    self.small_input = small_input
    self._make_initial_block(small_input, init_planes, basic_layer)

    # compute number of planes at each spatial resolution:
    out_planes = [init_planes * 2 ** i * reduction for i in range(len(num_blocks))]
    in_planes = [init_planes] + out_planes[:-1]

    # create subnetworks for each spatial resolution:
    blocks = []
    for idx in range(len(out_planes)):
        mid_planes_and_cardinality = None
        if base_width_and_cardinality is not None:
            w, c = base_width_and_cardinality
            mid_planes_and_cardinality = (w * 2 ** idx, c)
        new_block = self._make_resolution_block(
            in_planes[idx],
            out_planes[idx],
            idx,
            num_blocks[idx],  # num layers
            stride=1 if idx == 0 else 2,
            mid_planes_and_cardinality=mid_planes_and_cardinality,
            reduction=reduction,
            final_bn_relu=final_bn_relu or (idx != (len(out_planes) - 1)),
        )
        blocks.append(nn.Sequential(*new_block))
    self.blocks = nn.Sequential(*blocks)

    self.out_planes = out_planes[-1]
    self._num_classes = out_planes

    # initialize weights:
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Init BatchNorm gamma to 0.0 for last BN layer, it gets 0.2-0.3% higher
    # final val top1 for larger batch sizes.
    if zero_init_bn_residuals:
        for m in self.modules():
            if isinstance(m, GenericLayer):
                if hasattr(m, "bn"):
                    nn.init.constant_(m.bn.weight, 0)
def __init__(
    self,
    num_blocks,
    num_classes,
    init_planes,
    growth_rate,
    expansion,
    small_input,
    final_bn_relu,
):
    """
    Implementation of a standard densely connected network (DenseNet).

    Set `small_input` to `True` for 32x32 sized image inputs.

    Set `final_bn_relu` to `False` to exclude the final batchnorm and ReLU
    layers. These settings are useful when training Siamese networks.
    """
    super().__init__()

    # assertions:
    assert type(num_blocks) == list
    assert all(is_pos_int(b) for b in num_blocks)
    assert num_classes is None or is_pos_int(num_classes)
    assert is_pos_int(init_planes)
    assert is_pos_int(growth_rate)
    assert is_pos_int(expansion)
    assert type(small_input) == bool

    # initial convolutional block:
    self._num_classes = num_classes
    self.num_blocks = num_blocks
    self.small_input = small_input
    if self.small_input:
        self.initial_block = nn.Sequential(
            nn.Conv2d(3, init_planes, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(init_planes),
            nn.ReLU(inplace=INPLACE),
        )
    else:
        self.initial_block = nn.Sequential(
            nn.Conv2d(3, init_planes, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(init_planes),
            nn.ReLU(inplace=INPLACE),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

    # loop over spatial resolutions:
    num_planes = init_planes
    self.features = nn.Sequential()
    for idx, num_layers in enumerate(num_blocks):

        # add dense block:
        block = _DenseBlock(
            num_layers, num_planes, growth_rate=growth_rate, expansion=expansion
        )
        self.features.add_module("denseblock-%d" % (idx + 1), block)
        num_planes = num_planes + num_layers * growth_rate

        # add transition layer:
        if idx != len(num_blocks) - 1:
            trans = _Transition(num_planes, num_planes // 2)
            self.features.add_module("transition-%d" % (idx + 1), trans)
            num_planes = num_planes // 2

    # final batch normalization:
    if final_bn_relu:
        self.features.add_module("norm-final", nn.BatchNorm2d(num_planes))
        self.features.add_module("relu-final", nn.ReLU(inplace=INPLACE))

    # final classifier:
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = None if num_classes is None else nn.Linear(num_planes, num_classes)
    self.num_planes = num_planes

    # initialize weights of convolutional and batchnorm layers:
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2.0 / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            m.bias.data.zero_()