# Imports consolidated for the snippets below; the project-local names
# (GlobalAvgPool2d, conv_sequence, init_module, init, DarknetBodyV1/V2,
# ReXBlock, ChannelRepeat, BasicBlock, Bottleneck, SiLU, trainer) are assumed
# to be importable from the surrounding package; the mock dataset fixtures are
# sketched further below.
import platform
from collections import OrderedDict
from math import ceil
from tempfile import NamedTemporaryFile
from typing import Callable, List, Optional, Tuple

import pytest
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def __init__(
    self,
    channels: int,
    sa_ratio: int = 16,
    out_multiplier: int = 1,
    act_layer: Optional[nn.Module] = None,
    norm_layer: Optional[Callable[[int], nn.Module]] = None,
    drop_layer: Optional[Callable[..., nn.Module]] = None,
) -> None:
    super().__init__(
        GlobalAvgPool2d(flatten=False),
        # Squeeze the spatial information into a compact channel descriptor
        *conv_sequence(channels, max(channels // sa_ratio, 32), act_layer, norm_layer, drop_layer,
                       kernel_size=1, stride=1, bias=False),
        # Expand back (optionally to a channel multiple) and gate with a sigmoid
        *conv_sequence(max(channels // sa_ratio, 32), channels * out_multiplier, nn.Sigmoid(), None, drop_layer,
                       kernel_size=1, stride=1),
    )
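# Usage sketch for the block above -- a minimal illustration, assuming this
# __init__ belongs to an nn.Sequential-based spatial attention module (the
# class name `SAM` below is hypothetical):
#
#   sam = SAM(channels=64, sa_ratio=16, out_multiplier=1)
#   x = torch.rand(2, 64, 32, 32)
#   attn = sam(x)        # (2, 64, 1, 1) gating tensor, values in (0, 1)
#   out = x * attn       # channel-wise reweighting of the feature map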
def __init__(self, layout, num_classes=10, in_channels=3, stem_channels=64,
             act_layer=None, norm_layer=None, drop_layer=None, conv_layer=None):
    super().__init__(OrderedDict([
        ('features', DarknetBodyV1(layout, in_channels, stem_channels,
                                   act_layer, norm_layer, drop_layer, conv_layer)),
        ('pool', GlobalAvgPool2d(flatten=True)),
        ('classifier', nn.Linear(layout[2][-1], num_classes)),
    ]))
    init_module(self, 'leaky_relu')
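# Usage sketch (hypothetical values): the head width is read from
# layout[2][-1], so the layout needs at least three stages, e.g.:
#
#   layout = [[128], [256, 512], [512, 1024]]     # illustrative only
#   net = DarknetV1(layout)                       # class name assumed
#   logits = net(torch.rand(2, 3, 224, 224))      # -> (2, 10)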
def test_classification_trainer_few_classes():
    num_it = 10
    batch_size = 8
    # Generate all dependencies
    model = nn.Sequential(
        nn.Conv2d(3, 32, 3), nn.ReLU(inplace=True),
        GlobalAvgPool2d(flatten=True), nn.Linear(32, 3),
    )
    train_loader = DataLoader(MockClassificationDataset(num_it * batch_size), batch_size=batch_size)
    optimizer = torch.optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()
    learner = trainer.ClassificationTrainer(model, train_loader, train_loader, criterion, optimizer)

    # Fewer than 5 classes
    assert learner.evaluate()['acc5'] == 0
def test_binary_classification_trainer():
    num_it = 10
    batch_size = 8
    # Generate all dependencies
    model = nn.Sequential(
        nn.Conv2d(3, 32, 3), nn.ReLU(inplace=True),
        GlobalAvgPool2d(flatten=True), nn.Linear(32, 1),
    )
    train_loader = DataLoader(MockBinaryClassificationDataset(num_it * batch_size), batch_size=batch_size)
    optimizer = torch.optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()
    learner = trainer.BinaryClassificationTrainer(model, train_loader, train_loader, criterion, optimizer)

    res = learner.evaluate()
    assert 0 <= res['acc'] <= 1
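# The tests rely on mock dataset fixtures that are not shown in this section.
# A minimal sketch of what they could look like, inferred from the model heads
# (3-channel inputs, integer labels for CrossEntropyLoss, float targets for
# BCEWithLogitsLoss); the actual fixtures may differ:
class MockClassificationDataset(torch.utils.data.Dataset):
    def __init__(self, n, num_classes=5, img_size=8):
        self.n, self.num_classes, self.img_size = n, num_classes, img_size

    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        # Random image with a deterministic class index
        return torch.rand(3, self.img_size, self.img_size), idx % self.num_classes


class MockBinaryClassificationDataset(torch.utils.data.Dataset):
    def __init__(self, n, img_size=8):
        self.n, self.img_size = n, img_size

    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        # Random image with a float binary target, shaped (1,) so batches
        # collate to (N, 1) and match the Linear(32, 1) output
        return torch.rand(3, self.img_size, self.img_size), torch.tensor([float(idx % 2)])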
def test_classification_trainer(tmpdir_factory):
    folder = tmpdir_factory.mktemp("checkpoints")
    file_path = str(folder.join("tmp.pt"))
    num_it = 100
    batch_size = 8
    # Generate all dependencies
    model = nn.Sequential(
        nn.Conv2d(3, 32, 3), nn.ReLU(inplace=True),
        GlobalAvgPool2d(flatten=True), nn.Linear(32, 5),
    )
    train_loader = DataLoader(MockClassificationDataset(num_it * batch_size), batch_size=batch_size)
    optimizer = torch.optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    # Asking for an unavailable GPU index should fail
    with pytest.raises(ValueError if torch.cuda.is_available() else AssertionError):
        trainer.ClassificationTrainer(model, train_loader, train_loader, criterion, optimizer, gpu=7)

    learner = trainer.ClassificationTrainer(
        model, train_loader, train_loader, criterion, optimizer,
        output_file=file_path, gpu=0 if torch.cuda.is_available() else None)
    _test_trainer(learner, num_it, '3.weight', None)
    # AMP
    learner = trainer.ClassificationTrainer(
        model, train_loader, train_loader, criterion, optimizer,
        output_file=file_path, gpu=0 if torch.cuda.is_available() else None, amp=True)
    _test_trainer(learner, num_it, '3.weight', None)
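# `_test_trainer` is a shared helper that is not shown in this section. Judging
# from the unittest variant further down, it plausibly checks the setup, runs
# the LR finder, fits for an epoch, and verifies that the parameter named by
# its third argument was updated. A rough sketch only (the real helper, and the
# role of its fourth argument, may differ):
def _test_trainer(learner, num_it, param_name, freeze_until=None):
    before = dict(learner.model.named_parameters())[param_name].data.clone()
    assert learner.check_setup(num_it=num_it)
    learner.lr_find(num_it=num_it)
    assert len(learner.lr_recorder) == len(learner.loss_recorder)
    learner.fit_n_epochs(1, 1e-3)
    # The tracked parameter should have moved after an epoch of training
    assert not torch.equal(dict(learner.model.named_parameters())[param_name].data, before)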
def __init__(
    self,
    layout: List[Tuple[int, int]],
    num_classes: int = 10,
    in_channels: int = 3,
    stem_channels: int = 32,
    act_layer: Optional[nn.Module] = None,
    norm_layer: Optional[Callable[[int], nn.Module]] = None,
    drop_layer: Optional[Callable[..., nn.Module]] = None,
    conv_layer: Optional[Callable[..., nn.Module]] = None,
) -> None:
    super().__init__(OrderedDict([
        ('features', DarknetBodyV2(layout, in_channels, stem_channels, False,
                                   act_layer, norm_layer, drop_layer, conv_layer)),
        ('classifier', nn.Conv2d(layout[-1][0], num_classes, 1)),
        ('pool', GlobalAvgPool2d(flatten=True)),
    ]))
    init_module(self, 'leaky_relu')
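# Note the head ordering above: a 1x1 convolution first maps layout[-1][0]
# channels to num_classes activation maps, then GlobalAvgPool2d averages each
# map into a single logit -- the fully convolutional head used by Darknet-19.
# Sketch (hypothetical class name):
#
#   logits = DarknetV2(layout)(torch.rand(2, 3, 224, 224))   # -> (2, num_classes)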
def __init__(self, channels, se_ratio=12, act_layer=None, norm_layer=None, drop_layer=None):
    super().__init__()
    self.pool = GlobalAvgPool2d(flatten=False)
    self.conv = nn.Sequential(
        *conv_sequence(channels, channels // se_ratio, act_layer, norm_layer, drop_layer,
                       kernel_size=1, stride=1, bias=(norm_layer is None)),
        *conv_sequence(channels // se_ratio, channels, nn.Sigmoid(), None, drop_layer,
                       kernel_size=1, stride=1),
    )
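# A self-contained sketch of the squeeze-and-excitation mechanism this block
# implements, in plain torch (independent of conv_sequence/GlobalAvgPool2d;
# the class name is illustrative):
class TinySEBlock(nn.Module):
    def __init__(self, channels, se_ratio=12):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)                  # squeeze: HxW -> 1x1
        self.conv = nn.Sequential(
            nn.Conv2d(channels, channels // se_ratio, 1),    # bottleneck
            nn.ReLU(inplace=True),
            nn.Conv2d(channels // se_ratio, channels, 1),    # restore width
            nn.Sigmoid(),                                    # per-channel gate in (0, 1)
        )

    def forward(self, x):
        return x * self.conv(self.pool(x))                   # channel-wise reweighting

x = torch.rand(2, 24, 16, 16)
assert TinySEBlock(24)(x).shape == x.shape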
def __init__(self, width_mult=1.0, depth_mult=1.0, num_classes=1000, in_channels=3,
             in_planes=16, final_planes=180, use_se=True, se_ratio=12, dropout_ratio=0.2,
             bn_momentum=0.9, act_layer=None, norm_layer=None, drop_layer=None):
    """Mostly adapted from https://github.com/clovaai/rexnet/blob/master/rexnetv1.py"""
    if act_layer is None:
        act_layer = SiLU()
    if norm_layer is None:
        norm_layer = nn.BatchNorm2d

    num_blocks = [1, 2, 2, 3, 3, 5]
    strides = [1, 2, 2, 2, 1, 2]
    num_blocks = [ceil(element * depth_mult) for element in num_blocks]
    strides = sum([[element] + [1] * (num_blocks[idx] - 1)
                   for idx, element in enumerate(strides)], [])
    depth = sum(num_blocks)

    stem_channel = 32 / width_mult if width_mult < 1.0 else 32
    inplanes = in_planes / width_mult if width_mult < 1.0 else in_planes

    # The following channel configuration is a simple instance to make each layer become an expand layer
    chans = [int(round(width_mult * stem_channel))]
    chans.extend([
        int(round(width_mult * (inplanes + idx * final_planes / depth)))
        for idx in range(depth)
    ])

    # SE blocks are only enabled from the third stage onwards
    ses = [False] * (num_blocks[0] + num_blocks[1]) + [use_se] * sum(num_blocks[2:])

    _layers = conv_sequence(in_channels, chans[0], act_layer, norm_layer, drop_layer,
                            kernel_size=3, stride=2, padding=1, bias=False)

    # First block uses no expansion (t=1), all subsequent blocks use t=6
    t = 1
    for in_c, c, s, se in zip(chans[:-1], chans[1:], strides, ses):
        _layers.append(ReXBlock(in_channels=in_c, channels=c, t=t, stride=s, use_se=se, se_ratio=se_ratio))
        t = 6

    pen_channels = int(width_mult * 1280)
    _layers.extend(conv_sequence(chans[-1], pen_channels, act_layer, norm_layer, drop_layer,
                                 kernel_size=1, stride=1, padding=0, bias=False))

    super().__init__(OrderedDict([
        ('features', nn.Sequential(*_layers)),
        ('pool', GlobalAvgPool2d(flatten=True)),
        ('head', nn.Sequential(nn.Dropout(dropout_ratio), nn.Linear(pen_channels, num_classes))),
    ]))
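# Channel schedule sketch: with the defaults (width_mult=1.0, depth_mult=1.0),
# num_blocks = [1, 2, 2, 3, 3, 5] gives depth = 16, and the block widths grow
# linearly from in_planes=16 toward in_planes + final_planes:
#
#   chans = [32] + [int(round(16 + k * 180 / 16)) for k in range(16)]
#   # -> [32, 16, 27, 38, 50, 61, 72, ...]
#
# so, past the stem, every ReXBlock widens its output, which is what the
# "each layer become an expand layer" comment refers to.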
def __init__(self, block, num_blocks, planes, num_classes=10, in_channels=3,
             zero_init_residual=False, width_per_group=64, conv_layer=None, act_layer=None,
             norm_layer=None, drop_layer=None, deep_stem=False, stem_pool=True,
             avg_downsample=False, num_repeats=1, block_args=None):
    if conv_layer is None:
        conv_layer = nn.Conv2d
    if norm_layer is None:
        norm_layer = nn.BatchNorm2d
    if act_layer is None:
        act_layer = nn.ReLU(inplace=True)
    self.dilation = 1

    in_planes = 64
    # Deep stem from ResNet-C
    if deep_stem:
        _layers = [
            *conv_sequence(in_channels, in_planes // 2, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, stride=2, padding=1, bias=False),
            *conv_sequence(in_planes // 2, in_planes // 2, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, stride=1, padding=1, bias=False),
            *conv_sequence(in_planes // 2, in_planes, act_layer, norm_layer, drop_layer, conv_layer,
                           kernel_size=3, stride=1, padding=1, bias=False),
        ]
    else:
        _layers = conv_sequence(in_channels, in_planes, act_layer, norm_layer, drop_layer, conv_layer,
                                kernel_size=7, stride=2, padding=3, bias=False)
    if stem_pool:
        _layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

    # Optional tensor repetitions along channel axis (mainly for TridentNet)
    if num_repeats > 1:
        _layers.append(ChannelRepeat(num_repeats))

    # Consecutive convolutional blocks
    stride = 1
    # Block args
    if block_args is None:
        block_args = dict(groups=1)
    if not isinstance(block_args, list):
        block_args = [block_args] * len(num_blocks)
    for _num_blocks, _planes, _block_args in zip(num_blocks, planes, block_args):
        _layers.append(self._make_layer(block, _num_blocks, in_planes, _planes, stride,
                                        width_per_group, act_layer=act_layer, norm_layer=norm_layer,
                                        drop_layer=drop_layer, avg_downsample=avg_downsample,
                                        num_repeats=num_repeats, block_args=_block_args))
        in_planes = block.expansion * _planes
        stride = 2

    super().__init__(OrderedDict([
        ('features', nn.Sequential(*_layers)),
        ('pool', GlobalAvgPool2d(flatten=True)),
        ('head', nn.Linear(num_repeats * in_planes, num_classes)),
    ]))

    # Init all layers
    init.init_module(self, nonlinearity='relu')

    # Init shortcut
    if zero_init_residual:
        for m in self.modules():
            if isinstance(m, Bottleneck):
                m.convs[2][1].weight.data.zero_()
            elif isinstance(m, BasicBlock):
                m.convs[1][1].weight.data.zero_()
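# Usage sketch (assumption: `block`, `num_blocks` and `planes` follow the usual
# torchvision-style ResNet configuration; the call below is illustrative):
#
#   model = ResNet(BasicBlock, [2, 2, 2, 2], [64, 128, 256, 512])
#   logits = model(torch.rand(2, 3, 224, 224))   # -> (2, 10)
#
# `zero_init_residual` zeroes the scale of the last normalization layer in each
# residual branch (the "zero-gamma" trick), so every block starts out close to
# an identity mapping, which tends to stabilize early training.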
def test_classification_trainer(self):
    num_it = 100
    batch_size = 8
    # Generate all dependencies
    model = nn.Sequential(
        nn.Conv2d(3, 32, 3), nn.ReLU(inplace=True),
        GlobalAvgPool2d(flatten=True), nn.Linear(32, 5),
    )
    train_loader = torch.utils.data.DataLoader(MockClassificationDataset(num_it * batch_size),
                                               batch_size=batch_size)
    optimizer = torch.optim.Adam(model.parameters())
    criterion = torch.nn.CrossEntropyLoss()

    self.assertRaises(ValueError if torch.cuda.is_available() else AssertionError,
                      trainer.ClassificationTrainer, model, train_loader, train_loader,
                      criterion, optimizer, gpu=7)

    with NamedTemporaryFile() as tf:
        if platform.system() == 'Windows':
            tf.name = "C:\\Temp\\" + tf.name.split('\\')[-1]
        learner = trainer.ClassificationTrainer(
            model, train_loader, train_loader, criterion, optimizer,
            output_file=tf.name, gpu=0 if torch.cuda.is_available() else None)
        learner.save(tf.name)
        checkpoint = torch.load(tf.name, map_location='cpu')
        model_w = learner.model[-1].weight.data.clone()

        # Check setup
        self.assertTrue(learner.check_setup(num_it=num_it))

        # LR Find
        learner.load(checkpoint)
        self.assertRaises(AssertionError, learner.plot_recorder, block=False)
        learner.lr_find(num_it=num_it)
        self.assertEqual(len(learner.lr_recorder), len(learner.loss_recorder))
        learner.plot_recorder(block=False)

        # Training
        # Perform the iterations
        learner.load(checkpoint)
        self.assertRaises(ValueError, learner.fit_n_epochs, 1, 1e-3, sched_type='my_scheduler')
        learner.fit_n_epochs(1, 1e-3)
        # Check that params were updated
        self.assertFalse(torch.equal(model[-1].weight.data, model_w))
        learner.load(checkpoint)
        learner.fit_n_epochs(1, 1e-3, sched_type='cosine')
        # Check that params were updated
        self.assertFalse(torch.equal(model[-1].weight.data, model_w))