def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(1, 8, 3, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(8)
    self.conv2 = nn.Conv2d(8, 32, 3, padding=1, bias=False)
    self.bn3 = nn.BatchNorm2d(32)
    self.conv3 = nn.Conv2d(32, 64, 3, padding=1, bias=False)
    if args.pooling_method == 'stride_conv':
        self.pl2 = nn.Conv2d(32, 32, kernel_size=2, stride=2)
        self.pl3 = nn.Conv2d(64, 64, kernel_size=14, stride=14)
    elif args.pooling_method == 'max':
        self.pl2 = nn.MaxPool2d(2)
        self.pl3 = nn.MaxPool2d(14)
    elif args.pooling_method == 'avg':
        self.pl2 = nn.AvgPool2d(2)
        self.pl3 = nn.AvgPool2d(14)
    elif args.pooling_method == 'stoch':
        self.pl2 = nn.FractionalMaxPool2d(2, output_size=14)
        self.pl3 = nn.FractionalMaxPool2d(14, output_size=1)
        # self.pl2 = nn.Conv2d(32, 32, kernel_size=2, stride=2)
        # self.pl3 = nn.MaxPool2d(14)
    self.fc = nn.Linear(64, 10)
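# A minimal shape check for the pooling choices above (a sketch; assumes the
# 28x28 MNIST-style input this Net implies). With pooling_method == 'stoch',
# pl2 maps 28x28 -> 14x14 and pl3 maps 14x14 -> 1x1, matching the max/avg
# branches.
import torch
import torch.nn as nn

pl2 = nn.FractionalMaxPool2d(2, output_size=14)
pl3 = nn.FractionalMaxPool2d(14, output_size=1)
print(pl2(torch.randn(1, 32, 28, 28)).shape)  # torch.Size([1, 32, 14, 14])
print(pl3(torch.randn(1, 64, 14, 14)).shape)  # torch.Size([1, 64, 1, 1])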
def __init__(self, path):
    self.inplanes = 16
    super(smallNet, self).__init__(path)
    block = BasicBlock
    # self.convi = InceptionI()
    self.conv1 = nn.Conv2d(2, 16, 7, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(16)
    self.relu1 = nn.LeakyReLU()  # nn.ReLU()
    self.layer1 = self._make_layer(block, 16, 1, stride=1)   # 16 -> 16
    self.layer2 = self._make_layer(block, 32, 2, stride=1)   # receptive field in output feature map: 7
    # self.layer2 = InceptionA(); self.inplanes = 32
    self.layer3 = self._make_layer(block, 64, 2, stride=1)   # 11
    # self.layer1 = self._make_sequential(16, 16, 1, 1)
    # self.layer2 = self._make_sequential(16, 32, 2, 1)
    # self.layer3 = self._make_sequential(32, 64, 2, 1)
    self.layer4 = self._make_layer(block, 128, 2, stride=2)  # 19
    # self.avgpool = nn.MaxPool2d(2)  # input 30 -> 8, 20 -> 5
    self.fc1 = nn.Linear(128 * 5 * 5, 128)
    self.fc1bn = nn.BatchNorm1d(128)  # BatchNorm1d: fc1 produces a 2D (N, 128) tensor
    self.fc1relu = nn.ReLU()
    self.fc2 = nn.Linear(128 + 1, 1)
    # self.maxpool1 = nn.MaxPool2d(3, 2, padding=1)
    self.conv5 = nn.Conv2d(128, 128, 3, 1, padding=1)
    self.fp1 = nn.FractionalMaxPool2d(3, output_ratio=(0.8, 0.8))
    self.fp2 = nn.FractionalMaxPool2d(3, output_ratio=(0.8, 0.8))
    self.fp3 = nn.FractionalMaxPool2d(3, output_ratio=(0.8, 0.8))
    # self.fp4 = nn.FractionalMaxPool2d(3, output_ratio=(0.8, 0.8))
    self.apply(weight_init)
def __init__(self):
    super(DeepNet, self).__init__()
    # self.nl = nn.LeakyReLU(0.3)
    self.nl = nn.ELU(0.3)
    # 32 x 32 x 3
    self.conv1 = nn.Conv2d(3, 128, kernel_size=3, padding=1)
    # self.conv2 = nn.Conv2d(96, 128, kernel_size=1, padding=0)
    self.bn2 = nn.BatchNorm2d(128, eps=1e-3)
    self.pool1 = nn.FractionalMaxPool2d(kernel_size=3, output_size=(23, 23))
    self.dropout1 = nn.Dropout(p=0.1)
    # 23 x 23
    self.conv3 = nn.Conv2d(128, 384, kernel_size=3, padding=1)
    # self.conv4 = nn.Conv2d(256, 384, kernel_size=1, padding=0)
    self.bn4 = nn.BatchNorm2d(384, eps=1e-3)
    self.dropout2 = nn.Dropout(p=0.2)
    self.pool2 = nn.FractionalMaxPool2d(3, output_size=(16, 16))
    # 16 x 16
    self.conv5 = nn.Conv2d(384, 768, kernel_size=3, padding=1)
    # self.conv6 = nn.Conv2d(512, 768, kernel_size=1, padding=0)
    self.bn6 = nn.BatchNorm2d(768, eps=1e-3)
    self.dropout3 = nn.Dropout(p=0.25)
    self.pool3 = nn.FractionalMaxPool2d(3, output_size=(11, 11))
    # 11 x 11
    self.conv7 = nn.Conv2d(768, 1280, kernel_size=3, padding=1)
    # self.conv8 = nn.Conv2d(1280, 1280, kernel_size=1, padding=0)
    self.bn7 = nn.BatchNorm2d(1280, eps=1e-3)
    self.dropout4 = nn.Dropout(p=0.35)
    self.pool4 = nn.FractionalMaxPool2d(3, output_size=(7, 7))
    # 7 x 7
    self.conv9 = nn.Conv2d(1280, 1920, kernel_size=3, padding=1)
    self.dropout4 = nn.Dropout(p=0.4)  # NOTE: overwrites the p=0.35 dropout4 above
    self.pool5 = nn.FractionalMaxPool2d(3, output_size=(4, 4))
    # 4 x 4
    self.conv10 = nn.Conv2d(1920, 2048, kernel_size=2, padding=0)
    # 3 x 3
    self.conv11 = nn.Conv2d(2048, 2560, kernel_size=2, padding=0)
    self.bn11 = nn.BatchNorm2d(2560, eps=1e-3)
    # 2 x 2
    self.conv12 = nn.Conv2d(2560, 3072, kernel_size=2, padding=0)
    self.dropout5 = nn.Dropout(p=0.45)
    self.bn12 = nn.BatchNorm2d(3072, eps=1e-3)
    # 1 x 1
    self.fc1 = nn.Linear(3072, 100)
def __init__(self):
    super(NNPoolingModule, self).__init__()
    self.input1d = torch.randn(1, 16, 50)
    self.module1d = nn.ModuleList([
        nn.MaxPool1d(3, stride=2),
        nn.AvgPool1d(3, stride=2),
        nn.LPPool1d(2, 3, stride=2),
        nn.AdaptiveMaxPool1d(3),
        nn.AdaptiveAvgPool1d(3),
    ])
    self.input2d = torch.randn(1, 16, 30, 10)
    self.module2d = nn.ModuleList([
        nn.MaxPool2d((3, 2), stride=(2, 1)),
        nn.AvgPool2d((3, 2), stride=(2, 1)),
        nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5)),
        nn.LPPool2d(2, 3, stride=(2, 1)),
        nn.AdaptiveMaxPool2d((5, 7)),
        nn.AdaptiveAvgPool2d(7),
    ])
    self.input3d = torch.randn(1, 16, 20, 4, 4)
    self.module3d = nn.ModuleList([
        nn.MaxPool3d(2),
        nn.AvgPool3d(2),
        nn.FractionalMaxPool3d(2, output_ratio=(0.5, 0.5, 0.5)),
        nn.AdaptiveMaxPool3d((5, 7, 9)),
        nn.AdaptiveAvgPool3d((5, 7, 9)),
    ])
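# A quick smoke test for the 2d list above (a sketch; NNPoolingModule is the
# class defined above, torch/nn imported as usual). Every module accepts the
# stored (1, 16, 30, 10) input; FractionalMaxPool2d with output_ratio
# (0.5, 0.5) yields a 15 x 5 map.
m = NNPoolingModule()
for mod in m.module2d:
    print(type(mod).__name__, tuple(mod(m.input2d).shape))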
def __init__(self, inplanes, planes, input_res, cardinality, stride=1):
    super(PyramidBlock, self).__init__()
    self.conv1_1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=self.bias)
    self.bn1_1 = nn.BatchNorm2d(planes, affine=self.affine)
    self.conv2_1 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                             padding=1, bias=self.bias)
    self.bn2_1 = nn.BatchNorm2d(planes, affine=self.affine)
    self.conv3_1 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
                             bias=self.bias)
    self.bn3_1 = nn.BatchNorm2d(planes * self.expansion, affine=self.affine)
    self.relu = nn.ReLU(inplace=True)
    if stride != 1 or inplanes != planes * self.expansion:
        self.downsample = nn.Sequential(
            nn.Conv2d(inplanes, planes * self.expansion, kernel_size=1,
                      stride=stride, bias=self.bias),
            nn.BatchNorm2d(planes * self.expansion, affine=self.affine))
    else:
        self.downsample = nn.Sequential()
    self.conv1_2 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=self.bias)
    self.bn1_2 = nn.BatchNorm2d(planes, affine=self.affine)
    self.bn2_2 = nn.BatchNorm2d(planes, affine=self.affine)
    self.conv3_2 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
                             bias=self.bias)
    self.bn3_2 = nn.BatchNorm2d(planes * self.expansion, affine=self.affine)
    # Integer division so nn.Upsample receives an int size tuple
    output_res = ((input_res[0] + 1) // stride, (input_res[1] + 1) // stride)
    pool, conv, upsample = [], [], []
    for i in range(cardinality):
        ratio = 1.0 / math.pow(2, (i + 1.0) / cardinality)
        pool.append(nn.FractionalMaxPool2d(kernel_size=2, output_ratio=ratio))
        conv.append(nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                              padding=1, bias=self.bias))
        upsample.append(nn.Upsample(size=output_res, mode='bilinear'))
    self.pool = nn.ModuleList(pool)
    self.conv = nn.ModuleList(conv)
    self.upsample = nn.ModuleList(upsample)
    self.cardinality = cardinality
def test_KeepSize():
    mod = layers.KeepSize(
        sub=nn.FractionalMaxPool2d((3, 3), output_ratio=(0.7, 0.9)))
    a = torch.ones((17, 11, 64, 64))
    b = mod(a)
    assert tuple(b.size()) == (17, 11, 64, 64)
    torch.jit.script(mod)
def conv_layers(sizes=[64, 64], k=3, mp=(1, 2), mpk=2, relu=1e-3, bn=None,
                fmp=False):
    """Construct a set of conv layers programmatically based on some parameters."""
    if isinstance(mp, (int, float)):
        mp = (1, mp)
    layers = []
    nin = 1
    for nout in sizes:
        if nout < 0:
            nout = -nout
            layers += [Lstm2D(nin, nout)]
        else:
            layers += [nn.Conv2d(nin, nout, k, padding=(k - 1) // 2)]
        if relu > 0:
            layers += [nn.LeakyReLU(relu)]
        else:
            layers += [nn.ReLU()]
        if bn is not None:
            assert isinstance(bn, float)
            layers += [nn.BatchNorm2d(nout, momentum=bn)]
        if not fmp:
            layers += [nn.MaxPool2d(kernel_size=mpk, stride=mp)]
        else:
            layers += [nn.FractionalMaxPool2d(
                kernel_size=mpk, output_ratio=(1.0 / mp[0], 1.0 / mp[1]))]
        nin = nout
    return layers
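# Hedged usage sketch for conv_layers (the flex/Lstm2D imports are only needed
# for negative sizes, which this call avoids). With fmp=True, each stage uses
# FractionalMaxPool2d with the reciprocal of mp as its output_ratio.
import torch
import torch.nn as nn

net = nn.Sequential(*conv_layers(sizes=[32, 64], mp=(2, 2), fmp=True))
print(net(torch.randn(1, 1, 32, 32)).shape)  # (1, 64, 8, 8): each FMP stage halves both dims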
def __init__(self, block, channel_sequence, size_sequence, block_count,
             kernel_size=3, use_block_for_last=False):
    super().__init__()
    assert len(channel_sequence) == len(size_sequence), \
        "channel and size sequences should have same length"
    old_channels, old_size = channel_sequence[0], size_sequence[0]
    layers = []
    for channels, size in zip(channel_sequence[1:], size_sequence[1:]):
        layers.append(BlockSet(block, in_channels=old_channels,
                               out_channels=channels, block_count=block_count,
                               kernel_size=kernel_size))
        if size < old_size:
            layers.append(nn.FractionalMaxPool2d(kernel_size=kernel_size,
                                                 output_size=size))
        elif size > old_size:
            layers.append(nn.Upsample(size=(size, size), mode='bilinear',
                                      align_corners=True))
        old_channels, old_size = channels, size
    if use_block_for_last:
        layers.append(block(channels, channels, kernel_size=1))
    else:
        layers.append(nn.Conv2d(channels, channels, kernel_size=1))
    self.layers = nn.Sequential(*layers)
def __init__(self, num_classes=8, num_channels=1, init_weights=True,
             batch_norm=False):
    super(FMPNet, self).__init__()
    self.num_classes = num_classes
    self.num_channels = num_channels
    self.size_input = 48
    self.dim = 128 * 6 * 6
    self.features = nn.Sequential(
        nn.Conv2d(num_channels, 64, kernel_size=5, stride=1, padding=2),
        nn.ReLU(inplace=True),
        nn.FractionalMaxPool2d(kernel_size=3, output_ratio=(0.5, 0.5)),
        nn.BatchNorm2d(64),
        nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.FractionalMaxPool2d(kernel_size=3, output_ratio=(0.5, 0.5)),
        nn.BatchNorm2d(64),
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.FractionalMaxPool2d(kernel_size=3, output_ratio=(0.5, 0.5)),
        nn.BatchNorm2d(128),
    )
    self.classifier = nn.Sequential(
        nn.Dropout(),
        nn.Linear(128 * 6 * 6, 1024),
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(1024, 1024),
        nn.ReLU(inplace=True),
        nn.Linear(1024, num_classes),
    )
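# Shape bookkeeping for FMPNet (a sketch): three FractionalMaxPool2d stages
# with output_ratio 0.5 take the 48x48 input through 24 -> 12 -> 6, which is
# exactly why self.dim and the classifier expect 128 * 6 * 6 features.
net = FMPNet(num_classes=8, num_channels=1)
feats = net.features(torch.randn(2, 1, 48, 48))
print(feats.shape)  # torch.Size([2, 128, 6, 6])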
def __init__(self, block, channel_sequence, size_sequence, block_count,
             kernel_size=3, norm_method='BN', use_block_for_last=False,
             pool_layer='fmp'):
    super().__init__()
    assert len(channel_sequence) == len(size_sequence), \
        "channel and size sequences should have same length"
    old_channels, old_size = channel_sequence[0], size_sequence[0]
    layers = []
    for channels, size in zip(channel_sequence[1:], size_sequence[1:]):
        layers.append(
            BlockSet(block, in_channels=old_channels, out_channels=channels,
                     block_count=block_count, kernel_size=kernel_size,
                     norm_method=norm_method))
        if size < old_size:
            if pool_layer == 'fmp':
                # Fractional max pooling straight to the target size
                layers.append(nn.FractionalMaxPool2d(kernel_size=kernel_size,
                                                     output_size=size))
            elif pool_layer == 'mp':
                # Max pooling: padding=0, stride defaults to kernel_size, dilation=1
                layers.append(nn.MaxPool2d(kernel_size=old_size // size))
        elif size > old_size:
            layers.append(nn.Upsample(size=(size, size), mode='bilinear',
                                      align_corners=True))
        old_channels, old_size = channels, size
    if use_block_for_last:
        layers.append(block(channels, channels, kernel_size=1,
                            norm_method=norm_method))
    else:
        layers.append(nn.Conv2d(channels, channels, kernel_size=1))
    self.layers = nn.Sequential(*layers)
def forward(self, x, pool_type='avg', pool_size=(2, 2)):
    x = F.relu_(self.bn1(self.conv1(x)))
    x = F.relu_(self.bn2(self.conv2(x)))
    if pool_type == 'avg':
        x = F.avg_pool2d(x, kernel_size=pool_size)
    elif pool_type == 'max':
        x = F.max_pool2d(x, kernel_size=pool_size)
    elif pool_type == 'frac':
        # Constructed per call, so the pooling regions are resampled on every
        # forward pass
        fractional_maxpool2d = nn.FractionalMaxPool2d(
            kernel_size=pool_size, output_ratio=1 / np.sqrt(2))
        x = fractional_maxpool2d(x)
    return x
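# Standalone sketch of the 'frac' branch above: output_ratio = 1/sqrt(2)
# shrinks both spatial dims by ~0.707 per call instead of the exact halving
# of the 'avg'/'max' branches, so repeated 'frac' calls downsample more gently.
import numpy as np
fmp = nn.FractionalMaxPool2d(kernel_size=(2, 2), output_ratio=1 / np.sqrt(2))
print(fmp(torch.randn(1, 64, 32, 32)).shape)  # (1, 64, 22, 22): int(32 / sqrt(2)) = 22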
def pyramid(D, C, inputResH, inputResW):
    pyraTable = ConcatTable()
    sc = math.pow(2, 1 / C)
    for i in range(C):
        scaled = 1 / math.pow(sc, i + 1)
        conv1 = nn.Conv2d(D, D, kernel_size=3, stride=1, padding=1)
        if opt.init:
            nn.init.xavier_normal_(conv1.weight)
        s = nn.Sequential(
            nn.FractionalMaxPool2d(2, output_ratio=(scaled, scaled)),
            conv1,
            nn.UpsamplingBilinear2d(size=(int(inputResH), int(inputResW))))
        pyraTable.add(s)
    pyra = nn.Sequential(pyraTable, CaddTable(False))
    return pyra
def conv2d_block(d, r=3, mp=None, fmp=None, repeat=1, batchnorm=True,
                 nonlin=nn.ReLU):
    """Generate a conv layer with batchnorm and optional maxpool."""
    result = []
    for i in range(repeat):
        result += [flex.Conv2d(d, r, padding=(r // 2, r // 2))]
        if batchnorm:
            result += [flex.BatchNorm2d()]
        result += [nonlin()]
    if fmp is not None:
        assert mp is None, (fmp, mp)
        result += [nn.FractionalMaxPool2d(3, output_ratio=fmp)]
    elif mp is not None:
        result += [nn.MaxPool2d(mp)]
    return result
def __init__(self, D, C, inputRes):
    super(Pyramid, self).__init__()
    self.C = C
    self.sc = 2 ** (1 / C)
    for i in range(self.C):
        scaled = 1 / self.sc ** (i + 1)
        setattr(self, 'conv_' + str(i),
                nn.Conv2d(D, D, kernel_size=2, stride=1, padding=1))
        setattr(self, 'SpatialFractionalMaxPooling_' + str(i),
                nn.FractionalMaxPool2d(kernel_size=2, output_ratio=scaled))
        setattr(self, 'SpatialUpSamplingBilinear_' + str(i),
                nn.Upsample(size=(int(inputRes[0]), int(inputRes[1])),
                            mode='bilinear'))
def __init__(self):
    super(alignment, self).__init__()
    # Sound features
    self.conv1_1 = nn.Conv1d(2, 64, 65, stride=4, padding=0, dilation=1,
                             groups=1, bias=True)
    self.pool1_1 = nn.MaxPool1d(4, stride=4)
    self.s_net_1 = self._make_layer(Block2, 64, 128, 15, 4, 1)
    self.s_net_2 = self._make_layer(Block2, 128, 128, 15, 4, 1)
    self.s_net_3 = self._make_layer(Block2, 128, 256, 15, 4, 1)
    self.pool1_2 = nn.MaxPool1d(3, stride=3)
    self.conv1_2 = nn.Conv1d(256, 128, 3, stride=1, padding=0, dilation=1,
                             groups=1, bias=True)
    # Image features
    self.conv3_1 = nn.Conv3d(1, 64, (5, 7, 7), (2, 2, 2), padding=(2, 3, 3),
                             dilation=1, groups=1, bias=True)
    self.pool3_1 = nn.MaxPool3d((1, 3, 3), (1, 2, 2), padding=(0, 1, 1))
    self.im_net_1 = self._make_layer(Block3, 64, 64, (3, 3, 3), (2, 2, 2), 2)
    # Fuse features
    self.fractional_maxpool = nn.FractionalMaxPool2d((3, 1),
                                                     output_size=(10, 1))
    self.conv3_2 = nn.Conv3d(192, 512, (1, 1, 1))
    self.conv3_3 = nn.Conv3d(512, 128, (1, 1, 1))
    self.joint_net_1 = self._make_layer(Block3, 128, 128, (3, 3, 3), (2, 2, 2), 2)
    self.joint_net_2 = self._make_layer(Block3, 128, 256, (3, 3, 3), (1, 2, 2), 2)
    self.joint_net_3 = self._make_layer(Block3, 256, 512, (3, 3, 3), (1, 2, 2), 2)
    # TODO: global avg pooling, fc and sigmoid
    self.fc = Linear(512, 2)
def __init__(self, in_channels, state_channels, state_size, output_size=10,
             act='relu', tol=1e-3):
    super().__init__()
    self.conv1 = nn.Conv2d(in_channels, state_channels, 3, padding=1)
    self.norm1 = nn.BatchNorm2d(state_channels)
    self.pool = nn.FractionalMaxPool2d(2, output_size=state_size)
    self.odefunc = ODEfunc(state_channels, act=act)
    self.odeblock = ODEBlock(self.odefunc, rtol=tol, atol=tol)
    self.fc = nn.Linear(state_size * state_size * state_channels, output_size)
def __init__(self, D, cardinality, inputRes):
    super(Pyramid, self).__init__()
    self.D = D
    self.cardinality = cardinality
    self.inputRes = inputRes
    self.scale = 2 ** (-1 / self.cardinality)
    _scales = []
    # Repeat the downsample -> conv -> upsample branch a designated number of
    # times
    for card in range(self.cardinality):
        temp = nn.Sequential(
            nn.FractionalMaxPool2d(2, output_ratio=self.scale ** (card + 1)),
            nn.Conv2d(self.D, self.D, 3, 1, 1),
            nn.Upsample(size=self.inputRes)  # , mode='bilinear'
        )
        _scales.append(temp)
    self.scales = nn.ModuleList(_scales)
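# The branch ratios above form a geometric sequence: with cardinality C,
# branch i pools to 2 ** (-(i + 1) / C) of the input resolution, so the last
# branch always halves it. A quick check with C = 3 on a 64-pixel side:
C = 3
scale = 2 ** (-1 / C)
for card in range(C):
    ratio = scale ** (card + 1)
    print(card, round(ratio, 3), int(64 * ratio))  # 0.794 -> 50, 0.63 -> 40, 0.5 -> 32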
def __init__(self, input_channels: int = 3, output_channels: int = 16,
             feature_map_size: int = 7):
    super().__init__()
    self.output_channels = output_channels
    self.feature_map_size = feature_map_size
    self.cnn = nn.Sequential(
        # conv block
        nn.Conv2d(input_channels, 16, kernel_size=5, stride=1),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d(2, 2),
        # conv block
        nn.Conv2d(16, output_channels, kernel_size=5, stride=1),
        nn.ReLU(),
    )
    self.fraction = nn.FractionalMaxPool2d(
        (2, 2), output_size=(feature_map_size, feature_map_size))
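# Why FractionalMaxPool2d here (a sketch): with a fixed output_size it pins
# the feature map to feature_map_size x feature_map_size for any sufficiently
# large input, which a fixed-stride MaxPool2d cannot guarantee.
frac = nn.FractionalMaxPool2d((2, 2), output_size=(7, 7))
for s in (20, 28, 37):
    print(s, tuple(frac(torch.randn(1, 16, s, s)).shape))  # always (1, 16, 7, 7)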
    W=[32, 64],
    device=['cpu', 'cuda'],
    tags=['long'])

pool_2d_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['MaxPool2d', nn.MaxPool2d],
        ['AvgPool2d', nn.AvgPool2d],
        ['AdaptiveMaxPool2d',
         lambda kernel, stride: nn.AdaptiveMaxPool2d(kernel)],
        ['FractionalMaxPool2d',
         lambda kernel, stride: nn.FractionalMaxPool2d(kernel, output_size=2)],
    ],
)


class Pool2dBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, kernel, stride, N, C, H, W, device, op_func):
        self.input = torch.rand(N, C, H, W, device=device)
        self.kernel = kernel
        self.stride = stride
        self.op_func = op_func(self.kernel, stride=self.stride)

    def forward(self):
        return self.op_func(self.input)
         constructor_args=((2, 2, 2),),
         input_size=(2, 3, 4, 4, 4)),
    dict(module_name='AvgPool3d',
         constructor_args=(2, (2, 2, 2)),
         input_size=(2, 3, 5, 5, 5),
         desc='stride'),
    dict(module_name='ReplicationPad3d',
         constructor_args=((1, 2, 3, 4, 5, 6),),
         input_size=(2, 3, 5, 5, 5)),
    dict(module_name='Embedding',
         constructor_args=(4, 3),
         input=Variable(torch.randperm(2).repeat(1, 2).long(),
                        requires_grad=False),
         jacobian_input=False),
    dict(constructor=lambda: nn.FractionalMaxPool2d(
             2, output_ratio=0.5,
             _random_samples=torch.DoubleTensor(1, 3, 2).uniform_()),
         input_size=(1, 3, 5, 5),
         fullname='FractionalMaxPool2d_ratio',
         test_cuda=False),
    dict(constructor=lambda: nn.FractionalMaxPool2d(
             (2, 2), output_size=(4, 4),
             _random_samples=torch.DoubleTensor(1, 3, 2).uniform_()),
         input_size=(1, 3, 7, 7),
         fullname='FractionalMaxPool2d_size',
         test_cuda=False),
]

for test_params in module_tests + new_module_tests:
    # TODO: CUDA is not implemented yet
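# The _random_samples tensors above pin the otherwise stochastic pooling
# offsets so these tests are reproducible; the expected shape is (N, C, 2)
# for 2d pooling, matching the (1, 3, 5, 5) input here. A standalone sketch:
fmp = nn.FractionalMaxPool2d(2, output_ratio=0.5,
                             _random_samples=torch.rand(1, 3, 2))
print(fmp(torch.randn(1, 3, 5, 5)).shape)  # torch.Size([1, 3, 2, 2])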
def __init__(self, *args, **kwargs):
    super(CnnOcrModel, self).__init__()
    if len(args) > 0:
        raise Exception("Only keyword arguments allowed in CnnOcrModel")
    self.hyper_params = kwargs.copy()
    self.input_line_height = kwargs['input_line_height']
    self.rds_line_height = kwargs['rds_line_height']
    self.alphabet = kwargs['alphabet']
    self.lstm_input_dim = kwargs['lstm_input_dim']
    self.num_lstm_layers = kwargs['num_lstm_layers']
    self.num_lstm_hidden_units = kwargs['num_lstm_hidden_units']
    self.p_lstm_dropout = kwargs['p_lstm_dropout']
    self.num_in_channels = kwargs.get('num_in_channels', 1)
    self.gpu = kwargs.get('gpu', True)
    self.multigpu = kwargs.get('multigpu', True)
    self.verbose = kwargs.get('verbose', True)
    self.lattice_decoder = None

    # Sanity checks: input line height must be the rapid-downsample line
    # height times a power of 2
    if self.rds_line_height > self.input_line_height:
        raise Exception(
            "rapid-downsample line height must be less than or equal to input line height")
    if self.input_line_height % self.rds_line_height != 0:
        raise Exception(
            "input line height must be the rapid-downsample line height times a power of 2")
    num_rds_pooling_layers = 0
    lh = self.input_line_height
    while lh > self.rds_line_height:
        num_rds_pooling_layers += 1
        if lh % 2 != 0:
            raise Exception(
                "input line height must be the rapid-downsample line height times a power of 2")
        lh //= 2
    if lh != self.rds_line_height:
        raise Exception(
            "input line height must be the rapid-downsample line height times a power of 2")

    self.rapid_ds = nn.Sequential()
    last_num_filters = self.num_in_channels
    for i in range(num_rds_pooling_layers):
        self.rapid_ds.add_module(
            "%02d-conv" % i,
            nn.Conv2d(last_num_filters, 16, kernel_size=3, padding=1))
        self.rapid_ds.add_module("%02d-relu" % i, nn.ReLU(inplace=True))
        self.rapid_ds.add_module("%02d-pool" % i, nn.MaxPool2d(2, stride=2))
        last_num_filters = 16

    self.cnn = nn.Sequential(
        *self.ConvBNReLU(last_num_filters, 64),
        *self.ConvBNReLU(64, 64),
        nn.FractionalMaxPool2d(2, output_ratio=(0.5, 0.7)),
        *self.ConvBNReLU(64, 128),
        *self.ConvBNReLU(128, 128),
        nn.FractionalMaxPool2d(2, output_ratio=(0.5, 0.7)),
        *self.ConvBNReLU(128, 256),
        *self.ConvBNReLU(256, 256),
        *self.ConvBNReLU(256, 256))

    # We need to calculate the cnn output size to construct the bridge layer
    fake_input_width = 20
    cnn_out_h, _ = self.cnn_input_size_to_output_size(
        (self.input_line_height, fake_input_width))
    cnn_out_c = self.cnn_output_num_channels()
    cnn_feat_size = cnn_out_c * cnn_out_h

    self.bridge_layer = nn.Sequential(
        nn.Linear(cnn_feat_size, self.lstm_input_dim),
        nn.ReLU(inplace=True))

    self.lstm = nn.LSTM(self.lstm_input_dim,
                        self.num_lstm_hidden_units,
                        num_layers=self.num_lstm_layers,
                        dropout=self.p_lstm_dropout,
                        bidirectional=True)

    self.prob_layer = nn.Sequential(
        nn.Linear(2 * self.num_lstm_hidden_units, len(self.alphabet)))

    # Finally, let's initialize parameters
    for param in self.parameters():
        torch.nn.init.uniform_(param, -0.08, 0.08)

    total_params = 0
    for param in self.parameters():
        local_params = 1
        for d in param.size():
            local_params *= d
        total_params += local_params
    cnn_params = 0
    for param in self.cnn.parameters():
        local_params = 1
        for d in param.size():
            local_params *= d
        cnn_params += local_params
    lstm_params = 0
    for param in self.lstm.parameters():
        local_params = 1
        for d in param.size():
            local_params *= d
        lstm_params += local_params

    if self.verbose:
        logger.info("Total Model Params = %d" % total_params)
        logger.info("\tCNN Params = %d" % cnn_params)
        logger.info("\tLSTM Params = %d" % lstm_params)
        logger.info("Model looks like:")
        logger.info(repr(self))

    if torch.cuda.is_available() and self.gpu:
        self.rapid_ds = self.rapid_ds.cuda()
        self.cnn = self.cnn.cuda()
        self.bridge_layer = self.bridge_layer.cuda()
        self.lstm = self.lstm.cuda()
        self.prob_layer = self.prob_layer.cuda()
        if self.multigpu:
            self.cnn = torch.nn.DataParallel(self.cnn)
    else:
        logger.info("Warning: Running model on CPU")
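# Note on the two FMP layers in self.cnn above: output_ratio=(0.5, 0.7) halves
# the feature-map height but keeps ~70% of the width, preserving horizontal
# resolution for the LSTM that reads the line left to right. A sketch:
fmp = nn.FractionalMaxPool2d(2, output_ratio=(0.5, 0.7))
print(fmp(torch.randn(1, 64, 16, 100)).shape)  # height 16 -> 8, width 100 -> 70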
def __append_layer(self, net_style, args_dict):
    # Each branch unpacks the expected number of positional constructor
    # arguments from args_dict, in declaration order.
    args_values_list = list(args_dict.values())
    if net_style == "Conv2d":
        self.layers.append(nn.Conv2d(*args_values_list[:8]))
    elif net_style == "MaxPool2d":
        self.layers.append(nn.MaxPool2d(*args_values_list[:6]))
    elif net_style == "Linear":
        self.layers.append(nn.Linear(*args_values_list[:3]))
    elif net_style == "reshape":
        # Special case: for reshape, append the target tensor size directly
        # print(type(args_values_list[0]))
        self.layers.append(args_values_list[0])
    elif net_style == "Conv1d":
        self.layers.append(nn.Conv1d(*args_values_list[:8]))
    elif net_style == "Conv3d":
        self.layers.append(nn.Conv3d(*args_values_list[:8]))
    elif net_style == "ConvTranspose1d":
        self.layers.append(nn.ConvTranspose1d(*args_values_list[:9]))
    elif net_style == "ConvTranspose2d":
        self.layers.append(nn.ConvTranspose2d(*args_values_list[:9]))
    elif net_style == "ConvTranspose3d":
        self.layers.append(nn.ConvTranspose3d(*args_values_list[:9]))
    elif net_style == "Unfold":
        self.layers.append(nn.Unfold(*args_values_list[:4]))
    elif net_style == "Fold":
        # Fixed: this branch previously constructed nn.Unfold by mistake
        self.layers.append(nn.Fold(*args_values_list[:5]))
    elif net_style == "MaxPool1d":
        self.layers.append(nn.MaxPool1d(*args_values_list[:6]))
    elif net_style == "MaxPool3d":
        self.layers.append(nn.MaxPool3d(*args_values_list[:6]))
    elif net_style == "MaxUnpool1d":
        self.layers.append(nn.MaxUnpool1d(*args_values_list[:3]))
    elif net_style == "MaxUnpool2d":
        self.layers.append(nn.MaxUnpool2d(*args_values_list[:3]))
    elif net_style == "MaxUnpool3d":
        self.layers.append(nn.MaxUnpool3d(*args_values_list[:3]))
    elif net_style == "AvgPool1d":
        self.layers.append(nn.AvgPool1d(*args_values_list[:5]))
    elif net_style == "AvgPool2d":
        self.layers.append(nn.AvgPool2d(*args_values_list[:5]))
    elif net_style == "AvgPool3d":
        self.layers.append(nn.AvgPool3d(*args_values_list[:5]))
    elif net_style == "FractionalMaxPool2d":
        self.layers.append(nn.FractionalMaxPool2d(*args_values_list[:5]))
    elif net_style == "LPPool1d":
        self.layers.append(nn.LPPool1d(*args_values_list[:4]))
    elif net_style == "LPPool2d":
        self.layers.append(nn.LPPool2d(*args_values_list[:4]))
    elif net_style == "AdaptiveMaxPool1d":
        self.layers.append(nn.AdaptiveMaxPool1d(*args_values_list[:2]))
    elif net_style == "AdaptiveMaxPool2d":
        self.layers.append(nn.AdaptiveMaxPool2d(*args_values_list[:2]))
    elif net_style == "AdaptiveMaxPool3d":
        self.layers.append(nn.AdaptiveMaxPool3d(*args_values_list[:2]))
    elif net_style == "AdaptiveAvgPool1d":
        self.layers.append(nn.AdaptiveAvgPool1d(args_values_list[0]))
    elif net_style == "AdaptiveAvgPool2d":
        self.layers.append(nn.AdaptiveAvgPool2d(args_values_list[0]))
    elif net_style == "AdaptiveAvgPool3d":
        self.layers.append(nn.AdaptiveAvgPool3d(args_values_list[0]))
    elif net_style == "ReflectionPad1d":
        self.layers.append(nn.ReflectionPad1d(args_values_list[0]))
    elif net_style == "ReflectionPad2d":
        self.layers.append(nn.ReflectionPad2d(args_values_list[0]))
    elif net_style == "ReplicationPad1d":
        self.layers.append(nn.ReplicationPad1d(args_values_list[0]))
    elif net_style == "ReplicationPad2d":
        self.layers.append(nn.ReplicationPad2d(args_values_list[0]))
    elif net_style == "ReplicationPad3d":
        self.layers.append(nn.ReplicationPad3d(args_values_list[0]))
    elif net_style == "ZeroPad2d":
        self.layers.append(nn.ZeroPad2d(args_values_list[0]))
    elif net_style == "ConstantPad1d":
        self.layers.append(nn.ConstantPad1d(*args_values_list[:2]))
    elif net_style == "ConstantPad2d":
        self.layers.append(nn.ConstantPad2d(*args_values_list[:2]))
    elif net_style == "ConstantPad3d":
        self.layers.append(nn.ConstantPad3d(*args_values_list[:2]))
    elif net_style == "ELU":
        self.layers.append(nn.ELU(*args_values_list[:2]))
    elif net_style == "Hardshrink":
        self.layers.append(nn.Hardshrink(args_values_list[0]))
    elif net_style == "Hardtanh":
        self.layers.append(nn.Hardtanh(*args_values_list[:5]))
    elif net_style == "LeakyReLU":
        self.layers.append(nn.LeakyReLU(*args_values_list[:2]))
    elif net_style == "LogSigmoid":
        self.layers.append(nn.LogSigmoid())
    elif net_style == "PReLU":
        self.layers.append(nn.PReLU(*args_values_list[:2]))
    elif net_style == "ReLU":
        self.layers.append(nn.ReLU(args_values_list[0]))
    elif net_style == "ReLU6":
        self.layers.append(nn.ReLU6(args_values_list[0]))
    elif net_style == "RReLU":
        self.layers.append(nn.RReLU(*args_values_list[:3]))
    elif net_style == "SELU":
        self.layers.append(nn.SELU(args_values_list[0]))
    elif net_style == "CELU":
        self.layers.append(nn.CELU(*args_values_list[:2]))
    elif net_style == "Sigmoid":
        self.layers.append(nn.Sigmoid())
    elif net_style == "Softplus":
        self.layers.append(nn.Softplus(*args_values_list[:2]))
    elif net_style == "Softshrink":
        self.layers.append(nn.Softshrink(args_values_list[0]))
    elif net_style == "Softsign":
        self.layers.append(nn.Softsign())
    elif net_style == "Tanh":
        self.layers.append(nn.Tanh())
    elif net_style == "Tanhshrink":
        self.layers.append(nn.Tanhshrink())
    elif net_style == "Threshold":
        self.layers.append(nn.Threshold(*args_values_list[:3]))
    elif net_style == "Softmin":
        self.layers.append(nn.Softmin(args_values_list[0]))
    elif net_style == "Softmax":
        self.layers.append(nn.Softmax(args_values_list[0]))
    elif net_style == "Softmax2d":
        self.layers.append(nn.Softmax2d())
    elif net_style == "LogSoftmax":
        self.layers.append(nn.LogSoftmax(args_values_list[0]))
    elif net_style == "AdaptiveLogSoftmaxWithLoss":
        self.layers.append(nn.AdaptiveLogSoftmaxWithLoss(*args_values_list[:5]))
    elif net_style == "BatchNorm1d":
        self.layers.append(nn.BatchNorm1d(*args_values_list[:5]))
    elif net_style == "BatchNorm2d":
        self.layers.append(nn.BatchNorm2d(*args_values_list[:5]))
    elif net_style == "BatchNorm3d":
        self.layers.append(nn.BatchNorm3d(*args_values_list[:5]))
    elif net_style == "GroupNorm":
        self.layers.append(nn.GroupNorm(*args_values_list[:4]))
    elif net_style == "InstanceNorm1d":
        self.layers.append(nn.InstanceNorm1d(*args_values_list[:5]))
    elif net_style == "InstanceNorm2d":
        self.layers.append(nn.InstanceNorm2d(*args_values_list[:5]))
    elif net_style == "InstanceNorm3d":
        self.layers.append(nn.InstanceNorm3d(*args_values_list[:5]))
    elif net_style == "LayerNorm":
        self.layers.append(nn.LayerNorm(*args_values_list[:3]))
    elif net_style == "LocalResponseNorm":
        self.layers.append(nn.LocalResponseNorm(*args_values_list[:4]))
    elif net_style == "Dropout":
        self.layers.append(nn.Dropout(*args_values_list[:2]))
    elif net_style == "Dropout2d":
        self.layers.append(nn.Dropout2d(*args_values_list[:2]))
    elif net_style == "Dropout3d":
        self.layers.append(nn.Dropout3d(*args_values_list[:2]))
    elif net_style == "AlphaDropout":
        self.layers.append(nn.AlphaDropout(*args_values_list[:2]))
class decom_vgg16_2stream(nn.Module):
    def __init__(self):
        # n_class includes the background
        super(decom_vgg16_2stream, self).__init__()
        self.downpooling = nn.FractionalMaxPool2d(1, output_size=(256, 320))
        self.upsample = nn.Upsample(size=[600, 791], mode='bilinear')
        self.depth_extimater = model
        if opt.load_depth_extimater:
            self.depth_extimater.load_state_dict(
                t.load(opt.load_depth_extimater)['state_dict'])
            print(' ==> load depth_extimater successfully')
        self.extractor = decom_vgg16()
        self.extractor2 = decom_vgg16_depth()
        self.NIN = nn.Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1))
        # self.extractor_others = features_net_others

    def forward(self, x):
        x_small = self.downpooling(x)
        mydepth = self.depth_extimater(x_small)
        mydepth = self.upsample(mydepth)
        x1_cnn1 = self.extractor(x)
def __init__(self, *args, **kwargs):
    super(CnnOcrModel, self).__init__()
    if len(args) > 0:
        raise Exception("Only keyword arguments allowed in CnnOcrModel")
    self.hyper_params = kwargs.copy()
    self.input_line_height = kwargs['input_line_height']
    self.alphabet = kwargs['alphabet']
    self.lstm_input_dim = kwargs['lstm_input_dim']
    self.num_lstm_layers = kwargs['num_lstm_layers']
    self.num_lstm_hidden_units = kwargs['num_lstm_hidden_units']
    self.p_lstm_dropout = kwargs['p_lstm_dropout']
    self.num_in_channels = kwargs.get('num_in_channels', 1)
    self.gpu = kwargs.get('gpu', True)
    self.multigpu = kwargs.get('multigpu', True)
    self.verbose = kwargs.get('verbose', True)
    self.LSTMList = None
    self.lattice_decoder = None

    self.rapid_ds = nn.Sequential(
        nn.Conv2d(self.num_in_channels, 16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2, stride=2),
        nn.Conv2d(16, 16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2, stride=2),
        nn.Conv2d(16, 16, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2, stride=2),
    )

    self.cnn = nn.Sequential(
        *self.ConvBNReLU(16, 64),
        *self.ConvBNReLU(64, 64),
        nn.FractionalMaxPool2d(2, output_ratio=(0.5, 0.7)),
        *self.ConvBNReLU(64, 128),
        *self.ConvBNReLU(128, 128),
        nn.FractionalMaxPool2d(2, output_ratio=(0.5, 0.7)),
        *self.ConvBNReLU(128, 256),
        *self.ConvBNReLU(256, 256),
        *self.ConvBNReLU(256, 256))

    # We need to calculate the cnn output size to construct the bridge layer
    fake_input_width = 20
    cnn_out_h, _ = self.cnn_input_size_to_output_size(
        (self.input_line_height, fake_input_width))
    cnn_out_c = self.cnn_output_num_channels()
    cnn_feat_size = cnn_out_c * cnn_out_h

    self.bridge_layer = nn.Sequential(
        nn.Linear(cnn_feat_size, self.lstm_input_dim),
        nn.ReLU(inplace=True))

    self.lstm = nn.LSTM(self.lstm_input_dim,
                        self.num_lstm_hidden_units,
                        num_layers=self.num_lstm_layers,
                        dropout=self.p_lstm_dropout,
                        bidirectional=True)

    self.prob_layer = nn.Sequential(
        nn.Linear(2 * self.num_lstm_hidden_units, len(self.alphabet)))

    # Finally, let's initialize parameters (uniform_ replaces the deprecated
    # torch.nn.init.uniform)
    for param in self.parameters():
        torch.nn.init.uniform_(param, -0.08, 0.08)

    total_params = 0
    for param in self.parameters():
        local_params = 1
        for d in param.size():
            local_params *= d
        total_params += local_params
    cnn_params = 0
    for param in self.cnn.parameters():
        local_params = 1
        for d in param.size():
            local_params *= d
        cnn_params += local_params
    lstm_params = 0
    for param in self.lstm.parameters():
        local_params = 1
        for d in param.size():
            local_params *= d
        lstm_params += local_params

    if self.verbose:
        logger.info("Total Model Params = %d" % total_params)
        logger.info("\tCNN Params = %d" % cnn_params)
        logger.info("\tLSTM Params = %d" % lstm_params)
        logger.info("Model looks like:")
        logger.info(repr(self))

    if torch.cuda.is_available() and self.gpu:
        self.rapid_ds = self.rapid_ds.cuda()
        self.cnn = self.cnn.cuda()
        self.bridge_layer = self.bridge_layer.cuda()
        self.lstm = self.lstm.cuda()
        self.prob_layer = self.prob_layer.cuda()
        if self.multigpu:
            self.cnn = torch.nn.DataParallel(self.cnn)
    else:
        logger.info("Warning: Running model on CPU")