def __init__(self, output_stride=16, num_classes=21):
    """Assemble the DeepLab model: ResNet-18 backbone, ASPP head, decoder.

    Args:
        output_stride: ratio of input image resolution to backbone output
            resolution, forwarded to the ASPP builder.
        num_classes: number of segmentation classes the decoder predicts.
    """
    super(DeepLab, self).__init__()
    # Plain (non-synchronized) batch norm is used everywhere in this variant.
    norm_layer = nn.BatchNorm2d
    self.ResNet = ResNet18()
    self.aspp = build_aspp(output_stride, norm_layer)
    self.decoder = build_decoder(num_classes, norm_layer)
def __init__(self, backbone='resnet', output_stride=16, num_classes=4,
             sync_bn=False, freeze_bn=False):
    """Build DeepLab from a configurable backbone, ASPP head, and decoder.

    Args:
        backbone: backbone architecture name ('resnet', 'drn', ...).
        output_stride: input-to-feature resolution ratio; forced to 8 for
            'drn' backbones regardless of the value passed in.
        num_classes: number of segmentation classes.
        sync_bn: use SynchronizedBatchNorm2d (multi-GPU) instead of
            nn.BatchNorm2d.
        freeze_bn: stored flag; presumably consulted elsewhere to freeze
            batch-norm statistics — TODO confirm against train().
    """
    super(DeepLab, self).__init__()
    # DRN backbones only support stride 8, so override the argument.
    if backbone == 'drn':
        output_stride = 8
    # Fixed: `if sync_bn == True:` — compare truthiness, not identity to True.
    BatchNorm = SynchronizedBatchNorm2d if sync_bn else nn.BatchNorm2d
    self.backbone = build_backbone(backbone, output_stride, BatchNorm)
    self.aspp = build_aspp(backbone, output_stride, BatchNorm)
    self.decoder = build_decoder(num_classes, backbone, BatchNorm)
    self.freeze_bn = freeze_bn
def main():
    """Train the saliency decoder on top of two frozen ResNet-50 encoders."""
    global args
    # Frozen, pre-trained feature extractors (image features and place features).
    img_model = resnet.resnet50(args.image_model_path).cuda().eval()
    pla_model = resnet.resnet50(args.place_model_path).cuda().eval()
    # The decoder is the only trainable component (no checkpoint: None).
    decoder_model = decoder.build_decoder(None, args.size, args.num_feat,
                                          args.num_feat).cuda()
    optimizer = torch.optim.SGD(decoder_model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Data loading code
    train_loader = torch.utils.data.DataLoader(
        SaliconLoader.ImageList(args.data_folder,
                                transforms.Compose([
                                    transforms.ToTensor(),
                                ]),
                                train=True),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    if args.mse:
        args.output_folder = args.output_folder + "_mse"
        criterion = nn.MSELoss().cuda()
    else:
        # Presumably the EML loss operates on a different scale, hence the
        # reduced learning rate — TODO confirm.
        args.lr *= 0.1
        args.output_folder = args.output_folder + "_eml"
        criterion = EMLLoss.Loss().cuda()

    args.output_folder = pl.Path(args.output_folder)
    # Fixed: replaced the racy `if not is_dir(): mkdir()` pair — exist_ok
    # tolerates concurrent creation and parents creates missing ancestors.
    args.output_folder.mkdir(parents=True, exist_ok=True)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_loader, img_model, pla_model, decoder_model,
              criterion, optimizer, epoch)
        # Checkpoint after every epoch, overwriting the same file.
        state = {
            'state_dict': decoder_model.state_dict(),
        }
        save_path = args.output_folder / ("model.pth.tar")
        save_model(state, save_path)
def __init__(self, backbone='resnet', output_stride=16, num_classes=21,
             freeze_bn=False):
    """Build DeepLab with a ResNet-101 backbone, ASPP head, and decoder.

    Args:
        backbone: backbone name; only used to select stride and to
            parameterize the ASPP/decoder builders (the backbone itself is
            always ResNet-101 here).
        output_stride: input-to-feature resolution ratio; forced to 8 for
            'drn'.
        num_classes: number of segmentation classes.
        freeze_bn: stored flag; presumably consulted elsewhere — TODO confirm.
    """
    super(DeepLab, self).__init__()
    if backbone == 'drn':
        output_stride = 8
    BatchNorm = nn.BatchNorm2d
    # Fixed: the backbone was built with a hard-coded output_stride=8 while
    # ASPP/decoder received the computed `output_stride` (16 by default),
    # leaving the dilation rates inconsistent with the backbone's actual
    # stride. All three components now share the same value.
    self.backbone = resnet_deeplab.ResNet101(BatchNorm=BatchNorm,
                                             pretrained=False,
                                             output_stride=output_stride)
    self.aspp = build_aspp(backbone, output_stride, BatchNorm)
    self.decoder = build_decoder(num_classes, backbone, BatchNorm)
    self.freeze_bn = freeze_bn
def __init__(self, config):
    """RNN-Transducer: encoder + prediction network + joint network + loss.

    Args:
        config: configuration object providing `joint.input_size`,
            `joint.inner_size`, `vocab_size`, and `share_embedding`.
    """
    super(Transducer, self).__init__()
    self.config = config

    # Acoustic encoder and label (prediction) decoder are built from config.
    self.encoder = build_encoder(config)
    self.decoder = build_decoder(config)

    # Joint network fuses encoder and decoder states into vocab logits.
    self.joint = JointNet(
        input_size=config.joint.input_size,
        inner_dim=config.joint.inner_size,
        vocab_size=config.vocab_size,
    )

    if config.share_embedding:
        # Weight tying: the output projection reuses the decoder embedding,
        # which requires the two matrices to have identical shapes.
        dec_w = self.decoder.embedding.weight
        proj_w = self.joint.project_layer.weight
        assert dec_w.size() == proj_w.size(), \
            '%d != %d' % (dec_w.size(1), proj_w.size(1))
        self.joint.project_layer.weight = dec_w

    # NOTE(review): blank index 28 is hard-coded — confirm it matches the
    # vocabulary used by config.vocab_size.
    self.crit = RNNTLoss(blank=28)
def main():
    """Run saliency inference on a single image and display/save the map."""
    global args

    to_input = transforms.Compose([
        transforms.Resize(args.size),
        transforms.ToTensor(),
    ])

    # Frozen feature extractors plus the trained decoder, all in eval mode.
    img_model = resnet.resnet50(args.image_model_path).cuda().eval()
    pla_model = resnet.resnet50(args.place_model_path).cuda().eval()
    decoder_model = decoder.build_decoder(args.decoder_model_path, args.size,
                                          args.num_feat,
                                          args.num_feat).cuda().eval()

    source = Image.open(args.img_path).convert('RGB')
    processed = to_input(source).unsqueeze(0).cuda()

    # Inference only — no gradients needed.
    with torch.no_grad():
        img_feat = img_model(processed, decode=True)
        pla_feat = pla_model(processed, decode=True)
        pred = decoder_model([img_feat, pla_feat])

    fig, ax = plt.subplots(1, 2)

    pred = pred.squeeze().detach().cpu().numpy()
    pred = post_process(pred)

    # Save the saliency map next to the input, suffixed with "_smap.png".
    pred_path = args.img_path.stem + "_smap.png"
    print("Saving prediction", pred_path)
    sio.imsave(pred_path, pred)

    # CHW tensor back to HWC for matplotlib display.
    processed = processed.squeeze().permute(1, 2, 0).cpu()
    ax[0].imshow(processed)
    ax[0].set_title("Input Image")
    ax[1].imshow(pred)
    ax[1].set_title("Prediction")
    plt.show()
def __init__(self, pretrained_model_name_or_path, num_layers, dim_model,
             num_heads, dim_ff, dropout, max_utter_num_length, utter_type,
             max_decode_output_length, vocab_size, embeddings=None):
    """Hierarchical BERT encoder + Transformer decoder for dialogue generation.

    Args:
        pretrained_model_name_or_path: HuggingFace identifier or path for the
            BERT token encoder.
        num_layers: layer count shared by the utterance encoder and decoder.
        dim_model: model (hidden) dimension.
        num_heads: attention head count.
        dim_ff: feed-forward inner dimension.
        dropout: dropout rate.
        max_utter_num_length: maximum number of utterances per dialogue.
        utter_type: utterance-encoder variant selector, forwarded unchanged.
        max_decode_output_length: maximum target sequence length for the
            positional encoding of the target embeddings.
        vocab_size: not referenced in this constructor — presumably used by
            a subclass or output layer; verify.
        embeddings: optional embeddings forwarded to the utterance encoder.
    """
    super().__init__()
    # Token-level encoder: pre-trained BERT.
    self.token_encoder = BertModel.from_pretrained(
        pretrained_model_name_or_path)
    # Target embeddings reuse BERT's word-embedding table (weight sharing),
    # followed by sinusoidal/learned positions for the decoder side.
    self.target_embeddings = nn.Sequential(
        self.token_encoder.embeddings.word_embeddings,
        PositionalEncoding(dropout, dim_model, max_decode_output_length))
    # Utterance-level encoder over per-utterance representations.
    self.utterance_encoder = TransformerEncoder(num_layers, dim_model,
                                                num_heads, dim_ff, dropout,
                                                max_utter_num_length,
                                                utter_type, embeddings)
    self.decoder = build_decoder(num_layer=num_layers, heads=num_heads,
                                 d_model=dim_model, d_ff=dim_ff,
                                 drop_rate=dropout)
    # self.token_weight_1 = nn.Linear(Config.dim_model, 1, bias=False)
    # self.token_weight_2 = nn.Linear(Config.dim_model, Config.dim_model, bias=True)
    # Xavier-init all matrix-shaped parameters of the freshly constructed
    # (non-pretrained) modules; BERT's weights are left untouched.
    for param in self.utterance_encoder.parameters():
        if param.dim() > 1:
            nn.init.xavier_uniform_(param)
    for param in self.decoder.parameters():
        if param.dim() > 1:
            nn.init.xavier_uniform_(param)
def __init__(self, block, layers, num_classes):
    """Construct a dilated ResNet segmentation network with ASPP-style head.

    Args:
        block: residual block class passed to self.Resblock.
        layers: per-stage block counts (only the first three entries used).
        num_classes: number of output channels of the final classifier.
    """
    self.inplanes = 64
    super(ResNet, self).__init__()

    # Stem: 7x7 stride-2 conv + frozen BatchNorm + 3x3 stride-2 max-pool.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(64, affine=affine_par)
    # Freeze the stem's batch-norm parameters.
    for i in self.bn1.parameters():
        i.requires_grad = False
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Three residual stages; layer3 keeps stride 1 and dilates instead,
    # preserving spatial resolution for dense prediction.
    self.layer1 = self.Resblock(block, 64, layers[0], stride=1)
    self.layer2 = self.Resblock(block, 128, layers[1], stride=2)
    self.layer3 = self.Resblock(block, 256, layers[2], stride=1, dilation=2)

    # Compress concatenated stage-2/stage-3 features (512 + 1024) to 256.
    self.channel_compress = nn.Sequential(
        nn.Conv2d(in_channels=1024 + 512, out_channels=256, kernel_size=3,
                  stride=1, padding=2, dilation=2, bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.5))

    self.IEM = build_IEM(256)

    # Fuses IEM output with the compressed features (256 + 256 -> 256).
    self.layer5 = nn.Sequential(
        nn.Conv2d(in_channels=256 + 256, out_channels=256, kernel_size=3,
                  stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.5))

    # Three identical residual-style skip refiners (ReLU-conv-ReLU-conv).
    self.skip1 = nn.Sequential(
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True))
    self.skip2 = nn.Sequential(
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True))
    self.skip3 = nn.Sequential(
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True))

    # ASPP-style parallel branches with dilation rates {1, 1, 6, 12, 18}.
    self.dilation_conv_0 = nn.Sequential(
        nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0, bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.5),
    )
    self.dilation_conv_1 = nn.Sequential(
        nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0, bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.5),
    )
    self.dilation_conv_6 = nn.Sequential(
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=6, dilation=6,
                  bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.5))
    self.dilation_conv_12 = nn.Sequential(
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=12, dilation=12,
                  bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.5))
    self.dilation_conv_18 = nn.Sequential(
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=18, dilation=18,
                  bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.5))

    # Merge the five 256-channel branches (5 * 256 = 1280) and classify.
    self.layer_out1 = nn.Sequential(
        nn.Conv2d(1280, 256, kernel_size=1, stride=1, padding=0, bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.5),
    )
    self.layer_out2 = nn.Conv2d(256, num_classes, kernel_size=1, stride=1,
                                bias=True)

    self.decoder = build_decoder(num_classes, 256, nn.BatchNorm2d)

    # Weight init: N(0, 0.01) for convs, identity affine for batch norms.
    # Fixed: removed the dead fan-out computation
    # (`n = kernel_h * kernel_w * out_channels`) that was never used.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            m.weight.data.normal_(0, 0.01)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
from encoder import build_encoder
from decoder import build_decoder

# MNIST images only; labels are discarded for autoencoder training.
(X_train, _), (X_test, _) = mnist.load_data()
image_size = X_train.shape[1]

# Add a trailing channel axis and scale pixel values into [0, 1].
X_train = X_train[..., np.newaxis].astype('float32') / 255
X_test = X_test[..., np.newaxis].astype('float32') / 255

# Architecture hyper-parameters.
latent_dim = 16
batch_size = 128
kernel_size = 3
layer_filters = [32, 64]

# Encoder returns its input tensor, the encoder model, and the pre-flatten
# feature-map shape the decoder needs to reconstruct from.
inputs, encoder, shape = build_encoder(image_size, latent_dim,
                                       layer_filters, kernel_size)
decoder = build_decoder(shape, latent_dim, layer_filters, kernel_size)

# Compose encoder and decoder into the end-to-end autoencoder.
autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
autoencoder.summary()

# Pixel-wise reconstruction objective.
autoencoder.compile(loss='mse', optimizer='adam')
autoencoder.fit(X_train, X_train,
                validation_data=(X_test, X_test),
                epochs=30,
                batch_size=batch_size)