def init_testloader(self, **kwargs):
    """Build and return the test DataLoader for SAR recognition."""
    super(SarRecognitionTemplate, self).init_testloader(**kwargs)
    # Fall back to plain ToTensor + ImageNet normalization when the caller
    # supplies no transforms.
    pipeline = get('test_transforms', kwargs, None)
    if pipeline is None:
        pipeline = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225]),
        ])
    kwargs['transforms'] = pipeline
    index_path = get_valid('test_index', kwargs)
    folder = get_valid('test_folder', kwargs)
    dataset = SarDataset(index_path=index_path, folder=folder, **kwargs)
    batch = get('test_batch', kwargs, 64)
    workers = get('test_worker', kwargs, 8)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=self.get_batch_size(batch),
        num_workers=workers)
    print('Analyse test index: {}, test data num: {}!'.format(
        index_path, len(dataset)))
    return loader
def retrieval(self, **kwargs):
    """Query an LSH index with features extracted from target images.

    Expects in kwargs:
        target_folder: directory holding the query images.
        lsh: an index object exposing .query(vec, num_results, distance_func).
        query_num: number of nearest neighbours to print (default 1).
    """
    target_folder = get_valid('target_folder', kwargs)
    lsh = get_valid('lsh', kwargs)
    query_num = int(get('query_num', kwargs, 1))
    images = ImageProcess(target_folder).process()
    print('extract target feature')
    vecs, img_paths = extract_vectors(self.model, images, 1024,
                                      self.transform, ms=self.ms)
    target_feature = dict(
        zip(img_paths, list(vecs.detach().cpu().numpy().T)))
    for q_path, q_vec in target_feature.items():
        try:
            response = lsh.query(q_vec.flatten(),
                                 num_results=query_num,
                                 distance_func="cosine")
            print('target img: {}'.format(q_path))
            # The index may return fewer hits than requested; don't rely on
            # an IndexError being swallowed.
            for idx in range(min(query_num, len(response))):
                query_img_path = response[idx][0][1]
                print('{}th query img: {}'.format(idx, query_img_path))
            print('*' * 20)
        except Exception as e:
            # Was a bare `except:` that hid every failure (including
            # KeyboardInterrupt); narrowed and reported instead.
            print('error occur on: {}, error: {}'.format(q_path, e))
            continue
def init_trainloader(self, **kwargs):
    """Create the training DataLoader for SAR recognition."""
    super(SarRecognitionTemplate, self).init_trainloader(**kwargs)
    pipeline = get('train_transforms', kwargs, None)
    if pipeline is None:
        # Default augmentation: random distortion, then ImageNet stats.
        distort_num = get('distort_num', kwargs, 10)
        distort_ratio = get('distort_ratio', kwargs, 0.1)
        pipeline = transforms.Compose([
            RandomDistort(distort_num, distort_ratio),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225]),
        ])
    kwargs['transforms'] = pipeline
    index_path = get_valid('train_index', kwargs)
    folder = get_valid('train_folder', kwargs)
    dataset = SarDataset(index_path=index_path, folder=folder, **kwargs)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=self.get_batch_size(get('train_batch', kwargs, 64)),
        num_workers=get('train_worker', kwargs, 8),
        drop_last=get('drop_last', kwargs, True),
        shuffle=get('shuffle', kwargs, True))
    print('Analyse train index: {}, train data num: {}!'.format(
        index_path, len(dataset)))
    return loader
def init_testloader(self, **kwargs):
    """Build one test DataLoader per class, keyed by integer class id.

    Each class directory under test_folder must contain a 'label.txt'
    index file.
    """
    pipeline = get('test_transforms', kwargs, None)
    if pipeline is None:
        pipeline = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225]),
        ])
    kwargs['transforms'] = pipeline
    test_folder = get_valid('test_folder', kwargs)
    batch = get('test_batch', kwargs, 64)
    workers = get('test_worker', kwargs, 8)
    class_dict = get_valid('class_dict', kwargs)
    loader_dict = dict()
    for cls_name in class_dict:
        cls_folder = os.path.join(test_folder, cls_name)
        dataset = SarDataset(index_path=os.path.join(cls_folder, 'label.txt'),
                             folder=cls_folder, **kwargs)
        loader_dict[class_dict[cls_name]] = torch.utils.data.DataLoader(
            dataset,
            batch_size=self.get_batch_size(batch),
            num_workers=workers)
        print('test {} data num: {}!'.format(cls_name, len(dataset)))
    return loader_dict
def save_model(self, **kwargs):
    """Serialize model weights plus training metadata to a checkpoint.

    Checkpoint file name encodes model name, epoch, eval metric and loss.
    Returns the path of the written checkpoint.
    """
    model = get_valid('model', kwargs)
    ckpt_folder = get_valid('ckpt_folder', kwargs)
    epoch = get_valid('epoch', kwargs)
    loss = get_valid('loss', kwargs)
    eval_type = get('eval_type', kwargs, 'acc')
    # Default to 0 so a missing 'acc' no longer crashes on round(None, 4);
    # the sibling save_model variant already guards its score this way.
    acc = get('acc', kwargs, 0)
    acc = round(acc, 4)
    model_name = get('model_name', kwargs, '')
    state = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'eval_type': eval_type,
        'acc': acc,
        'loss': loss
    }
    # Rounded only for the file name; the state dict keeps the exact loss.
    loss = round(loss, 4)
    ckpt_name = '{}epoch{}_{}{}_loss{}.pth'.format(model_name, epoch,
                                                   eval_type, acc, loss)
    ckpt_path = os.path.join(ckpt_folder, ckpt_name)
    torch.save(state, ckpt_path)
    print('Save checkpoint: {}'.format(ckpt_path))
    return ckpt_path
def save_model(self, **kwargs):
    """Save a checkpoint named from epoch, eval score and loss; return its path."""
    model = get_valid('model', kwargs)
    ckpt_folder = get_valid('ckpt_folder', kwargs)
    epoch = get_valid('epoch', kwargs)
    loss = get_valid('loss', kwargs)
    eval_type = get('eval_type', kwargs, 'score')
    eval_score = get('eval_score', kwargs, 0)
    model_name = get('model_name', kwargs, '')
    state = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'eval_type': eval_type,
        'eval_score': eval_score,
        'loss': loss
    }
    # Normalize the pieces that go into the file name; the state dict above
    # keeps the raw values.
    if eval_type is None:
        eval_type = ''
    if eval_score is None:
        eval_score = ''
    else:
        eval_score = str(round(eval_score, 4))
    loss = str(round(loss, 4))
    ckpt_name = '{}epoch{}{}{}loss{}.pth'.format(
        model_name, epoch, eval_type, eval_score, loss)
    ckpt_path = os.path.join(ckpt_folder, ckpt_name)
    torch.save(state, ckpt_path)
    print('Save checkpoint: {}'.format(ckpt_path))
    return ckpt_path
def forward(self, **kwargs):
    """One decoding step of the attention decoder.

    Expects in kwargs:
        input: previous token ids for the batch (cast to long here).
        hidden: RNN hidden state from the previous step.
        feature: encoder feature map attended over.
        mask: spatial mask limiting attention to valid positions.

    Returns (ouput, hidden): per-class scores for this step and the
    updated hidden state.
    """
    input = get_valid('input', kwargs)
    hidden = get_valid('hidden', kwargs)
    feature = get_valid('feature', kwargs)
    mask = get_valid('mask', kwargs)
    # Embed the incoming token and add a length-1 sequence axis for the RNN.
    input = input.long()
    input = self.embedding(input)
    input = self.dropout(input)
    input = input.unsqueeze(0)
    _, hidden = self.rnn(input, hidden)
    # Project hidden[1:] into the feature space so it can be broadcast
    # against the encoder feature map.
    hidden_tmp = hidden[1:].permute(1, 2, 0).unsqueeze(3)
    hidden_tmp = self.conv1(hidden_tmp)
    hidden_tmp = hidden_tmp.expand_as(feature)
    encode_conv = self.conv2(feature)
    # Additive attention scores over all spatial positions, flattened.
    encode_conv = self.conv3(torch.tanh(encode_conv + hidden_tmp)).view(
        encode_conv.shape[0], 1, -1)
    mask = mask.view(mask.shape[0], mask.shape[1], -1)
    # Softmax over spatial positions, restricted by the mask.
    w = self.mask_softmax(encode_conv, dim=2, mask=mask)
    # feature = self.feature_conv(feature)
    # feature = self.bn(feature)
    # feature = self.relu(feature)
    feature = feature.view(feature.shape[0], feature.shape[1], -1)
    # Context vector: attention-weighted sum of encoder features.
    c = torch.sum(feature * w, 2)
    # Concatenate hidden state and context, then project to class scores.
    ouput = torch.cat([hidden[1], c], 1)
    ouput = self.out(ouput)
    return ouput, hidden
def train_model(self, **kwargs):
    """Run one classification training epoch; return (log, avg_loss)."""
    model = get_valid('model', kwargs)
    model.train()
    optimizer = get_valid('optimizer', kwargs)
    epoch = get_valid('epoch', kwargs)
    criterion = get_valid('criterion', kwargs)
    train_loader = get_valid('train_loader', kwargs)
    total_loss = 0
    iter_num = len(train_loader)
    log_step = max(1, iter_num // 100)
    # Move the model to the GPU once; the original re-issued model.cuda()
    # on every batch inside the loop.
    if self.gpu is not None:
        model = model.cuda()
    for idx, data in enumerate(train_loader):
        img, label = data[0], data[1]
        if self.gpu is not None:
            img = img.cuda()
            label = label.cuda()
        optimizer.zero_grad()
        pred = model(image=img, type='classify')
        loss = criterion['cls'](pred, label)
        loss.backward()
        optimizer.step()
        if idx % log_step == 0 and idx != 0:
            finish_percent = int(idx / iter_num * 100)
            print('Train: finish {}%, loss: {}'.format(
                finish_percent, loss.data.item()))
        total_loss += loss.data.item()
    avg_loss = total_loss / iter_num
    log = generate_log(epoch=epoch, name='Train', avg_loss=avg_loss)
    return log, avg_loss
def forward(self, **kwargs):
    """Decode a full target sequence step by step with teacher forcing.

    Expects in kwargs:
        image: input image batch fed to the encoder.
        target: token id tensor of shape (batch, seq_len); position 0
            seeds the decoder.
        mask: spatial attention mask passed through to the decoder.
        teacher_forcing_ratio: probability (default 1) of feeding the
            ground-truth token instead of the model's own prediction.

    Returns the decoder outputs stacked along dim 1 (one entry per target
    position from 1 to seq_len - 1).
    """
    super(SarRecognitionModel, self).forward(**kwargs)
    image = get_valid('image', kwargs)
    target_variable = get_valid('target', kwargs)
    mask = get_valid('mask', kwargs)
    teacher_forcing_ratio = get('teacher_forcing_ratio', kwargs, 1)
    hidden, feature = self.encoder(image=image)
    # Reorder the encoder hidden state axes before feeding the decoder.
    hidden = hidden.permute(2, 0, 1)
    decoder_input = target_variable[:, 0]
    output_list = list()
    for di in range(1, target_variable.shape[1]):
        output, hidden = self.decoder(input=decoder_input,
                                      hidden=hidden,
                                      feature=feature,
                                      mask=mask)
        output_list.append(output.unsqueeze(1))
        # Sampled per step: feed ground truth or the model's best guess.
        teacher_forcing = random.random() < teacher_forcing_ratio
        if teacher_forcing:
            decoder_input = target_variable[:, di]
        else:
            _, topi = output.data.topk(1)
            decoder_input = topi.squeeze(1)
    output_list = torch.cat(output_list, 1)
    return output_list
def __init__(self, **kwargs):
    """Classification dataset: read settings and build the sample index."""
    # Required settings, in the same order as before.
    for key in ('folder', 'is_training', 'class_dict', 'transforms'):
        setattr(self, key, get_valid(key, kwargs))
    # Optional field separator used when parsing index lines.
    self.split = get('split', kwargs, ',')
    self.index_list = self.__generate_index()
def __init__(self, **kwargs):
    """SAR text dataset: resolve paths and options, then load the data list."""
    self.index_path = get_valid('index_path', kwargs)
    self.folder = get_valid('folder', kwargs)
    # Optional settings with their defaults, read in the original order.
    for key, fallback in (('transforms', None),
                          ('encoding', 'UTF-8-sig'),
                          ('size', (32, 256, 3)),
                          ('label_split', ',')):
        setattr(self, key, get(key, kwargs, fallback))
    # NOTE(review): get_data_list runs before max_len/mask_ratio are set,
    # so it must not depend on them; ordering preserved deliberately.
    self.data_list = self.get_data_list()
    self.max_len = get('max_len', kwargs, 64)
    self.mask_ratio = get('mask_ratio', kwargs, (8, 4))
def test_model(self, **kwargs):
    """Evaluate the recognition model over the whole test loader.

    Returns (log, avg_loss, acc), where acc aggregates the per-batch
    correct/predicted counts reported by self.compute_acc.
    """
    model = get_valid('model', kwargs)
    model.eval()
    epoch = get_valid('epoch', kwargs)
    criterion = get_valid('criterion', kwargs)
    test_loader = get_valid('test_loader', kwargs)
    total_loss = 0
    iter_num = len(test_loader)
    # Print progress roughly every 1% of the epoch.
    log_step = iter_num // 100
    log_step = max(1, log_step)
    total_correct = 0
    total_pred = 0
    with torch.no_grad():
        for idx, (image, label, mask) in enumerate(test_loader):
            # Labels are encoded twice with different flags: `target` feeds
            # the model, `target_cp` (minus its first position) is the loss
            # target. The flag's exact semantics live in self.encode.
            target = self.encode(label, False)
            target = torch.from_numpy(target)
            target = target.long()
            target_cp = self.encode(label, True)
            target_cp = torch.from_numpy(target_cp)
            target_cp = target_cp.long()
            if self.use_gpu:
                image = image.cuda()
                target = target.cuda()
                mask = mask.cuda()
                target_cp = target_cp.cuda()
            output = model(image=image, target=target, mask=mask)
            # Accuracy bookkeeping on the decoded predictions.
            pred_label = self.decode(output)
            acc, correct, pred_num = self.compute_acc(pred_label, label)
            total_correct += correct
            total_pred += pred_num
            # Flatten to (N, class_num) vs (N,) for the criterion; index -1
            # positions are ignored.
            output = output.contiguous().view(-1, self.class_num)
            target_cp = target_cp[:, 1:].contiguous().view(-1)
            loss = criterion(output, target_cp, ignore_index=-1)
            total_loss += loss.data.item()
            if idx % log_step == 0 and idx != 0:
                finish_percent = int(idx / iter_num * 100)
                print('Test: finish {}%, loss: {}'.format(
                    finish_percent, loss.data.item()))
    acc = total_correct / total_pred
    avg_loss = total_loss / iter_num
    log = generate_log(epoch=epoch, name='Test', avg_loss=avg_loss)
    return log, avg_loss, acc
def init_trainloader(self, **kwargs):
    """Build one training DataLoader per class, plus a per-class repeat
    factor so smaller classes can be oversampled to match the largest."""
    pipeline = get('train_transforms', kwargs, None)
    if pipeline is None:
        distort_num = get('distort_num', kwargs, 10)
        distort_ratio = get('distort_ratio', kwargs, 0.1)
        pipeline = transforms.Compose([
            RandomDistort(distort_num, distort_ratio),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225]),
        ])
    kwargs['transforms'] = pipeline
    train_folder = get_valid('train_folder', kwargs)
    class_dict = get_valid('class_dict', kwargs)
    batch = get('train_batch', kwargs, 64)
    workers = get('train_worker', kwargs, 8)
    drop_last = get('drop_last', kwargs, True)
    shuffle = get('shuffle', kwargs, True)
    loader_dict = dict()
    max_data_num = 0
    for cls_name in class_dict:
        cls_folder = os.path.join(train_folder, cls_name)
        dataset = SarDataset(index_path=os.path.join(cls_folder, 'label.txt'),
                             folder=cls_folder, **kwargs)
        loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=self.get_batch_size(batch),
            num_workers=workers,
            drop_last=drop_last,
            shuffle=shuffle)
        data_num = len(dataset)
        loader_dict[class_dict[cls_name]] = {
            'data_loader': loader,
            'data_num': data_num,
        }
        max_data_num = max(max_data_num, data_num)
        print('train {} data num: {}!'.format(cls_name, data_num))
    # Oversampling factor relative to the largest class.
    for entry in loader_dict.values():
        entry['repeat_num'] = int(max_data_num / entry['data_num'])
    return loader_dict
def eval_model(self, **kwargs):
    """Evaluate a PSE detection model over a folder of images and report F1.

    Runs PSE decoding on every image, writes a per-image prediction txt
    file of comma-joined box corners, then scores predictions against the
    labels with f1score(). Returns the F1 score.
    """
    model = get_valid('model', kwargs)
    model.eval()
    eval_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    test_folder = get_valid('test_folder', kwargs)
    pred_folder = get_valid('pred_folder', kwargs)
    scale = get('scale', kwargs, 4)
    img_list = get_img_list(test_folder)
    for img in img_list:
        img_path = os.path.join(test_folder, img)
        image = cv2.imread(img_path)
        image = image.astype(np.float32)
        image = eval_transforms(image)
        image = image.unsqueeze(0)
        if self.gpu is not None:
            image = image.cuda()
        with torch.no_grad():
            output = model(image=image)
        preds, boxes_list = pse_decode(output[0], scale)
        pred_list = list()
        if len(boxes_list) > 0:
            for box in boxes_list:
                box = sort_poly(box.astype(np.int32))
                pred = '{},{},{},{},{},{},{},{}'.format(
                    box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                    box[2, 0], box[2, 1], box[3, 0], box[3, 1])
                pred_list.append(pred)
        txt_name = img.split('.')[0] + '.txt'
        txt_path = os.path.join(pred_folder, txt_name)
        # Bug fix: write the formatted predictions. pred_list was built but
        # discarded — the raw boxes_list was written instead.
        list2txt(pred_list, txt_path)
    iou_threshold = get('iou_threshold', kwargs, 0.5)
    score, total_label_num, total_pred_num, total_correct_num = f1score(
        test_folder, pred_folder, iou_threshold)
    if total_label_num == 0 or total_correct_num == 0 or total_pred_num == 0:
        precision = 0
        recall = 0
    else:
        # Bug fix: precision is TP / predictions and recall is TP / labels;
        # the two ratios were swapped.
        precision = total_correct_num / total_pred_num
        recall = total_correct_num / total_label_num
    print('f1score: {}, precision={}/{}={}, recall={}/{}={}, '
          'iou threshold: {};'.format(score, total_correct_num,
                                      total_pred_num, precision,
                                      total_correct_num, total_label_num,
                                      recall, iou_threshold))
    return score
def train_model(self, **kwargs):
    """One EAST training epoch; returns (log, avg_loss)."""
    super(EastDetectionTemplate, self).train_model(**kwargs)
    model = get_valid('model', kwargs)
    model = model.train()
    optimizer = get_valid('optimizer', kwargs)
    epoch = get_valid('epoch', kwargs)
    criterion = get_valid('criterion', kwargs)
    train_loader = get_valid('train_loader', kwargs)
    iter_num = len(train_loader)
    if iter_num == 0:
        raise RuntimeError('training data num < batch num!')
    # Running sums for the total loss and its three components.
    sums = {'loss': 0, 'aabb': 0, 'theta': 0, 'cls': 0}
    for data in train_loader:
        img = data['img']
        score_map = data['score_map']
        geo_map = data['geo_map']
        training_mask = data['training_mask']
        if self.gpu is not None:
            img = img.cuda()
            score_map = score_map.cuda()
            geo_map = geo_map.cuda()
            training_mask = training_mask.cuda()
        f_score, f_geometry = model(image=img)
        # Criterion consumes channels-first geometry maps.
        geo_map = geo_map.permute(0, 3, 1, 2).contiguous()
        loss, l_aabb, l_theta, l_cls = criterion(score_map, f_score,
                                                 geo_map, f_geometry,
                                                 training_mask)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        sums['loss'] += loss.data.item()
        sums['aabb'] += l_aabb.data.item()
        sums['theta'] += l_theta.data.item()
        sums['cls'] += l_cls.data.item()
    avg_loss = sums['loss'] / iter_num
    log = generate_log(epoch=epoch, name='Train',
                       avg_loss=avg_loss,
                       avg_aabb_loss=sums['aabb'] / iter_num,
                       avg_theta_loss=sums['theta'] / iter_num,
                       avg_cls_loss=sums['cls'] / iter_num)
    return log, avg_loss
def __init__(self, **kwargs):
    """Compose an encoder and a decoder picked by name from kwargs."""
    super(AbstractEncoderDecoderModel, self).__init__()
    encoder_name = get_valid('encoder', kwargs)
    decoder_name = get_valid('decoder', kwargs)
    own_name = get_valid('model_name', kwargs)
    # Both factories key off kwargs['model_name'], so swap it in for each
    # sub-model and restore the original value afterwards.
    kwargs['model_name'] = encoder_name
    self.encoder = EncoderModelFactory().get_model(**kwargs)
    kwargs['model_name'] = decoder_name
    self.decoder = DecoderModelFactory().get_model(**kwargs)
    kwargs['model_name'] = own_name
def load_model(self, **kwargs):
    """Load checkpoint weights into `model`, handling DataParallel prefixes.

    Maps to CPU when self.use_gpu is False. Returns the model with the
    weights loaded.
    """
    model = get_valid('model', kwargs)
    checkpoint = get_valid('checkpoint', kwargs)
    assert os.path.exists(checkpoint)
    if self.use_gpu is False:
        ckpt = torch.load(checkpoint, map_location='cpu')
    else:
        ckpt = torch.load(checkpoint)
    new_state_dict = OrderedDict()
    for k, v in ckpt['state_dict'].items():
        # Strip the 'module.' prefix added by DataParallel, but only when it
        # is actually present — the old unconditional k[7:] corrupted the
        # keys of checkpoints saved without DataParallel.
        name = k[7:] if k.startswith('module.') else k
        new_state_dict[name] = v
    model.load_state_dict(new_state_dict)
    return model
def test_model(self, **kwargs):
    """One EAST evaluation pass; returns (log, avg_loss)."""
    super(EastDetectionTemplate, self).test_model(**kwargs)
    model = get_valid('model', kwargs)
    model = model.eval()
    epoch = get_valid('epoch', kwargs)
    criterion = get_valid('criterion', kwargs)
    test_loader = get_valid('test_loader', kwargs)
    total_loss = 0
    aabb_loss = 0
    theta_loss = 0
    cls_loss = 0
    iter_num = len(test_loader)
    # Mirror train_model's guard: without it an empty loader produced a
    # ZeroDivisionError at the averaging step below.
    if iter_num == 0:
        raise RuntimeError('test data num < batch num!')
    with torch.no_grad():
        for data in test_loader:
            img = data['img']
            score_map = data['score_map']
            geo_map = data['geo_map']
            training_mask = data['training_mask']
            # img_path = data['img_path']
            if self.gpu is not None:
                img = img.cuda()
                score_map = score_map.cuda()
                geo_map = geo_map.cuda()
                training_mask = training_mask.cuda()
            f_score, f_geometry = model(image=img)
            # Criterion consumes channels-first geometry maps.
            geo_map = geo_map.permute(0, 3, 1, 2).contiguous()
            loss, l_aabb, l_theta, l_cls = criterion(score_map, f_score,
                                                     geo_map, f_geometry,
                                                     training_mask)
            total_loss += loss.data.item()
            aabb_loss += l_aabb.data.item()
            theta_loss += l_theta.data.item()
            cls_loss += l_cls.data.item()
    avg_loss = total_loss / iter_num
    log = generate_log(epoch=epoch, name='Test',
                       avg_loss=avg_loss,
                       avg_aabb_loss=aabb_loss / iter_num,
                       avg_theta_loss=theta_loss / iter_num,
                       avg_cls_loss=cls_loss / iter_num)
    return log, avg_loss
def train_model(self, **kwargs):
    """One SAR recognition training epoch; returns (log, avg_loss)."""
    model = get_valid('model', kwargs)
    model.train()
    optimizer = get_valid('optimizer', kwargs)
    epoch = get_valid('epoch', kwargs)
    criterion = get_valid('criterion', kwargs)
    train_loader = get_valid('train_loader', kwargs)
    total_loss = 0
    iter_num = len(train_loader)
    log_step = max(1, iter_num // 100)
    if iter_num == 0:
        raise RuntimeError('training data num < batch num!')
    for idx, (image, label, mask) in enumerate(train_loader):
        # `target` feeds the decoder; `target_cp` (encoded with the other
        # flag, minus its first position) is the loss target.
        target = torch.from_numpy(self.encode(label, False)).long()
        target_cp = torch.from_numpy(self.encode(label, True)).long()
        if self.use_gpu:
            image = image.cuda()
            mask = mask.cuda()
            target = target.cuda()
            target_cp = target_cp.cuda()
        output = model(image=image, target=target, mask=mask)
        output = output.contiguous().view(-1, self.class_num)
        target_cp = target_cp[:, 1:].contiguous().view(-1)
        loss = criterion(output, target_cp, ignore_index=-1)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.data.item()
        if idx % log_step == 0 and idx != 0:
            print('Train: finish {}%, loss: {}'.format(
                int(idx / iter_num * 100), loss.data.item()))
    avg_loss = total_loss / iter_num
    log = generate_log(epoch=epoch, name='Train', avg_loss=avg_loss)
    return log, avg_loss
def __init__(self, **kwargs):
    """FPN-style PSE detection head: a top 1x1 conv, lateral 1x1 convs,
    3x3 smoothing convs, a fusion conv over the four concatenated levels,
    and an n-channel output conv."""
    super(PseDetectionModel, self).__init__(**kwargs)
    n = get('n', kwargs, 6)
    self.scale = get('scale', kwargs, 1)
    channels = get_valid('channels', kwargs)
    conv_out = 256
    # Top layer: reduce the deepest backbone feature to conv_out channels.
    self.toplayer = nn.Conv2d(channels[0], conv_out,
                              kernel_size=1, stride=1, padding=0)
    # Lateral 1x1 convs for the remaining backbone levels
    # (registration order matches the original: latlayer1..3).
    for i in (1, 2, 3):
        setattr(self, 'latlayer{}'.format(i),
                nn.Conv2d(channels[i], conv_out,
                          kernel_size=1, stride=1, padding=0))
    # 3x3 smoothing convs applied after each top-down merge.
    for i in (1, 2, 3):
        setattr(self, 'smooth{}'.format(i),
                nn.Conv2d(conv_out, conv_out,
                          kernel_size=3, stride=1, padding=1))
    self.conv = nn.Sequential(
        nn.Conv2d(conv_out * 4, conv_out,
                  kernel_size=3, padding=1, stride=1),
        nn.BatchNorm2d(conv_out),
        nn.ReLU(inplace=True)
    )
    self.out_conv = nn.Conv2d(conv_out, n, kernel_size=1, stride=1)
def init_testloader(self, **kwargs):
    """Build the EAST test DataLoader; returns it."""
    super(EastDetectionTemplate, self).init_testloader(**kwargs)
    test_transforms = get('test_transforms', kwargs, None)
    if test_transforms is None:
        test_transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    kwargs['transforms'] = test_transforms
    test_folder = get_valid('test_folder', kwargs)
    assert os.path.exists(test_folder)
    test_dataset = EastDataset(folder=test_folder, **kwargs)
    test_data_num = len(test_dataset)
    test_batch = get('test_batch', kwargs, 4)
    test_worker = get('test_worker', kwargs, 8)
    # Scale the batch size by GPU count when running data-parallel.
    if self.gpu is None:
        batch_size = test_batch
    else:
        batch_size = test_batch * len(self.gpu)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              num_workers=test_worker)
    # Bug fix: the message previously said 'train data num' even though
    # this is the test-loader path.
    print('Generate test data loader, test data folder: {}, '
          'test data num: {}'.format(test_folder, test_data_num))
    return test_loader
def init_testloader(self, **kwargs):
    """Build the multi-decoder classification test DataLoader."""
    pipeline = get('test_transforms', kwargs, None)
    if pipeline is None:
        # Default evaluation pipeline: denoise, then ImageNet stats.
        pipeline = transforms.Compose([
            Denoise(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    folder = get_valid('test_folder', kwargs)
    dataset = MultiDecoderClassifyDataset(folder=folder,
                                          is_training=False,
                                          transforms=pipeline,
                                          **kwargs)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=get('test_batch', kwargs, 16),
        num_workers=get('test_worker', kwargs, 8))
    self.test_data_num = len(dataset)
    print(
        'generate test data loader, test data folder: {}, test data num:{}'
        .format(folder, self.test_data_num))
    return loader
def init_trainloader(self, **kwargs):
    """Build the multi-decoder classification training DataLoader."""
    pipeline = get('train_transforms', kwargs, None)
    if pipeline is None:
        # Default augmentation: random distortion, then ImageNet stats.
        pipeline = transforms.Compose([
            RandomDistort(get('distort_num', kwargs, 10),
                          get('distort_ratio', kwargs, 0.1)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    folder = get_valid('train_folder', kwargs)
    dataset = MultiDecoderClassifyDataset(folder=folder,
                                          is_training=True,
                                          transforms=pipeline,
                                          **kwargs)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=get('train_batch', kwargs, 16),
        num_workers=get('train_worker', kwargs, 8),
        drop_last=get('drop_last', kwargs, True),
        shuffle=get('shuffle', kwargs, True))
    self.train_data_num = len(dataset)
    print(
        'Generate train data loader, train data folder: {}, train data num: {}'
        .format(folder, self.train_data_num))
    return loader
def __init__(self, **kwargs):
    """SAR recognition template: load the vocabulary and sequence settings."""
    super(SarRecognitionTemplate, self).__init__(**kwargs)
    vocab_path = get_valid('word_index_path', kwargs)
    self.word2num, self.num2word = self.generate_dict(vocab_path)
    # Forward and reverse dictionaries must be exact mirrors.
    assert len(self.word2num) == len(self.num2word)
    self.class_num = len(self.word2num)
    # Maximum decoded sequence length.
    self.max_len = get('max_len', kwargs, 64)
def forward(self, **kwargs):
    """NTS-Net forward pass: backbone features, proposal scoring, top-N
    part crops, and the three classification heads.

    Expects kwargs['image'] (batch of images). Returns
    [raw_logits, concat_logits, part_logits, top_n_index, top_n_prob].
    """
    super(NTSClassifyModel, self).forward(**kwargs)
    image = get_valid('image', kwargs)
    feature_list = self.backbone(image)
    resnet_out = feature_list[2]
    rpn_feature = feature_list[0]
    feature = feature_list[1]
    # Pad the image so part boxes near the border can be cropped intact.
    x_pad = F.pad(
        image,
        (self.pad_side, self.pad_side, self.pad_side, self.pad_side),
        mode='constant', value=0)
    batch = image.size(0)
    # we will reshape rpn to shape: batch * nb_anchor
    rpn_score = self.proposal_net(rpn_feature.detach())
    # Candidate rows per image: [score, anchor coords..., anchor index].
    all_cdds = [
        np.concatenate((x.reshape(-1, 1), self.edge_anchors.copy(),
                        np.arange(0, len(x)).reshape(-1, 1)),
                       axis=1) for x in rpn_score.data.cpu().numpy()
    ]
    top_n_cdds = [
        self.hard_nms(x, topn=self.top_n, iou_thresh=0.25)
        for x in all_cdds
    ]
    top_n_cdds = np.array(top_n_cdds)
    # Bug fix: the deprecated np.int alias was removed in NumPy >= 1.24;
    # use the concrete np.int64 dtype instead.
    top_n_index = top_n_cdds[:, :, -1].astype(np.int64)
    if self.use_gpu:
        top_n_index = torch.from_numpy(top_n_index).cuda()
    else:
        top_n_index = torch.from_numpy(top_n_index)
    top_n_prob = torch.gather(rpn_score, dim=1, index=top_n_index.long())
    if self.use_gpu:
        part_imgs = torch.zeros([batch, self.top_n, 3, 224, 224]).cuda()
    else:
        part_imgs = torch.zeros([batch, self.top_n, 3, 224, 224])
    # Crop each selected part box from the padded image and resize to 224.
    for i in range(batch):
        for j in range(self.top_n):
            [y0, x0, y1, x1] = top_n_cdds[i][j, 1:5].astype(np.int64)
            part_imgs[i:i + 1, j] = F.interpolate(
                x_pad[i:i + 1, :, y0:y1, x0:x1],
                size=(224, 224), mode='bilinear', align_corners=True)
    part_imgs = part_imgs.view(batch * self.top_n, 3, 224, 224)
    feature_list2 = self.backbone(part_imgs.detach())
    part_features = feature_list2[1]
    part_feature = part_features.view(batch, self.top_n, -1)
    part_feature = part_feature[:, :self.cat_num, ...].contiguous()
    part_feature = part_feature.view(batch, -1)
    # Concatenate part features with the whole-image feature.
    concat_out = torch.cat([part_feature, feature], dim=1)
    concat_logits = self.concat_net(concat_out)
    raw_logits = resnet_out
    part_logits = self.partcls_net(part_features).view(
        batch, self.top_n, -1)
    return [
        raw_logits, concat_logits, part_logits, top_n_index, top_n_prob
    ]
def __init__(self, **kwargs):
    """PSE detection dataset: read settings, then scan the data folder."""
    self.folder = get_valid('folder', kwargs)
    # Optional settings with their defaults, read in the original order.
    for key, fallback in (('transforms', None),
                          ('input_size', (768, 768)),
                          ('detection_transforms', None)):
        setattr(self, key, get(key, kwargs, fallback))
    # NOTE(review): the data list is built before n/m are assigned, so
    # __get_data_list must not read them; ordering preserved deliberately.
    self.data_list = self.__get_data_list()
    self.n = get('n', kwargs, 6)
    self.m = get('m', kwargs, 0.5)
def __init__(self, **kwargs):
    """Prediction wrapper: build the classifier, load weights, set eval mode."""
    super(PredictClassification, self).__init__(**kwargs)
    self.class_map = get_valid('class_map', kwargs)
    net = self.init_model(**kwargs)
    net = self.load_model(model=net, **kwargs)
    if self.use_gpu:
        # Move to GPU and wrap for multi-GPU inference.
        net = DataParallel(net.cuda())
    self.model = net.eval()
def __init__(self, **kwargs):
    """Proposal scorer over three feature scales.

    Expects kwargs['nts_fc_ratio'], which scales the input channel count
    of the first conv. Three 'down' convs (the last two with stride 2)
    each feed a 1x1 'tidy' head producing 6/6/9 scores per location.
    """
    super(ProposalNet, self).__init__()
    self.nts_fc_ratio = get_valid("nts_fc_ratio", kwargs)
    # Downsampling tower: first conv keeps resolution, next two halve it.
    self.down1 = nn.Conv2d(512 * self.nts_fc_ratio, 128, 3, 1, 1)
    self.down2 = nn.Conv2d(128, 128, 3, 2, 1)
    self.down3 = nn.Conv2d(128, 128, 3, 2, 1)
    self.ReLU = nn.ReLU()
    # 1x1 heads producing per-location proposal scores at each scale.
    self.tidy1 = nn.Conv2d(128, 6, 1, 1, 0)
    self.tidy2 = nn.Conv2d(128, 6, 1, 1, 0)
    self.tidy3 = nn.Conv2d(128, 9, 1, 1, 0)
def train_model(self, **kwargs):
    """One PSE training epoch; returns (log, avg_loss)."""
    super(PseDetectionTemplate, self).train_model(**kwargs)
    model = get_valid('model', kwargs)
    model.train()
    optimizer = get_valid('optimizer', kwargs)
    epoch = get_valid('epoch', kwargs)
    criterion = get_valid('criterion', kwargs)
    train_loader = get_valid('train_loader', kwargs)
    iter_num = len(train_loader)
    if iter_num == 0:
        raise RuntimeError('training data num < batch num!')
    # Running sums for the total, text and kernel losses.
    loss_sum = text_sum = kernel_sum = 0
    for data in train_loader:
        img = data['img']
        score_maps = data['score_maps']
        training_mask = data['training_mask']
        if self.gpu is not None:
            img = img.cuda()
            score_maps = score_maps.cuda()
            training_mask = training_mask.cuda()
        output = model(image=img)
        loss_text, loss_kernels, loss = criterion(output, score_maps,
                                                  training_mask)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_sum += loss.data.item()
        text_sum += loss_text.data.item()
        kernel_sum += loss_kernels.data.item()
    avg_loss = loss_sum / iter_num
    log = generate_log(epoch=epoch, name='Train',
                       avg_loss=avg_loss,
                       avg_text_loss=text_sum / iter_num,
                       avg_kernels_loss=kernel_sum / iter_num)
    return log, avg_loss
def init_optimizer(self, **kwargs):
    """Create the SGD optimizer used for classification training."""
    super(ClassifyTemplate, self).init_optimizer(**kwargs)
    model = get_valid('model', kwargs)
    lr = get('lr', kwargs, 0.001)
    weight_decay = get('weight_decay', kwargs, 1e-4)
    # Momentum is fixed at 0.9; lr and weight decay are configurable.
    return torch.optim.SGD(model.parameters(),
                           lr=lr,
                           momentum=0.9,
                           weight_decay=weight_decay)