# --- Data: CIFAR loaders (train and test share the CLI batch size) ---
batch_size = args.bs
test_batch_size = args.bs
train_loader, _ = cifar_loaders(batch_size)
_, test_loader = cifar_loaders(test_batch_size)

# --- Model: dispatch table of lazy factories keyed by the CLI name.   ---
# Only the selected architecture is ever constructed; an unknown name
# raises exactly as the original if/elif chain did.
_model_factories = {
    'CNN': lambda: CNN(),
    'CNN_ReLU': lambda: CNN_ReLU(),
    'resnet18': lambda: resnet18(num_classes=10),
    'resnet34': lambda: resnet34(num_classes=10),
    'resnet50': lambda: resnet50(num_classes=10),
    'vgg16': lambda: VGG(vgg_name='vgg16', num_classes=10),
    'MLP': lambda: MLP(),
}
if args.model not in _model_factories:
    raise ValueError('Unrecognized training model')
model = _model_factories[args.model]()

# --- Optimizer: SGD (with momentum/weight decay) or LBFGS.           ---
# NOTE(review): no else-branch here, matching the original — an
# unrecognized args.optim leaves `optimizer` undefined.
if args.optim == 'SGD':
    optimizer = optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )
elif args.optim == 'LBFGS':
    optimizer = optim.LBFGS(model.parameters(), lr=args.lr)
def test(args):
    """Run DB-style scene-text detection on the IC15 test set.

    Loads the requested backbone, optionally restores a checkpoint,
    optionally exports to ONNX, then for every test image thresholds the
    binarization map, extracts connected components as rotated boxes,
    writes result txt files and visualizations.

    Args (from argparse): arch, scale, root_dir, long_size, gpus,
    resume, onnx.
    """
    import torch
    data_loader = IC15TestLoader(root_dir=args.root_dir, long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(
        data_loader,
        batch_size=1,
        shuffle=False,
        num_workers=2,
        drop_last=True)

    # Setup Model
    # num_classes=1: the head predicts a single binarization channel.
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=False, num_classes=1, scale=args.scale, train_mode=False)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=1, scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=1, scale=args.scale)
    # Inference only: freeze every parameter.
    for param in model.parameters():
        param.requires_grad = False

    if args.gpus > 0:
        model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            # Force CPU map when running without GPUs.
            device = torch.device('cpu') if args.gpus < 0 else None
            checkpoint = torch.load(args.resume, map_location=device)
            # model.load_state_dict(checkpoint['state_dict'])
            # Strip the 'module.' prefix (key[7:]) left by DataParallel
            # so the weights load into a non-parallel model.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)

            print("Loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()

    model.eval()

    if args.onnx:
        # Export a fixed 1x3x640x640 graph and stop; no inference is run.
        import torch.onnx.symbolic_opset9
        dummy_input = torch.autograd.Variable(torch.randn(1, 3, 640, 640)).cpu()
        torch.onnx.export(model, dummy_input, 'dbnet.onnx', verbose=False)
        return 0

    total_frame = 0.0
    total_time = 0.0
    for idx, (org_img, img, scale_val) in enumerate(test_loader):
        print('progress: %d / %d'%(idx, len(test_loader)))
        sys.stdout.flush()

        if args.gpus > 0:
            # NOTE(review): `volatile=True` is a removed pre-0.4 PyTorch
            # API; on modern torch this raises — confirm target version.
            img = Variable(img.cuda(), volatile=True)
        org_img = org_img.numpy().astype('uint8')[0]
        text_box = org_img.copy()

        if args.gpus > 0:
            torch.cuda.synchronize()
        start = time.time()
        outputs = model(img)
        # Model returns three maps; only the binarization map is used
        # for box extraction below.
        probability_map, threshold_map, binarization_map = outputs
        score = binarization_map[0, 0]
        # Hard-threshold the binarization map at 0.9 to get text pixels.
        text = torch.where(score > 0.9, torch.ones_like(score), torch.zeros_like(score))
        text = text.data.cpu().numpy().astype(np.uint8)

        prob_map = probability_map.cpu().numpy()[0, 0] * 255
        thre_map = threshold_map.cpu().numpy()[0, 0] * 255
        bin_map = binarization_map.cpu().numpy()[0, 0] * 255
        out_path = 'outputs/vis_ic15/'
        image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
        print("im_name:", image_name)
        # cv2.imwrite(out_path + image_name + '_prob.png', prob_map.astype(np.uint8))
        # cv2.imwrite(out_path + image_name + '_thre.png', thre_map.astype(np.uint8))
        cv2.imwrite(out_path + image_name + '_bin.png', bin_map.astype(np.uint8))

        scale = (org_img.shape[1] * 1.0 / img.shape[1], org_img.shape[0] * 1.0 / img.shape[0])
        print("[shape_info:]", text.shape, img.shape, org_img.shape, scale, scale_val)
        bboxes = []
        scale_val = scale_val.cpu().numpy()
        # One connected component (4-connectivity) per candidate word.
        nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(text.astype(np.uint8), connectivity=4)
        img_h, img_w = text.shape
        for k in range(1, nLabels):
            # size filtering: drop tiny components (noise).
            size = stats[k, cv2.CC_STAT_AREA]
            if size < 100:
                continue

            # make segmentation map for this component only.
            segmap = np.zeros(text.shape, dtype=np.uint8)
            segmap[labels==k] = 255
            x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
            w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
            # Disabled CRAFT-style dilation expansion (kept for reference):
            # niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
            # sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
            # # boundary check
            # if sx < 0 : sx = 0
            # if sy < 0 : sy = 0
            # if ex >= img_w: ex = img_w
            # if ey >= img_h: ey = img_h
            # kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(1 + niter, 1 + niter))
            # segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)

            # Component pixel coordinates as (x, y) pairs for minAreaRect.
            np_contours = np.roll(np.array(np.where(segmap!=0)),1,axis=0).transpose().reshape(-1,2)
            rectangle = cv2.minAreaRect(np_contours)
            # * 4: maps from the 1/4-resolution output back to network
            # input size — presumably matches the model's output stride;
            # TODO confirm against the backbone.
            box = cv2.boxPoints(rectangle) * 4
            box = box / scale_val
            box = box.astype('int32')
            bboxes.append(box)

        # find contours
        bboxes = np.array(bboxes)
        num_box = bboxes.shape[0]
        try:
            # Inverse-shrink the detected boxes (DB uses shrunk labels).
            unshrink_bboxes = unshrink(bboxes.reshape((num_box, -1, 2)), rate=1.19)
        except:
            # NOTE(review): bare except silently skips the image on ANY
            # failure (including KeyboardInterrupt) and also skips the
            # fps bookkeeping and result writing below — consider
            # `except Exception` at minimum.
            continue
        for i in range(unshrink_bboxes.shape[0]):
            cv2.drawContours(text_box, [unshrink_bboxes[i]], -1, (0, 255, 255), 2)

        if args.gpus > 0:
            torch.cuda.synchronize()
        end = time.time()
        total_frame += 1
        total_time += (end - start)
        print('fps: %.2f'%(total_frame / total_time))
        sys.stdout.flush()

        for bbox in bboxes:
            cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2)

        image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
        write_result_as_txt(image_name, bboxes.reshape((-1, 8)), 'outputs/submit_ic15/')
        # text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
        debug(idx, data_loader.img_paths, [[text_box]], 'outputs/vis_ic15/')
def main(args):
    """Train a CTW1500 text-detection model (TensorFlow/Keras port).

    Builds the checkpoint directory, the data loader and the requested
    backbone, then runs the epoch loop: rebuild the train loader, train
    one epoch, save weights, and append metrics to the logger.

    Args (from argparse): checkpoint, arch, batch_size, n_epoch,
    img_size, lr, pretrain, resume.
    """
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ctw1500_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
        # # if args.pretrain:
        #     if 'synth' in args.pretrain:
        #         args.checkpoint += "_pretrain_synth"
        #     else:
        #         args.checkpoint += "_pretrain_ic17"

    print('checkpoint path: %s' % args.checkpoint)
    #print ('init lr: %.8f'%args.lr)
    #print ('schedule: ', args.schedule)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    # kernel_num/min_scale: PSENet-style shrunk-kernel label parameters.
    kernel_num = 7
    min_scale = 0.4
    start_epoch = 0

    data_loader = CTW1500Loader(is_transform=True, img_size=args.img_size,
                                kernel_num=kernel_num, min_scale=min_scale)
    #train_loader = ctw_train_loader(data_loader, batch_size=args.batch_size)

    # Backbone selection; the head predicts kernel_num channels.
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)
    #resnet18 and 34 didn't implement pretrained
    elif args.arch == "resnet18":
        model = models.resnet18(pretrained=False, num_classes=kernel_num)
    elif args.arch == "resnet34":
        model = models.resnet34(pretrained=False, num_classes=kernel_num)
    elif args.arch == "mobilenetv2":
        model = models.mobilenetv2(pretrained=True, num_classes=kernel_num)
    elif args.arch == "mobilenetv3large":
        model = models.mobilenetv3_large(pretrained=False, num_classes=kernel_num)
    elif args.arch == "mobilenetv3small":
        model = models.mobilenetv3_small(pretrained=False, num_classes=kernel_num)

    # NOTE(review): `decay` is the legacy tf.keras LR-decay argument,
    # removed in Keras 3 — confirm the pinned TF version supports it.
    optimizer = tf.keras.optimizers.SGD(learning_rate=args.lr, momentum=0.99, decay=5e-4)

    title = 'CTW1500'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no checkpoint directory found!'
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        model.load_weights(args.resume)
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        # Rebuild the optimizer each epoch (LR schedule lives in
        # get_new_optimizer).
        optimizer = get_new_optimizer(args, optimizer, epoch)
        print(
            '\nEpoch: [%d | %d] LR: %f' %
            (epoch + 1, args.n_epoch, optimizer.get_config()['learning_rate']))

        # Fresh shuffled loader every epoch.
        train_loader = ctw_train_loader(data_loader, batch_size=args.batch_size)

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)

        # Weights are saved every epoch (overwriting the same path).
        model.save_weights('%s%s' % (args.checkpoint, '/model_tf/weights'))

        # NOTE(review): kernel metrics (train_ke_*) are computed but not
        # logged — only text metrics go to the logger.
        logger.append([
            optimizer.get_config()['learning_rate'], train_loss, train_te_acc,
            train_te_iou
        ])

    logger.close()
# if not os.path.exists('./result/%s' % file_name): # os.makedirs('./result/%s' % file_name) if not os.path.exists('./result/%s.txt' % file_name): txt_mode = 'w' else: txt_mode = 'a' with open('./result/%s.txt' % file_name, txt_mode) as acc_file: acc_file.write('\n%s %s\n' % (time.strftime( "%Y-%m-%d %H:%M:%S", time.localtime(time.time())), file_name)) train_data_list = pd.read_csv("./data/k_folds/train_{}.csv".format(index), sep=",") val_data_list = pd.read_csv("./data/k_folds/test_{}.csv".format(index), sep=",") train_data = Trainset(train_data_list, transform=train_augs) val_data = Valset(val_data_list, transform=val_augs) train_iter = DataLoader(train_data, batch_size, shuffle=True) test_iter = DataLoader(val_data, batch_size, shuffle=False) if resume: net = models.resnet50(pretrained=False) net.fc = nn.Linear(2048, 4) net.load_state_dict(torch.load('./model/resnet18_5/best.pth')) train(train_iter, test_iter, net, feature_params, loss, device, num_epochs, file_name) else: train(train_iter, test_iter, pretrained_net, feature_params, loss, device, num_epochs, file_name) # train_fine_tuning(pretrained_net, optimizer,100,20,train_data,val_data ) # train_fine_tuning(scratch_net, optimizer)
styleV.resize_(style.size()).copy_(style) # forward sF = vgg(styleV) cF = vgg(contentV) if (opt.layer == 'r41'): feature, transmatrix = matrix(cF[opt.layer], sF[opt.layer]) else: feature, transmatrix = matrix(cF, sF) transfer = dec(feature) PRETRAIN = 'imagenet' # 'real', 'imagenet', 'none' ARCHITECTURE = 'resnet50' if PRETRAIN == 'imagenet': resnet = models.resnet50(pretrained=True) else: resnet = models.__dict__[ARCHITECTURE](pretrained=False) for param in resnet.parameters(): param.requires_grad_(False) resnet.cuda() resnet.eval() def get_features(image, model, layers=None): if ARCHITECTURE == 'resnet50_softmax_lastlayer': if layers is None: layers1 = {'0': 'conv1_0', '1': 'conv1_1', '2': 'conv1_2'} layers2 = { '0': 'conv2_0',
batch_size = 1, shuffle = True, num_workers = 1) test_dataset = ImageFolder('../../cat_dog_dataset/test_set',transform=tr1) test_loader = DataLoader(test_dataset, batch_size = 1, shuffle = False, num_workers = 1) # GLOBAL VARIABLES noe = 25 noc = 2 # MODEL INSTANCE 1 net = m.resnet50() net.fc = nn.Linear(2048,noc) #net=net.cuda() criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(net.parameters()) def train(epoch): net.train() train_loss_epoch=0.0 # running_loss=0 for batch in train_loader: inputs, labels = batch inputs, labels = Variable(inputs), Variable(labels)
def __init__(self, args):
    """Set up a VisDA domain-adaptation trainer.

    Builds the source/target dataloaders, a ResNet feature extractor
    (netF) plus a linear classifier head (netC), SGD optimizers with
    step LR schedules, and attempts to resume from a checkpoint.
    """
    self.args = args

    # Create dataloader
    source_train_loader, source_val_loader, target_loader, nclasses = datasets.form_visda_datasets(
        config=args)
    self.source_train_loader = source_train_loader
    self.source_val_loader = source_val_loader
    self.target_loader = target_loader
    self.nclasses = nclasses

    # Create model
    # nemb is the backbone's feature dimension: 512 for resnet18/34,
    # 2048 for resnet50/101/152.
    if args.model == 'resnet18':
        self.netF = models.resnet18(pretrained=True)
        self.nemb = 512
    elif args.model == 'resnet34':
        self.netF = models.resnet34(pretrained=True)
        self.nemb = 512
    elif args.model == 'resnet50':
        self.netF = models.resnet50(pretrained=True)
        self.nemb = 2048
    elif args.model == 'resnet101':
        self.netF = models.resnet101(pretrained=True)
        self.nemb = 2048
    elif args.model == 'resnet152':
        self.netF = models.resnet152(pretrained=True)
        self.nemb = 2048
    else:
        raise ValueError('Model cannot be recognized.')
    print(self.netF)

    # Single-layer classifier head on top of the backbone embedding.
    self.netC = models.Classifier(self.nemb, self.nclasses, nlayers=1)
    utils.weights_init(self.netC)
    print(self.netC)

    self.netF = torch.nn.DataParallel(self.netF).cuda()
    self.netC = torch.nn.DataParallel(self.netC).cuda()

    # Create optimizer — separate LRs for backbone (lr) and head (lrC).
    self.optimizerF = optim.SGD(self.netF.parameters(),
                                lr=self.args.lr,
                                momentum=args.momentum,
                                weight_decay=0.0005)
    self.optimizerC = optim.SGD(self.netC.parameters(),
                                lr=self.args.lrC,
                                momentum=args.momentum,
                                weight_decay=0.0005)
    self.lr_scheduler_F = optim.lr_scheduler.StepLR(self.optimizerF, step_size=7000, gamma=0.1)
    self.lr_scheduler_C = optim.lr_scheduler.StepLR(self.optimizerC, step_size=7000, gamma=0.1)

    # restoring checkpoint
    print('Restoring checkpoint ...')
    try:
        ckpt_data = torch.load(
            os.path.join(args.save_path, 'checkpoint.pth'))
        self.start_iter = ckpt_data['iter']
        self.netF.load_state_dict(ckpt_data['F_dict'])
        self.netC.load_state_dict(ckpt_data['C_dict'])
    except:
        # If loading failed, begin from scratch.
        # NOTE(review): bare except also masks corrupt/mismatched
        # checkpoints and even KeyboardInterrupt — prefer
        # `except (FileNotFoundError, KeyError, RuntimeError)`.
        print('Checkpoint not found. Training from scratch ...')
        self.start_iter = 0

    # Other vars
    self.criterion = nn.CrossEntropyLoss().cuda()
    self.vat_pert_gen = VATPerturbationGenerator(xi=10.0, eps=1.0, ip=1)
    self.entropy_criterion = EntropyLoss()
def test(args):
    """Run kernel-based text detection on CTW1500 with watershed
    post-processing.

    Thresholds the network outputs into text/kernel maps, grows the
    smallest kernel to full text regions via cv2.watershed, converts
    each label to a (possibly curved) contour and writes per-image
    result txt files under outputs_shape/.
    """
    data_loader = CTW1500TestLoader(long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)

    # Fresh output directory every run.
    submit_path = 'outputs_shape'
    if os.path.exists(submit_path):
        shutil.rmtree(submit_path)
    os.mkdir(submit_path)

    # Setup Model — head predicts 7 shrunk-kernel channels.
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=7, scale=args.scale)

    # Inference only: freeze parameters.
    for param in model.parameters():
        param.requires_grad = False

    model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            # model.load_state_dict(checkpoint['state_dict'])
            # Strip DataParallel's 'module.' prefix from every key.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()

    model.eval()

    total_frame = 0.0
    total_time = 0.0
    for idx, (org_img, img) in enumerate(test_loader):
        print('progress: %d / %d' % (idx, len(test_loader)))
        sys.stdout.flush()

        # NOTE(review): `volatile=True` is a removed pre-0.4 PyTorch API.
        img = Variable(img.cuda(), volatile=True)
        org_img = org_img.numpy().astype('uint8')[0]
        text_box = org_img.copy()

        torch.cuda.synchronize()
        start = time.time()
        outputs = model(img)

        # Channel 0 is the full-text map; sigmoid gives its confidence.
        score = torch.sigmoid(outputs[:, 0, :, :])
        # Hard binarization of all channels at args.binary_th.
        outputs = (torch.sign(outputs - args.binary_th) + 1) / 2
        text = outputs[:, 0, :, :]
        # Mask all kernel channels by the text region.
        kernels = outputs[:, 0:args.kernel_num, :, :] * text

        score = score.data.cpu().numpy()[0].astype(np.float32)
        text = text.data.cpu().numpy()[0].astype(np.uint8)
        kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

        # # c++ version pse
        # # pred = pse(kernels, args.min_kernel_area / (args.scale * args.scale))
        # # python version pse
        # pred = pypse(kernels, args.min_kernel_area / (args.scale * args.scale))
        # # scale = (org_img.shape[0] * 1.0 / pred.shape[0], org_img.shape[1] * 1.0 / pred.shape[1])
        # scale = (org_img.shape[1] * 1.0 / pred.shape[1], org_img.shape[0] * 1.0 / pred.shape[0])
        # label = pred
        # label_num = np.max(label) + 1
        # bboxes = []
        tmp_marker = kernels[-1, :, :]
        # Disabled iterative kernel-by-kernel watershed (kept for reference):
        # for i in range(args.kernel_num-2, -1, -1):
        #     sure_fg = tmp_marker
        #     sure_bg = kernels[i, :, :]
        #     watershed_source = cv2.cvtColor(sure_bg, cv2.COLOR_GRAY2BGR)
        #     unknown = cv2.subtract(sure_bg,sure_fg)
        #     ret, marker = cv2.connectedComponents(sure_fg)
        #     label_num = np.max(marker)
        #     marker += 1
        #     marker[unknown==1] = 0
        #     marker = cv2.watershed(watershed_source, marker)
        #     marker[marker==-1] = 1
        #     marker -= 1
        #     tmp_marker = np.asarray(marker, np.uint8)

        # Single watershed pass: smallest kernel = sure foreground,
        # full text map = sure background.
        sure_fg = kernels[-1, :, :]
        sure_bg = text
        watershed_source = cv2.cvtColor(sure_bg, cv2.COLOR_GRAY2BGR)
        unknown = cv2.subtract(sure_bg, sure_fg)
        ret, marker = cv2.connectedComponents(sure_fg)
        label_num = np.max(marker)
        marker += 1
        marker[unknown == 1] = 0
        marker = cv2.watershed(watershed_source, marker)
        marker -= 1
        label = marker
        # label = tmp_marker
        # scale = (w / marker.shape[1], h / marker.shape[0])
        scale = (org_img.shape[1] * 1.0 / marker.shape[1],
                 org_img.shape[0] * 1.0 / marker.shape[0])
        bboxes = []
        for i in range(1, label_num):
            points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]

            # Area filter (in network-output pixels).
            if points.shape[0] < args.min_area / (args.scale * args.scale):
                continue

            # Mean-confidence filter.
            score_i = np.mean(score[label == i])
            if score_i < args.min_score:
                continue

            # rect = cv2.minAreaRect(points)
            binary = np.zeros(label.shape, dtype='uint8')
            binary[label == i] = 1
            # a=cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            # print(a)
            contours, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            contour = contours[0]
            # epsilon = 0.01 * cv2.arcLength(contour, True)
            # bbox = cv2.approxPolyDP(contour, epsilon, True)
            bbox = contour

            if bbox.shape[0] <= 2:
                continue

            bbox = bbox * scale
            bbox = bbox.astype('int32')
            bbox = bbox.reshape(-1)
            # Swap x/y coordinate interleaving for the output format.
            temp = bbox[0::2].copy()
            temp1 = bbox[1::2].copy()
            bbox[1::2] = temp
            bbox[0::2] = temp1
            # print(bbox)
            bboxes.append(bbox)

        torch.cuda.synchronize()
        end = time.time()
        total_frame += 1
        total_time += (end - start)
        print('fps: %.2f' % (total_frame / total_time))
        sys.stdout.flush()

        # for bbox in bboxes:
        #     cv2.drawContours(text_box, [bbox.reshape(bbox.shape[0] // 2, 2)], -1, (0, 255, 0), 2)

        image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
        write_result_as_txt(image_name, bboxes, 'outputs_shape/submit_ctw1500/')

        text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
def test(args):
    """Run probability/border-map text detection on the IC15 test set.

    Thresholds the probability map into text regions, expands each
    connected component with a size-dependent dilation, fits rotated
    rectangles, and writes result txt files plus visualizations.
    Per-stage timings (inference / post / expand) are printed.
    """
    import torch
    data_loader = IC15TestLoader(root_dir=args.root_dir, long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)

    # Setup Model — single-channel probability head.
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=False, num_classes=1, scale=args.scale, train_mode=False)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=1, scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=1, scale=args.scale)
    # Inference only: freeze parameters.
    for param in model.parameters():
        param.requires_grad = False

    if args.gpus > 0:
        model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            # Force CPU map when running without GPUs.
            device = torch.device('cpu') if args.gpus < 0 else None
            checkpoint = torch.load(args.resume, map_location=device)
            # model.load_state_dict(checkpoint['state_dict'])
            # Strip DataParallel's 'module.' prefix from every key.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()

    model.eval()

    if args.onnx:
        # Export a fixed-size graph and stop.
        import torch.onnx.symbolic_opset9
        dummy_input = torch.autograd.Variable(torch.randn(1, 3, 640, 640)).cpu()
        torch.onnx.export(model, dummy_input, 'dbnet.onnx', verbose=False)
        return 0

    total_frame = 0.0
    total_time = 0.0
    for idx, (org_img, img, scale_val) in enumerate(test_loader):
        print('progress: %d / %d' % (idx, len(test_loader)))
        sys.stdout.flush()

        if args.gpus > 0:
            # NOTE(review): `volatile=True` is a removed pre-0.4 API.
            img = Variable(img.cuda(), volatile=True)
        org_img = org_img.numpy().astype('uint8')[0]
        text_box = org_img.copy()
        resize_img = img.cpu().numpy().astype('uint8')[0].transpose(
            (1, 2, 0)).copy()

        if args.gpus > 0:
            torch.cuda.synchronize()
        start = time.time()
        outputs = model(img)
        infer_time = time.time()

        # Output 0: text probability; output 1: border probability.
        probability_map, border_map = outputs[0].sigmoid(), outputs[1].sigmoid()
        # print(probability_map.max(), probability_map.min())
        score = probability_map[0, 0]
        border_score = border_map[0, 0]
        # geo_map = outputs[2][0]
        # geo_scores, geo_pts = btw_nms(score.cpu().numpy(), geo_map.cpu().numpy())
        # print( geo_pts.shape)
        # prediction_map = textfill(score.cpu().numpy(), border_score, top_threshold=0.7, end_thershold=0.2)
        post_time = time.time()

        # Three binary masks: confident centers (>0.7), text region
        # (>0.3), and borders (>0.9). Only text_region feeds the
        # connected-component step below.
        center_text = torch.where(score > 0.7, torch.ones_like(score), torch.zeros_like(score))
        center_text = center_text.data.cpu().numpy().astype(np.uint8)
        text_region = torch.where(score > 0.3, torch.ones_like(score), torch.zeros_like(score))
        border_region = torch.where(border_score > 0.9,
                                    torch.ones_like(border_score),
                                    torch.zeros_like(border_score))
        prediction_map = text_region.data.cpu().numpy()
        border_region = border_region.data.cpu().numpy()
        # prediction_map[border_region==1] = 0

        prob_map = probability_map.cpu().numpy()[0, 0] * 255
        bord_map = border_map[0, 0].cpu().numpy() * 255
        out_path = 'outputs/vis_ic15/'
        image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
        # cv2.imwrite(out_path + image_name + '_prob.png', prob_map.astype(np.uint8))
        # cv2.imwrite(out_path + image_name + '_bd.png', bord_map.astype(np.uint8))
        # cv2.imwrite(out_path + image_name + '_tr.png', text_region.astype(np.uint8) * 255)
        # cv2.imwrite(out_path + image_name + '_fl.png', prediction_map.astype(np.uint8) * 255)

        scale = (org_img.shape[1] * 1.0 / img.shape[1],
                 org_img.shape[0] * 1.0 / img.shape[0])
        bboxes = []
        scale_val = scale_val.cpu().numpy()
        nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(
            prediction_map.astype(np.uint8), connectivity=4)
        t5 = time.time()
        # nLabels = prediction_map.max()
        # print("nLabels:", nLabels)
        img_h, img_w = prediction_map.shape[:2]
        for k in range(1, nLabels):
            # Drop tiny components (noise).
            size = stats[k, cv2.CC_STAT_AREA]
            if size < 4:
                continue

            # make segmentation map for this component only.
            segmap = np.zeros(prediction_map.shape, dtype=np.uint8)
            segmap[labels == k] = 255
            # segmap[np.logical_and(border_score > 0.7, score.cpu().numpy() < 0.05)] = 0  # remove link area
            x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
            w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
            print("xywh:", x, y, w, h, " size:", (w * h, size), "area:", np.sum(segmap) / 255.)

            # Dilation radius depends on how "solid" the component is:
            # compact components use the CRAFT-style formula, elongated
            # ones use the diagonal length. 4.3 is an empirical factor —
            # presumably tuned on IC15; TODO confirm.
            if size * 1. / (w * h) > 0.4:
                niter = int(math.sqrt(size * min(w, h) / (w * h)) * 4.3)
            else:
                new_w = math.sqrt(w**2 + h**2)
                niter = int(math.sqrt(size * 1.0 / new_w) * 4.3)
            print("abs:", size * 1. / (w * h), 'niter:', niter)

            sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
            # print("info:", sy, ey, sx, ex)
            # boundary check
            if sx < 0: sx = 0
            if sy < 0: sy = 0
            if ex >= img_w: ex = img_w
            if ey >= img_h: ey = img_h
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
            # Dilate only within the (clipped) expanded bounding window.
            segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)

            ############### original postprocess ################
            # # make segmentation map
            # segmap = np.zeros(score.shape, dtype=np.uint8)
            # segmap[prediction_map==k] = 255
            # # contourexpand
            # text_area = np.sum(segmap)
            # kernel = dilated_kernel(text_area)
            ############### original postprocess ################
            # segmap = cv2.dilate(segmap, kernel, iterations=1)

            # Component pixels as (x, y) pairs for minAreaRect; * 4
            # presumably undoes the model's output stride — TODO confirm.
            np_contours = np.roll(np.array(np.where(segmap != 0)), 1,
                                  axis=0).transpose().reshape(-1, 2)
            rectangle = cv2.minAreaRect(np_contours)
            box = cv2.boxPoints(rectangle) * 4
            box = box / scale_val
            box = box.astype('int32')
            bboxes.append(box)
        t6 = time.time()
        print("infer_time:{}, post_time:{}, expand_time:{}".format(
            infer_time - start, post_time - infer_time, t6 - t5))

        # find contours
        bboxes = np.array(bboxes)
        num_box = bboxes.shape[0]

        if args.gpus > 0:
            torch.cuda.synchronize()
        end = time.time()
        total_frame += 1
        total_time += (end - start)
        sys.stdout.flush()

        # geo_pts = geo_pts * 1.0 / scale_val
        # print(geo_pts.shape)
        for bbox in bboxes:
            cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (255, 0, 0), 2)
        # for bbox in geo_pts.astype('int32'):
        #     cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (255, 0, 255), 2)

        image_name = ".".join(
            data_loader.img_paths[idx].split('/')[-1].split('.')[:-1])
        write_result_as_txt(image_name, bboxes.reshape((-1, 8)), 'outputs/submit_ic15/')
        debug(idx, data_loader.img_paths, [[text_box]], 'outputs/vis_ic15/')
# Experiment-setup fragment: log hyperparameters, build the Clothing1M
# dataloaders, and construct four resnet50 networks with 14-way heads
# (student `net`, teacher `tch_net`, a frozen `pretrain_net` restored
# from checkpoint, and `test_net` for evaluation).
# `record` is the experiment log file opened earlier in this script.
record.write('batch size: %f\n' % args.batch_size)
record.write('start iter: %d\n' % args.start_iter)
record.write('mid iter: %d\n' % args.mid_iter)
record.flush()

loader = dataloader.clothing_dataloader(batch_size=args.batch_size, num_workers=5, shuffle=True)
train_loader, val_loader, test_loader = loader.run()

best = 0
init = True
# Model
print('\nModel setup')
print('| Building net')
# 14 output classes for the Clothing dataset head.
net = models.resnet50(pretrained=True)
net.fc = nn.Linear(2048, 14)
tch_net = models.resnet50(pretrained=True)
tch_net.fc = nn.Linear(2048, 14)
pretrain_net = models.resnet50(pretrained=True)
pretrain_net.fc = nn.Linear(2048, 14)
test_net = models.resnet50(pretrained=True)
test_net.fc = nn.Linear(2048, 14)

print('| load pretrain from checkpoint...')
checkpoint = torch.load('./checkpoint/%s.pth.tar' % args.checkpoint)
pretrain_net.load_state_dict(checkpoint['state_dict'])

if use_cuda:
    net.cuda()
    tch_net.cuda()
def test(args):
    """Run PSENet inference on the IC15 test set.

    Binarizes the kernel maps, runs progressive scale expansion (pypse),
    converts each label to a rotated rectangle, and writes both drawn
    visualizations and result txt files (scaled and unscaled variants).
    """
    data_loader = IC15TestLoader(long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)

    # Setup Model — 7 shrunk-kernel output channels.
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=7, scale=args.scale)

    # Inference only: freeze parameters.
    for param in model.parameters():
        param.requires_grad = False

    # model.cuda() will send your model to the "current device", which can be set with torch.cuda.set_device(device).
    model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            # model.load_state_dict(checkpoint['state_dict'])
            # Strip DataParallel's 'module.' prefix from every key.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()

    model.eval()

    total_frame = 0.0
    total_time = 0.0
    for idx, (org_img, img, s) in enumerate(test_loader):
        print('progress: %d / %d' % (idx, len(test_loader)))
        sys.stdout.flush()

        # img = Variable(img, volatile=True)
        # NOTE(review): `volatile=True` is a removed pre-0.4 PyTorch API.
        img = Variable(img.cuda(), volatile=True)
        org_img = org_img.numpy().astype('uint8')[0]
        text_box = org_img.copy()

        torch.cuda.synchronize()
        start = time.time()
        outputs = model(img)

        # Channel 0 confidence, then hard-binarize all channels.
        score = torch.sigmoid(outputs[:, 0, :, :])
        outputs = (torch.sign(outputs - args.binary_th) + 1) / 2
        text = outputs[:, 0, :, :]
        print("text.shape:{}".format(text.shape))
        print("outputs.shape:{}".format(outputs.shape))
        # Mask every kernel channel by the full-text channel.
        kernels = outputs[:, 0:args.kernel_num, :, :] * text

        score = score.data.cpu().numpy()[0].astype(np.float32)
        text = text.data.cpu().numpy()[0].astype(np.uint8)
        kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

        # c++ version pse
        # pred = pse(kernels, args.min_kernel_area / (args.scale * args.scale))
        # python version pse
        pred = pypse(kernels, args.min_kernel_area / (args.scale * args.scale))

        # scale = (org_img.shape[0] * 1.0 / pred.shape[0], org_img.shape[1] * 1.0 / pred.shape[1])
        scale = (org_img.shape[1] * 1.0 / pred.shape[1],
                 org_img.shape[0] * 1.0 / pred.shape[0])
        print(pred.shape)
        print("scale:{}".format(scale))
        label = pred
        label_num = np.max(label) + 1
        bboxes = []
        unscaled_bboxes = []
        s = s.numpy()  # s: Tensor -> numpy; s.float() would give a 1-d float tensor
        for i in range(1, label_num):
            points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]

            # Area filter (in network-output pixels).
            if points.shape[0] < args.min_area / (args.scale * args.scale):
                continue

            # Mean-confidence filter.
            score_i = np.mean(score[label == i])
            if score_i < args.min_score:
                continue

            # print(points.shape)
            rect = cv2.minAreaRect(points)
            # bbox: ndarray of shape [[a1 a2][b1 b2]...[c1 c2]];
            # supports elementwise multiply/divide by an array or scalar.
            bbox = cv2.boxPoints(rect) * scale
            bbox = bbox.astype('int32')
            bboxes.append(bbox.reshape(-1))
            # print(s)
            # print(cv2.boxPoints(rect))
            # print(cv2.boxPoints(rect) / s)
            # Boxes in the resized-image frame, undoing the loader scale s.
            unscaled_bboxes.append(
                (cv2.boxPoints(rect) / s).astype('int32').reshape(-1))

        torch.cuda.synchronize()
        end = time.time()
        total_frame += 1
        total_time += (end - start)
        print('fps: %.2f' % (total_frame / total_time))
        sys.stdout.flush()

        for bbox in bboxes:
            cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2)

        image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
        # 1. detection-box txt output path (dataset_model)
        # write_result_as_txt(image_name, bboxes, 'outputs/submit_funs_model3_epoch_48/')
        write_result_as_txt(image_name, bboxes, 'test_img/')
        #write_result_as_txt(image_name, bboxes, 'outputs/submit_funs_model1/')
        # 2. unscaled txt output path
        # write_result_as_txt(image_name, unscaled_bboxes, 'outputs/submit_funs_model3_epoch_48/')

        text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
        print("text_box.shape:{}".format(text_box.shape))
        # 3. visualization image path
        # debug(idx, data_loader.img_paths, [[text_box]], 'outputs/vis_ic15/')
        # debug(idx, data_loader.img_paths, [[text_box]], 'outputs/vis_funs_model3_epoch_48/')
        debug(idx, data_loader.img_paths, [[text_box]], 'test_img/')
        # 4. build submit.zip
        # cmd = 'cd %s;zip -j %s %s/*'%('./outputs/', 'submit_ic15.zip', 'submit_ic15');
        # cmd = 'cd %s;zip -j %s %s/*'%('./outputs/', 'submit_funs_model3_epoch_48.zip', 'submit_funs_model3_epoch_48');
        sys.stdout.flush()
def test(args):
    """Run PixelLink-style inference (pixel + 8-direction link scores).

    Softmaxes class/link logits into positive-pixel and positive-link
    probabilities, decodes boxes with to_bboxes, writes results and
    visualizations, then zips the output and launches the IC19 eval
    script.
    """
    data_loader = IC15TestLoader(long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)

    # Setup Model — 18 channels: 2 pixel-class + 16 (2x8) link logits.
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=18, scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=18, scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=18, scale=args.scale)
    elif args.arch == "vgg16":
        model = models.vgg16(pretrained=True, num_classes=18)

    # Inference only: freeze parameters.
    for param in model.parameters():
        param.requires_grad = False

    model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print(("Loading model and optimizer from checkpoint '{}'".format(
                args.resume)))
            checkpoint = torch.load(args.resume)
            # model.load_state_dict(checkpoint['state_dict'])
            # Strip DataParallel's 'module.' prefix from every key.
            d = collections.OrderedDict()
            for key, value in list(checkpoint['state_dict'].items()):
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            print(("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
            sys.stdout.flush()
        else:
            print(("No checkpoint found at '{}'".format(args.resume)))
            sys.stdout.flush()

    model.eval()

    total_frame = 0.0
    total_time = 0.0
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for idx, (org_img, img) in enumerate(test_loader):
        print(('progress: %d / %d' % (idx, len(test_loader))))
        sys.stdout.flush()

        img = img.to(device)
        org_img = org_img.numpy().astype('uint8')[0]
        text_box = org_img.copy()

        torch.cuda.synchronize(device)
        start = time.time()
        cls_logits, link_logits = model(img)
        outputs = torch.cat((cls_logits, link_logits), dim=1)
        shape = outputs.shape
        # Positive-pixel probability from the 2-way class softmax.
        pixel_pos_scores = F.softmax(outputs[:, 0:2, :, :], dim=1)[:, 1, :, :]
        # pixel_pos_scores=torch.sigmoid(outputs[:,1,:,:])
        # FIXME the dimension should be changed
        # Reshape 16 link channels to (batch, 2, 8 directions, H, W)
        # and softmax over the 2-way pos/neg axis.
        link_scores = outputs[:, 2:, :, :].view(shape[0], 2, 8, shape[2], shape[3])
        link_pos_scores = F.softmax(link_scores, dim=1)[:, 1, :, :, :]

        # Decode pixel+link maps into boxes (project helper).
        mask, bboxes = to_bboxes(org_img,
                                 pixel_pos_scores.cpu().numpy(),
                                 link_pos_scores.cpu().numpy())

        score = pixel_pos_scores[0, :, :]
        score = score.data.cpu().numpy().astype(np.float32)

        torch.cuda.synchronize(device)
        end = time.time()
        total_frame += 1
        total_time += (end - start)
        print(('fps: %.2f' % (total_frame / total_time)))
        sys.stdout.flush()

        for bbox in bboxes:
            cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2)

        image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
        # print(image_name)
        # cv2.imwrite(image_name+"minarea.jpg", text_box)
        write_result_as_txt(image_name, bboxes, 'outputs/OHEM_c640')

        # Visualization panels resized to the original image size.
        text_box = cv2.resize(text_box, (org_img.shape[1], org_img.shape[0]))
        score_s = cv2.resize(
            np.repeat(score[:, :, np.newaxis] * 255, 3, 2).astype(np.uint8),
            (org_img.shape[1], org_img.shape[0]))
        mask = cv2.resize(
            np.repeat(mask[:, :, np.newaxis], 3, 2).astype(np.uint8),
            (org_img.shape[1], org_img.shape[0]))
        # NOTE(review): np.float was removed in NumPy 1.24 — use float
        # or np.float64 when upgrading.
        link_score = (link_pos_scores[0, 0, :, :]).cpu().numpy() * (
            score > 0.5).astype(np.float)
        link_score = cv2.resize(
            np.repeat(link_score[:, :, np.newaxis] * 255, 3, 2).astype(np.uint8),
            (org_img.shape[1], org_img.shape[0]))
        debug(idx, data_loader.img_paths,
              [[text_box, score_s], [link_score, mask]],
              'outputs/vis_OHEM_c640/')

    # Zip the submissions and run the IC19 evaluation script.
    # NOTE(review): placement after the per-image loop reconstructed
    # from whitespace-mangled source — confirm against the original.
    cmd = 'cd %s;zip -j %s %s/*' % ('./outputs/', 'OHEM_c640.zip', 'OHEM_c640')
    print(cmd)
    sys.stdout.flush()
    util.cmd.cmd(cmd)
    cmd_eval = 'cd eval/ic19/; python script.py -g=submit.zip -s=../../outputs/OHEM_c640.zip '
    sys.stdout.flush()
    util.cmd.cmd(cmd_eval)
def train():
    """Train a resnet50-based 6-channel heatmap regressor on the card dataset.

    Uses sigmoid + BCE on the network output, saves debug feature images
    every 30 iterations and a checkpoint every 3 epochs.
    """
    transform = transforms.Compose([transforms.ToTensor()])
    card_dataset = cardDataset(transform)
    train_dataloader = DataLoader(card_dataset,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=1,
                                  collate_fn=collate_fn)

    model = models.resnet50(pretrained=True)
    # Replace the classification head with a 1x1 conv producing 6 channels.
    # NOTE(review): a Conv2d head only works if the forward pass keeps
    # spatial dims before `fc` — confirm against the project's resnet variant.
    # (Removed the redundant `model.fc = None` that preceded this assignment.)
    model.fc = torch.nn.Conv2d(512 * 4, 6, kernel_size=1, stride=1, bias=True)
    model.cuda()
    model.train()

    optimizer = optim.Adam(model.parameters(), lr=2e-4)
    # Halve the LR every 3 epochs.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)
    loss_fn = torch.nn.BCELoss()

    for e in range(config.epoch):
        for i, (img, target) in enumerate(train_dataloader):
            img = img.to(device='cuda:0', dtype=torch.float32)
            target = target.to(device='cuda:0', dtype=torch.float32)
            model.zero_grad()

            out = model(img)
            # BCELoss expects probabilities, hence the explicit sigmoid.
            loss = loss_fn(torch.sigmoid(out), target)

            if i % 30 == 0:
                # Dump prediction / target feature maps for visual debugging.
                data.show_six_feature(
                    torch.sigmoid(out).cpu().detach().numpy(),
                    figure_num=2,
                    title='outfeat',
                    img=img)
                plt.savefig('./save_feature/epoch_{}_iter_{}_outfeat.jpg'.format(e, i))
                plt.close()
                data.show_six_feature(target.cpu().numpy(), figure_num=1, title='tatget')
                plt.savefig('./save_feature/epoch_{}_iter_{}_target.jpg'.format(e, i))
                plt.close()

            loss.backward()
            optimizer.step()
            # Idiom fix: len(card_dataset) instead of card_dataset.__len__().
            print("======> epoch:{}, iter:{}/{}, loss:{}, lr:{}".format(
                e, i, len(card_dataset) // config.batch_size,
                round(loss.item(), 7),
                exp_lr_scheduler.get_last_lr()[0]))

        exp_lr_scheduler.step()
        if e % 3 == 0:
            save_path = r'./save_model/epoch_{}.pth'.format(str(e).zfill(2))
            print("save model :", save_path)
            torch.save(model.state_dict(), save_path,
                       _use_new_zipfile_serialization=False)
def train_net(args):
    """Train a face-recognition backbone + ArcFace margin head.

    Either builds a fresh model/optimizer pair (args.checkpoint is None) or
    resumes by unpickling whole model/optimizer objects from a checkpoint.
    Logs loss/accuracy/lr to TensorBoard and saves every 10 epochs.

    Args:
        args: parsed CLI namespace; uses network, optimizer, lr, mom,
            weight_decay, focal_loss, gamma, train_path, batch_size,
            end_epoch, use_se and checkpoint.
    """
    # Fixed seeds for reproducibility.
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        # Backbone selection by CLI flag; default is the SE-face variant.
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        # ArcFace margin head, optimized jointly with the backbone.
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            # optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
            #                             lr=args.lr, momentum=args.mom, weight_decay=args.weight_decay)
            # SGD is wrapped in InsightFaceOptimizer (exposes .lr/.step_num,
            # used by the logging below).
            optimizer = InsightFaceOptimizer(
                torch.optim.SGD([{'params': model.parameters()},
                                 {'params': metric_fc.parameters()}],
                                lr=args.lr, momentum=args.mom,
                                weight_decay=args.weight_decay))
        else:
            optimizer = torch.optim.Adam([{'params': model.parameters()},
                                          {'params': metric_fc.parameters()}],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
    else:
        # NOTE: the checkpoint stores whole model/optimizer objects, not
        # state dicts, so loading unpickles them directly.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available (`device` is module-level).
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = Dataset(root=args.train_path, phase='train',
                            input_shape=(3, 112, 112))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger)
        print('\nCurrent effective learning rate: {}\n'.format(optimizer.lr))
        print('Step num: {}\n'.format(optimizer.step_num))

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)
        writer.add_scalar('model/learning_rate', optimizer.lr, epoch)

        # Save checkpoint every 10 epochs (best_acc is never updated here —
        # NOTE(review): looks vestigial, confirm against save_checkpoint).
        if epoch % 10 == 0:
            save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                            optimizer, best_acc)
def build_model(self, image_batch, rot_batch, pose_batch, shape_batch, gt2d_batch, gt3d_batch):
    """Build the pose-regression graph and its 3D loss (TensorFlow-style).

    Runs the resnet50 feature/regression network over ``image_batch`` and
    stores the prediction and a 3D pose loss on ``self``.

    NOTE(review): only ``image_batch`` and ``pose_batch`` are used here;
    rot/shape/gt2d/gt3d batches are accepted but ignored — presumably
    consumed by commented-out or sibling losses. Confirm before pruning.
    """
    # resnet50 here is a graph-builder returning (output, variables);
    # reuse=False creates fresh variables.
    self.out, self.Var = resnet50(image_batch, self.pose_params, reuse=False)
    pred_pose = self.out
    # shapes = self.train_loader['shape']
    # pred_rot = batch_rodrigues(tf.reshape(self.out, [self.batch_size, -1, 3]))
    self.loss_pose = self.get_3d_loss(pose_batch, pred_pose)
dataloader_trn = torch.utils.data.DataLoader( dataset=dataset_trn, batch_size=params_batch_size_fWGAN, shuffle=True, # sampler=WeightedRandomSampler(np.ones(len(dataset_trn)), num_samples=len(dataset_trn)//100), num_workers=params_num_workers, pin_memory=True, ) torch.cuda.set_device(params_cuda_device) # Model & Loss lst_tsClassEmb = [ Tensor(emb).cuda() for emb in [arr_ClassNameVec, arr_ClassAttr] ] emb_size = [emb.shape[1] for emb in lst_tsClassEmb] ConvNet = resnet50(num_classes=NClassTotal).cuda() state_dict = torch.load(path_ptmdlConvNet, map_location={ 'cuda:1': 'cuda:0', 'cuda:2': 'cuda:0' }) state_dict = OrderedDict({ key: state_dict[key] for key in state_dict if key in ConvNet.state_dict().keys() }) ConvNet.load_state_dict(state_dict) G = G_Feat( emb_size=emb_size, dimHid=params_dimHid_G, feat_size=params_dimVisFeat,
def train_net(args):
    """Train an ArcFace model with per-epoch LFW validation and StepLR decay.

    Builds (or resumes) backbone + ArcMargin head, trains for
    ``args.end_epoch`` epochs, validates on LFW each epoch and checkpoints
    with an is-best flag.

    Args:
        args: parsed CLI namespace; uses network, optimizer, lr, mom,
            weight_decay, focal_loss, gamma, batch_size, lr_step,
            full_log, end_epoch, use_se and checkpoint.
    """
    # Fixed seeds for reproducibility.
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        # Backbone selection by CLI flag; default is the SE-face variant.
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        # Both the backbone and the margin head share one optimizer.
        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }], lr=args.lr, momentum=args.mom, weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }], lr=args.lr, weight_decay=args.weight_decay)
    else:
        # NOTE: whole model/optimizer objects are unpickled, not state dicts.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available (`device` is module-level).
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    # Decay LR by 10x every args.lr_step epochs.
    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # NOTE(review): scheduler.step() before the epoch's optimizer steps —
        # since PyTorch 1.1 the documented order is optimizer.step() first,
        # then scheduler.step() at epoch end; as written the first LR value
        # is skipped. Left unchanged to preserve the trained behavior.
        scheduler.step()

        if args.full_log:
            lfw_acc, threshold = lfw_test(model)
            writer.add_scalar('LFW_Accuracy', lfw_acc, epoch)
            full_log(epoch)

        start = datetime.now()
        # One epoch's training
        train_loss, train_top5_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger)
        writer.add_scalar('Train_Loss', train_loss, epoch)
        writer.add_scalar('Train_Top5_Accuracy', train_top5_accs, epoch)

        end = datetime.now()
        delta = end - start
        print('{} seconds'.format(delta.seconds))

        # One epoch's validation
        lfw_acc, threshold = lfw_test(model)
        writer.add_scalar('LFW_Accuracy', lfw_acc, epoch)

        # Check if there was an improvement
        is_best = lfw_acc > best_acc
        best_acc = max(lfw_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
def main():
    """Fine-tune a classifier with Outlier Exposure (OE).

    Trains on in-distribution data with BCE while pushing outlier batches
    toward a uniform distribution (weight 0.1), logs per-class accuracy to
    TensorBoard and saves the last checkpoint each epoch.
    """
    start_epoch = 0
    pretrained_model = os.path.join("./pre_trained", args.dataset,
                                    args.net_type + ".pth.tar")
    save_model = "./save_model_animal_OE"
    tensorboard_dir = "./tensorboard/OOD_animal_OE"
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hyper-parameters
    # NOTE(review): `eps` is never used below.
    eps = 1e-8

    ### data config
    train_data = load_data.Dog_dataloader(image_dir=image_dir,
                                          num_class=args.num_classes,
                                          mode="train")
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=2)
    # Outlier (OE) stream: a single pseudo-class, repeated to cover epochs.
    outlier_data = load_data.Dog_dataloader(image_dir=OOD_image_dir,
                                            num_class=1,
                                            mode="voc_data",
                                            repeat=10000)
    outlier_loader = torch.utils.data.DataLoader(outlier_data,
                                                 batch_size=args.batch_size,
                                                 shuffle=True,
                                                 num_workers=2)
    test_data = load_data.Dog_dataloader(image_dir=image_dir,
                                         num_class=args.num_classes,
                                         mode="test")
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=8,
                                              shuffle=False,
                                              num_workers=2)

    ##### model, optimizer config
    if args.net_type == "resnet50":
        model = models.resnet50(num_c=args.num_classes, pretrained=True)
    elif args.net_type == "resnet34":
        model = models.resnet34(num_c=args.num_classes, pretrained=True)

    if args.load == True:
        print("loading model")
        checkpoint = torch.load(pretrained_model)
        ##### load model
        model.load_state_dict(checkpoint["model"])

    optimizer = optim.SGD(model.parameters(),
                          lr=args.init_lr,
                          momentum=0.9,
                          nesterov=True)
    # Cosine decay over the total number of optimizer steps (per-iteration).
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.num_epochs * len(train_loader))

    #### loss config
    # NOTE(review): BCEWithLogitsLoss already applies a sigmoid internally,
    # yet class_loss below feeds torch.sigmoid(output) into it (double
    # sigmoid) while outlier_loss passes raw logits — looks like a bug, but
    # fixing it changes training behavior; confirm before touching.
    criterion = nn.BCEWithLogitsLoss()
    uniform = torch.ones(
        (args.batch_size, args.num_classes)) / args.num_classes
    uniform = uniform.to(device)

    #### create folder
    Path(os.path.join(save_model, env, args.net_type)).mkdir(exist_ok=True,
                                                             parents=True)

    if args.board_clear == True:
        # Wipe old TensorBoard runs when requested.
        files = glob.glob(tensorboard_dir + "/*")
        for f in files:
            shutil.rmtree(f)
    # Pick the first unused numeric run directory.
    i = 0
    while True:
        if Path(os.path.join(tensorboard_dir, str(i))).exists() == True:
            i += 1
        else:
            Path(os.path.join(tensorboard_dir, str(i))).mkdir(exist_ok=True,
                                                              parents=True)
            break
    summary = SummaryWriter(os.path.join(tensorboard_dir, str(i)))

    # Baseline accuracy of the loaded checkpoint before OE fine-tuning.
    test_acc = 0
    if args.load == True:
        with torch.no_grad():
            for i, (org_image, gt) in enumerate(test_loader):
                org_image = org_image.to(device)
                model = model.to(device).eval()
                gt = gt.type(torch.FloatTensor).to(device)

                #### forward path
                output = model(org_image)
                gt_label = torch.argmax(gt, dim=1).cpu().detach().tolist()
                output_label = torch.argmax(torch.sigmoid(output),
                                            dim=1).cpu().detach().tolist()
                test_acc += sum(
                    torch.argmax(torch.sigmoid(output), dim=1) == torch.argmax(
                        gt, dim=1)).cpu().detach().item()
        print("initial model accuracy = {:.4f}".format(
            test_acc / test_data.num_image))

    # Start training
    # NOTE(review): j / best_score / score are never used afterwards.
    j = 0
    best_score = 0
    score = 0
    for epoch in range(start_epoch, args.num_epochs):
        # Random offset so each epoch sees a different outlier slice.
        outlier_loader.dataset.offset = np.random.randint(
            outlier_data.num_image)
        # NOTE(review): per-class counters via locals()["..."] mutation is a
        # CPython-specific hack (locals() writes are not guaranteed to
        # persist); a dict/Counter would be the safe equivalent.
        for i in range(args.num_classes):
            locals()["train_label{}".format(i)] = 0
            locals()["test_label{}".format(i)] = 0
        total_class_loss = 0
        total_outlier_loss = 0
        train_acc = 0
        test_acc = 0
        stime = time.time()
        # for i, (org_image, gt) in enumerate(train_loader):
        for i, ((org_image, gt), (outlier_image, _)) in enumerate(
                zip(train_loader, outlier_loader)):
            #### initialized
            # Small Gaussian input noise for regularization.
            org_image += 0.01 * torch.randn_like(org_image)
            org_image = org_image.to(device)
            model = model.to(device).train()
            gt = gt.type(torch.FloatTensor).to(device)
            optimizer.zero_grad()

            #### forward path
            output = model(org_image)

            #### calc loss
            class_loss = criterion(torch.sigmoid(output), gt)
            outlier_image += 0.01 * torch.randn_like(outlier_image)
            outlier_image = outlier_image.to(device)
            outlier_output = model(outlier_image)
            # OE term: push outlier predictions toward the uniform target.
            outlier_loss = criterion(outlier_output, uniform)
            loss = class_loss + 0.1 * outlier_loss

            #### calc accuracy
            train_acc += sum(
                torch.argmax(torch.sigmoid(output), dim=1) == torch.argmax(
                    gt, dim=1)).cpu().detach().item()
            gt_label = torch.argmax(gt, dim=1).cpu().detach().tolist()
            output_label = torch.argmax(torch.sigmoid(output),
                                        dim=1).cpu().detach().tolist()
            for idx, label in enumerate(gt_label):
                if label == output_label[idx]:
                    locals()["train_label{}".format(label)] += 1

            loss.backward()
            optimizer.step()
            # Cosine schedule advances per iteration (matches T_max above).
            scheduler.step()
            total_class_loss += class_loss.item()
            total_outlier_loss += outlier_loss.item()

        with torch.no_grad():
            for i, (org_image, gt) in enumerate(test_loader):
                org_image = org_image.to(device)
                model = model.to(device).eval()
                gt = gt.type(torch.FloatTensor).to(device)

                #### forward path
                output = model(org_image)
                gt_label = torch.argmax(gt, dim=1).cpu().detach().tolist()
                output_label = torch.argmax(torch.sigmoid(output),
                                            dim=1).cpu().detach().tolist()
                for idx, label in enumerate(gt_label):
                    if label == output_label[idx]:
                        locals()["test_label{}".format(label)] += 1
                test_acc += sum(
                    torch.argmax(torch.sigmoid(output), dim=1) == torch.argmax(
                        gt, dim=1)).cpu().detach().item()

        print(
            'Epoch [{}/{}], Step {}, class_loss = {:.4f}, out_loss = {:.4f}, exe time: {:.2f}, lr: {:.4f}*e-4'
            .format(epoch, args.num_epochs, i + 1,
                    total_class_loss / len(train_loader),
                    total_outlier_loss / len(train_loader),
                    time.time() - stime,
                    scheduler.get_last_lr()[0] * 10**4))
        print("train accuracy total : {:.4f}".format(train_acc /
                                                     train_data.num_image))
        for num in range(args.num_classes):
            print("label{} : {:.4f}".format(
                num, locals()["train_label{}".format(num)] /
                train_data.len_list[num]),
                  end=" ")
        print()
        print("test accuracy total : {:.4f}".format(test_acc /
                                                    test_data.num_image))
        for num in range(args.num_classes):
            print("label{} : {:.4f}".format(
                num, locals()["test_label{}".format(num)] /
                test_data.len_list[num]),
                  end=" ")
        print("\n")

        summary.add_scalar('loss/class_loss',
                           total_class_loss / len(train_loader), epoch)
        summary.add_scalar('loss/outlier_loss',
                           total_outlier_loss / len(train_loader), epoch)
        summary.add_scalar('acc/train_acc', train_acc / train_data.num_image,
                           epoch)
        summary.add_scalar('acc/test_acc', test_acc / test_data.num_image,
                           epoch)
        summary.add_scalar("learning_rate/lr", scheduler.get_last_lr()[0],
                           epoch)

        time.sleep(0.001)
        # Always overwrite the "last" checkpoint for this run.
        torch.save(
            {
                'model': model.state_dict(),
                'epoch': epoch,
                'init_lr': scheduler.get_last_lr()[0]
            },
            os.path.join(save_model, env, args.net_type,
                         'checkpoint_last_OE.pth.tar'))
def main(args):
    """Train a PSENet-style detector on ICDAR2015.

    Builds the checkpoint directory name from the CLI flags, optionally
    warm-starts from a pretrained / resumed checkpoint, then trains for
    ``args.n_epoch`` epochs with step-decayed SGD, checkpointing every
    10 epochs and logging per-epoch metrics.

    Args:
        args: parsed CLI namespace; uses checkpoint, arch, batch_size,
            n_epoch, pretrain, resume, lr, schedule and img_size.
    """
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ic15_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
    if args.pretrain:
        if 'synth' in args.pretrain:
            args.checkpoint += "_pretrain_synth"
        else:
            args.checkpoint += "_pretrain_ic17"

    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    print('schedule: ', args.schedule)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = 7      # number of shrunk text kernels the net predicts
    min_scale = 0.4     # smallest kernel shrink ratio
    start_epoch = 0

    data_loader = IC15Loader(is_transform=True,
                             img_size=args.img_size,
                             kernel_num=kernel_num,
                             min_scale=min_scale)
    train_loader = torch.utils.data.DataLoader(data_loader,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=3,
                                               drop_last=True,
                                               pin_memory=True)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=False, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)

    model = torch.nn.DataParallel(model).cuda()

    # Prefer a model-supplied optimizer if the architecture defines one.
    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=0.99,
                                    weight_decay=5e-4)

    title = 'icdar2015'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(args.pretrain), 'Error: no checkpoint directory found!'
        print(args.pretrain)
        checkpoint = torch.load(args.pretrain)
        # Copy only the weights whose keys exist in the current model
        # (checkpoint here is a flat state dict).
        state = model.state_dict()
        for key in state.keys():
            if key in checkpoint.keys():
                # BUG FIX: this read `pretrained_model[key]` — an undefined
                # name (NameError at runtime); the loaded `checkpoint` is
                # the intended source.
                state[key] = checkpoint[key]
        model.load_state_dict(state)
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title,
                        resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr']))

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)

        if epoch % 10 == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lr': args.lr,
                'optimizer': optimizer.state_dict(),
            }, checkpoint=args.checkpoint)

        logger.append([optimizer.param_groups[0]['lr'], train_loss,
                       train_te_acc, train_te_iou])
    logger.close()
def test(args):
    """Run PSE inference on the LSVT test set and dump txt + visualizations.

    For each image: forward pass, binarize kernel maps at ``binary_th``,
    run progressive scale expansion, extract per-label contours as polygons
    and write results under outputs/submit_LSVT/.

    Args:
        args: parsed CLI namespace; uses long_size, arch, scale, resume,
            binary_th, kernel_num, min_kernel_area, min_area and min_score.
    """
    data_loader = IC15TestLoader(long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)

    # Setup Model (7 output channels: text map + 6 shrunk kernels).
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=False, num_classes=7, scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=7, scale=args.scale)

    # Inference only: freeze everything.
    for param in model.parameters():
        param.requires_grad = False
    model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            # Strip the DataParallel 'module.' prefix (7 chars) from keys.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()

    model.eval()
    total_frame = 0.0
    total_time = 0.0
    with torch.no_grad():
        for idx, (org_img, img) in enumerate(test_loader):
            print('progress: %d / %d' % (idx, len(test_loader)))
            sys.stdout.flush()

            img = Variable(img.cuda())
            org_img = org_img.numpy().astype('uint8')[0]
            text_box = org_img.copy()

            torch.cuda.synchronize()
            start = time.time()

            outputs = model(img)
            score = torch.sigmoid(outputs[:, 0, :, :])
            # Hard-binarize every channel at binary_th: sign() -> {0, 1}.
            outputs = (torch.sign(outputs - args.binary_th) + 1) / 2
            text = outputs[:, 0, :, :]
            kernels = outputs[:, 0:args.kernel_num, :, :] * text

            score = score.data.cpu().numpy()[0].astype(np.float32)
            text = text.data.cpu().numpy()[0].astype(np.uint8)
            kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

            # c++ version pse
            pred = pse(kernels, args.min_kernel_area / (args.scale * args.scale))
            # python version pse
            # pred = pypse(kernels, args.min_kernel_area / (args.scale * args.scale))

            # pred contains connected components, one integer label each.
            scale = (org_img.shape[0] * 1.0 / pred.shape[0],
                     org_img.shape[1] * 1.0 / pred.shape[1])
            label = pred
            label_num = np.max(label) + 1  # number of boxes + background
            bboxes = []
            for i in range(1, label_num):
                # Pixels of component i as (x, y) pairs.
                points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]
                if points.shape[0] < args.min_area / (args.scale * args.scale):
                    continue
                score_i = np.mean(score[label == i])
                if score_i < args.min_score:  # score_threshold
                    continue
                binary = np.zeros(label.shape, dtype='uint8')
                binary[label == i] = 1
                # BUG FIX: this findContours call was commented out, leaving
                # `contours` undefined (NameError on the next line).
                # NOTE(review): 2-value unpack matches OpenCV >= 4.x; 3.x
                # returns 3 values — confirm the pinned cv2 version.
                contours, _ = cv2.findContours(binary, cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_SIMPLE)
                contour = contours[0]
                bbox = contour
                if bbox.shape[0] <= 2:
                    continue
                bbox = bbox * scale
                bbox = bbox.astype('int32')
                bboxes.append(bbox.reshape(-1))

            torch.cuda.synchronize()
            end = time.time()
            total_frame += 1
            total_time += (end - start)
            print('fps: %.2f' % (total_frame / total_time))
            sys.stdout.flush()

            for bbox in bboxes:
                # BUG FIX: integer division — shape[0] / 2 is a float in
                # Python 3 and reshape() rejects it.
                cv2.drawContours(text_box, [bbox.reshape(bbox.shape[0] // 2, 2)],
                                 -1, (0, 255, 0), 2)

            image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
            write_result_as_txt(image_name, bboxes, 'outputs/submit_LSVT/')
            text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
            debug(idx, data_loader.img_paths, [[text_box]], 'outputs/vis_LSVT/')
def test(args):
    """Run boundary-corner inference on CTW1500 and save visualizations.

    The model outputs 6 channels: text, kernel and four corner maps
    (top-left/top-right/bottom-right/bottom-left). Only the top-left map
    is post-processed and drawn here; the rest of the pipeline is kept as
    commented experiments.

    Args:
        args: parsed CLI namespace; uses backbone and resume.
    """
    testset = CTW1500Testset_Bound()
    testloader = torch.utils.data.DataLoader(dataset=testset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             drop_last=True)
    if args.backbone == 'res50':
        model = resnet50(pretrained=True, num_classes=6)

    # Inference only: freeze everything.
    for param in model.parameters():
        param.requires_grad = False
    model = model.cuda()

    if args.resume is not None:
        if os.path.exists(args.resume):
            print('Load from', args.resume)
            checkpoint = torch.load(args.resume)
            # Why not model.load_state_dict(checkpoint['state_dict']) directly?
            # Training used multiple GPUs (DataParallel), so every parameter
            # name carries a 'module.' prefix (7 chars) that must be stripped.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
        else:
            print('No such checkpoint file at', args.resume)

    model.eval()
    for idx, (img, original_img) in tqdm(enumerate(testloader)):
        img = Variable(img.cuda())
        original_img = original_img.numpy().astype('uint8')[0]
        original_img = original_img.copy()
        outputs = model(img)

        score = torch.sigmoid(outputs[:, 0, :, :])
        # Hard-binarize every channel at threshold 1.0: sign() -> {0, 1}.
        outputs = (torch.sign(outputs - 1.0) + 1) / 2

        # Channel layout: 0=text, 1=kernel, 2..5=corner maps.
        output_text = outputs[:, 0, :, :]
        kernel = outputs[:, 1, :, :]
        # top = outputs[:, 2, :, :]
        # bot = outputs[:, 3, :, :]
        top_left = outputs[:, 2, :, :]
        top_right = outputs[:, 3, :, :]
        bot_right = outputs[:, 4, :, :]
        bot_left = outputs[:, 5, :, :]

        # Mask every auxiliary map by the text region.
        output_kernel = outputs[:, 1, :, :] * output_text
        # output_top = outputs[:, 2, :, :] * output_text
        # output_bot = outputs[:, 3, :, :] * output_text
        output_top_left = top_left * output_text
        output_top_right = top_right * output_text
        output_bot_right = bot_right * output_text
        output_bot_left = bot_left * output_text

        score = score.data.cpu().numpy()[0].astype(np.float32)
        output_text = output_text.data.cpu().numpy()[0].astype(np.uint8)
        output_kernel = output_kernel.data.cpu().numpy().astype(np.uint8)
        # output_top = output_top.data.cpu().numpy().astype(np.uint8)
        # output_bot = output_bot.data.cpu().numpy().astype(np.uint8)
        output_top_left = output_top_left.data.cpu().numpy().astype(np.uint8)
        output_top_right = output_top_right.data.cpu().numpy().astype(np.uint8)
        output_bot_right = output_bot_right.data.cpu().numpy().astype(np.uint8)
        output_bot_left = output_bot_left.data.cpu().numpy().astype(np.uint8)
        kernel = kernel.data.cpu().numpy()[0].astype(np.uint8)
        # top = top.data.cpu().numpy().astype(np.uint8)
        # bot = bot.data.cpu().numpy().astype(np.uint8)

        # Label the top-left corner map via (python) PSE.
        # pred = pypse(output_kernel, 10)
        pred_top = pypse(output_top_left, 0, connectivity=8)

        # scale = (original_img.shape[1] / pred.shape[1], original_img.shape[0] / pred.shape[0])
        # bboxes = []
        # num_label = np.max(pred) + 1
        # for i in range(1, num_label):
        #     points_loc = np.array(np.where(pred == i)).transpose((1, 0))
        #     # points = points[:,::-1]
        #     if points_loc.shape[0] < 300:
        #         continue
        #     score_i = np.mean(score[pred == i])
        #     if score_i < 0.93:
        #         continue
        #
        #     binary = np.zeros(pred.shape, dtype='uint8')
        #     binary[pred == i] = 1
        #
        #     contours, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        #     contour = contours[0]
        #     contour = contour * scale
        #     if contour.shape[0] <= 2:
        #         continue
        #     contour = contour.astype('int32')
        #     bboxes.append(contour.reshape(-1))

        # Map prediction coordinates back to the original image size.
        scale = (original_img.shape[1] / pred_top.shape[1],
                 original_img.shape[0] / pred_top.shape[0])
        # pred_top = np.reshape(pred_top, (pred_top.shape[0], pred_top.shape[1], 1))
        # pred_top = cv2.resize(pred_top, dsize=(original_img.shape[0], original_img.shape[1]))
        # pred_top = pred_top[:, :, 0]
        # original_img[pred_top > 0.5, :] = (0, 0, 255)
        bboxes = []
        num_label = np.max(pred_top) + 1
        for i in range(1, num_label):
            points_loc = np.array(np.where(pred_top == i)).transpose((1, 0))
            # points = points[:,::-1]
            # if points_loc.shape[0] < 1:
            #     continue
            # score_i = np.mean(score[pred_top == i])
            # if score_i < 0.93:
            #     continue
            binary = np.zeros(pred_top.shape, dtype='uint8')
            binary[pred_top == i] = 1
            contours, _ = cv2.findContours(binary, cv2.RETR_TREE,
                                           cv2.CHAIN_APPROX_SIMPLE)
            contour = contours[0]
            contour = contour * scale
            if contour.shape[0] <= 2:
                continue
            contour = contour.astype('int32')
            bboxes.append(contour.reshape(-1))

        torch.cuda.synchronize()
        for bbox in bboxes:
            cv2.drawContours(original_img,
                             [bbox.reshape(int(bbox.shape[0] / 2), 2)], -1,
                             (0, 0, 255), 1)
        image_name = testset.img_paths[idx].split('/')[-1].split('.')[0]
        # generate_txt_result_ctw(bboxes, image_name, 'outputs/result_ctw_txt_wh_new')
        generate_img_result(original_img, image_name,
                            'outputs/result_ctw_img_my_top_left')
#encoding=utf-8 import os import cv2 import sys import time import collections import torch import argparse import numpy as np import models path_model = "./checkpoint.pth.tar" path_save_wts = "./psenet0419.wts" args_scale = 1.0 model = models.resnet50(pretrained=True, num_classes=7, scale=args_scale) for param in model.parameters(): param.requires_grad = False checkpoint = torch.load(path_model) d = collections.OrderedDict() for key, value in checkpoint['state_dict'].items(): tmp = key[7:] d[tmp] = value model.load_state_dict(d) model.eval() import struct f = open(path_save_wts, 'w')
def test(args):
    """Run PSE inference on the IC15 test set, write results and zip them.

    For each image: forward pass, binarize at ``binary_th``, progressive
    scale expansion, min-area-rect boxes, then per-image txt output and a
    final zip via util.cmd.

    Args:
        args: parsed CLI namespace; uses long_size, arch, scale, resume,
            binary_th, kernel_num, min_kernel_area, min_area and min_score.
    """
    data_loader = IC15TestLoader(long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)

    # Setup Model (7 output channels: text map + 6 shrunk kernels).
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=7, scale=args.scale)

    # Inference only: freeze everything.
    for param in model.parameters():
        param.requires_grad = False
    model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            # Strip the DataParallel 'module.' prefix (7 chars) from keys.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()

    model.eval()
    total_frame = 0.0
    total_time = 0.0
    for idx, (org_img, img) in enumerate(test_loader):
        print('progress: %d / %d' % (idx, len(test_loader)))
        sys.stdout.flush()

        # NOTE(review): `volatile` was removed from torch.autograd long ago
        # and is ignored by modern PyTorch; `with torch.no_grad():` is the
        # current equivalent.
        img = Variable(img.cuda(), volatile=True)
        org_img = org_img.numpy().astype('uint8')[0]
        text_box = org_img.copy()

        # Sync before/after the forward pass for accurate fps timing.
        torch.cuda.synchronize()
        start = time.time()

        outputs = model(img)
        score = torch.sigmoid(outputs[:, 0, :, :])
        # Hard-binarize every channel at binary_th: sign() -> {0, 1}.
        outputs = (torch.sign(outputs - args.binary_th) + 1) / 2
        text = outputs[:, 0, :, :]
        kernels = outputs[:, 0:args.kernel_num, :, :] * text

        score = score.data.cpu().numpy()[0].astype(np.float32)
        text = text.data.cpu().numpy()[0].astype(np.uint8)
        kernels = kernels.data.cpu().numpy()[0].astype(np.uint8)

        # c++ version pse
        pred = pse(kernels, args.min_kernel_area / (args.scale * args.scale))
        # python version pse
        # pred = pypse(kernels, args.min_kernel_area / (args.scale * args.scale))

        # Map prediction coordinates back to the original image size.
        scale = (org_img.shape[0] * 1.0 / pred.shape[0],
                 org_img.shape[1] * 1.0 / pred.shape[1])
        label = pred
        label_num = np.max(label) + 1
        bboxes = []
        for i in range(1, label_num):
            # Pixels of component i as (x, y) pairs.
            points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]
            if points.shape[0] < args.min_area / (args.scale * args.scale):
                continue
            score_i = np.mean(score[label == i])
            if score_i < args.min_score:
                continue
            rect = cv2.minAreaRect(points)
            bbox = cv2.boxPoints(rect) * scale
            bbox = bbox.astype('int32')
            bboxes.append(bbox.reshape(-1))

        torch.cuda.synchronize()
        end = time.time()
        total_frame += 1
        total_time += (end - start)
        print('fps: %.2f' % (total_frame / total_time))
        sys.stdout.flush()

        for bbox in bboxes:
            cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (255, 0, 0), 2)

        image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
        write_result_as_txt(image_name, bboxes, 'outputs/submit_ic15/')

        text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
        debug(idx, data_loader.img_paths, [[text_box]], 'outputs/vis_ic15/')

    # Zip all per-image result files for submission.
    cmd = 'cd %s;zip -j %s %s/*' % ('./outputs/', 'submit_ic15.zip', 'submit_ic15');
    print(cmd)
    sys.stdout.flush()
    util.cmd.cmd(cmd)
def test(args):
    """Run inference on the MLT test set using a watershed post-process.

    The smallest predicted kernel seeds cv2.watershed over the full text
    map; each resulting label becomes a min-area-rect box with an attached
    score. Results go to outputs_mlt/submit_ic15/ and are zipped at the end.

    Args:
        args: parsed CLI namespace; uses long_size, arch, scale, resume,
            binary_th, kernel_num, min_area and min_score.
    """
    data_loader = IC15TestLoader(long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(data_loader,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              drop_last=True)

    # Fresh output directory for this run.
    submit_path = 'outputs_mlt'
    if os.path.exists(submit_path):
        shutil.rmtree(submit_path)
    os.mkdir(submit_path)

    # Setup Model (7 output channels: text map + 6 shrunk kernels).
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=7, scale=args.scale)

    # Inference only: freeze everything.
    for param in model.parameters():
        param.requires_grad = False
    model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            # Strip the DataParallel 'module.' prefix (7 chars) from keys.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
            print("Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            sys.stdout.flush()
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            sys.stdout.flush()

    model.eval()
    total_frame = 0.0
    total_time = 0.0
    for idx, (org_img, img) in enumerate(test_loader):
        print('progress: %d / %d' % (idx, len(test_loader)))
        sys.stdout.flush()

        # Fix: `volatile=True` was removed from PyTorch; torch.no_grad()
        # below is the modern equivalent.
        img = img.cuda()
        org_img = org_img.numpy().astype('uint8')[0]
        text_box = org_img.copy()

        # Sync before/after the forward pass for accurate fps timing.
        torch.cuda.synchronize()
        start = time.time()

        with torch.no_grad():
            outputs = model(img)
        score = torch.sigmoid(outputs[:, 0, :, :])
        # Hard-binarize every channel at binary_th: sign() -> {0, 1}.
        outputs = (torch.sign(outputs - args.binary_th) + 1) / 2
        text = outputs[:, 0, :, :]
        kernels = outputs[:, 0:args.kernel_num, :, :] * text

        score = score.squeeze(0).cpu().numpy().astype(np.float32)
        text = text.squeeze(0).cpu().numpy().astype(np.uint8)
        kernels = kernels.squeeze(0).cpu().numpy().astype(np.uint8)

        # Watershed: smallest kernel seeds sure-foreground, full text map is
        # sure-background; the band between them is "unknown".
        sure_fg = kernels[-1, :, :]
        sure_bg = text
        watershed_source = cv2.cvtColor(sure_bg, cv2.COLOR_GRAY2BGR)
        unknown = cv2.subtract(sure_bg, sure_fg)
        ret, marker = cv2.connectedComponents(sure_fg)
        label_num = np.max(marker)
        marker += 1
        marker[unknown == 1] = 0
        marker = cv2.watershed(watershed_source, marker)
        marker -= 1
        label = marker

        # Map prediction coordinates back to the original image size.
        scale = (org_img.shape[1] * 1.0 / marker.shape[1],
                 org_img.shape[0] * 1.0 / marker.shape[0])
        bboxes = []
        for i in range(1, label_num + 1):
            # Pixels of component i as (x, y) pairs.
            points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]
            # Similar to pixellink's min_area post-processing filter.
            if points.shape[0] < args.min_area / (args.scale * args.scale):
                continue
            # this filter op is very important, f-score=68.0(without) vs 69.1(with)
            score_i = np.mean(score[label == i])
            if score_i < args.min_score:
                continue
            rect = cv2.minAreaRect(points)
            bbox = cv2.boxPoints(rect) * scale
            # Store 8 coords + confidence as one flat array.
            bbox = bbox.reshape(-1).tolist()
            bbox.append(float(score_i))
            bboxes.append(np.array(bbox))

        torch.cuda.synchronize()
        end = time.time()
        total_frame += 1
        total_time += (end - start)
        print('fps: %.2f' % (total_frame / total_time))
        sys.stdout.flush()

        for bbox in bboxes:
            bbox = bbox[:8]  # drop the trailing confidence before drawing
            bbox = bbox.astype('int32')
            cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2)

        image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
        write_result_as_txt(image_name, bboxes, 'outputs_mlt/submit_ic15/')

        text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
        if idx % 200 == 0:
            debug(idx, data_loader.img_paths, [[text_box]], 'outputs_mlt/vis_ic15/')

    # BUG FIX: zip the directory the results were actually written to
    # ('./outputs_mlt/', not './outputs/'), and call util.cmd.cmd
    # (lowercase) consistently with the other call sites in this file.
    cmd = 'cd %s;zip -j %s %s/*' % ('./outputs_mlt/', 'submit_ic15.zip', 'submit_ic15')
    print(cmd)
    sys.stdout.flush()
    util.cmd.cmd(cmd)
def predict(args):
    """Run PAN text-detection inference on the ICDAR15 test set.

    Loads the chosen backbone (optionally restoring weights from
    ``args.resume``), runs it over every test image, draws the predicted
    quadrilateral boxes, and writes per-image txt and image results.

    Args:
        args: parsed CLI namespace; uses ``args.backbone`` ('res50'|'res18')
            and ``args.resume`` (checkpoint path or None).
    """
    testset = IC15TestDataset()
    testloader = torch.utils.data.DataLoader(dataset=testset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             drop_last=True)
    if args.backbone == 'res50':
        model = resnet50(pretrained=True, num_classes=6)
    elif args.backbone == 'res18':
        model = Model()
    else:
        raise NotImplementedError

    # Inference only: freeze every parameter.
    for param in model.parameters():
        param.requires_grad = False
    model = model.cuda()

    if args.resume is not None:
        if os.path.exists(args.resume):
            print('Load from', args.resume)
            checkpoint = torch.load(args.resume)
            # The checkpoint was saved from a multi-GPU (DataParallel) model,
            # so every key carries a 'module.' prefix; strip it before
            # loading into the single-GPU model (hence not using
            # model.load_state_dict(checkpoint['state_dict']) directly).
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]  # drop the leading 'module.'
                d[tmp] = value
            model.load_state_dict(d)
        else:
            print('No such checkpoint file at', args.resume)

    model.eval()
    for idx, (img, original_img) in tqdm(enumerate(testloader)):
        img = Variable(img.cuda())
        original_img = original_img.numpy().astype('uint8')[0]
        original_img = original_img.copy()
        outputs = model(img)
        bboxes = generate_result_PAN(outputs, original_img, threshold=0.7)
        # Swap (row, col) -> (x, y) ordering for each 4-point box.
        for i in range(len(bboxes)):
            bboxes[i] = bboxes[i].reshape(4, 2)[:, [1, 0]].reshape(-1)
        for bbox in bboxes:
            cv2.drawContours(original_img, [bbox.reshape(4, 2)], -1,
                             (0, 255, 0), 1)
        image_name = testset.img_paths[idx].split('/')[-1].split('.')[0]
        generate_txt_result_PAN(
            bboxes, image_name,
            'outputs/result_ic15_txt_PAN_baseline600_v4_4_90_7')
        generate_img_result(
            original_img, image_name,
            'outputs/result_ic15_img_PAN_baseline600_v4_4_90_7')

    # BUGFIX: the zip command previously pointed at
    # 'result_txt_ic15_PAN_baseline', a directory this function never writes
    # to; zip the directory that actually holds the txt results.
    cmd = 'cd %s;zip -j %s %s/*' % ('./outputs/', 'submit_ic15.zip',
                                    'result_ic15_txt_PAN_baseline600_v4_4_90_7')
    print(cmd)
def main():
    """Generate Mahalanobis OOD-detection scores for a skin-lesion classifier.

    Loads the backbone selected by ``args.net_type``, estimates per-layer
    class-conditional Gaussian statistics on the in-distribution training
    set, then for every noise magnitude and every out-of-distribution
    dataset computes Mahalanobis confidence scores and saves one labeled
    feature matrix per (magnitude, dataset) pair as a .npy file.
    Reads the module-level ``args`` namespace.
    """
    # set the path to pre-trained model and output
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    # makedirs (vs. mkdir) also creates missing parents, and exist_ok
    # removes the race between an isdir() check and the creation.
    os.makedirs(args.outf, exist_ok=True)

    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu)

    out_dist_list = [
        'skin_cli', 'skin_derm', 'corrupted', 'corrupted_70', 'imgnet',
        'nct', 'final_test'
    ]

    # load networks (every checkpoint was trained with 8 output classes)
    if args.net_type == 'densenet_121':
        model = densenet_121.Net(models.densenet121(pretrained=False), 8)
        ckpt = torch.load("../checkpoints/densenet-121/checkpoint.pth")
        model.load_state_dict(ckpt['model_state_dict'])
        model.eval()
        model.cuda()
    elif args.net_type == 'mobilenet':
        model = mobilenet.Net(models.mobilenet_v2(pretrained=False), 8)
        ckpt = torch.load("../checkpoints/mobilenet/checkpoint.pth")
        model.load_state_dict(ckpt['model_state_dict'])
        model.eval()
        model.cuda()
        print("Done!")
    elif args.net_type == 'resnet_50':
        model = resnet_50.Net(models.resnet50(pretrained=False), 8)
        ckpt = torch.load("../checkpoints/resnet-50/checkpoint.pth")
        model.load_state_dict(ckpt['model_state_dict'])
        model.eval()
        model.cuda()
        print("Done!")
    elif args.net_type == 'vgg_16':
        model = vgg_16.Net(models.vgg16_bn(pretrained=False), 8)
        ckpt = torch.load("../checkpoints/vgg-16/checkpoint.pth")
        model.load_state_dict(ckpt['model_state_dict'])
        model.eval()
        model.cuda()
        print("Done!")
    else:
        raise Exception(f"There is no net_type={args.net_type} available.")

    in_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    print('load model: ' + args.net_type)

    # load dataset
    print('load target data: ', args.dataset)
    train_loader, test_loader = data_loader.getTargetDataSet(
        args.dataset, args.batch_size, in_transform, args.dataroot)

    # set information about feature extraction: probe with a dummy batch to
    # learn how many intermediate feature maps the model exposes and the
    # channel count of each.
    model.eval()
    temp_x = torch.rand(2, 3, 224, 224).cuda()
    temp_x = Variable(temp_x)
    temp_list = model.feature_list(temp_x)[1]
    num_output = len(temp_list)
    feature_list = np.empty(num_output)
    for count, out in enumerate(temp_list):
        feature_list[count] = out.size(1)

    print('get sample mean and covariance')
    sample_mean, precision = lib_generation.sample_estimator(
        model, args.num_classes, feature_list, train_loader)

    print('get Mahalanobis scores')
    m_list = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]
    for magnitude in m_list:
        print('Noise: ' + str(magnitude))
        # In-distribution scores: one column block per feature layer.
        for i in range(num_output):
            M_in = lib_generation.get_Mahalanobis_score(model, test_loader, args.num_classes, args.outf, \
                                                        True, args.net_type, sample_mean, precision, i, magnitude)
            M_in = np.asarray(M_in, dtype=np.float32)
            if i == 0:
                Mahalanobis_in = M_in.reshape((M_in.shape[0], -1))
            else:
                Mahalanobis_in = np.concatenate(
                    (Mahalanobis_in, M_in.reshape((M_in.shape[0], -1))),
                    axis=1)
        for out_dist in out_dist_list:
            out_test_loader = data_loader.getNonTargetDataSet(
                out_dist, args.batch_size, in_transform, args.dataroot)
            print('Out-distribution: ' + out_dist)
            for i in range(num_output):
                M_out = lib_generation.get_Mahalanobis_score(model, out_test_loader, args.num_classes, args.outf, \
                                                             False, args.net_type, sample_mean, precision, i, magnitude)
                M_out = np.asarray(M_out, dtype=np.float32)
                if i == 0:
                    Mahalanobis_out = M_out.reshape((M_out.shape[0], -1))
                else:
                    Mahalanobis_out = np.concatenate(
                        (Mahalanobis_out, M_out.reshape((M_out.shape[0], -1))),
                        axis=1)
            Mahalanobis_in = np.asarray(Mahalanobis_in, dtype=np.float32)
            Mahalanobis_out = np.asarray(Mahalanobis_out, dtype=np.float32)
            Mahalanobis_data, Mahalanobis_labels = lib_generation.merge_and_generate_labels(
                Mahalanobis_out, Mahalanobis_in)
            file_name = os.path.join(
                args.outf,
                'Mahalanobis_%s_%s_%s.npy' % (str(magnitude), args.dataset, out_dist))
            Mahalanobis_data = np.concatenate(
                (Mahalanobis_data, Mahalanobis_labels), axis=1)
            np.save(file_name, Mahalanobis_data)
def train_net(args):
    """Train an ArcFace face-recognition model with MegaFace validation.

    Either builds a fresh ResNet backbone + ArcMargin head (when
    ``args.checkpoint`` is None) or restores model/head/optimizer from a
    saved checkpoint, then runs the epoch loop: train one epoch, evaluate
    on MegaFace, track the best accuracy, and save a checkpoint.

    Args:
        args: parsed CLI namespace (network, optimizer, lr, mom,
            weight_decay, focal_loss, gamma, batch_size, end_epoch,
            checkpoint).
    """
    # Fix seeds for reproducibility of data shuffling and init.
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    # Best-so-far MegaFace accuracy; -inf so the first epoch always "improves".
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        else:
            raise TypeError('network {} is not supported.'.format(
                args.network))
        model = nn.DataParallel(model)
        # ArcMargin head holds the class weights; optimized jointly with
        # the backbone below.
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        nesterov=True,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
    else:
        # Resume: the checkpoint stores whole objects (model, head,
        # optimizer), not just state dicts.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    # Move to GPU, if available ('device' is a module-level global).
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma)
    else:
        criterion = nn.CrossEntropyLoss()

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)

    # LR decays by 10x at epochs 5, 10, 15, 20.
    scheduler = MultiStepLR(optimizer, milestones=[5, 10, 15, 20], gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # NOTE(review): scheduler.step(epoch) before training uses the old
        # (pre-1.1) PyTorch calling convention; modern code steps after
        # optimizer.step(). Left as-is to preserve behavior.
        scheduler.step(epoch)
        lr = optimizer.param_groups[0]['lr']
        logger.info('\nCurrent effective learning rate: {}\n'.format(lr))
        # print('Step num: {}\n'.format(optimizer.step_num))
        writer.add_scalar('model/learning_rate', lr, epoch)

        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch)
        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)

        # One epoch's validation
        megaface_acc = megaface_test(model)
        writer.add_scalar('model/megaface_accuracy', megaface_acc, epoch)

        # Check if there was an improvement
        is_best = megaface_acc > best_acc
        best_acc = max(megaface_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            logger.info("\nEpochs since last improvement: %d\n" %
                        (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best, scheduler)
def main(args):
    """Train a kernel-based (PSENet-style) text detector on ICDAR2015.

    Builds the checkpoint directory name, constructs the model selected by
    ``args.arch`` with 18 output kernels, optionally loads pretrained or
    resumed weights, then runs the training loop with periodic
    checkpointing and TensorBoard/txt logging.

    Args:
        args: parsed CLI namespace (arch, batch_size, n_epoch, lr,
            img_size, schedule, pretrain, resume, checkpoint).
    """
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ic15_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
        if args.pretrain:
            if 'synth' in args.pretrain:
                args.checkpoint += "_pretrain_synth"
            else:
                args.checkpoint += "_pretrain_ic17"
    print(('checkpoint path: %s' % args.checkpoint))
    print(('init lr: %.8f' % args.lr))
    print(('schedule: ', args.schedule))
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)
    writer = SummaryWriter(args.checkpoint)
    kernel_num = 18  # number of shrink-kernel output channels
    start_epoch = 0

    data_loader = IC15Loader(is_transform=True, img_size=args.img_size)
    train_loader = torch.utils.data.DataLoader(
        data_loader,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=0,
        drop_last=False,
        pin_memory=True)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)
    elif args.arch == "vgg16":
        model = models.vgg16(pretrained=False, num_classes=kernel_num)
    elif args.arch == "googlenet":
        model = models.googlenet(pretrained=True, num_classes=kernel_num)
    else:
        # BUGFIX: previously an unrecognized arch fell through silently and
        # caused a confusing NameError on the next line; fail fast instead.
        raise ValueError('Unrecognized model architecture: %s' % args.arch)

    model = torch.nn.DataParallel(model).cuda()
    model.train()
    summary(model, (3, 640, 640))

    if hasattr(model.module, 'optimizer'):
        optimizer = model.module.optimizer
    else:
        # NOTE: the momentum value here strongly affects training -- with
        # momentum=0.99 the cross-entropy loss fails to converge, hence
        # Adam is used instead of SGD.
        #optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    title = 'icdar2015'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(args.pretrain), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.pretrain)
        model.load_state_dict(checkpoint['state_dict'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        # optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        adjust_learning_rate(args, optimizer, epoch)
        print(('\nEpoch: [%d | %d] LR: %f' % (
            epoch + 1, args.n_epoch, optimizer.param_groups[0]['lr'])))

        train_loss, train_te_acc, train_te_iou = train(
            train_loader, model, dice_loss, optimizer, epoch, writer)

        # Save a checkpoint every 4th epoch (epochs 3, 7, 11, ...).
        if epoch % 4 == 3:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'lr': args.lr,
                'optimizer': optimizer.state_dict(),
            }, checkpoint=args.checkpoint, filename='checkpoint_%d.pth' % epoch)

        logger.append([optimizer.param_groups[0]['lr'], train_loss,
                       train_te_acc, train_te_iou])
    logger.close()
def main(args):
    """Train a PAN text detector on ICDAR2015 with a polynomial LR schedule.

    Builds the dataset/model, optionally resumes from a checkpoint, then
    trains for up to 600 epochs, saving the latest checkpoint every epoch
    and tracking the lowest-loss checkpoint separately.

    Args:
        args: parsed CLI namespace (backbone 'res50'|'res18', lr, resume).
    """
    num_classes = 6  # gt_text, gt_kernel, sim_vector
    trainset = IC15Trainset_PAN(kernel_scale=0.5, with_coord=False)
    trainloader = torch.utils.data.DataLoader(dataset=trainset,
                                              batch_size=16,
                                              shuffle=True,
                                              num_workers=1,
                                              drop_last=True,
                                              pin_memory=True)

    if args.backbone == 'res50':
        model = resnet50(pretrained=True, num_classes=num_classes)
    elif args.backbone == 'res18':
        model = Model()
    else:
        raise NotImplementedError

    max_epoch = 600
    start_epoch = 0
    start_lr = args.lr
    if args.resume is not None:
        if os.path.exists(args.resume):
            print('Resume From:', args.resume)
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            # Polynomial decay: resume at the LR the schedule would have
            # reached after start_epoch epochs.
            start_lr = args.lr * (1 - start_epoch / max_epoch) ** 0.9
            # Strip the 'module.' prefix added by DataParallel at save time.
            d = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                tmp = key[7:]
                d[tmp] = value
            model.load_state_dict(d)
        else:
            print('No Such Checkpoint File at', args.resume)
    else:
        print('Training From the Beginning')

    model = torch.nn.DataParallel(model).cuda()
    # BUGFIX: start_lr was computed for the resume case but never used --
    # the optimizer was always built with args.lr, so a resumed run
    # restarted at the initial learning rate. Use start_lr instead.
    optimizer = torch.optim.SGD(model.parameters(), lr=start_lr,
                                momentum=0.99, weight_decay=5e-4)
    scheduler = PolynomialLR(optimizer=optimizer,
                             max_iter=max_epoch * len(trainloader),
                             power=0.9)
    criterion = PANLoss()

    min_loss = 1000
    for epoch in range(start_epoch, max_epoch):
        loss = train(model, trainloader, optimizer, epoch,
                     scheduler=scheduler, criterion=criterion)
        checkpoint_info = {'epoch': epoch + 1,
                           'state_dict': model.state_dict(),
                           'lr': args.lr,
                           'optimizer': optimizer.state_dict()}
        # Always keep the latest checkpoint; additionally keep the best one.
        torch.save(checkpoint_info,
                   '/home/data1/zhm/ic15_PAN_res18_Poly.pth.tar')
        if loss < min_loss:
            min_loss = loss
            torch.save(checkpoint_info,
                       '/home/data1/zhm/best_models/ic15_PAN_res18_Poly_best.pth.tar')
import transform
import utils
from datasets import CocoSingleKPS

# Input resolution fed to the network (height, width).
IMAGE_SIZE = 256, 256

# Split CLI args: the data path is consumed here, the rest configure the
# training engine ('eng' is presumably imported earlier in the file --
# verify).
data_path, remaining_args = utils.get_args()
engine = eng.Engine.command_line_init(args=remaining_args)

# Resize, keep only keypoint targets, tensorize, then normalize with the
# dataset's channel statistics ('T' looks like torchvision.transforms --
# confirm against the file's imports).
data_transform = transform.Compose([
    transform.ResizeKPS(IMAGE_SIZE),
    transform.extract_keypoints,
    transform.ToTensor(),
    transform.ImageTargetWrapper(T.Normalize(CocoSingleKPS.MEAN, CocoSingleKPS.STD))
])

# Train on a restricted keypoint subset; one model "instruction" per keypoint.
selected_kps = ['left_eye', 'right_eye']
coco_train = CocoSingleKPS.from_data_path(data_path, train=True, transforms=data_transform,
                                          keypoints=selected_kps)
coco_val = CocoSingleKPS.from_data_path(data_path, train=False, transforms=data_transform,
                                        keypoints=selected_kps)

num_instructions = len(selected_kps)
model = models.resnet50(td_outplanes=1, num_instructions=num_instructions)
if len(selected_kps) == 1:
    # Single keypoint: the instructor only needs one pass.
    model.one_iteration()
model = models.SequentialInstructor(model, num_instructions)

# NOTE(review): 'eval' here is a project module (eval.Evaluator), shadowing
# the builtin; renaming would require touching its import outside this view.
train_eval = eval.Evaluator()
val_eval = eval.Evaluator()
plot = eval.Visualizer(CocoSingleKPS.MEAN, CocoSingleKPS.STD)

engine.run(model, coco_train, coco_val, train_eval, val_eval, plot_fn=plot)