def pred_file(filename, files, office):
    """Run the trained network over the VOC2007 test split and dump detections as text.

    For every image and every class index ``k`` of the network output, each
    detection whose score is greater than 0 is appended as one line
    ``<image id> <score> <c0> <c1> <c2> <c3>`` to ``filename`` and to
    ``files[k]``.

    Args:
        filename: Path of the text file collecting the detections of all
            classes. It is opened in append mode and is NOT cleared here.
        files: Sequence of per-class output paths, indexed by the class axis
            of the network output. Paths that already exist as files are
            deleted before evaluation starts.
        office: When truthy, a zero mean is passed to ``BaseTransform``;
            otherwise the mean depends on ``use_cv2``.
    """
    # Start from a clean slate for the per-class files.
    for path in files:
        if os.path.isfile(path):
            os.remove(path)

    print('Loading trained network ...')
    net = build_yolo('test', cfg, eval=True)
    net.load_state_dict(torch.load(cfg.trained_model))
    net.eval()
    net = net.cuda() if cfg.test_cuda else net

    print('Loading VOC dataset ...')
    if office:
        mean = (0, 0, 0)
    else:
        # The two tuples are each other's reverse (channel order differs per backend).
        mean = (104, 117, 123) if use_cv2 else (123, 117, 104)
    transform = BaseTransform(size=416, mean=mean, scale=True)
    dataset = VOCDetection(cfg.voc_root, [('2007', 'test')], transform, AnnotationTransform())
    num_images = len(dataset)

    _t = {'im_detect': Timer(), 'misc': Timer()}
    line_fmt = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'

    # Reusable input buffer; each image is copied into it before the forward pass.
    x = torch.randn((1, 3, 416, 416))
    x = x.cuda() if cfg.test_cuda else x

    for i in range(num_images):
        print('Testing image {:d}/{:d}....'.format(i + 1, num_images))
        im, _, h, w = dataset.pull_item(i)
        idx = dataset.ids[i]
        x.copy_(im.unsqueeze(0))

        _t['im_detect'].tic()
        with torch.no_grad():
            y = net(x)
        # TODO: unfinished - the elapsed time is measured but not reported yet.
        _t['im_detect'].toc(avg=False)

        for k in range(0, y.size(1)):
            dets = y[0, k, :]
            # Keep only the rows whose score (column 0) is strictly positive.
            mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, 5)
            if dets.size(0) == 0:
                continue

            boxes = dets[:, 1:]
            # Columns 0 and 2 are scaled by the image width, 1 and 3 by its height
            # (presumably normalised x/y corner coordinates - confirm against the net).
            boxes[:, 0::2] *= w
            boxes[:, 1::2] *= h
            scores = dets[:, 0].cpu().numpy()
            cls_dets = np.c_[boxes.cpu().numpy(), scores]

            lines = [
                line_fmt.format(idx, row[4], row[0], row[1], row[2], row[3])
                for row in cls_dets
            ]
            # Open each output once per class instead of once per detection row.
            for path in (filename, files[k]):
                with open(path, mode='a') as f:
                    f.writelines(lines)
def demo(img_list, save_path=None):
    """Detect objects in the given images, draw the boxes and optionally save the result.

    Args:
        img_list: Iterable of image file paths.
        save_path: Directory that receives the annotated images (same base
            name as the input). It is created when missing. When ``None``
            the detections are only printed and nothing is written to disk.
    """
    net = build_yolo('test', cfg)
    net.load_state_dict(torch.load(cfg.trained_model))
    if cfg.test_cuda:
        net = net.cuda()
    net.eval()

    if save_path is not None:
        # cv2.imwrite does not raise when the directory is missing, so make
        # sure it exists up front instead of silently losing the output.
        os.makedirs(save_path, exist_ok=True)

    # Note: the official version does not subtract the mean.
    transform = BaseTransform(size=416, mean=(0, 0, 0), scale=True)

    for img in img_list:
        img_name = os.path.basename(img)

        if use_cv2:
            image = cv2.imread(img)
            if image is None:
                # cv2.imread signals an unreadable/missing file by returning None.
                print('Cannot read image {}, skipping.'.format(img))
                continue
            h, w, _ = image.shape
        else:
            image = Image.open(img)
            w, h = image.size
        # Multiplier applied to the four box values of each detection.
        scale = np.array([w, h, w, h])

        x, _, _ = transform(image)
        # The official version uses RGB order, so reverse the channels for cv2 input.
        x = x[:, :, (2, 1, 0)] if use_cv2 else x
        x = torch.from_numpy(x).permute(2, 0, 1).unsqueeze(0)
        if cfg.test_cuda:
            x = x.cuda()
        with torch.no_grad():
            y = net(x)

        for i in range(y.size(1)):
            # Keep the detections of class i whose score (column 0) exceeds 0.5.
            keep = (y[0, i, :, 0] > 0.5)
            dets = y[0, i][keep].view(-1, 5)
            if dets.numel() == 0:
                continue
            print('Find {} {} for {}.'.format(dets.size(0), labelmap[i], img_name))
            scores = dets[:, 0].tolist()
            locs = dets[:, 1:].cpu().numpy() * scale
            for score, loc in zip(scores, locs):
                label = '{} {:.2f}'.format(labelmap[i], score)
                draw_box(image, label, loc, i)

        if save_path is None:
            continue
        out_file = os.path.join(save_path, img_name)
        if use_cv2:
            cv2.imwrite(out_file, image)
        else:
            image.save(out_file, quality=90)
from yolo.yolov2 import build_yolo
from yolo.utils_yolo.yolo_loss import YoloLoss
from utils.utils_train import weights_init, adjust_learning_rate

# Use the `_cv` dataset/augmentation modules when cv2 is importable,
# otherwise fall back to the `_pil` ones.
use_cv2 = importlib.util.find_spec('cv2') is not None
if use_cv2:
    from dataset.voc0712_cv import VOCDetection, AnnotationTransform, detection_collate
    from dataset.augment_cv import Augmentation
else:
    from dataset.voc0712_pil import VOCDetection, AnnotationTransform, detection_collate
    from dataset.augment_pil import Augmentation

# Create the output folder (including missing parents); a no-op when it already exists.
os.makedirs(cfg.save_folder, exist_ok=True)

net = build_yolo('train')

if cfg.resume:
    print('Resuming training, loading {}...'.format(cfg.resume))
    net.load_state_dict(torch.load(cfg.resume_weights))
else:
    # Fresh run: load the backbone weights, then initialise the extra layers.
    darknet_weights = torch.load(cfg.save_folder + cfg.basenet)
    print('Loading base network...')
    net.darknet.load_state_dict(darknet_weights)

    print('Initializing weights...')
    # initialize newly added layers' weights with xavier method
    net.conv.apply(weights_init)
    net.conv1.apply(weights_init)
    net.conv2.apply(weights_init)
def _read_array(weights_file, shape, dtype='float32'):
    """Read the next ``shape``-sized array of ``dtype`` from the open binary file.

    A copy is returned so the array is writable; an array built directly over
    the immutable ``bytes`` buffer is read-only, which ``torch.from_numpy``
    warns about.
    """
    n_bytes = int(np.prod(shape)) * np.dtype(dtype).itemsize
    return np.ndarray(shape=shape, dtype=dtype, buffer=weights_file.read(n_bytes)).copy()


def weight2pth(config_path, weights_path, output_path):
    """Convert a Darknet ``.weights`` file into a PyTorch ``state_dict`` saved as ``.pth``.

    The ``[convolutional]`` sections of the ``.cfg`` file are walked in order;
    for each one the bias, optional batch-norm parameters and convolution
    weights are read sequentially from the binary file and copied into
    consecutive entries of the network's ``state_dict``.

    Args:
        config_path: Path to the Darknet ``.cfg`` file. Its base name (up to
            the first dot) also selects the ``config.<name>`` module and the
            output file name.
        weights_path: Path to the Darknet ``.weights`` file.
        output_path: Directory in which ``<name>.pth`` is written.

    Raises:
        AssertionError: If either path does not have the expected extension.
    """
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(config_path)
    assert weights_path.endswith('.weights'), '{} is not a .weights file'.format(weights_path)

    # The context manager guarantees the file is closed even if conversion fails.
    with open(weights_path, 'rb') as weights_file:
        # weights header: the first 16 bytes, read as four int32 values
        weights_header = _read_array(weights_file, (4,), dtype='int32')
        print('Weights Header: ', weights_header)

        # convert config information
        unique_config_file = unique_config_sections(config_path)
        cfg_parser = configparser.ConfigParser()
        cfg_parser.read_file(unique_config_file)

        # network information
        cfgname = os.path.basename(config_path).split('.')[0]
        cfg = importlib.import_module('config.' + cfgname)
        net = build_yolo(cfg)
        net_dict = net.state_dict()
        keys = list(net_dict.keys())

        key_num, count = 0, 0
        last_filters = 3  # output channels of the previous conv section
        print('loading the weights ...')
        for section in cfg_parser.sections():
            if not section.startswith('convolutional'):
                continue

            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            bn = 'batch_normalize' in cfg_parser[section]
            activation = cfg_parser[section]['activation']

            # Three special cases whose input channels are hard-coded rather than
            # taken from the previous conv section (presumably layers fed by the
            # image or by route/concat layers - confirm against the .cfg).
            if section == 'convolutional_20':
                prev_filter = 512
            elif section == 'convolutional_21':
                prev_filter = 1280
            elif section == 'convolutional_0':
                prev_filter = 3
            else:
                prev_filter = last_filters

            weights_shape = (filters, prev_filter, size, size)
            weights_size = filters * prev_filter * size * size
            print('conv2d', 'bn' if bn else ' ', activation, weights_shape)

            conv_bias = _read_array(weights_file, (filters,))
            count += filters

            if bn:
                bn_weights = _read_array(weights_file, (3, filters))
                count += 3 * filters
                # NOTE(review): assumes the four state_dict entries after the conv
                # weight take, in order, bn_weights[0], the bias block, bn_weights[1]
                # and bn_weights[2], and that each conv+BN group spans exactly five
                # entries - verify against the key layout produced by build_yolo.
                net_dict[keys[key_num + 1]].copy_(torch.from_numpy(bn_weights[0]))
                net_dict[keys[key_num + 2]].copy_(torch.from_numpy(conv_bias))
                net_dict[keys[key_num + 3]].copy_(torch.from_numpy(bn_weights[1]))
                net_dict[keys[key_num + 4]].copy_(torch.from_numpy(bn_weights[2]))
            else:
                net_dict[keys[key_num + 1]].copy_(torch.from_numpy(conv_bias))

            # conv parameter
            conv_weights = _read_array(weights_file, weights_shape)
            count += weights_size
            net_dict[keys[key_num]].copy_(torch.from_numpy(conv_weights))

            key_num = key_num + 5 if bn else key_num + 1
            last_filters = filters

        # check the convert: whatever is left in the file was never consumed
        remaining_weights = len(weights_file.read()) // 4

    print('Read {} of {} from Darknet weights.'.format(count, count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    # save the net.state_dict
    torch.save(net_dict, os.path.join(output_path, cfgname + '.pth'))
score = y[0, i, j, 0].item() label_name = labelmap[i] pt = (y[0, i, j, 1:] * scale).cpu().numpy() coords = (pt[0], pt[1], pt[2], pt[3]) pred_num += 1 with open(filename, mode='a') as f: f.write( str(pred_num) + ' label: ' + ' || '.join('{:4.1f}'.format(c) for c in coords) + ' || ' + label_name + ' || ' + '{:4.2f}'.format(score) + '\n') j += 1 if __name__ == '__main__': file = get_output_dir(cfg.output_folder, 'test') filename = file + '/test.txt' open(filename, 'w').close() # clean the txt file # load network net = build_yolo('test') net.load_state_dict(torch.load(cfg.trained_model)) net.eval() print('Finished loading model !') if cfg.test_cuda: net = net.cuda() # load dataset testset = VOCDetection(cfg.voc_root, [('2007', 'test')], None, AnnotationTransform()) transform = BaseTransform(size=416, mean=(0, 0, 0), scale=True) test(net, testset, filename, transform)
scores = dets[:, 0].cpu().numpy() cls_dets = np.c_[boxes.cpu().numpy(), scores] for j in range(cls_dets.shape[0]): with open(filename, mode='a') as f: f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format( idx, cls_dets[j, 4], cls_dets[j, 0], cls_dets[j, 1], cls_dets[j, 2], cls_dets[j, 3])) with open(files[k], mode='a') as f: f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format( idx, cls_dets[j, 4], cls_dets[j, 0], cls_dets[j, 1], cls_dets[j, 2], cls_dets[j, 3])) if __name__ == '__main__': # load net net = build_yolo('test', cfg, eval=True) net.load_state_dict(torch.load(cfg.trained_model)) net.eval() if cfg.test_cuda: net = net.cuda() # load dataset if cfg.use_office: mean = (0, 0, 0) else: mean = (104, 117, 123) if use_cv2 else (123, 117, 104) transform = BaseTransform(size=416, mean=mean, scale=True) dataset = VOCDetection(cfg.voc_root, [('2007', 'test')], transform, AnnotationTransform()) # save file output_dir = get_output_dir(cfg.output_folder, 'eval') det_file = os.path.join(output_dir, 'detections.pkl')