def get_model(paths, feature_type):
    """Build the finetuned ROI feature network and load its best checkpoint.

    Args:
        paths: namespace with a ``tmp_root`` attribute pointing at the
            experiment scratch directory.
        feature_type: one of ``'vgg'``, ``'resnet'`` or ``'densenet'``.

    Returns:
        The network wrapped in ``DataParallel``, moved to GPU, with the
        ``model_best.pth`` state dict loaded.

    Raises:
        ValueError: if ``feature_type`` is not recognized.
    """
    num_classes = len(metadata.action_classes)
    if feature_type == 'vgg':
        feature_network = roi_feature_model.Vgg16(num_classes=num_classes)
    elif feature_type == 'resnet':
        feature_network = roi_feature_model.Resnet152(num_classes=num_classes)
    elif feature_type == 'densenet':
        feature_network = roi_feature_model.Densenet(num_classes=num_classes)
    else:
        raise ValueError('feature type not recognized')

    # VGG parallelizes only the convolutional trunk; its fc classifier does
    # not split well across GPUs.  (The original also tested for 'alexnet'
    # here, but that branch was unreachable: 'alexnet' raises ValueError
    # above, so the dead condition has been removed.)
    if feature_type.startswith('vgg'):
        feature_network.features = torch.nn.DataParallel(
            feature_network.features)
        feature_network.cuda()
    else:
        feature_network = torch.nn.DataParallel(feature_network).cuda()

    checkpoint_dir = os.path.join(paths.tmp_root, 'checkpoints', 'hico',
                                  'finetune_{}'.format(feature_type))
    best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
    checkpoint = torch.load(best_model_file)
    feature_network.load_state_dict(checkpoint['state_dict'])
    return feature_network
def extract_node_features(paths, mode):
    """Extract a 1000-d ResNet feature for every annotated node (ROI) in
    every frame of every video, saving one ``.npy`` file per video.

    Args:
        paths: namespace with a ``data_root`` attribute.
        mode: dataset split subdirectory name (e.g. ``'train'``).

    Side effects:
        Writes ``vid_<vid>_resnet_node_feature.npy`` files under
        ``<data_root>/<mode>/node_feature_1000/``.
    """
    input_h, input_w = 224, 224
    node_feature_len = 1000
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(metadata.train_mean_value,
                                         metadata.train_std_value)])

    # NOTE(review): no finetuned checkpoint is loaded here, so features come
    # from whatever weights Resnet152's constructor provides -- confirm this
    # is intended (extract_edge_features carries commented-out loading code).
    feature_network = roi_feature_model.Resnet152(
        num_classes=len(metadata.node_classes))
    feature_network = torch.nn.DataParallel(feature_network).cuda()
    # Fix: run in eval mode so BatchNorm uses its running statistics instead
    # of per-ROI batch statistics (batch size is 1 here).
    feature_network.eval()

    # Fix: np.save does not create intermediate directories.
    output_dir = os.path.join(paths.data_root, mode, 'node_feature_1000')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Processed annotations; file names look like 'vid_<vid>_ant_all.npy'.
    ant_files = [f for f in sorted(listdir(
        os.path.join(paths.data_root, mode, 'ant_processed')))
        if f.endswith('_ant_all.npy')]
    for ant_f in ant_files:
        vid = ant_f[4:-12]  # strip the 'vid_' prefix and '_ant_all.npy' suffix
        print('node feature vid {}'.format(vid))
        ant_all = np.load(os.path.join(paths.data_root, mode,
                                       'ant_processed', ant_f))
        node_feature_all = list()
        for frame_ind in range(len(ant_all)):
            ant = ant_all[frame_ind]
            # Frame images are 1-indexed, zero-padded to five digits.
            orig_img = scipy.misc.imread(
                os.path.join(paths.data_root, mode, 'img', vid,
                             '{}.png'.format(str(frame_ind + 1).zfill(5))),
                mode='RGB')
            node_feature_tmp = np.zeros((len(ant), node_feature_len))
            for ant_ind in range(len(ant)):
                # 'pos' is an [x1, y1, x2, y2] box; crop is inclusive of x2/y2.
                pos = ant[ant_ind]['pos']
                roi_img = orig_img[int(pos[1]):(int(pos[3]) + 1),
                                   int(pos[0]):(int(pos[2]) + 1), :]
                roi_img = transform(cv2.resize(
                    roi_img, (input_h, input_w),
                    interpolation=cv2.INTER_LINEAR))
                roi_img = torch.autograd.Variable(roi_img.unsqueeze(0)).cuda()
                feature, _ = feature_network(roi_img)
                node_feature_tmp[ant_ind, :] = feature.data.cpu().numpy()
            node_feature_all.append(node_feature_tmp)
        np.save(os.path.join(
            output_dir, 'vid_{}_resnet_node_feature'.format(vid)),
            node_feature_all)
def extract_edge_features(paths, mode):
    """Extract a 1000-d ResNet feature for every (human, node) edge in every
    frame of every video, saving one ``.npy`` file per video.

    The edge ROI is the union box of the two nodes; it is mirrored
    horizontally when the first node's center lies to the right of the
    second's, so edges are orientation-normalized.

    Args:
        paths: namespace with a ``data_root`` attribute.
        mode: dataset split subdirectory name (e.g. ``'train'``).

    Side effects:
        Writes ``vid_<vid>_resnet_edge_feature.npy`` files under
        ``<data_root>/<mode>/edge_feature_1000/``.
    """
    input_h, input_w = 224, 224
    edge_feature_len = 1000
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(metadata.train_mean_value,
                                         metadata.train_std_value)])

    # NOTE(review): checkpoint loading is disabled (was commented out), so
    # features come from the constructor's weights -- confirm intended.
    feature_network = roi_feature_model.Resnet152(
        num_classes=len(metadata.node_classes))
    feature_network = torch.nn.DataParallel(feature_network).cuda()
    # Fix: run in eval mode so BatchNorm uses running statistics.
    feature_network.eval()

    # Fix: np.save does not create intermediate directories.
    output_dir = os.path.join(paths.data_root, mode, 'edge_feature_1000')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    ant_files = [f for f in sorted(listdir(
        os.path.join(paths.data_root, mode, 'ant_processed')))
        if f.endswith('_ant_all.npy')]
    for ant_f in ant_files:
        vid = ant_f[4:-12]  # strip the 'vid_' prefix and '_ant_all.npy' suffix
        print('edge feature vid {}'.format(vid))
        ant_all = np.load(os.path.join(paths.data_root, mode,
                                       'ant_processed', ant_f))
        edge_feature_all = list()
        for frame_ind in range(len(ant_all)):
            ant = ant_all[frame_ind]
            # Count human and object nodes in the current frame.
            human_num = 0
            obj_num = 0
            for i in range(len(ant)):
                if ant[i]['label'].startswith('Person'):
                    human_num += 1
                elif ant[i]['label'].startswith('Object'):
                    obj_num += 1
            orig_img = scipy.misc.imread(
                os.path.join(paths.data_root, mode, 'img', vid,
                             '{}.png'.format(str(frame_ind + 1).zfill(5))),
                mode='RGB')
            node_num = human_num + obj_num
            edge_feature_tmp_per_frame = np.zeros(
                (node_num, node_num, edge_feature_len))
            # Only human -> node edges are extracted; object -> object rows
            # stay zero.
            for ant_ind1 in range(human_num):
                for ant_ind2 in range(node_num):
                    if ant_ind2 == ant_ind1:
                        continue
                    pos1 = ant[ant_ind1]['pos']
                    pos2 = ant[ant_ind2]['pos']
                    # Union box of the two nodes, inclusive of max corners.
                    min_xy = np.minimum([int(pos1[0]), int(pos1[1])],
                                        [int(pos2[0]), int(pos2[1])])
                    max_xy = np.maximum([int(pos1[2]), int(pos1[3])],
                                        [int(pos2[2]), int(pos2[3])])
                    # Fix: removed two leftover scipy.misc.imshow(...) debug
                    # calls that opened a blocking viewer window per ROI.
                    roi_img = orig_img[min_xy[1]:(max_xy[1] + 1),
                                       min_xy[0]:(max_xy[0] + 1), :]
                    # Mirror so node 1 is always on the left.
                    pos1_x_center = (int(pos1[0]) + int(pos1[2])) * 1.0 / 2
                    pos2_x_center = (int(pos2[0]) + int(pos2[2])) * 1.0 / 2
                    if pos1_x_center > pos2_x_center:
                        roi_img = np.fliplr(roi_img)
                    roi_img = transform(cv2.resize(
                        roi_img, (input_h, input_w),
                        interpolation=cv2.INTER_LINEAR))
                    roi_img = torch.autograd.Variable(
                        roi_img.unsqueeze(0)).cuda()
                    feature, _ = feature_network(roi_img)
                    edge_feature_tmp_per_frame[ant_ind1, ant_ind2, :] = \
                        feature.data.cpu().numpy()
            edge_feature_all.append(edge_feature_tmp_per_frame)
        np.save(os.path.join(
            output_dir, 'vid_{}_resnet_edge_feature'.format(vid)),
            edge_feature_all)
def main(args):
    """Finetune an ROI feature network on HICO and checkpoint the best model.

    Args:
        args: argparse namespace; reads ``feature_type``, ``world_size``,
            ``dist_backend``, ``dist_url``, ``lr``, ``resume``, ``data``,
            ``batch_size``, ``workers``, ``start_epoch`` and ``epochs``.

    Raises:
        ValueError: if ``args.feature_type`` is not recognized.
    """
    best_prec1 = 0.0
    args.distributed = args.world_size > 1

    if args.distributed:
        torch.distributed.init_process_group(backend=args.dist_backend,
                                             init_method=args.dist_url,
                                             world_size=args.world_size)

    # Create the model.
    if args.feature_type == 'vgg':
        model = roi_feature_model.Vgg16(
            num_classes=len(metadata.action_classes))
    elif args.feature_type == 'resnet':
        model = roi_feature_model.Resnet152(
            num_classes=len(metadata.action_classes))
    elif args.feature_type == 'densenet':
        model = roi_feature_model.Densenet(
            num_classes=len(metadata.action_classes))
    else:
        # Fix: the original fell through silently and crashed later with
        # NameError on 'model'; fail fast like get_model does.
        raise ValueError('feature type not recognized')

    input_imsize = (224, 224)
    if not args.distributed:
        # VGG/AlexNet parallelize only the conv trunk; the fc classifier
        # stays on one GPU.
        if args.feature_type.startswith(
                'alexnet') or args.feature_type.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # Loss and optimizer.
    criterion = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Optionally resume from a checkpoint (best-effort: missing file only
    # prints a notice and training starts fresh).
    if args.resume:
        best_model_file = os.path.join(args.resume, 'model_best.pth')
        if os.path.isfile(best_model_file):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(best_model_file)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(best_model_file))

    torch.backends.cudnn.benchmark = True

    # Data loading.
    normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        normalize,
    ])
    train_dataset = roi_feature_model.HICO(args.data, input_imsize,
                                           transform, 'train')
    test_dataset = roi_feature_model.HICO(args.data, input_imsize,
                                          transform, 'test')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    # NOTE(review): no separate validation split -- the test set doubles as
    # the validation set for model selection; confirm this is intended.
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.workers,
                                              pin_memory=False)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        # Train for one epoch.
        train(train_loader, model, criterion, optimizer, epoch)
        # Evaluate on epoch 0 (sanity) and from epoch 5 onward.
        if epoch == 0 or epoch >= 5:
            prec1 = validate(test_loader, model, criterion)
            # Remember best prec@1 and save checkpoint.
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            print('Best precision: {:.03f}'.format(best_prec1))
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.feature_type,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)

    test_prec = validate(test_loader, model, criterion, test=True)
    print('Testing precision: {:.04f}'.format(test_prec))