Esempio n. 1
0
 def __init__(self, dataset='VCLA_GAZE'):
     """Resolve paths, metadata and the dataset factory for *dataset*.

     Args:
         dataset: one of 'WNP', 'VCLA_GAZE', 'CAD', 'Breakfast'.

     Raises:
         KeyError: if *dataset* is not one of the known dataset names.
     """
     # NOTE(review): all four Paths()/metadata objects are built eagerly even
     # though only one is used — kept for backward compatibility.
     self.paths_dict = {
         'WNP': wnp_config.Paths(),
         'VCLA_GAZE': vcla_gaze_config.Paths(),
         'CAD': cad_config.Paths(),
         'Breakfast': breakfast_config.Paths()
     }
     self.metadata_dict = {
         'WNP': WNP_METADATA(),
         'VCLA_GAZE': VCLA_METADATA(),
         'CAD': CAD_METADATA(),
         'Breakfast': BREAKFAST_METADATA()
     }
     # Each dataset class is already a callable taking
     # (path, mode, task, subsample); the previous lambda wrappers forwarded
     # those same positional args verbatim, so store the classes directly.
     self.dataset_dict = {
         'WNP': wnp.WNP,
         'VCLA_GAZE': vcla_gaze.VCLA_GAZE,
         'CAD': cad.CAD,
         'Breakfast': breakfast.Breakfast
     }
     self.dataset = self.dataset_dict[dataset]
     self.paths = self.paths_dict[dataset]
     self.metadata = self.metadata_dict[dataset]
Esempio n. 2
0
def main():
    """Visual sanity-check: load one VCLA_GAZE_FEATURE sample and plot it."""
    paths = vcla_gaze_config.Paths()
    t0 = time.time()

    # Load the cached video list and keep a random 10% subset of sequences.
    with open(os.path.join(paths.tmp_root, 'image_list.p'), 'rb') as f:
        video_list = pickle.load(f)
    subset_ratio = 0.1
    shuffled = np.random.permutation(video_list)
    sequence_ids = shuffled[:int(subset_ratio * len(shuffled))]

    imsize = (224, 224)
    # Standard ImageNet normalization, inlined into the transform pipeline.
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225]),
    ])
    training_set = VCLA_GAZE_FEATURE(paths, sequence_ids, transform, imsize,
                                     'test', 'activity', verbose=True)

    # Pull a single sample straight through __getitem__.
    (sequence_id, rgb_image, depth_image, aligned_image, activity,
     object_labels, object_images, affordance, skeleton,
     object_pair) = training_set[0]

    # NOTE(review): `metadata` is not defined in this function — presumably a
    # module-level global; confirm against the enclosing file.
    utils.visualize_bbox_rgb(sequence_id,
                             (rgb_image.permute(1, 2, 0), object_pair),
                             metadata.objects)
    utils.visualize_bbox_image(sequence_id, (object_labels, object_images),
                               metadata.objects)
    utils.visualize_skeleton_depth(sequence_id, (aligned_image, skeleton))
    print('Time elapsed: {}s'.format(time.time() - t0))
    print(sequence_id)
Esempio n. 3
0
def main():
    """Smoke-test the VCLA_GAZE affordance dataset by fetching one item."""
    dataset = VCLA_GAZE(config.Paths(), 'train', 'affordance')
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=32,
                                              shuffle=True,
                                              num_workers=1,
                                              pin_memory=True)

    # Index the dataset directly to exercise __getitem__ end to end.
    (features_batch, labels_batch, activities, sequence_ids, total_lengths,
     obj_nums, ctc_labels, ctc_lengths, probs_batch,
     additional) = dataset[0]

    print('Finished')
Esempio n. 4
0
def main():
    """Entry point: learn the prior using the configured paths."""
    learn_prior(config.Paths())
Esempio n. 5
0
def main():
    """Run data parsing and report the elapsed wall-clock time."""
    paths = config.Paths()
    t_start = time.time()
    parse_data(paths)
    print('Time elapsed: {}'.format(time.time() - t_start))
Esempio n. 6
0
def main():
    """Reformat the VCLA gaze data and log the elapsed time via tqdm."""
    t0 = time.time()
    reformat_data(vcla_gaze_config.Paths(), verbose=False)
    tqdm.write('Time elapsed: {:.2f}s'.format(time.time() - t0))
Esempio n. 7
0
def parse_args():
    """Parse command-line options for VCLA feature extraction.

    Returns:
        argparse.Namespace with the CLI options plus three derived
        attributes: ``paths``, ``save_path`` and ``resume``.
    """
    def restricted_float(x, inter):
        # Validate that x parses as a float inside the closed interval.
        x = float(x)
        if x < inter[0] or x > inter[1]:
            raise argparse.ArgumentTypeError("{} not in range [{}, {}]".format(
                x, inter[0], inter[1]))
        return x

    def str2bool(v):
        # argparse `type=bool` is broken: bool('False') is True, so any
        # value passed on the CLI enabled the flag.  Parse the usual
        # textual spellings explicitly instead.
        if isinstance(v, bool):
            return v
        if v.lower() in ('yes', 'true', 't', 'y', '1'):
            return True
        if v.lower() in ('no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError(
            'Boolean value expected, got {!r}'.format(v))

    paths = vcla_gaze_config.Paths()
    model_name = 'resnet'
    tasks = ['affordance', 'activity']
    task = tasks[0]

    parser = argparse.ArgumentParser(description='VCLA feature extraction')
    parser.add_argument('--task',
                        default=task,
                        type=str,
                        help='Default task for network training')
    parser.add_argument(
        '--cuda',
        default=torch.cuda.is_available(),
        type=str2bool,
        help='Option flag for using cuda training '
             '(default: cuda availability)')
    parser.add_argument(
        '--distributed',
        default=False,
        type=str2bool,
        help='Option flag for using distributed training (default: False)')
    parser.add_argument(
        '--model',
        default=model_name,
        type=str,
        help='model to use when extracting features (default: resnet)')
    parser.add_argument('--workers',
                        default=10,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 10)')
    parser.add_argument('--start_epoch',
                        default=0,
                        type=int,
                        metavar='N',
                        help='starting epoch of training (default: 0)')
    parser.add_argument('--epochs',
                        default=10,
                        type=int,
                        metavar='N',
                        help='number of epochs for training (default: 10)')
    parser.add_argument('--batch_size',
                        default=16,
                        type=int,
                        metavar='N',
                        help='batch size for training (default: 16)')
    parser.add_argument(
        '--lr',
        default=1e-3,
        type=float,
        help='learning rate for the feature extraction process (default: 1e-3)'
    )
    parser.add_argument(
        '--lr_decay',
        type=lambda x: restricted_float(x, [0.01, 1]),
        help='decay rate of learning rate (must be within [0.01, 1])')
    parser.add_argument('--log_interval',
                        type=int,
                        default=50,
                        metavar='N',
                        help='Intervals for logging (default: 50 batches)')
    parser.add_argument(
        '--save_interval',
        type=int,
        default=1,
        metavar='N',
        help='Intervals for saving checkpoint (default: 1 epoch)')

    parser.add_argument(
        '--train_ratio',
        type=float,
        default=0.6,
        help='ratio of data for training purposes (default: 0.6)')
    parser.add_argument(
        '--val_ratio',
        type=float,
        default=0.1,
        help='ratio of data for validation purposes (default: 0.1)')

    parser.add_argument(
        '--eval',
        default=False,
        type=str2bool,
        help='indicates whether need to run evaluation on testing set')
    parser.add_argument('--save',
                        default=False,
                        type=str2bool,
                        help='flag for saving likelihood')
    args = parser.parse_args()
    # Attach derived filesystem locations consumed downstream.
    args.paths = paths
    args.save_path = os.path.join(paths.inter_root, 'finetune', args.task)
    args.resume = os.path.join(paths.checkpoint_root, 'finetune',
                               '{}'.format(model_name), '{}'.format(args.task))
    return args