예제 #1
0
def _extract_features(db_name,
                      db_path,
                      feat_type,
                      output_path,
                      kwargs_str=None):
    """Compute one feature per dataset image and cache them all on disk.

    The features are collected in a dictionary keyed by image path and
    written with ``np.save``. Because the payload is a ``dict``, numpy
    stores it as a pickled object; reading it back requires
    ``np.load(..., allow_pickle=True)``. Note that ``np.save`` appends a
    ``.npy`` suffix when ``output_path`` does not already end with one.

    Args:
        db_name: Dataset identifier passed to ``dbac_data.IDataset.factory``.
        db_path: Dataset location passed to the same factory.
        feat_type: Extractor identifier passed to
            ``dbac_feature_ext.IFeatureExtractor.factory``.
        output_path: Destination handed to ``np.save``.
        kwargs_str: Extra options; parsed by ``dbac_util.get_kwargs_dic`` and
            forwarded as keyword arguments to the extractor factory.
    """
    # read dataset
    logger.info("Reading dataset and split")
    db = dbac_data.IDataset.factory(db_name, db_path)

    # set up feature extractor function
    logger.info("Configuring Features Extractor")
    kwargs_dic = dbac_util.get_kwargs_dic(kwargs_str)
    logger.info("Kwargs dictionary: {}".format(kwargs_dic))
    feat_extractor = dbac_feature_ext.IFeatureExtractor.factory(
        feat_type, **kwargs_dic)
    feat_extractor.load()

    # compute features
    logger.info("Computing features...")
    images_path = db.images_path
    num_images = len(images_path)
    feat_dic = {}
    # Count from 1 so the progress message reports how many images have
    # actually been processed (the previous 0-based index logged "0/N" after
    # the first image and never reported completion).
    for count, image_path in enumerate(images_path, start=1):
        feat_dic[image_path] = feat_extractor.compute(image_path)
        if count % 1000 == 0 or count == num_images:
            logger.info("Cached features for {}/{} images.".format(
                count, num_images))

    # save dictionary of features
    np.save(output_path, feat_dic)
    logger.info(
        "Dictionary of cached features saved to {}.".format(output_path))
def _learn_primitives(db_name,
                      db_dir,
                      split_file,
                      prim_rpr_file,
                      ex_size=10,
                      num_ex=10,
                      subset_prim_ids=None,
                      kwargs_str=None):
    """Learn a primitive collection on the training images and save it.

    Args:
        db_name: Dataset identifier passed to ``dbac_data.IDataset.factory``.
        db_dir: Dataset location passed to the same factory.
        split_file: Split description loaded through ``db.load_split``.
        prim_rpr_file: Destination handed to the collection's ``save``.
        ex_size: Forwarded to ``learn`` as ``ex_size``.
        num_ex: Forwarded to ``learn`` as ``num_ex``.
        subset_prim_ids: Primitive ids to learn. When ``None``, the indices
            where ``db.valid_primitives`` is truthy are used.
        kwargs_str: Extra options; parsed by ``dbac_util.get_kwargs_dic`` and
            forwarded to both factories and to ``learn``.
    """
    # Parse the free-form options first; they are reused by every factory.
    options = dbac_util.get_kwargs_dic(kwargs_str)
    logger.info("Kwargs dictionary: {}".format(options))

    # Load the dataset and keep only the samples tagged as 'train'.
    logger.info("Reading dataset and split")
    dataset = dbac_data.IDataset.factory(db_name, db_dir)
    dataset.load_split(split_file)
    in_train = (
        dataset.images_split == dbac_data.DB_IMAGE_SPLITS.index('train'))
    train_imgs_path = dataset.images_path[in_train]
    train_labels = dataset.labels[in_train]

    # Default to every primitive flagged as valid by the dataset.
    if subset_prim_ids is None:
        valid_indices = np.where(dataset.valid_primitives)[0]
        subset_prim_ids = valid_indices.tolist()
    logger.info("Selected Primitives: {}".format(subset_prim_ids))

    # The extractor type is fixed to the second entry of FEAT_TYPE.
    logger.info("Configuring Features Extractor")
    extractor_type = dbac_feature_ext.FEAT_TYPE[1]
    extractor = dbac_feature_ext.IFeatureExtractor.factory(
        extractor_type, **options)
    extractor.load()

    # The collection type is fixed to the first entry of PRIMITIVE_TYPES
    # (presumably exemplar SVMs, per the original comment -- confirm).
    collection_type = dbac_primitives.PRIMITIVE_TYPES[0]
    collection = dbac_primitives.IPrimitiveCollection.factory(
        collection_type, **options)
    logger.info("Learning Primitives...")
    collection.learn(
        train_imgs_path,
        train_labels,
        extractor,
        num_ex=num_ex,
        ex_size=ex_size,
        prim_ids=subset_prim_ids,
        **options)

    collection.save(prim_rpr_file)
    logger.info("Primitives saved to {}.".format(prim_rpr_file))
예제 #3
0
def _train(db_name, db_dir, db_split_file, db_comb_file, primitives_file, model_name, output_dir, kwargs_str=None):
    """Train a model on the training partition and save it as ``model.npy``.

    Args:
        db_name: Dataset identifier passed to ``dbac_data.IDataset.factory``.
        db_dir: Dataset location passed to the same factory.
        db_split_file: First argument of ``db.load_split``.
        db_comb_file: Second argument of ``db.load_split``. When truthy the
            model is trained on ``db.combinations``; otherwise on
            ``db.expressions``.
        primitives_file: File loaded into the primitive collection.
        model_name: Model identifier passed to ``dbac_model.IModel.factory``.
        output_dir: Directory in which ``model.npy`` is written.
        kwargs_str: Extra options; parsed by ``dbac_util.get_kwargs_dic`` and
            forwarded to the factories and to ``model.learning``.
    """
    # Parse the free-form options shared by all components.
    options = dbac_util.get_kwargs_dic(kwargs_str)
    logger.info("Kwargs dictionary: {}".format(options))

    # Load the dataset and select the training images and labels.
    logger.info("Reading dataset and split")
    dataset = dbac_data.IDataset.factory(db_name, db_dir)
    dataset.load_split(db_split_file, db_comb_file)
    in_train = dataset.images_split == dbac_data.DB_IMAGE_SPLITS.index('train')
    train_imgs_path = dataset.images_path[in_train]
    train_labels = dataset.labels[in_train]

    # Pick the training expressions: plain ones, or combinations if provided.
    if not db_comb_file:
        logger.info("Loading single expressions...")
        exp_train_id = dbac_data.DB_EXP_SPLITS.index('train')
        train_exps = dataset.expressions[dataset.expressions_split == exp_train_id]
    else:
        logger.info("Loading compositions...")
        comb_train_id = dbac_data.DB_COMB_SPLITS.index('train')
        train_exps = dataset.combinations[dataset.combinations_split == comb_train_id]

    # Feature extractor: fixed to the second entry of FEAT_TYPE.
    logger.info("Configuring Features Extractor")
    extractor = dbac_feature_ext.IFeatureExtractor.factory(
        dbac_feature_ext.FEAT_TYPE[1], **options)
    extractor.load()

    # Primitive collection: fixed to the first entry of PRIMITIVE_TYPES.
    logger.info("Configuring Primitive Collection")
    primitives = dbac_primitives.IPrimitiveCollection.factory(
        dbac_primitives.PRIMITIVE_TYPES[0], **options)
    primitives.load(primitives_file)

    # Build the model in training mode, fit it and persist it.
    logger.info("Configuring Model")
    model = dbac_model.IModel.factory(
        model_name, extractor, primitives, is_train=True, **options)
    logger.info("Training...")
    model.learning(train_imgs_path, train_labels, train_exps, **options)

    model_file = os.path.join(output_dir, 'model.npy')
    model.save(model_file)
    logger.info("Model Saved to {}".format(model_file))
def _test(db_name, db_dir, db_split_file, db_comb_file, primitives_file, model_name, model_file, output_dir, kwargs_str=None):
    """Score a trained model on every image partition and save a report.

    The model is scored on three settings: training expressions on training
    images, training expressions on validation images, and test expressions
    on test images. For each setting (``train``, ``val``, ``test``) the
    report dictionary receives four entries, ``<setting>_exps``,
    ``<setting>_imgs``, ``<setting>_gt`` and ``<setting>_pred``. The
    dictionary is written with ``np.save`` to ``results.npy`` inside
    ``output_dir`` (as a pickled object, so loading needs
    ``allow_pickle=True``).

    Args:
        db_name: Dataset identifier passed to ``dbac_data.IDataset.factory``.
        db_dir: Dataset location passed to the same factory.
        db_split_file: First argument of ``db.load_split``.
        db_comb_file: Second argument of ``db.load_split``. When truthy the
            expressions come from ``db.combinations``; otherwise from
            ``db.expressions``.
        primitives_file: File loaded into the primitive collection.
        model_name: Model identifier passed to ``dbac_model.IModel.factory``.
        model_file: File loaded into the model.
        output_dir: Directory receiving ``results.npy``. It is created if it
            does not exist yet.
        kwargs_str: Extra options; parsed by ``dbac_util.get_kwargs_dic`` and
            forwarded to the factories and to ``model.score``.
    """
    # processing kwargs
    kwargs_dic = dbac_util.get_kwargs_dic(kwargs_str)
    logger.info("Kwargs dictionary: {}".format(kwargs_dic))

    # Resolve and prepare the output location up front: a missing directory
    # would otherwise only surface as a failing np.save after all the scoring
    # work is done. An empty output_dir means the current directory.
    result_file = os.path.join(output_dir, 'results.npy')
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # read dataset and partitions
    logger.info("Reading dataset and split")
    db = dbac_data.IDataset.factory(db_name, db_dir)
    db.load_split(db_split_file, db_comb_file)
    partitions = ('train', 'val', 'test')
    imgs_path, labels = {}, {}
    for part in partitions:
        # One boolean mask per partition selects both paths and labels.
        part_mask = db.images_split == dbac_data.DB_IMAGE_SPLITS.index(part)
        imgs_path[part] = db.images_path[part_mask]
        labels[part] = db.labels[part_mask]

    if db_comb_file:
        logger.info("Loading compositions...")
        train_exps = db.combinations[db.combinations_split == dbac_data.DB_COMB_SPLITS.index('train')]
        test_exps = db.combinations[db.combinations_split == dbac_data.DB_COMB_SPLITS.index('test')]
    else:
        logger.info("Loading single expressions...")
        train_exps = db.expressions[db.expressions_split == dbac_data.DB_EXP_SPLITS.index('train')]
        test_exps = db.expressions[db.expressions_split == dbac_data.DB_EXP_SPLITS.index('test')]
    # Validation images are evaluated against the training expressions.
    exps = {'train': train_exps, 'val': train_exps, 'test': test_exps}

    # Set up feature extractor
    logger.info("Configuring Features Extractor")
    feat_extractor = dbac_feature_ext.IFeatureExtractor.factory(dbac_feature_ext.FEAT_TYPE[1], **kwargs_dic)
    feat_extractor.load()

    # set up primitive collection
    logger.info("Configuring Primitive Collection")
    prim_collection = dbac_primitives.IPrimitiveCollection.factory(dbac_primitives.PRIMITIVE_TYPES[0], **kwargs_dic)
    prim_collection.load(primitives_file)

    # setup model
    logger.info("Configuring Model")
    model = dbac_model.IModel.factory(model_name, feat_extractor, prim_collection, **kwargs_dic, is_train=False)
    model.load(model_file)

    # test model
    scores = {}
    logger.info("Testing on seen expressions on training images...")
    scores['train'] = model.score(imgs_path['train'], exps['train'], **kwargs_dic)
    logger.info("Testing on seen expressions on validation images...")
    scores['val'] = model.score(imgs_path['val'], exps['val'], **kwargs_dic)
    logger.info("Testing on unseen expressions on test images...")
    scores['test'] = model.score(imgs_path['test'], exps['test'], **kwargs_dic)

    # save results
    logger.info("Computing results.")
    report_dic = {}
    for key in partitions:
        part_labels = labels[key]
        # Ground truth has the same shape and dtype as the scores: one row
        # per expression, filled by evaluating the expression on the labels.
        ground_truth = np.zeros_like(scores[key])
        for idx, exp_lst in enumerate(exps[key]):
            exp_tree = dbac_expression.list2exp_parse(exp_lst)
            # Each expression variable names a label column.
            var_dic = {p: part_labels[:, int(p)] for p in dbac_expression.get_vars(exp_tree)}
            ground_truth[idx] = dbac_expression.eval_exp(exp_tree, var_dic)
        # fill report dictionary
        report_dic['_'.join([key, 'exps'])] = exps[key]
        report_dic['_'.join([key, 'imgs'])] = imgs_path[key]
        report_dic['_'.join([key, 'gt'])] = ground_truth
        report_dic['_'.join([key, 'pred'])] = scores[key]
    np.save(result_file, report_dic)
    logger.info("Results file saved to {}.".format(result_file))
def _test_primitives(db_name,
                     db_dir,
                     split_file,
                     prim_rpr_file,
                     subset_prim_ids=None,
                     kwargs_str=None):
    """Score learned primitives on every image partition and save a report.

    For each of the ``train``, ``val`` and ``test`` partitions the report
    dictionary receives ``<partition>_exps`` (the primitive ids),
    ``<partition>_imgs`` (the image paths), ``<partition>_gt`` (the label
    columns of the selected primitives, transposed) and
    ``<partition>_pred`` (one row of scores per primitive). The report is
    written with ``np.save`` next to ``prim_rpr_file``, using its name with
    the extension replaced by ``.results.npy``.

    Args:
        db_name: Dataset identifier passed to ``dbac_data.IDataset.factory``.
        db_dir: Dataset location passed to the same factory.
        split_file: Split description loaded through ``db.load_split``.
        prim_rpr_file: File loaded into the primitive collection.
        subset_prim_ids: Primitive ids to evaluate. ``None`` selects every id
            of the collection; otherwise only the ids also present in the
            collection are kept.
        kwargs_str: Extra options; parsed by ``dbac_util.get_kwargs_dic`` and
            forwarded to both factories.
    """
    # Parse the free-form options shared by all components.
    options = dbac_util.get_kwargs_dic(kwargs_str)
    logger.info("Kwargs dictionary: {}".format(options))

    # Load the dataset and gather (name, image paths, labels) per partition.
    logger.info("Reading dataset and split")
    dataset = dbac_data.IDataset.factory(db_name, db_dir)
    dataset.load_split(split_file)
    partitions = []
    for part_name in ('train', 'val', 'test'):
        in_part = (
            dataset.images_split == dbac_data.DB_IMAGE_SPLITS.index(part_name))
        partitions.append(
            (part_name, dataset.images_path[in_part], dataset.labels[in_part]))

    # Feature extractor: fixed to the second entry of FEAT_TYPE.
    logger.info("Configuring Features Extractor")
    extractor = dbac_feature_ext.IFeatureExtractor.factory(
        dbac_feature_ext.FEAT_TYPE[1], **options)
    extractor.load()

    # Primitive collection: fixed to the first entry of PRIMITIVE_TYPES.
    collection = dbac_primitives.IPrimitiveCollection.factory(
        dbac_primitives.PRIMITIVE_TYPES[0], **options)
    logger.info("Loading Primitive collection")
    collection.load(prim_rpr_file)

    # Restrict the requested ids to those the collection actually holds.
    if subset_prim_ids is not None:
        subset_prim_ids = list(
            set(subset_prim_ids).intersection(set(collection.get_ids())))
    else:
        subset_prim_ids = collection.get_ids()
    logger.info("Selected Primitives: {}".format(subset_prim_ids))

    # Evaluate each partition in turn.
    report = dict()
    for part_name, part_images, part_labels in partitions:
        logger.info("Testing partition: {}".format(part_name))
        feats = extractor.compute(part_images)

        # Calibrated scores: second column of predict_proba from the first
        # item returned by get_cls (presumably the positive-class
        # probability of the primitive's classifier -- confirm).
        score_rows = []
        for prim_id in subset_prim_ids:
            classifier = collection.get_cls(prim_id)[0]
            score_rows.append(classifier.predict_proba(feats)[:, 1])
        scores = np.vstack(score_rows)

        # Ground truth laid out like the scores: one row per primitive.
        ground_truth = part_labels[:, subset_prim_ids].T
        assert scores.shape == ground_truth.shape

        report['_'.join((part_name, 'exps'))] = subset_prim_ids
        report['_'.join((part_name, 'imgs'))] = part_images
        report['_'.join((part_name, 'gt'))] = ground_truth
        report['_'.join((part_name, 'pred'))] = scores

    base_name = os.path.splitext(prim_rpr_file)[0]
    result_file = "{}.results.npy".format(base_name)
    np.save(result_file, report)
    logger.info("Results file saved to {}.".format(result_file))