import logging
import os

import numpy as np

# NOTE: project-internal modules, imported under the names used in this file;
# the actual package path in the repository may differ.
import dbac_data
import dbac_expression
import dbac_feature_ext
import dbac_model
import dbac_primitives
import dbac_util

logger = logging.getLogger(__name__)


def _extract_features(db_name, db_path, feat_type, output_path, kwargs_str=None):
    # read dataset and partitions
    logger.info("Reading dataset and split")
    db = dbac_data.IDataset.factory(db_name, db_path)
    # set up feature extractor function
    logger.info("Configuring Features Extractor")
    # read kwargs
    kwargs_dic = dbac_util.get_kwargs_dic(kwargs_str)
    logger.info("Kwargs dictionary: {}".format(kwargs_dic))
    feat_extractor = dbac_feature_ext.IFeatureExtractor.factory(feat_type, **kwargs_dic)
    feat_extractor.load()
    # compute features
    logger.info("Computing features...")
    feat_dic = dict()
    for idx, image_path in enumerate(db.images_path):
        feat = feat_extractor.compute(image_path)
        feat_dic[image_path] = feat
        if idx % 1000 == 0:
            logger.info("Cached features for {}/{} images.".format(idx, len(db.images_path)))
    # save dictionary of features
    np.save(output_path, feat_dic)
    logger.info("Dictionary of cached features saved to {}.".format(output_path))
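
# Not part of the original pipeline: a small helper showing how the cache
# written by _extract_features can be read back. np.save pickles the dict,
# so loading it requires allow_pickle=True followed by .item() to recover
# the dictionary object.
def _load_feature_cache(cache_path):
    feat_dic = np.load(cache_path, allow_pickle=True).item()
    return feat_dic
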
def _learn_primitives(db_name, db_dir, split_file, prim_rpr_file, ex_size=10, num_ex=10,
                      subset_prim_ids=None, kwargs_str=None):
    # processing kwargs
    kwargs_dic = dbac_util.get_kwargs_dic(kwargs_str)
    logger.info("Kwargs dictionary: {}".format(kwargs_dic))
    # read dataset and partitions
    logger.info("Reading dataset and split")
    db = dbac_data.IDataset.factory(db_name, db_dir)
    db.load_split(split_file)
    train_imgs_path = db.images_path[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('train')]
    train_labels = db.labels[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('train')]
    # select subset of primitives
    if subset_prim_ids is None:
        subset_prim_ids = np.where(db.valid_primitives)[0].tolist()
    logger.info("Selected Primitives: {}".format(subset_prim_ids))
    # set up feature extractor function
    logger.info("Configuring Features Extractor")
    feat_extractor = dbac_feature_ext.IFeatureExtractor.factory(dbac_feature_ext.FEAT_TYPE[1], **kwargs_dic)
    feat_extractor.load()
    # learn exemplar SVMs for the selected primitives
    prims = dbac_primitives.IPrimitiveCollection.factory(dbac_primitives.PRIMITIVE_TYPES[0], **kwargs_dic)
    logger.info("Learning Primitives...")
    prims.learn(train_imgs_path, train_labels, feat_extractor, num_ex=num_ex, ex_size=ex_size,
                prim_ids=subset_prim_ids, **kwargs_dic)
    prims.save(prim_rpr_file)
    logger.info("Primitives saved to {}.".format(prim_rpr_file))
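
# A sketch, not in the original file, of scoring images with one learned
# primitive after calling _learn_primitives. It reuses the calibrated
# classifier access pattern from _test_primitives below: get_cls(pid)[0] is
# assumed to expose a scikit-learn style predict_proba.
def _score_primitive(prims, feat_extractor, images_path, prim_id):
    feats = feat_extractor.compute(images_path)
    # probability of the positive class for each image
    return prims.get_cls(prim_id)[0].predict_proba(feats)[:, 1]
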
def _train(db_name, db_dir, db_split_file, db_comb_file, primitives_file, model_name,
           output_dir, kwargs_str=None):
    # processing kwargs
    kwargs_dic = dbac_util.get_kwargs_dic(kwargs_str)
    logger.info("Kwargs dictionary: {}".format(kwargs_dic))
    # read dataset and partitions
    logger.info("Reading dataset and split")
    db = dbac_data.IDataset.factory(db_name, db_dir)
    db.load_split(db_split_file, db_comb_file)
    train_imgs_path = db.images_path[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('train')]
    train_labels = db.labels[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('train')]
    if db_comb_file:
        logger.info("Loading compositions...")
        train_exps = db.combinations[db.combinations_split == dbac_data.DB_COMB_SPLITS.index('train')]
    else:
        logger.info("Loading single expressions...")
        train_exps = db.expressions[db.expressions_split == dbac_data.DB_EXP_SPLITS.index('train')]
    # set up feature extractor
    logger.info("Configuring Features Extractor")
    feat_extractor = dbac_feature_ext.IFeatureExtractor.factory(dbac_feature_ext.FEAT_TYPE[1], **kwargs_dic)
    feat_extractor.load()
    # set up primitive collection
    logger.info("Configuring Primitive Collection")
    prim_collection = dbac_primitives.IPrimitiveCollection.factory(dbac_primitives.PRIMITIVE_TYPES[0], **kwargs_dic)
    prim_collection.load(primitives_file)
    # set up model
    logger.info("Configuring Model")
    model = dbac_model.IModel.factory(model_name, feat_extractor, prim_collection, **kwargs_dic, is_train=True)
    logger.info("Training...")
    model.learning(train_imgs_path, train_labels, train_exps, **kwargs_dic)
    model_file = os.path.join(output_dir, 'model.npy')
    model.save(model_file)
    logger.info("Model Saved to {}".format(model_file))
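
# Example invocation (hypothetical names and paths, for illustration only;
# passing None for db_comb_file trains on single expressions instead of
# compositions):
#
#   _train('my_db', '/data/my_db', 'split.npy', None,
#          'primitives.npy', 'my_model', './output')
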
def _test(db_name, db_dir, db_split_file, db_comb_file, primitives_file, model_name,
          model_file, output_dir, kwargs_str=None):
    # processing kwargs
    kwargs_dic = dbac_util.get_kwargs_dic(kwargs_str)
    logger.info("Kwargs dictionary: {}".format(kwargs_dic))
    # read dataset and partitions
    logger.info("Reading dataset and split")
    db = dbac_data.IDataset.factory(db_name, db_dir)
    db.load_split(db_split_file, db_comb_file)
    train_imgs_path = db.images_path[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('train')]
    train_labels = db.labels[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('train')]
    val_imgs_path = db.images_path[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('val')]
    val_labels = db.labels[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('val')]
    test_imgs_path = db.images_path[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('test')]
    test_labels = db.labels[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('test')]
    if db_comb_file:
        logger.info("Loading compositions...")
        train_exps = db.combinations[db.combinations_split == dbac_data.DB_COMB_SPLITS.index('train')]
        test_exps = db.combinations[db.combinations_split == dbac_data.DB_COMB_SPLITS.index('test')]
    else:
        logger.info("Loading single expressions...")
        train_exps = db.expressions[db.expressions_split == dbac_data.DB_EXP_SPLITS.index('train')]
        test_exps = db.expressions[db.expressions_split == dbac_data.DB_EXP_SPLITS.index('test')]
    # set up feature extractor
    logger.info("Configuring Features Extractor")
    feat_extractor = dbac_feature_ext.IFeatureExtractor.factory(dbac_feature_ext.FEAT_TYPE[1], **kwargs_dic)
    feat_extractor.load()
    # set up primitive collection
    logger.info("Configuring Primitive Collection")
    prim_collection = dbac_primitives.IPrimitiveCollection.factory(dbac_primitives.PRIMITIVE_TYPES[0], **kwargs_dic)
    prim_collection.load(primitives_file)
    # set up model
    logger.info("Configuring Model")
    model = dbac_model.IModel.factory(model_name, feat_extractor, prim_collection, **kwargs_dic, is_train=False)
    model.load(model_file)
    # test model
    logger.info("Testing on seen expressions on training images...")
    train_scores = model.score(train_imgs_path, train_exps, **kwargs_dic)
    logger.info("Testing on seen expressions on validation images...")
    val_scores = model.score(val_imgs_path, train_exps, **kwargs_dic)
    logger.info("Testing on unseen expressions on test images...")
    test_scores = model.score(test_imgs_path, test_exps, **kwargs_dic)
    # save results
    logger.info("Computing results.")
    report_dic = dict()
    results_iter = zip(['train', 'val', 'test'],
                       [train_exps, train_exps, test_exps],
                       [train_labels, val_labels, test_labels],
                       [train_scores, val_scores, test_scores],
                       [train_imgs_path, val_imgs_path, test_imgs_path])
    for key, exps, labels, scores, images in results_iter:
        # compute ground truth by evaluating each expression on the primitive labels
        ground_truth = np.zeros_like(scores)
        for idx, exp_lst in enumerate(exps):
            exp_tree = dbac_expression.list2exp_parse(exp_lst)
            var_dic = {p: labels[:, int(p)] for p in dbac_expression.get_vars(exp_tree)}
            ground_truth[idx] = dbac_expression.eval_exp(exp_tree, var_dic)
        # fill report dictionary
        report_dic['_'.join([key, 'exps'])] = exps
        report_dic['_'.join([key, 'imgs'])] = images
        report_dic['_'.join([key, 'gt'])] = ground_truth
        report_dic['_'.join([key, 'pred'])] = scores
    result_file = os.path.join(output_dir, 'results.npy')
    np.save(result_file, report_dic)
    logger.info("Results file saved to {}.".format(result_file))
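
# A hedged sketch, not part of the original pipeline, of summarizing the
# results file written by _test: mean average precision per partition,
# treating each expression row of 'gt'/'pred' as one retrieval problem.
# Assumes scikit-learn is available.
def _summarize_results(result_file, partitions=('train', 'val', 'test')):
    from sklearn.metrics import average_precision_score
    report_dic = np.load(result_file, allow_pickle=True).item()
    for key in partitions:
        gt = report_dic['_'.join([key, 'gt'])]
        pred = report_dic['_'.join([key, 'pred'])]
        aps = [average_precision_score(g, p) for g, p in zip(gt, pred)]
        logger.info("{} mAP: {:.4f}".format(key, float(np.mean(aps))))
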
def _test_primitives(db_name, db_dir, split_file, prim_rpr_file, subset_prim_ids=None, kwargs_str=None):
    # processing kwargs
    kwargs_dic = dbac_util.get_kwargs_dic(kwargs_str)
    logger.info("Kwargs dictionary: {}".format(kwargs_dic))
    # read dataset and partitions
    logger.info("Reading dataset and split")
    db = dbac_data.IDataset.factory(db_name, db_dir)
    db.load_split(split_file)
    train_imgs_path = db.images_path[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('train')]
    train_labels = db.labels[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('train')]
    val_imgs_path = db.images_path[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('val')]
    val_labels = db.labels[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('val')]
    test_imgs_path = db.images_path[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('test')]
    test_labels = db.labels[db.images_split == dbac_data.DB_IMAGE_SPLITS.index('test')]
    # set up feature extractor function
    logger.info("Configuring Features Extractor")
    feat_extractor = dbac_feature_ext.IFeatureExtractor.factory(dbac_feature_ext.FEAT_TYPE[1], **kwargs_dic)
    feat_extractor.load()
    # load the exemplar-SVM primitive collection
    prims = dbac_primitives.IPrimitiveCollection.factory(dbac_primitives.PRIMITIVE_TYPES[0], **kwargs_dic)
    logger.info("Loading Primitive collection")
    prims.load(prim_rpr_file)
    # select subset of primitives
    if subset_prim_ids is None:
        subset_prim_ids = prims.get_ids()
    else:
        subset_prim_ids = list(set(subset_prim_ids).intersection(set(prims.get_ids())))
    logger.info("Selected Primitives: {}".format(subset_prim_ids))
    # test primitives
    report_dic = dict()
    for key, images, labels in zip(['train', 'val', 'test'],
                                   [train_imgs_path, val_imgs_path, test_imgs_path],
                                   [train_labels, val_labels, test_labels]):
        logger.info("Testing partition: {}".format(key))
        images_feats = feat_extractor.compute(images)
        # considering uncalibrated scores (raw SVM responses):
        # rprs = np.vstack([prims.get_rpr(pid)[0] for pid in subset_prim_ids])
        # scores = rprs[:, 0].reshape((-1, 1)) + np.dot(rprs[:, 1:], images_feats.T)
        # considering calibrated scores (probability of the positive class):
        scores = np.vstack([prims.get_cls(pid)[0].predict_proba(images_feats)[:, 1]
                            for pid in subset_prim_ids])
        # fill report dictionary
        assert scores.shape == labels[:, subset_prim_ids].T.shape
        report_dic['_'.join([key, 'exps'])] = subset_prim_ids
        report_dic['_'.join([key, 'imgs'])] = images
        report_dic['_'.join([key, 'gt'])] = labels[:, subset_prim_ids].T
        report_dic['_'.join([key, 'pred'])] = scores
    result_file = "{}.results.npy".format(os.path.splitext(prim_rpr_file)[0])
    np.save(result_file, report_dic)
    logger.info("Results file saved to {}.".format(result_file))
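
# A minimal CLI sketch, not part of the original file; the subcommand and
# flag names are invented for illustration and only one entry point is wired
# up here. The remaining functions would be wired the same way.
if __name__ == '__main__':
    import argparse
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(description='DBAC pipeline (sketch)')
    sub = parser.add_subparsers(dest='cmd', required=True)
    pf = sub.add_parser('extract_features')
    pf.add_argument('db_name')
    pf.add_argument('db_path')
    pf.add_argument('feat_type')
    pf.add_argument('output_path')
    pf.add_argument('--kwargs', dest='kwargs_str', default=None)
    args = parser.parse_args()
    if args.cmd == 'extract_features':
        _extract_features(args.db_name, args.db_path, args.feat_type,
                          args.output_path, kwargs_str=args.kwargs_str)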