def main(force):
    cub_head = CUB_200_2011(settings.CUB_ROOT, images_folder_name='images_b_head')
    cub_body = CUB_200_2011(settings.CUB_ROOT, images_folder_name='images_b_body')
    cub_crop = CUB_200_2011(settings.CUB_ROOT, images_folder_name='images_b_cropped')

    st_head = datastore(settings.storage('bmbh'))
    st_body = datastore(settings.storage('bmbb'))
    st_crop = datastore(settings.storage('bmbcflp'))

    ext_head = Berkeley_Extractor(st_head, pretrained_file=settings.BERKELEY_HEAD_PRET)
    ext_body = Berkeley_Extractor(st_body, pretrained_file=settings.BERKELEY_BODY_PRET)
    ext_crop = Berkeley_Extractor(st_crop, pretrained_file=settings.BERKELEY_CROP_PRET)

    number_of_images_in_dataset = sum(1 for _ in cub_crop.get_all_images())

    bar = pyprind.ProgBar(number_of_images_in_dataset, width=80)
    for t, des in ext_crop.extract_all(cub_crop.get_all_images(), flip=True, force=force):
        bar.update()
    print 'DONE CROP'

    bar = pyprind.ProgBar(number_of_images_in_dataset, width=80)
    for t, des in ext_head.extract_all(cub_head.get_all_images(), force=force):
        bar.update()
    print 'DONE HEAD'

    bar = pyprind.ProgBar(number_of_images_in_dataset, width=80)
    for t, des in ext_body.extract_all(cub_body.get_all_images(), force=force):
        bar.update()
    print 'DONE BODY'
Esempio n. 2
0
def main(out_path, part, random_state, pgs, net_name):
    utils.ensure_dir(out_path)

    cub = CUB_200_2011(settings.CUB_ROOT)
    lfrg = rects.BerkeleyRG(settings.BERKELEY_ANNOTATION_BASE_PATH, cub, part)
    RG = rects.RandomForestRG(datastore(settings.storage('rf')),
                              lfrg,
                              cub_utils.DeepHelper.get_custom_net(
                                  settings.model(net_name),
                                  settings.pretrained(net_name)),
                              net_name,
                              cub,
                              random_state=random_state,
                              point_gen_strategy=pgs,
                              use_seg=True,
                              pt_n_part=20,
                              pt_n_bg=100)
    RG.setup()

    for i, image in enumerate(cub.get_all_images()):
        print i
        image_path = image['img_file']
        img_id = int(image['img_id'])
        rel_image_path = image['img_file_rel']

        o_image = cv2.imread(image_path)
        rect = RG.generate(img_id)
        t_img_part = rect.get_rect(o_image)

        out_image_path = os.path.join(out_path, rel_image_path)
        utils.ensure_dir(os.path.dirname(out_image_path))
        cv2.imwrite(out_image_path, t_img_part)
    print 'Done'
Esempio n. 3
0
def main(sname, iteration, cropped, full, flipped, force, dataset,
         storage_name):
    new_name = '%s-%d' % (sname, iteration)
    if dataset == 'segmented':
        cub = CUB_200_2011_Segmented(settings.CUB_ROOT, full=full)
    elif dataset == 'part-head':
        cub = CUB_200_2011_Parts_Head(settings.CUB_ROOT, full=full)
    elif dataset == 'part-body':
        cub = CUB_200_2011_Parts_Body(settings.CUB_ROOT, full=full)
    elif dataset == 'part-head-rf-new':
        cub = CUB_200_2011(settings.CUB_ROOT, 'images_head_rf_new')
    elif dataset == 'part-body-rf-new':
        cub = CUB_200_2011(settings.CUB_ROOT, 'images_body_rf_new')
    else:
        cub = CUB_200_2011(settings.CUB_ROOT,
                           images_folder_name=dataset,
                           full=full)
    if not storage_name:
        ft_storage = datastore(settings.storage(new_name))
    else:
        ft_storage = datastore(settings.storage(storage_name))
    ft_extractor = CNN_Features_CAFFE_REFERENCE(
        ft_storage,
        model_file=settings.model(new_name),
        pretrained_file=settings.pretrained(new_name),
        full=full,
        crop_index=0)
    number_of_images_in_dataset = sum(1 for _ in cub.get_all_images())
    bar = pyprind.ProgBar(number_of_images_in_dataset, width=80)
    for t, des in ft_extractor.extract_all(cub.get_all_images(),
                                           flip=flipped,
                                           crop=cropped,
                                           bbox=cub.get_bbox(),
                                           force=force):
        bar.update()
    print 'DONE'
Esempio n. 4
0
def main(sname, svm_c, segmented):
    if segmented:
        cub = CUB_200_2011_Segmented(settings.CUB_ROOT)
    else:
        cub = CUB_200_2011(settings.CUB_ROOT)
    ft_storage = datastore(settings.storage(sname))
    ft_extractor = CNN_Features_CAFFE_REFERENCE(ft_storage, make_net=False)

    Xtrain, ytrain, Xtest, ytest = cub.get_train_test(ft_extractor.extract_one)
    model = svm.LinearSVC(C=svm_c)
    model.fit(Xtrain, ytrain)
    predictions = model.predict(Xtest)

    print 'accuracy', accuracy_score(ytest, predictions)
    print 'mean accuracy', utils.mean_accuracy(ytest, predictions)

    pred_storage = datastore(settings.PREDICTIONS_BASE, global_key='preds')
    storage_path = pred_storage.get_instance_path('preds', sname,
                                                  '%s.mat' % sname)
    pred_storage.ensure_dir(os.path.dirname(storage_path))
    pred_storage.save_instance(storage_path, predictions)
Esempio n. 5
0
def main(model_name, iteration, storage_name):
    name = '%s-%s' % (model_name, iteration)
    print settings.model(name), settings.pretrained(name)

    safe = datastore(settings.storage(storage_name))
    safe.super_name = 'features'
    safe.sub_name = name

    layer_names = ['fc7', 'fc6', 'pool5', 'conv5', 'conv4', 'conv3']
    layer_dims = [4096, 4096, 9216, 43264, 64896, 64896]

    net = caffe.Classifier(settings.model(name),
                           settings.pretrained(name),
                           mean=np.load(settings.ILSVRC_MEAN),
                           channel_swap=(2, 1, 0),
                           raw_scale=255)
    net.set_mode_gpu()
    net.set_phase_test()

    cub = CUB_200_2011(settings.CUB_ROOT)

    dataset_size = sum(1 for _ in cub.get_all_images())

    instance = {}
    for layer, dim in zip(layer_names, layer_dims):
        instance[layer] = np.zeros((dataset_size, dim))
        print instance[layer].shape

    for i, info in enumerate(cub.get_all_images(cropped=True)):
        print info['img_id']
        img = caffe.io.load_image(info['img_file'])
        net.predict([img], oversample=False)
        for layer in layer_names:
            instance[layer][i, :] = net.blobs[layer].data[0].flatten()

    for layer in layer_names:
        safe.save_large_instance(
            safe.get_instance_path(safe.super_name, safe.sub_name,
                                   'feat_cache_%s' % layer), instance[layer],
            4)
def main():
    cub = CUB_200_2011(settings.CUB_ROOT)
    cub_images = cub.get_all_images()
    IDtrain, IDtest = cub.get_train_test_id()
    bah = cub_utils.BerkeleyAnnotationsHelper(
        settings.BERKELEY_ANNOTATION_BASE_PATH, IDtrain, IDtest)
    for i, image in enumerate(cub_images):
        image_path = image['img_file']
        image_id = int(image['img_id'])

        rel_image_path = image_path[len(settings.CUB_IMAGES_FOLDER):]
        o_image = cv2.imread(image_path)

        head_info = bah.get_berkeley_annotation(image_id, 'head')
        body_info = bah.get_berkeley_annotation(image_id, 'body')
        crop_info = bah.get_berkeley_annotation(image_id, 'bbox')

        if -1 in head_info:
            print 'NO HEAD \t IMG-ID: %d' % image_id
        else:
            head_image = utils.get_rect(o_image, head_info)
            head_out_path = os.path.join(b_head_folder, rel_image_path)
            utils.ensure_dir(os.path.dirname(head_out_path))
            cv2.imwrite(head_out_path, head_image)

        if -1 in body_info:
            print 'NO BODY \t IMG-ID: %d' % image_id
        else:
            body_image = utils.get_rect(o_image, body_info)
            body_out_path = os.path.join(b_body_folder, rel_image_path)
            utils.ensure_dir(os.path.dirname(body_out_path))
            cv2.imwrite(body_out_path, body_image)

        if -1 in crop_info:
            print 'NO CROP \t IMG-ID: %d' % image_id
        else:
            crop_image = utils.get_rect(o_image, crop_info)
            crop_out_path = os.path.join(b_crop_folder, rel_image_path)
            utils.ensure_dir(os.path.dirname(crop_out_path))
            cv2.imwrite(crop_out_path, crop_image)
Esempio n. 7
0
def main(out_path):
    cub = CUB_200_2011(settings.CUB_ROOT)
    cub_images = cub.get_all_images()
    cub_parts = cub.get_parts()
    for i, image in enumerate(cub_images):
        image_path = image['img_file']
        image_id = image['img_id']

        rel_image_path = image_path[len(settings.CUB_IMAGES_FOLDER):]
        o_image = cv2.imread(image_path)

        parts = cub_parts.for_image(image_id)
        p_parts = parts.filter_by_name(Parts.BODY_PART_NAMES)

        if len(p_parts) <= 2:
            print "#parts:%d \tID:%d \tName:%s" % (len(p_parts), int(image_id),
                                                   rel_image_path)

        part_image = p_parts.get_rect(o_image, alpha=0.6)

        out_image_path = os.path.join(out_path, rel_image_path)
        utils.ensure_dir(os.path.dirname(out_image_path))
        cv2.imwrite(out_image_path, part_image)
Esempio n. 8
0
import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(__file__)))
import settings
from dataset import CUB_200_2011, CUB_200_2011_Parts_Head, CUB_200_2011_Parts_Body
from storage import datastore
from deep_extractor import CNN_Features_CAFFE_REFERENCE
import pyprind

# Dataset wrappers: the base CUB dataset (built with full=False) and the two
# part-specific variants.
cub = CUB_200_2011(settings.CUB_ROOT, full=False)
cub_head = CUB_200_2011_Parts_Head(settings.CUB_ROOT)
cub_body = CUB_200_2011_Parts_Body(settings.CUB_ROOT)

# One datastore + feature-extractor pair per storage key follows.
# NOTE(review): judging by the variable names the keys stand for
# regular / flipped / cropped features, with "ft" marking a fine-tuned
# network -- confirm against settings.storage before relying on it.
features_storage = datastore(settings.storage('ccr'))
feature_extractor = CNN_Features_CAFFE_REFERENCE(features_storage, full=False)

# The "_ft" extractors are the ones created with full=True.
features_storage_ft = datastore(settings.storage('ccrft'))
feature_extractor_ft = CNN_Features_CAFFE_REFERENCE(features_storage_ft,
                                                    full=True)

features_storage_flipped = datastore(settings.storage('ccf'))
feature_extractor_flipped = CNN_Features_CAFFE_REFERENCE(
    features_storage_flipped, full=False)

features_storage_flipped_ft = datastore(settings.storage('ccfft'))
feature_extractor_flipped_ft = CNN_Features_CAFFE_REFERENCE(
    features_storage_flipped_ft, full=True)

# Storage for the cropped variant; its extractor is created right after.
features_storage_cropped = datastore(settings.storage('ccc'))
feature_extractor_cropped = CNN_Features_CAFFE_REFERENCE(
Esempio n. 9
0
# Script: write the image-list file(s) used for fine-tuning.
#
# NOTE(review): DO_TRAIN is not read anywhere in this excerpt, and the DO_TEST
# flag tested below is not defined in it -- presumably both are handled in a
# part of the script that is not shown here; confirm before running.
DO_TRAIN = True

data_folder = 'cub-part-head-rf-new-3'
# Passed to get_all_images(cropped=...) when listing the images.
DO_CROP = False

base_folder = '/home/ipl/installs/caffe-rc/data/%s/' % data_folder

utils.ensure_dir(base_folder)

# base_folder already ends with '/', so these paths contain a double slash.
fine_tune_test_file = '%s/test.txt' % base_folder
fine_tune_train_file = '%s/train.txt' % base_folder
fine_tune_val_file = '%s/val.txt' % base_folder
fine_tune_train_val_file = '%s/trainval.txt' % base_folder

# cub = CUB_200_2011(settings.CUB_ROOT)
cub = CUB_200_2011(settings.CUB_ROOT, 'images_head_rf_def_unif')
class_dict = cub.get_class_dict()

IDtrain, IDtest = cub.get_train_test_id()

if DO_TEST:
    # The context manager closes the list file even if a lookup below raises
    # (the previous open()/close() pair leaked the handle in that case).
    with open(fine_tune_test_file, 'w') as test_file:
        all_images = cub.get_all_images(cropped=DO_CROP)
        for img_inf in all_images:
            img_id = img_inf['img_id']
            img_file = img_inf['img_file']
            if int(img_id) in IDtest:
                # One "<image path> <label>" line per test image; the class
                # id from class_dict is shifted down by one.
                test_file.write("%s %s\n" %
                                (img_file, str(class_dict[img_id] - 1)))
Esempio n. 10
0
def main(storage_name, layer, model, iteration, normalize_feat, n_neighbors,
         parts, feat_layer, add_noise, to_oracle, noise_std_c, noise_std_d,
         augment_training, augmentation_fold, augmentation_noise):
    if len(parts) == 0:
        print 'no parts where needed'
        exit()

    name = '%s-%s' % (model, iteration)

    nn_storage_name = 'nn-parts'
    nn_storage = datastore(settings.storage(nn_storage_name))
    nn_storage.super_name = '%s_%s' % (storage_name, name)
    nn_storage.sub_name = layer
    nn_storage.instance_name = 'norm_%s.mat' % str(normalize_feat)
    nn_storage.instance_path = nn_storage.get_instance_path(
        nn_storage.super_name, nn_storage.sub_name, nn_storage.instance_name)

    cub = CUB_200_2011(settings.CUB_ROOT)

    safe = datastore(settings.storage(storage_name))
    safe.super_name = 'features'
    safe.sub_name = name

    instance_path = safe.get_instance_path(safe.super_name, safe.sub_name,
                                           'feat_cache_%s' % layer)
    feat = safe.load_large_instance(instance_path, 4)

    # should we normalize the feats?
    if normalize_feat:
        # snippet from : http://stackoverflow.com/a/8904762/428321
        # I've went for l2 normalization.
        # row_sums = feat.sum(axis=1)
        row_norms = np.linalg.norm(feat, axis=1)
        new_feat = feat / row_norms[:, np.newaxis]
        feat = new_feat

    IDtrain, IDtest = cub.get_train_test_id()

    # the following line is not really a good idea. Only works for this dataset.
    Xtrain = feat[IDtrain - 1, :]
    Xtest = feat[IDtest - 1, :]

    print 'init load done'

    if not nn_storage.check_exists(nn_storage.instance_path):
        print 'calculating'
        # the actual NN search
        nn_model = sklearn.neighbors.NearestNeighbors(n_neighbors=n_neighbors,
                                                      algorithm='ball_tree',
                                                      metric='minkowski',
                                                      p=2)
        tic = time()
        nn_model.fit(Xtrain)
        toc = time() - tic
        print 'fitted in: ', toc

        tic = time()
        NNS = nn_model.kneighbors(Xtest, 1, return_distance=False)
        toc = time() - tic
        print 'found in: ', toc
        nn_storage.save_instance(nn_storage.instance_path, NNS)
    else:
        # load the NNS
        NNS = nn_storage.load_instance(nn_storage.instance_path)
        print 'loaded'

    # convert (N, 1) to (N,)
    NNS = NNS.T[0]

    # transfer part locations
    all_parts_cub = cub.get_parts()
    estimated_test_parts = Parts()
    all_image_infos = cub.get_all_image_infos()
    bbox = cub.get_bbox()

    tic = time()
    # estimate test parts with NN part transfer
    for i in range(IDtest.shape[0]):
        test_id = IDtest[i]
        nn_id = IDtrain[NNS[i]]
        nn_parts = all_parts_cub.for_image(nn_id)

        test_bbox = bbox[test_id - 1]
        nn_bbox = bbox[nn_id - 1]

        estimated_parts = nn_parts.transfer(nn_bbox, test_bbox)
        estimated_parts.set_for(test_id)
        estimated_test_parts.appends(estimated_parts)

    toc = time() - tic
    print 'transfered in', toc

    # load data
    tic = time()
    features_storage_r = datastore(settings.storage('ccrft'))
    feature_extractor_r = CNN_Features_CAFFE_REFERENCE(features_storage_r,
                                                       make_net=False)

    features_storage_c = datastore(settings.storage('cccft'))
    feature_extractor_c = CNN_Features_CAFFE_REFERENCE(features_storage_c,
                                                       make_net=False)

    if 'head' in parts:
        features_storage_p_h = datastore(settings.storage('ccpheadft-100000'))
        feature_extractor_p_h = CNN_Features_CAFFE_REFERENCE(
            features_storage_p_h, make_net=False)

    if 'body' in parts:
        features_storage_p_b = datastore(settings.storage('ccpbodyft-100000'))
        feature_extractor_p_b = CNN_Features_CAFFE_REFERENCE(
            features_storage_p_b, make_net=False)

    Xtrain_r, ytrain_r, Xtest_r, ytest_r = cub.get_train_test(
        feature_extractor_r.extract_one)
    Xtrain_c, ytrain_c, Xtest_c, ytest_c = cub.get_train_test(
        feature_extractor_c.extract_one)

    if 'head' in parts:
        Xtrain_p_h, ytrain_p_h, Xtest_p_h, ytest_p_h = cub.get_train_test(
            feature_extractor_p_h.extract_one)
    if 'body' in parts:
        Xtrain_p_b, ytrain_p_b, Xtest_p_b, ytest_p_b = cub.get_train_test(
            feature_extractor_p_b.extract_one)

    toc = time() - tic
    print 'loaded data in', toc

    def compute_estimated_part_data(model_name, shape, IDS,
                                    part_names_to_filter, add_noise,
                                    noise_std_c, noise_std_d):
        net = caffe.Classifier(settings.model(model_name),
                               settings.pretrained(model_name),
                               mean=np.load(settings.ILSVRC_MEAN),
                               channel_swap=(2, 1, 0),
                               raw_scale=255)
        net.set_phase_test()
        net.set_mode_gpu()

        # compute estimated head data
        new_Xtest_part = np.zeros(shape)

        for i, t_id in enumerate(IDS):
            if to_oracle:
                t_parts = all_parts_cub.for_image(t_id)
            else:
                t_parts = estimated_test_parts.for_image(t_id)
            t_img_addr = all_image_infos[t_id]
            t_img = caffe.io.load_image(t_img_addr)
            t_parts_part = t_parts.filter_by_name(part_names_to_filter)
            t_img_part = t_parts_part.get_rect(t_img,
                                               add_noise=add_noise,
                                               noise_std_c=noise_std_c,
                                               noise_std_d=noise_std_d)
            try:
                net.predict([t_img_part], oversample=False)
            except Exception, e:
                print '------', t_id, '----------'
                print part_names_to_filter
                print t_img_addr
                print '------------'
                print t_img.shape
                print t_parts
                print '------------'
                print t_img_part.shape
                print t_parts_part
                raise e
            new_Xtest_part[i, :] = net.blobs[feat_layer].data[0].flatten()

        return new_Xtest_part
Esempio n. 11
0
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from dataset import CUB_200_2011
from storage import datastore
from deep_extractor import CNN_Features_CAFFE_REFERENCE
from datetime import datetime as dt
import numpy
import settings
import utils

# Script: load train/test features from several feature stores.
cub = CUB_200_2011(settings.CUB_ROOT)

# One datastore + extractor pair per storage key.
# NOTE(review): the suffixes _c / _f / _fc presumably stand for cropped,
# flipped and flipped+cropped features -- confirm against settings.storage.
features_storage = datastore(settings.storage('ccrft'))
feature_extractor = CNN_Features_CAFFE_REFERENCE(features_storage)

features_storage_c = datastore(settings.storage('cccft'))
feature_extractor_c = CNN_Features_CAFFE_REFERENCE(features_storage_c)

features_storage_f = datastore(settings.storage('ccfft'))
feature_extractor_f = CNN_Features_CAFFE_REFERENCE(features_storage_f)

features_storage_fc = datastore(settings.storage('ccfcft'))
feature_extractor_fc = CNN_Features_CAFFE_REFERENCE(features_storage_fc)

# Each call yields (Xtrain, ytrain, Xtest, ytest) for one extractor; the
# extractor's extract_one is handed to the dataset as a callback.
Xtrain, ytrain, Xtest, ytest = cub.get_train_test(
    feature_extractor.extract_one)
Xtrain_c, ytrain_c, Xtest_c, ytest_c = cub.get_train_test(
    feature_extractor_c.extract_one)
Xtrain_f, ytrain_f, Xtest_f, ytest_f = cub.get_train_test(
    feature_extractor_f.extract_one)
Esempio n. 12
0
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from dataset import CUB_200_2011
from storage import datastore
from deep_extractor import CNN_Features_CAFFE_REFERENCE
from datetime import datetime as dt
import numpy
import settings
import utils

# Script: load train/test features using a full=True / full=False extractor
# pair on each of two feature stores.
cub_full = CUB_200_2011(settings.CUB_ROOT, full=True)
# NOTE(review): `cub` is not used again in this excerpt.
cub = CUB_200_2011(settings.CUB_ROOT, full=False)

# Both extractors below share the same 'ccc' datastore.
features_storage = datastore(settings.storage('ccc'))
feature_extractor_full = CNN_Features_CAFFE_REFERENCE(features_storage,
                                                      full=True)
feature_extractor = CNN_Features_CAFFE_REFERENCE(features_storage, full=False)

# Same pairing on the 'ccfc' datastore.
features_storage_f = datastore(settings.storage('ccfc'))
feature_extractor_full_f = CNN_Features_CAFFE_REFERENCE(features_storage_f,
                                                        full=True)
feature_extractor_f = CNN_Features_CAFFE_REFERENCE(features_storage_f,
                                                   full=False)

# get_train_test receives two callbacks here: the full=True extractor first,
# then the full=False one.
# NOTE(review): presumably the first serves the training split and the second
# the test split -- confirm against CUB_200_2011.get_train_test.
Xtrain, ytrain, Xtest, ytest = cub_full.get_train_test(
    feature_extractor_full.extract_one, feature_extractor.extract_one)
Xtrain_f, ytrain_f, Xtest_f, ytest_f = cub_full.get_train_test(
    feature_extractor_full_f.extract_one, feature_extractor_f.extract_one)

print Xtrain.shape, ytrain.shape
Esempio n. 13
0
    os.makedirs(config.CHECKPOINTS_PATH, exist_ok=True)

    # Augment
    train_transform = trans.Compose([
        trans.RandomResizedCrop(224),
        # trans.Resize((224, 224)),
        trans.RandomHorizontalFlip(),
        trans.ToTensor()])
    val_transform = trans.Compose([
        trans.Resize(256),
        trans.CenterCrop(224),
        # trans.Resize((224, 224)),
        trans.ToTensor()])

    # Get dataloader
    trainSet = CUB_200_2011(config.DATA_PATH, phase='train', transform=train_transform)
    trainLoader = torch.utils.data.DataLoader(
        trainSet,
        batch_size=config.TRAIN.batch_size,
        shuffle=True,
        num_workers=config.TRAIN.num_workers,
        pin_memory=True)
    valSet = CUB_200_2011(config.DATA_PATH, phase='val', transform=val_transform)
    valLoader = torch.utils.data.DataLoader(
        valSet,
        batch_size=config.TRAIN.batch_size,
        shuffle=True,
        num_workers=config.TRAIN.num_workers,
        pin_memory=True)

    # Initiate model
Esempio n. 14
0
def main(c, f):
    """Classify CUB images using a head part localised by a random forest.

    Pipeline, in order:

    1. Load or compute per-point random-forest training data for the head
       parts and fit a ``RandomForestClassifier`` on it.
    2. Load pre-extracted whole-image, cropped and head-part features.
    3. Load or compute head features from the rectangle predicted by the
       forest (test split always, train split when ``recalculate_training``).
    4. Concatenate the three feature groups, train a ``LinearSVC`` and print
       accuracy and mean accuracy.

    Args:
        c: ``C`` parameter of the final ``LinearSVC``.
        f: force flag; when truthy cached results are ignored and recomputed.
    """
    # Third argument of every load/save_large_instance call below.
    instance_split = 10
    # Blob read from the network after each forward pass.
    feat_layer = 'fc7'
    # Hard-coded switches: skip the RF test data, redo the train features.
    load_rf_test = False
    recalculate_training = True
    C = c
    force = f

    dh = cub_utils.DeepHelper()
    cub = CUB_200_2011(settings.CUB_ROOT)
    cub_parts = cub.get_parts()
    IDtrain, IDtest = cub.get_train_test_id()
    all_image_infos = cub.get_all_image_infos()
    all_segmentaion_infos = cub.get_all_segmentation_infos()

    rf_safe = datastore(settings.storage('rf'))
    rf_safe.super_name = 'features'
    rf_safe.sub_name = 'head-points'
    rf_safe.other_sub_name = 'head-final-features'

    # Cache paths: the *_rf_* ones hold the forest's data, the other two the
    # final head features.
    Xtrain_rf_ip = rf_safe.get_instance_path(rf_safe.super_name,
                                             rf_safe.sub_name, 'Xtrain_rf')
    Xtest_rf_ip = rf_safe.get_instance_path(rf_safe.super_name,
                                            rf_safe.sub_name, 'Xtest_rf')
    ytrain_rf_ip = rf_safe.get_instance_path(rf_safe.super_name,
                                             rf_safe.sub_name, 'ytrain_rf.mat')
    ytest_rf_ip = rf_safe.get_instance_path(rf_safe.super_name,
                                            rf_safe.sub_name, 'ytest_rf.mat')
    Xtrain_ip = rf_safe.get_instance_path(rf_safe.super_name,
                                          rf_safe.other_sub_name, 'Xtrain')
    Xtest_ip = rf_safe.get_instance_path(rf_safe.super_name,
                                         rf_safe.other_sub_name, 'Xtest')

    tic = time()
    # Only the label file's existence is checked; the feature file is assumed
    # to be present alongside it.
    if rf_safe.check_exists(ytrain_rf_ip) and not force:
        print 'loading'
        Xtrain_rf = rf_safe.load_large_instance(Xtrain_rf_ip, instance_split)
        ytrain_rf = rf_safe.load_instance(ytrain_rf_ip)
        # The stored labels come back two-dimensional; keep the first row.
        ytrain_rf = ytrain_rf[0, :]
    else:
        print 'calculating'
        Xtrain_rf, ytrain_rf = dh.part_features_for_rf(all_image_infos,
                                                       all_segmentaion_infos,
                                                       cub_parts, IDtrain,
                                                       Parts.HEAD_PART_NAMES)

        rf_safe.save_large_instance(Xtrain_rf_ip, Xtrain_rf, instance_split)
        rf_safe.save_instance(ytrain_rf_ip, ytrain_rf)

    # Disabled by the constant above; Xtest_rf / ytest_rf are not used later
    # in this function.
    if load_rf_test:
        if rf_safe.check_exists(ytest_rf_ip) and not force:
            Xtest_rf = rf_safe.load_large_instance(Xtest_rf_ip, instance_split)
            ytest_rf = rf_safe.load_instance(ytest_rf_ip)
            ytest_rf = ytest_rf[0, :]
        else:
            Xtest_rf, ytest_rf = dh.part_features_for_rf(
                all_image_infos, all_segmentaion_infos, cub_parts, IDtest,
                Parts.HEAD_PART_NAMES)

            rf_safe.save_large_instance(Xtest_rf_ip, Xtest_rf, instance_split)
            rf_safe.save_instance(ytest_rf_ip, ytest_rf)
    toc = time()
    print 'loaded or calculated in', toc - tic

    tic = time()
    # random_state=None: the fitted forest differs from run to run.
    model_rf = sklearn.ensemble.RandomForestClassifier(n_estimators=10,
                                                       bootstrap=False,
                                                       max_depth=10,
                                                       n_jobs=3,
                                                       random_state=None,
                                                       verbose=0)
    model_rf.fit(Xtrain_rf, ytrain_rf)
    toc = time()
    print 'fitted rf model in', toc - tic

    # Points at which the forest is evaluated for every image (227 x 227).
    dense_points = gen_dense_points(227, 227)

    # load whole and bbox and head part data
    # load data
    tic = time()
    features_storage_r = datastore(settings.storage('ccrft2st-10000'))
    feature_extractor_r = CNN_Features_CAFFE_REFERENCE(features_storage_r,
                                                       make_net=False)

    features_storage_c = datastore(settings.storage('cccft2st-50000'))
    feature_extractor_c = CNN_Features_CAFFE_REFERENCE(features_storage_c,
                                                       make_net=False)

    features_storage_p_h = datastore(settings.storage('ccpheadft-100000'))
    feature_extractor_p_h = CNN_Features_CAFFE_REFERENCE(features_storage_p_h,
                                                         make_net=False)

    Xtrain_r, ytrain_r, Xtest_r, ytest_r = cub.get_train_test(
        feature_extractor_r.extract_one)
    Xtrain_c, ytrain_c, Xtest_c, ytest_c = cub.get_train_test(
        feature_extractor_c.extract_one)
    # These head features supply the array shapes used below; with the current
    # constants both the train and the test matrix are replaced afterwards.
    Xtrain_p_h, ytrain_p_h, Xtest_p_h, ytest_p_h = cub.get_train_test(
        feature_extractor_p_h.extract_one)

    toc = time()
    print 'loaded whole and bbox and head part data in', toc - tic

    def compute_estimated_part_data(model_name, shape, IDS, model_rf):
        """Return ``feat_layer`` activations of the forest-predicted head crop.

        Args:
            model_name: key for ``settings.model`` / ``settings.pretrained``.
            shape: shape of the returned array; row ``i`` belongs to
                ``IDS[i]``.
            IDS: iterable of image ids.
            model_rf: fitted classifier exposing ``predict_proba``.
        """
        net = caffe.Classifier(settings.model(model_name),
                               settings.pretrained(model_name),
                               mean=np.load(settings.ILSVRC_MEAN),
                               channel_swap=(2, 1, 0),
                               raw_scale=255)
        net.set_phase_test()
        net.set_mode_gpu()
        # compute estimated head data
        new_Xtest_part = np.zeros(shape)

        for i, t_id in enumerate(IDS):
            print i
            img = caffe.io.load_image(all_image_infos[t_id])
            dh.init_with_image(img)
            X = dh.features(dense_points)
            preds_prob = model_rf.predict_proba(X)
            # Column 1 of predict_proba is reshaped into a 227 x 227 map and
            # thresholded at half of its maximum.
            max_prob = np.max(preds_prob[:, 1])
            preds_prob = preds_prob[:, 1].reshape((227, 227)).T
            preds = preds_prob >= (max_prob / 2)
            # Morphological clean-up: close gaps, then drop tiny components.
            preds = skimage.morphology.closing(preds,
                                               skimage.morphology.square(10))
            preds = skimage.morphology.remove_small_objects(preds,
                                                            min_size=10,
                                                            connectivity=1)
            L, N = skimage.measure.label(preds, return_num=True, background=0)
            # NOTE(review): this filter treats -1 as the background label.
            # Whether label() marks background as -1 or 0 depends on the
            # installed skimage version -- confirm; if background is 0 it is
            # not excluded here and may win the mode below.
            L_no_bg = L[L != -1].flatten()
            # Most frequent remaining label = largest component.
            # NOTE(review): an empty L_no_bg presumably makes vals[0] fail.
            vals, counts = scipy.stats.mode(L_no_bg)
            part_label = int(vals[0])

            # Bounding box of the chosen component; "x" is taken from the
            # first axis of L and "y" from the second.
            indices = np.where(L == part_label)
            xmin = indices[0].min()
            xmax = indices[0].max()
            ymin = indices[1].min()
            ymax = indices[1].max()

            # Two dummy parts carry the box corners so they can be rescaled
            # from the 227-pixel grid to the real image size.
            pmin = Part(-1, '?', -1, xmin, ymin, 1)
            pmax = Part(-1, '?', -1, xmax, ymax, 1)
            rect_parts = Parts(parts=[pmin, pmax])
            rect_parts.denorm_for_size(img.shape[0], img.shape[1], size=227)
            rect_info = rect_parts[0].x, rect_parts[1].x, rect_parts[
                0].y, rect_parts[1].y

            t_img_part = Parts().get_rect(img, rect_info=rect_info)
            try:
                net.predict([t_img_part], oversample=False)
            except Exception:
                # Failure is only reported; execution continues with the row
                # assignment below, which then reads whatever the blob held
                # before this call.
                print '------', t_id, '----------'

            new_Xtest_part[i, :] = net.blobs[feat_layer].data[0].flatten()

        return new_Xtest_part

    tic = time()
    if rf_safe.check_exists_large(Xtest_ip) and not force:
        print 'loading test'
        Xtest_p_h = rf_safe.load_large_instance(Xtest_ip, instance_split)
    else:
        print 'calculating test'
        # The previously loaded Xtest_p_h is used only for its shape.
        Xtest_p_h = compute_estimated_part_data('ccpheadrfft-100000',
                                                Xtest_p_h.shape, IDtest,
                                                model_rf)

        rf_safe.save_large_instance(Xtest_ip, Xtest_p_h, instance_split)

    if recalculate_training:
        if rf_safe.check_exists_large(Xtrain_ip) and not force:
            print 'loading train'
            Xtrain_p_h = rf_safe.load_large_instance(Xtrain_ip, instance_split)
        else:
            print 'calculating train'
            Xtrain_p_h = compute_estimated_part_data('ccpheadrfft-100000',
                                                     Xtrain_p_h.shape, IDtrain,
                                                     model_rf)

            rf_safe.save_large_instance(Xtrain_ip, Xtrain_p_h, instance_split)

    toc = time()
    print 'features loaded or calculated in', toc - tic

    # Final representation: whole-image, cropped and head features side by
    # side; labels are taken from the whole-image split.
    Xtrain = np.concatenate((Xtrain_r, Xtrain_c, Xtrain_p_h), axis=1)
    Xtest = np.concatenate((Xtest_r, Xtest_c, Xtest_p_h), axis=1)
    ytrain = ytrain_r
    ytest = ytest_r

    print Xtrain.shape, Xtest.shape

    # do classification
    tic = time()
    model = sklearn.svm.LinearSVC(C=C)
    model.fit(Xtrain, ytrain)
    predictions = model.predict(Xtest)
    toc = time() - tic

    print 'classification in', toc
    print '--------------------'
    print 'C:', C
    print '--------------------'
    print 'accuracy', sklearn.metrics.accuracy_score(
        ytest,
        predictions), 'mean accuracy', utils.mean_accuracy(ytest, predictions)
    print '===================='