Example #1
0
def perform_file(fileName):
    """Run a test-then-train pass over one ARFF dataset and log predictions.

    The first minibatch is used to fit a Mondrian forest from scratch. For
    every later minibatch the forest first predicts the batch, the
    target/prediction pairs are appended to
    ``settings.data_path + fileName + '.csv'``, and only then is the forest
    updated online with that same batch.

    Relies on the module-level ``settings`` object and overwrites its
    ``dataset`` attribute.

    Args:
        fileName: dataset name without the ``.arff`` extension; also used as
            the stem of the CSV output file.
    """
    settings.dataset = fileName + ".arff"
    print(fileName)

    print("Loading data")
    data = load_data(settings)

    param, cache = precompute_minimal(data, settings)
    mf = MondrianForest(settings, data)

    output_path = settings.data_path + fileName + '.csv'
    minibatches = data['train_ids_partition']['current']

    for idx_minibatch in range(settings.n_minibatches):
        train_ids_current_minibatch = minibatches[idx_minibatch]

        if idx_minibatch == 0:
            # Start a fresh output file. The header uses the same ','
            # delimiter as the data rows written below (it used to be ';',
            # which no single-delimiter CSV reader could parse).
            with open(output_path, 'w') as f:
                f.write("target,prediction\n")
            print("Training 0 batch", len(train_ids_current_minibatch))
            # Batch training for first minibatch
            mf.fit(data, train_ids_current_minibatch, settings, param, cache)
        else:
            print('Evaluation on batch', idx_minibatch, 'in', fileName)
            # Evaluate on the batch before the forest has seen it; every
            # tree gets the same weight.
            weights_prediction = np.ones(
                settings.n_mondrians) * 1.0 / settings.n_mondrians
            results = mf.evaluate_predictions(
                data, data['x_train'][train_ids_current_minibatch],
                data['y_train'][train_ids_current_minibatch], settings, param,
                weights_prediction, True)
            predictions = results[0]['pred_mean']
            real = data['y_train'][train_ids_current_minibatch].flatten()
            # Open the file once per batch instead of once per row.
            with open(output_path, 'a') as f:
                f.writelines("{0},{1}\n".format(target, predicted)
                             for target, predicted in zip(real, predictions))

            print("Training on next batch ", idx_minibatch,
                  len(train_ids_current_minibatch))
            # Online update
            mf.partial_fit(data, train_ids_current_minibatch, settings, param,
                           cache)
        print("Finished training ...")
Example #2
0
def main():
    """Train a Mondrian forest on room-type data, evaluate it and log results.

    Steps, in order:
      1. read settings from the command line and reset the random seed;
      2. load the dataset for the hard-coded batch / incremental room types;
      3. batch-fit the forest (plus an incremental ``partial_fit`` when
         ``training_perc`` is positive);
      4. evaluate on ``data['x_test']`` and write the metric and tree
         statistics to the hard-coded ``statistics.txt`` file;
      5. for every file in ``test_list`` load its features and labels from
         ``settings.data_path``, predict, log the first (up to) 25
         target/prediction pairs and compute a confusion matrix.

    Every ``print`` passes a single argument, so the output is identical
    under the Python 2 print statement and the Python 3 print function.
    """
    # Import settings from command line
    settings = process_command_line()
    print('Current settings:')
    pp.pprint(vars(settings))

    # Resetting random seed
    reset_random_seed(settings)

    # Loading batch data
    batch_type_list = ['office', 'kitchen', 'bookstore']
    #batch_type_list = ['kitchen','office','bathroom','bedroom','bookstore','living_room']

    incremental_type_list = [
        'computer_room', 'home_office', 'office_kitchen', 'classroom'
    ]
    #incremental_type_list = ['study_space','classroom','computer_room','lobby','home_office','office_kitchen','playroom','reception_room','study','dining_room','cafeteria','furniture_store','conference_room','dinette','gym','storage_room','indoor_balcony','laundromat','printer_room','basement','recreation_room']
    training_perc = 0

    data, training_list, test_list = load_type_dataset(settings,
                                                       batch_type_list,
                                                       incremental_type_list,
                                                       training_perc)

    param, cache = precompute_minimal(data, settings)

    mf = MondrianForest(settings, data)

    batch_train_ids = data['train_ids_partition']['batch']
    print('\nBatch training on %d element...' % (len(batch_train_ids)))

    mf.fit(data, batch_train_ids, settings, param, cache)

    print('...batch training done. \n')

    # With training_perc == 0 (the value set above) this branch is skipped.
    if training_perc > 0:
        incremental_train_ids = data['train_ids_partition']['incremental']
        print('Incremental training on %d element...' % (
            len(incremental_train_ids)))

        mf.partial_fit(data, incremental_train_ids, settings, param, cache)

        print('...incremental training done. \n')

    #Evaluation
    print('Evaluation... \n')
    # Uniform weight for every tree of the forest.
    weights_prediction = np.ones(
        settings.n_mondrians) * 1.0 / settings.n_mondrians
    pred_forest_test, metrics_test = mf.evaluate_predictions(
        data, data['x_test'], data['y_test'], settings, param,
        weights_prediction, False)
    name_metric = settings.name_metric  # acc or mse
    metric_test = metrics_test[name_metric]
    tree_numleaves = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_numleaves[i_t] = len(tree.leaf_nodes)
    forest_numleaves = np.mean(tree_numleaves)

    # NOTE(review): the statistics path is machine-specific and hard-coded.
    # The context manager guarantees the file is closed even when one of the
    # evaluation steps below raises.
    with open('/home/alberto/tesi/mondrianforest/src/results/statistics.txt',
              'w') as f_stats:
        print('%s\t\tnum_leaves' % (name_metric))
        f_stats.write(str(name_metric) + ' : ' + str(metric_test))
        f_stats.write('\n')
        f_stats.write('num_leaves : ' + str(forest_numleaves))
        f_stats.write('\n')
        f_stats.write('\n')
        print('%.3f\t\t%.3f' % (metric_test, forest_numleaves))

        print('\nFinal forest stats:')
        f_stats.write('Final forest stats: \n')
        # Column 0: number of leaves, column 1: number of internal nodes.
        tree_stats = np.zeros((settings.n_mondrians, 2))
        tree_average_depth = np.zeros(settings.n_mondrians)
        for i_t, tree in enumerate(mf.forest):
            tree_stats[i_t, -2:] = np.array(
                [len(tree.leaf_nodes),
                 len(tree.non_leaf_nodes)])
            tree_average_depth[i_t] = tree.get_average_depth(settings,
                                                             data)[0]
        print('mean(num_leaves) = %.1f, mean(num_non_leaves) = %.1f, mean(tree_average_depth) = %.1f'
              % (np.mean(tree_stats[:, -2]), np.mean(tree_stats[:, -1]), np.mean(tree_average_depth)))
        print('n_train = %d, log_2(n_train) = %.1f, mean(tree_average_depth) = %.1f +- %.1f'
              % (data['n_train'], np.log2(data['n_train']), np.mean(tree_average_depth), np.std(tree_average_depth)))

        f_stats.write('mean(num_leaves) = ' + str(np.mean(tree_stats[:, -2])))
        f_stats.write('  mean(num_non_leaves) = ' +
                      str(np.mean(tree_stats[:, -1])))
        f_stats.write('  mean(tree_average_depth) = ' +
                      str(np.mean(tree_average_depth)) + '\n')

        f_stats.write('n_train = ' + str(data['n_train']))
        f_stats.write('  log_2(n_train) = ' + str(np.log2(data['n_train'])))
        f_stats.write('  mmean(tree_average_depth) = ' +
                      str(np.mean(tree_average_depth)) + ' +- ' +
                      str(np.std(tree_average_depth)) + '\n')

        f_stats.write(
            '\n------------------------------------------------------------------\n'
        )

        print('\n...evaluation done.')
        print('Computing confusion matrices...')
        uf_dir = settings.data_path + '/unary_csv'
        labels_dir = settings.data_path + '/labels_csv'
        cm_res_dir = '../results/cm'
        for file_name in test_list:
            curr_uf_csv = uf_dir + '/' + file_name
            curr_labels_csv = labels_dir + '/' + file_name
            x_df = pd.read_csv(curr_uf_csv, usecols=unary_features)
            y_df = pd.read_csv(curr_labels_csv, dtype=int)
            x_test = x_df.to_numpy()
            y_test = y_df.to_numpy()
            # Flatten the single label column into a 1-D vector.
            y_test.shape = (y_test.shape[0], )

            if settings.normalize_features == 1:
                # Rescale this file's features with the per-column min and
                # range taken over data['x_train'] and data['x_test'].
                min_d = np.minimum(np.min(data['x_train'], 0),
                                   np.min(data['x_test'], 0))
                max_d = np.maximum(np.max(data['x_train'], 0),
                                   np.max(data['x_test'], 0))
                range_d = max_d - min_d
                idx_range_d_small = range_d <= 0.  # find columns where all features are identical
                if data['n_dim'] > 1:
                    range_d[
                        idx_range_d_small] = 1e-3  # non-zero value just to prevent division by 0
                elif idx_range_d_small:
                    range_d = 1e-3
                x_test -= min_d + 0.
                x_test /= range_d

            # The uniform tree weights computed above are reused here; the
            # separate per-file copy was built but never passed on.
            cm_pred_forest_test, cm_metrics_test = mf.evaluate_predictions(
                data, x_test, y_test, settings, param, weights_prediction,
                False)

            #y_test_pred = get_y_pred(cm_pred_forest_test['pred_prob'])
            y_test_pred = get_label_predictions(
                cm_pred_forest_test['pred_prob'])

            f_stats.write(str(file_name) + '\n')

            print('y_test')
            print(y_test[:25])

            print('y_test_pred')
            print(y_test_pred[:25])

            f_stats.write('\n y_test     y_mf_pred: \n')
            # Log at most 25 pairs; slicing copes with files that hold fewer
            # rows, where fixed indexing 0..24 raised IndexError.
            for target, predicted in zip(y_test[:25], y_test_pred[:25]):
                # One space less after two-digit labels keeps columns aligned.
                gap = '       #  ' if target < 10 else '      #  '
                f_stats.write('#  ' + str(target) + gap + str(predicted) +
                              '\n')

            f_stats.write('\n---------------------------------------\n')

            cm = compute_confusion_matrix(y_test, y_test_pred, print_cm=False)
            cm_path = cm_res_dir + '/' + file_name
            #np.savetxt(cm_path, cm, delimiter=",")

            # SAVE PREDICTIONS ON CORRESPONDING PCD (disabled)
            # end_idx = file_name.rfind('.')
            # input_path = '/home/alberto/tesi/dataset/NYUDV2/trained_semseg_data/clustering/'
            # pcd_name = file_name[0:end_idx] + '.pcd'
            # print pcd_name
            # pcd_path = input_path + pcd_name
            # input_cloud = pypcd.PointCloud.from_path(pcd_path)
            #
            # point_x_list = input_cloud.pc_data['x']
            # point_y_list = input_cloud.pc_data['y']
            # point_z_list = input_cloud.pc_data['z']
            # cluster_idx_list = input_cloud.pc_data['label']
            #
            # new_cloud = input_cloud.pc_data.copy
            # new_cloud = input_cloud.pc_data.view(np.float32).reshape(input_cloud.pc_data.shape + (-1,))
            #
            # print 'Cluster cloud shape:'
            # print new_cloud.shape
            #
            # print 'Cluster label length: %d' % (len(cluster_idx_list))
            #
            # for n in range(new_cloud.shape[0]):
            #     new_cloud[n][0] = point_x_list[n]
            #     new_cloud[n][1] = point_y_list[n]
            #     new_cloud[n][2] = point_z_list[n]
            #     if(cluster_idx_list[n] > 4000):
            #         new_cloud[n][3] = 0
            #     else:
            #         new_cloud[n][3] = y_test_pred[cluster_idx_list[n]]
            #
            # #res_pcd = pypcd.make_xyz_rgb_point_cloud(new_cloud)
            # res_pcd = pypcd.make_xyz_label_point_cloud(new_cloud)
            # output_path = '/home/alberto/tesi/mondrianforest/src/results/pcd/'+pcd_name
            # res_pcd.save(output_path)

        print('...computation done.\n END.')
Example #3
0

# Read the run configuration from the command line and echo it
settings = process_command_line()
print('Current settings:')
pp.pprint(vars(settings))

# Reset the random seed according to the settings
reset_random_seed(settings)

# Load the dataset, then build the helpers and the forest that depend on it
data = load_dataset(settings)
param, cache = precompute_minimal(data, settings)
mf = MondrianForest(settings, data)

print('\nminibatch\tmetric_train\tmetric_test\tnum_leaves')

for idx_minibatch in range(settings.n_minibatches):
    train_ids_current_minibatch = data['train_ids_partition']['current'][
        idx_minibatch]
    if idx_minibatch > 0:
        # Every later minibatch extends the existing forest online
        mf.partial_fit(data, train_ids_current_minibatch, settings, param,
                       cache)
    else:
        # The very first minibatch trains the forest in batch mode
        mf.fit(data, train_ids_current_minibatch, settings, param, cache)

    # Evaluate
Example #4
0
from mondrianforest import process_command_line, MondrianForest

# Scratch script: fit a Mondrian forest on five training points, extend it
# with five more, and print the first tree's `counts` after each step.
# Uses Python 2 print statements.

# Read the run configuration from the command line and echo it
settings = process_command_line()
print 'Current settings:'
pp.pprint(vars(settings))

# Resetting random seed
reset_random_seed(settings)

# Loading data (and dumping it, together with the settings type, for inspection)
data = load_data(settings)
print "Data: ", data
print type(settings)

param, cache = precompute_minimal(data, settings)

mf = MondrianForest(settings, data)

# Bare lookup: prints nothing when run as a script, but raises KeyError
# if the 'x_test' key is absent.
data['x_test']
print(data)

# Ids of the first minibatch; only its first ten entries are used below.
train_ids_current_minibatch = data['train_ids_partition']['current'][0]
print train_ids_current_minibatch.shape

print "First batch train on 5 data points"
mf.fit(data, train_ids_current_minibatch[0:5], settings, param, cache)
print mf.forest[0].counts
print "\nNow the extension\n"
# Online update with the next five ids of the same minibatch.
mf.partial_fit(data, train_ids_current_minibatch[5:10], settings, param, cache)
print mf.forest[0].counts
Example #5
0
File: ORF.py Project: teamZeta/arp
class flow(object):
    """Patch tracker that scores candidate windows with a Mondrian forest.

    The constructor samples positive patches (small shifts of the initial
    region) and negative patches (windows away from it) from the first
    frame and trains a ``MondrianForest`` on their flattened grey values.
    ``track`` scans a search window in a new frame, moves to the patch with
    the highest class-1 probability and then calls ``updateTree`` to extend
    the forest with patches sampled around the new position.

    Index arithmetic uses floor division (``//``); for the integer
    coordinates the slicing already requires, the results are identical to
    the former ``/``.
    """

    def __init__(self, image, region):
        """Sample training patches from the first frame and fit the forest.

        Args:
            image: first frame; converted with ``cv2.COLOR_RGB2GRAY``, so
                presumably a 3-channel array (TODO confirm channel order).
            region: object exposing ``x``, ``y``, ``width`` and ``height``
                of the initial target box.

        Side effects: writes ``file<N>.png`` debug images of every sampled
        patch into the current working directory and prints debug output.
        """
        self.window = max(region.width, region.height) * 2

        left = max(region.x, 0)
        top = max(region.y, 0)

        right = min(region.x + region.width, image.shape[1] - 1)
        bottom = min(region.y + region.height, image.shape[0] - 1)

        # Force even width/height so the box splits into two equal halves.
        if (right - left) % 2 != 0:
            right -= 1
        if (bottom - top) % 2 != 0:
            bottom -= 1

        width = right - left
        height = bottom - top
        half_w = width // 2
        half_h = height // 2

        self.template = image[top:bottom, left:right]
        self.position = (region.x + region.width / 2, region.y + region.height / 2)
        self.size = (region.width, region.height)
        self.old_img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        self.hsv = np.zeros_like(image)
        self.hsv[..., 1] = 255

        self.pos = np.array([image[top:bottom, left:right].copy().tolist()])
        self.neg = np.array([])

        # Positive samples: the box itself plus five random shifts of at
        # most 10% of its size that stay inside the image.
        vred = [(0, 0)]
        while (1):
            l = random.randint(-int(width * 0.1), int(width * 0.1))
            t = random.randint(-int(height * 0.1), int(height * 0.1))
            if l + left >= 0 and l + right < image.shape[1] and t + top >= 0 and t + bottom < image.shape[0]:
                vred += [(l, t)]
            if len(vred) > 5:
                break
        self.pos = np.array(
            [np.array(self.old_img[top + t:bottom + t, left + l:right + l].copy().tolist()) for (l, t) in vred])

        # Negative samples: box-sized windows whose centre is more than one
        # box width AND more than one box height away from the target
        # centre. The attempt counter stops the search when too few such
        # windows exist.
        vred = []
        infloop = 0
        while (1):
            l = random.randint(half_w, image.shape[1] - half_w - 1)
            t = random.randint(half_h, image.shape[0] - half_h - 1)
            if abs(l - left - half_w) > width and abs(t - top - half_h) > height:
                vred += [(l, t)]
            if len(vred) > 5 or infloop > 10000:
                break
            infloop += 1
        self.neg = np.array(
            [np.array(self.old_img[t - half_h:t + half_h,
                      l - half_w:l + half_w].copy().tolist()) for (l, t) in vred])

        # Debug dump of the positive patches: file1.png, file2.png, ...
        print("tu je image")
        for offset, patch in enumerate(self.pos):
            cv2.imwrite("file" + str(1 + offset) + ".png", patch)

        # Hard-coded forest settings, wrapped in a Map before use.
        settings_dict = {'optype': 'class', 'verbose': 1, 'draw_mondrian': 0, 'perf_dataset_keys': ['train', 'test'],
                         'data_path': '../../process_data/', 'dataset': 'toy-mf', 'tag': '', 'alpha': 0, 'bagging': 0,
                         'select_features': 0, 'smooth_hierarchically': 1, 'normalize_features': 1, 'min_samples_split': 2,
                         'save': 0, 'discount_factor': 10, 'op_dir': 'results', 'init_id': 1, 'store_every': 0,
                         'perf_store_keys': ['pred_prob'], 'perf_metrics_keys': ['log_prob', 'acc'], 'budget': -1.0,
                         'n_mondrians': 10, 'debug': 0, 'n_minibatches': 2, 'name_metric': 'acc', 'budget_to_use': inf}
        self.settings = Map(settings_dict)
        # Resetting random seed
        reset_random_seed(self.settings)

        # Debug dump of the negative patches: file-30.png, file-31.png, ...
        for offset, patch in enumerate(self.neg):
            cv2.imwrite("file" + str(-30 - offset) + ".png", patch)

        # One flattened row per patch: positives (label 1) first, then
        # negatives (label 0).
        x_trainp = [x.flatten().tolist() for x in self.pos]
        x_trainn = [x.flatten().tolist() for x in self.neg]
        x_train = x_trainp + x_trainn
        print(len(x_train[0]))
        # NOTE(review): 'n_dim' is fixed to 1 although each row holds one
        # value per pixel - confirm what precompute_minimal expects.
        self.data = {'n_dim': 1, 'x_test':
                array([x_train[5]]),
                'x_train': array(x_train),
                'y_train': array(np.ones(len(self.pos)).astype(int).tolist() + np.zeros(len(self.neg)).astype(int).tolist()), 'is_sparse': False, 'n_train': len(x_train), 'n_class': 2,
                'y_test': array([]),
                'n_test': 0}

        self.param, self.cache = precompute_minimal(self.data, self.settings)

        self.mf = MondrianForest(self.settings, self.data)

        # First half of the rows trains the forest in batch mode, the second
        # half is added online (n_minibatches is 2 in the settings above).
        for idx_minibatch in range(self.settings.n_minibatches):
            #train_ids_current_minibatch = self.data['train_ids_partition']['current'][idx_minibatch]
            if idx_minibatch == 0:
                # Batch training for first minibatch
                self.mf.fit(self.data, array(range(0, len(x_train) // 2)), self.settings, self.param, self.cache)
            else:
                # Online update
                self.mf.partial_fit(self.data, array(range(len(x_train) // 2, len(x_train))), self.settings, self.param, self.cache)
                print("updatalo je")

        # Sanity check: print the prediction for the sixth training row.
        weights_prediction = np.ones(self.settings.n_mondrians) * 1.0 / self.settings.n_mondrians
        #train_ids_cumulative = self.data['train_ids_partition']['cumulative'][idx_minibatch]
        print(self.mf.evaluate_predictions(self.data, array([x_train[5]]), [1],
                                           self.settings, self.param, weights_prediction, False))

    def set_region(self, position):
        """Overwrite the tracked centre position with ``position``."""
        self.position = position

    def _fallback_rectangle(self):
        """Return the rectangle reported when no candidate can be scored."""
        return vot.Rectangle(self.position[0] + self.size[0] / 2, self.position[1] + self.size[1] / 2, self.size[0],
                             self.size[1])

    def updateTree(self, image):
        """Extend the forest with patches sampled around ``self.position``.

        Positive and negative patches are drawn from the grey version of
        ``image`` with the same scheme as in ``__init__``, appended to
        ``self.data['x_train']`` / ``self.data['y_train']`` and passed to
        ``partial_fit`` by their new row indices.

        NOTE(review): ``self.data['n_train']``, ``self.param`` and
        ``self.cache`` are not refreshed after the append - confirm that
        ``partial_fit`` does not need them updated.
        """
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        # Half height / half width of the template fix the patch size.
        t = len(self.template) // 2
        l = len(self.template[0]) // 2
        left = self.position[0] - l
        top = self.position[1] - t
        right = self.position[0] + l
        bottom = self.position[1] + t

        width = right - left
        height = bottom - top
        half_w = width // 2
        half_h = height // 2

        #self.pos = np.array([image[top:bottom, left:right].copy().tolist()])
        #self.neg = np.array([])
        print(t)
        print(l)
        vred = [(0, 0)]
        while (1):
            l2 = random.randint(-int(width * 0.1), int(width * 0.1))
            t2 = random.randint(-int(height * 0.1), int(height * 0.1))
            # The lower bound must be checked with the sampled offset t2;
            # it used to be checked with the template half-height t.
            if l2 + left >= 0 and l2 + right < image.shape[1] and t2 + top >= 0 and t2 + bottom < image.shape[0]:
                vred += [(l2, t2)]
            if len(vred) > 5:
                break
        self.pos = np.array(
            [np.array(image[top + t2:bottom + t2, left + l2:right + l2].copy().tolist()) for (l2, t2) in vred])
        vred = []

        infloop = 0

        while (1):
            l2 = random.randint(half_w, image.shape[1] - half_w - 1)
            t2 = random.randint(half_h, image.shape[0] - half_h - 1)
            if abs(l2 - left - half_w) > width and abs(t2 - top - half_h) > height:
                vred += [(l2, t2)]
            if len(vred) > 5 or infloop > 10000:
                break
            infloop += 1
        print(infloop)
        self.neg = np.array(
            [np.array(image[t2 - half_h:t2 + half_h,
                    l2 - half_w:l2 + half_w].tolist()) for (l2, t2) in vred])

        x_trainp = [x.flatten().tolist() for x in self.pos]
        x_trainn = [x.flatten().tolist() for x in self.neg]
        x_train = x_trainp + x_trainn
        self.data['x_train'] = np.append(self.data['x_train'], array(x_train), axis=0)
        self.data['y_train'] = np.append(self.data['y_train'], array(np.ones(len(self.pos)).astype(int).tolist() + np.zeros(len(self.neg)).astype(int).tolist()))
        #self.param, self.cache = precompute_minimal(self.data, self.settings)
        # Train only on the rows that were just appended.
        n_rows = len(self.data['x_train'])
        self.mf.partial_fit(self.data, array(range(n_rows - len(x_train), n_rows)), self.settings, self.param, self.cache)

    def track(self, image):
        """Locate the target in ``image`` and return it as a ``vot.Rectangle``.

        Template-sized patches are scored on a 15-pixel grid inside a search
        window of side ``self.window`` centred on the current position. The
        patch with the highest class-1 probability becomes the new position,
        after which the forest is updated via ``updateTree``.

        When the clipped search window is smaller than the template, or
        leaves no room for a single candidate, the fallback rectangle is
        returned and neither the position nor the forest is changed.
        """
        image2 = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        left = int(max(round(self.position[0] - float(self.window) / 2), 0))
        top = int(max(round(self.position[1] - float(self.window) / 2), 0))
        right = int(min(round(self.position[0] + float(self.window) / 2), image2.shape[1] - 1))
        bottom = int(min(round(self.position[1] + float(self.window) / 2), image2.shape[0] - 1))

        if right - left < self.template.shape[1] or bottom - top < self.template.shape[0]:
            return self._fallback_rectangle()

        cut = image2[top:bottom, left:right]

        t = len(self.template) // 2
        l = len(self.template[0]) // 2
        weights_prediction = np.ones(self.settings.n_mondrians) * 1.0 / self.settings.n_mondrians
        # Candidates as (class-1 probability, row, column) of the patch centre.
        predicta = []
        for i in range(t, bottom - top - t, 15):
            for j in range(l, right - left - l, 15):
                imclass = cut[i - t:i + t, j - l:j + l]
                pred = self.mf.evaluate_predictions(self.data, array([imclass.flatten().tolist()]), [1],
                                                    self.settings, self.param, weights_prediction, False)[0]
                #print(pred['pred_prob'][0])
                predicta += [(pred['pred_prob'][0][1], i, j)]

        if not predicta:
            # A window exactly as large as the template yields empty scan
            # ranges; indexing predicta[0] used to raise IndexError here.
            return self._fallback_rectangle()

        # max() returns the first maximal element, exactly like the former
        # strict-less-than scan.
        terk = max(predicta, key=lambda candidate: candidate[0])

        print(terk)

        # matches = cv2.matchTemplate(cut, self.template, cv2.TM_CCOEFF_NORMED)
        # min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(matches)
        # image[top:bottom, left:right] = rgb
        self.position = (left + terk[2], top + terk[1])
        self.updateTree(image)
        # a = plt.imshow(rgb)
        return vot.Rectangle(left + terk[2] - l, top + terk[1] - t, self.size[0], self.size[1])
Example #6
0
File: ORF.py Project: teamZeta/arp
    def __init__(self, image, region):
        """Sample training patches from the first frame and fit the forest.

        Positive patches are the initial box plus small random shifts of it;
        negative patches are box-sized windows away from the box. Their
        flattened grey values are used to train a MondrianForest in two
        steps (batch fit, then one online update).

        Args:
            image: first frame; converted with cv2.COLOR_RGB2GRAY, so
                presumably a 3-channel array (TODO confirm channel order).
            region: object exposing x, y, width and height of the initial box.

        Side effects: writes file<N>.png debug images of every sampled patch
        into the current working directory and prints debug output.
        """

        # Side length of the search window: twice the larger box dimension.
        self.window = max(region.width, region.height) * 2

        # Clip the box to the image.
        left = max(region.x, 0)
        top = max(region.y, 0)

        right = min(region.x + region.width, image.shape[1] - 1)
        bottom = min(region.y + region.height, image.shape[0] - 1)

        # Force even width/height so the box splits into two equal halves.
        if (right - left) % 2 != 0:
            right -= 1
        if (bottom - top) % 2 != 0:
            bottom -= 1

        self.template = image[top:bottom, left:right]
        self.position = (region.x + region.width / 2, region.y + region.height / 2)
        self.size = (region.width, region.height)
        self.old_img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        self.hsv = np.zeros_like(image)
        self.hsv[..., 1] = 255

        # Both attributes are overwritten below before they are read.
        self.pos = np.array([image[top:bottom, left:right].copy().tolist()])
        self.neg = np.array([])

        # Positive samples: the box itself plus five random shifts of at most
        # 10% of its size that keep the shifted box inside the image.
        vred = [(0, 0)]
        while (1):
            l = random.randint(-int((right - left) * 0.1), int((right - left) * 0.1))
            t = random.randint(-int((bottom - top) * 0.1), int((bottom - top) * 0.1))
            if l + left >= 0 and l + right < image.shape[1] and t + top >= 0 and t + bottom < image.shape[0]:
                vred += [(l, t)]
            if len(vred) > 5:
                break
        self.pos = np.array(
            [np.array(self.old_img[top + t:bottom + t, left + l:right + l].copy().tolist()) for (l, t) in vred])
        # Negative samples: window centres more than one box width AND more
        # than one box height away from the box centre. infloop caps the
        # number of attempts, so fewer than six negatives may be collected.
        vred = []
        infloop = 0
        while (1):
            l = random.randint((right - left) / 2, image.shape[1] - (right - left) / 2 - 1)
            t = random.randint((bottom - top) / 2, image.shape[0] - (bottom - top) / 2 - 1)
            if abs(l - left - (right - left) / 2) > (right - left) and abs(t - top - (bottom - top) / 2) > (
                        bottom - top):
                vred += [(l, t)]
            if len(vred) > 5 or infloop > 10000:
                break
            infloop+=1
        self.neg = np.array(
            [np.array(self.old_img[t - (bottom - top) / 2:t + (bottom - top) / 2,
                      l - (right - left) / 2:l + (right - left) / 2].copy().tolist()) for (l, t) in vred])

        # Debug dump of the positive patches: file1.png, file2.png, ...
        print("tu je image")
        stevec = 1
        for i in self.pos:
            cv2.imwrite("file" + str(stevec) + ".png", i)
            stevec += 1


        # Hard-coded forest settings (wrapped in a Map), then reset the random seed.
        # NOTE: the local name `set` shadows the builtin inside this method.
        set = {'optype': 'class', 'verbose': 1, 'draw_mondrian': 0, 'perf_dataset_keys': ['train', 'test'],
               'data_path': '../../process_data/', 'dataset': 'toy-mf', 'tag': '', 'alpha': 0, 'bagging': 0,
               'select_features': 0, 'smooth_hierarchically': 1, 'normalize_features': 1, 'min_samples_split': 2,
               'save': 0, 'discount_factor': 10, 'op_dir': 'results', 'init_id': 1, 'store_every': 0,
               'perf_store_keys': ['pred_prob'], 'perf_metrics_keys': ['log_prob', 'acc'], 'budget': -1.0,
               'n_mondrians': 10, 'debug': 0, 'n_minibatches': 2, 'name_metric': 'acc', 'budget_to_use': inf}
        self.settings = Map(set)
        reset_random_seed(self.settings)
        # Debug dump of the negative patches: file-30.png, file-31.png, ...
        stevec = -30
        for i in self.neg:
            cv2.imwrite("file" + str(stevec) + ".png", i)
            stevec -= 1
        # One flattened row per patch: positives (label 1) first, then
        # negatives (label 0).
        x_trainp = [x.flatten().tolist() for x in self.pos]
        x_trainn = [x.flatten().tolist() for x in self.neg]
        x_train = x_trainp + x_trainn
        print(len(x_train[0]))
        # 'x_test' reuses the sixth training row; 'y_test' is empty.
        # NOTE(review): 'n_dim' is fixed to 1 although each row holds one
        # value per pixel - confirm what precompute_minimal expects.
        self.data = {'n_dim': 1, 'x_test':
                array([x_train[5]]),
                'x_train': array(x_train),
                'y_train': array(np.ones(len(self.pos)).astype(int).tolist() + np.zeros(len(self.neg)).astype(int).tolist()), 'is_sparse': False, 'n_train': len(x_train), 'n_class': 2,
                'y_test': array([]),
                'n_test': 0}

        self.param, self.cache = precompute_minimal(self.data, self.settings)

        self.mf = MondrianForest(self.settings, self.data)

        # n_minibatches is 2 above: the first half of the rows is fitted in
        # batch mode, the second half is added with an online update.
        for idx_minibatch in range(self.settings.n_minibatches):
            #train_ids_current_minibatch = self.data['train_ids_partition']['current'][idx_minibatch]
            if idx_minibatch == 0:
                # Batch training for first minibatch
                self.mf.fit(self.data, array(range(0, len(x_train)/2)), self.settings, self.param, self.cache)
            else:
                # Online update
                self.mf.partial_fit(self.data, array(range(len(x_train)/2, len(x_train))), self.settings, self.param, self.cache)
                print("updatalo je")

        # Sanity check: print the prediction for the sixth training row,
        # with every tree weighted equally.
        weights_prediction = np.ones(self.settings.n_mondrians) * 1.0 / self.settings.n_mondrians
        #train_ids_cumulative = self.data['train_ids_partition']['cumulative'][idx_minibatch]
        print(self.mf.evaluate_predictions(self.data, array([x_train[5]]), [1], \
                    self.settings, self.param, weights_prediction, False))
Example #7
0
File: ORF.py Project: teamZeta/arp2
    def __init__(self, img, region):
        """Build hue-histogram training data from the first frame and fit the forest.

        The frame is converted to HSV and only the quantised first (hue)
        channel is kept. Positive patches are the initial box plus small
        random shifts; negative patches are box-sized windows away from the
        box. Each patch is reduced to a histogram of its values
        (np.bincount) and a MondrianForest is fitted on all rows at once.

        Args:
            img: first frame, converted with cv2.COLOR_BGR2HSV.
            region: object exposing x, y, width and height of the initial box.
        """
        image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        # Quantise every channel by 4.
        # NOTE(review): relies on `/` being integer division on an int array
        # (Python 2 semantics) - confirm the interpreter.
        image = image.astype(int)/4
        h, s, v = cv2.split(image)
        # s and v are shifted here but not used afterwards: only h is kept.
        s+=45
        v+=109
        image = h#cv2.merge((h, s, v))
        # Side length of the search window: twice the larger box dimension.
        self.window = max(region.width, region.height) * 2

        # Clip the box to the image and cast to int for slicing.
        left = int(max(region.x, 0))
        top = int(max(region.y, 0))

        right = int(min(region.x + region.width, image.shape[1] - 1))
        bottom = int(min(region.y + region.height, image.shape[0] - 1))

        # Force even width/height so the box splits into two equal halves.
        if (right - left) % 2 != 0:
            right -= 1
        if (bottom - top) % 2 != 0:
            bottom -= 1

        self.template = image[int(top):int(bottom), int(left):int(right)]
        self.position = (region.x + region.width / 2, region.y + region.height / 2)
        self.size = (region.width, region.height)
        self.old_img = image
        # Both attributes are overwritten below before they are read.
        self.pos = np.array([image[int(top):int(bottom), int(left):int(right)].copy().tolist()])
        self.neg = np.array([])

        # Positive samples: the box itself plus up to 15 random shifts of at
        # most 3% of its size that keep the shifted box inside the image.
        # infloop caps the number of attempts.
        vred = [(0, 0)]
        infloop = 0
        while(1):
            l = random.randint(-int((right - left) * 0.03), int((right - left) * 0.03))
            t = random.randint(-int((bottom - top) * 0.03), int((bottom - top) * 0.03))
            if l + left >= 0 and l + right < image.shape[1] and t + top >= 0 and t + bottom < image.shape[0]:
                vred += [(l, t)]
            if len(vred) > 15 or infloop > 10000:
                break
            infloop+=1

        self.pos = np.array(
            [np.array(self.old_img[top + int(t2):bottom + int(t2), left + int(l2):right + int(l2)].copy().tolist()) for (l2, t2) in vred])
        # Negative samples: up to 46 window centres that are more than one
        # box width OR more than one box height away from the box centre.
        vred = []
        infloop = 0
        while (1):
            l = random.randint(int((right - left) / 2), int(image.shape[1] - (right - left) / 2 - 1))
            t = random.randint(int((bottom - top) / 2), int(image.shape[0] - (bottom - top) / 2 - 1))
            if abs(l - left - (right - left) / 2) > (right - left) or abs(t - top - (bottom - top) / 2) > (
                        bottom - top):
                vred += [(l, t)]
            if len(vred) > 45 or infloop > 10000:
                break
            infloop+=1
        self.neg = np.array(
            [np.array(self.old_img[int(t2) - (bottom - top) / 2:int(t2) + (bottom - top) / 2,
                      int(l2) - (right - left) / 2:int(l2) + (right - left) / 2].copy().tolist()) for (l2, t2) in vred])
        print("pred update")
        print("neg" + str(len(self.neg)))
        print("pos" + str(len(self.pos)))

        # Hard-coded forest settings (wrapped in a Map); a single minibatch here.
        # NOTE: the local name `set` shadows the builtin inside this method.
        set = {'optype': 'class', 'verbose': 1, 'draw_mondrian': 0, 'perf_dataset_keys': ['train', 'test'],
               'data_path': '../../process_data/', 'dataset': 'toy-mf', 'tag': '', 'alpha': 0, 'bagging': 0,
               'select_features': 0, 'smooth_hierarchically': 1, 'normalize_features': 1, 'min_samples_split': 2,
               'save': 0, 'discount_factor': 10, 'op_dir': 'results', 'init_id': 1, 'store_every': 0,
               'perf_store_keys': ['pred_prob'], 'perf_metrics_keys': ['log_prob', 'acc'], 'budget': -1.0,
               'n_mondrians': 10, 'debug': 0, 'n_minibatches': 1, 'name_metric': 'acc', 'budget_to_use': inf}
        self.settings = Map(set)
        reset_random_seed(self.settings)

        # One histogram row per patch, at least 45 bins wide.
        # Presumably 45 covers 8-bit OpenCV hue (0..179) divided by 4 - TODO confirm.
        x_trainp = np.array([np.bincount(x.flatten().astype(int),minlength=45) for x in self.pos])
        x_trainn = np.array([np.bincount(x.flatten().astype(int),minlength=45) for x in self.neg])
        # Positives (label 1) first, then negatives (label 0).
        x_train = np.append(x_trainp, x_trainn, axis=0)

        # 'x_test' reuses the sixth training row; 'y_test' is empty.
        # NOTE(review): 'n_dim' is fixed to 1 although each row is a
        # histogram - confirm what precompute_minimal expects.
        self.data = {'n_dim': 1, 'x_test':
                array([x_train[5]]),
                'x_train': array(x_train),
                'y_train': array(np.ones(len(self.pos)).astype(int).tolist() + np.zeros(len(self.neg)).astype(int).tolist()), 'is_sparse': False, 'n_train': len(x_train), 'n_class': 2,
                'y_test': array([]),
                'n_test': 0}

        self.param, self.cache = precompute_minimal(self.data, self.settings)
        self.mf = MondrianForest(self.settings, self.data)
        # Batch-fit on every row at once.
        self.mf.fit(self.data, array(range(0, len(x_train))), self.settings, self.param, self.cache)
        print("kones")
PLOT = False

# Read the run configuration from the command line and echo it
settings = process_command_line()
print('Current settings:')
pp.pprint(vars(settings))

# Reset the random seed according to the settings
reset_random_seed(settings)

# Load the dataset, then build the helpers and the forest that depend on it
data = load_data(settings)
param, cache = precompute_minimal(data, settings)
mf = MondrianForest(settings, data)

print('\nminibatch\tmetric_train\tmetric_test\tnum_leaves')

for idx_minibatch in range(settings.n_minibatches):
    train_ids_current_minibatch = data['train_ids_partition']['current'][idx_minibatch]
    if idx_minibatch > 0:
        # Every later minibatch extends the existing forest online
        mf.partial_fit(data, train_ids_current_minibatch, settings, param, cache)
    else:
        # The very first minibatch trains the forest in batch mode
        mf.fit(data, train_ids_current_minibatch, settings, param, cache)

    # Evaluate: uniform tree weights and the ids seen so far
    weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
    train_ids_cumulative = data['train_ids_partition']['cumulative'][idx_minibatch]
Example #9
0
File: ORF.py Project: teamZeta/arp2
class flow(object):
    """Hue-histogram tracker that uses a Mondrian forest as classifier.

    ``self.position`` holds the centre of the target and ``self.size`` its
    ``(width, height)``.  Image patches are described by a 45-bin histogram
    of their quantised hue values; patches taken around the target are
    labelled 1, patches taken away from it are labelled 0.
    """

    # Number of histogram bins (the ``minlength`` passed to ``np.bincount``).
    _N_BINS = 45
    # Upper bound on the number of retries of the random sampling loops.
    _MAX_ATTEMPTS = 10000

    def __init__(self, img, region):
        """Collect the first training samples from ``img`` and fit the forest.

        Args:
            img: image handed to ``cv2.cvtColor`` with ``COLOR_BGR2HSV``.
            region: object with ``x``, ``y``, ``width`` and ``height``
                attributes describing the target box (``x``/``y`` being its
                top-left corner).
        """
        image = self._hue_bins(img)
        self.window = max(region.width, region.height) * 2

        left = int(max(region.x, 0))
        top = int(max(region.y, 0))
        right = int(min(region.x + region.width, image.shape[1] - 1))
        bottom = int(min(region.y + region.height, image.shape[0] - 1))

        # Keep both extents even so that the half sizes used for the
        # negative patches are exact integers.
        if (right - left) % 2 != 0:
            right -= 1
        if (bottom - top) % 2 != 0:
            bottom -= 1

        self.template = image[top:bottom, left:right]
        self.position = (region.x + region.width / 2, region.y + region.height / 2)
        self.size = (region.width, region.height)
        self.old_img = image

        # Positives: the target box plus up to 15 slightly shifted copies.
        offsets = self._jitter_offsets(image.shape, left, top, right, bottom, 15)
        self.pos = self._shifted_patches(image, left, top, right, bottom, offsets)
        # Negatives: boxes whose centre is far from the target in x OR y.
        centers = self._background_centers(
            image.shape, left, top, right, bottom, 45, False)
        self.neg = self._centered_patches(image, centers, right - left, bottom - top)
        print("pred update")
        print("neg" + str(len(self.neg)))
        print("pos" + str(len(self.pos)))

        options = {'optype': 'class', 'verbose': 1, 'draw_mondrian': 0,
                   'perf_dataset_keys': ['train', 'test'],
                   'data_path': '../../process_data/', 'dataset': 'toy-mf',
                   'tag': '', 'alpha': 0, 'bagging': 0, 'select_features': 0,
                   'smooth_hierarchically': 1, 'normalize_features': 1,
                   'min_samples_split': 2, 'save': 0, 'discount_factor': 10,
                   'op_dir': 'results', 'init_id': 1, 'store_every': 0,
                   'perf_store_keys': ['pred_prob'],
                   'perf_metrics_keys': ['log_prob', 'acc'], 'budget': -1.0,
                   'n_mondrians': 10, 'debug': 0, 'n_minibatches': 1,
                   'name_metric': 'acc', 'budget_to_use': np.inf}
        self.settings = Map(options)
        # The seed is reset only after the patches were sampled, as before.
        reset_random_seed(self.settings)

        x_train = self._training_matrix()

        # NOTE(review): every feature row has at least 45 columns while
        # 'n_dim' is 1 -- confirm what MondrianForest expects here.
        self.data = {
            'n_dim': 1,
            # The original always took row 5; fall back to the last row
            # when fewer than six samples could be collected.
            'x_test': np.array([x_train[min(5, len(x_train) - 1)]]),
            'x_train': np.array(x_train),
            'y_train': self._labels(),
            'is_sparse': False,
            'n_train': len(x_train),
            'n_class': 2,
            'y_test': np.array([]),
            'n_test': 0,
        }

        self.param, self.cache = precompute_minimal(self.data, self.settings)
        self.mf = MondrianForest(self.settings, self.data)
        self.mf.fit(self.data, np.arange(len(x_train)), self.settings,
                    self.param, self.cache)
        print("kones")

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _hue_bins(self, img):
        """Return the hue plane of ``img`` floor-divided by 4.

        Saturation and value were computed but never used by the original
        code, so only the hue plane is extracted here.
        """
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        return hsv[:, :, 0].astype(int) // 4

    def _jitter_offsets(self, shape, left, top, right, bottom, wanted):
        """Draw small random ``(dx, dy)`` shifts that keep the box in the image.

        The list always starts with ``(0, 0)``.  Sampling stops once more
        than ``wanted`` shifts were accepted or the retry limit is hit.
        """
        max_dx = int((right - left) * 0.03)
        max_dy = int((bottom - top) * 0.03)
        offsets = [(0, 0)]
        attempts = 0
        while True:
            dx = random.randint(-max_dx, max_dx)
            dy = random.randint(-max_dy, max_dy)
            if (left + dx >= 0 and right + dx < shape[1]
                    and top + dy >= 0 and bottom + dy < shape[0]):
                offsets.append((dx, dy))
            if len(offsets) > wanted or attempts > self._MAX_ATTEMPTS:
                break
            attempts += 1
        return offsets

    def _background_centers(self, shape, left, top, right, bottom, wanted,
                            both_axes):
        """Draw random box centres that lie away from the target box.

        A centre is accepted when its distance to the target centre exceeds
        the box extent along x / y.  With ``both_axes`` false one axis is
        enough, otherwise both axes must satisfy the condition.
        """
        width = right - left
        height = bottom - top
        half_w = width // 2
        half_h = height // 2
        centers = []
        attempts = 0
        while True:
            cx = random.randint(int(half_w), int(shape[1] - half_w - 1))
            cy = random.randint(int(half_h), int(shape[0] - half_h - 1))
            far_x = abs(cx - left - half_w) > width
            far_y = abs(cy - top - half_h) > height
            accepted = (far_x and far_y) if both_axes else (far_x or far_y)
            if accepted:
                centers.append((cx, cy))
            if len(centers) > wanted or attempts > self._MAX_ATTEMPTS:
                break
            attempts += 1
        return centers

    def _shifted_patches(self, image, left, top, right, bottom, offsets):
        """Cut the target box out of ``image`` once per ``(dx, dy)`` shift."""
        return np.array([
            image[int(top + dy):int(bottom + dy),
                  int(left + dx):int(right + dx)].copy()
            for (dx, dy) in offsets
        ])

    def _centered_patches(self, image, centers, width, height):
        """Cut a ``width`` x ``height`` box around every ``(cx, cy)`` centre."""
        half_w = int(width // 2)
        half_h = int(height // 2)
        return np.array([
            image[int(cy) - half_h:int(cy) + half_h,
                  int(cx) - half_w:int(cx) + half_w].copy()
            for (cx, cy) in centers
        ])

    def _histograms(self, patches):
        """Return one hue histogram (``_N_BINS`` bins minimum) per patch."""
        return np.array([
            np.bincount(patch.flatten().astype(int), minlength=self._N_BINS)
            for patch in patches
        ])

    def _training_matrix(self):
        """Stack the histograms of ``self.pos`` followed by ``self.neg``."""
        positives = self._histograms(self.pos)
        if len(self.neg) == 0:
            # Appending an empty 1-D array along axis 0 would raise.
            return positives
        return np.append(positives, self._histograms(self.neg), axis=0)

    def _labels(self):
        """Return label 1 per positive patch followed by 0 per negative one."""
        return np.array([1] * len(self.pos) + [0] * len(self.neg))

    def _object_probability(self, patch, weights_prediction):
        """Return the forest's probability of class 1 for ``patch``."""
        features = np.array(
            [np.bincount(patch.flatten().astype(int), minlength=self._N_BINS)])
        prediction = self.mf.evaluate_predictions(
            self.data, features, [1], self.settings, self.param,
            weights_prediction, False)[0]
        return prediction['pred_prob'][0][1]

    def _target_rectangle(self):
        """Return the current target as a rectangle (top-left corner + size)."""
        return vot.Rectangle(self.position[0] - self.size[0] / 2.0,
                             self.position[1] - self.size[1] / 2.0,
                             self.size[0], self.size[1])

    # ------------------------------------------------------------------
    # public interface
    # ------------------------------------------------------------------
    def reset_position(self, pos):
        """Overwrite the stored target centre with ``pos``."""
        self.position = pos

    def set_position(self, position):
        """Store ``position`` as centre and rewrite ``self.size``.

        NOTE(review): ``self.size`` becomes the four box coordinates
        ``[x0, y0, x1, y1]`` here, whereas every other method treats it as
        ``(width, height)``.  Behaviour is kept unchanged -- confirm intent.
        """
        self.position = (position[0], position[1])
        self.size = [position[0] - self.size[0] / 2, position[1] - self.size[1] / 2,
                     position[0] + self.size[0] / 2, position[1] + self.size[1] / 2]

    def set_region(self, region):
        """Take centre and size from ``region`` (``x``/``y`` = top-left corner).

        An earlier one-argument ``set_region(position)`` definition in this
        class was shadowed by this one and has been removed.
        """
        self.position = (int(region.x + region.width / 2),
                         int(region.y + region.height / 2))
        self.size = (region.width, region.height)

    def updateTree(self, img):
        """Sample new patches around the current position and update the forest.

        Args:
            img: image handed to ``cv2.cvtColor`` with ``COLOR_BGR2HSV``.
        """
        image = self._hue_bins(img)

        half_h = self.template.shape[0] // 2
        half_w = self.template.shape[1] // 2
        left = self.position[0] - half_w
        top = self.position[1] - half_h
        right = self.position[0] + half_w
        bottom = self.position[1] + half_h

        # The original bounds check added the half template height instead
        # of the sampled vertical shift; the helper checks the shift itself.
        offsets = self._jitter_offsets(image.shape, left, top, right, bottom, 5)
        self.pos = self._shifted_patches(image, left, top, right, bottom, offsets)

        # NOTE(review): unlike __init__, a negative centre must be far away
        # along BOTH axes here; kept as in the original.
        centers = self._background_centers(
            image.shape, left, top, right, bottom, 15, True)
        self.neg = self._centered_patches(image, centers, right - left, bottom - top)

        if len(self.neg) > 0:
            print("pred update" + str(len(self.neg[0])))
        else:
            print("neg je 0")
        x_train = self._training_matrix()

        self.data['x_train'] = np.append(self.data['x_train'], np.array(x_train), axis=0)
        self.data['y_train'] = np.append(self.data['y_train'], self._labels())
        n_total = len(self.data['x_train'])
        # Only the rows appended above are passed to the online update.
        self.mf.partial_fit(self.data, np.arange(n_total - len(x_train), n_total),
                            self.settings, self.param, self.cache)

    def track(self, img):
        """Locate the target in ``img``.

        Scans a window of twice the target size around the last position in
        steps of 5 pixels, keeps the most object-like location and then
        tests whether a 10 % smaller or larger box scores even better.

        Returns:
            ``[score, rectangle]``; ``score`` is 0 when the search window is
            smaller than the template or no candidate could be evaluated.
        """
        image2 = self._hue_bins(img)
        left = int(max(round(self.position[0] - self.size[0]), 0))
        top = int(max(round(self.position[1] - self.size[1]), 0))
        right = int(min(round(self.position[0] + self.size[0]), image2.shape[1] - 1))
        bottom = int(min(round(self.position[1] + self.size[1]), image2.shape[0] - 1))
        if (right - left < self.template.shape[1]
                or bottom - top < self.template.shape[0]):
            return [0, self._target_rectangle()]

        cut = image2[top:bottom, left:right]

        # Integer half sizes: they are used as range() and slice arguments.
        half_h = int(self.size[1] // 2)
        half_w = int(self.size[0] // 2)
        weights_prediction = np.ones(self.settings.n_mondrians) * 1.0 / self.settings.n_mondrians

        candidates = []
        for i in range(half_h, bottom - top - half_h, 5):
            for j in range(half_w, right - left - half_w, 5):
                window = cut[i - half_h:i + half_h, j - half_w:j + half_w]
                score = self._object_probability(window, weights_prediction)
                candidates.append((score, i, j))

        if not candidates:
            return [0, self._target_rectangle()]

        # max() keeps the first of several equally good candidates, like the
        # strict '<' comparison loop it replaces.
        best_score, best_row, best_col = max(candidates, key=lambda c: c[0])
        center_x = left + best_col
        center_y = top + best_row

        def probe(scale):
            # Score a box of scale * current size centred on the best
            # location; the start indices are clamped so that they cannot
            # wrap around to the far side of the image.
            dy = half_h * scale
            dx = half_w * scale
            patch = image2[max(int(center_y - dy), 0):int(center_y + dy),
                           max(int(center_x - dx), 0):int(center_x + dx)]
            return self._object_probability(patch, weights_prediction)

        smaller = probe(0.9)
        larger = probe(1.1)

        if larger > smaller:
            if larger > best_score:
                self.size = (int(self.size[0] * 1.1), int(self.size[1] * 1.1))
        else:
            if smaller > best_score:
                self.size = (int(self.size[0] * 0.9), int(self.size[1] * 0.9))

        self.position = (center_x, center_y)
        # The corner is derived from the final size, not from the scaled
        # half size left over from the probing step.
        return [best_score, self._target_rectangle()]
Example #10
0
def execute_mf():
    """
    Executing the algorithm. Train and predict stepwise.
    Tracking execution time for training and predicting.

    For every minibatch the forest is trained (batch fit for the first one,
    online update afterwards as long as ``model_space_below_limit()`` holds),
    evaluated on all training points seen so far and on the next slice of
    the test set, and a result line is printed.  Statistics are handed to
    ``process_statistics`` when ``settings.save == 1`` and always to
    ``print_statistics``.
    """

    # Get required data
    settings, data, param, cache = pre_processing()

    # Track time data for execution
    time_method_sans_init = 0.
    time_prediction = 0.

    # Get Mondrian Forest
    mf = MondrianForest(settings, data)

    print('\nminibatch\tmetric_test\tnum_leaves')

    start_pos = 0
    number_batches = settings.n_minibatches
    accuracy = []

    if settings.store_every:
        # Per-minibatch traces, pre-filled with sentinel values.
        log_prob_test_minibatch = -np.inf * np.ones(settings.n_minibatches)
        log_prob_train_minibatch = -np.inf * np.ones(settings.n_minibatches)
        metric_test_minibatch = -np.inf * np.ones(settings.n_minibatches)
        metric_train_minibatch = -np.inf * np.ones(settings.n_minibatches)
        time_method_minibatch = np.inf * np.ones(settings.n_minibatches)
        forest_numleaves_minibatch = np.zeros(settings.n_minibatches)

    # Bunch of information for later analysis
    pred_prob_overall_test = []

    # Algorithm execution
    for idx_minibatch in range(settings.n_minibatches):

        time_method_init = t_ime.clock()
        train_ids_current_minibatch = data['train_ids_partition']['current'][
            idx_minibatch]

        if idx_minibatch == 0:
            # Batch training for first minibatch
            mf.fit(data, train_ids_current_minibatch, settings, param, cache)
        elif model_space_below_limit():
            # Online update, only while the model size is below the limit
            mf.partial_fit(data, train_ids_current_minibatch, settings,
                           param, cache)

        time_method_sans_init += t_ime.clock() - time_method_init

        # Make predictions
        time_predictions_init = t_ime.clock()
        weights_prediction = np.ones(
            settings.n_mondrians) * 1.0 / settings.n_mondrians
        train_ids_cumulative = data['train_ids_partition']['cumulative'][
            idx_minibatch]

        pred_forest_train, metrics_train = \
            mf.evaluate_predictions(data, data['x_train'][train_ids_cumulative, :],
                                    data['y_train'][train_ids_cumulative],
                                    settings, param, weights_prediction, False)

        # Predict for the next n data points in time.  Floor division keeps
        # the slice bounds integral (the result Python 2 gave for ints).
        x_chunk = len(data['x_test']) // number_batches
        y_chunk = len(data['y_test']) // number_batches
        pred_forest_test, metrics_test = \
            mf.evaluate_predictions(data, data['x_test'][start_pos:start_pos + x_chunk],
                                    data['y_test'][start_pos:start_pos + y_chunk],
                                    settings, param, weights_prediction, False)

        # Collect information about prediction
        pred_prob_overall_test.extend(pred_forest_test['pred_prob'])

        name_metric = settings.name_metric  # acc or mse
        log_prob_train = metrics_train['log_prob']
        metric_train = metrics_train[name_metric]
        metric_test = metrics_test[name_metric]

        # Leaf statistics, computed once per minibatch from the trees in
        # mf.forest.  (The store_every branch used to iterate over the
        # forest object itself instead of its tree list.)
        tree_numleaves = np.zeros(settings.n_mondrians)
        tree_leafes_total = 0
        for i_t, tree in enumerate(mf.forest):
            tree_numleaves[i_t] = len(tree.leaf_nodes)
            tree_leafes_total += len(tree.leaf_nodes)
        forest_numleaves = np.mean(tree_numleaves)

        if settings.store_every:
            log_prob_train_minibatch[idx_minibatch] = metrics_train['log_prob']
            log_prob_test_minibatch[idx_minibatch] = metrics_test['log_prob']
            metric_train_minibatch[idx_minibatch] = metric_train
            metric_test_minibatch[idx_minibatch] = metric_test
            time_method_minibatch[idx_minibatch] = 0  #FIXME
            forest_numleaves_minibatch[idx_minibatch] = forest_numleaves

        # Print results
        print('%9d\t\t%.3f\t\t%.3f' % (idx_minibatch, metric_test,
                                       forest_numleaves))

        # Additional space information for analysis
        print_space_inf = print_space_stats()

        if print_space_inf:
            print("Current total tree leaf nodes : " + str(tree_leafes_total))
            # ru_maxrss is divided by 1024 and reported as MB, as before.
            cur_mem_usage = resource.getrusage(
                resource.RUSAGE_SELF).ru_maxrss // 1024
            print("Current total memory usage in MB: " + str(cur_mem_usage))
            print('')

        time_prediction += t_ime.clock() - time_predictions_init
        accuracy.append(metric_test)
        start_pos += x_chunk

    # Total time w/o saving results
    # NOTE(review): time_0 is not assigned in this function; presumably a
    # module-level start timestamp -- confirm.
    time_total = t_ime.clock() - time_0
    time = [time_method_sans_init, time_prediction, time_total]

    # Process and dump statistics to file if desired
    if settings.save == 1:

        test = []
        train = []
        time_method_mb = 0
        forest_numleaves_mb = 0

        if settings.store_every:
            test = [log_prob_test_minibatch, metric_test_minibatch]
            train = [log_prob_train_minibatch, metric_train_minibatch]
            time_method_mb = time_method_minibatch
            forest_numleaves_mb = forest_numleaves_minibatch

        # From here on the timing list also carries the per-minibatch entry.
        time = [
            time_method_sans_init, time_prediction, time_method_mb, time_total
        ]
        metrics = [metric_test, metric_train]

        process_statistics(settings, data, pred_prob_overall_test,
                           log_prob_train, metrics, test, train, time,
                           forest_numleaves_mb)

    # Print statistics to command line
    print_statistics(settings, accuracy, data, mf, time)
Example #11
0
def main():
    """Train a Mondrian forest minibatch-wise and report its performance.

    After every minibatch the train/test metric and the mean number of
    leaves are printed, each test label is listed next to its prediction and
    the confusion matrix is shown in a plot window.  Summary statistics of
    the final forest are printed at the end.
    """
    # Settings come from the command line
    settings = process_command_line()
    print('Current settings:')
    pp.pprint(vars(settings))

    # Seed, data, precomputation and the untrained forest
    reset_random_seed(settings)
    data = load_dataset(settings)
    param, cache = precompute_minimal(data, settings)
    mf = MondrianForest(settings, data)

    print('\nminibatch\tmetric_train\tmetric_test\tnum_leaves')

    for idx_minibatch in range(settings.n_minibatches):
        batch_ids = data['train_ids_partition']['current'][idx_minibatch]
        # Batch fit on the first minibatch, online updates afterwards
        learn = mf.partial_fit if idx_minibatch != 0 else mf.fit
        learn(data, batch_ids, settings, param, cache)

        # Evaluate with uniform tree weights on everything seen so far and
        # on the complete test set
        n_trees = settings.n_mondrians
        weights_prediction = np.ones(n_trees) * 1.0 / n_trees
        seen_ids = data['train_ids_partition']['cumulative'][idx_minibatch]
        pred_forest_train, metrics_train = mf.evaluate_predictions(
            data, data['x_train'][seen_ids, :], data['y_train'][seen_ids],
            settings, param, weights_prediction, False)
        pred_forest_test, metrics_test = mf.evaluate_predictions(
            data, data['x_test'], data['y_test'],
            settings, param, weights_prediction, False)

        name_metric = settings.name_metric     # acc or mse
        metric_train = metrics_train[name_metric]
        metric_test = metrics_test[name_metric]

        leaves_per_tree = np.zeros(n_trees)
        for i_t, tree in enumerate(mf.forest):
            leaves_per_tree[i_t] = len(tree.leaf_nodes)
        forest_numleaves = np.mean(leaves_per_tree)

        report = (idx_minibatch, metric_train, metric_test, forest_numleaves)
        print('%9d\t%.3f\t\t%.3f\t\t%.3f' % report)
        print('length of y_test')
        print(data['y_test'].shape)

        y_test_pred = get_y_pred(pred_forest_test['pred_prob'])
        print('lenght of y_test_pred:')
        print(y_test_pred.shape)

        for row, predicted in enumerate(y_test_pred):
            print('label: %d mf prediction: %d' % (data['y_test'][row], predicted))

        cm = confusion_matrix(data['y_test'], y_test_pred)

        # Show confusion matrix in a separate window
        plt.matshow(cm)
        plt.title('Confusion matrix')
        plt.colorbar()
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.show()

    print('\nFinal forest stats:')
    # One row per tree: column 0 = leaves, column 1 = internal nodes
    tree_stats = np.zeros((settings.n_mondrians, 2))
    tree_average_depth = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_stats[i_t, 0] = len(tree.leaf_nodes)
        tree_stats[i_t, 1] = len(tree.non_leaf_nodes)
        tree_average_depth[i_t] = tree.get_average_depth(settings, data)[0]

    mean_depth = np.mean(tree_average_depth)
    print('mean(num_leaves) = %.1f, mean(num_non_leaves) = %.1f, mean(tree_average_depth) = %.1f'
          % (np.mean(tree_stats[:, 0]), np.mean(tree_stats[:, 1]), mean_depth))
    print('n_train = %d, log_2(n_train) = %.1f, mean(tree_average_depth) = %.1f +- %.1f'
          % (data['n_train'], np.log2(data['n_train']), mean_depth,
             np.std(tree_average_depth)))
Example #12
0
File: ORF.py Project: teamZeta/arp3
    def __init__(self, img, region):
        """Collect the first training samples from ``img`` and fit the forest.

        Patches are described by a 45-bin histogram of their quantised hue
        values.  Slightly shifted copies of the target box become positive
        samples (label 1); boxes whose centre is far from the target along
        x or y become negative samples (label 0).

        Args:
            img: image handed to ``cv2.cvtColor`` with ``COLOR_BGR2HSV``.
            region: object with ``x``, ``y``, ``width`` and ``height``
                attributes describing the target box.
        """
        # Only the hue plane is used (saturation and value were computed
        # but discarded before); hue is floor-divided by 4.
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        image = hsv[:, :, 0].astype(int) // 4
        self.window = max(region.width, region.height) * 2

        left = int(max(region.x, 0))
        top = int(max(region.y, 0))
        right = int(min(region.x + region.width, image.shape[1] - 1))
        bottom = int(min(region.y + region.height, image.shape[0] - 1))

        # Keep both extents even so that the half sizes below are exact.
        if (right - left) % 2 != 0:
            right -= 1
        if (bottom - top) % 2 != 0:
            bottom -= 1

        box_w = right - left
        box_h = bottom - top
        # Floor division keeps these usable as slice and randint arguments.
        half_w = box_w // 2
        half_h = box_h // 2

        self.template = image[top:bottom, left:right]
        self.position = (region.x + region.width / 2,
                         region.y + region.height / 2)
        self.size = (region.width, region.height)
        self.old_img = image
        self.st_neg = 0

        # Positive samples: the box itself plus small random shifts that
        # keep it inside the image (more than 15 shifts or retry limit).
        max_dx = int(box_w * 0.03)
        max_dy = int(box_h * 0.03)
        shifts = [(0, 0)]
        attempts = 0
        while True:
            dx = random.randint(-max_dx, max_dx)
            dy = random.randint(-max_dy, max_dy)
            if (left + dx >= 0 and right + dx < image.shape[1]
                    and top + dy >= 0 and bottom + dy < image.shape[0]):
                shifts.append((dx, dy))
            if len(shifts) > 15 or attempts > 10000:
                break
            attempts += 1
        self.pos = np.array([
            image[top + dy:bottom + dy, left + dx:right + dx].copy()
            for (dx, dy) in shifts
        ])

        # Negative samples: random box centres whose distance to the target
        # centre exceeds the box extent along at least one axis.
        centers = []
        attempts = 0
        while True:
            cx = random.randint(half_w, image.shape[1] - half_w - 1)
            cy = random.randint(half_h, image.shape[0] - half_h - 1)
            if (abs(cx - left - half_w) > box_w
                    or abs(cy - top - half_h) > box_h):
                centers.append((cx, cy))
            if len(centers) > 45 or attempts > 10000:
                break
            attempts += 1
        self.neg = np.array([
            image[cy - half_h:cy + half_h, cx - half_w:cx + half_w].copy()
            for (cx, cy) in centers
        ])
        print("pred update")
        print("neg" + str(len(self.neg)))
        print("pos" + str(len(self.pos)))

        options = {
            'optype': 'class',
            'verbose': 1,
            'draw_mondrian': 0,
            'perf_dataset_keys': ['train', 'test'],
            'data_path': '../../process_data/',
            'dataset': 'toy-mf',
            'tag': '',
            'alpha': 0,
            'bagging': 0,
            'select_features': 0,
            'smooth_hierarchically': 1,
            'normalize_features': 1,
            'min_samples_split': 2,
            'save': 0,
            'discount_factor': 10,
            'op_dir': 'results',
            'init_id': 1,
            'store_every': 0,
            'perf_store_keys': ['pred_prob'],
            'perf_metrics_keys': ['log_prob', 'acc'],
            'budget': -1.0,
            'n_mondrians': 10,
            'debug': 0,
            'n_minibatches': 1,
            'name_metric': 'acc',
            'budget_to_use': np.inf,
        }
        self.settings = Map(options)
        # The seed is reset only after the patches were sampled, as before.
        reset_random_seed(self.settings)

        x_train = np.array([
            np.bincount(patch.flatten().astype(int), minlength=45)
            for patch in self.pos
        ])
        if len(self.neg) > 0:
            negatives = np.array([
                np.bincount(patch.flatten().astype(int), minlength=45)
                for patch in self.neg
            ])
            x_train = np.append(x_train, negatives, axis=0)
            self.st_neg = 1
        else:
            self.st_neg = 0

        # NOTE(review): every feature row has at least 45 columns while
        # 'n_dim' is 1 -- confirm what MondrianForest expects here.
        self.data = {
            'n_dim': 1,
            # The original always took row 5; fall back to the last row
            # when fewer than six samples could be collected.
            'x_test': np.array([x_train[min(5, len(x_train) - 1)]]),
            'x_train': np.array(x_train),
            'y_train': np.array([1] * len(self.pos) + [0] * len(self.neg)),
            'is_sparse': False,
            'n_train': len(x_train),
            'n_class': 2,
            'y_test': np.array([]),
            'n_test': 0,
        }

        self.param, self.cache = precompute_minimal(self.data, self.settings)
        self.mf = MondrianForest(self.settings, self.data)
        self.mf.fit(self.data, np.arange(len(x_train)), self.settings,
                    self.param, self.cache)
Example #13
0
File: ORF.py Project: teamZeta/arp3
class ORF(object):
    def __init__(self, img, region):
        """Collect the first training samples from ``img`` and fit the forest.

        Patches are described by a 45-bin histogram of their quantised hue
        values.  Slightly shifted copies of the target box become positive
        samples (label 1); boxes whose centre is far from the target along
        x or y become negative samples (label 0).

        Args:
            img: image handed to ``cv2.cvtColor`` with ``COLOR_BGR2HSV``.
            region: object with ``x``, ``y``, ``width`` and ``height``
                attributes describing the target box.
        """
        # Only the hue plane is used (saturation and value were computed
        # but discarded before); hue is floor-divided by 4.
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        image = hsv[:, :, 0].astype(int) // 4
        self.window = max(region.width, region.height) * 2

        left = int(max(region.x, 0))
        top = int(max(region.y, 0))
        right = int(min(region.x + region.width, image.shape[1] - 1))
        bottom = int(min(region.y + region.height, image.shape[0] - 1))

        # Keep both extents even so that the half sizes below are exact.
        if (right - left) % 2 != 0:
            right -= 1
        if (bottom - top) % 2 != 0:
            bottom -= 1

        box_w = right - left
        box_h = bottom - top
        # Floor division keeps these usable as slice and randint arguments.
        half_w = box_w // 2
        half_h = box_h // 2

        self.template = image[top:bottom, left:right]
        self.position = (region.x + region.width / 2,
                         region.y + region.height / 2)
        self.size = (region.width, region.height)
        self.old_img = image
        self.st_neg = 0

        # Positive samples: the box itself plus small random shifts that
        # keep it inside the image (more than 15 shifts or retry limit).
        max_dx = int(box_w * 0.03)
        max_dy = int(box_h * 0.03)
        shifts = [(0, 0)]
        attempts = 0
        while True:
            dx = random.randint(-max_dx, max_dx)
            dy = random.randint(-max_dy, max_dy)
            if (left + dx >= 0 and right + dx < image.shape[1]
                    and top + dy >= 0 and bottom + dy < image.shape[0]):
                shifts.append((dx, dy))
            if len(shifts) > 15 or attempts > 10000:
                break
            attempts += 1
        self.pos = np.array([
            image[top + dy:bottom + dy, left + dx:right + dx].copy()
            for (dx, dy) in shifts
        ])

        # Negative samples: random box centres whose distance to the target
        # centre exceeds the box extent along at least one axis.
        centers = []
        attempts = 0
        while True:
            cx = random.randint(half_w, image.shape[1] - half_w - 1)
            cy = random.randint(half_h, image.shape[0] - half_h - 1)
            if (abs(cx - left - half_w) > box_w
                    or abs(cy - top - half_h) > box_h):
                centers.append((cx, cy))
            if len(centers) > 45 or attempts > 10000:
                break
            attempts += 1
        self.neg = np.array([
            image[cy - half_h:cy + half_h, cx - half_w:cx + half_w].copy()
            for (cx, cy) in centers
        ])
        print("pred update")
        print("neg" + str(len(self.neg)))
        print("pos" + str(len(self.pos)))

        options = {
            'optype': 'class',
            'verbose': 1,
            'draw_mondrian': 0,
            'perf_dataset_keys': ['train', 'test'],
            'data_path': '../../process_data/',
            'dataset': 'toy-mf',
            'tag': '',
            'alpha': 0,
            'bagging': 0,
            'select_features': 0,
            'smooth_hierarchically': 1,
            'normalize_features': 1,
            'min_samples_split': 2,
            'save': 0,
            'discount_factor': 10,
            'op_dir': 'results',
            'init_id': 1,
            'store_every': 0,
            'perf_store_keys': ['pred_prob'],
            'perf_metrics_keys': ['log_prob', 'acc'],
            'budget': -1.0,
            'n_mondrians': 10,
            'debug': 0,
            'n_minibatches': 1,
            'name_metric': 'acc',
            'budget_to_use': np.inf,
        }
        self.settings = Map(options)
        # The seed is reset only after the patches were sampled, as before.
        reset_random_seed(self.settings)

        x_train = np.array([
            np.bincount(patch.flatten().astype(int), minlength=45)
            for patch in self.pos
        ])
        if len(self.neg) > 0:
            negatives = np.array([
                np.bincount(patch.flatten().astype(int), minlength=45)
                for patch in self.neg
            ])
            x_train = np.append(x_train, negatives, axis=0)
            self.st_neg = 1
        else:
            self.st_neg = 0

        # NOTE(review): every feature row has at least 45 columns while
        # 'n_dim' is 1 -- confirm what MondrianForest expects here.
        self.data = {
            'n_dim': 1,
            # The original always took row 5; fall back to the last row
            # when fewer than six samples could be collected.
            'x_test': np.array([x_train[min(5, len(x_train) - 1)]]),
            'x_train': np.array(x_train),
            'y_train': np.array([1] * len(self.pos) + [0] * len(self.neg)),
            'is_sparse': False,
            'n_train': len(x_train),
            'n_class': 2,
            'y_test': np.array([]),
            'n_test': 0,
        }

        self.param, self.cache = precompute_minimal(self.data, self.settings)
        self.mf = MondrianForest(self.settings, self.data)
        self.mf.fit(self.data, np.arange(len(x_train)), self.settings,
                    self.param, self.cache)

    def set_region(self, position):
        """Store ``position`` as the tracked position.

        NOTE(review): this class defines ``set_region`` a second time further
        down (taking a ``region``); that later definition replaces this one,
        so this version is never the one bound to the class.
        """
        self.position = position

    def reset_position(self, pos):
        """Overwrite the stored target position with ``pos`` (no validation)."""
        self.position = pos

    def set_position(self, position):
        self.position = (position[0], position[1])
        self.size = [
            position[0] - self.size[0] / 2, position[1] - self.size[1] / 2,
            position[0] + self.size[0] / 2, position[1] + self.size[1] / 2
        ]

    def set_region(self, region):
        self.position = (int(region.x + region.width / 2),
                         int(region.y + region.height / 2))
        self.size = (region.width, region.height)

    def updateTree(self, img):
        """Sample patches around the current position and update the forest.

        The frame is converted to HSV and only the first (hue) channel,
        floor-divided by 4, is used. Positive patches are template-sized
        windows at the current position and at small random offsets (up to 3%
        of the template width/height) from it. Negative patches are
        same-sized windows whose centre is more than one template width AND
        more than one template height away from the current position. Every
        patch is reduced to an ``np.bincount`` histogram (``minlength=45``),
        appended to ``self.data['x_train']`` / ``self.data['y_train']`` with
        label 1 (positive) or 0 (negative), and the newly appended row ids
        are handed to ``self.mf.partial_fit``.

        Side effects: overwrites ``self.pos`` and ``self.neg``, grows
        ``self.data['x_train']`` and ``self.data['y_train']``, and draws from
        the module-level ``random`` generator.

        Args:
            img: Frame passed to ``cv2.cvtColor`` with ``cv2.COLOR_BGR2HSV``.
        """
        # Only the hue channel is used. Floor division keeps the values
        # integral under both Python 2 and Python 3 division semantics.
        hue = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[:, :, 0].astype(int) // 4
        frame_h, frame_w = hue.shape[0], hue.shape[1]

        # Integer half-extents: they are used as slice indices and as
        # ``random.randint`` bounds, both of which require integers.
        half_h = len(self.template) // 2
        half_w = len(self.template[0]) // 2
        left = int(self.position[0]) - half_w
        top = int(self.position[1]) - half_h
        right = int(self.position[0]) + half_w
        bottom = int(self.position[1]) + half_h
        width = right - left
        height = bottom - top

        # --- positive samples: the current window plus jittered copies ---
        max_dx = int(width * 0.03)
        max_dy = int(height * 0.03)
        offsets = [(0, 0)]
        attempts = 0
        while True:
            dx = random.randint(-max_dx, max_dx)
            dy = random.randint(-max_dy, max_dy)
            # The shifted window must lie fully inside the frame. The
            # vertical test uses the sampled offset ``dy`` (the previous code
            # mistakenly used the half template height here).
            if (left + dx >= 0 and right + dx < frame_w
                    and top + dy >= 0 and bottom + dy < frame_h):
                offsets.append((dx, dy))
            # Stop after six windows or when the attempt budget is spent.
            if len(offsets) > 5 or attempts > 10000:
                break
            attempts += 1
        self.pos = np.array([
            hue[top + dy:bottom + dy, left + dx:right + dx].copy()
            for (dx, dy) in offsets
        ])

        # --- negative samples: windows centred far from the target ---
        centers = []
        x_lo, x_hi = half_w, frame_w - half_w - 1
        y_lo, y_hi = half_h, frame_h - half_h - 1
        # ``random.randint`` raises on an empty range; when the frame cannot
        # hold a full window, simply collect no negatives (the code below
        # already copes with an empty negative set).
        if x_lo <= x_hi and y_lo <= y_hi:
            attempts = 0
            while True:
                cx = random.randint(x_lo, x_hi)
                cy = random.randint(y_lo, y_hi)
                if (abs(cx - left - half_w) > width
                        and abs(cy - top - half_h) > height):
                    centers.append((cx, cy))
                if len(centers) > 15 or attempts > 10000:
                    break
                attempts += 1
        self.neg = np.array([
            hue[cy - half_h:cy + half_h, cx - half_w:cx + half_w].copy()
            for (cx, cy) in centers
        ])

        # One 45-bin (at least) value histogram per patch.
        x_trainp = np.array([
            np.bincount(patch.flatten().astype(int), minlength=45)
            for patch in self.pos
        ])
        if len(self.neg) > 0:
            x_trainn = np.array([
                np.bincount(patch.flatten().astype(int), minlength=45)
                for patch in self.neg
            ])
            x_train = np.append(x_trainp, x_trainn, axis=0)
        else:
            x_train = x_trainp

        self.data['x_train'] = np.append(self.data['x_train'],
                                         x_train,
                                         axis=0)
        # Labels: 1 for every positive patch, 0 for every negative patch.
        labels = [1] * len(self.pos) + [0] * len(self.neg)
        self.data['y_train'] = np.append(self.data['y_train'], array(labels))

        # Train only on the rows that were just appended.
        n_total = len(self.data['x_train'])
        new_ids = array(range(n_total - len(x_train), n_total))
        self.mf.partial_fit(self.data, new_ids, self.settings, self.param,
                            self.cache)

    def track(self, img):
        """Search ``img`` for the target and return ``[score, rectangle]``.

        The frame is converted to HSV and only the first (hue) channel,
        floor-divided by 4, is used. A search window extending one full
        ``self.size`` in every direction from ``self.position`` (clipped to
        the frame) is scanned on a 5-pixel grid; each candidate patch is
        turned into an ``np.bincount`` histogram (``minlength=45``) and scored
        with ``self.mf.evaluate_predictions``. The best-scoring grid point
        becomes the new ``self.position``. The same location is then probed
        at 0.9x and 1.1x of the current size, and ``self.size`` is rescaled
        when the corresponding probe scores higher than the best grid score.

        Args:
            img: Frame passed to ``cv2.cvtColor`` with ``cv2.COLOR_BGR2HSV``.

        Returns:
            A two-item list ``[score, vot.Rectangle]``. ``score`` is 0 when
            ``self.st_neg == 0``, when the clipped search window is smaller
            than the template, or when the scan yields no candidates;
            otherwise it is the best grid score and the rectangle is centred
            on the new ``self.position`` with the (possibly rescaled)
            ``self.size``.
        """
        if self.st_neg == 0:
            return [0, vot.Rectangle(1, 1, self.size[0], self.size[1])]

        # Only the hue channel is used; floor division keeps it integral.
        hue = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[:, :, 0].astype(int) // 4
        frame_h, frame_w = hue.shape[0], hue.shape[1]

        # Search window: +/- one full size around the last position, clipped.
        left = int(max(round(self.position[0] - self.size[0]), 0))
        top = int(max(round(self.position[1] - self.size[1]), 0))
        right = int(min(round(self.position[0] + self.size[0]), frame_w - 1))
        bottom = int(min(round(self.position[1] + self.size[1]), frame_h - 1))

        def _lost():
            # NOTE(review): ``set_region`` treats ``position`` as the centre,
            # so ``position + size / 2`` looks like a sign slip for the
            # top-left corner. Kept as-is because the score is 0 here --
            # confirm with callers before changing.
            return [
                0,
                vot.Rectangle(self.position[0] + self.size[0] / 2,
                              self.position[1] + self.size[1] / 2,
                              self.size[0], self.size[1])
            ]

        if (right - left < self.template.shape[1]
                or bottom - top < self.template.shape[0]):
            return _lost()

        # Uniform weight for every tree of the forest.
        weights_prediction = np.ones(
            self.settings.n_mondrians) * 1.0 / self.settings.n_mondrians

        def _target_prob(patch):
            # Histogram the patch and return ``pred_prob[0][1]`` of the
            # forest's prediction (presumably the probability of label 1,
            # the label ``updateTree`` assigns to positive patches).
            histogram = np.bincount(patch.flatten().astype(int), minlength=45)
            result = self.mf.evaluate_predictions(
                self.data, array([histogram]), [1], self.settings, self.param,
                weights_prediction, False)[0]
            return result['pred_prob'][0][1]

        # --- coarse grid scan inside the search window ---
        cut = hue[top:bottom, left:right]
        half_h = int(self.size[1] / 2)
        half_w = int(self.size[0] / 2)
        candidates = []
        for i in range(half_h, int(bottom - top - half_h), 5):
            for j in range(half_w, right - left - half_w, 5):
                patch = cut[i - half_h:i + half_h, j - half_w:j + half_w]
                candidates.append((_target_prob(patch), i, j))

        if not candidates:
            return _lost()

        # First candidate with the highest score (ties keep the earliest).
        best_score, best_i, best_j = max(candidates, key=lambda c: c[0])
        center_x = left + best_j
        center_y = top + best_i

        def _probe(scale):
            # Score a window of ``scale`` times the current size centred on
            # the best location, or return None if it leaves the frame.
            # Columns are checked against the frame WIDTH (the previous code
            # compared them with the height).
            probe_h = int(self.size[1] / 2 * scale)
            probe_w = int(self.size[0] / 2 * scale)
            y0, y1 = center_y - probe_h, center_y + probe_h
            x0, x1 = center_x - probe_w, center_x + probe_w
            if y0 >= 0 and y1 < frame_h and x0 >= 0 and x1 < frame_w:
                return _target_prob(hue[y0:y1, x0:x1])
            return None

        prob_small = _probe(0.9)
        prob_large = _probe(1.1)

        # Growing is only considered when both probes were possible and the
        # larger one wins; otherwise shrinking is considered.
        if (prob_large is not None and prob_small is not None
                and prob_large > prob_small):
            if prob_large > best_score:
                self.size = (int(self.size[0] * 1.1), int(self.size[1] * 1.1))
        elif prob_small is not None:
            if prob_small > best_score:
                self.size = (int(self.size[0] * 0.9), int(self.size[1] * 0.9))
        self.position = (center_x, center_y)

        # Top-left corner derived from the final size so the rectangle is
        # centred on ``self.position`` (the previous code reused the
        # half-extents of the 1.1x probe regardless of the chosen size).
        return [
            best_score,
            vot.Rectangle(center_x - int(self.size[0] / 2),
                          center_y - int(self.size[1] / 2), self.size[0],
                          self.size[1])
        ]