def pre_processing():
    """
    Process settings and data
    :return: required data for execution
    """
    # Process and print settings
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))

    # Reset random seed
    reset_random_seed(settings)

    # Load data and precompute the quantities the forest needs
    data = load_data(settings)
    param, cache = precompute_minimal(data, settings)
    return settings, data, param, cache
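# Usage sketch (an assumption, not part of the original script): run from the
# same module, where MondrianForest and the helpers imported for pre_processing
# are already available.
if __name__ == '__main__':
    settings, data, param, cache = pre_processing()
    mf = MondrianForest(settings, data)
    # Batch-train on the first minibatch, as the other scripts below do
    first_batch_ids = data['train_ids_partition']['current'][0]
    mf.fit(data, first_batch_ids, settings, param, cache)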
def perform_file(fileName):
    # NOTE: relies on a module-level `settings` object from process_command_line()
    settings.dataset = fileName + ".arff"
    print(fileName)
    print("Loading data")
    data = load_data(settings)
    param, cache = precompute_minimal(data, settings)
    mf = MondrianForest(settings, data)
    for idx_minibatch in range(settings.n_minibatches):
        train_ids_current_minibatch = data['train_ids_partition']['current'][idx_minibatch]
        if idx_minibatch == 0:
            with open(settings.data_path + fileName + '.csv', 'w') as f:
                f.write("target,prediction\n")  # ',' delimiter, matching the rows written below
            print("Training batch 0: %d examples" % len(train_ids_current_minibatch))
            # Batch training for first minibatch
            mf.fit(data, train_ids_current_minibatch, settings, param, cache)
        else:
            print('Evaluation on batch %d in %s' % (idx_minibatch, fileName))
            # Evaluate with equal weight for every tree in the forest
            weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
            results = mf.evaluate_predictions(
                data, data['x_train'][train_ids_current_minibatch],
                data['y_train'][train_ids_current_minibatch],
                settings, param, weights_prediction, True)
            # Append (target, prediction) pairs for this minibatch
            predictions = results[0]['pred_mean']
            real = data['y_train'][train_ids_current_minibatch].flatten()
            with open(settings.data_path + fileName + '.csv', 'a') as f:
                for i in range(len(predictions)):
                    f.write("{0},{1}\n".format(real[i], predictions[i]))
            print("Training on next batch %d: %d examples" % (idx_minibatch, len(train_ids_current_minibatch)))
            # Online update
            mf.partial_fit(data, train_ids_current_minibatch, settings, param, cache)
    print("Finished training ...")
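# A quick sanity check on the prequential log written by perform_file: read the
# target/prediction pairs back and compute the mean squared error. This is a
# sketch, not part of the original pipeline; it assumes pandas is installed and
# that the CSV uses the ',' delimiter written above.
import pandas as pd

def summarize_predictions(csv_path):
    df = pd.read_csv(csv_path)  # columns: target, prediction
    mse = np.mean((df['target'] - df['prediction']) ** 2)
    print('n = %d, MSE = %.4f' % (len(df), mse))
    return mse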
def main():
    # Import settings from command line
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))

    # Reset random seed
    reset_random_seed(settings)

    # Load batch data
    batch_type_list = ['office', 'kitchen', 'bookstore']
    #batch_type_list = ['kitchen', 'office', 'bathroom', 'bedroom', 'bookstore', 'living_room']
    incremental_type_list = ['computer_room', 'home_office', 'office_kitchen', 'classroom']
    #incremental_type_list = ['study_space', 'classroom', 'computer_room', 'lobby', 'home_office', 'office_kitchen', 'playroom', 'reception_room', 'study', 'dining_room', 'cafeteria', 'furniture_store', 'conference_room', 'dinette', 'gym', 'storage_room', 'indoor_balcony', 'laundromat', 'printer_room', 'basement', 'recreation_room']
    training_perc = 0
    data, training_list, test_list = load_type_dataset(settings, batch_type_list,
                                                       incremental_type_list, training_perc)
    param, cache = precompute_minimal(data, settings)
    mf = MondrianForest(settings, data)

    batch_train_ids = data['train_ids_partition']['batch']
    print '\nBatch training on %d elements...' % (len(batch_train_ids))
    mf.fit(data, batch_train_ids, settings, param, cache)
    print '...batch training done.\n'

    if training_perc > 0:
        incremental_train_ids = data['train_ids_partition']['incremental']
        print 'Incremental training on %d elements...' % (len(incremental_train_ids))
        mf.partial_fit(data, incremental_train_ids, settings, param, cache)
        print '...incremental training done.\n'

    # Evaluation
    print 'Evaluation... \n'
    weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
    pred_forest_test, metrics_test = \
        mf.evaluate_predictions(data, data['x_test'], data['y_test'],
                                settings, param, weights_prediction, False)
    name_metric = settings.name_metric  # acc or mse
    metric_test = metrics_test[name_metric]
    tree_numleaves = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_numleaves[i_t] = len(tree.leaf_nodes)
    forest_numleaves = np.mean(tree_numleaves)

    f_stats = open('/home/alberto/tesi/mondrianforest/src/results/statistics.txt', 'w')
    print '%s\t\tnum_leaves' % (name_metric)
    f_stats.write(str(name_metric) + ' : ' + str(metric_test) + '\n')
    f_stats.write('num_leaves : ' + str(forest_numleaves) + '\n\n')
    print '%.3f\t\t%.3f' % (metric_test, forest_numleaves)

    print '\nFinal forest stats:'
    f_stats.write('Final forest stats: \n')
    tree_stats = np.zeros((settings.n_mondrians, 2))
    tree_average_depth = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_stats[i_t, -2:] = np.array([len(tree.leaf_nodes), len(tree.non_leaf_nodes)])
        tree_average_depth[i_t] = tree.get_average_depth(settings, data)[0]
    print 'mean(num_leaves) = %.1f, mean(num_non_leaves) = %.1f, mean(tree_average_depth) = %.1f' \
        % (np.mean(tree_stats[:, -2]), np.mean(tree_stats[:, -1]), np.mean(tree_average_depth))
    print 'n_train = %d, log_2(n_train) = %.1f, mean(tree_average_depth) = %.1f +- %.1f' \
        % (data['n_train'], np.log2(data['n_train']), np.mean(tree_average_depth), np.std(tree_average_depth))
    f_stats.write('mean(num_leaves) = ' + str(np.mean(tree_stats[:, -2])))
    f_stats.write(' mean(num_non_leaves) = ' + str(np.mean(tree_stats[:, -1])))
    f_stats.write(' mean(tree_average_depth) = ' + str(np.mean(tree_average_depth)) + '\n')
    f_stats.write('n_train = ' + str(data['n_train']))
    f_stats.write(' log_2(n_train) = ' + str(np.log2(data['n_train'])))
    f_stats.write(' mean(tree_average_depth) = ' + str(np.mean(tree_average_depth))
                  + ' +- ' + str(np.std(tree_average_depth)) + '\n')
    f_stats.write('\n------------------------------------------------------------------\n')
    print '\n...evaluation done.'

    print 'Computing confusion matrices...'
    uf_dir = settings.data_path + '/unary_csv'
    labels_dir = settings.data_path + '/labels_csv'
    cm_res_dir = '../results/cm'
    for file_name in test_list:
        curr_uf_csv = uf_dir + '/' + file_name
        curr_labels_csv = labels_dir + '/' + file_name
        x_df = pd.read_csv(curr_uf_csv, usecols=unary_features)  # unary_features: module-level column list
        y_df = pd.read_csv(curr_labels_csv, dtype=int)
        x_test = x_df.to_numpy()
        y_test = y_df.to_numpy()
        y_test.shape = (y_test.shape[0],)

        if settings.normalize_features == 1:
            # Rescale with the same min/range used on the training data
            min_d = np.minimum(np.min(data['x_train'], 0), np.min(data['x_test'], 0))
            max_d = np.maximum(np.max(data['x_train'], 0), np.max(data['x_test'], 0))
            range_d = max_d - min_d
            idx_range_d_small = range_d <= 0.  # columns where all features are identical
            if data['n_dim'] > 1:
                range_d[idx_range_d_small] = 1e-3  # non-zero value just to prevent division by 0
            elif idx_range_d_small:
                range_d = 1e-3
            x_test -= min_d + 0.
            x_test /= range_d

        cm_weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
        cm_pred_forest_test, cm_metrics_test = \
            mf.evaluate_predictions(data, x_test, y_test,
                                    settings, param, cm_weights_prediction, False)
        #y_test_pred = get_y_pred(cm_pred_forest_test['pred_prob'])
        y_test_pred = get_label_predictions(cm_pred_forest_test['pred_prob'])
        f_stats.write(str(file_name) + '\n')
        print 'y_test'
        print y_test[:25]
        print 'y_test_pred'
        print y_test_pred[:25]
        f_stats.write('\n y_test y_mf_pred: \n')
        for x in range(25):
            # (the original if/else branches here were identical, so they are collapsed)
            f_stats.write('# ' + str(y_test[x]) + ' # ' + str(y_test_pred[x]) + '\n')
        f_stats.write('\n---------------------------------------\n')
        cm = compute_confusion_matrix(y_test, y_test_pred, print_cm=False)
        cm_path = cm_res_dir + '/' + file_name
        #np.savetxt(cm_path, cm, delimiter=",")

        # SAVE PREDICTIONS ON CORRESPONDING PCD
        '''
        end_idx = file_name.rfind('.')
        input_path = '/home/alberto/tesi/dataset/NYUDV2/trained_semseg_data/clustering/'
        pcd_name = file_name[0:end_idx] + '.pcd'
        print pcd_name
        pcd_path = input_path + pcd_name
        input_cloud = pypcd.PointCloud.from_path(pcd_path)
        point_x_list = input_cloud.pc_data['x']
        point_y_list = input_cloud.pc_data['y']
        point_z_list = input_cloud.pc_data['z']
        cluster_idx_list = input_cloud.pc_data['label']
        new_cloud = input_cloud.pc_data.copy()
        new_cloud = input_cloud.pc_data.view(np.float32).reshape(input_cloud.pc_data.shape + (-1,))
        print 'Cluster cloud shape:'
        print new_cloud.shape
        print 'Cluster label length: %d' % (len(cluster_idx_list))
        for n in range(new_cloud.shape[0]):
            new_cloud[n][0] = point_x_list[n]
            new_cloud[n][1] = point_y_list[n]
            new_cloud[n][2] = point_z_list[n]
            if cluster_idx_list[n] > 4000:
                new_cloud[n][3] = 0
            else:
                new_cloud[n][3] = y_test_pred[cluster_idx_list[n]]
        #res_pcd = pypcd.make_xyz_rgb_point_cloud(new_cloud)
        res_pcd = pypcd.make_xyz_label_point_cloud(new_cloud)
        output_path = '/home/alberto/tesi/mondrianforest/src/results/pcd/' + pcd_name
        res_pcd.save(output_path)
        '''
    print '...computation done.\n END.'
    f_stats.close()
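# For reference: turning the forest's averaged class probabilities into hard
# labels is an argmax over the class axis. A minimal sketch of what a
# get_label_predictions-style helper can look like (the helper actually used
# above may differ):
def get_label_predictions_sketch(pred_prob):
    # pred_prob: array of shape (n_examples, n_class) from evaluate_predictions
    return np.argmax(pred_prob, axis=1)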
from mondrianforest_utils import load_data, reset_random_seed, precompute_minimal
from mondrianforest import process_command_line, MondrianForest

settings = process_command_line()
print 'Current settings:'
pp.pprint(vars(settings))

# Reset random seed
reset_random_seed(settings)

# Load data
data = load_data(settings)
print "Data: ", data
print type(settings)
param, cache = precompute_minimal(data, settings)
mf = MondrianForest(settings, data)
print(data)

train_ids_current_minibatch = data['train_ids_partition']['current'][0]
print train_ids_current_minibatch.shape

print "First batch: train on 5 data points"
mf.fit(data, train_ids_current_minibatch[0:5], settings, param, cache)
print mf.forest[0].counts

print "\nNow the extension\n"
mf.partial_fit(data, train_ids_current_minibatch[5:10], settings, param, cache)
print mf.forest[0].counts
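# To confirm the online extension actually changed the model, the forest can be
# scored on the ten points it has seen so far. A sketch following the calling
# convention of the other scripts in this repo (equal per-tree weights):
weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
ids_seen = train_ids_current_minibatch[0:10]
pred_forest, metrics = mf.evaluate_predictions(
    data, data['x_train'][ids_seen, :], data['y_train'][ids_seen],
    settings, param, weights_prediction, False)
print metrics[settings.name_metric]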
def __init__(self, image, region):
    # Assumes module-level imports: cv2, random, numpy as np, `array`/`inf`
    # from numpy, Map, and the mondrianforest helpers (reset_random_seed,
    # precompute_minimal, MondrianForest).
    self.window = max(region.width, region.height) * 2
    left = max(region.x, 0)
    top = max(region.y, 0)
    right = min(region.x + region.width, image.shape[1] - 1)
    bottom = min(region.y + region.height, image.shape[0] - 1)
    # Force the template to have even width and height
    if (right - left) % 2 != 0:
        right -= 1
    if (bottom - top) % 2 != 0:
        bottom -= 1
    self.template = image[top:bottom, left:right]
    self.position = (region.x + region.width / 2, region.y + region.height / 2)
    self.size = (region.width, region.height)
    self.old_img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    self.hsv = np.zeros_like(image)
    self.hsv[..., 1] = 255
    self.pos = np.array([image[top:bottom, left:right].copy().tolist()])
    self.neg = np.array([])

    # Positive examples: patches jittered by up to 10% around the region
    # (note: loops until 6 valid offsets are found, with no retry bound)
    vred = [(0, 0)]
    while True:
        l = random.randint(-int((right - left) * 0.1), int((right - left) * 0.1))
        t = random.randint(-int((bottom - top) * 0.1), int((bottom - top) * 0.1))
        if l + left >= 0 and l + right < image.shape[1] and t + top >= 0 and t + bottom < image.shape[0]:
            vred += [(l, t)]
        if len(vred) > 5:
            break
    self.pos = np.array([np.array(self.old_img[top + t:bottom + t, left + l:right + l].copy().tolist())
                         for (l, t) in vred])

    # Negative examples: patches whose center is far from the region (bounded retries)
    vred = []
    infloop = 0
    while True:
        l = random.randint((right - left) / 2, image.shape[1] - (right - left) / 2 - 1)
        t = random.randint((bottom - top) / 2, image.shape[0] - (bottom - top) / 2 - 1)
        if abs(l - left - (right - left) / 2) > (right - left) and \
                abs(t - top - (bottom - top) / 2) > (bottom - top):
            vred += [(l, t)]
        if len(vred) > 5 or infloop > 10000:
            break
        infloop += 1
    self.neg = np.array([np.array(self.old_img[t - (bottom - top) / 2:t + (bottom - top) / 2,
                                               l - (right - left) / 2:l + (right - left) / 2].copy().tolist())
                         for (l, t) in vred])

    # Debug output: dump the sampled positive patches to disk
    print("here is the image")
    stevec = 1  # counter
    for i in self.pos:
        cv2.imwrite("file" + str(stevec) + ".png", i)
        stevec += 1

    # Mondrian forest settings; reset the random seed before building the forest
    # (renamed from `set` to avoid shadowing the builtin)
    settings_dict = {'optype': 'class', 'verbose': 1, 'draw_mondrian': 0,
                     'perf_dataset_keys': ['train', 'test'],
                     'data_path': '../../process_data/', 'dataset': 'toy-mf',
                     'tag': '', 'alpha': 0, 'bagging': 0, 'select_features': 0,
                     'smooth_hierarchically': 1, 'normalize_features': 1,
                     'min_samples_split': 2, 'save': 0, 'discount_factor': 10,
                     'op_dir': 'results', 'init_id': 1, 'store_every': 0,
                     'perf_store_keys': ['pred_prob'],
                     'perf_metrics_keys': ['log_prob', 'acc'], 'budget': -1.0,
                     'n_mondrians': 10, 'debug': 0, 'n_minibatches': 2,
                     'name_metric': 'acc', 'budget_to_use': inf}
    self.settings = Map(settings_dict)
    reset_random_seed(self.settings)

    # Debug output: dump the sampled negative patches to disk
    stevec = -30
    for i in self.neg:
        cv2.imwrite("file" + str(stevec) + ".png", i)
        stevec -= 1

    # Flatten patches into feature vectors: positives first, then negatives
    x_trainp = [x.flatten().tolist() for x in self.pos]
    x_trainn = [x.flatten().tolist() for x in self.neg]
    x_train = x_trainp + x_trainn
    print(len(x_train[0]))
    self.data = {'n_dim': 1, 'x_test': array([x_train[5]]),
                 'x_train': array(x_train),
                 'y_train': array(np.ones(len(self.pos)).astype(int).tolist()
                                  + np.zeros(len(self.neg)).astype(int).tolist()),
                 'is_sparse': False, 'n_train': len(x_train), 'n_class': 2,
                 'y_test': array([]), 'n_test': 0}
    self.param, self.cache = precompute_minimal(self.data, self.settings)
    self.mf = MondrianForest(self.settings, self.data)
    for idx_minibatch in range(self.settings.n_minibatches):
        #train_ids_current_minibatch = self.data['train_ids_partition']['current'][idx_minibatch]
        if idx_minibatch == 0:
            # Batch training on the first half of the data
            self.mf.fit(self.data, array(range(0, len(x_train) / 2)),
                        self.settings, self.param, self.cache)
        else:
            # Online update on the second half
            self.mf.partial_fit(self.data, array(range(len(x_train) / 2, len(x_train))),
                                self.settings, self.param, self.cache)
            print("updated")
        # After each minibatch, evaluate the forest on one held-out patch
        weights_prediction = np.ones(self.settings.n_mondrians) * 1.0 / self.settings.n_mondrians
        #train_ids_cumulative = self.data['train_ids_partition']['cumulative'][idx_minibatch]
        print(self.mf.evaluate_predictions(self.data, array([x_train[5]]), [1],
                                           self.settings, self.param, weights_prediction, False))
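# Hypothetical companion method (not part of the original class): score one new
# candidate patch with the trained forest and return P(class == 1). `patch` is
# assumed to be a grayscale crop with the same shape as the training patches.
def score_patch(self, patch):
    x = array([patch.flatten().tolist()])
    weights = np.ones(self.settings.n_mondrians) * 1.0 / self.settings.n_mondrians
    pred_forest, _ = self.mf.evaluate_predictions(
        self.data, x, [1], self.settings, self.param, weights, False)
    return pred_forest['pred_prob'][0][1]  # probability of the positive class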
from mondrianforest_utils import load_data, reset_random_seed, precompute_minimal
from mondrianforest import process_command_line, MondrianForest

PLOT = False

settings = process_command_line()
print 'Current settings:'
pp.pprint(vars(settings))

# Reset random seed
reset_random_seed(settings)

# Load data
data = load_data(settings)
param, cache = precompute_minimal(data, settings)
mf = MondrianForest(settings, data)

print '\nminibatch\tmetric_train\tmetric_test\tnum_leaves'
for idx_minibatch in range(settings.n_minibatches):
    train_ids_current_minibatch = data['train_ids_partition']['current'][idx_minibatch]
    if idx_minibatch == 0:
        # Batch training for first minibatch
        mf.fit(data, train_ids_current_minibatch, settings, param, cache)
    else:
        # Online update
        mf.partial_fit(data, train_ids_current_minibatch, settings, param, cache)

    # Evaluate
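    # The evaluation step under '# Evaluate' can be completed exactly as in the
    # full main() below; reproduced here so the loop runs end to end:
    weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
    train_ids_cumulative = data['train_ids_partition']['cumulative'][idx_minibatch]
    pred_forest_train, metrics_train = \
        mf.evaluate_predictions(data, data['x_train'][train_ids_cumulative, :],
                                data['y_train'][train_ids_cumulative],
                                settings, param, weights_prediction, False)
    pred_forest_test, metrics_test = \
        mf.evaluate_predictions(data, data['x_test'], data['y_test'],
                                settings, param, weights_prediction, False)
    tree_numleaves = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_numleaves[i_t] = len(tree.leaf_nodes)
    print '%9d\t%.3f\t\t%.3f\t\t%.3f' % (idx_minibatch, metrics_train[settings.name_metric],
                                         metrics_test[settings.name_metric], np.mean(tree_numleaves))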
def __init__(self, img, region):
    # Quantize hue into 45 bins: OpenCV hue is 0..179, so integer /4 gives 0..44.
    # The s and v channels are shifted but then discarded (only hue is kept).
    image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    image = image.astype(int) / 4
    h, s, v = cv2.split(image)
    s += 45
    v += 109
    image = h  # cv2.merge((h, s, v))
    self.window = max(region.width, region.height) * 2
    left = int(max(region.x, 0))
    top = int(max(region.y, 0))
    right = int(min(region.x + region.width, image.shape[1] - 1))
    bottom = int(min(region.y + region.height, image.shape[0] - 1))
    if (right - left) % 2 != 0:
        right -= 1
    if (bottom - top) % 2 != 0:
        bottom -= 1
    self.template = image[top:bottom, left:right]
    self.position = (region.x + region.width / 2, region.y + region.height / 2)
    self.size = (region.width, region.height)
    self.old_img = image
    self.pos = np.array([image[top:bottom, left:right].copy().tolist()])
    self.neg = np.array([])

    # Positive examples: jitter by up to 3% of the region size (bounded retries)
    vred = [(0, 0)]
    infloop = 0
    while True:
        l = random.randint(-int((right - left) * 0.03), int((right - left) * 0.03))
        t = random.randint(-int((bottom - top) * 0.03), int((bottom - top) * 0.03))
        if l + left >= 0 and l + right < image.shape[1] and t + top >= 0 and t + bottom < image.shape[0]:
            vred += [(l, t)]
        if len(vred) > 15 or infloop > 10000:
            break
        infloop += 1
    self.pos = np.array([np.array(self.old_img[top + int(t2):bottom + int(t2),
                                               left + int(l2):right + int(l2)].copy().tolist())
                         for (l2, t2) in vred])

    # Negative examples: patches whose center is far from the region
    vred = []
    infloop = 0
    while True:
        l = random.randint(int((right - left) / 2), int(image.shape[1] - (right - left) / 2 - 1))
        t = random.randint(int((bottom - top) / 2), int(image.shape[0] - (bottom - top) / 2 - 1))
        if abs(l - left - (right - left) / 2) > (right - left) or \
                abs(t - top - (bottom - top) / 2) > (bottom - top):
            vred += [(l, t)]
        if len(vred) > 45 or infloop > 10000:
            break
        infloop += 1
    self.neg = np.array([np.array(self.old_img[int(t2) - (bottom - top) / 2:int(t2) + (bottom - top) / 2,
                                               int(l2) - (right - left) / 2:int(l2) + (right - left) / 2].copy().tolist())
                         for (l2, t2) in vred])
    print("before update")
    print("neg " + str(len(self.neg)))
    print("pos " + str(len(self.pos)))

    # Mondrian forest settings (renamed from `set` to avoid shadowing the builtin)
    settings_dict = {'optype': 'class', 'verbose': 1, 'draw_mondrian': 0,
                     'perf_dataset_keys': ['train', 'test'],
                     'data_path': '../../process_data/', 'dataset': 'toy-mf',
                     'tag': '', 'alpha': 0, 'bagging': 0, 'select_features': 0,
                     'smooth_hierarchically': 1, 'normalize_features': 1,
                     'min_samples_split': 2, 'save': 0, 'discount_factor': 10,
                     'op_dir': 'results', 'init_id': 1, 'store_every': 0,
                     'perf_store_keys': ['pred_prob'],
                     'perf_metrics_keys': ['log_prob', 'acc'], 'budget': -1.0,
                     'n_mondrians': 10, 'debug': 0, 'n_minibatches': 1,
                     'name_metric': 'acc', 'budget_to_use': inf}
    self.settings = Map(settings_dict)
    reset_random_seed(self.settings)

    # Features: 45-bin hue histograms of each patch
    x_trainp = np.array([np.bincount(x.flatten().astype(int), minlength=45) for x in self.pos])
    x_trainn = np.array([np.bincount(x.flatten().astype(int), minlength=45) for x in self.neg])
    x_train = np.append(x_trainp, x_trainn, axis=0)
    self.data = {'n_dim': 1, 'x_test': array([x_train[5]]),
                 'x_train': array(x_train),
                 'y_train': array(np.ones(len(self.pos)).astype(int).tolist()
                                  + np.zeros(len(self.neg)).astype(int).tolist()),
                 'is_sparse': False, 'n_train': len(x_train), 'n_class': 2,
                 'y_test': array([]), 'n_test': 0}
    self.param, self.cache = precompute_minimal(self.data, self.settings)
    self.mf = MondrianForest(self.settings, self.data)
    self.mf.fit(self.data, array(range(0, len(x_train))), self.settings, self.param, self.cache)
    print("done")
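# The features above are 45-bin hue histograms. As a standalone sketch of the
# same construction (assumes an 8-bit BGR patch; mirrors __init__ above):
def hue_histogram(patch_bgr):
    hsv = cv2.cvtColor(patch_bgr, cv2.COLOR_BGR2HSV)
    hue = hsv[..., 0].astype(int) / 4  # OpenCV hue is 0..179; integer /4 buckets it into 0..44
    return np.bincount(hue.flatten(), minlength=45)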
def main():
    # Import settings from command line
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))

    # Reset random seed
    reset_random_seed(settings)

    # Load data
    data = load_dataset(settings)
    param, cache = precompute_minimal(data, settings)
    mf = MondrianForest(settings, data)

    print '\nminibatch\tmetric_train\tmetric_test\tnum_leaves'
    for idx_minibatch in range(settings.n_minibatches):
        train_ids_current_minibatch = data['train_ids_partition']['current'][idx_minibatch]
        if idx_minibatch == 0:
            # Batch training for first minibatch
            mf.fit(data, train_ids_current_minibatch, settings, param, cache)
        else:
            # Online update
            mf.partial_fit(data, train_ids_current_minibatch, settings, param, cache)

        # Evaluate on the cumulative training ids and on the test set
        weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
        train_ids_cumulative = data['train_ids_partition']['cumulative'][idx_minibatch]
        pred_forest_train, metrics_train = \
            mf.evaluate_predictions(data, data['x_train'][train_ids_cumulative, :],
                                    data['y_train'][train_ids_cumulative],
                                    settings, param, weights_prediction, False)
        pred_forest_test, metrics_test = \
            mf.evaluate_predictions(data, data['x_test'], data['y_test'],
                                    settings, param, weights_prediction, False)
        name_metric = settings.name_metric  # acc or mse
        metric_train = metrics_train[name_metric]
        metric_test = metrics_test[name_metric]
        tree_numleaves = np.zeros(settings.n_mondrians)
        for i_t, tree in enumerate(mf.forest):
            tree_numleaves[i_t] = len(tree.leaf_nodes)
        forest_numleaves = np.mean(tree_numleaves)
        print '%9d\t%.3f\t\t%.3f\t\t%.3f' % (idx_minibatch, metric_train, metric_test, forest_numleaves)

    print 'length of y_test'
    print data['y_test'].shape
    y_test_pred = get_y_pred(pred_forest_test['pred_prob'])
    print 'length of y_test_pred:'
    print y_test_pred.shape
    for x in range(0, len(y_test_pred)):
        print 'label: %d mf prediction: %d' % (data['y_test'][x], y_test_pred[x])

    # Requires module-level: from sklearn.metrics import confusion_matrix; import matplotlib.pyplot as plt
    cm = confusion_matrix(data['y_test'], y_test_pred)
    # Show confusion matrix in a separate window
    plt.matshow(cm)
    plt.title('Confusion matrix')
    plt.colorbar()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

    print '\nFinal forest stats:'
    tree_stats = np.zeros((settings.n_mondrians, 2))
    tree_average_depth = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_stats[i_t, -2:] = np.array([len(tree.leaf_nodes), len(tree.non_leaf_nodes)])
        tree_average_depth[i_t] = tree.get_average_depth(settings, data)[0]
    print 'mean(num_leaves) = %.1f, mean(num_non_leaves) = %.1f, mean(tree_average_depth) = %.1f' \
        % (np.mean(tree_stats[:, -2]), np.mean(tree_stats[:, -1]), np.mean(tree_average_depth))
    print 'n_train = %d, log_2(n_train) = %.1f, mean(tree_average_depth) = %.1f +- %.1f' \
        % (data['n_train'], np.log2(data['n_train']), np.mean(tree_average_depth), np.std(tree_average_depth))
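# Row-normalizing the confusion matrix often makes the plot easier to read when
# classes are imbalanced. A sketch, not part of the original script:
def normalize_cm(cm):
    # Cell (i, j) becomes P(predicted == j | true == i); empty rows stay zero.
    cm = cm.astype(float)
    row_sums = cm.sum(axis=1, keepdims=True)
    return np.divide(cm, row_sums, out=np.zeros_like(cm), where=row_sums > 0)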
def __init__(self, img, region):
    # Same construction as the variant above, but robust to the case where no
    # negative patches are found (st_neg records whether negatives are present).
    image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    image = image.astype(int) / 4
    h, s, v = cv2.split(image)
    s += 45
    v += 109
    image = h  # cv2.merge((h, s, v))
    self.window = max(region.width, region.height) * 2
    left = int(max(region.x, 0))
    top = int(max(region.y, 0))
    right = int(min(region.x + region.width, image.shape[1] - 1))
    bottom = int(min(region.y + region.height, image.shape[0] - 1))
    if (right - left) % 2 != 0:
        right -= 1
    if (bottom - top) % 2 != 0:
        bottom -= 1
    self.template = image[top:bottom, left:right]
    self.position = (region.x + region.width / 2, region.y + region.height / 2)
    self.size = (region.width, region.height)
    self.old_img = image
    self.pos = np.array([image[top:bottom, left:right].copy().tolist()])
    self.neg = np.array([])
    self.st_neg = 0

    # Positive examples: jitter by up to 3% of the region size (bounded retries)
    vred = [(0, 0)]
    infloop = 0
    while True:
        l = random.randint(-int((right - left) * 0.03), int((right - left) * 0.03))
        t = random.randint(-int((bottom - top) * 0.03), int((bottom - top) * 0.03))
        if l + left >= 0 and l + right < image.shape[1] and t + top >= 0 and t + bottom < image.shape[0]:
            vred += [(l, t)]
        if len(vred) > 15 or infloop > 10000:
            break
        infloop += 1
    self.pos = np.array([np.array(self.old_img[top + int(t2):bottom + int(t2),
                                               left + int(l2):right + int(l2)].copy().tolist())
                         for (l2, t2) in vred])

    # Negative examples: patches whose center is far from the region
    vred = []
    infloop = 0
    while True:
        l = random.randint(int((right - left) / 2), int(image.shape[1] - (right - left) / 2 - 1))
        t = random.randint(int((bottom - top) / 2), int(image.shape[0] - (bottom - top) / 2 - 1))
        if abs(l - left - (right - left) / 2) > (right - left) or \
                abs(t - top - (bottom - top) / 2) > (bottom - top):
            vred += [(l, t)]
        if len(vred) > 45 or infloop > 10000:
            break
        infloop += 1
    self.neg = np.array([np.array(self.old_img[int(t2) - (bottom - top) / 2:int(t2) + (bottom - top) / 2,
                                               int(l2) - (right - left) / 2:int(l2) + (right - left) / 2].copy().tolist())
                         for (l2, t2) in vred])
    print("before update")
    print("neg " + str(len(self.neg)))
    print("pos " + str(len(self.pos)))

    # Mondrian forest settings (renamed from `set` to avoid shadowing the builtin)
    settings_dict = {'optype': 'class', 'verbose': 1, 'draw_mondrian': 0,
                     'perf_dataset_keys': ['train', 'test'],
                     'data_path': '../../process_data/', 'dataset': 'toy-mf',
                     'tag': '', 'alpha': 0, 'bagging': 0, 'select_features': 0,
                     'smooth_hierarchically': 1, 'normalize_features': 1,
                     'min_samples_split': 2, 'save': 0, 'discount_factor': 10,
                     'op_dir': 'results', 'init_id': 1, 'store_every': 0,
                     'perf_store_keys': ['pred_prob'],
                     'perf_metrics_keys': ['log_prob', 'acc'], 'budget': -1.0,
                     'n_mondrians': 10, 'debug': 0, 'n_minibatches': 1,
                     'name_metric': 'acc', 'budget_to_use': inf}
    self.settings = Map(settings_dict)
    reset_random_seed(self.settings)

    # Features: 45-bin hue histograms of each patch
    x_trainp = np.array([np.bincount(x.flatten().astype(int), minlength=45) for x in self.pos])
    x_trainn = np.array([np.bincount(x.flatten().astype(int), minlength=45) for x in self.neg])
    if len(self.neg) > 0:
        x_train = np.append(x_trainp, x_trainn, axis=0)
        self.st_neg = 1
    else:
        x_train = x_trainp
        self.st_neg = 0
    self.data = {'n_dim': 1, 'x_test': array([x_train[5]]),
                 'x_train': array(x_train),
                 'y_train': array(np.ones(len(self.pos)).astype(int).tolist()
                                  + np.zeros(len(self.neg)).astype(int).tolist()),
                 'is_sparse': False, 'n_train': len(x_train), 'n_class': 2,
                 'y_test': array([]), 'n_test': 0}
    self.param, self.cache = precompute_minimal(self.data, self.settings)
    self.mf = MondrianForest(self.settings, self.data)
    self.mf.fit(self.data, array(range(0, len(x_train))), self.settings,
                self.param, self.cache)
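# Hypothetical construction sketch (the tracker class name MFTracker and the
# Region record are assumptions; any object with x, y, width, height fields works):
import collections
Region = collections.namedtuple('Region', ['x', 'y', 'width', 'height'])
frame = cv2.imread('frame0.png')  # hypothetical first frame
tracker = MFTracker(frame, Region(x=100, y=80, width=40, height=60))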