def experiment6():
    """Box-plot VP-tree comparison counts across data dimensionalities.

    For every even dimensionality from 2 to 30 this draws 10**4 data points
    and 1000 query targets with coordinates sampled uniformly from [0, 1),
    builds a ``Trees.VPTree`` over the data with the euclidean metric, and
    collects whatever ``experiment_number_of_comparisons_model`` returns for
    a k=1 query run. The collected results are drawn as one box per
    dimensionality and written to ``simulation6_number_of_comparisons.jpg``.
    """
    k = 1
    n_data = 10**4
    n_targets = 1000
    dimensionalities = np.arange(2, 32, 2)

    def run_for_dimension(dim):
        # Data is sampled before the targets, so the order in which the
        # global NumPy RNG is consumed stays fixed for each dimensionality.
        points = np.random.uniform(0, 1, n_data * dim).reshape(n_data, dim)
        queries = np.random.uniform(0, 1, n_targets * dim).reshape(
            n_targets, dim)
        # NOTE(review): the meaning of the second VPTree argument (2) is not
        # visible here; the 'vp_tree_2' label suggests they belong together.
        tree = Trees.VPTree(points, 2, similarity_metric='euclidean')
        # NOTE(review): the literal 6 is presumably the experiment number
        # used by the helper -- confirm against its definition.
        return experiment_number_of_comparisons_model(
            queries, points, tree, k, 6, 'vp_tree_2')

    comparisons_per_dimension = [
        run_for_dimension(dim) for dim in dimensionalities
    ]

    plt.clf()
    plt.boxplot(comparisons_per_dimension)
    # Box positions are 1-based, hence ticks at 1..len(dimensionalities).
    tick_positions = np.arange(1, len(dimensionalities) + 1)
    plt.xticks(tick_positions, dimensionalities)
    title_text = (
        'Number of comparisions in vp tree, for different dimensionalities\nNumber of data points: {0}\nNumber of trials: {1}'
        .format(n_data, n_targets))
    plt.title(title_text)
    plt.ylabel('number of comparisons')
    plt.xlabel('number of dimensions')
    plt.margins(0.2, 0.2)
    plt.savefig('simulation6_number_of_comparisons.jpg', bbox_inches='tight')
def experiment1(n_models=2):
    """Compare comparison counts of several nearest-neighbour indexes.

    Draws 10**4 data points and 1000 query targets in 30 dimensions, with
    coordinates sampled uniformly from [0, 1). Each selected index structure
    is built over the data and passed to ``experiment1_model`` with k=1. The
    collected results are drawn as one box per model and written to
    ``simulation1_number_of_comparisons.jpg``.

    Args:
        n_models: How many of the candidate models to benchmark, taken in
            declaration order (kd-tree, vp-tree-2, vp-tree-10, vp-tree-100,
            pp-100-100, pp-1000-1000). Defaults to 2, which matches the
            previously hard-coded behaviour of running only the first two.

    Raises:
        ValueError: If ``n_models`` is not between 1 and the number of
            candidate models.
    """
    print('Running simulation 1')
    n_data = 10**4
    n_features = 30
    k = 1
    data = np.random.uniform(0, 1, n_data * n_features).reshape(
        (n_data, n_features))
    n_targets = 1000
    targets = np.random.uniform(0, 1, n_targets * n_features).reshape(
        (n_targets, n_features))

    # (label, factory) pairs. The factories are lazy so that only the models
    # actually selected are built; the later ones may be costly to construct.
    model_specs = [
        ('kd-tree',
         lambda: Trees.KDTree(data, similarity_metric='euclidean')),
        ('vp-tree-2',
         lambda: Trees.VPTree(data, 2, similarity_metric='euclidean')),
        ('vp-tree-10',
         lambda: Trees.VPTree(data, 10, similarity_metric='euclidean')),
        ('vp-tree-100',
         lambda: Trees.VPTree(data, 100, similarity_metric='euclidean')),
        ('pp-100-100',
         lambda: NewDataStructures.PavlosIndexingSystem(
             data, 100, 100, similarity_metric='euclidean')),
        ('pp-1000-1000',
         lambda: NewDataStructures.PavlosIndexingSystem(
             data, 1000, 1000, similarity_metric='euclidean')),
    ]
    if not 1 <= n_models <= len(model_specs):
        raise ValueError(
            'n_models must be between 1 and {0}, got {1}'.format(
                len(model_specs), n_models))
    selected_specs = model_specs[:n_models]

    results = []
    for model_name, build_model in selected_specs:
        model = build_model()
        number_of_comparisons = experiment1_model(targets, data, model, k,
                                                  model_name)
        results.append(number_of_comparisons)

    plt.clf()
    plt.boxplot(results)
    plt.margins(0.2, 0.2)
    # Label only the boxes that were actually drawn. Previously all six model
    # names were used as ticks even though only two models were benchmarked.
    selected_names = [model_name for model_name, _ in selected_specs]
    plt.xticks(np.arange(len(selected_names)) + 1,
               selected_names,
               rotation='vertical')
    plt.title(
        'Number of comparisions per model\nNumber of data points: {0}\nDimensionality: {1}\nNumber of trials: {2}\nk={3}'
        .format(n_data, n_features, n_targets, k))
    plt.ylabel('number of comparisons')
    plt.savefig('simulation1_number_of_comparisons.jpg', bbox_inches='tight')