Esempio n. 1
0
def experiment6():
    """Box-plot VP-tree comparison counts across data dimensionalities.

    For each even number of features from 2 through 30, uniform random
    data and target arrays on [0, 1) are generated, a ``Trees.VPTree`` is
    built over the data, and the value returned by
    ``experiment_number_of_comparisons_model`` is collected. The collected
    values are drawn as one box per dimensionality and written to
    ``simulation6_number_of_comparisons.jpg``.
    """
    dimensionalities = np.arange(2, 32, 2)
    n_data = 10**4
    n_targets = 1000
    k = 1

    def _comparisons_for(n_features):
        # Data is drawn before targets so the random stream is consumed in
        # a fixed order for every dimensionality.
        data = np.random.uniform(0, 1, (n_data, n_features))
        targets = np.random.uniform(0, 1, (n_targets, n_features))
        model = Trees.VPTree(data, 2, similarity_metric='euclidean')
        return experiment_number_of_comparisons_model(
            targets, data, model, k, 6, 'vp_tree_2')

    results = [_comparisons_for(n_features) for n_features in dimensionalities]

    title = (
        'Number of comparisions in vp tree, for different dimensionalities\nNumber of data points: {0}\nNumber of trials: {1}'
        .format(n_data, n_targets))
    # Boxplot positions are 1-based, hence the shifted tick locations.
    tick_positions = np.arange(len(dimensionalities)) + 1

    plt.clf()
    plt.boxplot(results)
    plt.xticks(tick_positions, dimensionalities)
    plt.title(title)
    plt.ylabel('number of comparisons')
    plt.xlabel('number of dimensions')
    plt.margins(0.2, 0.2)
    plt.savefig('simulation6_number_of_comparisons.jpg', bbox_inches='tight')
Esempio n. 2
0
def experiment1():
    """Box-plot the number of comparisons made by each index structure.

    Uniform random data and target arrays on [0, 1) are generated once and
    shared by all models. Each model listed below is built over the data
    and passed to ``experiment1_model``; the returned values are drawn as
    one box per model and written to
    ``simulation1_number_of_comparisons.jpg``.

    Every listed model is evaluated, so the number of boxes always matches
    the number of x-axis labels.
    """
    print('Running simulation 1')
    n_data = 10**4
    n_features = 30
    n_targets = 1000
    k = 1
    data = np.random.uniform(0, 1, (n_data, n_features))
    targets = np.random.uniform(0, 1, (n_targets, n_features))

    # (label, builder) pairs. Builders are deferred so that only one model
    # is constructed at a time, in the order the labels appear on the plot.
    model_builders = [
        ('kd-tree',
         lambda: Trees.KDTree(data, similarity_metric='euclidean')),
        ('vp-tree-2',
         lambda: Trees.VPTree(data, 2, similarity_metric='euclidean')),
        ('vp-tree-10',
         lambda: Trees.VPTree(data, 10, similarity_metric='euclidean')),
        ('vp-tree-100',
         lambda: Trees.VPTree(data, 100, similarity_metric='euclidean')),
        ('pp-100-100',
         lambda: NewDataStructures.PavlosIndexingSystem(
             data, 100, 100, similarity_metric='euclidean')),
        ('pp-1000-1000',
         lambda: NewDataStructures.PavlosIndexingSystem(
             data, 1000, 1000, similarity_metric='euclidean')),
    ]
    model_names = [name for name, _ in model_builders]

    results = []
    for name, build_model in model_builders:
        model = build_model()
        number_of_comparisons = experiment1_model(targets, data, model, k,
                                                  name)
        results.append(number_of_comparisons)

    plt.clf()
    plt.boxplot(results)
    plt.margins(0.2, 0.2)
    # Boxplot positions are 1-based, hence the shifted tick locations.
    plt.xticks(np.arange(len(model_names)) + 1,
               model_names,
               rotation='vertical')
    plt.title(
        'Number of comparisions per model\nNumber of data points: {0}\nDimensionality: {1}\nNumber of trials: {2}\nk={3}'
        .format(n_data, n_features, n_targets, k))
    plt.ylabel('number of comparisons')
    plt.savefig('simulation1_number_of_comparisons.jpg', bbox_inches='tight')