def compute_n_crystal_found(filename, test_range=range(0, 101, 10)):
    """Count CRYSTAL_CLASS labels found after the initial points.

    The labels are loaded from ``filename`` with ``read_data``. For every
    ``t`` in ``test_range`` the slice
    ``y[N_INIT_POINTS:N_INIT_POINTS - 1 + t]`` is inspected and the number of
    entries equal to ``CRYSTAL_CLASS`` is recorded.

    Returns the tuple ``(test_range, scores)`` where ``scores`` holds one
    count per value of ``test_range``.
    """
    _, labels = read_data(filename)

    counts = []
    for n_added in test_range:
        stop = N_INIT_POINTS - 1 + n_added
        # only the labels located after the initial points are considered
        added_labels = labels[N_INIT_POINTS:stop]
        n_crystals = np.sum(added_labels == CRYSTAL_CLASS)
        counts.append(n_crystals)

    return test_range, counts
Exemple #2
0
def compute_volume_convex_hull(filename, test_range=range(0, 101, 10)):
    """Track the convex-hull volume of the CRYSTAL_CLASS points.

    The data is loaded from ``filename`` with ``read_data``. For every ``t``
    in ``test_range`` the first ``N_INIT_POINTS - 1 + t`` samples are kept,
    the rows labelled ``CRYSTAL_CLASS`` are selected, and the ``volume`` of
    their ``ConvexHull`` is recorded.

    Returns the tuple ``(test_range, scores)`` where ``scores`` holds one
    volume per value of ``test_range``.
    """
    data, labels = read_data(filename)

    volumes = []
    for n_added in test_range:
        stop = N_INIT_POINTS - 1 + n_added

        seen_data = data[0:stop]
        seen_labels = labels[0:stop]

        # keep only the samples labelled as crystal
        is_crystal = seen_labels == CRYSTAL_CLASS
        crystal_points = seen_data[is_crystal, :]

        volumes.append(ConvexHull(crystal_points).volume)

    return test_range, volumes
Exemple #3
0
def compute_volume_neighbors(filename, radius, test_range=range(0, 101, 10)):
    """Track a neighbor-based score of the CRYSTAL_CLASS points.

    The data is loaded from ``filename`` with ``read_data``. For every ``t``
    in ``test_range`` the first ``N_INIT_POINTS - 1 + t`` samples are kept,
    the rows labelled ``CRYSTAL_CLASS`` are selected, a ``BallTree`` is built
    on them, and the first value returned by
    ``average_n_neighbors(tree, points, radius)`` is recorded.

    Returns the tuple ``(test_range, scores)`` where ``scores`` holds one
    value per entry of ``test_range``.
    """
    data, labels = read_data(filename)

    neighbor_scores = []
    for n_added in test_range:
        stop = N_INIT_POINTS - 1 + n_added

        seen_data = data[0:stop]
        seen_labels = labels[0:stop]

        # keep only the samples labelled as crystal
        is_crystal = seen_labels == CRYSTAL_CLASS
        crystal_points = seen_data[is_crystal, :]

        crystal_tree = BallTree(crystal_points)
        # the second returned value is not used here
        mean_score, _unused = average_n_neighbors(
            crystal_tree, crystal_points, radius)
        neighbor_scores.append(mean_score)

    return test_range, neighbor_scores
Exemple #4
0
    # Exactly one command-line argument is expected.
    # NOTE(review): only a message is printed here; execution continues, so
    # the sys.argv[1] accesses below still raise IndexError when no argument
    # was given -- confirm whether an early exit was intended.
    if len(sys.argv) != 2:
        print 'Please specify a root folder as argument'

    # seed: the argument is parsed as an integer and used to seed both
    # `random` and `np.random`
    seed = int(sys.argv[1])
    random.seed(seed)
    np.random.seed(seed)

    # the same argument (as a string) names the output folder under HERE_PATH;
    # presumably ensure_dir creates the folder when it is missing -- verify
    root_folder = os.path.join(HERE_PATH, sys.argv[1])
    filetools.ensure_dir(root_folder)

    # load data from 'init_data.csv' located under HERE_PATH
    current_datafile = os.path.join(HERE_PATH, 'init_data.csv')
    X, y = read_data(current_datafile)
    # check everything is fine: every row of X must sum to TOTAL_VOLUME_IN_ML,
    # compared up to N_DECIMAL_EQUAL decimals
    np.testing.assert_array_almost_equal(np.sum(X, axis=1),
                                         TOTAL_VOLUME_IN_ML,
                                         decimal=N_DECIMAL_EQUAL)

    # draw N_GENERATED uniformly random rows with as many columns as X, then
    # rescale them by TOTAL_VOLUME_IN_ML; presumably proba_normalize_row makes
    # each row sum to 1 so the new rows pass the same check as X -- verify
    X_selected = np.random.rand(N_GENERATED, X.shape[1])
    X_selected = proba_normalize_row(X_selected)
    X_selected = TOTAL_VOLUME_IN_ML * X_selected

    # save new csv

    # xout: the loaded rows followed by the newly generated rows
    X_out = np.vstack((X, X_selected))
Exemple #5
0
def get_data_xp(xp_key):
    """Return what ``read_data`` yields for the file registered under ``xp_key``.

    The file name is looked up in ``FILENAMES``.
    """
    filename = FILENAMES[xp_key]
    return read_data(filename)
Exemple #6
0
def get_new_data(filename):
    """Return the samples and labels stored after the initial points.

    The data is loaded from ``filename`` with ``read_data``; everything from
    index ``N_INIT_POINTS`` onwards is returned as ``(X, y)``.
    """
    data, labels = read_data(filename)
    new_data = data[N_INIT_POINTS:, :]
    new_labels = labels[N_INIT_POINTS:]
    return new_data, new_labels
Exemple #7
0
def get_init_data(filename):
    """Return the initial samples and labels.

    The data is loaded from ``filename`` with ``read_data``; the first
    ``N_INIT_POINTS`` entries are returned as ``(X, y)``.
    """
    data, labels = read_data(filename)
    init_data = data[:N_INIT_POINTS, :]
    init_labels = labels[:N_INIT_POINTS]
    return init_data, init_labels

def get_min_sample_per_class(y):
    """Return the size of the least represented class in ``y``.

    Each distinct value of ``y`` is treated as a class. When ``y`` holds no
    value at all, ``np.inf`` is returned.
    """
    class_sizes = [np.sum(y == label) for label in np.unique(y)]
    if not class_sizes:
        return np.inf
    return min(class_sizes)


def compute_learning_curve(filename, (X_test, y_test),
                           blank_clf,
                           test_range=range(0, 101, 10)):

    X, y = read_data(filename)

    scores = []
    confusions = []
    for t in test_range:
        ind = N_INIT_POINTS - 1 + t

        X_train = X[0:ind]
        y_train = y[0:ind]

        clf = clone(blank_clf)
        clf.fit(X_train, y_train)

        prediction_accuracy = clf.score(X_test, y_test)
        scores.append(prediction_accuracy)