Ejemplo n.º 1
0
def main():
    """Benchmark CascadeSVM training on the ijcnn1 dataset.

    Loads the dataset in svmlight format as a sparse ds-array and times
    ``CascadeSVM.fit`` through ``performance.measure``.
    """
    dataset_path = "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/train"
    samples, labels = ds.load_svmlight_file(
        dataset_path,
        block_size=(5000, 22),
        n_features=22,
        store_sparse=True,
    )

    model = CascadeSVM(c=10000, gamma=0.01)

    performance.measure("CSVM", "ijcnn1", model.fit, samples, labels)
Ejemplo n.º 2
0
def main():
    """Benchmark PCA fitting on the KDD99 dataset.

    Loads the CSV as a ds-array and times ``PCA.fit``.
    """
    raw = ds.load_txt_file(
        "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/train.csv",
        block_size=(11482, 122))

    # Keep only the first 121 columns (the last column — presumably the
    # class label, as in the RF benchmark — is dropped before fitting).
    features = raw[:, :121]
    estimator = PCA(arity=48)

    performance.measure("PCA", "KDD99", estimator.fit, features)
Ejemplo n.º 3
0
def main():
    """Benchmark RandomForestClassifier training on the KDD99 dataset.

    Loads the CSV as a ds-array, splits it into features and targets, and
    times ``RandomForestClassifier.fit``.
    """
    dataset = ds.load_txt_file(
        "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/train.csv",
        block_size=(11482, 122))

    # Column 121 holds the targets; columns 0-120 are the features.
    targets = dataset[:, 121:122]
    features = dataset[:, :121]

    forest = RandomForestClassifier(n_estimators=100, distr_depth=2)
    performance.measure("RF", "KDD99", forest.fit, features, targets)
Ejemplo n.º 4
0
def main():
    """Benchmark DBSCAN clustering on the gaia dataset.

    Loads the pre-scaled CSV as a ds-array and times ``DBSCAN.fit``.
    """
    csv_path = "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/data_scaled.csv"
    samples = ds.load_txt_file(csv_path, block_size=(10000, 5))

    # Region partitioning is done over the first two dimensions only.
    clusterer = DBSCAN(eps=0.19, min_samples=5, max_samples=5000,
                       n_regions=17, dimensions=[0, 1])

    performance.measure("DBSCAN", "gaia", clusterer.fit, samples)
Ejemplo n.º 5
0
def main():
    """Benchmark RandomForestClassifier training on the scaled mnist dataset.

    Loads the dataset in svmlight format as a dense ds-array and times
    ``RandomForestClassifier.fit``.
    """
    path = "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/train.scaled"
    features, labels = ds.load_svmlight_file(
        path,
        block_size=(5000, 780),
        n_features=780,
        store_sparse=False,
    )

    clf = RandomForestClassifier(n_estimators=100, distr_depth=2)
    performance.measure("RF", "mnist", clf.fit, features, labels)
Ejemplo n.º 6
0
def main():
    """Benchmark KMeans on 300M randomly generated samples.

    Generates a random ds-array and times ``KMeans.fit``.
    """
    total_samples = 300000000
    total_chunks = 1536
    feature_count = 100
    cluster_count = 500

    # Rows per block, rounded up so every sample is covered.
    rows_per_block = int(np.ceil(total_samples / total_chunks))

    dataset = ds.random_array((total_samples, feature_count),
                              (rows_per_block, feature_count))

    # tol=0 disables the convergence check; the run always does max_iter
    # iterations, which keeps the benchmark deterministic in length.
    model = KMeans(n_clusters=cluster_count, max_iter=5, tol=0, arity=48)
    performance.measure("KMeans", "300M", model.fit, dataset)
Ejemplo n.º 7
0
def main():
    """Benchmark ALS training on the Netflix dataset.

    Loads the ratings in svmlight format as a sparse ds-array and times
    ``ALS.fit`` through ``performance.measure``.
    """
    n_blocks = 384
    data = "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/recommendation" \
           "/netflix/netflix_data_libsvm.txt"
    n_factors = 100
    n_features = 480189

    # 17770 rows in the ratings matrix — presumably one per movie in the
    # Netflix dataset; both axes are split into n_blocks blocks.
    block_size = (int(ceil(17770 / n_blocks)),
                  int(ceil(n_features / n_blocks)))

    x, y = ds.load_svmlight_file(data, block_size=block_size,
                                 n_features=n_features, store_sparse=True)

    als = ALS(tol=0.0001, random_state=676, n_f=n_factors, max_iter=10,
              verbose=False)

    # BUG FIX: pass the bound method ``als.fit`` (not the estimator object
    # itself) so that training is actually invoked and timed — consistent
    # with the other ALS benchmark in this file.
    performance.measure("ALS", "Netflix", als.fit, x)
Ejemplo n.º 8
0
def main():
    """Benchmark CascadeSVM training on the KDD99 and ijcnn1 datasets.

    The same estimator instance is fit on each dataset in turn, and each
    fit is timed through ``performance.measure``.
    """
    kdd = ds.load_txt_file(
        "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/kdd99/train.csv",
        block_size=(11482, 122))

    kdd = shuffle(kdd)
    # Column 121 holds the targets; columns 0-120 are the features.
    kdd_labels = kdd[:, 121:122]
    kdd_samples = kdd[:, :121]

    ijcnn_samples, ijcnn_labels = ds.load_svmlight_file(
        "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/ijcnn1/train",
        block_size=(5000, 22), n_features=22, store_sparse=True)

    model = CascadeSVM(c=10000, gamma=0.01)

    performance.measure("CSVM", "KDD99", model.fit, kdd_samples, kdd_labels)
    performance.measure("CSVM", "ijcnn1", model.fit, ijcnn_samples,
                        ijcnn_labels)
Ejemplo n.º 9
0
def main():
    """Benchmark ALS training on the Netflix dataset (scratch copy).

    Loads the ratings in svmlight format as a sparse ds-array and times
    ``ALS.fit`` through ``performance.measure``.
    """
    block_count = 384
    path = "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/" \
           "netflix_data_libsvm.txt"
    factor_count = 100
    feature_count = 480189

    # 17770 rows in the ratings matrix — presumably one per movie in the
    # Netflix dataset; both axes are split into block_count blocks.
    blocks = (int(ceil(17770 / block_count)),
              int(ceil(feature_count / block_count)))

    # Labels are not used by ALS; only the ratings matrix is kept.
    ratings, _ = ds.load_svmlight_file(path,
                                       block_size=blocks,
                                       n_features=feature_count,
                                       store_sparse=True)

    recommender = ALS(tol=0.0001,
                      random_state=676,
                      n_f=factor_count,
                      max_iter=10,
                      verbose=False)

    performance.measure("ALS", "Netflix", recommender.fit, ratings)
Ejemplo n.º 10
0
def main():
    """Benchmark transposing a 20000x20000 random ds-array (100x100 blocks)."""
    matrix = ds.random_array((20000, 20000), (100, 100))
    performance.measure("TR", "20K", matrix.transpose)