def main():
    """Benchmark CascadeSVM fit on the ijcnn1 dataset (sparse libsvm)."""
    samples, labels = ds.load_svmlight_file(
        "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/train",
        block_size=(5000, 22), n_features=22, store_sparse=True)

    model = CascadeSVM(c=10000, gamma=0.01)
    performance.measure("CSVM", "ijcnn1", model.fit, samples, labels)
def main():
    """Benchmark PCA fit on the KDD99 dataset."""
    dataset = ds.load_txt_file(
        "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/train.csv",
        block_size=(11482, 122))

    # Keep only the first 121 columns; the last column is dropped
    # before fitting.
    features = dataset[:, :121]

    estimator = PCA(arity=48)
    performance.measure("PCA", "KDD99", estimator.fit, features)
def main():
    """Benchmark RandomForestClassifier fit on the KDD99 dataset."""
    dataset = ds.load_txt_file(
        "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/train.csv",
        block_size=(11482, 122))

    # Column 121 is used as the label; columns 0-120 as features.
    labels = dataset[:, 121:122]
    features = dataset[:, :121]

    forest = RandomForestClassifier(n_estimators=100, distr_depth=2)
    performance.measure("RF", "KDD99", forest.fit, features, labels)
def main():
    """Benchmark DBSCAN fit on the gaia dataset."""
    path = "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/data_scaled.csv"
    samples = ds.load_txt_file(path, block_size=(10000, 5))

    clusterer = DBSCAN(eps=0.19, min_samples=5, max_samples=5000,
                       n_regions=17, dimensions=[0, 1])
    performance.measure("DBSCAN", "gaia", clusterer.fit, samples)
def main():
    """Benchmark RandomForestClassifier fit on the mnist dataset."""
    samples, labels = ds.load_svmlight_file(
        "/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/train.scaled",
        block_size=(5000, 780), n_features=780, store_sparse=False)

    forest = RandomForestClassifier(n_estimators=100, distr_depth=2)
    performance.measure("RF", "mnist", forest.fit, samples, labels)
def main():
    """Benchmark KMeans fit on 300M randomly generated samples."""
    n_samples = 300000000
    n_chunks = 1536
    n_features = 100
    n_clusters = 500

    # Rows per block, rounded up so the whole sample set is covered.
    rows_per_chunk = int(np.ceil(n_samples / n_chunks))

    data = ds.random_array((n_samples, n_features),
                           (rows_per_chunk, n_features))

    clusterer = KMeans(n_clusters=n_clusters, max_iter=5, tol=0, arity=48)
    performance.measure("KMeans", "300M", clusterer.fit, data)
def main():
    """Benchmark ALS fit on the Netflix dataset.

    Loads the Netflix ratings in libsvm format, partitioned into
    n_blocks x n_blocks blocks, and measures the fit of an ALS model.
    """
    n_blocks = 384
    data = "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/recommendation" \
           "/netflix/netflix_data_libsvm.txt"
    n_factors = 100
    n_features = 480189

    # Block sizes round up so every row/column is covered.
    block_size = (int(ceil(17770 / n_blocks)),
                  int(ceil(n_features / n_blocks)))

    x, y = ds.load_svmlight_file(data, block_size=block_size,
                                 n_features=n_features, store_sparse=True)

    als = ALS(tol=0.0001, random_state=676, n_f=n_factors, max_iter=10,
              verbose=False)

    # BUG FIX: pass the bound fit method, not the estimator object itself.
    # The other ALS benchmark in this file passes als.fit; passing `als`
    # would make measure() invoke the estimator as a callable instead of
    # running the fit.
    performance.measure("ALS", "Netflix", als.fit, x)
def main():
    """Benchmark CascadeSVM fit on the KDD99 and ijcnn1 datasets."""
    kdd = ds.load_txt_file(
        "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/kdd99/train.csv",
        block_size=(11482, 122))

    # Shuffle before splitting the label column off the feature matrix.
    kdd = shuffle(kdd)
    kdd_labels = kdd[:, 121:122]
    kdd_features = kdd[:, :121]

    ij_features, ij_labels = ds.load_svmlight_file(
        "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/ijcnn1/train",
        block_size=(5000, 22), n_features=22, store_sparse=True)

    model = CascadeSVM(c=10000, gamma=0.01)
    performance.measure("CSVM", "KDD99", model.fit, kdd_features, kdd_labels)
    performance.measure("CSVM", "ijcnn1", model.fit, ij_features, ij_labels)
def main():
    """Benchmark ALS fit on the Netflix dataset."""
    n_blocks = 384
    path = ("/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/"
            "netflix_data_libsvm.txt")
    n_factors = 100
    n_features = 480189

    # Block sizes round up so every row/column is covered.
    block_size = (int(ceil(17770 / n_blocks)),
                  int(ceil(n_features / n_blocks)))

    ratings, _labels = ds.load_svmlight_file(path, block_size=block_size,
                                             n_features=n_features,
                                             store_sparse=True)

    als = ALS(tol=0.0001, random_state=676, n_f=n_factors, max_iter=10,
              verbose=False)
    performance.measure("ALS", "Netflix", als.fit, ratings)
def main():
    """Benchmark distributed transpose of a 20K x 20K random array."""
    matrix = ds.random_array((20000, 20000), (100, 100))
    performance.measure("TR", "20K", matrix.transpose)