def test_pca_mortgage(nrows=1000, ncols=100, n_components=10,
                      svd_solver='full', whiten=False, random_state=42,
                      threshold=1e-3, data_source='random', use_assert=True,
                      quarters=8, rows_per_quarter=100000, test_model='cuml'):
    """Build an input table and hand it to ``test_pca_helper``.

    The table is either uniform random noise or a random sample of rows
    (drawn with replacement) from the array returned by ``load_mortgage``.
    Either way it is wrapped in a pandas DataFrame with columns named
    ``fea0``, ``fea1``, ... before being passed on.

    Parameters
    ----------
    nrows : int
        Number of rows to generate or sample.
    ncols : int
        Number of columns to generate; for 'mortgage' data, the maximum
        number of leading columns kept.
    n_components, svd_solver, whiten, random_state, threshold, use_assert,
    test_model
        Forwarded unchanged to ``test_pca_helper``. Note that
        ``random_state`` does not seed the data generation done here,
        which uses NumPy's global random state.
    data_source : {'random', 'mortgage'}
        Where the input matrix comes from.
    quarters, rows_per_quarter : int
        Forwarded to ``load_mortgage``; only used when
        ``data_source == 'mortgage'``.

    Raises
    ------
    NotImplementedError
        If ``data_source`` is neither 'random' nor 'mortgage'.
    """
    print()
    if data_source == 'random':
        X = np.random.rand(nrows, ncols)
    elif data_source == 'mortgage':
        X = load_mortgage(quarters=quarters, rows_per_quarter=rows_per_quarter)
        # randint's upper bound is exclusive, so pass the row count itself.
        # Using shape[0] - 1 would never pick the last row and would fail
        # outright on a single-row table.
        row_idx = np.random.randint(0, X.shape[0], nrows)
        X = X[row_idx, :ncols]
    else:
        raise NotImplementedError(
            'unsupported data_source: %r' % (data_source,))

    # Use X.shape[1] rather than ncols: the mortgage data may have fewer
    # columns than requested, in which case the slice above kept them all.
    X = pd.DataFrame({'fea%d' % i: X[:, i] for i in range(X.shape[1])})
    print('%s data' % data_source, X.shape)
    test_pca_helper(X, n_components, svd_solver, whiten, random_state,
                    threshold, use_assert, test_model)
def test_dbscan(nrows=1000, ncols=100, eps=3, min_samples=2, threshold=1e-3,
                data_source='random', use_assert=True, quarters=8,
                rows_per_quarter=100000, test_model='cuml'):
    """Build an input table and hand it to ``test_dbscan_helper``.

    The table is either uniform random noise or a random sample of rows
    (drawn with replacement) from the array returned by ``load_mortgage``.
    Either way it is wrapped in a pandas DataFrame with columns named
    ``fea0``, ``fea1``, ... before being passed on.

    Parameters
    ----------
    nrows : int
        Number of rows to generate or sample.
    ncols : int
        Number of columns to generate; for 'mortgage' data, the maximum
        number of leading columns kept.
    eps, min_samples, threshold, use_assert, test_model
        Forwarded unchanged to ``test_dbscan_helper``.
    data_source : {'random', 'mortgage'}
        Where the input matrix comes from.
    quarters, rows_per_quarter : int
        Forwarded to ``load_mortgage``; only used when
        ``data_source == 'mortgage'``.

    Raises
    ------
    NotImplementedError
        If ``data_source`` is neither 'random' nor 'mortgage'.
    """
    print()
    if data_source == 'random':
        X = np.random.rand(nrows, ncols)
    elif data_source == 'mortgage':
        X = load_mortgage(quarters=quarters, rows_per_quarter=rows_per_quarter)
        # randint's upper bound is exclusive, so pass the row count itself.
        # Using shape[0] - 1 would never pick the last row and would fail
        # outright on a single-row table.
        row_idx = np.random.randint(0, X.shape[0], nrows)
        X = X[row_idx, :ncols]
    else:
        raise NotImplementedError(
            'unsupported data_source: %r' % (data_source,))

    # Use X.shape[1] rather than ncols: the mortgage data may have fewer
    # columns than requested, in which case the slice above kept them all.
    X = pd.DataFrame({'fea%d' % i: X[:, i] for i in range(X.shape[1])})
    print('%s data' % data_source, X.shape)
    test_dbscan_helper(X, eps, min_samples, threshold, use_assert, test_model)