    def test_027_from_parquet_largefile(self):
        """
        test whether or not incremental_quantile_normalize works with a larger
        random file
        """
        np.random.seed(42)
        df1 = pd.DataFrame(
            index=range(5000),
            columns=["sample" + str(col) for col in range(100)],
        )
        df1[:] = np.random.randint(0, 100, size=df1.shape)
        df1 = df1.astype(float)
        df1.to_parquet("test_large.parquet")

        qnorm.incremental_quantile_normalize(
            "test_large.parquet",
            "test_large_out.parquet",
            rowchunksize=11,
            colchunksize=11,
        )
        df2 = pd.read_parquet("test_large_out.parquet")

        np.testing.assert_almost_equal(
            qnorm.quantile_normalize(df1), df2.values, decimal=4
        )

    def test_021_from_hdf_largefile(self):
        """
        test whether or not incremental_quantile_normalize works with a larger
        random file
        """
        np.random.seed(42)
        df1 = pd.DataFrame(
            index=range(5000),
            columns=["sample" + str(col) for col in range(100)],
            dtype=int,
        )
        df1[:] = np.random.randint(0, 100, size=df1.shape)
        df1.to_hdf(
            "test_large.hdf", key="qnorm", format="table", data_columns=True
        )

        qnorm.incremental_quantile_normalize(
            "test_large.hdf",
            "test_large_out.hdf",
            rowchunksize=11,
            colchunksize=11,
        )
        df2 = pd.read_hdf("test_large_out.hdf", index_col=0, header=0)

        np.testing.assert_almost_equal(
            qnorm.quantile_normalize(df1), df2.values, decimal=4
        )

    def test_017_from_hdf(self):
        """
        test the basic incremental_quantile_normalize functionality
        """
        qnorm.incremental_quantile_normalize("test.hdf", "test_out.hdf")
        df1 = pd.read_hdf("test.hdf", index_col=0, header=0)
        df2 = pd.read_hdf("test_out.hdf", index_col=0, header=0)

        np.testing.assert_almost_equal(
            qnorm.quantile_normalize(df1), df2.values, decimal=5
        )

    def test_023_from_parquet(self):
        """
        test the basic incremental_quantile_normalize functionality
        """
        qnorm.incremental_quantile_normalize("test.parquet", "test_out.parquet")
        df1 = pd.read_parquet("test.parquet")
        df2 = pd.read_parquet("test_out.parquet")

        np.testing.assert_almost_equal(
            qnorm.quantile_normalize(df1), df2.values, decimal=5
        )

    def test_025_from_parquet_colchunk(self):
        """
        test the incremental_quantile_normalize with colchunks functionality
        """
        df1 = pd.read_parquet("test.parquet")
        for colchunksize in range(1, 10):
            qnorm.incremental_quantile_normalize(
                "test.parquet",
                "test_out.parquet",
                colchunksize=colchunksize,
            )
            df2 = pd.read_parquet("test_out.parquet")

            np.testing.assert_almost_equal(
                qnorm.quantile_normalize(df1), df2.values, decimal=5
            )

    def test_019_from_hdf_colchunk(self):
        """
        test the incremental_quantile_normalize with colchunks functionality
        """
        df1 = pd.read_hdf("test.hdf", index_col=0, header=0)
        for colchunksize in range(1, 10):
            qnorm.incremental_quantile_normalize(
                "test.hdf",
                "test_out.hdf",
                colchunksize=colchunksize,
            )
            df2 = pd.read_hdf("test_out.hdf", index_col=0, header=0)

            np.testing.assert_almost_equal(
                qnorm.quantile_normalize(df1), df2.values, decimal=5
            )

    def test_013_from_csv_rowchunk(self):
        """
        test the incremental_quantile_normalize with rowchunks functionality
        """
        df1 = pd.read_csv("test.csv", index_col=0, header=0)
        for rowchunksize in range(1, 10):
            qnorm.incremental_quantile_normalize(
                "test.csv",
                "test_out.csv",
                rowchunksize=rowchunksize,
            )
            df2 = pd.read_csv("test_out.csv", index_col=0, header=0)

            np.testing.assert_almost_equal(
                qnorm.quantile_normalize(df1), df2.values, decimal=5
            )

    def test_026_from_parquet_colrowchunk(self):
        """
        test the incremental_quantile_normalize with both row and colchunks
        """
        df1 = pd.read_parquet("test.parquet")
        for colchunksize in range(1, 10):
            for rowchunksize in range(1, 10):
                qnorm.incremental_quantile_normalize(
                    "test.parquet",
                    "test_out.parquet",
                    rowchunksize=rowchunksize,
                    colchunksize=colchunksize,
                )
                df2 = pd.read_parquet("test_out.parquet")

                np.testing.assert_almost_equal(
                    qnorm.quantile_normalize(df1), df2.values, decimal=5
                )

    def test_020_from_hdf_colrowchunk(self):
        """
        test the incremental_quantile_normalize with both row and colchunks
        """
        df1 = pd.read_hdf("test.hdf", index_col=0, header=0)
        for colchunksize in range(1, 10):
            for rowchunksize in range(1, 10):
                qnorm.incremental_quantile_normalize(
                    "test.hdf",
                    "test_out.hdf",
                    rowchunksize=rowchunksize,
                    colchunksize=colchunksize,
                )
                df2 = pd.read_hdf("test_out.hdf", index_col=0, header=0)

                np.testing.assert_almost_equal(
                    qnorm.quantile_normalize(df1), df2.values, decimal=5
                )

    def test_016_from_csv_largefile(self):
        """
        test whether or not incremental_quantile_normalize works with a larger
        random file
        """
        np.random.seed(42)
        df1 = pd.DataFrame(index=range(5000), columns=range(100))
        df1[:] = np.random.randint(0, 100, size=df1.shape)
        df1.to_csv("test_large.csv")

        qnorm.incremental_quantile_normalize(
            "test_large.csv",
            "test_large_out.csv",
            rowchunksize=11,
            colchunksize=11,
        )
        df2 = pd.read_csv("test_large_out.csv", index_col=0, header=0)

        np.testing.assert_almost_equal(
            qnorm.quantile_normalize(df1), df2.values, decimal=4
        )
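
    # The tests above read pre-existing fixture files ("test.csv", "test.hdf",
    # "test.parquet") that are created elsewhere in the suite. The sketch below
    # is not part of the original tests; it only illustrates, under assumed
    # shapes and names ("gene..."/"sample..." labels, a 20x4 random frame), how
    # such fixtures could be written so that all three readers find compatible
    # input. The hdf arguments mirror test_021 above; the actual setup may differ.
    def _sketch_write_small_fixtures(self):
        np.random.seed(42)
        df = pd.DataFrame(
            np.random.rand(20, 4),
            index=["gene" + str(row) for row in range(20)],
            columns=["sample" + str(col) for col in range(4)],
        )
        df.to_csv("test.csv")
        df.to_hdf("test.hdf", key="qnorm", format="table", data_columns=True)
        df.to_parquet("test.parquet")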