def _testMatrixCreation(self, in_x, in_y, **kwargs):
    """Check that a RayDMatrix built from the inputs reproduces the fixture.

    The matrix is first read back as a single actor, then as two actors
    whose shards are concatenated in rank order. In both cases the result
    is compared against ``self.x`` / ``self.y`` with ``np.allclose``.

    Args:
        in_x: Data argument forwarded to ``RayDMatrix``.
        in_y: Label argument forwarded to ``RayDMatrix``.
        **kwargs: Extra keyword arguments forwarded to ``RayDMatrix``.
            ``sharding`` is set to ``RayShardingMode.BATCH`` when the
            caller does not supply it.
    """
    # NOTE(review): self.x / self.y are presumably set up by the test
    # fixture (e.g. setUp) -- they are not assigned in this method.
    kwargs.setdefault("sharding", RayShardingMode.BATCH)
    single_actor_matrix = RayDMatrix(in_x, in_y, **kwargs)

    def _load_data(params):
        # get_data() may hand back lists of frames; flatten them so the
        # comparison below always sees one object per field.
        features = params["data"]
        labels = params["label"]
        features = (
            concat_dataframes(features)
            if isinstance(features, list)
            else features
        )
        labels = (
            concat_dataframes(labels) if isinstance(labels, list) else labels
        )
        return features, labels

    # Single actor check: rank 0 of 1 is compared against the full fixture.
    full_x, full_y = _load_data(
        single_actor_matrix.get_data(rank=0, num_actors=1)
    )
    self.assertTrue(np.allclose(self.x, full_x))
    self.assertTrue(np.allclose(self.y, full_y))

    # Multi actor check
    sharded_matrix = RayDMatrix(in_x, in_y, **kwargs)
    first_x, first_y = _load_data(
        sharded_matrix.get_data(rank=0, num_actors=2)
    )
    # Unload before requesting the second rank's shard.
    sharded_matrix.unload_data()
    second_x, second_y = _load_data(
        sharded_matrix.get_data(rank=1, num_actors=2)
    )

    rejoined_x = concat_dataframes([first_x, second_x])
    self.assertTrue(np.allclose(self.x, rejoined_x))
    rejoined_y = concat_dataframes([first_y, second_y])
    self.assertTrue(np.allclose(self.y, rejoined_y))
def testColumnOrdering(self):
    """Excluding the label column must keep the other columns in order.

    Builds a one-row frame with 50 string-named columns, uses the last
    column as the label, and asserts that the data returned for rank 0
    lists the remaining 49 columns in their original order.
    """
    column_names = list(map(str, range(50)))
    # Everything but the last name is a feature; the last one is the label.
    *feature_names, label_name = column_names

    values = np.random.randn(1, len(column_names))
    frame = pd.DataFrame(values, columns=column_names)

    matrix = RayDMatrix(frame, label=label_name, num_actors=1)
    loaded = matrix.get_data(0)["data"]

    assert loaded.columns.tolist() == feature_names
def _testMatrixCreation(self, in_x, in_y, **kwargs):
    """Check that a RayDMatrix built from the inputs reproduces the fixture.

    The matrix is read back as a single actor (rank 0 of 1) and the
    returned data and label are compared against ``self.x`` / ``self.y``
    with ``np.allclose``.

    Args:
        in_x: Data argument forwarded to ``RayDMatrix``.
        in_y: Label argument forwarded to ``RayDMatrix``.
        **kwargs: Extra keyword arguments forwarded to ``RayDMatrix``.
    """
    # NOTE(review): self.x / self.y are presumably set up by the test
    # fixture (e.g. setUp) -- they are not assigned in this method.
    matrix = RayDMatrix(in_x, in_y, **kwargs)
    loaded = matrix.get_data(rank=0, num_actors=1)

    features, labels = loaded["data"], loaded["label"]
    # get_data() may hand back lists of frames; flatten them before
    # comparing against the reference arrays.
    features = (
        concat_dataframes(features) if isinstance(features, list) else features
    )
    labels = concat_dataframes(labels) if isinstance(labels, list) else labels

    self.assertTrue(np.allclose(self.x, features))
    self.assertTrue(np.allclose(self.y, labels))