コード例 #1
0
    def registerDatasetTypes(datasetTypeName, dimensions, storageClass, registry):
        """Register a dataset type plus one dataset type per component.

        Parameters
        ----------
        datasetTypeName
            Name given to the parent ``DatasetType``.
        dimensions
            Dimensions passed unchanged to the parent and to every
            component ``DatasetType``.
        storageClass
            Storage class of the parent; its ``components`` mapping
            (component name -> component storage class) drives the
            component registrations.
        registry
            Object whose ``registerDatasetType`` method receives each
            constructed ``DatasetType``.
        """
        # The parent is registered first; its name is the basis for the
        # component type names.
        parentType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(parentType)

        for componentName, componentStorageClass in storageClass.components.items():
            componentTypeName = parentType.componentTypeName(componentName)
            componentType = DatasetType(componentTypeName, dimensions, componentStorageClass)
            registry.registerDatasetType(componentType)
コード例 #2
0
class ParquetFormatterTestCase(unittest.TestCase):
    """Tests for ParquetFormatter, using PosixDatastore.

    Each test stores a randomly filled pandas DataFrame through a butler
    backed by a fresh temporary repository, then reads it back in full, as
    its "columns" component, and restricted via the "columns" parameter.
    """

    def setUp(self):
        """Create a new butler root for each test."""
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        Butler.makeRepo(self.root)
        self.butler = Butler(self.root, run="test_run")
        # The dataset type has no dimensions, so no dimension data has to be
        # inserted and every call below can use an empty data ID.
        universe = self.butler.registry.dimensions
        self.datasetType = DatasetType("data", dimensions=(), storageClass="DataFrame",
                                       universe=universe)
        self.butler.registry.registerDatasetType(self.datasetType)

    def tearDown(self):
        # Remove the temporary repository; errors during removal are ignored.
        if not os.path.exists(self.root):
            return
        shutil.rmtree(self.root, ignore_errors=True)

    def testSingleIndexDataFrame(self):
        # Round-trip a DataFrame whose columns form a flat index.
        flatColumns = pd.Index(["a", "b", "c"])
        values = np.random.randn(5, 3)
        rowIndex = np.arange(5, dtype=int)
        original = pd.DataFrame(values, index=rowIndex, columns=flatColumns)
        self.butler.put(original, self.datasetType, dataId={})

        # Read the whole DataFrame back.
        roundTripped = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(original.equals(roundTripped))

        # Read only the column index through the "columns" component.
        columnsTypeName = self.datasetType.componentTypeName("columns")
        storedColumns = self.butler.get(columnsTypeName, dataId={})
        self.assertTrue(original.columns.equals(storedColumns))

        # Select a subset with a list of names, then with a single name.
        listSelection = self.butler.get(self.datasetType, dataId={},
                                        parameters={"columns": ["a", "c"]})
        self.assertTrue(original.loc[:, ["a", "c"]].equals(listSelection))
        nameSelection = self.butler.get(self.datasetType, dataId={},
                                        parameters={"columns": "a"})
        self.assertTrue(original.loc[:, ["a"]].equals(nameSelection))

        # Passing an unrecognized column should be a ValueError.
        unknownColumns = {"columns": ["d"]}
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={}, parameters=unknownColumns)

    def testMultiIndexDataFrame(self):
        # Round-trip a DataFrame whose columns form a two-level
        # ("filter", "column") MultiIndex.
        columnTuples = [(band, name) for band in ("g", "r") for name in ("a", "b", "c")]
        nestedColumns = pd.MultiIndex.from_tuples(columnTuples, names=["filter", "column"])
        values = np.random.randn(5, 6)
        rowIndex = np.arange(5, dtype=int)
        original = pd.DataFrame(values, index=rowIndex, columns=nestedColumns)
        self.butler.put(original, self.datasetType, dataId={})

        # Read the whole DataFrame back.
        roundTripped = self.butler.get(self.datasetType, dataId={})
        self.assertTrue(original.equals(roundTripped))

        # Read only the column index through the "columns" component.
        columnsTypeName = self.datasetType.componentTypeName("columns")
        storedColumns = self.butler.get(columnsTypeName, dataId={})
        self.assertTrue(original.columns.equals(storedColumns))

        # Select by level name: one level alone, then both levels together.
        # The results are expected to match the corresponding ``.loc`` slices.
        filterOnly = self.butler.get(self.datasetType, dataId={},
                                     parameters={"columns": {"filter": "g"}})
        self.assertTrue(original.loc[:, ["g"]].equals(filterOnly))
        bothLevels = {"columns": {"filter": ["r"], "column": "a"}}
        filterAndColumn = self.butler.get(self.datasetType, dataId={}, parameters=bothLevels)
        self.assertTrue(original.loc[:, [("r", "a")]].equals(filterAndColumn))

        # Passing an unrecognized column should be a ValueError.
        unknownColumns = {"columns": ["d"]}
        with self.assertRaises(ValueError):
            self.butler.get(self.datasetType, dataId={}, parameters=unknownColumns)