Ejemplo n.º 1
0
    def testReuseOutputRoot(self):
        """Verify the parents recorded for an output repository, and then for
        a second output repository that uses the first one as its input.

        A butler is first created with ROOT (mapped by MinMapper1) as its
        input and a fresh directory as its output; the parents stored in that
        output's cfg are expected to be a single RepositoryCfg describing
        ROOT. A second butler then takes the first output as its input and
        writes to another fresh directory, whose stored parents are expected
        to be just the path of the first output.
        """
        firstRepo = self.mkdtemp("testOutput")
        rootInput = {'root': ROOT, 'mapper': MinMapper1}
        butler = dafPersist.Butler(inputs=rootInput, outputs=firstRepo)
        # The output location must exist as a directory once the butler is up.
        self.assertTrue(os.path.exists(firstRepo))
        self.assertTrue(os.path.isdir(firstRepo))
        storedCfg = dafPersist.Storage().getRepositoryCfg(firstRepo)
        rootCfg = dafPersist.RepositoryCfg(
            root=ROOT,
            mapper=MinMapper1,
            mapperArgs=None,
            parents=None,
            policy=None,
        )
        self.assertEqual(storedCfg.parents, [rootCfg])
        del butler

        # Chain a second output onto the first one.
        secondRepo = self.mkdtemp("testOutput2")
        chainedInput = {'root': firstRepo, 'mapper': MinMapper1}
        butler = dafPersist.Butler(inputs=chainedInput, outputs=secondRepo)
        self.assertTrue(os.path.exists(secondRepo))
        self.assertTrue(os.path.isdir(secondRepo))
        storedCfg = dafPersist.Storage().getRepositoryCfg(secondRepo)
        # Here the parent is expected to be recorded as the plain path of the
        # first output rather than as a RepositoryCfg instance.
        self.assertEqual(storedCfg.parents, [firstRepo])
        del butler
Ejemplo n.º 2
0
 def testStorageRepoCfgCache(self):
     """Check that a cfg fetched through a Storage instance ends up in that
     instance's ``repositoryCfgs`` mapping under the repository root.
     """
     repoRoot = os.path.join(self.testDir, 'a')

     # Create the repository on disk by opening (and dropping) a butler
     # that has it as a write-mode output.
     writeArgs = dp.RepositoryArgs(mode='w',
                                   mapper=dpTest.EmptyTestMapper,
                                   root=repoRoot)
     butler = dp.Butler(outputs=writeArgs)
     del butler

     storage = dp.Storage()
     # Nothing is expected to be cached before the first lookup.
     self.assertEqual(0, len(storage.repositoryCfgs))
     fetchedCfg = storage.getRepositoryCfg(repoRoot)
     self.assertEqual(fetchedCfg, storage.repositoryCfgs[repoRoot])
    def test(self):
        """Reload an output repository that already records a parent, in
        several input/output combinations, and check the butler's repo lists.

        Repo 'a' is created first and then used as the input for a new output
        repo 'b'. Afterwards 'b' is reopened:

        * in 'rw' mode with 'a' listed as an input,
        * in 'rw' mode without listing 'a',
        * as a plain output path without listing 'a' (expected to raise
          RuntimeError),
        * as a plain output path with 'a' listed as an input.
        """
        repoA = os.path.join(self.testDir, 'a')
        repoB = os.path.join(self.testDir, 'b')

        def assertRoots(listRepos, expectedRoots):
            # Check the number of repos first, then each repo's cfg root in
            # order. ``listRepos`` is called afresh for every assertion.
            self.assertEqual(len(listRepos()), len(expectedRoots))
            for position, root in enumerate(expectedRoots):
                self.assertEqual(listRepos()[position].cfg.root, root)

        def expectedCfgOfB():
            # The cfg that 'b' is expected to have on disk: same mapper as
            # 'a', with 'a' as its only parent.
            return dp.RepositoryCfg(root=repoB,
                                    mapper=MapperForTestWriting,
                                    mapperArgs=None,
                                    parents=[repoA],
                                    policy=None)

        # Build the initial structure: 'a' becomes a parent of 'b'.
        butler = dp.Butler(outputs=dp.RepositoryArgs(
            root=repoA, mapper=MapperForTestWriting))
        del butler
        butler = dp.Butler(inputs=repoA, outputs=repoB)
        assertRoots(butler._repos.inputs, [repoA])
        assertRoots(butler._repos.outputs, [repoB])
        del butler

        # Reopen 'b' read-write several times with 'a' listed explicitly.
        # 'b' is expected to be the first input, followed by 'a'.
        for _ in range(4):
            butler = dp.Butler(inputs=repoA,
                               outputs=dp.RepositoryArgs(root=repoB,
                                                         mode='rw'))
            assertRoots(butler._repos.inputs, [repoB, repoA])
            assertRoots(butler._repos.outputs, [repoB])
            cfg = dp.Storage().getRepositoryCfg(repoB)
            self.assertEqual(cfg, expectedCfgOfB())

        # Same again, but this time 'a' is not named as an input; the
        # expected repo lists and stored cfg are unchanged.
        for _ in range(4):
            butler = dp.Butler(outputs=dp.RepositoryArgs(root=repoB,
                                                         mode='rw'))
            assertRoots(butler._repos.inputs, [repoB, repoA])
            assertRoots(butler._repos.outputs, [repoB])
            cfg = dp.Storage().getRepositoryCfg(repoB)
            self.assertEqual(cfg, expectedCfgOfB())

        # Opening 'b' as a plain output path without listing 'a' should
        # raise, because inputs must match readable outputs parents.
        with self.assertRaises(RuntimeError):
            butler = dp.Butler(outputs=repoB)

        # Opening 'b' as a plain output path with 'a' listed explicitly is
        # fine; only 'a' is expected among the inputs in this case.
        butler = dp.Butler(inputs=repoA, outputs=repoB)
        self.assertEqual(len(butler._repos.inputs()), 1)
        self.assertEqual(len(butler._repos.outputs()), 1)
        self.assertEqual(butler._repos.inputs()[0].cfg.root, repoA)
        self.assertEqual(butler._repos.outputs()[0].cfg.root, repoB)
        # NOTE(review): the cfg is fetched here but never inspected in the
        # visible code -- confirm whether an assertion is missing.
        cfg = dp.Storage().getRepositoryCfg(repoB)
    def testOneLevelInputs(self):
        """Check tag-based and order-based lookup across several input repos.

        Outline:

        1. Store two different objects under the same dataset type and data
           id, one in repo1 and one in repo2.
        2. Open both repos as tagged inputs ('one' and 'two') and check that
           a get with either tag, or with no tag, returns the expected
           object.
        3. Repeat with the inputs listed in the opposite order; the untagged
           get is then expected to return the other object.
        4. Open both repos as inputs with a read-write output repo3, put a
           third object there and check that it wins untagged lookups while
           the tagged lookups still reach repo1 and repo2. The inputs are
           expected to be recorded as repo3's parents.
        5. Build repo4 (with repo1 and repo2 as inputs) and a standalone
           repo5, open them as tagged inputs in both orders, and check that
           objects held by repo4's parents can be reached through repo4's
           tag.
        """
        repo1Root = os.path.join(self.testDir, 'repo1')
        repo2Root = os.path.join(self.testDir, 'repo2')
        repo3Root = os.path.join(self.testDir, 'repo3')
        repo4Root = os.path.join(self.testDir, 'repo4')
        repo5Root = os.path.join(self.testDir, 'repo5')

        objA = tstObj('a')
        objB = tstObj('b')

        # Store objA in repo1.
        writeRepo1 = dp.RepositoryArgs(mode='rw',
                                       root=repo1Root,
                                       mapper=MapperForTestWriting)
        butler = dp.Butler(outputs=writeRepo1)
        butler.put(objA, 'foo', {'bar': 1})
        del butler

        # Store objB in repo2 under the same dataset type and data id.
        writeRepo2 = dp.RepositoryArgs(mode='rw',
                                       root=repo2Root,
                                       mapper=MapperForTestWriting)
        butler = dp.Butler(outputs=writeRepo2)
        butler.put(objB, 'foo', {'bar': 1})
        del butler
        del writeRepo1
        del writeRepo2

        # Use both repos as tagged inputs and verify that the correct object
        # can be fetched both with a tag and without one.
        taggedRepo1 = dp.RepositoryArgs(root=repo1Root, tags='one')
        taggedRepo2 = dp.RepositoryArgs(root=repo2Root, tags='two')

        butler = dp.Butler(inputs=(taggedRepo1, taggedRepo2))
        self.assertEqual(butler.get('foo', dp.DataId({'bar': 1}, tag='one')),
                         objA)
        self.assertEqual(butler.get('foo', dp.DataId({'bar': 1}, tag='two')),
                         objB)
        # With no tag, the first listed input is expected to win.
        self.assertEqual(butler.get('foo', {'bar': 1}), objA)

        # Same inputs in the opposite order: the untagged result flips.
        butler = dp.Butler(inputs=(taggedRepo2, taggedRepo1))
        self.assertEqual(butler.get('foo', dp.DataId(bar=1, tag='one')), objA)
        self.assertEqual(butler.get('foo', dp.DataId(bar=1, tag='two')), objB)
        self.assertEqual(butler.get('foo', dp.DataId(bar=1)), objB)

        # Open repo1 and repo2 as inputs together with an output repo3.
        writeRepo3 = dp.RepositoryArgs(mode='rw',
                                       root=repo3Root,
                                       mapper=MapperForTestWriting)
        butler = dp.Butler(inputs=(taggedRepo1, taggedRepo2),
                           outputs=writeRepo3)
        self.assertEqual(butler.get('foo', dp.DataId({'bar': 1}, tag='one')),
                         objA)
        self.assertEqual(butler.get('foo', dp.DataId({'bar': 1}, tag='two')),
                         objB)
        self.assertEqual(butler.get('foo', {'bar': 1}), objA)
        # Add an object to the output repo. Because the output repo mode is
        # 'rw', that object is gettable and has first priority in the search
        # order. The other repos should still be searchable by tagging.
        objC = tstObj('c')
        butler.put(objC, 'foo', {'bar': 1})
        self.assertEqual(butler.get('foo', {'bar': 1}), objC)
        self.assertEqual(butler.get('foo', dp.DataId({'bar': 1}, tag='one')),
                         objA)
        self.assertEqual(butler.get('foo', dp.DataId({'bar': 1}, tag='two')),
                         objB)
        del butler

        # The inputs should have been recorded as parents of repo3.
        storedRepo3Cfg = dp.Storage().getRepositoryCfg(repo3Root)
        self.assertEqual(storedRepo3Cfg.parents, [repo1Root, repo2Root])

        # expand the structure to look like this:
        # ┌────────────────────────┐ ┌────────────────────────┐
        # │repo1                   │ │repo2                   │
        # │ tag:"one"              │ │ tag:"two"              │
        # │ tstObj('a')            │ │ tstObj('b')            │
        # │   at ('foo', {'bar:1'})│ │   at ('foo', {'bar:1'})│
        # └───────────┬────────────┘ └───────────┬────────────┘
        #             └─────────────┬────────────┘
        #              ┌────────────┴───────────┐ ┌────────────────────────┐
        #              │repo4                   │ │repo5                   │
        #              │ tag:"four"             │ │ tag:"five"             │
        #              │ tstObj('d')            │ │ tstObj('e')            │
        #              │   at ('foo', {'bar:2'})│ │   at ('foo', {'bar:1'})│
        #              └───────────┬────────────┘ └───────────┬────────────┘
        #                          └─────────────┬────────────┘
        #                                     ┌──┴───┐
        #                                     │butler│
        #                                     └──────┘

        # repo4: output of a butler whose inputs are repo1 and repo2 (given
        # as plain paths); holds objD under a different data id.
        writeRepo4 = dp.RepositoryArgs(mode='rw',
                                       root=repo4Root,
                                       mapper=MapperForTestWriting)
        butler = dp.Butler(inputs=(repo1Root, repo2Root), outputs=writeRepo4)
        objD = tstObj('d')
        butler.put(objD, 'foo', {'bar': 2})
        del butler

        # repo5: standalone repo holding objE under the shared data id.
        writeRepo5 = dp.RepositoryArgs(mode='rw',
                                       root=repo5Root,
                                       mapper=MapperForTestWriting)
        butler = dp.Butler(outputs=writeRepo5)
        objE = tstObj('e')
        butler.put(objE, 'foo', {'bar': 1})
        del butler

        # Reopen repo4 and repo5 as tagged inputs, located via cfgRoot.
        taggedRepo4 = dp.RepositoryArgs(cfgRoot=repo4Root, tags='four')
        taggedRepo5 = dp.RepositoryArgs(cfgRoot=repo5Root, tags='five')

        butler = dp.Butler(inputs=(taggedRepo4, taggedRepo5))
        # repo4 holds nothing at bar=1 itself; objA is expected to be found
        # through its parents, both untagged and via the 'four' tag.
        self.assertEqual(butler.get('foo', {'bar': 1}), objA)
        self.assertEqual(butler.get('foo', {'bar': 2}), objD)
        self.assertEqual(butler.get('foo', dp.DataId({'bar': 1}, tag='four')),
                         objA)
        self.assertEqual(butler.get('foo', dp.DataId({'bar': 1}, tag='five')),
                         objE)
        del butler

        # With repo5 listed first, the untagged bar=1 lookup is expected to
        # return objE; the tagged lookups are unaffected.
        butler = dp.Butler(inputs=(taggedRepo5, taggedRepo4))
        self.assertEqual(butler.get('foo', {'bar': 1}), objE)
        self.assertEqual(butler.get('foo', {'bar': 2}), objD)
        self.assertEqual(butler.get('foo', dp.DataId({'bar': 1}, tag='four')),
                         objA)
        self.assertEqual(butler.get('foo', dp.DataId({'bar': 1}, tag='five')),
                         objE)
        del butler