Beispiel #1
0
    def test_1(self):
        """Discovery should find two publish trees: one atmos, one ocean."""
        search = dict(activity='cmip5', product='output1',
                      institute='MOHC', model='HadCM3')
        tree = DRSTree(self.tmpdir)
        tree.discover(self.incoming, **search)

        assert len(tree.pub_trees) == 2
        realms = set(pub_tree.drs.realm for pub_tree in tree.pub_trees.values())
        assert realms == set(['atmos', 'ocean'])
Beispiel #2
0
class TestListing(TestEg):
    """Fixture that populates the temp directory from a listing file.

    Subclasses must set the class attribute ``listing_file`` to the name
    of a listing file found in ``test_dir``.
    """

    def setUp(self):
        """Write the listing into ``self.tmpdir`` and build ``self.dt``."""
        super(TestListing, self).setUp()

        listing_path = os.path.join(test_dir, self.listing_file)
        gen_drs.write_listing(self.tmpdir, listing_path)

        self._init_drs_fs()
        self.dt = DRSTree(self.drs_fs)

    def _init_drs_fs(self):
        """Create ``self.drs_fs`` (a CMIP5 filesystem rooted at the temp dir)."""
        self.drs_fs = CMIP5FileSystem(self.tmpdir)

    def _discover(self, institute, model):
        """Run discovery on the incoming directory for one institute/model."""
        self.dt.discover(self.incoming, activity='cmip5',
                         product='output1',
                         institute=institute,
                         model=model)

    def _do_version(self, pt):
        """Version ``pt`` and check it ends up with a single version, today's."""
        assert pt.state == pt.STATE_INITIAL
        pt.do_version()
        assert pt.state == pt.STATE_VERSIONED
        # On Python 3 keys() is a view that never compares equal to a list,
        # so materialise it before comparing.
        assert list(pt.versions.keys()) == [self.today]
Beispiel #3
0
class TestRepair2(TestRepair):
    """Repair fixture built from two deliveries, each given its own version."""

    # Generators for the first and second delivery respectively.
    genfuncs = (gen_drs.write_eg3_1, gen_drs.write_eg3_2)

    def setUp(self):
        """Create two versions of the dataset and then break the tree."""
        # TestEg.setUp is called directly, so TestRepair.setUp is not run.
        TestEg.setUp(self)

        steps = ((self._cmor1, 20100101), (self._cmor2, 20100102))
        for deliver, version in steps:
            deliver()
            self.pt.do_version(version)

        assert self.pt.state == self.pt.STATE_VERSIONED

        self.breakme()

    def _cmor1(self):
        """Generate the first delivery and discover it with a fresh DRSTree."""
        self.genfuncs[0](self.tmpdir)

        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(
            self.incoming,
            activity='cmip5', product='output1',
            institute='MOHC', model='HadCM3',
        )

        # Exactly one publish tree is expected; unpacking fails otherwise.
        (self.pt, ) = self.dt.pub_trees.values()

    def _cmor2(self):
        """Generate the second delivery and feed it to the existing DRSTree."""
        self.genfuncs[1](self.tmpdir)

        self.dt.discover_incoming(
            self.incoming,
            activity='cmip5', product='output1',
        )
Beispiel #4
0
class TestEg4(TestEg3):
    """Re-run the TestEg3 scenario against the eg4 generated data."""
    __test__ = True

    def _cmor1(self):
        """Generate the first eg4 delivery and discover it with a new DRSTree."""
        gen_drs.write_eg4_1(self.tmpdir)

        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(
            self.incoming,
            activity='cmip5', product='output1',
            institute='MOHC', model='HadCM3',
        )

        # Exactly one publish tree is expected; unpacking fails otherwise.
        (self.pt, ) = self.dt.pub_trees.values()

    def _cmor2(self):
        """Generate the second eg4 delivery and add it to the existing tree."""
        gen_drs.write_eg4_2(self.tmpdir)

        self.dt.discover_incoming(
            self.incoming,
            activity='cmip5', product='output1',
        )

    def test_1(self):
        """After two versions the expected directories exist."""
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        for relpath in ('files', 'files/tas_20100102', 'v20100102/tas'):
            assert self._exists(relpath)

    def test_2(self):
        """After two versions each directory holds the expected entry count."""
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        expected_counts = (
            ('files/tas_20100101', 3),
            ('files/tas_20100102', 2),
            ('v20100101/tas', 3),
            ('v20100102/tas', 5),
        )
        for relpath, count in expected_counts:
            assert len(self._listdir(relpath)) == count

    # Do test_3 from superclass

    # Do test_4 from superclass

    def test_6(self):
        """Diff version 20100101 against the pending second delivery."""
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()

        pub_tree = self.pt
        changes = list(pub_tree.diff_version(20100101))

        # Paths only in the existing version vs. only in the pending delivery.
        in_v1_only = [old for (state, old, new) in changes
                      if state == pub_tree.DIFF_V1_ONLY]
        in_todo_only = [new for (state, old, new) in changes
                        if state == pub_tree.DIFF_V2_ONLY]

        assert len(in_v1_only) == 3
        assert len(in_todo_only) == 2
Beispiel #5
0
    def setUp(self):
        """Populate the temp dir from ``self.listing`` and pick a publish tree.

        Discovery is driven by ``self.drs_components``; the first publish
        tree found is stored as ``self.pt``.
        """
        super(TestRepair, self).setUp()

        gen_drs.write_listing(self.tmpdir, op.join(test_dir, self.listing))

        dt = DRSTree(self.tmpdir)
        dt.discover(self.incoming, **self.drs_components)
        # dict.values() is a non-indexable view on Python 3; materialise it
        # so the lookup works on both Python 2 and 3.
        self.pt = list(dt.pub_trees.values())[0]
Beispiel #6
0
    def test_2(self):
        """Discovery should find three publish trees; the first is atmos."""
        dt = DRSTree(self.tmpdir)
        dt.discover(self.incoming, activity='cmip5',
                    product='output1', institute='MOHC', model='HadCM3')

        assert len(dt.pub_trees) == 3
        # dict.values() is a non-indexable view on Python 3; materialise it
        # so the lookup works on both Python 2 and 3.
        pt = list(dt.pub_trees.values())[0]
        assert pt.drs.realm == 'atmos'
Beispiel #7
0
    def test_1(self):
        """Discovering from the CORDEX JSON fixture yields three publish trees."""
        drs_fs = CordexFileSystem(self.tmpdir)
        drs_tree = DRSTree(drs_fs)
        # Use a context manager so the fixture file is closed deterministically
        # rather than left for the garbage collector.
        with open(op.join(test_dir, 'cordex_1.json')) as fh:
            json_obj = json.load(fh)

        drs_tree.discover_incoming_fromjson(json_obj, activity='cordex')

        assert len(drs_tree.pub_trees) == 3
Beispiel #8
0
class TestEg5(TestEg4):
    """TestEg4 scenario run against the eg5 generated data (collection disabled)."""
    __test__ = False

    def _cmor1(self):
        """Generate the first eg5 delivery and discover it with a new DRSTree."""
        gen_drs.write_eg5_1(self.tmpdir)

        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(
            self.incoming,
            activity='cmip5', product='output1',
            institute='MOHC', model='HadCM3',
        )

        # Exactly one publish tree is expected; unpacking fails otherwise.
        (self.pt, ) = self.dt.pub_trees.values()

    def _cmor2(self):
        """Generate the second eg5 delivery and add it to the existing tree."""
        gen_drs.write_eg5_2(self.tmpdir)

        self.dt.discover_incoming(
            self.incoming,
            activity='cmip5', product='output1',
        )

    # Do test1 from superclass

    def test_2(self):
        """After two versions each directory holds the expected entry count."""
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        expected_counts = (
            ('files/tas_20100101', 5),
            ('files/tas_20100102', 2),
            ('v20100101/tas', 5),
            ('v20100102/tas', 5),
        )
        for relpath, count in expected_counts:
            assert len(self._listdir(relpath)) == count

    # Do test_3 from superclass

    # Do test_4 from superclass

    def test_6(self):
        """Diff version 20100101 against the pending second delivery."""
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()

        pub_tree = self.pt

        # One bucket per diff state reported by diff_version().
        only_v1 = []
        only_todo = []
        size_differs = []
        unchanged = []
        for state, old_path, new_path in pub_tree.diff_version(20100101):
            if state == pub_tree.DIFF_V1_ONLY:
                only_v1.append(old_path)
            elif state == pub_tree.DIFF_V2_ONLY:
                only_todo.append(new_path)
            elif state == pub_tree.DIFF_SIZE:
                size_differs.append(old_path)
            elif state == pub_tree.DIFF_NONE:
                unchanged.append(old_path)

        #!TODO: not same?  This test needs reviewing.
        assert len(only_v1) == 3
        assert len(unchanged) == 2
Beispiel #9
0
class TestEg5_1(TestEg5):
    """TestEg5 variant whose second delivery goes through a new DRSTree."""
    __test__ = False

    def _cmor2(self):
        """Generate the second eg5 delivery and discover it via ``self.dt2``."""
        gen_drs.write_eg5_2(self.tmpdir)

        second_tree = DRSTree(self.tmpdir)
        self.dt2 = second_tree
        second_tree.discover_incoming(
            self.incoming,
            activity='cmip5', product='output1',
        )

        # Rebind self.pt to the single publish tree owned by the new DRSTree.
        (self.pt, ) = second_tree.pub_trees.values()
Beispiel #10
0
    def setUp(self):
        """Generate the eg1 data, discover it and version the first publish tree."""
        super(TestMapfile, self).setUp()

        gen_drs.write_eg1(self.tmpdir)

        dt = DRSTree(self.tmpdir)
        dt.discover(self.incoming, activity='cmip5',
                    product='output1', institute='MOHC', model='HadCM3')
        # dict.values() is a non-indexable view on Python 3; materialise it
        # so the lookup works on both Python 2 and 3.
        self.pt = list(dt.pub_trees.values())[0]
        self.pt.do_version()
        assert self.pt.state == self.pt.STATE_VERSIONED
class TestThreeway(TestEg):
    """Apply three successive listings to one dataset, versioning after each."""
    __test__ = True

    listing_files = ['threeway_1.ls', 'threeway_2.ls', 'threeway_3.ls']

    def setUp(self):
        """Build the DRSTree and the lazy iterator over the listings."""
        super(TestThreeway, self).setUp()

        self.drs_fs = CMIP5FileSystem(self.tmpdir)
        self.dt = DRSTree(self.drs_fs)
        # Generator: a listing is only written when the iterator is advanced.
        self.listing_iter = self._iterSetUpListings()

    def _iterSetUpListings(self):
        """Write each listing into the temp dir in turn, yielding its path."""
        for listing_file in self.listing_files:
            listing_path = os.path.join(test_dir, listing_file)
            gen_drs.write_listing(self.tmpdir, listing_path)

            yield listing_path

    def _discover(self):
        """Run incremental discovery on the incoming directory."""
        self.dt.discover_incoming(self.incoming, activity='cmip5',
                                  product='output1',
                                  institute='MOHC',
                                  model='HadGEM2-ES')

    def _do_version(self, pt, next_version):
        """Create ``next_version`` on ``pt``, checking it is new and then present."""
        assert next_version not in pt.versions.keys()
        pt.do_version(next_version)
        assert next_version in pt.versions.keys()

    def _check_version(self, pt, version):
        """Check every entry of ``version`` is a symlink to a matching real file."""
        for path, drs in pt.versions[version]:
            assert os.path.islink(path)
            # link is relative, so resolve it against the link's own directory
            real_path = os.path.realpath(os.path.join(os.path.dirname(path),
                                                      os.readlink(path)))
            assert os.path.isfile(real_path)

            # Check variables match: the variable in the files/<var>_<n>
            # directory name must equal the variable prefix of the filename.
            mo = re.search(r'/files/(.*?)_\d+/(.*?)_', real_path)
            # Fail with a readable assertion rather than an AttributeError
            # when the link target is not under a files/ directory.
            assert mo is not None, real_path
            assert mo.group(1) == mo.group(2)

    def test1(self):
        """Discover, version and verify once per listing, numbering from 1."""
        # Advancing the iterator writes the next listing as a side effect.
        for v, _listing_path in enumerate(self.listing_iter, 1):
            # Parenthesised form is valid on both Python 2 and Python 3.
            print('Doing version %d' % v)
            self._discover()
            assert len(self.dt.pub_trees) == 1
            # dict.values() is a non-indexable view on Python 3.
            pt = list(self.dt.pub_trees.values())[0]

            self._do_version(pt, v)
            self._check_version(pt, v)
Beispiel #12
0
    def test_3(self):
        """Versioning a fresh publish tree creates exactly one version, today's."""
        dt = DRSTree(self.tmpdir)
        dt.discover(self.incoming, activity='cmip5',
                    product='output1', institute='MOHC', model='HadCM3')

        # dict.values() is a non-indexable view on Python 3; materialise it
        # so the lookup works on both Python 2 and 3.
        pt = list(dt.pub_trees.values())[0]
        assert pt.state == pt.STATE_INITIAL

        pt.do_version()
        assert pt.state == pt.STATE_VERSIONED
        assert len(pt.versions.keys()) == 1

        assert self.today in pt.versions.keys()
Beispiel #13
0
    def test_1(self):
        """Every link in the latest version must have a relative target."""
        dt = DRSTree(self.tmpdir)
        dt.discover(self.incoming, activity='cmip5',
                    product='output1', institute='MOHC', model='HadCM3')

        # dict.values() is a non-indexable view on Python 3; materialise it
        # so the lookup works on both Python 2 and 3.
        pt = list(dt.pub_trees.values())[0]
        assert pt.state == pt.STATE_INITIAL

        pt.do_version()

        for path, drs in pt.versions[pt.latest]:
            lnk = os.readlink(path)
            assert not os.path.isabs(lnk)
Beispiel #14
0
class TestEg3_1(TestEg3):
    """Run the TestEg3 tests with the upgrade step performed by a second,
    independent DRSTree instance.
    """

    __test__ = True

    def _cmor2(self):
        """Generate the second eg3 delivery and discover it via ``self.dt2``."""
        gen_drs.write_eg3_2(self.tmpdir)

        second_tree = DRSTree(self.tmpdir)
        self.dt2 = second_tree
        second_tree.discover_incoming(
            self.incoming,
            activity='cmip5', product='output1',
        )

        # Rebind self.pt to the single publish tree owned by the new DRSTree.
        (self.pt, ) = second_tree.pub_trees.values()
Beispiel #15
0
class TestEmptyPubdir(TestEg):
    """Regression test: drs_tool used to crash when the PublishTree
    directory exists but is empty.
    """
    __test__ = True

    def setUp(self):
        """Create an empty publish directory and a DRSTree over the temp dir."""
        super(TestEmptyPubdir, self).setUp()

        empty_dataset = 'output2/MOHC/HadGEM2-ES/esmControl/day/seaIce/day/r1i1p1'
        os.makedirs(op.join(self.tmpdir, empty_dataset))

        self.dt = DRSTree(self.tmpdir)

    def test_1(self):
        """Discovery with no arguments must complete without raising."""
        self.dt.discover()
Beispiel #16
0
    def test_1(self):
        """Check the contents of the r3i1p1 publish tree after discovery."""
        tree = DRSTree(self.tmpdir)
        tree.discover(
            self.incoming,
            activity='cmip5', product='output1',
            institute='MOHC', model='HadCM3',
            experiment='1pctto4x', realm='atmos',
        )

        assert len(tree.pub_trees) == 3
        drs_id = sorted(tree.pub_trees.keys())[2]
        assert drs_id == 'cmip5.output1.MOHC.HadCM3.1pctto4x.day.atmos.day.r3i1p1'
        pub_tree = tree.pub_trees[drs_id]

        # Nothing is versioned yet: all 15 files are still pending.
        assert pub_tree.versions == {}
        assert len(pub_tree._todo) == 15
        variables = set(entry[1].variable for entry in pub_tree._todo)
        assert variables == set(('pr', 'rsus', 'tas'))
        assert pub_tree.state == pub_tree.STATE_INITIAL
Beispiel #17
0
    def setUp(self):
        """Create an empty publish directory and a DRSTree over the temp dir."""
        super(TestEmptyPubdir, self).setUp()

        empty_dataset = 'output2/MOHC/HadGEM2-ES/esmControl/day/seaIce/day/r1i1p1'
        os.makedirs(op.join(self.tmpdir, empty_dataset))

        self.dt = DRSTree(self.tmpdir)
Beispiel #18
0
    def _cmor1(self):
        """Generate the first eg5 delivery and discover it with a new DRSTree."""
        gen_drs.write_eg5_1(self.tmpdir)

        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(
            self.incoming,
            activity='cmip5', product='output1',
            institute='MOHC', model='HadCM3',
        )

        # Exactly one publish tree is expected; unpacking fails otherwise.
        (self.pt, ) = self.dt.pub_trees.values()
Beispiel #19
0
    def test_2(self):
        """Incremental discovery works even if discover() was never called."""
        tree = DRSTree(self.tmpdir)
        components = dict(activity='cmip5',
                          product='output1', institute='MOHC', model='HadCM3')
        assert len(tree.pub_trees) == 0

        # Discover the ocean realm first, then atmos; each pass should add
        # exactly one publish tree.
        for expected_total, realm in enumerate(('ocean', 'atmos'), 1):
            tree.discover_incoming(self.tmpdir, realm=realm, **components)
            assert len(tree.pub_trees) == expected_total

        realms = set(pub_tree.drs.realm for pub_tree in tree.pub_trees.values())
        assert realms == set(['atmos', 'ocean'])
Beispiel #20
0
    def setUp(self):
        """Write ``self.listing_file`` into the temp dir and build ``self.dt``."""
        super(TestListing, self).setUp()

        gen_drs.write_listing(
            self.tmpdir,
            os.path.join(test_dir, self.listing_file),
        )

        self.dt = DRSTree(self.tmpdir)
Beispiel #21
0
    def _cmor2(self):
        """Generate the second eg4 delivery and discover it via ``self.dt2``."""
        gen_drs.write_eg4_2(self.tmpdir)

        second_tree = DRSTree(self.tmpdir)
        self.dt2 = second_tree
        second_tree.discover_incoming(
            self.incoming,
            activity='cmip5', product='output1',
        )

        # Rebind self.pt to the single publish tree owned by the new DRSTree.
        (self.pt, ) = second_tree.pub_trees.values()
Beispiel #22
0
    def test_2(self):
        """Discover from a line-delimited SPECS JSON fixture and check the ids."""
        drs_fs = SpecsFileSystem(self.tmpdir)
        drs_tree = DRSTree(drs_fs)
        # The fixture holds one JSON document per line.
        with open(op.join(test_dir, 'specs_cedacc.json')) as fh:
            json_obj = [json.loads(line) for line in fh]

        drs_tree.discover_incoming_fromjson(json_obj, activity='specs')

        # This id will not be present if realm is not correctly split on space
        drs_id = 'specs.output.IPSL.IPSL-CM5A-LR.decadal.S20130101.mon.seaIce.OImon.sic.r3i1p1'
        assert drs_id in drs_tree.pub_trees

        # dict.values() is a non-indexable view on Python 3; materialise it
        # so the lookup works on both Python 2 and 3.
        p = list(drs_tree.pub_trees.values())[0]
        p_vars = set(drs.variable for (drs_str, drs) in p._todo)

        # All DRS objects should be for the same variable
        assert len(p_vars) == 1
Beispiel #23
0
class TestEg6(TestEg):
    """Ingest three deliveries from separate directories, one version each."""
    __test__ = True

    # One inner list of filenames per delivery; delivery i becomes version i.
    deliveries = [
        ['clt_day_HadGEM2-ES_rcp26_r1i1p1_20051201-20151130.nc',
         'clt_day_HadGEM2-ES_rcp26_r1i1p1_20151201-20251130.nc'],
        ['huss_day_HadGEM2-ES_rcp26_r1i1p1_20991201-21091130.nc'],
        ['hur_day_HadGEM2-ES_rcp26_r1i1p1_20991201-20991230.nc',
         'hus_day_HadGEM2-ES_rcp26_r1i1p1_20991201-20991230.nc'],
        ]

    def setUp(self):
        """Create the incoming files, then discover and version each delivery."""
        super(TestEg6, self).setUp()

        self.setupIncoming()

        self.drs_fs = CMIP5FileSystem(self.tmpdir)
        self.dt = DRSTree(self.drs_fs)

        for i, delivery in enumerate(self.deliveries):
            self.dt.discover_incoming(op.join(self.incoming, str(i)),
                                      activity='cmip5', product='output1', institute='MOHC')
            # Version every known publish tree with the delivery index.
            for pt in self.dt.pub_trees.values():
                pt.do_version(i)

    def setupIncoming(self):
        """Create ``incoming/<i>/`` under the temp dir with delivery i's files."""
        # Create incoming files
        self.incoming = op.join(self.tmpdir, 'incoming')
        os.mkdir(self.incoming)
        for i, delivery in enumerate(self.deliveries):
            os.mkdir(op.join(self.incoming, str(i)))
            for filename in delivery:
                gen_drs.write_eg_file(op.join(self.incoming, str(i), filename))

    def test_1(self):
        """The single publish tree has 2, 3 and 5 files in versions 0, 1 and 2."""
        assert len(self.dt.pub_trees) == 1
        # dict.values() is a non-indexable view on Python 3.
        pt = list(self.dt.pub_trees.values())[0]

        file_counts = set((k, len(v)) for (k, v) in pt.versions.items())

        # Parenthesised form is valid on both Python 2 and Python 3.
        print(file_counts)
        assert file_counts == set([(0, 2), (1, 3), (2, 5)])
Beispiel #24
0
    def setUp(self):
        """Create the incoming files, then discover and version each delivery."""
        super(TestEg6, self).setUp()

        self.setupIncoming()

        self.dt = DRSTree(self.tmpdir)

        # Delivery i lives in incoming/<i> and is published as version i.
        for version in range(len(self.deliveries)):
            delivery_dir = op.join(self.incoming, str(version))
            self.dt.discover_incoming(
                delivery_dir,
                activity='cmip5', product='output1', institute='MOHC',
            )
            for pub_tree in self.dt.pub_trees.values():
                pub_tree.do_version(version)
Beispiel #25
0
    def setUp(self):
        """Generate the eg1 data, discover it and create an initial version."""
        # The base fixture hook is spelled setUp (as used by every other
        # fixture in this file); the previous 'setup' spelling did not match.
        super(TestDups, self).setUp()

        # Create test data
        gen_drs.write_eg1(self.tmpdir)

        # Do initial version change
        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming, activity='cmip5',
                         product='output1', institute='MOHC', model='HadCM3')
        # The tree is stored on self; a bare 'dt' is an undefined name here.
        # list() keeps the indexing valid on Python 3 dict views as well.
        self.pt = list(self.dt.pub_trees.values())[0]
        self.pt.do_version()
Beispiel #26
0
class TestDups(TestEg):
    """Fixture for ingests that contain files duplicating published ones."""

    def setUp(self):
        """Generate the eg1 data, discover it and create an initial version."""
        # The base fixture hook is spelled setUp (as used by every other
        # fixture in this file); the previous 'setup' spelling did not match.
        super(TestDups, self).setUp()

        # Create test data
        gen_drs.write_eg1(self.tmpdir)

        # Do initial version change
        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming, activity='cmip5',
                         product='output1', institute='MOHC', model='HadCM3')
        # The tree is stored on self; a bare 'dt' is an undefined name here.
        # list() keeps the indexing valid on Python 3 dict views as well.
        self.pt = list(self.dt.pub_trees.values())[0]
        self.pt.do_version()

    def tearDown(self):
        """Remove the temporary directory and everything beneath it."""
        shutil.rmtree(self.tmpdir)

    def _make_incoming1(self):
        """Populate the incoming directory from the dups1.ls listing."""
        # Original ingest
        gen_drs.write_listing(self.incoming, os.path.join(test_dir, 'dups1.ls'))

    def _make_incoming2(self):
        """Populate the incoming directory from the dups2.ls listing."""
        # Ingest with some new files and 2 duplicates
        gen_drs.write_listing(self.incoming, os.path.join(test_dir, 'dups2.ls'))

    def _make_incoming3(self):
        """As ``_make_incoming2`` but with CHANGE_FILE grown by one line."""
        # As incoming2 except one of the dups differs in size
        self._make_incoming2()

        # Close the file explicitly so the appended line is flushed to disk
        # before anything inspects the file's size.
        with open(os.path.join(self.incoming, CHANGE_FILE), 'a') as fh:
            fh.write('File has grown\n')

    def _make_incoming4(self):
        """As ``_make_incoming2`` but with CHANGE_FILE's first bytes altered."""
        # As incoming2 except one of the dups only differs by contents
        self._make_incoming2()

        # Overwrite in place so the size stays the same.
        with open(os.path.join(self.incoming, CHANGE_FILE), 'r+') as fh:
            fh.seek(0)
            fh.write('XXX')
Beispiel #27
0
    def make_drs_tree(self):
        """Build ``self.drs_tree`` and run discovery on the incoming directory.

        The DRS root, the incoming directory and each DRS component are
        taken from the command-line options when given, otherwise from
        ``config.drs_defaults``.

        Raises:
            Exception: if no DRS root is given on the command line or in
                the configuration defaults.
        """
        # Resolve the DRS root: option first, then configured default.
        root = self.opts.root
        if not root:
            try:
                root = config.drs_defaults['root']
            except KeyError:
                raise Exception('drs-root not defined')
        self.drs_root = root

        # Resolve the incoming directory, falling back to a path below the root.
        incoming = self.opts.incoming
        if not incoming:
            try:
                incoming = config.drs_defaults['incoming']
            except KeyError:
                incoming = os.path.join(self.drs_root, config.DEFAULT_INCOMING)

        self.drs_tree = DRSTree(self.drs_root)

        if self.opts.move_cmd:
            self.drs_tree.set_move_cmd(self.opts.move_cmd)

        # Collect DRS components; an option that is absent or None falls
        # back to the configured default (which may itself be None).
        component_names = ('activity', 'product', 'institute', 'model',
                           'experiment', 'frequency', 'realm', 'ensemble')
        kwargs = {}
        for attr in component_names:
            val = getattr(self.opts, attr, None)
            if val is None:
                val = config.drs_defaults.get(attr)
            kwargs[attr] = val

        # Get the template DRS from args
        if self.args:
            drs = DRS.from_dataset_id(self.args[0], **kwargs)
        else:
            drs = DRS(**kwargs)

        # Product detection
        if self.opts.detect_product:
            self._config_p_cmip5()
            self._setup_p_cmip5()

        self.drs_tree.discover(incoming, **drs)
    def setUp(self):
        """Build a DRSTree over the temp dir and the iterator over the listings."""
        super(TestThreeway, self).setUp()

        tree = DRSTree(self.tmpdir)
        self.dt = tree
        listings = self._iterSetUpListings()
        self.listing_iter = listings
Beispiel #29
0
class TestEg3(TestEg):
    """Two-delivery scenario: publish a first version, then upgrade it."""
    __test__ = True

    def _cmor1(self):
        """Generate the first eg3 delivery and discover it with a new DRSTree."""
        gen_drs.write_eg3_1(self.tmpdir)

        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(
            self.incoming,
            activity='cmip5', product='output1',
            institute='MOHC', model='HadCM3',
        )

        # Exactly one publish tree is expected; unpacking fails otherwise.
        (self.pt, ) = self.dt.pub_trees.values()

    def _cmor2(self):
        """Generate the second eg3 delivery and add it to the existing tree."""
        gen_drs.write_eg3_2(self.tmpdir)

        self.dt.discover_incoming(
            self.incoming,
            activity='cmip5', product='output1',
        )

    def _exists(self, x):
        """Return whether ``x`` exists, relative to the publish directory."""
        target = os.path.join(self.pt.pub_dir, x)
        return os.path.exists(target)

    def _listdir(self, x):
        """List directory ``x``, relative to the publish directory."""
        target = os.path.join(self.pt.pub_dir, x)
        return os.listdir(target)

    def _listlinks(self, x):
        """Return the targets of all symlinks directly inside directory ``x``."""
        pattern = '%s/*' % os.path.join(self.pt.pub_dir, x)
        targets = []
        for entry in glob(pattern):
            if os.path.islink(entry):
                targets.append(os.readlink(entry))
        return targets

    def test_01(self):
        """Versioning consumes the incoming list and leaves nothing to do."""
        self._cmor1()
        assert len(self.pt.drs_tree.incoming) > 0

        self.pt.do_version()
        assert len(self.pt.drs_tree.incoming) == 0
        assert self.pt.count_todo() == 0
        assert len(list(self.pt.list_todo())) == 0

    def test_1(self):
        """After two versions the expected directories exist (or do not)."""
        self._cmor1()
        self.pt.do_version(20100101)

        self._cmor2()
        self.pt.do_version(20100102)

        assert len(self.pt.drs_tree.incoming) == 0

        # (relative path, whether it should exist), checked in this order.
        expectations = (
            ('files', True),
            ('files/rsus_20100102', True),
            ('files/rsus_20100101', False),
            ('v20100101/tas', True),
            ('v20100101/pr', True),
            ('v20100101/rsus', False),
            ('v20100102/rsus', True),
        )
        for relpath, should_exist in expectations:
            if should_exist:
                assert self._exists(relpath)
            else:
                assert not self._exists(relpath)

    def test_2(self):
        """A file from the first delivery is still present in the second version."""
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        carried_over = 'v20100102/pr/pr_day_HadCM3_1pctto4x_r1i1p1_2000010100-2001123114.nc'
        assert self._exists(carried_over)

    def test_3(self):
        """Follow the publish tree's state through deliver/version cycles."""
        self._cmor1()
        assert self.pt.state == self.pt.STATE_INITIAL

        self.pt.do_version()
        assert self.pt.state == self.pt.STATE_VERSIONED

        # New incoming data puts the versioned tree into the transitional state.
        self._cmor2()
        assert self.pt.state == self.pt.STATE_VERSIONED_TRANS

        self.pt.do_version()
        assert self.pt.state == self.pt.STATE_VERSIONED

    def test_4(self):
        """All version links should point into the "files" branch."""
        self._cmor1()
        self.pt.do_version()
        self._cmor2()
        self.pt.do_version()

        # NOTE(review): the other tests in this class use version directories
        # named like 'v20100101'; confirm 'v2/tas/r1i1p1' really exists here,
        # otherwise no links are found and the loop below checks nothing.
        for target in self._listlinks('v2/tas/r1i1p1'):
            assert '/files/' in target

    def test_5(self):
        """The 'latest' symlink tracks the most recently created version."""
        latest_link = lambda: os.readlink(os.path.join(self.pt.pub_dir, 'latest'))

        self._cmor1()
        self.pt.do_version(20100101)
        assert latest_link() == 'v20100101'

        self._cmor2()
        self.pt.do_version(20100102)
        assert latest_link() == 'v20100102'

    def test_6(self):
        """Diff version 20100101 against the pending second delivery."""
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()

        pub_tree = self.pt

        only_v1 = []
        only_todo = []
        for state, old_path, new_path in pub_tree.diff_version(20100101):
            if state == pub_tree.DIFF_V1_ONLY:
                # Nothing for rsus was published in the first version.
                assert 'rsus' not in old_path
                only_v1.append(old_path)
            elif state == pub_tree.DIFF_V2_ONLY:
                # Everything new in the second delivery is rsus.
                assert 'rsus' in new_path
                only_todo.append(new_path)

        assert len(only_v1) == 10
        assert len(only_todo) == 5
    def setUp(self):
        """Build the CMIP5 filesystem, its DRSTree and the listings iterator."""
        super(TestThreeway, self).setUp()

        filesystem = CMIP5FileSystem(self.tmpdir)
        self.drs_fs = filesystem
        self.dt = DRSTree(filesystem)
        listings = self._iterSetUpListings()
        self.listing_iter = listings