def test_1(self):
    # Full discovery of the MOHC/HadCM3 incoming data must produce exactly
    # two PublishTrees, one for each of the atmos and ocean realms.
    tree = DRSTree(self.tmpdir)
    tree.discover(self.incoming,
                  activity='cmip5', product='output1',
                  institute='MOHC', model='HadCM3')

    assert len(tree.pub_trees) == 2
    realms = set(pub_tree.drs.realm for pub_tree in tree.pub_trees.values())
    assert realms == set(['atmos', 'ocean'])
class TestListing(TestEg):
    """Base class for tests whose input files come from a listing file."""

    # Set the following in subclasses
    # listing_file

    def setUp(self):
        """Write the listing into tmpdir and create ``self.dt`` over ``self.drs_fs``."""
        super(TestListing, self).setUp()

        gen_drs.write_listing(self.tmpdir,
                              os.path.join(test_dir, self.listing_file))

        self._init_drs_fs()
        self.dt = DRSTree(self.drs_fs)

    def _init_drs_fs(self):
        """Create the filesystem object rooted at tmpdir (separate hook method)."""
        self.drs_fs = CMIP5FileSystem(self.tmpdir)

    def _discover(self, institute, model):
        """Run a cmip5/output1 discovery of the incoming area for one institute and model."""
        components = dict(activity='cmip5', product='output1',
                          institute=institute, model=model)
        self.dt.discover(self.incoming, **components)

    def _do_version(self, pt):
        """Version ``pt`` and check it moves from INITIAL to VERSIONED with today's version only."""
        assert pt.state == pt.STATE_INITIAL
        pt.do_version()
        assert pt.state == pt.STATE_VERSIONED
        assert list(pt.versions.keys()) == [self.today]
class TestRepair2(TestRepair):
    """Repair tests run against a tree that already holds two versions."""

    # (first delivery generator, second delivery generator)
    genfuncs = (gen_drs.write_eg3_1, gen_drs.write_eg3_2)

    def setUp(self):
        """Publish both deliveries as 20100101 and 20100102, then call breakme()."""
        # Deliberately bypasses TestRepair.setUp and calls the TestEg one.
        TestEg.setUp(self)

        for deliver, version in ((self._cmor1, 20100101),
                                 (self._cmor2, 20100102)):
            deliver()
            self.pt.do_version(version)

        assert self.pt.state == self.pt.STATE_VERSIONED
        self.breakme()

    def _cmor1(self):
        """Generate the first delivery, discover it and keep the single PublishTree."""
        self.genfuncs[0](self.tmpdir)

        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming,
                         activity='cmip5', product='output1',
                         institute='MOHC', model='HadCM3')
        # Unpacking fails unless exactly one PublishTree was found.
        [self.pt] = self.dt.pub_trees.values()

    def _cmor2(self):
        """Generate the second delivery and discover it with the existing DRSTree."""
        self.genfuncs[1](self.tmpdir)
        self.dt.discover_incoming(self.incoming,
                                  activity='cmip5', product='output1')
class TestEg4(TestEg3):
    """Re-run the TestEg3 scenario using the eg4 data generators."""

    __test__ = True

    def _cmor1(self):
        """Write the eg4_1 data, discover it and keep the single PublishTree."""
        gen_drs.write_eg4_1(self.tmpdir)

        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming,
                         activity='cmip5', product='output1',
                         institute='MOHC', model='HadCM3')
        # Unpacking fails unless exactly one PublishTree was found.
        [self.pt] = self.dt.pub_trees.values()

    def _cmor2(self):
        """Write the eg4_2 data and discover it with the existing DRSTree."""
        gen_drs.write_eg4_2(self.tmpdir)
        self.dt.discover_incoming(self.incoming,
                                  activity='cmip5', product='output1')

    def test_1(self):
        # After two versions the second delivery's files and version
        # directory must exist.
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        for rel_path in ('files', 'files/tas_20100102', 'v20100102/tas'):
            assert self._exists(rel_path)

    def test_2(self):
        # Check the number of entries in each files/ and version directory.
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        expected_counts = (
            ('files/tas_20100101', 3),
            ('files/tas_20100102', 2),
            ('v20100101/tas', 3),
            ('v20100102/tas', 5),
        )
        for rel_path, count in expected_counts:
            assert len(self._listdir(rel_path)) == count

    # Do test_3 from superclass
    # Do test_4 from superclass

    def test_6(self):
        # Test differencing 2 versions
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()

        only_v1 = []
        only_todo = []
        for status, old_path, new_path in self.pt.diff_version(20100101):
            if status == self.pt.DIFF_V1_ONLY:
                only_v1.append(old_path)
            elif status == self.pt.DIFF_V2_ONLY:
                only_todo.append(new_path)

        assert len(only_v1) == 3
        assert len(only_todo) == 2
def setUp(self):
    """Write the class's listing, discover it and keep the first PublishTree as ``self.pt``."""
    super(TestRepair, self).setUp()

    listing_path = op.join(test_dir, self.listing)
    gen_drs.write_listing(self.tmpdir, listing_path)

    tree = DRSTree(self.tmpdir)
    tree.discover(self.incoming, **self.drs_components)
    self.pt = list(tree.pub_trees.values())[0]
def test_2(self):
    # Discovery must find three PublishTrees; the first one returned is
    # expected to be in the atmos realm.
    tree = DRSTree(self.tmpdir)
    tree.discover(self.incoming,
                  activity='cmip5', product='output1',
                  institute='MOHC', model='HadCM3')

    assert len(tree.pub_trees) == 3

    first_tree = list(tree.pub_trees.values())[0]
    assert first_tree.drs.realm == 'atmos'
def test_1(self):
    # Discovering from the cordex_1.json description must yield three
    # PublishTrees.
    drs_fs = CordexFileSystem(self.tmpdir)
    drs_tree = DRSTree(drs_fs)

    # Use a context manager so the JSON file is closed even if parsing
    # fails (the bare json.load(open(...)) form leaked the handle).
    with open(op.join(test_dir, 'cordex_1.json')) as fh:
        json_obj = json.load(fh)

    drs_tree.discover_incoming_fromjson(json_obj, activity='cordex')

    assert len(drs_tree.pub_trees) == 3
class TestEg5(TestEg4):
    """Re-run the TestEg4 scenario using the eg5 data generators (currently disabled)."""

    __test__ = False

    def _cmor1(self):
        """Write the eg5_1 data, discover it and keep the single PublishTree."""
        gen_drs.write_eg5_1(self.tmpdir)

        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming,
                         activity='cmip5', product='output1',
                         institute='MOHC', model='HadCM3')
        # Unpacking fails unless exactly one PublishTree was found.
        [self.pt] = self.dt.pub_trees.values()

    def _cmor2(self):
        """Write the eg5_2 data and discover it with the existing DRSTree."""
        gen_drs.write_eg5_2(self.tmpdir)
        self.dt.discover_incoming(self.incoming,
                                  activity='cmip5', product='output1')

    # Do test1 from superclass

    def test_2(self):
        # Check the number of entries in each files/ and version directory.
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        expected_counts = (
            ('files/tas_20100101', 5),
            ('files/tas_20100102', 2),
            ('v20100101/tas', 5),
            ('v20100102/tas', 5),
        )
        for rel_path, count in expected_counts:
            assert len(self._listdir(rel_path)) == count

    # Do test_3 from superclass
    # Do test_4 from superclass

    def test_6(self):
        # Test differencing 2 versions
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()

        only_v1 = []
        only_todo = []
        size_differs = []
        unchanged = []
        for status, old_path, new_path in self.pt.diff_version(20100101):
            if status == self.pt.DIFF_V1_ONLY:
                only_v1.append(old_path)
            elif status == self.pt.DIFF_V2_ONLY:
                only_todo.append(new_path)
            elif status == self.pt.DIFF_SIZE:
                size_differs.append(old_path)
            elif status == self.pt.DIFF_NONE:
                unchanged.append(old_path)

        #!TODO: not same? This test needs reviewing.
        assert len(only_v1) == 3
        assert len(unchanged) == 2
class TestEg5_1(TestEg5):
    """TestEg5 variant that discovers the second delivery with a fresh DRSTree."""

    __test__ = False

    def _cmor2(self):
        """Write the eg5_2 data and discover it through a new DRSTree stored as ``self.dt2``."""
        gen_drs.write_eg5_2(self.tmpdir)

        second_tree = DRSTree(self.tmpdir)
        self.dt2 = second_tree
        second_tree.discover_incoming(self.incoming,
                                      activity='cmip5', product='output1')
        # Unpacking fails unless exactly one PublishTree was found.
        [self.pt] = second_tree.pub_trees.values()
def setUp(self):
    """Generate the eg1 data, discover it and version the first PublishTree."""
    super(TestMapfile, self).setUp()
    gen_drs.write_eg1(self.tmpdir)

    tree = DRSTree(self.tmpdir)
    tree.discover(self.incoming,
                  activity='cmip5', product='output1',
                  institute='MOHC', model='HadCM3')

    pub_tree = list(tree.pub_trees.values())[0]
    self.pt = pub_tree
    pub_tree.do_version()
    assert pub_tree.state == pub_tree.STATE_VERSIONED
class TestThreeway(TestEg):
    """Publish three successive listings into the same PublishTree."""

    __test__ = True

    # Listings are written one at a time, each becoming a new version.
    listing_files = ['threeway_1.ls', 'threeway_2.ls', 'threeway_3.ls']

    def setUp(self):
        """Create the DRSTree and the lazy iterator that writes each listing."""
        super(TestThreeway, self).setUp()

        self.drs_fs = CMIP5FileSystem(self.tmpdir)
        self.dt = DRSTree(self.drs_fs)
        self.listing_iter = self._iterSetUpListings()

    def _iterSetUpListings(self):
        """Yield each listing path after writing that listing into tmpdir.

        This is a generator, so a listing is only written when the
        iterator is advanced to it.
        """
        for listing_file in self.listing_files:
            listing_path = os.path.join(test_dir, listing_file)
            gen_drs.write_listing(self.tmpdir, listing_path)
            yield listing_path

    def _discover(self):
        """Discover whatever is currently in the incoming area."""
        self.dt.discover_incoming(self.incoming,
                                  activity='cmip5', product='output1',
                                  institute='MOHC', model='HadGEM2-ES')

    def _do_version(self, pt, next_version):
        """Create ``next_version`` on ``pt``, checking it is new before and present after."""
        assert next_version not in pt.versions.keys()
        pt.do_version(next_version)
        assert next_version in pt.versions.keys()

    def _check_version(self, pt, version):
        """Check every entry of ``version`` is a symlink to a real file for the same variable."""
        for path, drs in pt.versions[version]:
            assert os.path.islink(path)

            # link is relative
            link_target = os.path.join(os.path.dirname(path),
                                       os.readlink(path))
            real_path = os.path.realpath(link_target)
            assert os.path.isfile(real_path)

            # Check variables match
            mo = re.search(r'/files/(.*?)_\d+/(.*?)_', real_path)
            # Fail as a test assertion rather than an AttributeError on None
            # when the resolved path is not under a files/<var>_<n>/ directory.
            assert mo is not None, 'unexpected link target: %s' % real_path
            assert mo.group(1) == mo.group(2)

    def test1(self):
        # Advancing the iterator writes the next listing; the path itself
        # is not needed here.
        for v, listing_path in enumerate(self.listing_iter, 1):
            print('Doing version %d' % v)
            self._discover()

            assert len(self.dt.pub_trees) == 1
            pt = list(self.dt.pub_trees.values())[0]

            self._do_version(pt, v)
            self._check_version(pt, v)
def test_3(self):
    # Versioning a freshly discovered PublishTree must move it to the
    # VERSIONED state with a single version named after today.
    tree = DRSTree(self.tmpdir)
    tree.discover(self.incoming,
                  activity='cmip5', product='output1',
                  institute='MOHC', model='HadCM3')

    pub_tree = list(tree.pub_trees.values())[0]
    assert pub_tree.state == pub_tree.STATE_INITIAL

    pub_tree.do_version()
    assert pub_tree.state == pub_tree.STATE_VERSIONED

    version_ids = pub_tree.versions.keys()
    assert len(version_ids) == 1
    assert self.today in version_ids
def test_1(self):
    # Every path in the latest version must be a symlink whose stored
    # target is relative, not absolute.
    tree = DRSTree(self.tmpdir)
    tree.discover(self.incoming,
                  activity='cmip5', product='output1',
                  institute='MOHC', model='HadCM3')

    pub_tree = list(tree.pub_trees.values())[0]
    assert pub_tree.state == pub_tree.STATE_INITIAL
    pub_tree.do_version()

    for link_path, _drs in pub_tree.versions[pub_tree.latest]:
        target = os.readlink(link_path)
        assert not os.path.isabs(target)
class TestEg3_1(TestEg3):
    """Use a separate DRSTree instance for the upgrade to test
    TestEg3 still works in this scenario.

    """

    __test__ = True

    def _cmor2(self):
        """Write the eg3_2 data and discover it through a new DRSTree stored as ``self.dt2``."""
        gen_drs.write_eg3_2(self.tmpdir)

        second_tree = DRSTree(self.tmpdir)
        self.dt2 = second_tree
        second_tree.discover_incoming(self.incoming,
                                      activity='cmip5', product='output1')
        # Unpacking fails unless exactly one PublishTree was found.
        [self.pt] = second_tree.pub_trees.values()
class TestEmptyPubdir(TestEg):
    # Regression for bug where drs_tool crashes if the PublishTree directory
    # exists but is empty

    __test__ = True

    def setUp(self):
        """Create an empty publication directory under tmpdir and a DRSTree over it."""
        super(TestEmptyPubdir, self).setUp()

        empty_pubdir = op.join(
            self.tmpdir,
            'output2/MOHC/HadGEM2-ES/esmControl/day/seaIce/day/r1i1p1')
        os.makedirs(empty_pubdir)

        self.dt = DRSTree(self.tmpdir)

    def test_1(self):
        # Passes as long as discovery does not raise.
        self.dt.discover()
def test_1(self):
    # Discovery restricted by experiment and realm: three PublishTrees are
    # expected; the last one (by sorted id) is inspected in detail.
    tree = DRSTree(self.tmpdir)
    tree.discover(self.incoming,
                  activity='cmip5', product='output1',
                  institute='MOHC', model='HadCM3',
                  experiment='1pctto4x', realm='atmos')

    assert len(tree.pub_trees) == 3

    last_id = sorted(tree.pub_trees.keys())[2]
    assert last_id == 'cmip5.output1.MOHC.HadCM3.1pctto4x.day.atmos.day.r3i1p1'

    pub_tree = tree.pub_trees[last_id]
    assert pub_tree.versions == {}
    assert len(pub_tree._todo) == 15

    # Each _todo entry is indexed at [1] for the object carrying .variable.
    variables = set(entry[1].variable for entry in pub_tree._todo)
    assert variables == set(('pr', 'rsus', 'tas'))

    assert pub_tree.state == pub_tree.STATE_INITIAL
def setUp(self):
    """Create an empty publication directory under tmpdir and a DRSTree over it."""
    super(TestEmptyPubdir, self).setUp()

    empty_pubdir = op.join(
        self.tmpdir,
        'output2/MOHC/HadGEM2-ES/esmControl/day/seaIce/day/r1i1p1')
    os.makedirs(empty_pubdir)

    self.dt = DRSTree(self.tmpdir)
def _cmor1(self):
    """Write the eg5_1 data, discover it and keep the single PublishTree."""
    gen_drs.write_eg5_1(self.tmpdir)

    self.dt = DRSTree(self.tmpdir)
    self.dt.discover(self.incoming,
                     activity='cmip5', product='output1',
                     institute='MOHC', model='HadCM3')
    # Unpacking fails unless exactly one PublishTree was found.
    [self.pt] = self.dt.pub_trees.values()
def test_2(self):
    """Test incremental discovery without calling discover() first."""
    tree = DRSTree(self.tmpdir)
    fixed = dict(activity='cmip5', product='output1',
                 institute='MOHC', model='HadCM3')

    # Nothing is known before the first incremental discovery.
    assert len(tree.pub_trees) == 0

    # Discover ocean realm
    tree.discover_incoming(self.tmpdir, realm='ocean', **fixed)
    assert len(tree.pub_trees) == 1

    # Discover atmos realm
    tree.discover_incoming(self.tmpdir, realm='atmos', **fixed)
    assert len(tree.pub_trees) == 2

    realms = set(pub_tree.drs.realm for pub_tree in tree.pub_trees.values())
    assert realms == set(['atmos', 'ocean'])
def setUp(self):
    """Write the class's listing into tmpdir and create ``self.dt`` over it."""
    super(TestListing, self).setUp()

    gen_drs.write_listing(self.tmpdir,
                          os.path.join(test_dir, self.listing_file))

    self.dt = DRSTree(self.tmpdir)
def _cmor2(self):
    """Write the eg4_2 data and discover it through a new DRSTree stored as ``self.dt2``."""
    gen_drs.write_eg4_2(self.tmpdir)

    second_tree = DRSTree(self.tmpdir)
    self.dt2 = second_tree
    second_tree.discover_incoming(self.incoming,
                                  activity='cmip5', product='output1')
    # Unpacking fails unless exactly one PublishTree was found.
    [self.pt] = second_tree.pub_trees.values()
def test_2(self):
    # Discover from a file holding one JSON document per line.
    drs_fs = SpecsFileSystem(self.tmpdir)
    drs_tree = DRSTree(drs_fs)

    json_obj = []
    with open(op.join(test_dir, 'specs_cedacc.json')) as fh:
        for line in fh:
            json_obj.append(json.loads(line))

    drs_tree.discover_incoming_fromjson(json_obj, activity='specs')

    # This id will not be present if realm is not correctly split on space
    drs_id = 'specs.output.IPSL.IPSL-CM5A-LR.decadal.S20130101.mon.seaIce.OImon.sic.r3i1p1'
    assert drs_id in drs_tree.pub_trees

    first_tree = list(drs_tree.pub_trees.values())[0]
    todo_variables = set(drs.variable for (_drs_str, drs) in first_tree._todo)

    # All DRS objects should be for the same variable
    assert len(todo_variables) == 1
class TestEg6(TestEg):
    """Publish three deliveries, each from its own incoming sub-directory."""

    __test__ = True

    # One inner list of filenames per delivery; the list index doubles as
    # the incoming sub-directory name and the version number.
    deliveries = [
        ['clt_day_HadGEM2-ES_rcp26_r1i1p1_20051201-20151130.nc',
         'clt_day_HadGEM2-ES_rcp26_r1i1p1_20151201-20251130.nc'],
        ['huss_day_HadGEM2-ES_rcp26_r1i1p1_20991201-21091130.nc'],
        ['hur_day_HadGEM2-ES_rcp26_r1i1p1_20991201-20991230.nc',
         'hus_day_HadGEM2-ES_rcp26_r1i1p1_20991201-20991230.nc'],
    ]

    def setUp(self):
        """Create the incoming files, then discover and version each delivery in turn."""
        super(TestEg6, self).setUp()
        self.setupIncoming()

        self.drs_fs = CMIP5FileSystem(self.tmpdir)
        self.dt = DRSTree(self.drs_fs)

        for version, _delivery in enumerate(self.deliveries):
            delivery_dir = op.join(self.incoming, str(version))
            self.dt.discover_incoming(delivery_dir,
                                      activity='cmip5', product='output1',
                                      institute='MOHC')
            # Version every PublishTree known so far with this delivery's index.
            for pub_tree in list(self.dt.pub_trees.values()):
                pub_tree.do_version(version)

    def setupIncoming(self):
        """Create ``<tmpdir>/incoming/<n>/`` and write the files of delivery ``n`` into it."""
        # Create incoming files
        self.incoming = op.join(self.tmpdir, 'incoming')
        os.mkdir(self.incoming)

        for index, filenames in enumerate(self.deliveries):
            delivery_dir = op.join(self.incoming, str(index))
            os.mkdir(delivery_dir)
            for filename in filenames:
                gen_drs.write_eg_file(op.join(self.incoming, str(index), filename))

    def test_1(self):
        # All deliveries land in one PublishTree; the file count per version
        # grows from 2 to 3 to 5.
        assert len(self.dt.pub_trees) == 1
        pub_tree = list(self.dt.pub_trees.values())[0]

        file_counts = set((version, len(entries))
                          for (version, entries) in pub_tree.versions.items())
        print(file_counts)
        assert file_counts == set([(0, 2), (1, 3), (2, 5)])
def setUp(self):
    """Create the incoming files, then discover and version each delivery in turn."""
    super(TestEg6, self).setUp()
    self.setupIncoming()

    self.dt = DRSTree(self.tmpdir)

    for version, _delivery in enumerate(self.deliveries):
        delivery_dir = op.join(self.incoming, str(version))
        self.dt.discover_incoming(delivery_dir,
                                  activity='cmip5', product='output1',
                                  institute='MOHC')
        # Version every PublishTree known so far with this delivery's index.
        for pub_tree in list(self.dt.pub_trees.values()):
            pub_tree.do_version(version)
def setUp(self):
    """Generate the eg1 data, discover it and version the first PublishTree.

    Leaves the DRSTree in ``self.dt`` and the versioned PublishTree in
    ``self.pt``.
    """
    # Was ``.setup()``: every other fixture in this suite chains to the
    # camel-case ``setUp`` hook, so the lower-case call was a typo.
    super(TestDups, self).setUp()

    # Create test data
    gen_drs.write_eg1(self.tmpdir)

    # Do initial version change
    self.dt = DRSTree(self.tmpdir)
    self.dt.discover(self.incoming,
                     activity='cmip5', product='output1',
                     institute='MOHC', model='HadCM3')
    # Was ``dt.pub_trees``: no local ``dt`` exists here, only ``self.dt``.
    self.pt = list(self.dt.pub_trees.values())[0]
    self.pt.do_version()
class TestDups(TestEg):
    """Fixtures for ingests that contain files duplicating already-published ones."""

    def setUp(self):
        """Generate the eg1 data, discover it and version the first PublishTree.

        Leaves the DRSTree in ``self.dt`` and the versioned PublishTree in
        ``self.pt``.
        """
        # Was ``.setup()``: every other fixture in this suite chains to the
        # camel-case ``setUp`` hook, so the lower-case call was a typo.
        super(TestDups, self).setUp()

        # Create test data
        gen_drs.write_eg1(self.tmpdir)

        # Do initial version change
        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming,
                         activity='cmip5', product='output1',
                         institute='MOHC', model='HadCM3')
        # Was ``dt.pub_trees``: no local ``dt`` exists here, only ``self.dt``.
        self.pt = list(self.dt.pub_trees.values())[0]
        self.pt.do_version()

    def tearDown(self):
        """Remove the temporary directory tree."""
        shutil.rmtree(self.tmpdir)

    def _make_incoming1(self):
        """Populate the incoming area from the dups1.ls listing."""
        # Original ingest
        gen_drs.write_listing(self.incoming, os.path.join(test_dir, 'dups1.ls'))

    def _make_incoming2(self):
        """Populate the incoming area from the dups2.ls listing."""
        # Ingest with some new files and 2 duplicates
        gen_drs.write_listing(self.incoming, os.path.join(test_dir, 'dups2.ls'))

    def _make_incoming3(self):
        """As ``_make_incoming2``, then append a line to CHANGE_FILE so its size changes."""
        # As incoming2 except one of the dups differs in size
        self._make_incoming2()

        # Close the file explicitly so the appended text is flushed to disk
        # before anything compares sizes (the handle was previously leaked).
        with open(os.path.join(self.incoming, CHANGE_FILE), 'a') as fh:
            fh.write('File has grown\n')

    def _make_incoming4(self):
        """As ``_make_incoming2``, then overwrite the first 3 characters of CHANGE_FILE."""
        # As incoming2 except one of the dups only differs by contents
        self._make_incoming2()

        # 'r+' overwrites in place, so the file size stays the same.
        with open(os.path.join(self.incoming, CHANGE_FILE), 'r+') as fh:
            fh.seek(0)
            fh.write('XXX')
def make_drs_tree(self):
    """Build ``self.drs_tree`` from options and config defaults, then run discovery.

    The DRS root comes from ``self.opts.root`` or, failing that, the
    ``root`` config default. The incoming directory comes from
    ``self.opts.incoming``, then the ``incoming`` config default, then
    ``config.DEFAULT_INCOMING`` under the root. DRS components come from the
    options, with config defaults for any that are missing or None.

    Raises:
        Exception: if no DRS root is given by either options or config.
    """
    # Resolve the DRS root directory.
    root = self.opts.root
    if not root:
        try:
            root = config.drs_defaults['root']
        except KeyError:
            raise Exception('drs-root not defined')
    self.drs_root = root

    # Resolve the incoming directory.
    incoming = self.opts.incoming
    if not incoming:
        try:
            incoming = config.drs_defaults['incoming']
        except KeyError:
            incoming = os.path.join(self.drs_root, config.DEFAULT_INCOMING)

    self.drs_tree = DRSTree(self.drs_root)

    move_cmd = self.opts.move_cmd
    if move_cmd:
        self.drs_tree.set_move_cmd(move_cmd)

    # Collect DRS components; an option that is absent or None falls back
    # to the configured default (which may itself be None).
    component_names = ('activity', 'product', 'institute', 'model',
                       'experiment', 'frequency', 'realm', 'ensemble')
    kwargs = {}
    for attr in component_names:
        val = getattr(self.opts, attr, None)
        if val is None:
            val = config.drs_defaults.get(attr)
        kwargs[attr] = val

    # Get the template DRS from args
    if not self.args:
        drs = DRS(**kwargs)
    else:
        drs = DRS.from_dataset_id(self.args[0], **kwargs)

    # Product detection
    if self.opts.detect_product:
        self._config_p_cmip5()
        self._setup_p_cmip5()

    self.drs_tree.discover(incoming, **drs)
def setUp(self):
    """Create a DRSTree over tmpdir and the iterator returned by ``_iterSetUpListings``."""
    super(TestThreeway, self).setUp()

    tree = DRSTree(self.tmpdir)
    self.dt = tree

    listings = self._iterSetUpListings()
    self.listing_iter = listings
class TestEg3(TestEg):
    """Two-delivery publication scenario built from the eg3 data generators."""

    __test__ = True

    def _cmor1(self):
        """Write the eg3_1 data, discover it and keep the single PublishTree."""
        gen_drs.write_eg3_1(self.tmpdir)

        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming,
                         activity='cmip5', product='output1',
                         institute='MOHC', model='HadCM3')
        # Unpacking fails unless exactly one PublishTree was found.
        [self.pt] = self.dt.pub_trees.values()

    def _cmor2(self):
        """Write the eg3_2 data and discover it with the existing DRSTree."""
        gen_drs.write_eg3_2(self.tmpdir)
        self.dt.discover_incoming(self.incoming,
                                  activity='cmip5', product='output1')

    def _exists(self, x):
        """Return whether ``x`` exists relative to the PublishTree's pub_dir."""
        full_path = os.path.join(self.pt.pub_dir, x)
        return os.path.exists(full_path)

    def _listdir(self, x):
        """Return the directory listing of ``x`` relative to the PublishTree's pub_dir."""
        full_path = os.path.join(self.pt.pub_dir, x)
        return os.listdir(full_path)

    def _listlinks(self, x):
        """Return the link targets of every symlink directly inside ``x`` (relative to pub_dir)."""
        pattern = '%s/*' % os.path.join(self.pt.pub_dir, x)
        targets = []
        for entry in glob(pattern):
            if os.path.islink(entry):
                targets.append(os.readlink(entry))
        return targets

    def test_01(self):
        # Versioning must consume everything that was incoming.
        self._cmor1()
        assert len(self.pt.drs_tree.incoming) > 0

        self.pt.do_version()

        assert len(self.pt.drs_tree.incoming) == 0
        assert self.pt.count_todo() == 0
        assert len(list(self.pt.list_todo())) == 0

    def test_1(self):
        # rsus appears only with the second delivery.
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        assert len(self.pt.drs_tree.incoming) == 0

        assert self._exists('files')
        assert self._exists('files/rsus_20100102')
        assert not self._exists('files/rsus_20100101')

        assert self._exists('v20100101/tas')
        assert self._exists('v20100101/pr')
        assert not self._exists('v20100101/rsus')
        assert self._exists('v20100102/rsus')

    def test_2(self):
        # A file from the first delivery must still be present in the
        # second version.
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        carried_over = 'v20100102/pr/pr_day_HadCM3_1pctto4x_r1i1p1_2000010100-2001123114.nc'
        assert self._exists(carried_over)

    def test_3(self):
        # State cycle: INITIAL -> VERSIONED -> VERSIONED_TRANS -> VERSIONED.
        self._cmor1()
        assert self.pt.state == self.pt.STATE_INITIAL

        self.pt.do_version()
        assert self.pt.state == self.pt.STATE_VERSIONED

        self._cmor2()
        assert self.pt.state == self.pt.STATE_VERSIONED_TRANS

        self.pt.do_version()
        assert self.pt.state == self.pt.STATE_VERSIONED

    def test_4(self):
        # Check all links are to the "files" branch
        self._cmor1()
        self.pt.do_version()
        self._cmor2()
        self.pt.do_version()

        # NOTE(review): other tests use version directories named
        # 'v<yyyymmdd>/<variable>'; if 'v2/tas/r1i1p1' matches nothing this
        # loop is empty and the test passes vacuously -- confirm the path.
        for target in self._listlinks('v2/tas/r1i1p1'):
            assert '/files/' in target

    def test_5(self):
        # The 'latest' symlink must follow the newest version.
        latest_link = os.path.join(self.pt.pub_dir, 'latest') if False else None

        self._cmor1()
        self.pt.do_version(20100101)
        latest_link = os.path.join(self.pt.pub_dir, 'latest')
        assert os.readlink(latest_link) == 'v20100101'

        self._cmor2()
        self.pt.do_version(20100102)
        latest_link = os.path.join(self.pt.pub_dir, 'latest')
        assert os.readlink(latest_link) == 'v20100102'

    def test_6(self):
        # Test differencing 2 versions
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()

        only_v1 = []
        only_todo = []
        for status, old_path, new_path in self.pt.diff_version(20100101):
            if status == self.pt.DIFF_V1_ONLY:
                assert 'rsus' not in old_path
                only_v1.append(old_path)
            elif status == self.pt.DIFF_V2_ONLY:
                assert 'rsus' in new_path
                only_todo.append(new_path)

        assert len(only_v1) == 10
        assert len(only_todo) == 5
def setUp(self):
    """Create the CMIP5 filesystem, a DRSTree over it and the listing iterator."""
    super(TestThreeway, self).setUp()

    filesystem = CMIP5FileSystem(self.tmpdir)
    self.drs_fs = filesystem
    self.dt = DRSTree(filesystem)

    self.listing_iter = self._iterSetUpListings()