def test_shards_constrain(self):
    """A file-context derived from a result must constrain the search to the
    single shard (index node) that produced that result."""
    # Test that a file-context constrains the shard list
    conn = SearchConnection(self.test_service, distrib=True)
    ctx = conn.new_context(project='CMIP5')
    results = ctx.search()
    r1 = results[0]
    f_ctx = r1.file_context()
    # !TODO: white-box test. Refactor.
    # Peeks at private _build_query to inspect the outgoing shard parameter.
    query_dict = f_ctx._build_query()
    full_query = f_ctx.connection._build_query(query_dict, shards=f_ctx.shards)
    # !TODO: Force fail to see whether shards is passed through.
    # NOTE: 'shards' is NOT even a key in this dictionary. Needs rewrite!!!
    q_shard = full_query['shards']
    # Check it isn't a ',' separated list — exactly one shard expected.
    assert ',' not in q_shard
    # Shard strings are 'host:port/path'; the host must match the result's
    # index node.
    q_shard_host = q_shard.split(':')[0]
    assert q_shard_host == r1.json['index_node']
    # Now make the query to make sure it returns data from
    # the right index_node
    f_results = f_ctx.search()
    f_r1 = f_results[0]
    assert f_r1.json['index_node'] == r1.json['index_node']
def test_context_facets1():
    """Constraining a context merges new facets with the existing ones."""
    connection = SearchConnection(TEST_SERVICE)
    base_ctx = connection.new_context(project='CMIP5')
    narrowed = base_ctx.constrain(model="IPSL-CM5A-LR")
    expected = {'project': 'CMIP5', 'model': 'IPSL-CM5A-LR'}
    for facet, value in expected.items():
        assert narrowed.facet_constraints[facet] == value
def search_landsea_mask_by_esgf(resource):
    """
    Searches a landsea mask (variable sftlf) in ESGF which matches the
    NetCDF attributes in the NetCDF files ``resource``.

    :param resource: path to a NetCDF file whose global attributes are
        mapped to ESGF search facets via ``ATTRIBUTE_TO_FACETS_MAP``.
    :returns: the OpenDAP URL of the first found mask file.
    :raises Exception: if no mask is found.
    """
    # fill search constraints from nc attributes
    ds = Dataset(resource)
    try:
        attributes = ds.ncattrs()
        constraints = dict(variable="sftlf")
        # .iteritems() is Python 2 only; .items() works on both 2 and 3.
        for attr, facet in ATTRIBUTE_TO_FACETS_MAP.items():
            if attr in attributes:
                constraints[facet] = ds.getncattr(attr)
    finally:
        # Close the dataset as soon as the attributes have been read so the
        # file handle is not leaked if the search below raises.
        ds.close()

    # run file search
    conn = SearchConnection(config.esgfsearch_url(),
                            distrib=config.esgfsearch_distrib())
    ctx = conn.new_context(search_type=TYPE_FILE, **constraints)
    if ctx.hit_count == 0:
        raise Exception("Could not find a mask in ESGF for dataset {0}".format(
            os.path.basename(resource)))
    if ctx.hit_count > 1:
        # Logger.warn() is a deprecated alias of Logger.warning().
        LOGGER.warning("Found more then one mask file.")
    results = ctx.search(batch_size=1)
    return results[0].opendap_url
def test_passed_cached_session(self):
    """A caller-supplied cached session is honoured by the connection."""
    import requests_cache
    expiry = datetime.timedelta(hours=1)
    cached = requests_cache.core.CachedSession(self.cache,
                                               expire_after=expiry)
    connection = SearchConnection(self.test_service, session=cached)
    ctx = connection.new_context(project='cmip5')
    assert ctx.facet_constraints['project'] == 'cmip5'
def test_url_fixing(self):
    """Trailing slashes and '/search' suffixes all normalise to one URL."""
    suffixes = ['', '/', '///', '/search', '/search///']
    urls = [SearchConnection(self.test_service + s).url for s in suffixes]
    assert all(u == urls[0] for u in urls)
def test_result1():
    """The first local CMIP5 result should be an IPSL dataset on vesg.ipsl.fr."""
    conn = SearchConnection(TEST_SERVICE, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    results = ctx.search()
    r1 = results[0]
    # The original pattern used a lone '.' after 'IPSL\.', which matches
    # exactly one character before the '|' separator and so can never match
    # a real multi-component DRS id; '.+' matches the full remainder
    # (consistent with the sibling tests that use '\..+\|').
    assert re.match(r'cmip5\.output1\.IPSL\..+\|vesg.ipsl.fr', r1.dataset_id)
def test_result1():
    """The first local CMIP5 hit is a MOHC dataset hosted at esgf-data2."""
    connection = SearchConnection(TEST_SERVICE, distrib=False)
    hits = connection.new_context(project='CMIP5').search()
    pattern = re.compile(r'cmip5\.output1\.MOHC\..+\|esgf-data2.ceda.ac.uk')
    assert pattern.match(hits[0].dataset_id)
def test_result1():
    """The first local CMIP5 hit is pinned to one exact IPSL dataset id."""
    expected = ('cmip5.output1.IPSL.IPSL-CM5A-LR.1pctCO2.3hr.atmos.3hr.'
                'r1i1p1.v20110427|vesg.ipsl.fr')
    connection = SearchConnection(TEST_SERVICE, distrib=False)
    first = connection.new_context(project='CMIP5').search()[0]
    assert first.dataset_id == expected
def test_result1():
    """The first local CMIP5 hit is a MOHC dataset hosted at esgf-data1."""
    connection = SearchConnection(TEST_SERVICE, distrib=False)
    hits = connection.new_context(project='CMIP5').search()
    pattern = re.compile(r'cmip5\.output1\.MOHC\..+\|esgf-data1.ceda.ac.uk')
    assert pattern.match(hits[0].dataset_id)
def test_constrain_freetext():
    """The freetext query must survive subsequent facet constraints."""
    connection = SearchConnection(TEST_SERVICE)
    ctx = connection.new_context(project='CMIP5', query='humidity')
    assert ctx.freetext_constraint == 'humidity'
    # Adding a facet constraint must not drop the freetext part.
    ctx = ctx.constrain(experiment='historical')
    assert ctx.freetext_constraint == 'humidity'
def test_get_shard_list():
    """A distributed connection exposes the server's shard mapping."""
    connection = SearchConnection(TEST_SERVICE, distrib=True)
    shard_map = connection.get_shard_list()
    # NOTE: the exact shard list varies with the server's replication setup.
    ceda_index = 'esgf-index2.ceda.ac.uk'
    assert ceda_index in shard_map
    # IPSL now replicates all non-local shards; just expect a handful.
    assert len(shard_map[ceda_index]) > 3
def test_context_facet_multivalue2():
    """Constraining an already-set facet with a list yields all values."""
    connection = SearchConnection(TEST_SERVICE)
    single = connection.new_context(project='CMIP5', model='IPSL-CM5A-MR')
    assert single.facet_constraints.getall('model') == ['IPSL-CM5A-MR']
    both = single.constrain(model=['IPSL-CM5A-MR', 'IPSL-CM5A-LR'])
    models = sorted(both.facet_constraints.getall('model'))
    assert models == ['IPSL-CM5A-LR', 'IPSL-CM5A-MR']
def test_url_fixing():
    """Trailing slashes and '/search' suffixes all normalise to one URL."""
    suffixes = ['', '/', '///', '/search', '/search///']
    urls = [SearchConnection(TEST_SERVICE + s).url for s in suffixes]
    assert all(u == urls[0] for u in urls)
def test_context_facets_multivalue():
    """A list-valued constraint records every model and still finds hits."""
    connection = SearchConnection(TEST_SERVICE)
    base = connection.new_context(project='CMIP5')
    multi = base.constrain(model=['IPSL-CM5A-LR', 'IPSL-CM5A-MR'])
    assert multi.hit_count > 0
    assert multi.facet_constraints['project'] == 'CMIP5'
    models = sorted(multi.facet_constraints.getall('model'))
    assert models == ['IPSL-CM5A-LR', 'IPSL-CM5A-MR']
def test_constrain():
    """Adding a facet constraint can only shrink the result set."""
    connection = SearchConnection(TEST_SERVICE)
    broad = connection.new_context(project='CMIP5')
    count_before = broad.hit_count
    narrow = broad.constrain(model="IPSL-CM5A-LR")
    assert count_before > narrow.hit_count
def test_context_facet_multivalue2():
    """Constraining an already-set facet with a list keeps both values."""
    connection = SearchConnection(TEST_SERVICE)
    one_model = connection.new_context(project='CMIP5', model='IPSL-CM5A-MR')
    assert one_model.facet_constraints.getall('model') == ['IPSL-CM5A-MR']
    two_models = one_model.constrain(model=['IPSL-CM5A-MR', 'IPSL-CM5A-LR'])
    expected = ['IPSL-CM5A-LR', 'IPSL-CM5A-MR']
    assert sorted(two_models.facet_constraints.getall('model')) == expected
def test_download_url():
    """A file result exposes an HTTP download URL ending in .nc."""
    connection = SearchConnection(CEDA_SERVICE, distrib=False)
    context = connection.new_context()
    drs = 'GeoMIP.output1.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1'
    datasets = context.search(drs_id=drs)
    file_hits = datasets[0].file_context().search()
    assert re.match(r'http://.*\.nc', file_hits[0].download_url)
def test_facet_count():
    """After constraining, facet_counts reports only the selected values."""
    conn = SearchConnection(TEST_SERVICE)
    context = conn.new_context(project='CMIP5')
    context2 = context.constrain(model="IPSL-CM5A-LR")
    counts = context2.facet_counts
    # On Python 3, dict.keys() returns a view that never compares equal to
    # a list, so the original assertions could never pass; materialise the
    # views first.
    assert list(counts['model'].keys()) == ['IPSL-CM5A-LR']
    assert list(counts['project'].keys()) == ['CMIP5']
def test_context_facet_multivalue3():
    """Searching two experiments finds more than searching one."""
    connection = SearchConnection(TEST_SERVICE)
    one_exp = connection.new_context(project='CMIP5', query='humidity',
                                     experiment='rcp45')
    base_hits = one_exp.hit_count
    assert base_hits > 0
    two_exp = connection.new_context(project='CMIP5', query='humidity',
                                     experiment=['rcp45', 'rcp85'])
    assert two_exp.hit_count > base_hits
def test_download_url(self):
    """A file result exposes an HTTP download URL ending in .nc."""
    connection = SearchConnection(self.test_service, distrib=False)
    drs = ('GeoMIP.output.MOHC.HadGEM2-ES.G1.day.'
           'atmos.day.r1i1p1')
    hits = connection.new_context().search(drs_id=drs)
    first_file = hits[0].file_context().search()[0]
    assert re.match(r'http://.*\.nc', first_file.download_url)
def test_file_context(self):
    """A file context is pinned to its parent dataset's id."""
    connection = SearchConnection(self.test_service, distrib=False)
    dataset = connection.new_context(project='CMIP5').search()[0]
    file_ctx = dataset.file_context()
    assert file_ctx.facet_constraints['dataset_id'] == dataset.dataset_id
def test_download_url():
    """A file result exposes an HTTP download URL ending in .nc."""
    connection = SearchConnection(CEDA_SERVICE, distrib=False)
    drs = 'GeoMIP.output.MOHC.HadGEM2-ES.G1.day.atmos.day.r1i1p1'
    datasets = connection.new_context().search(drs_id=drs)
    first_file = datasets[0].file_context().search()[0]
    assert re.match(r'http://.*\.nc', first_file.download_url)
def test_index_node(self):
    """A non-distributed search reports the local service as index node."""
    connection = SearchConnection(self.test_service, distrib=False)
    first = connection.new_context(project='CMIP5').search()[0]
    expected_host = urlparse(self.test_service).hostname
    assert first.index_node == expected_host
def test_get_shard_list(self):
    """A distributed connection exposes the server's shard mapping."""
    connection = SearchConnection(self.test_service, cache=self.cache,
                                  distrib=True)
    shard_map = connection.get_shard_list()
    # !NOTE: the exact shard list will change depending on the shard
    # replication configuration on the test server.
    ceda_index = 'esgf-index1.ceda.ac.uk'
    assert ceda_index in shard_map
    # IPSL now replicates all non-local shards; just expect more than one.
    assert len(shard_map[ceda_index]) > 1
def test_context_facets3():
    """Results of a constrained search carry the constrained facet values."""
    connection = SearchConnection(TEST_SERVICE)
    base = connection.new_context(project='CMIP5')
    narrowed = base.constrain(model="IPSL-CM5A-LR")
    first = narrowed.search()[0]
    assert first.json['project'] == ['CMIP5']
    assert first.json['model'] == ['IPSL-CM5A-LR']
def test_distrib():
    """A distributed search covers more indexes and so finds more hits."""
    local_conn = SearchConnection(TEST_SERVICE, distrib=False)
    local_count = local_conn.new_context(project='CMIP5').hit_count
    distrib_conn = SearchConnection(TEST_SERVICE, distrib=True)
    distrib_count = distrib_conn.new_context(project='CMIP5').hit_count
    assert local_count < distrib_count
def test_context_facets_multivalue():
    """A list-valued model constraint records both values and finds hits."""
    connection = SearchConnection(TEST_SERVICE)
    multi = connection.new_context(project='CMIP5').constrain(
        model=['IPSL-CM5A-LR', 'IPSL-CM5A-MR'])
    assert multi.hit_count > 0
    assert multi.facet_constraints['project'] == 'CMIP5'
    expected = ['IPSL-CM5A-LR', 'IPSL-CM5A-MR']
    assert sorted(multi.facet_constraints.getall('model')) == expected
def test_opendap_fail():
    """A file with no OpenDAP endpoint reports opendap_url as None."""
    connection = SearchConnection(CEDA_SERVICE, distrib=False)
    hits = connection.new_context().search(project='CMIP5',
                                           experiment='rcp45',
                                           time_frequency='mon',
                                           realm='atmos',
                                           ensemble='r1i1p1')
    first_file = hits[0].file_context().search()[0]
    assert first_file.opendap_url is None
def test_context_facet_options():
    """After fixing most facets, only the remaining free facets are offered."""
    conn = SearchConnection(TEST_SERVICE)
    context = conn.new_context(project='CMIP5', model='IPSL-CM5A-LR',
                               ensemble='r1i1p1', experiment='rcp60',
                               realm='seaIce')
    # On Python 3, dict.keys() is a view that never compares equal to a
    # list, and key order is not part of the contract — compare as sorted
    # lists instead.
    assert sorted(context.get_facet_options().keys()) == sorted([
        'product', 'cf_standard_name', 'variable_long_name',
        'cmor_table', 'time_frequency', 'variable'
    ])
def test_other_index_node(self):
    """A distributed search can return results from a remote index node."""
    connection = SearchConnection(self.test_service, distrib=True)
    first = connection.new_context(project='CMIP5',
                                   institute='INM').search()[0]
    local_host = urlparse(self.test_service).hostname
    print('index_node = %s' % first.index_node)
    assert first.index_node is not None
    assert first.index_node != local_host
def test_url_fixing(self):
    """Trailing slashes and '/search' suffixes all normalise to one URL."""
    # Silence the URL-fixing warnings without clobbering global filter
    # state: catch_warnings restores the filters even when an assertion
    # fails, whereas simplefilter/resetwarnings left them reset and also
    # discarded any filters installed by the test runner.
    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        conn1 = SearchConnection(self.test_service)
        conn2 = SearchConnection(self.test_service + '/')
        conn3 = SearchConnection(self.test_service + '///')
        conn4 = SearchConnection(self.test_service + '/search')
        conn5 = SearchConnection(self.test_service + '/search///')
    assert conn1.url == conn2.url == conn3.url == conn4.url == conn5.url
def test_context_facet_options():
    """After fixing most facets, only the remaining free facets are offered."""
    conn = SearchConnection(TEST_SERVICE)
    context = conn.new_context(project='CMIP5', model='IPSL-CM5A-LR',
                               ensemble='r1i1p1', experiment='rcp60',
                               realm='seaIce')
    # On Python 3, dict.keys() is a view that never compares equal to a
    # list, and key order is not part of the contract — compare as sorted
    # lists instead.
    assert sorted(context.get_facet_options().keys()) == sorted([
        'product', 'cf_standard_name', 'variable_long_name',
        'cmor_table', 'time_frequency', 'variable'
    ])
def test_file_list2(self):
    """Every file in the dataset downloads via a 'ds/...nc' style URL."""
    connection = SearchConnection(self.test_service, distrib=False)
    dataset = connection.new_context(project='CMIP5').search()[0]
    for hit in dataset.file_context().search():
        assert re.search(r'ds/.*\.nc', hit.download_url)
def test_other_index_node():
    """A distributed search can return results from a remote index node."""
    conn = SearchConnection(TEST_SERVICE, distrib=True)
    ctx = conn.new_context(project='CMIP5', institute='INM')
    results = ctx.search()
    r1 = results[0]
    service = urlparse(TEST_SERVICE)
    # Python 2 print statement converted to a print() call, consistent
    # with the rest of this file.
    print('index_node = %s' % r1.index_node)
    assert r1.index_node is not None
    assert r1.index_node != service.hostname
def test_other_index_node(self):
    """A distributed search can return results from a remote index node."""
    connection = SearchConnection(self.test_service, distrib=True)
    first = connection.new_context(project='CMIP5',
                                   institute='IPSL').search()[0]
    local_host = urlparse(self.test_service).hostname
    print('index_node = %s' % first.index_node)
    assert first.index_node is not None
    assert first.index_node != local_host
def test_file_list2():
    """Every file in the first dataset has a THREDDS URL at vesg.ipsl.fr."""
    conn = SearchConnection(TEST_SERVICE, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    results = ctx.search()
    r1 = results[0]
    f_ctx = r1.file_context()
    file_results = f_ctx.search()
    for file_result in file_results:
        # Python 2 print statement converted to a print() call,
        # consistent with the rest of this file.
        print(file_result.url)
        assert re.match(r'http://vesg.ipsl.fr/thredds/.*\.nc',
                        file_result.url)
def test_file_list2():
    """Every file in the first dataset downloads from vesg.ipsl.fr THREDDS."""
    conn = SearchConnection(TEST_SERVICE, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    results = ctx.search()
    r1 = results[0]
    f_ctx = r1.file_context()
    file_results = f_ctx.search()
    for file_result in file_results:
        # Python 2 print statement converted to a print() call,
        # consistent with the rest of this file.
        print(file_result.download_url)
        assert re.match(r'http://vesg.ipsl.fr/thredds/.*\.nc',
                        file_result.download_url)
def test_context_facet_multivalue3():
    """Searching two experiments finds more hits than searching one."""
    connection = SearchConnection(TEST_SERVICE)
    one_experiment = connection.new_context(project='CMIP5',
                                            query='humidity',
                                            experiment='rcp45')
    base_hits = one_experiment.hit_count
    assert base_hits > 0
    two_experiments = connection.new_context(project='CMIP5',
                                             query='humidity',
                                             experiment=['rcp45', 'rcp85'])
    assert two_experiments.hit_count > base_hits
def test_gridftp_url_in_file_result(self):
    """Every file exposes a gsiftp:// URL pointing at a .nc file."""
    connection = SearchConnection(self.test_service, distrib=False)
    dataset = connection.new_context(project='CMIP5').search()[0]
    for hit in dataset.file_context().search():
        url = hit.gridftp_url
        assert url.split(":")[0] == "gsiftp"
        assert url.endswith(".nc")
def test_opendap_fail():
    """A file with no OpenDAP endpoint reports opendap_url as None."""
    connection = SearchConnection(CEDA_SERVICE, distrib=False)
    hits = connection.new_context().search(project='CMIP5',
                                           experiment='rcp45',
                                           time_frequency='mon',
                                           realm='fx',
                                           ensemble='r1i1p1')
    first_file = hits[0].file_context().search()[0]
    assert first_file.opendap_url is None
def test_shards_constrain4():
    """File searches derived from a dataset stay on the right shard."""
    # Regression test for issue #8 reported by [email protected]
    conn = SearchConnection('http://pcmdi9.llnl.gov/esg-search', distrib=True)
    ctx = conn.new_context(
        query='cmip5.output1.BCC.bcc-csm1-1-m.historical.mon.atmos.Amon.r1i1p1.v20120709')
    s = ctx.search()
    ds = s[0]
    publicationDataset, server = ds.dataset_id.split('|')
    # Python 2 print statements converted to print() calls, consistent
    # with the rest of this file (comma-separated output is identical).
    print(publicationDataset, server, ds.json['replica'])
    searchContext = ds.file_context()
    searchContext = searchContext.constrain(variable='tas')
    for j in searchContext.search():
        print(j.download_url, j.checksum, j.checksum_type, j.size)
def test_shards_constrain2():
    """File searches derived from a dataset stay on the right shard."""
    # Regression test for issue #8 reported by [email protected]
    conn = SearchConnection('http://pcmdi9.llnl.gov/esg-search', distrib=True)
    ctx = conn.new_context(experiment='piControl', time_frequency='day',
                           variable='pr', ensemble='r1i1p1')
    ctx = ctx.constrain(
        query='cmip5.output1.BCC.bcc-csm1-1-m.piControl.day.atmos.day.r1i1p1')
    s = ctx.search()
    ds = s[0]
    publicationDataset, server = ds.dataset_id.split('|')
    # Python 2 print statements converted to print() calls, consistent
    # with the rest of this file (comma-separated output is identical).
    print(publicationDataset, server, ds.json['replica'])
    searchContext = ds.file_context()
    searchContext = searchContext.constrain(variable='pr')
    for j in searchContext.search():
        print(j.download_url, j.checksum, j.checksum_type, j.size)
def test_aggregations(self):
    """An aggregation result of a dataset exposes a LAS aggregation URL."""
    conn = SearchConnection(self.test_service, distrib=False)
    ctx = conn.new_context(project='CMIP5')
    results = ctx.search()
    r1 = results[0]
    agg_ctx = r1.aggregation_context()
    agg_results = agg_ctx.search()
    agg1 = agg_results[0]
    # Removed the unused unpacking of r1.dataset_id into ds_id/shard —
    # neither value was referenced.
    las_url = agg1.urls['LAS'][0][0]
    # !FIXME: A pretty dumb test for a correct aggregation
    assert '.aggregation' in las_url
def test_file_list(self):
    """The first file's download URL contains the dataset's DRS path."""
    connection = SearchConnection(self.test_service, distrib=False)
    dataset = connection.new_context(project='CMIP5').search()[0]
    first_file = dataset.file_context().search()[0]
    dataset_part, _shard = dataset.dataset_id.split('|')
    # Assumes dataset is published with DRS path.
    drs_fragment = dataset_part.replace('.', '/')
    assert drs_fragment.lower() in first_file.download_url.lower()
def test_shards_constrain3(self):
    """File searches derived from a dataset stay on the right shard."""
    # Regression test for issue #8 reported by [email protected]
    connection = SearchConnection(self.test_service_pcmdi, distrib=True)
    dataset_query = ('cmip5.output1.CMCC.CMCC-CESM.'
                     'historical.mon.atmos.Amon.r1i1p1.'
                     'v20130416')
    hits = connection.new_context(query=dataset_query).search()
    dataset = hits[0]
    publication_id, data_node = dataset.dataset_id.split('|')
    print(publication_id, data_node, dataset.json['replica'])
    file_ctx = dataset.file_context().constrain(variable='pr')
    for hit in file_ctx.search():
        print(hit.download_url, hit.checksum, hit.checksum_type, hit.size)