def linkmydata(self, outputdir=None):
    """Link the CMOR data structure of any output created by a tool and
    ingest it with solr.

    Crawls ``outputdir`` (expected layout: ``<project>/<product>``),
    symlinks the product into the user's ``CMOR4LINK`` work directory
    (exposed as ``<project_data>/user-<name>``), dumps the crawl to a
    gzipped csv and pushes it through the solr
    incoming -> processing -> backup pipeline.

    :param outputdir: cmor outputdir that was created by the tool.
    :return: nothing
    """
    user = self._user
    workpath = os.path.join(user.getUserBaseDir(), 'CMOR4LINK')
    rootpath = config.get('project_data')
    solr_in = config.get('solr.incoming')
    solr_bk = config.get('solr.backup')
    solr_ps = config.get('solr.processing')

    # Tool-made products are encoded as <tool>%<id|none>%<project>%<product>.
    toolintool = re.compile(
        r'^((?P<tool>[\w%]+)%(\d+|none)%(?P<project>[\w_]+)%(?P<product>[\w_]+)$)')

    # Maybe os.walk for multiple projects or products
    if len(os.listdir(outputdir)) == 1:
        project = os.listdir(outputdir)[0]
        if len(os.listdir(os.path.join(outputdir, project))) == 1:
            product = os.listdir(os.path.join(outputdir, project))[0]
            new_product = '%s.%s.%s.%s' % (
                self.__class__.__name__.lower(), self.rowid, project, product)
            # Evaluate the pattern once instead of re-matching four times.
            match = toolintool.match(product)
            if match:
                # BUG FIX: the original format string had four placeholders
                # for five arguments ('%s.s%s.%s.%s'), which raises a
                # TypeError at runtime.  The tool name already carries its
                # leading dot.
                ntool = '.%s' % match.group('tool')
                new_product = '%s%s.%s.%s.%s' % (
                    self.__class__.__name__.lower(), ntool, self.rowid,
                    match.group('project'), match.group('product'))

            # Link section: ensure <project_data>/user-<name> points at the
            # user's CMOR4LINK work directory.
            link_path = os.path.join(rootpath, 'user-' + user.getName())
            if os.path.islink(link_path):
                if not os.path.exists(link_path):
                    # dangling symlink: recreate it
                    os.unlink(link_path)
                    os.symlink(workpath, os.path.join(link_path))
                if not os.path.isdir(workpath):
                    os.makedirs(workpath)
                # resolve the link so new entries land in the real directory
                workpath = os.path.join(os.path.dirname(link_path),
                                        os.readlink(link_path))
            else:
                if not os.path.isdir(workpath):
                    os.makedirs(workpath)
                os.symlink(workpath, link_path)
            os.symlink(os.path.join(outputdir, project, product),
                       os.path.join(workpath, new_product))

            # Prepare for solr
            crawl_dir = os.path.join(link_path, new_product)
            now = datetime.now().strftime('%Y-%m-%d_%H%M%S')
            dump_name = 'solr_crawl_%s.csv.gz' % now
            output = os.path.join(solr_in, dump_name)

            # Solr part: dump, then hand the dump through the
            # incoming -> processing -> backup directories.
            # BUG FIX: the original re-joined the already absolute dump path
            # onto the solr directories (os.path.join(dir, abs_path) returns
            # abs_path), so every move was a move-onto-itself no-op.
            SolrCore.dump_fs_to_file(crawl_dir, output)
            shutil.move(output, os.path.join(solr_ps, dump_name))
            SolrCore.load_fs_from_file(
                dump_file=os.path.join(solr_ps, dump_name))
            shutil.move(os.path.join(solr_ps, dump_name),
                        os.path.join(solr_bk, dump_name))
def _run(self): # defaults batch_size = self.args.batch_size crawl_dir = self.args.crawl ingest_file = self.args.ingest abort_on_errors = self.DEBUG output = self.args.output solr_url = self.args.solr_url host = None port = None if self.args.solr_url is not None: import re mo = re.match('(?:https?://)?([^:/]{1,})(?::([0-9]{1,}))?(?:/.*|$)', solr_url) if not mo: raise Exception("Cannot understand the solr-url %s" % solr_url) host = mo.group(1) port = int(mo.group(2)) if crawl_dir is None and ingest_file is None: raise CommandError('You must either crawl to generate a dump file or ingest it') # flush stderr in case we have something pending sys.stderr.flush() if host: core_files = SolrCore(core='files', host=host, port=port) core_latest = SolrCore(core='latest', host=host, port=port) if crawl_dir: if not output: raise Exception("You need to dump a file") SolrCore.dump_fs_to_file(crawl_dir, output, batch_size=batch_size, abort_on_errors=abort_on_errors) # create database entry user = User() db = user.getUserDB() UserCrawl.objects.create(status='crawling', path_to_crawl=crawl_dir, user_id=db.getUserId(user.getName()), tar_file=output.split('/')[-1]) elif ingest_file: self.ingest_file = ingest_file from evaluation_system.misc.utils import capture_stdout fn = ingest_file.split('/')[-1] UserCrawl.objects.filter(tar_file=fn).update(status='ingesting') with capture_stdout() as capture: # Ingest the files! if host: SolrCore.load_fs_from_file(dump_file=ingest_file, batch_size=batch_size, abort_on_errors=abort_on_errors, core_all_files=core_files, core_latest=core_latest) else: SolrCore.load_fs_from_file(dump_file=ingest_file, batch_size=batch_size, abort_on_errors=abort_on_errors) print capture.result try: crawl = UserCrawl.objects.get(tar_file=fn) crawl.ingest_msg = crawl.ingest_msg + '\n' + capture.result + '\n\nNow you can find your data using "solr_search"' crawl.status = 'success' crawl.save() except: # pragma nocover pass
def setUp(self):
    """Point the evaluation system at the test config and build two empty
    solr core handles ('files' and 'latest'), verifying both start empty."""
    os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.dirname(
        __file__) + '/test.conf'
    config.reloadConfiguration()
    self.solr_port = config.get('solr.port')
    self.solr_host = config.get('solr.host')
    # Build both core handles the same way and assert each index is empty.
    for attr_name, core_name in (('all_files', 'files'), ('latest', 'latest')):
        core = SolrCore(core=core_name, host=self.solr_host,
                        port=self.solr_port)
        setattr(self, attr_name, core)
        self.assertEquals(core.status()['index']['numDocs'], 0)
def setUp(self): os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.dirname( __file__) + '/test.conf' config.reloadConfiguration() self.solr_port = config.get('solr.port') self.solr_host = config.get('solr.host') # test instances, check they are as expected self.all_files = SolrCore(core='files', host=self.solr_host, port=self.solr_port) self.latest = SolrCore(core='latest', host=self.solr_host, port=self.solr_port) self.assertEquals(self.all_files.status()['index']['numDocs'], 0) self.assertEquals(self.latest.status()['index']['numDocs'], 0) # add some files to the cores supermakedirs('/tmp/some_temp_solr_core/', 0777) self.tmpdir = '/tmp/some_temp_solr_core' self.orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.tmpdir self.files = [ 'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc', 'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc', 'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc', 'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc', 'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc' ] for f in self.files: abs_path = os.path.abspath(os.path.join(self.tmpdir, f)) try: os.makedirs(os.path.dirname(abs_path)) except: # pragma nocover pass with open(abs_path, 'w') as f_out: f_out.write(' ') self.cmd = Command()
def test_dump_to_file(self):
    """dump_fs_to_file writes crawl dumps (plain and gzipped) to disk and
    only includes files below the crawled directory."""
    tmpdir = tempfile.mkdtemp("_solr_core")
    files = [
        'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
        'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
        'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
    ]
    for f in files:
        abs_path = os.path.abspath(os.path.join(tmpdir, f))
        try:
            os.makedirs(os.path.dirname(abs_path))
        except OSError:  # pragma nocover
            # BUG FIX: narrowed from a bare except; only "already exists"
            # should be ignored.
            pass
        with open(abs_path, 'w') as f_out:
            f_out.write(' ')

    dump_file = tmpdir + '/dump1.csv'
    SolrCore.dump_fs_to_file(tmpdir + '/cmip5', dump_file)
    self.assertTrue(os.path.isfile(dump_file))
    # FIX: close file handles deterministically instead of open(...).read()
    with open(dump_file, 'r') as fh:
        dump_str = fh.read()
    self.assertTrue('%s\t%s' % (META_DATA.CRAWL_DIR, tmpdir) in dump_str)
    self.assertTrue(files[0] in dump_str)
    self.assertTrue(files[1] in dump_str)
    self.assertTrue(files[2] in dump_str)

    # crawling a sub-tree must only pick up files below that sub-tree
    SolrCore.dump_fs_to_file(
        tmpdir + '/cmip5/output1/MOHC/HadCM3/historical', dump_file)
    self.assertTrue(os.path.isfile(dump_file))
    with open(dump_file, 'r') as fh:
        dump_str = fh.read()
    self.assertTrue('%s\t%s' % (META_DATA.CRAWL_DIR, tmpdir) in dump_str)
    self.assertTrue(files[0] in dump_str)
    self.assertTrue(files[1] not in dump_str)
    self.assertTrue(files[2] not in dump_str)

    # check gzipped creation: a .gz target must produce a gzip stream
    dump_file += '.gz'
    SolrCore.dump_fs_to_file(tmpdir + '/cmip5', dump_file)
    self.assertTrue(os.path.isfile(dump_file))
    with open(dump_file, 'rb') as fh:
        dump_gzip_header = fh.read(2)
    gzip_header = '\037\213'  # magic bytes of the gzip format
    self.assertEqual(dump_gzip_header, gzip_header)
    import gzip
    with gzip.open(dump_file, 'rb') as fh:
        dump_str = fh.read()
    self.assertTrue('%s\t%s' % (META_DATA.CRAWL_DIR, tmpdir) in dump_str)
    self.assertTrue(files[0] in dump_str)
    self.assertTrue(files[1] in dump_str)
    self.assertTrue(files[2] in dump_str)
    if os.path.isdir(tmpdir):
        shutil.rmtree(tmpdir)
class Test(unittest.TestCase):
    """Tests for the solr_search command line interface.

    setUp builds a fake CMIP5 tree under /tmp, ingests it into the 'files'
    and 'latest' cores, and creates the Command under test.
    """

    def setUp(self):
        # Point the evaluation system at the test configuration.
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.dirname(
            __file__) + '/test.conf'
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected (both cores empty)
        self.all_files = SolrCore(core='files', host=self.solr_host,
                                  port=self.solr_port)
        self.latest = SolrCore(core='latest', host=self.solr_host,
                               port=self.solr_port)
        self.assertEquals(self.all_files.status()['index']['numDocs'], 0)
        self.assertEquals(self.latest.status()['index']['numDocs'], 0)

        # add some files to the cores: three variables, 'ua' in three versions
        supermakedirs('/tmp/some_temp_solr_core/', 0777)
        self.tmpdir = '/tmp/some_temp_solr_core'
        self.orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.tmpdir
        self.files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
        ]
        for f in self.files:
            abs_path = os.path.abspath(os.path.join(self.tmpdir, f))
            try:
                os.makedirs(os.path.dirname(abs_path))
            except:  # pragma nocover
                pass
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')
        dump_file = self.tmpdir + '/dump1.csv'
        # add the files to solr
        SolrCore.dump_fs_to_file(self.tmpdir + '/cmip5', dump_file)
        SolrCore.load_fs_from_file(dump_file, abort_on_errors=True,
                                   core_all_files=self.all_files,
                                   core_latest=self.latest)
        self.cmd = Command()

    def tearDown(self):
        # Wipe both cores and restore the original DRS root directory.
        self.all_files.delete('*')
        self.latest.delete('*')
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.orig_dir
        if os.path.isdir(self.tmpdir):
            shutil.rmtree(self.tmpdir)
        pass

    def run_command_with_capture(self, args_list=[]):
        # Run the command while capturing stdout; returns the captured text.
        # NOTE(review): the mutable default is harmless here because
        # args_list is never mutated, but a None default would be safer.
        stdout.startCapturing()
        stdout.reset()
        self.cmd.run(args_list)
        stdout.stopCapturing()
        return stdout.getvalue()

    def test_search_files(self):
        # Default search prints the latest version of every file.
        all_files_output = u'''/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc
/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc
/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc
'''
        res = self.run_command_with_capture()
        self.assertEqual(res, all_files_output)
        # Single facet filter.
        res = self.run_command_with_capture(['variable=ua'])
        self.assertEqual(
            res,
            '/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc\n'
        )
        # Repeating a facet ORs the values together.
        res = self.run_command_with_capture(['variable=ua', 'variable=tauu'])
        self.assertEqual(
            res,
            """/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc\n/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc\n"""
        )
        res = self.run_command_with_capture(
            ['variable=ua', 'variable=tauu', 'variable=wetso2'])
        self.assertEqual(res, all_files_output)
        # search specific version
        v = 'v20110419'
        res = self.run_command_with_capture(['variable=ua', 'version=%s' % v])
        self.assertIn(v, res)
        # test bad input
        with self.assertRaises(SystemExit):
            self.assertRaises(CommandError, self.cmd.run(['badoption']))

    def test_search_facets(self):
        # Expected output of --all-facets over the five ingested files.
        all_facets = """cmor_table: aero,amon
product: output1
realm: aerosol,atmos
data_type: cmip5
institute: mohc
project: cmip5
time_frequency: mon
experiment: decadal2008,decadal2009,historical
variable: tauu,ua,wetso2
model: hadcm3
ensemble: r2i1p1,r7i2p1,r9i3p1
"""
        res = self.run_command_with_capture(['--all-facets'])
        self.assertEqual(res, all_facets)
        res = self.run_command_with_capture(['--facet=variable'])
        self.assertEqual(res, 'variable: tauu,ua,wetso2\n')
        # Facet values can be narrowed by a search query.
        res = self.run_command_with_capture(
            ['--facet=variable', 'experiment=historical'])
        self.assertEqual(res, 'variable: wetso2\n')
        # facet.limit truncates the value list and marks it with '...'.
        res = self.run_command_with_capture(
            ['--facet=variable', 'facet.limit=2'])
        self.assertEqual(res, 'variable: tauu,ua...\n')
        # --count-facet-values appends per-value counts.
        res = self.run_command_with_capture(
            ['--facet=variable', '--count-facet-values'])
        self.assertEqual(res, 'variable: tauu (1),ua (1),wetso2 (1)\n')

    def test_show_attributes(self):
        # --attributes lists all known facet names.
        res = self.run_command_with_capture(['--attributes'])
        self.assertEqual(
            res,
            'cmor_table, product, realm, data_type, institute, project, time_frequency, experiment, variable, model, ensemble\n'
        )

    def test_solr_backwards(self):
        # A reverse lookup: given a file path, report all of its facets.
        res = self.run_command_with_capture([
            '--all-facets',
            'file="\/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/\\tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc"'
        ])
        self.assertEqual(
            res, """cmor_table: amon
product: output1
realm: atmos
data_type: cmip5
institute: mohc
project: cmip5
time_frequency: mon
experiment: decadal2008
variable: tauu
model: hadcm3
ensemble: r9i3p1
""")
def test_ingest(self):
    """Dump a fake CMIP5 tree, ingest it, and verify version handling.

    Checks that all versions land in the 'files' core while only the
    latest version of each dataset lands in the 'latest' core, and that
    ingesting a newer version displaces the previous latest entry.
    """
    supermakedirs('/tmp/some_temp_solr_core', 0777)
    tmpdir = '/tmp/some_temp_solr_core'
    orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
    DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = tmpdir
    files = [
        'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
        'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
        'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
        'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
        'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
    ]
    # 'ua' exists in three versions; v20110819 is the newest of them.
    latest_versions = [files[0], files[1], files[3]]
    multiversion_latest = files[3]
    old_versions = [files[2], files[4]]
    for f in files:
        abs_path = os.path.abspath(os.path.join(tmpdir, f))
        try:
            os.makedirs(os.path.dirname(abs_path))
        except:  # pragma nocover
            pass
        with open(abs_path, 'w') as f_out:
            f_out.write(' ')

    dump_file = tmpdir + '/dump1.csv'
    SolrCore.dump_fs_to_file(tmpdir + '/cmip5', dump_file, check=True,
                             abort_on_errors=True)
    # test instances, check they are as expected
    SolrCore.load_fs_from_file(dump_file, abort_on_errors=True,
                               core_all_files=self.all_files,
                               core_latest=self.latest)

    # check both cores through the search API
    ff_all = SolrFindFiles(core='files', host=self.solr_host,
                           port=self.solr_port)
    ff_latest = SolrFindFiles(core='latest', host=self.solr_host,
                              port=self.solr_port)
    all_entries = [i for i in ff_all._search()]
    latest_entries = [i for i in ff_latest._search()]
    # old version should be only on the general core
    self.assertTrue(all([tmpdir + '/' + e in all_entries for e in files]))
    self.assertTrue(
        all([tmpdir + '/' + e in latest_entries for e in latest_versions]))
    self.assertTrue(
        all([tmpdir + '/' + e not in latest_entries for e in old_versions]))

    # add new version (v20120419) by inserting a line into the dump file
    new_version = tmpdir + '/' + 'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20120419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
    with open(dump_file, 'r') as f:
        content = f.readlines()
    content.insert(3, new_version + ',1564083682.09\n')
    with open(dump_file, "w") as f:
        contents = "".join(content)
        f.write(contents)
        f.close()
    SolrCore.load_fs_from_file(dump_file, abort_on_errors=True,
                               core_all_files=self.all_files,
                               core_latest=self.latest)

    # the new version is the only difference in 'files', and it replaces
    # the previous latest entry in 'latest'
    self.assertTrue(
        set(ff_all._search()).symmetric_difference(set(all_entries)).pop()
        == new_version)
    self.assertTrue((set(ff_latest._search()) - set(latest_entries)).pop()
                    == new_version)
    self.assertTrue((set(latest_entries) - set(ff_latest._search())).pop()
                    == tmpdir + '/' + multiversion_latest)

    # test get_solr_fields (facets)
    facets = self.all_files.get_solr_fields().keys()
    print self.all_files.get_solr_fields()
    facets_to_be = [
        'model', 'product', 'realm', 'version', 'data_type', 'institute',
        'file_name', 'creation_time', 'cmor_table', 'time_frequency',
        'experiment', 'timestamp', 'file', 'time', 'variable', '_version_',
        'file_no_version', 'project', 'ensemble'
    ]
    self.assertEqual(facets, facets_to_be)
    # restore the original DRS root
    DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = orig_dir
class Test(unittest.TestCase):
    """Tests for SolrCore itself: dumping, ingesting, reload and unload."""

    def setUp(self):
        # Point the evaluation system at the test configuration and create
        # handles for the two cores; both must start empty.
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.dirname(
            __file__) + '/test.conf'
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected
        self.all_files = SolrCore(core='files', host=self.solr_host,
                                  port=self.solr_port)
        self.latest = SolrCore(core='latest', host=self.solr_host,
                               port=self.solr_port)
        self.assertEquals(self.all_files.status()['index']['numDocs'], 0)
        self.assertEquals(self.latest.status()['index']['numDocs'], 0)

    def tearDown(self):
        # Empty both cores after each test.
        self.all_files.delete('*')
        self.latest.delete('*')
        unittest.TestCase.tearDown(self)

    def test_dump_to_file(self):
        # dump_fs_to_file writes crawl dumps (plain and gzipped) and only
        # includes files below the crawled directory.
        tmpdir = tempfile.mkdtemp("_solr_core")
        files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
        ]
        for f in files:
            abs_path = os.path.abspath(os.path.join(tmpdir, f))
            try:
                os.makedirs(os.path.dirname(abs_path))
            except:  # pragma nocover
                pass
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')

        dump_file = tmpdir + '/dump1.csv'
        SolrCore.dump_fs_to_file(tmpdir + '/cmip5', dump_file)
        self.assertTrue(os.path.isfile(dump_file))
        dump_str = open(dump_file, 'r').read()
        self.assertTrue('%s\t%s' % (META_DATA.CRAWL_DIR, tmpdir) in dump_str)
        self.assertTrue(files[0] in dump_str)
        self.assertTrue(files[1] in dump_str)
        self.assertTrue(files[2] in dump_str)

        # crawling a sub-tree must exclude files outside of it
        SolrCore.dump_fs_to_file(
            tmpdir + '/cmip5/output1/MOHC/HadCM3/historical', dump_file)
        self.assertTrue(os.path.isfile(dump_file))
        dump_str = open(dump_file, 'r').read()
        self.assertTrue('%s\t%s' % (META_DATA.CRAWL_DIR, tmpdir) in dump_str)
        self.assertTrue(files[0] in dump_str)
        self.assertTrue(files[1] not in dump_str)
        self.assertTrue(files[2] not in dump_str)

        # check gzipped creation
        dump_file += '.gz'
        SolrCore.dump_fs_to_file(tmpdir + '/cmip5', dump_file)
        self.assertTrue(os.path.isfile(dump_file))
        dump_gzip_header = open(dump_file, 'rb').read(2)
        gzip_header = '\037\213'  # magic bytes of the gzip format
        self.assertEqual(dump_gzip_header, gzip_header)
        import gzip
        dump_str = gzip.open(dump_file, 'rb').read()
        self.assertTrue('%s\t%s' % (META_DATA.CRAWL_DIR, tmpdir) in dump_str)
        self.assertTrue(files[0] in dump_str)
        self.assertTrue(files[1] in dump_str)
        self.assertTrue(files[2] in dump_str)
        if os.path.isdir(tmpdir):
            shutil.rmtree(tmpdir)
        pass

    def test_ingest(self):
        # End-to-end dump + ingest with version handling: all versions land
        # in 'files', only the newest version of each dataset in 'latest'.
        supermakedirs('/tmp/some_temp_solr_core', 0777)
        tmpdir = '/tmp/some_temp_solr_core'
        orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = tmpdir
        files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
        ]
        # 'ua' exists in three versions; v20110819 is the newest of them.
        latest_versions = [files[0], files[1], files[3]]
        multiversion_latest = files[3]
        old_versions = [files[2], files[4]]
        for f in files:
            abs_path = os.path.abspath(os.path.join(tmpdir, f))
            try:
                os.makedirs(os.path.dirname(abs_path))
            except:  # pragma nocover
                pass
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')

        dump_file = tmpdir + '/dump1.csv'
        SolrCore.dump_fs_to_file(tmpdir + '/cmip5', dump_file, check=True,
                                 abort_on_errors=True)
        # test instances, check they are as expected
        SolrCore.load_fs_from_file(dump_file, abort_on_errors=True,
                                   core_all_files=self.all_files,
                                   core_latest=self.latest)

        # check both cores through the search API
        ff_all = SolrFindFiles(core='files', host=self.solr_host,
                               port=self.solr_port)
        ff_latest = SolrFindFiles(core='latest', host=self.solr_host,
                                  port=self.solr_port)
        all_entries = [i for i in ff_all._search()]
        latest_entries = [i for i in ff_latest._search()]
        # old version should be only on the general core
        self.assertTrue(all([tmpdir + '/' + e in all_entries for e in files]))
        self.assertTrue(
            all([tmpdir + '/' + e in latest_entries
                 for e in latest_versions]))
        self.assertTrue(
            all([tmpdir + '/' + e not in latest_entries
                 for e in old_versions]))

        # add new version (v20120419) by inserting a line into the dump file
        new_version = tmpdir + '/' + 'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20120419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
        with open(dump_file, 'r') as f:
            content = f.readlines()
        content.insert(3, new_version + ',1564083682.09\n')
        with open(dump_file, "w") as f:
            contents = "".join(content)
            f.write(contents)
            f.close()
        SolrCore.load_fs_from_file(dump_file, abort_on_errors=True,
                                   core_all_files=self.all_files,
                                   core_latest=self.latest)

        # the new version is the only difference in 'files', and it replaces
        # the previous latest entry in 'latest'
        self.assertTrue(
            set(ff_all._search()).symmetric_difference(
                set(all_entries)).pop() == new_version)
        self.assertTrue((set(ff_latest._search()) -
                         set(latest_entries)).pop() == new_version)
        self.assertTrue((set(latest_entries) -
                         set(ff_latest._search())).pop()
                        == tmpdir + '/' + multiversion_latest)

        # test get_solr_fields (facets)
        facets = self.all_files.get_solr_fields().keys()
        print self.all_files.get_solr_fields()
        facets_to_be = [
            'model', 'product', 'realm', 'version', 'data_type', 'institute',
            'file_name', 'creation_time', 'cmor_table', 'time_frequency',
            'experiment', 'timestamp', 'file', 'time', 'variable',
            '_version_', 'file_no_version', 'project', 'ensemble'
        ]
        self.assertEqual(facets, facets_to_be)
        # restore the original DRS root
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = orig_dir
        #if os.path.isdir(tmpdir):
        #    shutil.rmtree(tmpdir)
        #    pass

    def test_reload(self):
        # Reloading a core returns only a response header.
        res = self.all_files.reload()
        self.assertEqual(['responseHeader'], res.keys())

    def test_unload_and_create(self):
        # Unloading leaves the core without a status; re-creating restores it.
        res = self.all_files.unload()
        status = self.all_files.status()
        self.assertEqual({}, status)
        self.all_files.create()
        self.assertEqual(len(self.all_files.status()), 9)
class Test(unittest.TestCase):
    """Tests for the crawl/ingest command (dump file + UserCrawl tracking)."""

    def setUp(self):
        # Point the evaluation system at the test configuration; create the
        # two (empty) cores and a fake CMIP5 tree under /tmp.
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.dirname(
            __file__) + '/test.conf'
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected
        self.all_files = SolrCore(core='files', host=self.solr_host,
                                  port=self.solr_port)
        self.latest = SolrCore(core='latest', host=self.solr_host,
                               port=self.solr_port)
        self.assertEquals(self.all_files.status()['index']['numDocs'], 0)
        self.assertEquals(self.latest.status()['index']['numDocs'], 0)

        # add some files to the cores
        supermakedirs('/tmp/some_temp_solr_core/', 0777)
        self.tmpdir = '/tmp/some_temp_solr_core'
        self.orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.tmpdir
        self.files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
        ]
        for f in self.files:
            abs_path = os.path.abspath(os.path.join(self.tmpdir, f))
            try:
                os.makedirs(os.path.dirname(abs_path))
            except:  # pragma nocover
                pass
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')
        self.cmd = Command()

    def tearDown(self):
        # Wipe cores, drop tracking rows, restore the DRS root, remove files.
        self.all_files.delete('*')
        self.latest.delete('*')
        UserCrawl.objects.all().delete()
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.orig_dir
        if os.path.isdir(self.tmpdir):
            shutil.rmtree(self.tmpdir)
        pass

    def test_command(self):
        # No arguments at all is an error.
        with self.assertRaises(SystemExit):
            self.cmd.run([])
        # Crawling without an output file is an error as well.
        with self.assertRaises(SystemExit):
            self.cmd.run(['--crawl=%s/cmip5' % self.tmpdir])
        # test crawl dir: produces the dump and a 'crawling' UserCrawl row
        output = '/tmp/crawl_output.txt'
        self.cmd.run(
            ['--crawl=%s/cmip5' % self.tmpdir, '--output=%s' % output])
        self.assertTrue(os.path.isfile(output))
        crawl_obj = UserCrawl.objects.get(tar_file=output.split('/')[-1])
        self.assertEqual(crawl_obj.status, 'crawling')
        # test ingesting: 3 latest files end up searchable, status 'success'
        self.assertEqual(len(list(SolrFindFiles.search())), 0)
        self.cmd.run(['--ingest=%s' % output])
        crawl_obj = UserCrawl.objects.get(tar_file=output.split('/')[-1])
        self.assertEqual(crawl_obj.status, 'success')
        self.assertEqual(len(list(SolrFindFiles.search())), 3)
        # test custom host and port via --solr-url; all 5 versions ingested
        self.cmd.run([
            '--ingest=%s' % output,
            '--solr-url=http://%s:%s' % (self.solr_host, self.solr_port)
        ])
        self.assertEqual(len(list(SolrFindFiles.search(latest_version=False))),
                         5)
        os.remove(output)
class Test(unittest.TestCase):
    """Tests for the SolrFindFiles search and facet API."""

    def setUp(self):
        # Point the evaluation system at the test configuration; build the
        # two (empty) cores, a fake CMIP5 tree, and ingest it.
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.dirname(__file__) + '/test.conf'
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected
        self.all_files = SolrCore(core='files', host=self.solr_host,
                                  port=self.solr_port)
        self.latest = SolrCore(core='latest', host=self.solr_host,
                               port=self.solr_port)
        self.assertEquals(self.all_files.status()['index']['numDocs'], 0)
        self.assertEquals(self.latest.status()['index']['numDocs'], 0)

        # add some files to the cores: three variables, 'ua' in 3 versions
        supermakedirs('/tmp/some_temp_solr_core/', 0777)
        self.tmpdir = '/tmp/some_temp_solr_core'
        self.orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.tmpdir
        self.files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc']
        for f in self.files:
            abs_path = os.path.abspath(os.path.join(self.tmpdir, f))
            try:
                os.makedirs(os.path.dirname(abs_path))
            except:  # pragma nocover
                pass
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')
        dump_file = self.tmpdir + '/dump1.csv'
        # add the files to solr
        SolrCore.dump_fs_to_file(self.tmpdir + '/cmip5', dump_file)
        SolrCore.load_fs_from_file(
            dump_file, abort_on_errors=True,
            core_all_files=self.all_files, core_latest=self.latest
        )

    def tearDown(self):
        # Wipe both cores, restore the DRS root and remove the temp tree.
        self.all_files.delete('*')
        self.latest.delete('*')
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.orig_dir
        if os.path.isdir(self.tmpdir):
            shutil.rmtree(self.tmpdir)
        pass

    def test_solr_search(self):
        # search some files: default search returns only latest versions
        solr_search = SolrFindFiles()
        all_files = solr_search.search()
        self.assertEqual(len(list(all_files)), 3)
        hist = solr_search.search(experiment='historical')
        self.assertEqual(list(hist),
                         [os.path.join(self.tmpdir, self.files[0])])
        # latest_version=False searches the 'files' core (all 5 versions)
        all_files = solr_search.search(latest_version=False)
        self.assertEqual(len(list(all_files)), 5)
        # test OR query: a list of values for one facet
        or_result = solr_search.search(variable=['tauu', 'wetso2'])
        self.assertEqual(set([os.path.join(self.tmpdir, e)
                              for e in self.files[:2]]),
                         set(or_result))

    def test_facet_search(self):
        # Facet lists alternate value and count: ['aero', 1, 'amon', 2, ...]
        factes_to_be = {'cmor_table': ['aero', 1, 'amon', 2],
                        'product': ['output1', 3],
                        'realm': ['aerosol', 1, 'atmos', 2],
                        'data_type': ['cmip5', 3],
                        'institute': ['mohc', 3],
                        'project': ['cmip5', 3],
                        'time_frequency': ['mon', 3],
                        'experiment': ['decadal2008', 1, 'decadal2009', 1, 'historical', 1],
                        'variable': ['tauu', 1, 'ua', 1, 'wetso2', 1],
                        'model': ['hadcm3', 3],
                        'ensemble': ['r2i1p1', 1, 'r7i2p1', 1, 'r9i3p1', 1]}
        s = SolrFindFiles
        all_factes = s.facets()
        self.assertEqual(len(all_factes), 11)
        self.assertEqual(all_factes, factes_to_be)
        var_facets = s.facets(facets=['variable'])
        self.assertEqual(var_facets, dict(variable=factes_to_be['variable']))
        # facets can be narrowed by a search query
        experiment_facets = s.facets(facets='experiment', cmor_table='amon')
        self.assertEqual(experiment_facets,
                         {'experiment': ['decadal2008', 1, 'decadal2009', 1]})
        # test files core (all versions, hence different counts)
        res = s.facets(facets='variable,project', latest_version=False)
        self.assertEqual(res.keys(), ['variable', 'project'])
        self.assertEqual(res, {'variable': ['tauu', 1, 'ua', 3, 'wetso2', 1],
                               'project': ['cmip5', 5]})
class Test(unittest.TestCase):
    """Tests for the DRSFile class backed by a solr-ingested CMIP5 tree."""

    def setUp(self):
        # Point the evaluation system at the test configuration; build two
        # (empty) cores, a fake CMIP5 tree, ingest it and keep one DRSFile.
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.dirname(
            __file__) + '/test.conf'
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected
        self.all_files = SolrCore(core='files', host=self.solr_host,
                                  port=self.solr_port)
        self.latest = SolrCore(core='latest', host=self.solr_host,
                               port=self.solr_port)
        self.assertEquals(self.all_files.status()['index']['numDocs'], 0)
        self.assertEquals(self.latest.status()['index']['numDocs'], 0)

        # add some files to the cores
        supermakedirs('/tmp/some_temp_solr_core/', 0777)
        self.tmpdir = '/tmp/some_temp_solr_core'
        self.orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.tmpdir
        self.files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
        ]
        for f in self.files:
            abs_path = os.path.abspath(os.path.join(self.tmpdir, f))
            try:
                os.makedirs(os.path.dirname(abs_path))
            except:  # pragma nocover
                pass
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')
        dump_file = self.tmpdir + '/dump1.csv'
        # add the files to solr
        SolrCore.dump_fs_to_file(self.tmpdir + '/cmip5', dump_file)
        SolrCore.load_fs_from_file(dump_file, abort_on_errors=True,
                                   core_all_files=self.all_files,
                                   core_latest=self.latest)
        # a reference DRSFile instance used by several tests below
        self.fn = os.path.join(self.tmpdir, self.files[0])
        self.drs = DRSFile.from_path(self.fn)

    def tearDown(self):
        # Wipe both cores, restore the DRS root and remove the temp tree.
        self.all_files.delete('*')
        self.latest.delete('*')
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.orig_dir
        if os.path.isdir(self.tmpdir):
            shutil.rmtree(self.tmpdir)
        pass

    def test_solr_search(self):
        # test path_only search
        res = DRSFile.solr_search(path_only=True, variable='tauu')
        self.assertEqual(list(res), [
            u'/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc'
        ])
        # test drs search: results are DRSFile instances
        res = DRSFile.solr_search(variable='ua')
        for i in res:
            self.assertTrue(isinstance(i, DRSFile))
        # use drs_structure: three latest files in the CMIP5 structure
        res = DRSFile.solr_search(drs_structure=CMIP5)
        for j, i in enumerate(res):
            self.assertTrue(isinstance(i, DRSFile))
        self.assertEqual(j + 1, 3)

    def test_compare(self):
        # Equality holds for the same file, not across files or raw paths.
        fn2 = os.path.join(self.tmpdir, self.files[1])
        drs2 = DRSFile.from_path(fn2)
        self.assertTrue(self.drs == self.drs)
        self.assertFalse(self.drs == drs2)
        self.assertFalse(drs2 == fn2)

    def test_json_path(self):
        # to_json produces a str; to_path round-trips to the source path.
        j = self.drs.to_json()
        self.assertTrue(isinstance(j, str))
        path = self.drs.to_path()
        self.assertEqual(path, self.fn)

    def test_find_structure_in_path(self):
        # Detect the DRS structure from a directory path.
        s = DRSFile.find_structure_in_path('/tmp/some_temp_solr_core/cmip5')
        self.assertEqual(s, 'cmip5')
        s = DRSFile.find_structure_in_path('/tmp/some_temp_solr_core/cmip5',
                                           allow_multiples=True)
        self.assertEqual(s, ['cmip5'])
        self.assertRaises(Exception, DRSFile.find_structure_in_path,
                          '/no/valid/path')

    def test_structure_from_path(self):
        # Detect the DRS structure from a file path.
        s = DRSFile.find_structure_from_path(self.fn)
        self.assertEqual(s, 'cmip5')
        s = DRSFile.find_structure_from_path(self.fn, allow_multiples=True)
        self.assertEqual(s, ['cmip5'])
        self.assertRaises(Exception, DRSFile.find_structure_from_path,
                          '/no/valid/file_path')

    def test_from_dict(self):
        # from_dict round-trips the dict representation to the same path.
        d = self.drs.dict
        t = DRSFile.from_dict(d, CMIP5)
        self.assertTrue(isinstance(t, DRSFile))
        self.assertEqual(self.drs.to_path(), t.to_path())

    def test_from_json(self):
        # from_json round-trips the json representation to the same path.
        j = self.drs.to_json()
        t = DRSFile.from_json(j, CMIP5)
        self.assertTrue(isinstance(t, DRSFile))
        self.assertEqual(self.drs.to_path(), t.to_path())

    def test_to_dataset(self):
        # The versioned dataset path contains the file's directory.
        res = self.drs.to_dataset_path(versioned=True)
        self.assertIn('/'.join(self.files[0].split('/')[:-1]), res)