Example #1
0
    def linkmydata(self, outputdir=None):
        """Link a tool's CMOR output into the user's link area and ingest it with solr.

        ``outputdir`` must contain exactly one project directory, which in turn
        must contain exactly one product directory. That product directory is
        symlinked into the user's work path under a name built from the lower
        cased class name, ``self.rowid``, the project and the product. The
        linked tree is then dumped to a crawl file in the solr incoming
        directory, moved to the processing directory, ingested, and finally
        moved to the backup directory.

        :param outputdir: cmor outputdir that was created by the tool.
        :raises ValueError: if ``outputdir`` or its project directory does not
            contain exactly one entry.
        :return: nothing
        """
        user = self._user
        workpath = os.path.join(user.getUserBaseDir(), 'CMOR4LINK')
        rootpath = config.get('project_data')
        solr_in = config.get('solr.incoming')
        solr_bk = config.get('solr.backup')
        solr_ps = config.get('solr.processing')

        # "Tool in tool" product names: <tool>%<number|none>%<project>%<product>
        toolintool = re.compile(r'^((?P<tool>[\w%]+)%(\d+|none)%(?P<project>[\w_]+)%(?P<product>[\w_]+)$)')

        # Only the single-project / single-product layout is supported. Fail
        # with a clear message instead of hitting an unbound local further down.
        projects = os.listdir(outputdir)
        if len(projects) != 1:
            raise ValueError('Expected exactly one project directory in %s, found %d'
                             % (outputdir, len(projects)))
        project = projects[0]
        products = os.listdir(os.path.join(outputdir, project))
        if len(products) != 1:
            raise ValueError('Expected exactly one product directory in %s, found %d'
                             % (os.path.join(outputdir, project), len(products)))
        product = products[0]

        tool_name = self.__class__.__name__.lower()
        nested = toolintool.match(product)
        if nested:
            # Five components need five placeholders; the previous format
            # string only had four and raised TypeError for every nested name.
            new_product = '%s.%s.%s.%s.%s' % (tool_name, nested.group('tool'), self.rowid,
                                              nested.group('project'), nested.group('product'))
        else:
            new_product = '%s.%s.%s.%s' % (tool_name, self.rowid, project, product)

        # Link section
        link_path = os.path.join(rootpath, 'user-' + user.getName())
        if os.path.islink(link_path):
            if not os.path.exists(link_path):
                # The link exists but its target is gone: recreate both.
                os.unlink(link_path)
                os.symlink(workpath, link_path)
                if not os.path.isdir(workpath):
                    os.makedirs(workpath)
            # Follow the link so the product ends up where the link points to.
            workpath = os.path.join(os.path.dirname(link_path), os.readlink(link_path))
        else:
            if not os.path.isdir(workpath):
                os.makedirs(workpath)
            os.symlink(workpath, link_path)
        os.symlink(os.path.join(outputdir, project, product), os.path.join(workpath, new_product))

        # Prepare for solr
        crawl_dir = os.path.join(link_path, new_product)
        now = datetime.now().strftime('%Y-%m-%d_%H%M%S')
        dump_name = 'solr_crawl_%s.csv.gz' % now
        # Build every stage path from the bare file name. Joining a directory
        # with the already joined incoming path either returns the incoming
        # path unchanged (absolute directories) or nests it below the target.
        incoming = os.path.join(solr_in, dump_name)
        processing = os.path.join(solr_ps, dump_name)
        backup = os.path.join(solr_bk, dump_name)

        # Solr part: incoming -> processing -> (ingest) -> backup
        SolrCore.dump_fs_to_file(crawl_dir, incoming)
        shutil.move(incoming, processing)
        SolrCore.load_fs_from_file(dump_file=processing)
        shutil.move(processing, backup)
Example #2
0
    def _run(self):
        """Crawl a directory into a dump file, or ingest a dump file into solr.

        Reads ``batch_size``, ``crawl``, ``ingest``, ``output`` and
        ``solr_url`` from ``self.args``. When ``crawl`` is given the directory
        is dumped to ``output`` and a ``UserCrawl`` row with status
        ``'crawling'`` is created. Otherwise, when ``ingest`` is given, the
        dump file is loaded into solr, the captured output is printed and the
        matching ``UserCrawl`` row is updated.

        :raises Exception: if the solr url cannot be parsed, or if crawling
            is requested without an output file.
        :raises CommandError: if neither ``crawl`` nor ``ingest`` is given.
        """
        # defaults
        batch_size = self.args.batch_size
        crawl_dir = self.args.crawl
        ingest_file = self.args.ingest
        abort_on_errors = self.DEBUG
        output = self.args.output
        solr_url = self.args.solr_url

        host = None
        port = None
        if solr_url is not None:
            import re
            mo = re.match('(?:https?://)?([^:/]{1,})(?::([0-9]{1,}))?(?:/.*|$)', solr_url)
            if not mo:
                raise Exception("Cannot understand the solr-url %s" % solr_url)
            host = mo.group(1)
            # The port group is optional in the pattern, so it is None for
            # urls such as "http://host/solr"; int(None) would raise TypeError.
            # NOTE(review): port stays None in that case - presumably SolrCore
            # falls back to its configured default; confirm.
            port_text = mo.group(2)
            if port_text is not None:
                port = int(port_text)

        if crawl_dir is None and ingest_file is None:
            raise CommandError('You must either crawl to generate a dump file or ingest it')

        # flush stderr in case we have something pending
        sys.stderr.flush()

        if host:
            core_files = SolrCore(core='files', host=host, port=port)
            core_latest = SolrCore(core='latest', host=host, port=port)

        if crawl_dir:
            if not output:
                raise Exception("You need to dump a file")
            SolrCore.dump_fs_to_file(crawl_dir, output, batch_size=batch_size, abort_on_errors=abort_on_errors)
            # create database entry
            user = User()
            db = user.getUserDB()
            UserCrawl.objects.create(status='crawling', path_to_crawl=crawl_dir, user_id=db.getUserId(user.getName()),
                                     tar_file=output.split('/')[-1])
        elif ingest_file:
            self.ingest_file = ingest_file
            from evaluation_system.misc.utils import capture_stdout
            fn = ingest_file.split('/')[-1]
            UserCrawl.objects.filter(tar_file=fn).update(status='ingesting')
            with capture_stdout() as capture:
                # Ingest the files!
                if host:
                    SolrCore.load_fs_from_file(dump_file=ingest_file, batch_size=batch_size,
                                               abort_on_errors=abort_on_errors, core_all_files=core_files,
                                               core_latest=core_latest)
                else:
                    SolrCore.load_fs_from_file(dump_file=ingest_file, batch_size=batch_size,
                                               abort_on_errors=abort_on_errors)
            # Parenthesised single argument prints identically on Python 2 and 3.
            print(capture.result)
            try:
                crawl = UserCrawl.objects.get(tar_file=fn)
                crawl.ingest_msg = crawl.ingest_msg + '\n' + capture.result + '\n\nNow you can find your data using "solr_search"'
                crawl.status = 'success'
                crawl.save()
            except Exception:  # pragma nocover
                # Best effort: the ingest itself succeeded, so a missing or
                # unsaveable bookkeeping row must not fail the command.
                pass
Example #3
0
 def setUp(self):
     """Load the test configuration and connect to the two empty solr cores.

     Points ``EVALUATION_SYSTEM_CONFIG_FILE`` at the ``test.conf`` next to
     this module, reloads the configuration and creates ``SolrCore``
     handles for the ``files`` and ``latest`` cores. Both cores must report
     zero documents.
     """
     # Resolve the module directory first: with a bare relative ``__file__``
     # plain concatenation would yield '/test.conf' in the filesystem root.
     test_dir = os.path.dirname(os.path.abspath(__file__))
     os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.join(test_dir, 'test.conf')
     config.reloadConfiguration()
     self.solr_port = config.get('solr.port')
     self.solr_host = config.get('solr.host')
     # test instances, check they are as expected
     self.all_files = SolrCore(core='files',
                               host=self.solr_host,
                               port=self.solr_port)
     self.latest = SolrCore(core='latest',
                            host=self.solr_host,
                            port=self.solr_port)
     self.assertEqual(self.all_files.status()['index']['numDocs'], 0)
     self.assertEqual(self.latest.status()['index']['numDocs'], 0)
Example #4
0
    def setUp(self):
        """Prepare empty solr cores, a scratch CMIP5 tree and the command.

        Loads the ``test.conf`` next to this module, connects to the
        ``files`` and ``latest`` cores (both must be empty), redirects the
        CMIP5 ``root_dir`` of ``DRSFile.DRS_STRUCTURE`` to a scratch directory
        (the previous value is kept in ``self.orig_dir``) and creates one
        single-character file for every entry of ``self.files``.
        """
        # Resolve the module directory first: with a bare relative ``__file__``
        # plain concatenation would yield '/test.conf' in the filesystem root.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.join(test_dir, 'test.conf')
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected
        self.all_files = SolrCore(core='files',
                                  host=self.solr_host,
                                  port=self.solr_port)
        self.latest = SolrCore(core='latest',
                               host=self.solr_host,
                               port=self.solr_port)
        self.assertEqual(self.all_files.status()['index']['numDocs'], 0)
        self.assertEqual(self.latest.status()['index']['numDocs'], 0)

        # add some files to the cores
        supermakedirs('/tmp/some_temp_solr_core/', 0o777)
        self.tmpdir = '/tmp/some_temp_solr_core'
        self.orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.tmpdir

        self.files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
        ]
        for rel_path in self.files:
            abs_path = os.path.abspath(os.path.join(self.tmpdir, rel_path))
            try:
                os.makedirs(os.path.dirname(abs_path))
            except OSError:  # pragma nocover
                # The directory is left over from a previous file or run.
                pass
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')
        self.cmd = Command()
Example #5
0
    def test_dump_to_file(self):
        """Check plain and gzipped dumps produced by ``SolrCore.dump_fs_to_file``.

        Creates three files in a fresh temporary tree, dumps the whole tree,
        then only the ``historical`` sub-tree, then the whole tree again into
        a ``.gz`` file, and checks the crawl-dir header line and the listed
        files each time. The temporary tree is removed even when an assertion
        fails.
        """
        tmpdir = tempfile.mkdtemp("_solr_core")
        try:
            files = [
                'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
            ]
            for rel_path in files:
                abs_path = os.path.abspath(os.path.join(tmpdir, rel_path))
                try:
                    os.makedirs(os.path.dirname(abs_path))
                except OSError:  # pragma nocover
                    pass
                with open(abs_path, 'w') as f_out:
                    f_out.write(' ')

            crawl_header = '%s\t%s' % (META_DATA.CRAWL_DIR, tmpdir)

            # full tree: every file is listed
            dump_file = tmpdir + '/dump1.csv'
            SolrCore.dump_fs_to_file(tmpdir + '/cmip5', dump_file)

            self.assertTrue(os.path.isfile(dump_file))
            with open(dump_file, 'r') as dump:
                dump_str = dump.read()
            self.assertIn(crawl_header, dump_str)
            self.assertIn(files[0], dump_str)
            self.assertIn(files[1], dump_str)
            self.assertIn(files[2], dump_str)

            # sub-tree: only the historical file is listed
            SolrCore.dump_fs_to_file(
                tmpdir + '/cmip5/output1/MOHC/HadCM3/historical', dump_file)

            self.assertTrue(os.path.isfile(dump_file))
            with open(dump_file, 'r') as dump:
                dump_str = dump.read()
            self.assertIn(crawl_header, dump_str)
            self.assertIn(files[0], dump_str)
            self.assertNotIn(files[1], dump_str)
            self.assertNotIn(files[2], dump_str)

            # check gzipped creation
            dump_file += '.gz'
            SolrCore.dump_fs_to_file(tmpdir + '/cmip5', dump_file)
            self.assertTrue(os.path.isfile(dump_file))
            with open(dump_file, 'rb') as dump:
                dump_gzip_header = dump.read(2)
            gzip_header = '\037\213'
            self.assertEqual(dump_gzip_header, gzip_header)
            import gzip
            with gzip.open(dump_file, 'rb') as dump:
                dump_str = dump.read()
            self.assertIn(crawl_header, dump_str)
            self.assertIn(files[0], dump_str)
            self.assertIn(files[1], dump_str)
            self.assertIn(files[2], dump_str)
        finally:
            if os.path.isdir(tmpdir):
                shutil.rmtree(tmpdir)
Example #6
0
class Test(unittest.TestCase):
    """Run the search command against solr cores filled from a scratch CMIP5 tree."""

    def setUp(self):
        """Fill both solr cores from a scratch tree and create the command.

        Loads the ``test.conf`` next to this module, connects to the
        ``files`` and ``latest`` cores (both must be empty), redirects the
        CMIP5 ``root_dir`` of ``DRSFile.DRS_STRUCTURE`` to a scratch directory,
        creates the files listed in ``self.files``, dumps the tree and loads
        the dump into both cores.
        """
        # Resolve the module directory first: with a bare relative ``__file__``
        # plain concatenation would yield '/test.conf' in the filesystem root.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.join(test_dir, 'test.conf')
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected
        self.all_files = SolrCore(core='files',
                                  host=self.solr_host,
                                  port=self.solr_port)
        self.latest = SolrCore(core='latest',
                               host=self.solr_host,
                               port=self.solr_port)
        self.assertEqual(self.all_files.status()['index']['numDocs'], 0)
        self.assertEqual(self.latest.status()['index']['numDocs'], 0)

        # add some files to the cores
        supermakedirs('/tmp/some_temp_solr_core/', 0o777)
        self.tmpdir = '/tmp/some_temp_solr_core'
        self.orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.tmpdir

        self.files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
        ]
        for rel_path in self.files:
            abs_path = os.path.abspath(os.path.join(self.tmpdir, rel_path))
            try:
                os.makedirs(os.path.dirname(abs_path))
            except OSError:  # pragma nocover
                # The directory is left over from a previous file or run.
                pass
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')
        dump_file = self.tmpdir + '/dump1.csv'
        # add the files to solr
        SolrCore.dump_fs_to_file(self.tmpdir + '/cmip5', dump_file)
        SolrCore.load_fs_from_file(dump_file,
                                   abort_on_errors=True,
                                   core_all_files=self.all_files,
                                   core_latest=self.latest)

        self.cmd = Command()

    def tearDown(self):
        """Empty both cores, restore the CMIP5 root dir and drop the scratch tree."""
        self.all_files.delete('*')
        self.latest.delete('*')

        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.orig_dir
        if os.path.isdir(self.tmpdir):
            shutil.rmtree(self.tmpdir)

    def run_command_with_capture(self, args_list=None):
        """Run ``self.cmd`` with ``args_list`` and return what it wrote to stdout.

        :param args_list: command line arguments; ``None`` means no arguments.
        :return: the captured output.
        """
        # A fresh list per call avoids sharing one mutable default between calls.
        if args_list is None:
            args_list = []

        stdout.startCapturing()
        try:
            stdout.reset()
            self.cmd.run(args_list)
        finally:
            # Always release stdout, even when the command raises.
            stdout.stopCapturing()
        return stdout.getvalue()

    def test_search_files(self):
        """Search files with no, one and several facet constraints."""

        all_files_output = u'''/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc
/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc
/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc
'''
        res = self.run_command_with_capture()
        self.assertEqual(res, all_files_output)

        res = self.run_command_with_capture(['variable=ua'])
        self.assertEqual(
            res,
            '/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc\n'
        )

        res = self.run_command_with_capture(['variable=ua', 'variable=tauu'])
        self.assertEqual(
            res,
            """/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc\n/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc\n"""
        )

        res = self.run_command_with_capture(
            ['variable=ua', 'variable=tauu', 'variable=wetso2'])
        self.assertEqual(res, all_files_output)

        # search specific version
        v = 'v20110419'
        res = self.run_command_with_capture(['variable=ua', 'version=%s' % v])
        self.assertIn(v, res)

        # test bad input: the previous nested assertRaises(CommandError, ...)
        # evaluated run() eagerly and therefore never took part in the check;
        # what is actually verified is that the command exits.
        with self.assertRaises(SystemExit):
            self.cmd.run(['badoption'])

    def test_search_facets(self):
        """List facets: all, a single one, constrained, limited and counted."""
        all_facets = """cmor_table: aero,amon
product: output1
realm: aerosol,atmos
data_type: cmip5
institute: mohc
project: cmip5
time_frequency: mon
experiment: decadal2008,decadal2009,historical
variable: tauu,ua,wetso2
model: hadcm3
ensemble: r2i1p1,r7i2p1,r9i3p1
"""
        res = self.run_command_with_capture(['--all-facets'])
        self.assertEqual(res, all_facets)

        res = self.run_command_with_capture(['--facet=variable'])
        self.assertEqual(res, 'variable: tauu,ua,wetso2\n')

        res = self.run_command_with_capture(
            ['--facet=variable', 'experiment=historical'])
        self.assertEqual(res, 'variable: wetso2\n')

        res = self.run_command_with_capture(
            ['--facet=variable', 'facet.limit=2'])
        self.assertEqual(res, 'variable: tauu,ua...\n')

        res = self.run_command_with_capture(
            ['--facet=variable', '--count-facet-values'])
        self.assertEqual(res, 'variable: tauu (1),ua (1),wetso2 (1)\n')

    def test_show_attributes(self):
        """``--attributes`` prints the comma separated attribute names."""
        res = self.run_command_with_capture(['--attributes'])
        self.assertEqual(
            res,
            'cmor_table, product, realm, data_type, institute, project, time_frequency, experiment, variable, model, ensemble\n'
        )

    def test_solr_backwards(self):
        """``--all-facets`` combined with a quoted ``file=`` constraint containing backslashes."""
        res = self.run_command_with_capture([
            '--all-facets',
            'file="\/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/\\tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc"'
        ])
        self.assertEqual(
            res, """cmor_table: amon
product: output1
realm: atmos
data_type: cmip5
institute: mohc
project: cmip5
time_frequency: mon
experiment: decadal2008
variable: tauu
model: hadcm3
ensemble: r9i3p1
""")
Example #7
0
    def test_ingest(self):
        """Ingest a crawl dump and check how versions end up in both cores.

        Five files are created, three of them being versions of the same
        ``ua`` dataset. After ingesting, every file must be in the ``files``
        core while the ``latest`` core must only hold the newest version of
        each dataset. A newer ``ua`` version is then added to the dump by hand
        and re-ingested; it must show up in both cores and replace the former
        latest version. Finally the field names reported by the ``files``
        core are checked. The CMIP5 root dir is restored even when the test
        fails.
        """
        supermakedirs('/tmp/some_temp_solr_core', 0o777)
        tmpdir = '/tmp/some_temp_solr_core'
        orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = tmpdir
        try:
            files = [
                'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
            ]
            latest_versions = [files[0], files[1], files[3]]
            multiversion_latest = files[3]
            old_versions = [files[2], files[4]]

            for rel_path in files:
                abs_path = os.path.abspath(os.path.join(tmpdir, rel_path))
                try:
                    os.makedirs(os.path.dirname(abs_path))
                except OSError:  # pragma nocover
                    pass
                with open(abs_path, 'w') as f_out:
                    f_out.write(' ')

            dump_file = tmpdir + '/dump1.csv'
            SolrCore.dump_fs_to_file(tmpdir + '/cmip5',
                                     dump_file,
                                     check=True,
                                     abort_on_errors=True)
            # test instances, check they are as expected
            SolrCore.load_fs_from_file(dump_file,
                                       abort_on_errors=True,
                                       core_all_files=self.all_files,
                                       core_latest=self.latest)

            # check
            ff_all = SolrFindFiles(core='files',
                                   host=self.solr_host,
                                   port=self.solr_port)
            ff_latest = SolrFindFiles(core='latest',
                                      host=self.solr_host,
                                      port=self.solr_port)
            all_entries = list(ff_all._search())
            latest_entries = list(ff_latest._search())
            # old version should be only on the general core
            for rel_path in files:
                self.assertIn(tmpdir + '/' + rel_path, all_entries)
            for rel_path in latest_versions:
                self.assertIn(tmpdir + '/' + rel_path, latest_entries)
            for rel_path in old_versions:
                self.assertNotIn(tmpdir + '/' + rel_path, latest_entries)

            # add new version
            new_version = tmpdir + '/' + 'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20120419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
            with open(dump_file, 'r') as f:
                content = f.readlines()
            content.insert(3, new_version + ',1564083682.09\n')
            with open(dump_file, "w") as f:
                f.write("".join(content))

            SolrCore.load_fs_from_file(dump_file,
                                       abort_on_errors=True,
                                       core_all_files=self.all_files,
                                       core_latest=self.latest)

            self.assertEqual(
                set(ff_all._search()).symmetric_difference(set(all_entries)).pop(),
                new_version)
            self.assertEqual(
                (set(ff_latest._search()) - set(latest_entries)).pop(),
                new_version)
            self.assertEqual(
                (set(latest_entries) - set(ff_latest._search())).pop(),
                tmpdir + '/' + multiversion_latest)

            # test get_solr_fields (facets)
            fields = self.all_files.get_solr_fields()
            print(fields)
            facets_to_be = [
                'model', 'product', 'realm', 'version', 'data_type', 'institute',
                'file_name', 'creation_time', 'cmor_table', 'time_frequency',
                'experiment', 'timestamp', 'file', 'time', 'variable', '_version_',
                'file_no_version', 'project', 'ensemble'
            ]
            # Compare sorted names: the key order of the returned mapping is
            # an implementation detail and must not decide the outcome.
            self.assertEqual(sorted(fields.keys()), sorted(facets_to_be))
        finally:
            DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = orig_dir
Example #8
0
class Test(unittest.TestCase):
    """Exercise ``SolrCore``: dumping, ingesting, reloading, unloading and creating."""

    def setUp(self):
        """Load the test configuration and connect to the two empty solr cores."""
        # Resolve the module directory first: with a bare relative ``__file__``
        # plain concatenation would yield '/test.conf' in the filesystem root.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.join(test_dir, 'test.conf')
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected
        self.all_files = SolrCore(core='files',
                                  host=self.solr_host,
                                  port=self.solr_port)
        self.latest = SolrCore(core='latest',
                               host=self.solr_host,
                               port=self.solr_port)
        self.assertEqual(self.all_files.status()['index']['numDocs'], 0)
        self.assertEqual(self.latest.status()['index']['numDocs'], 0)

    def tearDown(self):
        """Empty both cores so the next test starts from zero documents."""
        self.all_files.delete('*')
        self.latest.delete('*')
        unittest.TestCase.tearDown(self)

    def test_dump_to_file(self):
        """Check plain and gzipped dumps produced by ``SolrCore.dump_fs_to_file``.

        Creates three files in a fresh temporary tree, dumps the whole tree,
        then only the ``historical`` sub-tree, then the whole tree again into
        a ``.gz`` file, and checks the crawl-dir header line and the listed
        files each time. The temporary tree is removed even when an assertion
        fails.
        """
        tmpdir = tempfile.mkdtemp("_solr_core")
        try:
            files = [
                'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
            ]
            for rel_path in files:
                abs_path = os.path.abspath(os.path.join(tmpdir, rel_path))
                try:
                    os.makedirs(os.path.dirname(abs_path))
                except OSError:  # pragma nocover
                    pass
                with open(abs_path, 'w') as f_out:
                    f_out.write(' ')

            crawl_header = '%s\t%s' % (META_DATA.CRAWL_DIR, tmpdir)

            # full tree: every file is listed
            dump_file = tmpdir + '/dump1.csv'
            SolrCore.dump_fs_to_file(tmpdir + '/cmip5', dump_file)

            self.assertTrue(os.path.isfile(dump_file))
            with open(dump_file, 'r') as dump:
                dump_str = dump.read()
            self.assertIn(crawl_header, dump_str)
            self.assertIn(files[0], dump_str)
            self.assertIn(files[1], dump_str)
            self.assertIn(files[2], dump_str)

            # sub-tree: only the historical file is listed
            SolrCore.dump_fs_to_file(
                tmpdir + '/cmip5/output1/MOHC/HadCM3/historical', dump_file)

            self.assertTrue(os.path.isfile(dump_file))
            with open(dump_file, 'r') as dump:
                dump_str = dump.read()
            self.assertIn(crawl_header, dump_str)
            self.assertIn(files[0], dump_str)
            self.assertNotIn(files[1], dump_str)
            self.assertNotIn(files[2], dump_str)

            # check gzipped creation
            dump_file += '.gz'
            SolrCore.dump_fs_to_file(tmpdir + '/cmip5', dump_file)
            self.assertTrue(os.path.isfile(dump_file))
            with open(dump_file, 'rb') as dump:
                dump_gzip_header = dump.read(2)
            gzip_header = '\037\213'
            self.assertEqual(dump_gzip_header, gzip_header)
            import gzip
            with gzip.open(dump_file, 'rb') as dump:
                dump_str = dump.read()
            self.assertIn(crawl_header, dump_str)
            self.assertIn(files[0], dump_str)
            self.assertIn(files[1], dump_str)
            self.assertIn(files[2], dump_str)
        finally:
            if os.path.isdir(tmpdir):
                shutil.rmtree(tmpdir)

    def test_ingest(self):
        """Ingest a crawl dump and check how versions end up in both cores.

        Five files are created, three of them being versions of the same
        ``ua`` dataset. After ingesting, every file must be in the ``files``
        core while the ``latest`` core must only hold the newest version of
        each dataset. A newer ``ua`` version is then added to the dump by hand
        and re-ingested; it must show up in both cores and replace the former
        latest version. Finally the field names reported by the ``files``
        core are checked. The CMIP5 root dir is restored even when the test
        fails.
        """
        supermakedirs('/tmp/some_temp_solr_core', 0o777)
        tmpdir = '/tmp/some_temp_solr_core'
        orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = tmpdir
        try:
            files = [
                'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
                'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
            ]
            latest_versions = [files[0], files[1], files[3]]
            multiversion_latest = files[3]
            old_versions = [files[2], files[4]]

            for rel_path in files:
                abs_path = os.path.abspath(os.path.join(tmpdir, rel_path))
                try:
                    os.makedirs(os.path.dirname(abs_path))
                except OSError:  # pragma nocover
                    pass
                with open(abs_path, 'w') as f_out:
                    f_out.write(' ')

            dump_file = tmpdir + '/dump1.csv'
            SolrCore.dump_fs_to_file(tmpdir + '/cmip5',
                                     dump_file,
                                     check=True,
                                     abort_on_errors=True)
            # test instances, check they are as expected
            SolrCore.load_fs_from_file(dump_file,
                                       abort_on_errors=True,
                                       core_all_files=self.all_files,
                                       core_latest=self.latest)

            # check
            ff_all = SolrFindFiles(core='files',
                                   host=self.solr_host,
                                   port=self.solr_port)
            ff_latest = SolrFindFiles(core='latest',
                                      host=self.solr_host,
                                      port=self.solr_port)
            all_entries = list(ff_all._search())
            latest_entries = list(ff_latest._search())
            # old version should be only on the general core
            for rel_path in files:
                self.assertIn(tmpdir + '/' + rel_path, all_entries)
            for rel_path in latest_versions:
                self.assertIn(tmpdir + '/' + rel_path, latest_entries)
            for rel_path in old_versions:
                self.assertNotIn(tmpdir + '/' + rel_path, latest_entries)

            # add new version
            new_version = tmpdir + '/' + 'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20120419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
            with open(dump_file, 'r') as f:
                content = f.readlines()
            content.insert(3, new_version + ',1564083682.09\n')
            with open(dump_file, "w") as f:
                f.write("".join(content))

            SolrCore.load_fs_from_file(dump_file,
                                       abort_on_errors=True,
                                       core_all_files=self.all_files,
                                       core_latest=self.latest)

            self.assertEqual(
                set(ff_all._search()).symmetric_difference(set(all_entries)).pop(),
                new_version)
            self.assertEqual(
                (set(ff_latest._search()) - set(latest_entries)).pop(),
                new_version)
            self.assertEqual(
                (set(latest_entries) - set(ff_latest._search())).pop(),
                tmpdir + '/' + multiversion_latest)

            # test get_solr_fields (facets)
            fields = self.all_files.get_solr_fields()
            print(fields)
            facets_to_be = [
                'model', 'product', 'realm', 'version', 'data_type', 'institute',
                'file_name', 'creation_time', 'cmor_table', 'time_frequency',
                'experiment', 'timestamp', 'file', 'time', 'variable', '_version_',
                'file_no_version', 'project', 'ensemble'
            ]
            # Compare sorted names: the key order of the returned mapping is
            # an implementation detail and must not decide the outcome.
            self.assertEqual(sorted(fields.keys()), sorted(facets_to_be))
        finally:
            DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = orig_dir
        # NOTE(review): the scratch directory is not removed here; the cleanup
        # was commented out in the previous version - confirm that is deliberate.

    def test_reload(self):
        """Reloading the core answers with nothing but a response header."""
        res = self.all_files.reload()
        # list() keeps the comparison valid when keys() returns a view.
        self.assertEqual(['responseHeader'], list(res.keys()))

    def test_unload_and_create(self):
        """An unloaded core reports an empty status until it is created again."""
        self.all_files.unload()
        status = self.all_files.status()
        self.assertEqual({}, status)
        self.all_files.create()
        self.assertEqual(len(self.all_files.status()), 9)
Example #9
0
class Test(unittest.TestCase):
    """Exercise ``Command`` (``--crawl`` / ``--ingest``) against the Solr cores.

    ``setUp`` verifies that the ``files`` and ``latest`` cores are empty and
    creates five placeholder files below ``/tmp/some_temp_solr_core``;
    ``tearDown`` empties the cores, removes the ``UserCrawl`` rows and
    deletes the directory again.
    """

    def setUp(self):
        """Load ``test.conf``, connect to both cores and create the files."""
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.dirname(
            __file__) + '/test.conf'
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected
        self.all_files = SolrCore(core='files',
                                  host=self.solr_host,
                                  port=self.solr_port)
        self.latest = SolrCore(core='latest',
                               host=self.solr_host,
                               port=self.solr_port)
        self.assertEqual(self.all_files.status()['index']['numDocs'], 0)
        self.assertEqual(self.latest.status()['index']['numDocs'], 0)

        # add some files to the cores
        # ``0o777`` is the octal spelling accepted by Python 2.6+ and 3.
        supermakedirs('/tmp/some_temp_solr_core/', 0o777)
        self.tmpdir = '/tmp/some_temp_solr_core'
        # Remember the configured root so tearDown can restore it.
        self.orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.tmpdir

        # The last three entries differ only in their version directory.
        self.files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
        ]
        for f in self.files:
            abs_path = os.path.abspath(os.path.join(self.tmpdir, f))
            parent = os.path.dirname(abs_path)
            try:
                os.makedirs(parent)
            except OSError:  # pragma nocover
                # An already existing directory is fine; any other failure
                # would only resurface as a confusing error in open() below.
                if not os.path.isdir(parent):
                    raise
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')
        self.cmd = Command()

    def tearDown(self):
        """Empty both cores, drop crawl records and remove the test files."""
        self.all_files.delete('*')
        self.latest.delete('*')
        UserCrawl.objects.all().delete()
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.orig_dir
        if os.path.isdir(self.tmpdir):
            shutil.rmtree(self.tmpdir)

    def test_command(self):
        """Crawl the test tree into a file, then ingest that file.

        Running the command without arguments, or with ``--crawl`` but no
        ``--output``, must raise ``SystemExit``.
        """
        with self.assertRaises(SystemExit):
            self.cmd.run([])

        with self.assertRaises(SystemExit):
            self.cmd.run(['--crawl=%s/cmip5' % self.tmpdir])

        # test crawl dir
        output = '/tmp/crawl_output.txt'
        tar_name = os.path.basename(output)
        try:
            self.cmd.run(
                ['--crawl=%s/cmip5' % self.tmpdir,
                 '--output=%s' % output])
            self.assertTrue(os.path.isfile(output))
            crawl_obj = UserCrawl.objects.get(tar_file=tar_name)
            self.assertEqual(crawl_obj.status, 'crawling')
            # test ingesting
            self.assertEqual(len(list(SolrFindFiles.search())), 0)
            self.cmd.run(['--ingest=%s' % output])
            crawl_obj = UserCrawl.objects.get(tar_file=tar_name)
            self.assertEqual(crawl_obj.status, 'success')
            self.assertEqual(len(list(SolrFindFiles.search())), 3)

            # test custom host and port
            self.cmd.run([
                '--ingest=%s' % output,
                '--solr-url=http://%s:%s' % (self.solr_host, self.solr_port)
            ])
            self.assertEqual(
                len(list(SolrFindFiles.search(latest_version=False))), 5)
        finally:
            # Remove the crawl output even when an assertion above failed,
            # so a broken run does not leave the file behind in /tmp.
            if os.path.isfile(output):
                os.remove(output)
Example #10
0
class Test(unittest.TestCase):
    """Tests for ``SolrFindFiles`` searches and facets.

    ``setUp`` verifies that the ``files`` and ``latest`` cores are empty,
    creates five placeholder files below ``/tmp/some_temp_solr_core`` and
    loads them into both cores through a dump file.
    """

    def setUp(self):
        """Load ``test.conf``, create the test files and ingest them."""
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.dirname(__file__) + '/test.conf'
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected
        self.all_files = SolrCore(core='files', host=self.solr_host, port=self.solr_port)
        self.latest = SolrCore(core='latest', host=self.solr_host, port=self.solr_port)
        self.assertEqual(self.all_files.status()['index']['numDocs'], 0)
        self.assertEqual(self.latest.status()['index']['numDocs'], 0)

        # add some files to the cores
        # ``0o777`` is the octal spelling accepted by Python 2.6+ and 3.
        supermakedirs('/tmp/some_temp_solr_core/', 0o777)
        self.tmpdir = '/tmp/some_temp_solr_core'
        # Remember the configured root so tearDown can restore it.
        self.orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.tmpdir

        # The last three entries differ only in their version directory.
        self.files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc']
        for f in self.files:
            abs_path = os.path.abspath(os.path.join(self.tmpdir, f))
            parent = os.path.dirname(abs_path)
            try:
                os.makedirs(parent)
            except OSError:  # pragma nocover
                # An already existing directory is fine; any other failure
                # would only resurface as a confusing error in open() below.
                if not os.path.isdir(parent):
                    raise
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')
        dump_file = self.tmpdir + '/dump1.csv'
        # add the files to solr
        SolrCore.dump_fs_to_file(self.tmpdir + '/cmip5', dump_file)
        SolrCore.load_fs_from_file(
            dump_file, abort_on_errors=True,
            core_all_files=self.all_files, core_latest=self.latest
        )

    def tearDown(self):
        """Empty both cores, restore the CMIP5 root and remove the files."""
        self.all_files.delete('*')
        self.latest.delete('*')

        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.orig_dir
        if os.path.isdir(self.tmpdir):
            shutil.rmtree(self.tmpdir)

    def test_solr_search(self):
        """Check result counts and paths returned by ``search``."""
        # search some files
        solr_search = SolrFindFiles()
        all_files = solr_search.search()
        self.assertEqual(len(list(all_files)), 3)
        hist = solr_search.search(experiment='historical')
        self.assertEqual(list(hist), [os.path.join(self.tmpdir, self.files[0])])
        all_files = solr_search.search(latest_version=False)
        self.assertEqual(len(list(all_files)), 5)
        # test OR query
        or_result = solr_search.search(variable=['tauu', 'wetso2'])
        expected = {os.path.join(self.tmpdir, e) for e in self.files[:2]}
        self.assertEqual(expected, set(or_result))

    def test_facet_search(self):
        """Check the facet dictionaries returned by ``facets``."""
        facets_to_be = {'cmor_table': ['aero', 1, 'amon', 2], 'product': ['output1', 3],
                        'realm': ['aerosol', 1, 'atmos', 2], 'data_type': ['cmip5', 3],
                        'institute': ['mohc', 3], 'project': ['cmip5', 3], 'time_frequency': ['mon', 3],
                        'experiment': ['decadal2008', 1, 'decadal2009', 1, 'historical', 1],
                        'variable': ['tauu', 1, 'ua', 1, 'wetso2', 1], 'model': ['hadcm3', 3],
                        'ensemble': ['r2i1p1', 1, 'r7i2p1', 1, 'r9i3p1', 1]}
        # ``facets`` is called on the class itself, not on an instance.
        s = SolrFindFiles
        all_facets = s.facets()
        self.assertEqual(len(all_facets), 11)
        self.assertEqual(all_facets, facets_to_be)

        var_facets = s.facets(facets=['variable'])
        self.assertEqual(var_facets, dict(variable=facets_to_be['variable']))
        experiment_facets = s.facets(facets='experiment', cmor_table='amon')
        self.assertEqual(experiment_facets, {'experiment': ['decadal2008', 1, 'decadal2009', 1]})

        # test files core
        res = s.facets(facets='variable,project', latest_version=False)
        # Compare the key set without relying on dict iteration order.
        self.assertEqual(sorted(res.keys()), ['project', 'variable'])
        self.assertEqual(res, {'variable': ['tauu', 1, 'ua', 3, 'wetso2', 1], 'project': ['cmip5', 5]})
Example #11
0
class Test(unittest.TestCase):
    def setUp(self):
        """Load ``test.conf``, create the test files and ingest them.

        Both cores (``files`` and ``latest``) must be empty on entry. Five
        placeholder files are written below ``/tmp/some_temp_solr_core``
        and loaded into the cores through a dump file. ``self.fn`` and
        ``self.drs`` refer to the first of those files.
        """
        os.environ['EVALUATION_SYSTEM_CONFIG_FILE'] = os.path.dirname(
            __file__) + '/test.conf'
        config.reloadConfiguration()
        self.solr_port = config.get('solr.port')
        self.solr_host = config.get('solr.host')
        # test instances, check they are as expected
        self.all_files = SolrCore(core='files',
                                  host=self.solr_host,
                                  port=self.solr_port)
        self.latest = SolrCore(core='latest',
                               host=self.solr_host,
                               port=self.solr_port)
        self.assertEqual(self.all_files.status()['index']['numDocs'], 0)
        self.assertEqual(self.latest.status()['index']['numDocs'], 0)

        # add some files to the cores
        # ``0o777`` is the octal spelling accepted by Python 2.6+ and 3.
        supermakedirs('/tmp/some_temp_solr_core/', 0o777)
        self.tmpdir = '/tmp/some_temp_solr_core'
        # Remember the configured root so tearDown can restore it.
        self.orig_dir = DRSFile.DRS_STRUCTURE[CMIP5]['root_dir']
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.tmpdir

        # The last three entries differ only in their version directory.
        self.files = [
            'cmip5/output1/MOHC/HadCM3/historical/mon/aerosol/aero/r2i1p1/v20110728/wetso2/wetso2_aero_HadCM3_historical_r2i1p1_190912-193411.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110719/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110819/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc',
            'cmip5/output1/MOHC/HadCM3/decadal2009/mon/atmos/Amon/r7i2p1/v20110419/ua/ua_Amon_HadCM3_decadal2009_r7i2p1_200911-201912.nc'
        ]
        for f in self.files:
            abs_path = os.path.abspath(os.path.join(self.tmpdir, f))
            parent = os.path.dirname(abs_path)
            try:
                os.makedirs(parent)
            except OSError:  # pragma nocover
                # An already existing directory is fine; any other failure
                # would only resurface as a confusing error in open() below.
                if not os.path.isdir(parent):
                    raise
            with open(abs_path, 'w') as f_out:
                f_out.write(' ')
        dump_file = self.tmpdir + '/dump1.csv'
        # add the files to solr
        SolrCore.dump_fs_to_file(self.tmpdir + '/cmip5', dump_file)
        SolrCore.load_fs_from_file(dump_file,
                                   abort_on_errors=True,
                                   core_all_files=self.all_files,
                                   core_latest=self.latest)

        self.fn = os.path.join(self.tmpdir, self.files[0])
        self.drs = DRSFile.from_path(self.fn)

    def tearDown(self):
        """Empty both cores, restore the CMIP5 root and remove the files."""
        for core in (self.all_files, self.latest):
            core.delete('*')

        # Put back the root directory that setUp replaced.
        DRSFile.DRS_STRUCTURE[CMIP5]['root_dir'] = self.orig_dir
        if os.path.isdir(self.tmpdir):
            shutil.rmtree(self.tmpdir)

    def test_solr_search(self):
        """Search through ``DRSFile.solr_search`` in its different modes.

        Covers a ``path_only`` search, a search yielding ``DRSFile``
        objects and a search restricted by ``drs_structure``.
        """
        # test path_only search
        res = DRSFile.solr_search(path_only=True, variable='tauu')
        self.assertEqual(list(res), [
            u'/tmp/some_temp_solr_core/cmip5/output1/MOHC/HadCM3/decadal2008/mon/atmos/Amon/r9i3p1/v20120523/tauu/tauu_Amon_HadCM3_decadal2008_r9i3p1_200811-201812.nc'
        ])

        # test drs search
        for drs_file in DRSFile.solr_search(variable='ua'):
            self.assertIsInstance(drs_file, DRSFile)

        # use drs_structure
        # Materialise the result so an empty search fails the count
        # assertion instead of raising NameError on an unbound loop index.
        found = list(DRSFile.solr_search(drs_structure=CMIP5))
        for drs_file in found:
            self.assertIsInstance(drs_file, DRSFile)
        self.assertEqual(len(found), 3)

    def test_compare(self):
        """Check ``==`` between ``DRSFile`` objects and against a path string.

        A ``DRSFile`` must equal itself, must differ from the ``DRSFile``
        of another file, and must not equal a plain path string.
        """
        other_path = os.path.join(self.tmpdir, self.files[1])
        other_drs = DRSFile.from_path(other_path)

        # ``==`` is spelled out on purpose: the operator itself is under test.
        self.assertTrue(self.drs == self.drs)
        self.assertFalse(self.drs == other_drs)
        self.assertFalse(other_drs == other_path)

    def test_json_path(self):
        """``to_json`` must return a ``str``; ``to_path`` must equal ``self.fn``."""
        as_json = self.drs.to_json()
        self.assertTrue(isinstance(as_json, str))

        as_path = self.drs.to_path()
        self.assertEqual(as_path, self.fn)

    def test_find_structure_in_path(self):
        """Look up the DRS structure for the ``cmip5`` test directory.

        The plain call must return ``'cmip5'``, the call with
        ``allow_multiples=True`` must return ``['cmip5']``, and a path
        outside the test tree must raise.
        """
        cmip5_dir = '/tmp/some_temp_solr_core/cmip5'

        single = DRSFile.find_structure_in_path(cmip5_dir)
        self.assertEqual(single, 'cmip5')

        multiple = DRSFile.find_structure_in_path(cmip5_dir,
                                                  allow_multiples=True)
        self.assertEqual(multiple, ['cmip5'])

        with self.assertRaises(Exception):
            DRSFile.find_structure_in_path('/no/valid/path')

    def test_structure_from_path(self):
        """Look up the DRS structure for the file path in ``self.fn``.

        The plain call must return ``'cmip5'``, the call with
        ``allow_multiples=True`` must return ``['cmip5']``, and a file path
        outside the test tree must raise.
        """
        single = DRSFile.find_structure_from_path(self.fn)
        self.assertEqual(single, 'cmip5')

        multiple = DRSFile.find_structure_from_path(self.fn,
                                                    allow_multiples=True)
        self.assertEqual(multiple, ['cmip5'])

        with self.assertRaises(Exception):
            DRSFile.find_structure_from_path('/no/valid/file_path')

    def test_from_dict(self):
        """Rebuild a ``DRSFile`` from ``self.drs.dict`` and compare paths."""
        rebuilt = DRSFile.from_dict(self.drs.dict, CMIP5)

        self.assertTrue(isinstance(rebuilt, DRSFile))
        self.assertEqual(self.drs.to_path(), rebuilt.to_path())

    def test_from_json(self):
        """Rebuild a ``DRSFile`` from its JSON form and compare paths."""
        rebuilt = DRSFile.from_json(self.drs.to_json(), CMIP5)

        self.assertTrue(isinstance(rebuilt, DRSFile))
        self.assertEqual(self.drs.to_path(), rebuilt.to_path())

    def test_to_dataset(self):
        """The versioned dataset path must contain the file's directory part."""
        dataset_path = self.drs.to_dataset_path(versioned=True)

        # Everything of the relative file path except the file name itself.
        path_parts = self.files[0].split('/')
        expected_dir = '/'.join(path_parts[:-1])
        self.assertIn(expected_dir, dataset_path)