def test_union_catalog():
    """A catalog built from two YAML files exposes the union of their entries."""
    base = os.path.dirname(__file__)
    sources = [os.path.join(base, name)
               for name in ('catalog_union_1.yml', 'catalog_union_2.yml')]
    union_cat = Catalog(sources)

    assert_items_equal(list(union_cat),
                       ['entry1', 'entry1_part', 'use_example1'])

    expected_desc = {
        'container': 'dataframe',
        'user_parameters': [{
            'name': 'part',
            'description': 'part of filename',
            'default': '1',
            'type': 'str',
            'allowed': ['1', '2'],
        }],
        'description': 'entry1 part',
        'direct_access': 'allow',
    }
    assert union_cat.entry1_part.describe() == expected_desc

    desc_open = union_cat.entry1_part.describe_open()
    assert desc_open['args']['urlpath'].endswith('entry1_1.csv')
    del desc_open['args']['urlpath']  # Full path will be system dependent
    assert desc_open == {
        'args': {'metadata': {'bar': [2, 4, 6], 'foo': 'baz'}},
        'description': 'entry1 part',
        'direct_access': 'allow',
        'metadata': {'bar': [2, 4, 6], 'foo': 'baz'},
        'plugin': 'csv',
    }

    # Implied creation of data source
    assert union_cat.entry1.container == 'dataframe'
    assert union_cat.entry1.metadata == dict(foo='bar', bar=[1, 2, 3])

    # Use default parameters in explict creation of data source
    assert union_cat.entry1_part().container == 'dataframe'

    # Specify parameters in creation of data source
    assert union_cat.entry1_part(part='2').container == 'dataframe'
def test_remote_env(intake_server):
    """Check env-var user parameters on both server and client sides."""
    import os
    os.environ['INTAKE_TEST'] = 'client'

    catalog = Catalog(intake_server)
    # remote_env: the server side does not substitute the client's variable,
    # so the raw template text is still present.
    remote_src = catalog.remote_env.get()
    assert 'INTAKE_TEST' in remote_src._user_parameters['intake_test']

    # local_env: resolved with the client's environment.
    local_src = catalog.local_env.get()
    assert 'client' == local_src._user_parameters['intake_test']

    # prevents *client* from getting env
    catalog = Catalog(intake_server, getenv=False)
    local_src = catalog.local_env.get()
    assert 'INTAKE_TEST' in local_src._user_parameters['intake_test']
def test_read_direct(intake_server):
    """Parameterised entry with direct access reads the file locally."""
    catalog = Catalog(intake_server)
    source = catalog['entry1_part'].get(part='2')

    expected_info = {
        'datashape': None,
        'dtype': np.dtype([('name', 'O'), ('score', '<f8'), ('rank', '<i8')]),
        'npartitions': 1,
        'shape': (None,),  # do not know size of CSV ahead of time
        'metadata': {'bar': [2, 4, 6], 'foo': 'baz'},
    }
    assert source.discover() == expected_info
    assert source.metadata == {'foo': 'baz', 'bar': [2, 4, 6]}
    assert source.description == 'entry1 part'

    df = source.read()
    file2 = os.path.join(os.path.dirname(__file__), 'entry1_2.csv')
    expected_df = pd.read_csv(file2)
    assert source.direct  # this should be direct
    assert expected_df.equals(df)
def test_read(intake_server):
    """Reading entry1 through the server concatenates both CSV partitions."""
    catalog = Catalog(intake_server)
    source = catalog['entry1'].get()

    info = source.discover()
    assert info == {
        'datashape': None,
        'dtype': np.dtype([('name', 'O'), ('score', '<f8'), ('rank', '<i8')]),
        'npartitions': 2,
        'shape': (None,),  # Do not know CSV size ahead of time
    }
    assert source.metadata == {'foo': 'bar', 'bar': [1, 2, 3]}

    df = source.read()
    test_dir = os.path.dirname(__file__)
    frames = [pd.read_csv(os.path.join(test_dir, name))
              for name in ('entry1_1.csv', 'entry1_2.csv')]
    expected_df = pd.concat(frames)
    assert not source.direct  # this should be proxied
    assert expected_df.equals(df)
def listing(args):
    """Print each entry of the catalog at args.uri; verbose if args.full."""
    catalog = Catalog(args.uri)
    for entry in list(catalog):
        if not args.full:
            print(entry)
        else:
            print_entry_info(catalog, entry)
def test_reload(intake_server):
    # The server should pick up on-disk changes to the catalog file without
    # a restart: rewrite the YAML and expect the new entry to appear.
    catalog = Catalog(intake_server)

    entries = list(catalog)
    assert entries == ['use_example1']

    # Overwrite the served catalog file, adding a second source entry.
    with open(os.path.join(TMP_DIR, YAML_FILENAME), 'w') as f:
        f.write('''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - dir: !template '{{ CATALOG_DIR }}/example_plugin_dir'
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
  use_example1_1:
    description: example1 other
    driver: example1
    args: {}
''')

    # Give the server's periodic refresh time to notice the modified file.
    time.sleep(2)

    assert_items_equal(list(catalog), ['use_example1', 'use_example1_1'])
def serve_chart(line):
    """IPython line-magic body: publish a chart variable into the framania catalog.

    ``line`` must contain exactly three space-separated tokens: the chart
    name, the chart version, and the name of the global variable holding the
    chart object.
    """
    parts = line.split(' ')
    # Bug fix: the old guard only rejected fewer than 3 tokens, so extra
    # tokens crashed the 3-way unpack below with ValueError instead of
    # printing the usage message.  Require exactly three tokens.
    if len(parts) != 3:
        print(
            'Make sure input `%serve_chart {chart name} {chart version} {chart variable}`')
        return
    pj_name = get_ipython().user_global_ns['__framania_pj_name__']
    name = get_ipython().user_global_ns['__framania_analysis_name__']
    version = get_ipython().user_global_ns['__framania_analysis_version__']
    data_dir = get_ipython().user_global_ns['__framania_data_dir__']
    chart_name, chart_version, result_var_name = parts
    result_var = get_ipython().user_global_ns[result_var_name]
    # Persist the chart object under the versioned analysis directory.
    chart_dir = data_dir / 'chart' / get_version_name(name, version)
    manager = HVManiaManager(pj_name, chart_dir)
    manager[chart_name] = result_var
    # Register the chart as a new source in the framania catalog file.
    catalog = FramaniaExtendedIntakeCatalog(
        get_ipython().user_global_ns['__framania_catalog_file__'])
    source = FramaniaExtendedIntakeSource(
        Catalog(), chart_name, chart_version, None,
        get_ipython().user_global_ns['__framania_analysis_upstreams__'], False)
    catalog.append(source)
    print(f'Chart is served as {chart_name} {chart_version}')
def test_catalog_file_removal(temp_catalog_file):
    """Deleting the catalog file empties the catalog after a refresh."""
    cat = Catalog(os.path.dirname(temp_catalog_file))
    assert set(cat) == {'a', 'b'}

    os.remove(temp_catalog_file)
    time.sleep(1.5)  # wait for catalog refresh
    assert set(cat) == set()
def test_union_catalog():
    """Entries from both union YAML files are visible and fully usable."""
    path = os.path.dirname(__file__)
    union_cat = Catalog([os.path.join(path, 'catalog_union_1.yml'),
                         os.path.join(path, 'catalog_union_2.yml')])

    assert_items_equal(list(union_cat),
                       ['entry1', 'entry1_part', 'use_example1'])

    part_param = {
        'name': 'part',
        'description': 'part of filename',
        'default': '1',
        'type': 'str',
        'allowed': ['1', '2'],
    }
    assert union_cat.entry1_part.describe() == {
        'container': 'dataframe',
        'user_parameters': [part_param],
        'description': 'entry1 part',
        'direct_access': 'allow',
    }

    desc_open = union_cat.entry1_part.describe_open()
    assert desc_open['args']['urlpath'].endswith('entry1_1.csv')
    del desc_open['args']['urlpath']  # Full path will be system dependent
    meta = {'bar': [2, 4, 6], 'foo': 'baz'}
    assert desc_open == {
        'args': {'metadata': meta},
        'description': 'entry1 part',
        'direct_access': 'allow',
        'metadata': meta,
        'plugin': 'csv',
    }

    # Implied creation of data source
    assert union_cat.entry1.container == 'dataframe'
    assert union_cat.entry1.metadata == dict(foo='bar', bar=[1, 2, 3])

    # Use default parameters in explict creation of data source
    assert union_cat.entry1_part().container == 'dataframe'

    # Specify parameters in creation of data source
    assert union_cat.entry1_part(part='2').container == 'dataframe'
def test_read_partition(intake_server):
    """Each partition maps to one CSV file, regardless of read order."""
    catalog = Catalog(intake_server)
    source = catalog.entry1.get()

    # Read partitions out of order to prove each is addressed directly.
    second = source.read_partition(1)
    first = source.read_partition(0)

    test_dir = os.path.dirname(__file__)
    assert pd.read_csv(os.path.join(test_dir, 'entry1_1.csv')).equals(first)
    assert pd.read_csv(os.path.join(test_dir, 'entry1_2.csv')).equals(second)
def test_read_chunks(intake_server):
    """Chunked reads yield one chunk per partition; together they equal the whole."""
    catalog = Catalog(intake_server)
    source = catalog.entry1.get()

    chunks = list(source.read_chunked())
    assert len(chunks) == 2

    test_dir = os.path.dirname(__file__)
    frames = [pd.read_csv(os.path.join(test_dir, name))
              for name in ('entry1_1.csv', 'entry1_2.csv')]
    assert pd.concat(frames).equals(pd.concat(chunks))
def test_pickle(intake_server):
    """A remote source survives a pickle round-trip and still reads data."""
    catalog = Catalog(intake_server)
    source = catalog.entry1.get()

    clone = pickle.loads(pickle.dumps(source, pickle.HIGHEST_PROTOCOL))
    df = clone.read()

    test_dir = os.path.dirname(__file__)
    expected_df = pd.concat([
        pd.read_csv(os.path.join(test_dir, 'entry1_1.csv')),
        pd.read_csv(os.path.join(test_dir, 'entry1_2.csv')),
    ])
    assert expected_df.equals(df)
def test_info_describe(intake_server):
    """describe() reports container, access mode and parameters per entry."""
    catalog = Catalog(intake_server)

    assert_items_equal(list(catalog),
                       ['use_example1', 'entry1', 'entry1_part'])

    expected = {
        'container': 'dataframe',
        'description': 'entry1 full',
        'name': 'entry1',
        'direct_access': 'forbid',
        'user_parameters': [],
    }
    assert catalog['entry1'].describe() == expected

    info = catalog['entry1_part'].describe()
    assert info['direct_access'] == 'allow'
def test_reload_newfile(intake_server):
    # A brand-new catalog file dropped into the watched directory should be
    # merged into the served catalog on the next refresh cycle.
    catalog = Catalog(intake_server)

    orig_entries = list(catalog)
    assert 'example2' not in orig_entries

    filename = os.path.join(TMP_DIR, 'intake_test_catalog2.yml')
    with open(filename, 'w') as f:
        f.write('''
sources:
  example2:
    description: source 2
    driver: csv
    args: {}
''')

    # Wait for the server's periodic refresh to pick up the new file.
    time.sleep(2)

    assert_items_equal(list(catalog), ['example2'] + orig_entries)
def test_reload_missing_local_directory(tmpdir):
    # Opening a catalog on a directory that does not yet exist should yield
    # an empty catalog; entries appear once the directory and file are made.
    # NOTE(review): os.mkdir(tmpdir) below implies tmpdir must NOT exist when
    # the test starts — confirm the fixture supplies a missing path here.
    catalog = Catalog(tmpdir)
    assert_items_equal(list(catalog), [])

    os.mkdir(tmpdir)
    with open(os.path.join(tmpdir, YAML_FILENAME), 'w') as f:
        f.write('''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - dir: '{{ CATALOG_DIR }}/example_plugin_dir'
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
''')

    assert_items_equal(list(catalog), ['use_example1'])
def test_catalog_description():
    """Make sure the description comes through the save."""
    entry = LocalCatalogEntry(
        'name',
        description='description',
        driver=intake.catalog.local.YAMLFileCatalog,
    )
    cat1 = Catalog.from_dict(
        {'name': entry},
        name='overall catalog name',
        description='overall catalog description',
    )
    cat1.save('desc_test.yaml')

    # Round-trip: reopening the saved file must preserve a description.
    cat2 = intake.open_catalog('desc_test.yaml')
    assert cat2.description is not None
def main(argv=None):
    """Entry point for the Intake catalog server CLI.

    Parses command-line options, builds a Catalog from the given YAML
    file(s), and runs a tornado-based IntakeServer until interrupted.

    Parameters
    ----------
    argv : list of str, optional
        Full argument vector (``argv[0]`` is the program name); defaults
        to ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(description='Intake Catalog Server')
    parser.add_argument('-p', '--port', type=int, default=5000,
                        help='port number for server to listen on')
    parser.add_argument('--sys-exit-on-sigterm', action='store_true',
                        help='internal flag used during unit testing to '
                             'ensure .coverage file is written')
    parser.add_argument('catalog_args', metavar='FILE', type=str, nargs='+',
                        help='Name of catalog YAML file')
    args = parser.parse_args(argv[1:])

    if args.sys_exit_on_sigterm:
        signal.signal(signal.SIGTERM, call_exit_on_sigterm)

    print('Creating catalog from:')
    for arg in args.catalog_args:
        print(' - %s' % arg)
    # Fixed: removed the leftover debug line `print("catalog_args", ...)` —
    # the loop above already reports every catalog file.
    catalog = Catalog(args.catalog_args)
    print('Entries:', ','.join(list(catalog)))

    print('Listening on port %d' % args.port)

    server = IntakeServer(catalog)
    app = server.make_app()
    # Close idle connections after an hour.
    server.start_periodic_functions(close_idle_after=3600.0)
    app.listen(args.port)
    tornado.ioloop.IOLoop.current().start()
def test_read(intake_server):
    """Proxied read of entry1 matches the concatenation of both CSV files."""
    catalog = Catalog(intake_server)
    source = catalog['entry1'].get()

    test_dir = os.path.dirname(__file__)
    expected_df = pd.concat([
        pd.read_csv(os.path.join(test_dir, 'entry1_1.csv')),
        pd.read_csv(os.path.join(test_dir, 'entry1_2.csv')),
    ])
    meta = expected_df[:0]

    info = source.discover()
    assert info['datashape'] is None
    assert info['dtype'].equals(meta)
    assert info['npartitions'] == 2
    assert info['shape'] == (None, 3)  # Do not know CSV size ahead of time
    assert source.metadata == dict(foo='bar', bar=[1, 2, 3])

    df = source.read()
    assert not source.direct  # this should be proxied
    assert expected_df.equals(df)
def test_read_direct(intake_server):
    """Direct-access read of the parameterised entry matches the chosen file."""
    catalog = Catalog(intake_server)
    source = catalog['entry1_part'].get(part='2')

    file2 = os.path.join(os.path.dirname(__file__), 'entry1_2.csv')
    expected_df = pd.read_csv(file2)
    meta = expected_df[:0]

    info = source.discover()
    assert info['datashape'] is None
    assert info['dtype'].equals(meta)
    assert info['npartitions'] == 1
    assert info['shape'] == (None, 3)  # Do not know CSV size ahead of time
    assert info['metadata'] == {'bar': [2, 4, 6], 'foo': 'baz'}
    assert source.metadata == dict(foo='baz', bar=[2, 4, 6])
    assert source.description == 'entry1 part'

    df = source.read()
    assert source.direct  # this should be direct
    assert expected_df.equals(df)
def test_remote_cat(http_server):
    """A catalog YAML served over HTTP can be opened and described."""
    cat = Catalog(http_server + 'catalog1.yml')
    assert 'entry1' in cat
    assert cat.entry1.describe()
def test_bad_url(intake_server):
    """A nonsense path under the server URL must raise on catalog open."""
    with pytest.raises(Exception):
        Catalog(intake_server + '/nonsense_prefix')
def test_duplicate_parameters():
    """A catalog YAML that repeats a parameter key must fail to load."""
    path = os.path.dirname(__file__)
    uri = os.path.join(path, 'catalog_dup_parameters.yml')

    # Fixed: the old code bound the raises context to `except_info` and the
    # Catalog to `c`, but used neither; the dead locals are dropped.
    with pytest.raises(exceptions.DuplicateKeyError):
        Catalog(uri)
def test_empty_catalog():
    """A Catalog constructed with no sources iterates as empty."""
    assert list(Catalog()) == []
def test_unknown_source(intake_server):
    """Describing an entry the server does not know about must raise."""
    catalog = Catalog(intake_server)
    with pytest.raises(Exception):
        catalog['does_not_exist'].describe()
def catalog1():
    """Return a Catalog loaded from the bundled catalog1.yml file."""
    here = os.path.dirname(__file__)
    return Catalog(os.path.join(here, 'catalog1.yml'))
def test_parser_validation_error(filename):
    """Each malformed catalog fixture must raise ValidationError on load."""
    with pytest.raises(exceptions.ValidationError):
        Catalog(abspath(filename + ".yml"))
def test_remote_datasource_interface(intake_server):
    """A source obtained through the server satisfies the datasource API."""
    source = Catalog(intake_server)['entry1'].get()
    verify_datasource_interface(source)
def test_secret_auth_fail(intake_server_with_auth):
    """Connecting with the wrong shared secret must be rejected."""
    auth = SecretClientAuth(secret='test_wrong_secret')
    # Fixed: the result was previously bound to an unused `catalog` local;
    # only the construction (which raises) matters, matching the style of
    # the other raising tests in this file.
    with pytest.raises(Exception):
        Catalog(intake_server_with_auth, auth=auth)
def test_parser_obsolete_error(filename):
    """Catalog fixtures in the obsolete schema must raise ObsoleteError."""
    with pytest.raises(exceptions.ObsoleteError):
        Catalog(abspath(filename + ".yml"))
def test_secret_auth(intake_server_with_auth):
    """Connecting with the correct shared secret exposes the entries."""
    catalog = Catalog(intake_server_with_auth,
                      auth=SecretClientAuth(secret='test_secret'))
    assert list(catalog) == ['use_example1']
def get_app(self):
    """Build the tornado Application serving browser handlers for catalog1."""
    catalog_file = os.path.join(os.path.dirname(__file__), 'catalog1.yml')
    handlers = get_browser_handlers(Catalog(catalog_file))
    return tornado.web.Application(handlers)