Esempio n. 1
0
def test_reload_missing_remote_directory(intake_server):
    """Check that a remote catalog lists a source added after it was opened.

    The catalog is opened while ``TMP_DIR`` does not exist and must list no
    entries; after the directory and a catalog YAML file are created, the
    same catalog object must list ``use_example1``.

    ``TMP_DIR`` is removed before the test and again afterwards, even when
    an assertion fails, so a failing run does not leave the directory
    behind for later tests.
    """
    # Start from a clean slate. The directory may legitimately be absent, so
    # filesystem errors are ignored -- but, unlike a bare ``except``, this
    # does not swallow KeyboardInterrupt or programming errors.
    shutil.rmtree(TMP_DIR, ignore_errors=True)

    try:
        # NOTE(review): presumably gives the server time to notice the
        # directory is gone -- confirm against the server's refresh period.
        time.sleep(1)
        catalog = Catalog(intake_server)
        assert_items_equal(list(catalog), [])

        os.mkdir(TMP_DIR)
        with open(os.path.join(TMP_DIR, YAML_FILENAME), 'w') as f:
            f.write('''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - dir: '{{ CATALOG_DIR }}/example_plugin_dir'
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
        ''')
        # NOTE(review): presumably waits for the catalog to be reloaded.
        time.sleep(2)

        assert_items_equal(list(catalog), ['use_example1'])
    finally:
        # Best-effort cleanup that also runs when an assertion above fails.
        shutil.rmtree(TMP_DIR, ignore_errors=True)
Esempio n. 2
0
def listing(args):
    """Print the entries of the catalog located at ``args.uri``.

    When ``args.full`` is truthy each entry is reported through
    ``print_entry_info``; otherwise only the entry name is printed.
    """
    catalog = Catalog(args.uri)
    names = list(catalog)
    for name in names:
        if not args.full:
            print(name)
            continue
        print_entry_info(catalog, name)
Esempio n. 3
0
 def invoke(self, args):
     """Read every entry of the catalog at ``args.uri`` that has a cache.

     Each entry is instantiated with its default parameters; entries whose
     ``cache`` attribute is falsy are skipped.
     """
     cat = Catalog(args.uri)
     for name in list(cat):
         source = cat[name]()
         if not source.cache:
             continue
         print("Caching for entry %s" % name)
         source.read()
Esempio n. 4
0
def test_read(intake_server):
    """Read ``entry1`` through the server and compare with the local CSVs.

    Checks the ``discover()`` info (datashape, dtype, npartitions, shape),
    the source metadata (ignoring ``catalog_dir``) and the data returned by
    ``read()``.
    """
    catalog = Catalog(intake_server)

    source = catalog['entry1'].get()

    here = os.path.dirname(__file__)
    csv_paths = (
        os.path.join(here, 'entry1_1.csv'),
        os.path.join(here, 'entry1_2.csv'),
    )
    expected_df = pd.concat(tuple(pd.read_csv(path) for path in csv_paths))
    # Zero-row slice: keeps the column dtypes without any data.
    empty = expected_df[:0]
    expected_dtypes = {
        column: str(dtype)
        for column, dtype in empty.dtypes.to_dict().items()
    }

    info = source.discover()
    assert info['datashape'] is None
    assert info['dtype'] == expected_dtypes
    assert info['npartitions'] == 2
    assert info['shape'] == (None, 3)  # Do not know CSV size ahead of time

    metadata = source.metadata.copy()
    metadata.pop('catalog_dir', None)
    assert metadata == {'foo': 'bar', 'bar': [1, 2, 3], 'cache': []}

    result = source.read()

    assert expected_df.equals(result)
Esempio n. 5
0
def test_read_direct(intake_server):
    """Read ``entry1_part`` with ``part='2'`` and compare with its CSV file.

    Checks the ``discover()`` info including its ``metadata`` field, the
    source's own metadata and description, and the data from ``read()``.
    """
    catalog = Catalog(intake_server)

    source = catalog['entry1_part'].get(part='2')
    csv_path = os.path.join(os.path.dirname(__file__), 'entry1_2.csv')
    expected_df = pd.read_csv(csv_path)
    # Zero-row slice: keeps the column dtypes without any data.
    empty = expected_df[:0]
    expected_dtypes = {
        column: str(dtype)
        for column, dtype in empty.dtypes.to_dict().items()
    }

    info = source.discover()

    assert info['datashape'] is None
    assert info['dtype'] == expected_dtypes
    assert info['npartitions'] == 1
    assert info['shape'] == (None, 3)  # Do not know CSV size ahead of time
    discovered_md = info['metadata'].copy()
    discovered_md.pop('catalog_dir', None)
    assert discovered_md == {'bar': [2, 4, 6], 'foo': 'baz', 'cache': []}

    source_md = source.metadata.copy()
    source_md.pop('catalog_dir', None)
    assert source_md == {'foo': 'baz', 'bar': [2, 4, 6], 'cache': []}
    assert source.description == 'entry1 part'
    result = source.read()

    assert expected_df.equals(result)
Esempio n. 6
0
def test_reload_updated_config(intake_server_with_config):
    """Rewrite the catalog YAML and expect the new source to be listed.

    The catalog initially lists only ``use_example1``; after the file under
    ``TMP_DIR`` is overwritten with a second source and two seconds have
    passed, both sources must be listed.
    """
    catalog = Catalog(intake_server_with_config)

    assert list(catalog) == ['use_example1']

    config_path = os.path.join(TMP_DIR, YAML_FILENAME)
    with open(config_path, 'w') as config_file:
        config_file.write('''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - dir: '{{ CATALOG_DIR }}/example_plugin_dir'
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
  use_example1_1:
    description: example1 other
    driver: example1
    args: {}
        ''')

    # NOTE(review): presumably waits for the server to reload -- confirm.
    time.sleep(2)

    expected_names = ['use_example1', 'use_example1_1']
    assert_items_equal(list(catalog), expected_names)
Esempio n. 7
0
def test_secret_auth(intake_server_with_auth):
    """Open a catalog with ``SecretClientAuth``, list it and read ``example``."""
    catalog = Catalog(
        intake_server_with_auth,
        auth=SecretClientAuth(secret='test_secret'),
    )

    assert list(catalog) == ['example']

    catalog.example.read()
Esempio n. 8
0
def test_union_catalog():
    """Build a catalog from two YAML files and inspect the merged entries.

    Covers the entry listing, ``describe()`` and ``describe_open()`` of
    ``entry1_part``, and attribute access / calls on the entries.
    """
    here = os.path.dirname(__file__)
    uris = [
        os.path.join(here, 'catalog_union_1.yml'),
        os.path.join(here, 'catalog_union_2.yml'),
    ]

    union_cat = Catalog(uris)

    expected_names = ['entry1', 'entry1_part', 'use_example1']
    assert_items_equal(list(union_cat), expected_names)

    part_parameter = {
        'name': 'part',
        'description': 'part of filename',
        'default': '1',
        'type': 'str',
        'allowed': ['1', '2'],
    }
    assert union_cat.entry1_part.describe() == {
        'container': 'dataframe',
        'user_parameters': [part_parameter],
        'description': 'entry1 part',
        'direct_access': 'allow'
    }

    desc_open = union_cat.entry1_part.describe_open()
    open_args = desc_open['args']
    assert open_args['urlpath'].endswith('entry1_1.csv')
    del open_args['urlpath']  # Full path will be system dependent
    open_args['metadata'].pop('catalog_dir')
    assert 'csv' in str(desc_open.pop('plugin'))
    assert desc_open == {
        'args': {'metadata': {'bar': [2, 4, 6], 'cache': [], 'foo': 'baz'}},
        'description': 'entry1 part',
        'direct_access': 'allow',
        'metadata': {'bar': [2, 4, 6], 'cache': [], 'foo': 'baz'},
    }

    # Implied creation of data source
    assert union_cat.entry1.container == 'dataframe'
    entry1_md = union_cat.entry1.metadata
    entry1_md.pop('catalog_dir')
    assert entry1_md == {'foo': 'bar', 'bar': [1, 2, 3], 'cache': []}

    # Use default parameters in explicit creation of data source
    assert union_cat.entry1_part().container == 'dataframe'
    # Specify parameters in creation of data source
    assert union_cat.entry1_part(part='2').container == 'dataframe'
Esempio n. 9
0
def test_catalog_file_removal(temp_catalog_file):
    """Delete the catalog file and expect the glob catalog to become empty."""
    directory = os.path.dirname(temp_catalog_file)
    cat = Catalog(directory + '/*')
    assert set(cat) == {'a', 'b'}

    os.remove(temp_catalog_file)
    time.sleep(1.5)  # wait for catalog refresh
    remaining = set(cat)
    assert remaining == set()
Esempio n. 10
0
def test_remote_env(intake_server):
    """Check which side's ``INTAKE_TEST`` value shows up in source errors.

    ``INTAKE_TEST`` is set to ``'client'`` in this process. Getting each
    source is expected to raise, and the error text is inspected for
    ``'path-server'``, ``'path-client'`` or the unexpanded variable name.
    """
    import os
    os.environ['INTAKE_TEST'] = 'client'

    catalog = Catalog(intake_server)
    with pytest.raises(Exception) as remote_err:
        catalog.remote_env.get()
    assert 'path-server' in str(remote_err.value)

    with pytest.raises(Exception) as local_err:
        catalog.local_env.get()
    assert 'path-client' in str(local_err.value)

    # prevents *client* from getting env
    catalog = Catalog(intake_server, getenv=False)
    with pytest.raises(Exception) as no_env_err:
        catalog.local_env.get()
    assert 'INTAKE_TEST' in str(no_env_err.value)
Esempio n. 11
0
def build_app(catalog_uri, zmq_address=None):
    """Create the Qt application showing a browser for ``catalog_uri``.

    A ``CentralWidget`` built from the catalog is installed in a new main
    window, which is stored on the application as ``main_window`` and shown.

    Returns the ``QApplication``; the caller is left to run it.
    """
    catalog = Catalog(catalog_uri)

    app = QApplication([b'Bluesky Browser'])
    window = QMainWindow()
    app.main_window = window
    widget = CentralWidget(
        catalog=catalog,
        zmq_address=zmq_address,
        menuBar=window.menuBar,
    )
    window.setCentralWidget(widget)
    window.show()
    return app
Esempio n. 12
0
def test_filter():
    """``Catalog.filter`` keeps only entries accepted by the predicate."""
    from intake.catalog.base import Catalog

    def mentions_pass(candidate):
        return 'pass' in candidate._description

    dropped = LocalCatalogEntry(name='trial', description='get this back',
                                driver='csv')
    kept = LocalCatalogEntry(name='trial', description='pass this through',
                             driver='csv')
    entries = {'trial1': dropped, 'trial2': kept}
    cat = Catalog.from_dict(entries, name='mycat')
    filtered = cat.filter(mentions_pass)
    assert list(filtered) == ['trial2']
    assert filtered.trial2 is kept
Esempio n. 13
0
def test_dict_adddel():
    """Entries can be added by item assignment and removed with ``pop``."""
    from intake.catalog.base import Catalog

    trial_entry = LocalCatalogEntry(
        name='trial',
        description='get this back',
        driver='csv',
    )
    cat = Catalog.from_dict({'trial': trial_entry}, name='mycat')
    assert 'trial' in cat

    cat['trial2'] = trial_entry
    assert list(cat) == ['trial', 'trial2']

    cat.pop('trial')
    assert list(cat) == ['trial2']
    assert cat['trial2'] is trial_entry
 def get(self, **user_parameters):
     """Return the nested catalog, building it on first use.

     While ``self._default_source`` is truthy it is returned unchanged and
     ``user_parameters`` are ignored. Otherwise a catalog is created with
     ``Catalog.from_dict`` from this entry's entries, name, metadata and
     description (plus ``user_parameters``), linked to the owning catalog
     through its ``cat`` and ``catalog_object`` attributes, and stored for
     later calls.
     """
     if self._default_source:
         return self._default_source

     self._default_source = Catalog.from_dict(
         entries=self.entries,
         name=self.name,
         metadata=self.metadata,
         description=self.description,
         **user_parameters
     )
     parent = self._catalog
     self._default_source.cat = parent
     self._default_source.catalog_object = parent
     return self._default_source
Esempio n. 15
0
def test_dict_save():
    """Save a dict-built catalog to YAML and check it can be reopened.

    The reopened catalog must contain the ``trial`` entry, keep the name
    ``mycat`` and keep the entry's ``csv`` driver. The temporary directory
    holding the YAML file is removed afterwards; the original leaked it.
    """
    # Local import so this block does not depend on a module-level import.
    import shutil

    from intake.catalog.base import Catalog

    tmp_dir = tempfile.mkdtemp()
    try:
        fn = os.path.join(tmp_dir, 'mycat.yaml')
        entry = LocalCatalogEntry(name='trial', description='get this back',
                                  driver='csv')
        cat = Catalog.from_dict({'trial': entry}, name='mycat')
        cat.save(fn)

        cat2 = open_catalog(fn)
        assert 'trial' in cat2
        assert cat2.name == 'mycat'
        assert cat2.trial._driver == 'csv'
    finally:
        # Best-effort cleanup; runs even if an assertion above fails.
        shutil.rmtree(tmp_dir, ignore_errors=True)
Esempio n. 16
0
def test_read_partition(intake_server):
    """Each partition of ``entry1`` equals the matching local CSV file.

    Partition 1 is deliberately requested before partition 0, as in the
    original ordering of the calls.
    """
    catalog = Catalog(intake_server)

    source = catalog.entry1.get()

    second = source.read_partition(1)
    first = source.read_partition(0)

    here = os.path.dirname(__file__)
    expected_first = pd.read_csv(os.path.join(here, 'entry1_1.csv'))
    assert expected_first.equals(first)
    expected_second = pd.read_csv(os.path.join(here, 'entry1_2.csv'))
    assert expected_second.equals(second)
Esempio n. 17
0
def test_union_catalog():
    """Build a catalog from two YAML files and inspect the merged entries.

    Only the keys listed in the expected description are compared, so
    extra keys returned by ``describe()`` are tolerated.
    """
    here = os.path.dirname(__file__)
    uris = [
        os.path.join(here, 'catalog_union_1.yml'),
        os.path.join(here, 'catalog_union_2.yml'),
    ]

    union_cat = Catalog(uris)

    expected_names = ['entry1', 'entry1_part', 'use_example1']
    assert_items_equal(list(union_cat), expected_names)

    part_parameter = {
        'name': 'part',
        'description': 'part of filename',
        'default': '1',
        'type': 'str',
        'allowed': ['1', '2'],
    }
    expected = {
        'name': 'entry1_part',
        'container': 'dataframe',
        'user_parameters': [part_parameter],
        'description': 'entry1 part',
        'direct_access': 'allow',
    }
    for key, wanted in expected.items():
        assert union_cat.entry1_part.describe()[key] == wanted

    # Implied creation of data source
    assert union_cat.entry1.container == 'dataframe'
    entry1_md = union_cat.entry1._metadata
    assert entry1_md == {'foo': 'bar', 'bar': [1, 2, 3]}

    # Use default parameters in explicit creation of data source
    assert union_cat.entry1_part().container == 'dataframe'
    # Specify parameters in creation of data source
    assert union_cat.entry1_part(part='2').container == 'dataframe'
Esempio n. 18
0
def test_read_chunks(intake_server):
    """``read_chunked`` yields two chunks that concatenate to both CSVs."""
    catalog = Catalog(intake_server)

    source = catalog.entry1.get()

    pieces = list(source.read_chunked())
    assert len(pieces) == 2

    here = os.path.dirname(__file__)
    first_csv = pd.read_csv(os.path.join(here, 'entry1_1.csv'))
    second_csv = pd.read_csv(os.path.join(here, 'entry1_2.csv'))
    expected_df = pd.concat((first_csv, second_csv))

    combined = pd.concat(pieces)
    assert expected_df.equals(combined)
Esempio n. 19
0
def test_pickle(intake_server):
    """A source survives a pickle round trip and still reads the same data."""
    catalog = Catalog(intake_server)

    source = catalog.entry1.get()

    payload = pickle.dumps(source, pickle.HIGHEST_PROTOCOL)
    restored = pickle.loads(payload)

    result = restored.read()

    here = os.path.dirname(__file__)
    first_csv = pd.read_csv(os.path.join(here, 'entry1_1.csv'))
    second_csv = pd.read_csv(os.path.join(here, 'entry1_2.csv'))
    expected_df = pd.concat((first_csv, second_csv))

    assert expected_df.equals(result)
Esempio n. 20
0
def test_dir(intake_server):
    """Check ``dir()`` and key completion on a paginated remote catalog.

    While the entry listing is incomplete, ``dir(catalog)`` and
    ``_ipython_key_completions_()`` must emit a ``UserWarning`` matching
    "Tab-complete" and report only entries cached so far. Once everything
    is loaded (by iterating, or by using ``page_size=None``) they must
    report every entry without emitting any warning.

    The "no warning" checks use ``warnings.catch_warnings(record=True)``
    rather than ``pytest.warns(None)``, which is deprecated in pytest 7 and
    rejected by pytest 8.
    """
    # Local import so this block does not depend on a module-level import.
    import warnings

    def assert_complete_without_warnings(cat):
        # Record every warning raised in the block, then require none.
        with warnings.catch_warnings(record=True) as record:
            warnings.simplefilter("always")
            assert set(cat) == set(cat._ipython_key_completions_())
            assert set(cat).issubset(set(dir(cat)))
        assert len(record) == 0

    PAGE_SIZE = 2
    catalog = Catalog(intake_server, page_size=PAGE_SIZE)
    assert len(catalog._entries._page_cache) == 0
    assert len(catalog._entries._direct_lookup_cache) == 0
    assert not catalog._entries.complete

    with pytest.warns(UserWarning, match="Tab-complete"):
        key_completions = catalog._ipython_key_completions_()
    with pytest.warns(UserWarning, match="Tab-complete"):
        dir_ = dir(catalog)
    # __dir__ triggers loading the first page.
    assert len(catalog._entries._page_cache) == 2
    assert len(catalog._entries._direct_lookup_cache) == 0
    assert not catalog._entries.complete
    assert set(key_completions) == set(['use_example1', 'nested'])
    assert 'metadata' in dir_  # a normal attribute
    assert 'use_example1' in dir_  # an entry from the first page
    assert 'arr' not in dir_  # an entry we haven't cached yet

    # Trigger fetching one specific name.
    catalog['arr']
    with pytest.warns(UserWarning, match="Tab-complete"):
        dir_ = dir(catalog)
    with pytest.warns(UserWarning, match="Tab-complete"):
        key_completions = catalog._ipython_key_completions_()
    assert 'metadata' in dir_
    assert 'arr' in dir_  # an entry cached via direct access
    assert 'arr' in key_completions

    # Load everything.
    list(catalog)
    assert catalog._entries.complete
    assert_complete_without_warnings(catalog)

    # Load without pagination (which also loads everything).
    catalog = Catalog(intake_server, page_size=None)
    assert catalog._entries.complete
    assert_complete_without_warnings(catalog)
Esempio n. 21
0
def test_reload_missing_local_directory(tempdir):
    """A glob catalog over an empty directory picks up a file added later."""
    catalog = Catalog(tempdir + '/*')
    assert_items_equal(list(catalog), [])

    yaml_path = os.path.join(tempdir, YAML_FILENAME)
    with open(yaml_path, 'w') as yaml_file:
        yaml_file.write('''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - dir: '{{ CATALOG_DIR }}/example_plugin_dir'
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
        ''')

    # NOTE(review): presumably waits for the catalog to refresh -- confirm.
    time.sleep(1)
    assert 'use_example1' in catalog
def test_getitem_and_getattr(intake_server):
    """Item access and attribute access on a remote catalog.

    Entries are reachable as items and as attributes; ordinary attributes
    such as ``metadata`` are not reachable as items; unknown names raise
    ``KeyError`` or ``AttributeError`` respectively.
    """
    catalog = Catalog(intake_server)
    catalog['arr']
    # 'metadata' exists as an *attribute* but not as an item.
    for missing_key in ('doesnotexist', '_doesnotexist', 'metadata'):
        with pytest.raises(KeyError):
            catalog[missing_key]
    catalog.arr  # alias to catalog['arr']
    catalog.metadata  # a normal attribute
    for missing_attr in ('doesnotexit', '_doesnotexit'):
        with pytest.raises(AttributeError):
            getattr(catalog, missing_attr)
    assert catalog.arr is catalog['arr']
    assert isinstance(catalog.arr, RemoteCatalogEntry)
    assert isinstance(catalog.metadata, (dict, type(None)))
Esempio n. 23
0
def test_reload_updated_directory(intake_server_with_config):
    """Add a second catalog file to ``TMP_DIR`` and expect its source listed."""
    catalog = Catalog(intake_server_with_config)

    names_before = list(catalog)
    assert 'example2' not in names_before

    new_catalog_path = os.path.join(TMP_DIR, 'intake_test_catalog2.yml')
    with open(new_catalog_path, 'w') as new_catalog:
        new_catalog.write('''
sources:
  example2:
    description: source 2
    driver: csv
    args:
        urlpath: none
        ''')

    # NOTE(review): presumably waits for the server to reload -- confirm.
    time.sleep(2)

    names_expected = ['example2'] + names_before
    assert_items_equal(list(catalog), names_expected)
def test_info_describe(intake_server):
    """Check the entry listing and the ``describe()`` output of two entries."""
    catalog = Catalog(intake_server)

    expected_names = [
        'use_example1', 'nested', 'entry1', 'entry1_part',
        'remote_env', 'local_env', 'text', 'arr',
    ]
    assert_items_equal(list(catalog), expected_names)

    full_info = catalog['entry1'].describe()

    assert full_info == {
        'container': 'dataframe',
        'description': 'entry1 full',
        'name': 'entry1',
        'direct_access': 'forbid',
        'user_parameters': []
    }

    part_info = catalog['entry1_part'].describe()

    assert part_info['direct_access'] == 'allow'
Esempio n. 25
0
def main(argv=None):
    """Command-line entry point of the Intake catalog server.

    Parses ``argv`` (``sys.argv`` when ``None``; element 0 is skipped as
    the program name), builds a catalog from the given file arguments,
    serves it on the requested port and starts the tornado IO loop.
    """
    from intake.config import conf
    from intake import Catalog

    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(description='Intake Catalog Server')
    parser.add_argument(
        '-p', '--port', type=int, default=conf['port'],
        help='port number for server to listen on')
    parser.add_argument(
        '--sys-exit-on-sigterm', action='store_true',
        help='internal flag used during unit testing to ensure .coverage file is written')
    parser.add_argument(
        'catalog_args', metavar='FILE', type=str, nargs='+',
        help='Name of catalog YAML file')
    options = parser.parse_args(argv[1:])

    if options.sys_exit_on_sigterm:
        signal.signal(signal.SIGTERM, call_exit_on_sigterm)

    logger.info('Creating catalog from:')
    for catalog_arg in options.catalog_args:
        logger.info('  - %s' % catalog_arg)

    # A single argument is passed as a bare string, several as a list.
    catargs = options.catalog_args
    if len(catargs) == 1:
        catargs = catargs[0]
    logger.info("catalog_args: %s" % catargs)
    catalog = Catalog(catargs)

    entry_names = ','.join(list(catalog))
    logger.info('Entries:' + entry_names)

    logger.info('Listening on port %d' % options.port)

    server = IntakeServer(catalog)
    app = server.make_app()
    server.start_periodic_functions(close_idle_after=3600.0)

    app.listen(options.port)
    tornado.ioloop.IOLoop.current().start()
Esempio n. 26
0
def test_datetime(intake_server):
    """``describe()`` of the ``datetime`` entry reports a Timestamp default.

    Only the keys listed in the expected description are compared.
    """
    catalog = Catalog(intake_server)
    info = catalog["datetime"].describe()
    print(info)
    time_parameter = {
        'name': 'time',
        'description': 'some time',
        'type': 'datetime',
        'default': pd.Timestamp('1970-01-01 00:00:00'),
    }
    expected = {
        'name': 'datetime',
        'container': 'dataframe',
        'description': 'datetime parameters',
        'direct_access': 'forbid',
        'user_parameters': [time_parameter],
        'metadata': {},
    }
    for key, wanted in expected.items():
        assert info[key] == wanted
def test_pagination(intake_server):
    """Track the page cache and direct-lookup cache sizes of a paged catalog.

    Exercises direct lookup, membership testing, partial iteration and
    full iteration, checking the two cache sizes after each step.
    """
    PAGE_SIZE = 2
    catalog = Catalog(intake_server, page_size=PAGE_SIZE)

    def pages_cached():
        return len(catalog._entries._page_cache)

    def direct_cached():
        return len(catalog._entries._direct_lookup_cache)

    assert pages_cached() == 0
    assert direct_cached() == 0
    # Trigger fetching one specific name.
    catalog['arr']
    assert pages_cached() == 0
    assert direct_cached() == 1
    # Using `in` on a Catalog should not iterate.
    'arr' in catalog
    assert pages_cached() == 0
    assert direct_cached() == 1
    # Trigger fetching just one full page.
    list(zip(range(PAGE_SIZE), catalog))
    assert pages_cached() == PAGE_SIZE
    assert direct_cached() == 1
    # Trigger fetching all pages by list-ifying.
    list(catalog)
    assert pages_cached() > PAGE_SIZE
    assert direct_cached() == 1
    # Now direct lookup by name should be free because everything is cached.
    catalog['text']
    assert direct_cached() == 1
Esempio n. 28
0
 def get_app(self):
     """Build an ``IntakeServer`` over ``catalog_file`` and return its app.

     The server is kept on ``self.server``.
     """
     self.server = IntakeServer(Catalog(catalog_file))
     app = self.server.make_app()
     return app
Esempio n. 29
0
 def invoke(self, args):
     """Print the ``discover()`` result of entry ``args.name``.

     The entry is looked up in the catalog at ``args.uri`` and its source
     is used as a context manager while discovering.
     """
     entry = Catalog(args.uri)[args.name]
     with entry.get() as source:
         print(source.discover())
Esempio n. 30
0
def test_environment_evaluation(intake_server):
    catalog = Catalog(intake_server)
    import os
    os.environ['INTAKE_TEST'] = 'client'
    d = catalog['remote_env']