Ejemplo n.º 1
0
def test_union_catalog():
    """Check a catalog constructed from a list of two YAML catalog paths.

    Verifies the combined entry listing, the ``describe()`` and
    ``describe_open()`` output of ``entry1_part``, and that data sources can
    be created implicitly, with default parameters, and with explicit ones.
    """
    here = os.path.dirname(__file__)
    uris = [
        os.path.join(here, fname)
        for fname in ('catalog_union_1.yml', 'catalog_union_2.yml')
    ]

    union_cat = Catalog(uris)

    assert_items_equal(list(union_cat),
                       ['entry1', 'entry1_part', 'use_example1'])

    part_parameter = {
        'name': 'part',
        'description': 'part of filename',
        'default': '1',
        'type': 'str',
        'allowed': ['1', '2'],
    }
    expected_description = {
        'container': 'dataframe',
        'user_parameters': [part_parameter],
        'description': 'entry1 part',
        'direct_access': 'allow',
    }
    assert union_cat.entry1_part.describe() == expected_description

    desc_open = union_cat.entry1_part.describe_open()
    open_args = desc_open['args']
    assert open_args['urlpath'].endswith('entry1_1.csv')
    # The full path is system dependent, so drop it before comparing.
    open_args.pop('urlpath')
    expected_open = {
        'args': {
            'metadata': {'bar': [2, 4, 6], 'foo': 'baz'},
        },
        'description': 'entry1 part',
        'direct_access': 'allow',
        'metadata': {'bar': [2, 4, 6], 'foo': 'baz'},
        'plugin': 'csv',
    }
    assert desc_open == expected_open

    # Implied creation of data source
    implied = union_cat.entry1
    assert implied.container == 'dataframe'
    assert implied.metadata == dict(foo='bar', bar=[1, 2, 3])

    # Use default parameters in explicit creation of data source
    assert union_cat.entry1_part().container == 'dataframe'
    # Specify parameters in creation of data source
    assert union_cat.entry1_part(part='2').container == 'dataframe'
Ejemplo n.º 2
0
def test_remote_env(intake_server):
    """Check how the ``intake_test`` user parameter is reported by catalog entries.

    With ``INTAKE_TEST`` set to ``'client'`` in this process:

    * the ``remote_env`` entry's parameter still contains the text
      ``'INTAKE_TEST'``;
    * the ``local_env`` entry's parameter equals ``'client'``;
    * with ``getenv=False`` the ``local_env`` parameter contains the text
      ``'INTAKE_TEST'`` again.

    The environment variable is restored afterwards so the value does not
    leak into other tests running in the same process.
    """
    import os

    previous = os.environ.get('INTAKE_TEST')
    os.environ['INTAKE_TEST'] = 'client'
    try:
        catalog = Catalog(intake_server)
        s = catalog.remote_env.get()
        assert 'INTAKE_TEST' in s._user_parameters['intake_test']

        s = catalog.local_env.get()
        assert 'client' == s._user_parameters['intake_test']

        # prevents *client* from getting env
        catalog = Catalog(intake_server, getenv=False)
        s = catalog.local_env.get()
        assert 'INTAKE_TEST' in s._user_parameters['intake_test']
    finally:
        # Put the process environment back exactly as it was found.
        if previous is None:
            os.environ.pop('INTAKE_TEST', None)
        else:
            os.environ['INTAKE_TEST'] = previous
Ejemplo n.º 3
0
def test_read_direct(intake_server):
    """Read ``entry1_part`` with ``part='2'`` and compare it to ``entry1_2.csv``.

    Also checks the ``discover()`` result, the metadata and description, and
    that the source reports direct access.
    """
    source = Catalog(intake_server)['entry1_part'].get(part='2')

    expected_info = dict(
        datashape=None,
        dtype=np.dtype([('name', 'O'), ('score', '<f8'), ('rank', '<i8')]),
        npartitions=1,
        shape=(None, ),  # do not know size of CSV ahead of time
        metadata=dict(bar=[2, 4, 6], foo='baz'),
    )
    assert source.discover() == expected_info

    assert source.metadata == dict(foo='baz', bar=[2, 4, 6])
    assert source.description == 'entry1 part'

    df = source.read()
    csv_path = os.path.join(os.path.dirname(__file__), 'entry1_2.csv')
    expected_df = pd.read_csv(csv_path)

    assert source.direct  # this should be direct

    assert expected_df.equals(df)
Ejemplo n.º 4
0
def test_read(intake_server):
    """Read ``entry1`` and compare it to the concatenation of both CSV files.

    Also checks the ``discover()`` result and metadata, and that the source
    does not report direct access.
    """
    source = Catalog(intake_server)['entry1'].get()

    expected_info = dict(
        datashape=None,
        dtype=np.dtype([('name', 'O'), ('score', '<f8'), ('rank', '<i8')]),
        npartitions=2,
        shape=(None, ),  # Do not know CSV size ahead of time
    )
    assert source.discover() == expected_info

    assert source.metadata == dict(foo='bar', bar=[1, 2, 3])

    df = source.read()
    here = os.path.dirname(__file__)
    frames = [
        pd.read_csv(os.path.join(here, fname))
        for fname in ('entry1_1.csv', 'entry1_2.csv')
    ]
    expected_df = pd.concat(frames)

    assert not source.direct  # this should be proxied

    assert expected_df.equals(df)
Ejemplo n.º 5
0
def listing(args):
    """Print the entries of the catalog opened from ``args.uri``.

    For each entry, ``print_entry_info`` is called when ``args.full`` is
    truthy; otherwise only the entry itself is printed.
    """
    catalog = Catalog(args.uri)
    # Take a snapshot of the entries before producing any output.
    names = list(catalog)
    for name in names:
        if not args.full:
            print(name)
            continue
        print_entry_info(catalog, name)
Ejemplo n.º 6
0
def test_reload(intake_server):
    """Rewrite the catalog YAML file and check the new entry is listed.

    The catalog initially lists only ``use_example1``; after the file is
    overwritten with a second source, both entries are expected.
    """
    catalog = Catalog(intake_server)
    assert list(catalog) == ['use_example1']

    updated_yaml = '''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - dir: !template '{{ CATALOG_DIR }}/example_plugin_dir'
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
  use_example1_1:
    description: example1 other
    driver: example1
    args: {}
        '''
    catalog_path = os.path.join(TMP_DIR, YAML_FILENAME)
    with open(catalog_path, 'w') as f:
        f.write(updated_yaml)

    # Pause before re-listing; presumably gives the reload time to happen.
    time.sleep(2)

    assert_items_equal(list(catalog), ['use_example1', 'use_example1_1'])
Ejemplo n.º 7
0
def serve_chart(line):
    """Store a chart object under a name/version and register it in the catalog.

    ``line`` must consist of exactly three whitespace-separated tokens:
    the chart name, the chart version, and the name of a variable in the
    IPython user namespace holding the chart object. Any other number of
    tokens prints a usage hint and returns without doing anything.

    The project/analysis settings are read from ``__framania_*__`` entries in
    the IPython user namespace.
    """
    # split() without an argument collapses repeated whitespace, so
    # "name  version var" no longer produces empty tokens.
    parts = line.split()
    # Exactly three tokens are unpacked below; reject too many as well as
    # too few instead of failing with ValueError on the unpacking.
    if len(parts) != 3:
        print(
            'Make sure input `%serve_chart {chart name} {chart version} {chart variable}`')
        return

    chart_name, chart_version, result_var_name = parts

    # Look the namespace up once instead of on every access.
    user_ns = get_ipython().user_global_ns

    pj_name = user_ns['__framania_pj_name__']
    name = user_ns['__framania_analysis_name__']
    version = user_ns['__framania_analysis_version__']
    data_dir = user_ns['__framania_data_dir__']
    result_var = user_ns[result_var_name]

    chart_dir = data_dir / 'chart' / get_version_name(name, version)

    manager = HVManiaManager(pj_name, chart_dir)
    manager[chart_name] = result_var

    catalog = FramaniaExtendedIntakeCatalog(user_ns['__framania_catalog_file__'])
    source = FramaniaExtendedIntakeSource(
        Catalog(),
        chart_name, chart_version, None,
        user_ns['__framania_analysis_upstreams__'], False)
    catalog.append(source)

    print(f'Chart is served as {chart_name} {chart_version}')
Ejemplo n.º 8
0
def test_catalog_file_removal(temp_catalog_file):
    """Delete the catalog file and check the directory catalog becomes empty.

    The catalog is opened on the directory containing ``temp_catalog_file``
    and initially exposes the entries ``a`` and ``b``.
    """
    catalog_dir = os.path.dirname(temp_catalog_file)
    cat = Catalog(catalog_dir)
    initial_entries = set(cat)
    assert initial_entries == {'a', 'b'}

    os.remove(temp_catalog_file)
    time.sleep(1.5)  # wait for catalog refresh
    remaining_entries = set(cat)
    assert remaining_entries == set()
Ejemplo n.º 9
0
def test_union_catalog():
    """Check a catalog constructed from a list of two YAML catalog paths.

    Verifies the combined entry listing, the ``describe()`` and
    ``describe_open()`` output of ``entry1_part``, and that data sources can
    be created implicitly, with default parameters, and with explicit ones.
    """
    base_dir = os.path.dirname(__file__)
    first_uri = os.path.join(base_dir, 'catalog_union_1.yml')
    second_uri = os.path.join(base_dir, 'catalog_union_2.yml')

    union_cat = Catalog([first_uri, second_uri])

    expected_names = ['entry1', 'entry1_part', 'use_example1']
    assert_items_equal(list(union_cat), expected_names)

    part_parameter = dict(
        name='part',
        description='part of filename',
        default='1',
        type='str',
        allowed=['1', '2'],
    )
    assert union_cat.entry1_part.describe() == dict(
        container='dataframe',
        user_parameters=[part_parameter],
        description='entry1 part',
        direct_access='allow',
    )

    desc_open = union_cat.entry1_part.describe_open()
    assert desc_open['args']['urlpath'].endswith('entry1_1.csv')
    del desc_open['args']['urlpath']  # Full path will be system dependent
    assert desc_open == dict(
        args=dict(metadata=dict(bar=[2, 4, 6], foo='baz')),
        description='entry1 part',
        direct_access='allow',
        metadata=dict(bar=[2, 4, 6], foo='baz'),
        plugin='csv',
    )

    # Implied creation of data source
    assert union_cat.entry1.container == 'dataframe'
    assert union_cat.entry1.metadata == {'foo': 'bar', 'bar': [1, 2, 3]}

    # Use default parameters in explicit creation of data source
    default_source = union_cat.entry1_part()
    assert default_source.container == 'dataframe'
    # Specify parameters in creation of data source
    explicit_source = union_cat.entry1_part(part='2')
    assert explicit_source.container == 'dataframe'
Ejemplo n.º 10
0
def test_read_partition(intake_server):
    """Read both partitions of ``entry1`` and compare each to its CSV file."""
    source = Catalog(intake_server).entry1.get()

    # Partition 1 is requested before partition 0.
    second_part = source.read_partition(1)
    first_part = source.read_partition(0)

    here = os.path.dirname(__file__)
    expected_by_file = (
        ('entry1_1.csv', first_part),
        ('entry1_2.csv', second_part),
    )
    for fname, partition in expected_by_file:
        assert pd.read_csv(os.path.join(here, fname)).equals(partition)
Ejemplo n.º 11
0
def test_read_chunks(intake_server):
    """Read ``entry1`` in chunks and compare the result to both CSV files.

    Two chunks are expected, and their concatenation must equal the
    concatenation of ``entry1_1.csv`` and ``entry1_2.csv``.
    """
    source = Catalog(intake_server).entry1.get()

    chunks = [chunk for chunk in source.read_chunked()]
    assert len(chunks) == 2

    here = os.path.dirname(__file__)
    frames = [
        pd.read_csv(os.path.join(here, fname))
        for fname in ('entry1_1.csv', 'entry1_2.csv')
    ]
    expected_df = pd.concat(frames)

    assert expected_df.equals(pd.concat(chunks))
Ejemplo n.º 12
0
def test_pickle(intake_server):
    """Round-trip the ``entry1`` source through pickle and read the copy.

    The data read from the unpickled source must equal the concatenation of
    ``entry1_1.csv`` and ``entry1_2.csv``.
    """
    source = Catalog(intake_server).entry1.get()

    payload = pickle.dumps(source, pickle.HIGHEST_PROTOCOL)
    restored = pickle.loads(payload)

    df = restored.read()

    here = os.path.dirname(__file__)
    frames = [
        pd.read_csv(os.path.join(here, fname))
        for fname in ('entry1_1.csv', 'entry1_2.csv')
    ]
    expected_df = pd.concat(frames)

    assert expected_df.equals(df)
Ejemplo n.º 13
0
def test_info_describe(intake_server):
    """Check the entry listing and ``describe()`` output of the served catalog.

    ``entry1`` must describe itself with ``direct_access`` of ``'forbid'``;
    ``entry1_part`` must report ``'allow'``.
    """
    catalog = Catalog(intake_server)

    expected_names = ['use_example1', 'entry1', 'entry1_part']
    assert_items_equal(list(catalog), expected_names)

    full_info = catalog['entry1'].describe()
    assert full_info == dict(
        container='dataframe',
        description='entry1 full',
        name='entry1',
        direct_access='forbid',
        user_parameters=[],
    )

    part_info = catalog['entry1_part'].describe()
    assert part_info['direct_access'] == 'allow'
Ejemplo n.º 14
0
def test_reload_newfile(intake_server):
    """Write a second catalog file into ``TMP_DIR`` and check its entry appears.

    ``example2`` must be absent at first and listed alongside the original
    entries after the new file has been written.
    """
    catalog = Catalog(intake_server)

    orig_entries = list(catalog)
    assert 'example2' not in orig_entries

    new_yaml = '''
sources:
  example2:
    description: source 2
    driver: csv
    args: {}
        '''
    new_catalog_path = os.path.join(TMP_DIR, 'intake_test_catalog2.yml')
    with open(new_catalog_path, 'w') as f:
        f.write(new_yaml)

    # Pause before re-listing; presumably gives the reload time to happen.
    time.sleep(2)

    expected_entries = ['example2'] + orig_entries
    assert_items_equal(list(catalog), expected_entries)
Ejemplo n.º 15
0
def test_reload_missing_local_directory(tmpdir):
    """Open a catalog on ``tmpdir``, then create the directory and a catalog file.

    The catalog must list no entries at first and ``use_example1`` once the
    YAML file has been written.
    """
    catalog = Catalog(tmpdir)
    assert_items_equal(list(catalog), [])

    os.mkdir(tmpdir)
    catalog_yaml = '''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - dir: '{{ CATALOG_DIR }}/example_plugin_dir'
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
        '''
    catalog_path = os.path.join(tmpdir, YAML_FILENAME)
    with open(catalog_path, 'w') as f:
        f.write(catalog_yaml)

    assert_items_equal(list(catalog), ['use_example1'])
Ejemplo n.º 16
0
def test_catalog_description():
    """Make sure the description comes through the save.

    A catalog built with ``Catalog.from_dict`` is saved to a YAML file and
    reopened with ``intake.open_catalog``; the reopened catalog must have a
    description. The file is written inside a temporary directory so the
    test leaves nothing behind in the current working directory.
    """
    import os
    import tempfile

    entry = LocalCatalogEntry(
        'name',
        description='description',
        driver=intake.catalog.local.YAMLFileCatalog,
    )
    cat1 = Catalog.from_dict(
        {'name': entry},
        name='overall catalog name',
        description='overall catalog description',
    )

    # The directory and the saved file are removed when the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        saved_path = os.path.join(tmp_dir, 'desc_test.yaml')
        cat1.save(saved_path)

        cat2 = intake.open_catalog(saved_path)

        assert cat2.description is not None
Ejemplo n.º 17
0
def main(argv=None):
    """Parse the command line, build a catalog and serve it.

    ``argv`` defaults to ``sys.argv``; its first element is skipped when
    parsing. Accepts ``-p/--port`` (default 5000), the
    ``--sys-exit-on-sigterm`` flag, and one or more catalog file arguments.
    After printing a summary, the catalog is wrapped in an ``IntakeServer``
    whose app listens on the chosen port, and the tornado IO loop is
    started.
    """
    argv = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(description='Intake Catalog Server')
    parser.add_argument(
        '-p', '--port',
        type=int,
        default=5000,
        help='port number for server to listen on',
    )
    parser.add_argument(
        '--sys-exit-on-sigterm',
        action='store_true',
        help='internal flag used during unit testing to ensure .coverage file is written',
    )
    parser.add_argument(
        'catalog_args',
        metavar='FILE',
        type=str,
        nargs='+',
        help='Name of catalog YAML file',
    )
    args = parser.parse_args(argv[1:])

    if args.sys_exit_on_sigterm:
        signal.signal(signal.SIGTERM, call_exit_on_sigterm)

    catalog_files = args.catalog_args
    print('Creating catalog from:')
    for catalog_file in catalog_files:
        print('  - %s' % catalog_file)

    print("catalog_args", catalog_files)
    catalog = Catalog(catalog_files)

    entry_names = list(catalog)
    print('Entries:', ','.join(entry_names))

    port = args.port
    print('Listening on port %d' % port)

    server = IntakeServer(catalog)
    app = server.make_app()
    server.start_periodic_functions(close_idle_after=3600.0)

    app.listen(port)
    tornado.ioloop.IOLoop.current().start()
Ejemplo n.º 18
0
def test_read(intake_server):
    """Read ``entry1`` and compare it to the concatenation of both CSV files.

    The ``discover()`` result is checked field by field, with ``dtype``
    compared against an empty slice of the expected frame.
    """
    source = Catalog(intake_server)['entry1'].get()

    here = os.path.dirname(__file__)
    frames = [
        pd.read_csv(os.path.join(here, fname))
        for fname in ('entry1_1.csv', 'entry1_2.csv')
    ]
    expected_df = pd.concat(frames)
    empty_like_expected = expected_df[:0]

    info = source.discover()
    assert info['datashape'] is None
    assert info['dtype'].equals(empty_like_expected)
    assert info['npartitions'] == 2
    assert info['shape'] == (None, 3)  # Do not know CSV size ahead of time

    assert source.metadata == {'foo': 'bar', 'bar': [1, 2, 3]}

    df = source.read()

    assert not source.direct  # this should be proxied

    assert expected_df.equals(df)
Ejemplo n.º 19
0
def test_read_direct(intake_server):
    """Read ``entry1_part`` with ``part='2'`` and compare it to ``entry1_2.csv``.

    The ``discover()`` result is checked field by field, with ``dtype``
    compared against an empty slice of the expected frame. The source must
    report direct access.
    """
    source = Catalog(intake_server)['entry1_part'].get(part='2')

    csv_path = os.path.join(os.path.dirname(__file__), 'entry1_2.csv')
    expected_df = pd.read_csv(csv_path)
    empty_like_expected = expected_df[:0]

    info = source.discover()

    assert info['datashape'] is None
    assert info['dtype'].equals(empty_like_expected)
    assert info['npartitions'] == 1
    assert info['shape'] == (None, 3)  # Do not know CSV size ahead of time
    assert info['metadata'] == dict(bar=[2, 4, 6], foo='baz')

    assert source.metadata == {'foo': 'baz', 'bar': [2, 4, 6]}
    assert source.description == 'entry1 part'
    df = source.read()

    assert source.direct  # this should be direct

    assert expected_df.equals(df)
Ejemplo n.º 20
0
def test_remote_cat(http_server):
    """Open ``catalog1.yml`` from the HTTP server and check ``entry1`` is usable."""
    cat = Catalog(http_server + 'catalog1.yml')
    assert 'entry1' in cat
    description = cat.entry1.describe()
    assert description
Ejemplo n.º 21
0
def test_bad_url(intake_server):
    """Opening a catalog at a nonsense path on the server must raise."""
    nonsense_url = intake_server + '/nonsense_prefix'

    with pytest.raises(Exception):
        Catalog(nonsense_url)
Ejemplo n.º 22
0
def test_duplicate_parameters():
    """Opening ``catalog_dup_parameters.yml`` must raise ``DuplicateKeyError``."""
    path = os.path.dirname(__file__)
    uri = os.path.join(path, 'catalog_dup_parameters.yml')

    # Neither the exception info nor the catalog is inspected, so nothing
    # is bound here.
    with pytest.raises(exceptions.DuplicateKeyError):
        Catalog(uri)
Ejemplo n.º 23
0
def test_empty_catalog():
    """A catalog created without arguments lists no entries."""
    entries = list(Catalog())
    assert entries == []
Ejemplo n.º 24
0
def test_unknown_source(intake_server):
    """Describing an entry that does not exist must raise."""
    catalog = Catalog(intake_server)

    with pytest.raises(Exception):
        missing_entry = catalog['does_not_exist']
        missing_entry.describe()
Ejemplo n.º 25
0
def catalog1():
    """Return a catalog opened from ``catalog1.yml`` next to this file."""
    catalog_path = os.path.join(os.path.dirname(__file__), 'catalog1.yml')
    return Catalog(catalog_path)
Ejemplo n.º 26
0
def test_parser_validation_error(filename):
    """Opening ``<filename>.yml`` must raise ``ValidationError``."""
    with pytest.raises(exceptions.ValidationError):
        catalog_path = abspath(filename + ".yml")
        Catalog(catalog_path)
Ejemplo n.º 27
0
def test_remote_datasource_interface(intake_server):
    """Run ``verify_datasource_interface`` on the ``entry1`` source."""
    source = Catalog(intake_server)['entry1'].get()

    verify_datasource_interface(source)
Ejemplo n.º 28
0
def test_secret_auth_fail(intake_server_with_auth):
    """Opening the authenticated server with the wrong secret must raise."""
    auth = SecretClientAuth(secret='test_wrong_secret')
    # The catalog is never used, so the result is not bound to a name.
    with pytest.raises(Exception):
        Catalog(intake_server_with_auth, auth=auth)
Ejemplo n.º 29
0
def test_parser_obsolete_error(filename):
    """Opening ``<filename>.yml`` must raise ``ObsoleteError``."""
    with pytest.raises(exceptions.ObsoleteError):
        catalog_path = abspath(filename + ".yml")
        Catalog(catalog_path)
Ejemplo n.º 30
0
def test_secret_auth(intake_server_with_auth):
    """With the secret ``'test_secret'`` the catalog lists ``use_example1``."""
    client_auth = SecretClientAuth(secret='test_secret')
    catalog = Catalog(intake_server_with_auth, auth=client_auth)

    assert list(catalog) == ['use_example1']
Ejemplo n.º 31
0
 def get_app(self):
     """Return a tornado application with browser handlers for ``catalog1.yml``."""
     here = os.path.dirname(__file__)
     local_catalog = Catalog(os.path.join(here, 'catalog1.yml'))
     return tornado.web.Application(get_browser_handlers(local_catalog))