def test_get_globus_http_url(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    url = pc.get_globus_http_url('foo.txt')
    purl = urlparse(url)
    foo = MOCK_PROJECTS['foo-project']
    assert purl.netloc == foo['endpoint'] + '.e.globus.org'
    assert purl.scheme == 'https'
    assert purl.path == '/foo_folder/foo.txt'

def test_get_globus_url(mock_projects):
    foo = MOCK_PROJECTS['foo-project']
    pc = PilotClient()
    pc.project.current = 'foo-project'
    url = pc.get_globus_url('metadata/foo.txt')
    purl = urlparse(url)
    assert purl.netloc == foo['endpoint']
    assert purl.scheme == 'globus'
    assert purl.path == '/foo_folder/metadata/foo.txt'

def test_get_globus_app_url(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    url = pc.get_globus_app_url('metadata/foo.txt')
    purl = urlparse(url)
    assert purl.netloc == 'app.globus.org'
    assert purl.scheme == 'https'
    assert purl.path == '/file-manager'
    assert purl.query == 'origin_id=foo-project-endpoint&' \
                         'origin_path=%2Ffoo_folder%2Fmetadata%2Ffoo.txt'

def test_get_portal_url(mock_projects, mock_context):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    assert pc.get_portal_url('') == 'https://myportal/foo-project/'
    assert pc.get_portal_url('foo') == (
        'https://myportal/foo-project/'
        'globus%253A%252F%252Ffoo-project-endpoint%252Ffoo_folder%252Ffoo/')
    cfg = mock_context.load()
    del cfg['contexts']['test-context']['projects_portal_url']
    mock_context.save(cfg)
    assert pc.get_portal_url('foo') is None
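
The assertions above pin down the shape of the MOCK_PROJECTS fixture used throughout these tests. A hypothetical definition consistent with them (the endpoint and index values come straight from the assertions; the key names for the folder and index fields are assumptions):

MOCK_PROJECTS = {
    'foo-project': {
        'endpoint': 'foo-project-endpoint',    # asserted in the app-URL test
        'base_path': '/foo_folder',            # hypothetical key name
        'search_index': 'foo-search-index',    # hypothetical key name
    },
    'foo-project-test': {
        'endpoint': 'foo-project-endpoint',    # stand-in; not pinned by a test
        'base_path': '/foo_test_folder',
        'search_index': 'foo-test-search-index',
    },
}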
Example 5
def list_command(test, output_json, limit):
    # TODO: login should not be required if some records are publicly visible
    pc = PilotClient()
    if not pc.is_logged_in():
        click.echo('You are not logged in.')
        return

    search_authorizer = pc.get_authorizers()['search.api.globus.org']
    sc = globus_sdk.SearchClient(authorizer=search_authorizer)
    # TODO: paginate through results instead of raising the limit
    search_results = sc.search(index_id=pc.get_index(test), q='*', limit=limit)

    if output_json:
        click.echo(json.dumps(search_results.data, indent=4))
        return

    fmt = '{:21.20}{:11.10}{:10.9}{:7.6}{:7.6}{:7.6}{}'
    columns = [
        ('Title', lambda r: r['dc']['titles'][0]['title']),
        ('Data', lambda r: r['ncipilot']['data_type']),
        ('Dataframe', lambda r: r['ncipilot']['dataframe_type']),
        ('Rows', lambda r: str(r['field_metadata']['numrows'])),
        ('Cols', lambda r: str(r['field_metadata']['numcols'])),
        ('Size', get_size),
        ('Filename', get_identifier),
    ]

    # Build row data
    rows = []
    for result in search_results['gmeta']:
        content = result['content'][0]
        if content.get('testing'):
            content = content['testing']
        row = []
        for _, function in columns:
            try:
                row.append(function(content))
            except Exception:
                # Leave the cell blank when a record is missing this field
                row.append('')
        rows.append(row)

    formatted_rows = [fmt.format(*r) for r in rows]
    header = fmt.format(*[c[0] for c in columns])
    output = '{}\n{}'.format(header, '\n'.join(formatted_rows))
    click.echo(output)
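
The table rendering above leans on Python's format-spec mini-language: '{:21.20}' pads a cell to 21 characters and truncates anything longer than 20, which is what keeps the columns aligned. A minimal, self-contained sketch of the same trick:

fmt = '{:21.20}{:11.10}{}'
print(fmt.format('Title', 'Data', 'Filename'))
print(fmt.format('A very long dataset title that gets cut', 'image', 'foo.hdf'))
# Title                Data       Filename
# A very long dataset  image      foo.hdf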
Example 6
def process_hdfs(event):
    """Process the hdf file output from a `process_corr` run. The output is
    ready to be placed on petreldata.net."""
    import os
    import XPCS
    from XPCS.tools.xpcs_metadata import gather
    from XPCS.tools.xpcs_plots import make_plots
    from XPCS.tools.xpcs_qc import check_hdf_dataset
    from pilot.client import PilotClient
    staging = event['staging']
    ver = XPCS.xpcs_version
    pc = PilotClient()

    assert pc.context.current == 'xpcs', 'Not in XPCS context!'
    pc.project.current = 'nick-testing'

    skipped = 0
    outputs = {}
    for hdf_dir in os.listdir(staging):
        os.chdir(os.path.join(staging, hdf_dir))
        try:
            outputs[hdf_dir] = {}
            hdf_file = f'{hdf_dir}.hdf'
            if not os.path.exists(hdf_file):
                raise ValueError(f'{hdf_file} does not exist!')
            if check_hdf_dataset(hdf_file) is False:
                # Count the dataset as skipped and move on to the next one
                skipped += 1
                continue

            metadata = gather(hdf_file)
            outputs[hdf_dir]['make_plots'] = make_plots(hdf_file)
            metadata.update(event['custom_metadata'])
            os.chdir(staging)
            outputs[hdf_dir]['pilot'] = pc.upload(hdf_dir,
                                                  '/',
                                                  metadata=metadata,
                                                  update=True,
                                                  skip_analysis=True)
        except Exception as e:
            outputs[hdf_dir] = str(e)
    return {
        'total': len(os.listdir(staging)),
        'skipped': skipped,
        'outputs': outputs,
        'version': ver,
    }
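
For reference, a minimal sketch of the event payload this function expects. Only the 'staging' and 'custom_metadata' keys are read above; the values shown here are hypothetical:

event = {
    # Absolute path holding one sub-directory per corr run (hypothetical)
    'staging': '/data/xpcs/staging',
    # Extra metadata merged into each record before upload (hypothetical)
    'custom_metadata': {'experiment': 'my-beamtime'},
}
result = process_hdfs(event)
# result -> {'total': ..., 'skipped': ..., 'outputs': {...}, 'version': ...}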

def test_special_paths(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    assert pc.get_path('///') == '/foo_folder'
    assert pc.get_path('.') == '/foo_folder'
    assert pc.get_path('..') == '/foo_folder'
    assert pc.get_path('/foo/bar/baz.txt') == '/foo_folder/foo/bar/baz.txt'

def test_build_short_path(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    assert pc.build_short_path('foo', '/') == 'foo'
    assert pc.build_short_path('/foo/', '/') == 'foo'

    assert pc.build_short_path('foo', 'bar') == 'bar/foo'
    assert pc.build_short_path('/foo/', '/bar/') == 'bar/foo'

def test_get_subject_url(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    args = ('myfolder/dataframe.dat', )
    assert pc.get_globus_url(*args) == pc.get_subject_url(*args)

def test_get_index(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    assert pc.get_index() == 'foo-search-index'
    assert pc.get_index(project='foo-project-test') == 'foo-test-search-index'

def test_get_path(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    assert pc.get_path('folder/file.txt') == '/foo_folder/folder/file.txt'
    path = pc.get_path('folder/file.txt', project='foo-project-test')
    assert path == '/foo_test_folder/folder/file.txt'

def test_invalid_project_with_explicit_name(mock_projects):
    pc = PilotClient()
    with pytest.raises(PilotInvalidProject):
        pc.get_index('does-not-exist')

def test_invalid_project(mock_projects):
    pc = PilotClient()
    with pytest.raises(PilotInvalidProject):
        pc.get_index()
Example 14
def get_pilot_client():
    logging_cfg.setup_logging(level='CRITICAL')
    return PilotClient()
Example 15
def describe(path, test, output_json):
    pc = PilotClient()
    if not pc.is_logged_in():
        click.echo('You are not logged in.')
        return

    old_entry = False
    fname, dirname = os.path.basename(path), os.path.dirname(path)
    entry = pc.get_search_entry(fname, dirname, test)
    if not entry:
        old_entry = True
        entry = pc.get_search_entry(fname, dirname, old=True)

    if not entry:
        click.echo('Unable to find entry')
        return

    if output_json:
        click.echo(json.dumps(entry, indent=4))
        return

    general_fmt = '{:21.20}{}'
    general_columns = [
        ('Title', lambda r: r['dc']['titles'][0]['title']),
        ('Authors', lambda r: [c['creatorName'] for c in r['dc']['creators']]),
        ('Publisher', lambda r: r['dc']['publisher']),
        ('Subjects', lambda r: [s['subject'] for s in r['dc']['subjects']]),
        ('Dates', get_dates),
        ('Data', lambda r: r['ncipilot']['data_type']),
        ('Dataframe', lambda r: r['ncipilot']['dataframe_type']),
        ('Rows', lambda r: str(r['field_metadata']['numrows'])),
        ('Columns', lambda r: str(r['field_metadata']['numcols'])),
        ('Formats', lambda r: r['dc']['formats']),
        ('Version', lambda r: r['dc']['version']),
        ('Size', get_size),
        ('Filename', get_identifier),
        ('Description', lambda r: r['dc']['descriptions'][0]['description']),
    ]

    def format_list(name, content):
        return [general_fmt.format(name, content[0])] + \
               [general_fmt.format('', item) for item in content[1:]]

    def format_entry(name, content):
        return [general_fmt.format(name, content)]

    output = fetch_format(general_columns, entry, format_entry, format_list)

    fmt = ('{:21.20}'
           '{:8.7}{:7.6}{:5.4}{:12.11}{:7.6}'
           '{:7.6}{:7.6}{:7.6}{:7.6}'
           '{:8.7}{:8.7}{:8.7}'
           )
    field_metadata = [
        ('Column Name', 'name'),

        ('Type', 'type'),
        ('Count', 'count'),
        ('Freq', 'frequency'),
        ('Top', 'top'),
        ('Unique', 'unique'),

        ('Min', 'min'),
        ('Max', 'max'),
        ('Mean', 'mean'),
        ('Std', 'std'),

        ('25-PCTL', '25'),
        ('50-PCTL', '50'),
        ('75-PCTL', '75'),
    ]
    names = [n for n, f in field_metadata]
    keys = [f for n, f in field_metadata]
    fm_output = []
    try:
        for field in entry['field_metadata']['field_definitions']:
            f_metadata = [str(field.get(key, '')) for key in keys]
            fm_output.append(fmt.format(*f_metadata))

        field_metadata_names = fmt.format(*names)
        output = '{}\n\nField Metadata\n{}\n{}'.format(output,
                                                       field_metadata_names,
                                                       '\n'.join(fm_output))
    except KeyError:
        output = '{}\n\nField Metadata\nNo Field Metadata'.format(output)

    if not test:
        sub = pc.get_subject_url(fname, dirname, test, old=old_entry)
        qsub = urllib.parse.quote_plus(urllib.parse.quote_plus(sub))
        portal_url = '{}{}'.format(PORTAL_DETAIL_PAGE_PREFIX, qsub)
        other_data = [general_fmt.format('Subject', sub),
                      general_fmt.format(path, portal_url)]
        output = '{}\n\nOther Data\n{}'.format(output, '\n'.join(other_data))

    click.echo(output)
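
The portal link above applies quote_plus twice to the subject URL, so the already-encoded subject survives one round of decoding by the portal before it reaches the detail page. A minimal sketch with a hypothetical subject (note the %25 escapes in the second pass, matching test_get_portal_url above):

import urllib.parse

sub = 'globus://foo-project-endpoint/foo_folder/foo.txt'  # hypothetical subject
once = urllib.parse.quote_plus(sub)
twice = urllib.parse.quote_plus(once)
print(once)   # globus%3A%2F%2Ffoo-project-endpoint%2Ffoo_folder%2Ffoo.txt
print(twice)  # globus%253A%252F%252Ffoo-project-endpoint%252Ffoo_folder%252Ffoo.txt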