def test_get_globus_http_url(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    url = pc.get_globus_http_url('foo.txt')
    purl = urlparse(url)
    foo = MOCK_PROJECTS['foo-project']
    assert purl.netloc == foo['endpoint'] + '.e.globus.org'
    assert purl.scheme == 'https'
    assert purl.path == '/foo_folder/foo.txt'


def test_get_globus_url(mock_projects):
    foo = MOCK_PROJECTS['foo-project']
    pc = PilotClient()
    pc.project.current = 'foo-project'
    url = pc.get_globus_url('metadata/foo.txt')
    purl = urlparse(url)
    assert purl.netloc == foo['endpoint']
    assert purl.scheme == 'globus'
    assert purl.path == '/foo_folder/metadata/foo.txt'


def test_get_globus_app_url(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    url = pc.get_globus_app_url('metadata/foo.txt')
    purl = urlparse(url)
    assert purl.netloc == 'app.globus.org'
    assert purl.scheme == 'https'
    assert purl.path == '/file-manager'
    assert purl.query == 'origin_id=foo-project-endpoint&' \
                         'origin_path=%2Ffoo_folder%2Fmetadata%2Ffoo.txt'


def test_get_portal_url(mock_projects, mock_context):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    assert pc.get_portal_url('') == 'https://myportal/foo-project/'
    assert pc.get_portal_url('foo') == (
        'https://myportal/foo-project/'
        'globus%253A%252F%252Ffoo-project-endpoint%252Ffoo_folder%252Ffoo/'
    )
    cfg = mock_context.load()
    del cfg['contexts']['test-context']['projects_portal_url']
    mock_context.save(cfg)
    assert pc.get_portal_url('foo') is None


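# For reference, a minimal sketch of the project data the URL tests above
# assume. The real ``mock_projects`` fixture and MOCK_PROJECTS dict live
# elsewhere in the test suite; the key names and values below are inferred
# from the assertions only, and are not copied from the actual fixture.
MOCK_PROJECTS_SKETCH = {
    'foo-project': {
        'endpoint': 'foo-project-endpoint',    # Globus endpoint id
        'base_path': '/foo_folder',            # project root on the endpoint
        'search_index': 'foo-search-index',    # Globus Search index
    },
    'foo-project-test': {
        'endpoint': 'foo-project-endpoint',    # assumed shared endpoint
        'base_path': '/foo_test_folder',
        'search_index': 'foo-test-search-index',
    },
}

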
def list_command(test, output_json, limit):
    # Should require login if there are publicly visible records
    pc = PilotClient()
    if not pc.is_logged_in():
        click.echo('You are not logged in.')
        return

    search_authorizer = pc.get_authorizers()['search.api.globus.org']
    sc = globus_sdk.SearchClient(authorizer=search_authorizer)
    # TODO: iterate instead of upping limit
    search_results = sc.search(index_id=pc.get_index(test), q='*',
                               limit=limit)

    if output_json:
        click.echo(json.dumps(search_results.data, indent=4))
        return

    fmt = '{:21.20}{:11.10}{:10.9}{:7.6}{:7.6}{:7.6}{}'
    columns = [
        ('Title', lambda r: r['dc']['titles'][0]['title']),
        ('Data', lambda r: r['ncipilot']['data_type']),
        ('Dataframe', lambda r: r['ncipilot']['dataframe_type']),
        ('Rows', lambda r: str(r['field_metadata']['numrows'])),
        ('Cols', lambda r: str(r['field_metadata']['numcols'])),
        ('Size', get_size),
        ('Filename', get_identifier),
    ]

    # Build row data, leaving a cell blank if a record lacks that field.
    rows = []
    for result in search_results['gmeta']:
        content = result['content'][0]
        if content.get('testing'):
            content = content['testing']
        row = []
        for _, function in columns:
            try:
                row.append(function(content))
            except Exception:
                row.append('')
        rows.append(row)

    formatted_rows = [fmt.format(*r) for r in rows]
    header = fmt.format(*[c[0] for c in columns])
    output = '{}\n{}'.format(header, '\n'.join(formatted_rows))
    click.echo(output)


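# The table above leans on two small helpers defined elsewhere in this
# module. A hedged sketch of what they might look like, given how the
# columns use them; the record layout assumed here (a ``files`` list with
# ``length`` and ``url`` keys) is an assumption, not the module's actual
# implementation:

def get_size_sketch(result):
    """Return a human-readable total size for a search record."""
    nbytes = sum(f.get('length', 0) for f in result.get('files', []))
    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
        if nbytes < 1024:
            return '{:.1f} {}'.format(nbytes, unit)
        nbytes /= 1024
    return '{:.1f} PB'.format(nbytes)


def get_identifier_sketch(result):
    """Return the filename portion of a record's first file URL."""
    files = result.get('files', [])
    return files[0]['url'].rsplit('/', 1)[-1] if files else ''

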
def process_hdfs(event):
    """Process the HDF file output from a `process_corr` run. The output is
    ready to be placed on petreldata.net."""
    import os
    import XPCS
    from XPCS.tools.xpcs_metadata import gather
    from XPCS.tools.xpcs_plots import make_plots
    from XPCS.tools.xpcs_qc import check_hdf_dataset
    from pilot.client import PilotClient

    staging = event['staging']
    ver = XPCS.xpcs_version
    pc = PilotClient()
    assert pc.context.current == 'xpcs', 'Not in XPCS context!'
    pc.project.current = 'nick-testing'
    skipped = 0
    outputs = {}
    for hdf_dir in os.listdir(staging):
        os.chdir(os.path.join(staging, hdf_dir))
        try:
            outputs[hdf_dir] = {}
            hdf_file = f'{hdf_dir}.hdf'
            if not os.path.exists(hdf_file):
                raise ValueError(f'{hdf_file} does not exist!')
            if check_hdf_dataset(hdf_file) is False:
                # Dataset failed the QC check; count it and skip it.
                skipped += 1
                continue
            metadata = gather(hdf_file)
            outputs[hdf_dir]['make_plots'] = make_plots(hdf_file)
            metadata.update(event['custom_metadata'])
            os.chdir(staging)
            outputs[hdf_dir]['pilot'] = pc.upload(hdf_dir, '/',
                                                  metadata=metadata,
                                                  update=True,
                                                  skip_analysis=True)
        except Exception as e:
            outputs[hdf_dir] = str(e)
    return {
        'total': len(os.listdir(staging)),
        'skipped': skipped,
        'outputs': outputs,
        'version': ver,
    }


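# A minimal sketch of the ``event`` payload process_hdfs() expects, based
# only on the keys it reads above; the directory layout and metadata values
# shown are illustrative:
example_event = {
    # Directory containing one subdirectory per dataset, each holding a
    # matching '<name>.hdf' file, e.g. /staging/A001_test/A001_test.hdf
    'staging': '/staging',
    # Extra key/value pairs merged into each dataset's gathered metadata.
    'custom_metadata': {'experiment': 'example-beamtime'},
}
# result = process_hdfs(example_event)

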
def test_special_paths(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    assert pc.get_path('///') == '/foo_folder'
    assert pc.get_path('.') == '/foo_folder'
    assert pc.get_path('..') == '/foo_folder'
    assert pc.get_path('/foo/bar/baz.txt') == '/foo_folder/foo/bar/baz.txt'


def test_build_short_path(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    assert pc.build_short_path('foo', '/') == 'foo'
    assert pc.build_short_path('/foo/', '/') == 'foo'
    assert pc.build_short_path('foo', 'bar') == 'bar/foo'
    assert pc.build_short_path('/foo/', '/bar/') == 'bar/foo'


def test_get_subject_url(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    args = ('myfolder/dataframe.dat',)
    assert pc.get_globus_url(*args) == pc.get_subject_url(*args)


def test_get_index(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    assert pc.get_index() == 'foo-search-index'
    assert pc.get_index(project='foo-project-test') == 'foo-test-search-index'


def test_get_path(mock_projects):
    pc = PilotClient()
    pc.project.current = 'foo-project'
    assert pc.get_path('folder/file.txt') == '/foo_folder/folder/file.txt'
    path = pc.get_path('folder/file.txt', project='foo-project-test')
    assert path == '/foo_test_folder/folder/file.txt'


def test_invalid_project_with_explicit_name(mock_projects):
    pc = PilotClient()
    with pytest.raises(PilotInvalidProject):
        pc.get_index('does-not-exist')


def test_invalid_project(mock_projects):
    pc = PilotClient()
    with pytest.raises(PilotInvalidProject):
        pc.get_index()


def get_pilot_client():
    logging_cfg.setup_logging(level='CRITICAL')
    return PilotClient()


def describe(path, test, output_json):
    pc = PilotClient()
    if not pc.is_logged_in():
        click.echo('You are not logged in.')
        return

    old_entry = False
    fname, dirname = os.path.basename(path), os.path.dirname(path)
    entry = pc.get_search_entry(fname, dirname, test)
    if not entry:
        old_entry = True
        entry = pc.get_search_entry(fname, dirname, old=True)
    if not entry:
        click.echo('Unable to find entry')
        return

    if output_json:
        click.echo(json.dumps(entry, indent=4))
        return

    general_fmt = '{:21.20}{}'
    general_columns = [
        ('Title', lambda r: r['dc']['titles'][0]['title']),
        ('Authors', lambda r: [c['creatorName'] for c in r['dc']['creators']]),
        ('Publisher', lambda r: r['dc']['publisher']),
        ('Subjects', lambda r: [s['subject'] for s in r['dc']['subjects']]),
        ('Dates', get_dates),
        ('Data', lambda r: r['ncipilot']['data_type']),
        ('Dataframe', lambda r: r['ncipilot']['dataframe_type']),
        ('Rows', lambda r: str(r['field_metadata']['numrows'])),
        ('Columns', lambda r: str(r['field_metadata']['numcols'])),
        ('Formats', lambda r: r['dc']['formats']),
        ('Version', lambda r: r['dc']['version']),
        ('Size', get_size),
        ('Filename', get_identifier),
        ('Description', lambda r: r['dc']['descriptions'][0]['description']),
    ]

    def format_list(name, content):
        return [general_fmt.format(name, content[0])] + \
               [general_fmt.format('', item) for item in content[1:]]

    def format_entry(name, content):
        return [general_fmt.format(name, content)]

    output = fetch_format(general_columns, entry, format_entry, format_list)

    fmt = ('{:21.20}'
           '{:8.7}{:7.6}{:5.4}{:12.11}{:7.6}'
           '{:7.6}{:7.6}{:7.6}{:7.6}'
           '{:8.7}{:8.7}{:8.7}')
    field_metadata = [
        ('Column Name', 'name'),
        ('Type', 'type'),
        ('Count', 'count'),
        ('Freq', 'frequency'),
        ('Top', 'top'),
        ('Unique', 'unique'),
        ('Min', 'min'),
        ('Max', 'max'),
        ('Mean', 'mean'),
        ('Std', 'std'),
        ('25-PCTL', '25'),
        ('50-PCTL', '50'),
        ('75-PCTL', '75'),
    ]
    names = [n for n, f in field_metadata]
    keys = [f for n, f in field_metadata]
    fm_output = []
    try:
        for field in entry['field_metadata']['field_definitions']:
            f_metadata = [str(field.get(key, '')) for key in keys]
            fm_output.append(fmt.format(*f_metadata))
        field_metadata_names = fmt.format(*names)
        output = '{}\n\nField Metadata\n{}\n{}'.format(
            output, field_metadata_names, '\n'.join(fm_output))
    except KeyError:
        output = '{}\n\nField Metadata\nNo Field Metadata'.format(output)

    if not test:
        sub = pc.get_subject_url(fname, dirname, test, old=old_entry)
        qsub = urllib.parse.quote_plus(urllib.parse.quote_plus(sub))
        portal_url = '{}{}'.format(PORTAL_DETAIL_PAGE_PREFIX, qsub)
        other_data = [general_fmt.format('Subject', sub),
                      general_fmt.format(path, portal_url)]
        output = '{}\n\nOther Data\n{}'.format(output, '\n'.join(other_data))

    click.echo(output)
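

# Illustration of the double URL-encoding describe() applies when building
# portal links (the subject value is made up; PORTAL_DETAIL_PAGE_PREFIX is
# defined elsewhere in the module):
#
#   import urllib.parse
#   sub = 'globus://foo-project-endpoint/foo_folder/foo'
#   once = urllib.parse.quote_plus(sub)    # 'globus%3A%2F%2F...'
#   twice = urllib.parse.quote_plus(once)  # 'globus%253A%252F%252F...'
#
# The doubly-encoded form matches the expected URL in test_get_portal_url
# above, which is why quote_plus is applied twice before appending to
# PORTAL_DETAIL_PAGE_PREFIX.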