def data_glob(self, *pathargs, glob='*'):
    """Retrieve file list matching glob

    Parameters
    ----------
    pathargs: (str[, ...])
        Path components

    glob: str
        The file name match criterion

    Returns
    -------
    file_paths: [str[, ...]]
        File paths that match the glob criterion. Note that the
        TEST_BIGDATA and `repo_path` roots are removed so these results
        can be fed back into `get_data()`
    """
    # Build the full search path under the big-data root, then glob it
    # either locally or remotely depending on what the path looks like.
    base = op.join(get_bigdata_root(), *self.repo_path)
    target = op.join(base, *pathargs)

    if op.exists(target):
        matches = _data_glob_local(target, glob)
    elif check_url(target):
        matches = _data_glob_url(target, glob)
    else:
        raise BigdataError('Path cannot be found: {}'.format(target))

    # Strip "<base>/" from each result so it can be fed back to `get_data()`.
    prefix = len(base) + 1  # +1 to account for the folder delimiter.
    return [match[prefix:] for match in matches]
def __init__(self, env="dev", inputs_root="jwst-pipeline",
             results_root="jwst-pipeline-results", docopy=True,
             input=None, input_remote=None, output=None,
             truth=None, truth_remote=None, remote_results_path=None,
             test_name=None, traceback=None, **kwargs):
    """Set up regression-test state.

    Stores the environment/repository configuration, resolves the
    big-data root once via `get_bigdata_root()`, and initializes the
    bookkeeping attributes used during a test run.
    """
    # Environment and repository configuration.
    self._env = env
    self._inputs_root = inputs_root
    self._results_root = results_root
    self._bigdata_root = get_bigdata_root()
    self.docopy = docopy

    # Attributes backed by @property accessors.
    self.input = input
    self.input_remote = input_remote
    self.output = output
    self.truth = truth
    self.truth_remote = truth_remote

    # Plain attributes (no @property).
    self.remote_results_path = remote_results_path
    self.test_name = test_name
    self.traceback = traceback

    # Derived state, populated later.
    self.asn = None
def test_data_glob_url(glob_filter, nfiles, pytestconfig, request):
    """Test globbing over a URL

    Parameters
    ----------
    glob_filter: str
        The glob filter to use.

    nfiles: int
        The number of files expected to find.
    """
    repo = pytestconfig.getini('inputs_root')[0]
    environment = request.config.getoption('env')
    url_path = os.path.join(repo, environment, 'infrastructure/test_data_glob')

    found = _data_glob_url(url_path, glob_filter, root=get_bigdata_root())
    assert len(found) == nfiles
def set_environ(self):
    """Configure the process environment for a test run."""
    # When TEST_BIGDATA points at a URL, data must always be copied locally.
    data_root = get_bigdata_root()
    if data_root and check_url(data_root):
        self.docopy = True

    # NOTE: This could be explicitly controlled using pytest fixture
    # but too many ways to do the same thing would be confusing.
    # Refine this logic if using pytest fixture.
    # HSTCAL cannot open remote CRDS on FTP but central storage is okay.
    # So use central storage if available to avoid FTP.
    previous = self.prevref
    if previous is None or previous.startswith(('ftp', 'http')):
        os.environ[self.refstr] = self.curdir + os.sep
        self.use_ftp_crds = True

    # Turn off Astrometry updates
    os.environ['ASTROMETRY_STEP_CONTROL'] = 'OFF'
def _data_glob_url(url, glob='*', root=None):
    """Glob files served by an Artifactory instance.

    Parameters
    ----------
    url: str
        The URL to check

    glob: str
        The file name match criterion

    root: str or None
        The Artifactory server root to issue the search against.
        If None, `get_bigdata_root()` is used. (The module's own tests
        call this function with the `root` keyword; without this
        parameter that call raises TypeError.)

    Returns
    -------
    url_paths: [str[, ...]]
        Full URLS that match the glob criterion
    """
    if root is None:
        root = get_bigdata_root()

    # Prefer an explicit API key file; anonymous searches are truncated
    # by the server at 1000 results.
    try:
        envkey = os.environ['API_KEY_FILE']
    except KeyError:
        envkey = ARTIFACTORY_API_KEY_FILE
    try:
        with open(envkey) as fp:
            headers = {'X-JFrog-Art-Api': fp.readline().strip()}
    except (PermissionError, FileNotFoundError):
        print(
            "Warning: Anonymous Artifactory search requests are limited to "
            "1000 results. Use an API key and define API_KEY_FILE environment "
            "variable to get full search results.",
            file=sys.stderr)
        headers = None

    search_url = op.join(root, 'api/search/artifact')

    # Pick out "jwst-pipeline", the repo name.
    # NOTE(review): index 4 assumes a URL shaped like
    # scheme://host/artifactory/<repo>/... — confirm against callers.
    repo = url.split('/')[4]
    params = {'name': glob, 'repos': repo}
    with requests.get(search_url, params=params, headers=headers) as r:
        # Strip the storage-API prefix so results are plain artifact URLs.
        url_paths = [
            a['uri'].replace('api/storage/', '')
            for a in r.json()['results']
        ]
    return url_paths
def data_glob(self, *pathargs, glob='*'):
    """Retrieve file list matching glob

    Parameters
    ----------
    pathargs: (str[, ...])
        Path components

    glob: str
        The file name match criterion

    Returns
    -------
    file_paths: [str[, ...]]
        File paths that match the glob criterion. Note that the
        TEST_BIGDATA and `repo_path` roots are removed so these results
        can be fed back into `get_data()`
    """
    # Assemble the full path; dispatch to the local or URL glob
    # depending on whether the path exists on disk.
    root = op.join(get_bigdata_root(), *self.repo_path)
    path = op.join(root, *pathargs)

    if op.exists(path):
        found = _data_glob_local(path, glob)
    else:
        if not check_url(path):
            raise BigdataError('Path cannot be found: {}'.format(path))
        found = _data_glob_url(path, glob)

    # Drop the root (and its trailing delimiter) from every result so
    # the paths can be handed back to `get_data()`.
    trim = len(root) + 1
    return [item[trim:] for item in found]
def _data_glob_url(url, glob='*', root=None):
    """Glob files served by an Artifactory instance.

    Parameters
    ----------
    url: str
        The URL to check

    glob: str
        The file name match criterion

    root: str or None
        The Artifactory server root to issue the search against.
        If None, `get_bigdata_root()` is used. (The module's own tests
        call this function with the `root` keyword; without this
        parameter that call raises TypeError.)

    Returns
    -------
    url_paths: [str[, ...]]
        Full URLS that match the glob criterion
    """
    if root is None:
        root = get_bigdata_root()

    # Prefer an explicit API key file; anonymous searches are truncated
    # by the server at 1000 results.
    try:
        envkey = os.environ['API_KEY_FILE']
    except KeyError:
        envkey = ARTIFACTORY_API_KEY_FILE
    try:
        with open(envkey) as fp:
            headers = {'X-JFrog-Art-Api': fp.readline().strip()}
    except (PermissionError, FileNotFoundError):
        print("Warning: Anonymous Artifactory search requests are limited to "
              "1000 results. Use an API key and define API_KEY_FILE environment "
              "variable to get full search results.", file=sys.stderr)
        headers = None

    search_url = op.join(root, 'api/search/artifact')

    # Pick out "jwst-pipeline", the repo name.
    # NOTE(review): index 4 assumes a URL shaped like
    # scheme://host/artifactory/<repo>/... — confirm against callers.
    repo = url.split('/')[4]
    params = {'name': glob, 'repos': repo}
    with requests.get(search_url, params=params, headers=headers) as r:
        # Strip the storage-API prefix so results are plain artifact URLs.
        url_paths = [a['uri'].replace('api/storage/', '')
                     for a in r.json()['results']]
    return url_paths
def setup_class(self):
    """Resolve the big-data root once for every test in this class."""
    self.root = get_bigdata_root()
def test_no_path(self):
    """A nonexistent local path in the env var yields no root."""
    os.environ[self.key] = '/some/fake/path'
    result = get_bigdata_root(envkey=self.key)
    assert result is None
def test_has_env_local(self):
    """An existing local directory is returned unchanged."""
    cwd = os.path.abspath(os.curdir)
    os.environ[self.key] = cwd
    assert get_bigdata_root(envkey=self.key) == cwd
def test_has_env_url(self):
    """A URL value in the env var is returned unchanged."""
    url = 'https://google.com'
    os.environ[self.key] = url
    assert get_bigdata_root(envkey=self.key) == url
def test_no_env(self):
    """With the env var unset, root lookup raises BigdataError."""
    # pop() with a default is a no-op when the key is already absent.
    os.environ.pop(self.key, None)
    with pytest.raises(BigdataError):
        get_bigdata_root(envkey=self.key)
def auto_toggle_docopy(self):
    """Enable local copies exactly when the big-data root is a URL."""
    root = get_bigdata_root()
    # True only when a root exists AND it checks out as a URL;
    # otherwise False — identical truth table to the if/else form.
    self.docopy = bool(root and check_url(root))