def data_glob(self, path=None, glob='*', docopy=None):
    """Get a list of files matching ``glob`` under ``path``.

    Parameters
    ----------
    path : str or None
        Path to search, relative to the big-data root. If `None`, the
        previously stored ``self.input_remote`` is used; otherwise
        ``self.input_remote`` is updated to this value.

    glob : str
        The file name match criterion.

    docopy : bool or None
        Accepted for backward compatibility but currently unused by this
        method; kept so existing callers do not break.

    Returns
    -------
    file_paths : list of str
        Matching file paths with the root prefix removed.

    Raises
    ------
    BigdataError
        If the big-data root is neither an existing local path nor a
        reachable URL.
    """
    if path is None:
        path = self.input_remote
    else:
        self.input_remote = path
    # NOTE(review): the original resolved ``docopy`` from ``self.docopy``
    # here, but the value was never used afterwards. The dead assignment
    # has been removed; the parameter remains for interface compatibility.

    # Get full path and proceed depending on whether the big-data root
    # is a local path or a URL.
    root = self.bigdata_root
    if op.exists(root):
        root_path = op.join(root, self._inputs_root, self._env)
        root_len = len(root_path) + 1  # +1 for the folder delimiter
        path = op.join(root_path, path)
        file_paths = _data_glob_local(path, glob)
    elif check_url(root):
        # Presumably ``_data_glob_url`` returns paths that begin with
        # ``<env>/`` — TODO confirm; the prefix strip below assumes so.
        root_len = len(self._env) + 1
        file_paths = _data_glob_url(self._inputs_root, self._env,
                                    path, glob, root=root)
    else:
        raise BigdataError('Path cannot be found: {}'.format(path))

    # Remove the root from the paths so the results can be fed back
    # into the data-retrieval helpers.
    file_paths = [file_path[root_len:] for file_path in file_paths]
    return file_paths
def data_glob(self, *pathargs, glob='*'):
    """Return the file paths matching a glob pattern.

    Parameters
    ----------
    pathargs: (str[, ...])
        Path components

    glob: str
        The file name match criterion

    Returns
    -------
    file_paths: [str[, ...]]
        File paths that match the glob criterion. Note that the
        TEST_BIGDATA and `repo_path` roots are removed so these results
        can be fed back into `get_data()`
    """
    # Build the absolute search location under the configured
    # big-data root, then branch on local path vs. URL.
    base = op.join(get_bigdata_root(), *self.repo_path)
    target = op.join(base, *pathargs)

    if op.exists(target):
        matches = _data_glob_local(target, glob)
    elif check_url(target):
        matches = _data_glob_url(target, glob)
    else:
        raise BigdataError('Path cannot be found: {}'.format(target))

    # Strip the root prefix (+1 accounts for the folder delimiter)
    # so callers can feed results back into get_data().
    prefix_len = len(base) + 1
    return [match[prefix_len:] for match in matches]
def download_crds(refname, timeout=30, verbose=False):
    """
    Download a CRDS file from HTTP to current directory.

    Parameters
    ----------
    refname : str
        Filename. Examples::

            '012345678_bia.fits'
            'jref$012345678_bia.fits'
            '/path/to/012345678_bia.fits'

        But only filename with ``dir$name`` format would proceed
        to download stage.

    timeout : int or `None`
        Number of seconds before timeout error is raised.
        If `None`, no timeout happens but this is not recommended.

    verbose : bool
        If `True`, print messages to screen.
        This is useful for debugging.
    """
    refdir = None

    # Expand the IRAF-style "dir$name" shortcut. When the directory
    # variable is defined in the environment, build a real path from it;
    # otherwise fall back to the bare file name.
    if '$' in refname:
        refdir, fname = refname.split('$')
        if refdir in os.environ:
            refname = os.path.join(os.environ[refdir], fname)
        else:
            refname = fname

    # A CRDS file for a given name never changes, so skip the download
    # when the file is already accessible on disk.
    if os.path.isfile(refname):
        if verbose:
            print('{} already exists, skipping download'.format(refname))
        return

    # Without a "dir$" prefix there is no way to construct the URL.
    if refdir is None:
        raise ValueError('Unknown HTTP destination for {}'.format(refname))

    from ci_watson.artifactory_helpers import check_url, _download

    # NOTE: For this part to work, jref (for example) must point to
    # "." or reference file value in FITS header cannot have "jref$".
    url = 'http://ssb.stsci.edu/cdbs/{}/{}'.format(refdir, fname)
    if not check_url(url):
        raise ValueError('Invalid URL {}'.format(url))
    _download(url, fname, timeout=timeout)

    if verbose:
        print('Downloaded {} from {}'.format(refname, url))
def set_environ(self):
    """Configure the process environment for a test run.

    Side effects: may set ``self.docopy``, ``self.use_ftp_crds``,
    the reference-path environment variable named by ``self.refstr``,
    and ``ASTROMETRY_STEP_CONTROL``.
    """
    # When TEST_BIGDATA points at a URL, data must be copied locally.
    bigdata = get_bigdata_root()
    if bigdata and check_url(bigdata):
        self.docopy = True

    # NOTE: This could be explicitly controlled using pytest fixture
    # but too many ways to do the same thing would be confusing.
    # Refine this logic if using pytest fixture.
    # HSTCAL cannot open remote CRDS on FTP but central storage is okay.
    # So use central storage if available to avoid FTP.
    ref_is_remote = (self.prevref is None
                     or self.prevref.startswith(('ftp', 'http')))
    if ref_is_remote:
        os.environ[self.refstr] = self.curdir + os.sep
        self.use_ftp_crds = True

    # Turn off Astrometry updates
    os.environ['ASTROMETRY_STEP_CONTROL'] = 'OFF'
def data_glob(self, *pathargs, glob='*'):
    """List the files that match a glob pattern.

    Parameters
    ----------
    pathargs: (str[, ...])
        Path components

    glob: str
        The file name match criterion

    Returns
    -------
    file_paths: [str[, ...]]
        File paths that match the glob criterion. Note that the
        TEST_BIGDATA and `repo_path` roots are removed so these results
        can be fed back into `get_data()`
    """
    # Resolve the full search location under the big-data root and
    # dispatch on whether it is a local directory or a URL.
    anchor = op.join(get_bigdata_root(), *self.repo_path)
    location = op.join(anchor, *pathargs)

    if op.exists(location):
        found = _data_glob_local(location, glob)
    elif check_url(location):
        found = _data_glob_url(location, glob)
    else:
        raise BigdataError('Path cannot be found: {}'.format(location))

    # Drop the root prefix; the extra character is the folder delimiter.
    strip = len(anchor) + 1
    return [entry[strip:] for entry in found]
def test_check_url(val, ans):
    """Verify ``check_url`` returns exactly the expected boolean."""
    result = check_url(val)
    assert result is ans
def auto_toggle_docopy(self):
    """Set ``self.docopy`` based on the big-data root type.

    ``docopy`` becomes `True` when the big-data root is configured and
    is a reachable URL (remote data must be copied locally), `False`
    otherwise. Replaces the original if/else that assigned the two
    boolean literals with a single idiomatic ``bool(...)`` expression.
    """
    bigdata_root = get_bigdata_root()
    self.docopy = bool(bigdata_root and check_url(bigdata_root))