Example no. 1
0
    def data_glob(self, path=None, glob='*', docopy=None):
        """Get a list of files"""
        # Recall the last remote input path when none is given;
        # otherwise remember the new one for subsequent calls.
        if path is None:
            path = self.input_remote
        else:
            self.input_remote = path
        docopy = self.docopy if docopy is None else docopy

        # Resolve against the bigdata root; it is either a local
        # directory tree or a remote (URL) root.
        root = self.bigdata_root
        if op.exists(root):
            # Local tree: glob under <root>/<inputs_root>/<env>/<path>.
            prefix = op.join(root, self._inputs_root, self._env)
            strip = len(prefix) + 1
            matches = _data_glob_local(op.join(prefix, path), glob)
        elif check_url(root):
            # Remote tree: delegate globbing; only "<env>/" is stripped.
            strip = len(self._env) + 1
            matches = _data_glob_url(self._inputs_root,
                                     self._env,
                                     path,
                                     glob,
                                     root=root)
        else:
            raise BigdataError('Path cannot be found: {}'.format(path))

        # Drop the root prefix so results can be fed back into get_data().
        return [match[strip:] for match in matches]
Example no. 2
0
    def data_glob(self, *pathargs, glob='*'):
        """Retrieve the list of files matching a glob pattern.

        Parameters
        ----------
        pathargs: (str[, ...])
            Path components

        glob: str
            The file name match criterion

        Returns
        -------
        file_paths: [str[, ...]]
            File paths that match the glob criterion.
            Note that the TEST_BIGDATA and `repo_path`
            roots are removed so these results can be fed
            back into `get_data()`
        """
        # Anchor everything at <bigdata_root>/<repo_path...>.
        root = op.join(get_bigdata_root(), *self.repo_path)
        path = op.join(root, *pathargs)

        # Local directory or remote URL decides which glob helper runs.
        if op.exists(path):
            matches = _data_glob_local(path, glob)
        elif check_url(path):
            matches = _data_glob_url(path, glob)
        else:
            raise BigdataError('Path cannot be found: {}'.format(path))

        # Strip "<root>/" (the +1 accounts for the folder delimiter)
        # so the results are relative and reusable with get_data().
        prefix_len = len(root) + 1
        return [match[prefix_len:] for match in matches]
Example no. 3
0
def download_crds(refname, timeout=30, verbose=False):
    """
    Download a CRDS file from HTTP to current directory.

    Parameters
    ----------
    refname : str
        Filename. Examples::

            '012345678_bia.fits'
            'jref$012345678_bia.fits'
            '/path/to/012345678_bia.fits'

        But only filename with ``dir$name`` format would
        proceed to download stage.

    timeout : int or `None`
        Number of seconds before timeout error is raised.
        If `None`, no timeout happens but this is not recommended.

    verbose : bool
        If `True`, print messages to screen.
        This is useful for debugging.

    """
    refdir = None
    fname = refname

    # Expand IRAF-style "dir$name" shortcut.  When the directory
    # variable is defined in the environment, build a concrete path;
    # otherwise fall back to the bare file name.
    if '$' in refname:
        refdir, fname = refname.split('$')
        if refdir in os.environ:
            refname = os.path.join(os.environ[refdir], fname)
        else:
            refname = fname

    # A delivered CRDS file never changes, so an existing local copy
    # (copied earlier or directly visible on disk) is final.
    if os.path.isfile(refname):
        if verbose:
            print('{} already exists, skipping download'.format(refname))
        return

    # Without a "dir$" prefix there is no way to build a download URL.
    if refdir is None:
        raise ValueError('Unknown HTTP destination for {}'.format(refname))

    from ci_watson.artifactory_helpers import check_url, _download

    # NOTE: For this part to work, jref (for example) must point to
    #       "." or reference file value in FITS header cannot have "jref$".
    url = 'http://ssb.stsci.edu/cdbs/{}/{}'.format(refdir, fname)
    if not check_url(url):
        raise ValueError('Invalid URL {}'.format(url))
    _download(url, fname, timeout=timeout)

    if verbose:
        print('Downloaded {} from {}'.format(refname, url))
Example no. 4
0
    def set_environ(self):
        """Prepare the process environment for a remote-data test run."""
        # A URL-valued TEST_BIGDATA root forces local copies of the data.
        root = get_bigdata_root()
        if root and check_url(root):
            self.docopy = True

        # NOTE: This could be explicitly controlled using pytest fixture
        #       but too many ways to do the same thing would be confusing.
        #       Refine this logic if using pytest fixture.
        # HSTCAL cannot open remote CRDS on FTP but central storage is okay.
        # So use central storage if available to avoid FTP.
        if self.prevref is None or self.prevref.startswith(('ftp', 'http')):
            os.environ[self.refstr] = self.curdir + os.sep
            self.use_ftp_crds = True

        # Turn off Astrometry updates
        os.environ['ASTROMETRY_STEP_CONTROL'] = 'OFF'
Example no. 5
0
    def set_environ(self):
        """Prepare environment variables for a remote-data test run.

        Side effects: may set ``self.docopy`` and ``self.use_ftp_crds``,
        writes ``os.environ[self.refstr]`` and always writes
        ``os.environ['ASTROMETRY_STEP_CONTROL']``.
        """
        # Enforce copies of data when TEST_BIGDATA is URL
        input_dir = get_bigdata_root()

        if input_dir and check_url(input_dir):
            self.docopy = True

        # NOTE: This could be explicitly controlled using pytest fixture
        #       but too many ways to do the same thing would be confusing.
        #       Refine this logic if using pytest fixture.
        # HSTCAL cannot open remote CRDS on FTP but central storage is okay.
        # So use central storage if available to avoid FTP.
        if self.prevref is None or self.prevref.startswith(('ftp', 'http')):
            # Point the reference variable (e.g. 'jref') at the current
            # working directory so CRDS files are read from local copies.
            os.environ[self.refstr] = self.curdir + os.sep
            self.use_ftp_crds = True

        # Turn off Astrometry updates
        os.environ['ASTROMETRY_STEP_CONTROL'] = 'OFF'
Example no. 6
0
    def data_glob(self, *pathargs, glob='*'):
        """Retrieve file list matching glob

        Parameters
        ----------
        pathargs: (str[, ...])
            Path components

        glob: str
            The file name match criterion

        Returns
        -------
        file_paths: [str[, ...]]
            File paths that match the glob criterion.
            Note that the TEST_BIGDATA and `repo_path`
            roots are removed so these results can be fed
            back into `get_data()`

        Raises
        ------
        BigdataError
            If the joined path is neither an existing local
            path nor a reachable URL.
        """

        # Get full path and proceed depending on whether
        # is a local path or URL.
        root = op.join(get_bigdata_root(), *self.repo_path)
        path = op.join(root, *pathargs)
        if op.exists(path):
            file_paths = _data_glob_local(path, glob)
        elif check_url(path):
            file_paths = _data_glob_url(path, glob)
        else:
            raise BigdataError('Path cannot be found: {}'.format(path))

        # Remove the root from the paths
        root_len = len(root) + 1  # +1 to account for the folder delimiter.
        file_paths = [
            file_path[root_len:]
            for file_path in file_paths
        ]
        return file_paths
def test_check_url(val, ans):
    """Parametrized check: ``check_url(val)`` must be exactly the expected bool."""
    outcome = check_url(val)
    assert outcome is ans
Example no. 8
0
 def auto_toggle_docopy(self):
     """Enable data copying exactly when the bigdata root is a URL."""
     root = get_bigdata_root()
     # Falsy root (unset) or a non-URL root both disable copying.
     self.docopy = bool(root and check_url(root))
Example no. 9
0
 def auto_toggle_docopy(self):
     """Set ``self.docopy`` from the location of the bigdata root.

     Copying is turned on when the root is a URL and turned off
     otherwise (local path, or an unset/empty root).
     """
     bigdata_root = get_bigdata_root()
     if bigdata_root and check_url(bigdata_root):
         self.docopy = True
     else:
         self.docopy = False