Beispiel #1
0
    def retrieve_data_from_uid(self, uids, *, cache=True):
        """
        Stage and download ALMA data, logging the expected total file
        size before the download begins.

        Parameters
        ----------
        uids : list or str
            A single UID or a list of valid UIDs.
            UIDs should have the form: 'uid://A002/X391d0b/X7b'
        cache : bool
            Whether to cache the downloads.

        Returns
        -------
        downloaded_files : list
            A list of the downloaded file paths
        """
        # Normalize a lone UID into a one-element list.
        if isinstance(uids, (str, bytes)):
            uids = [uids]
        if not isinstance(uids, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        data_info = self.get_data_info(uids)
        urls = data_info['access_url']
        total_bytes = data_info['content_length'].sum()*u.B

        log.info("Downloading files of size {0}...".format(total_bytes.to(u.GB)))
        # TODO: Add cache=cache keyword here.  Currently would have no effect.
        return self.download_files(urls)
Beispiel #2
0
    def get_artifact(self, artifact_id, filename=None, verbose=False):
        """
        Download artifacts from EHST. Artifact is a single Hubble product file.

        Parameters
        ----------
        artifact_id : string
            Identifier of the physical product (file) to retrieve, mandatory.
        filename : string, optional
            Default None.  Local file name used to store the artifact;
            defaults to the artifact id itself.
        verbose : bool, optional
            Default False.  If True, log the request URL and target file.

        Returns
        -------
        None. It downloads the artifact indicated
        """

        response = self._request('GET', self.data_url, save=True, cache=True,
                                 params={"ARTIFACT_ID": artifact_id})

        if filename is None:
            filename = artifact_id

        if verbose:
            log.info(self.data_url + "?ARTIFACT_ID=" + artifact_id)
            log.info(self.copying_string.format(filename))

        # The request was saved to a temp location; move it into place.
        shutil.move(response, filename)
Beispiel #3
0
    def __init__(self, provider="AWS", profile=None, verbose=False):
        """
        Set up downloading of public files from S3 instead of the STScI
        servers.  Requires the boto3 and botocore libraries to function.

        Parameters
        ----------
        provider : str
            Which cloud data provider to use. Currently only AWS S3 is
            supported, so at the moment this argument is ignored.
        profile : str
            Deprecated; AWS credentials are no longer needed to access
            MAST Open Data.
        verbose : bool
            Default False. Display extra info and warnings if true.
        """

        # ``profile`` is deprecated: the data is now freely accessible.
        if profile is not None:
            warnings.warn(("MAST Open Data on AWS is now free to access and does "
                           "not require an AWS account"), AstropyDeprecationWarning)

        # Imported lazily so boto3/botocore are only required when this
        # cloud-access class is actually instantiated.
        import boto3
        import botocore

        self.supported_missions = ["mast:hst/product", "mast:tess/product", "mast:kepler"]
        self.boto3 = boto3
        self.botocore = botocore
        # Unsigned config: anonymous requests against the public bucket.
        self.config = botocore.client.Config(signature_version=botocore.UNSIGNED)
        self.pubdata_bucket = "stpubdata"

        if verbose:
            log.info("Using the S3 STScI public dataset")
Beispiel #4
0
    def test_esasky_get_spectra_obs_id(self):
        # Remote test: fetch spectra by observation id for several missions
        # and check each mission yields files on disk and HDULists in memory.
        download_directory = "ESASkyRemoteTest"
        if not os.path.exists(download_directory):
            os.makedirs(download_directory)

        missions = ["ISO-IR", "Chandra", "IUE", "XMM-NEWTON", "HST-IR",
                    "Herschel", "HST-UV", "HST-OPTICAL"]
        observation_ids = ["02101201", "1005", "LWR13178", "0001730201",
                           "ibh706cqq", "1342253595", "z1ax0102t", "oeik2s020"]

        result = ESASkyClass.get_spectra(observation_ids=observation_ids,
                                         missions=missions,
                                         download_dir=download_directory)

        for mission in missions:
            assert os.path.exists(os.path.join(download_directory, mission))
            log.info("Checking {} data.".format(mission))
            key = mission.upper()
            if key == "HERSCHEL":
                # Herschel spectra are nested by obs id / spectrometer / band.
                herschel = result[key]["1342253595"]
                assert isinstance(herschel["WBS"]["WBS-V_USB_4b"], HDUList)
                assert isinstance(herschel["HRS"]["HRS-H_LSB_4b"], HDUList)
            else:
                assert isinstance(result[key][0], HDUList)

        result = None

        shutil.rmtree(download_directory)
Beispiel #5
0
    def get_postcard(self, observation_id, calibration_level="RAW",
                     resolution=256, filename=None, verbose=False):
        """
        Download postcards from EHST

        Parameters
        ----------
        observation_id : string
            Identifier of the observation (simple or composite) whose
            postcard should be downloaded, mandatory.
        calibration_level : string, optional
            Default 'RAW'.  Identifier of the data reduction/processing
            applied to the data: RAW, CALIBRATED, PRODUCT or AUXILIARY.
        resolution : integer, optional
            Default 256.  Resolution of the retrieved postcard: 256 or 1024.
        filename : string, optional
            Default None.  Local file name for the postcard; defaults to
            the observation id.
        verbose : bool, optional
            Default False.  If True, log the request URL and target file.

        Returns
        -------
        None. It downloads the observation postcard indicated
        """

        params = {"RETRIEVAL_TYPE": "POSTCARD",
                  "OBSERVATION_ID": observation_id,
                  "CALIBRATION_LEVEL": calibration_level,
                  "RESOLUTION": resolution}

        response = self._request('GET', self.data_url, save=True, cache=True,
                                 params=params)

        if filename is None:
            filename = observation_id

        if verbose:
            log.info(self.data_url + "&".join([
                "?RETRIEVAL_TYPE=POSTCARD", "OBSERVATION_ID=" +
                observation_id, "CALIBRATION_LEVEL=" +
                calibration_level, "RESOLUTION=" + str(resolution)
            ]))
            log.info(self.copying_string.format(filename))

        # The request was saved to a temp location; move it into place.
        shutil.move(response, filename)
Beispiel #6
0
    def test_esasky_get_images_obs_id(self):
        # Remote test: fetch images by observation id for several missions
        # and check each mission yields files on disk and HDULists in memory.
        download_directory = "ESASkyRemoteTest"
        if not os.path.exists(download_directory):
            os.makedirs(download_directory)

        missions = ["SUZAKU", "ISO-IR", "Chandra", "XMM-OM-OPTICAL", "XMM",
                    "XMM-OM-UV", "HST-IR", "Herschel", "Spitzer", "HST-UV",
                    "HST-OPTICAL", "INTEGRAL"]
        observation_ids = ["100001010", "01500403", "21171", "0852000101",
                           "0851180201", "0851180201", "n3tr01c3q",
                           "1342247257", "30002561-25100",
                           "hst_07553_3h_wfpc2_f160bw_pc", "ocli05leq",
                           "88600210001"]

        result = ESASkyClass.get_images(observation_ids=observation_ids,
                                        missions=missions,
                                        download_dir=download_directory)

        for mission in missions:
            assert os.path.exists(os.path.join(download_directory, mission))
            log.info("Checking {} data.".format(mission))
            key = mission.upper()
            if key == "HERSCHEL":
                # Herschel image results are keyed by SPIRE band.
                for band in ("250", "350", "500"):
                    assert isinstance(result[key][0][band], HDUList)
            else:
                assert isinstance(result[key][0], HDUList)

        result = None

        shutil.rmtree(download_directory)
Beispiel #7
0
def make_finder_chart(target,
                      radius,
                      save_prefix,
                      service=SkyView.get_images,
                      service_kwargs=None,
                      alma_kwargs=None,
                      **kwargs):
    """
    Create a "finder chart" showing where ALMA has pointed in various bands,
    including different color coding for public/private data and each band.

    Contours are set at various integration times.

    Parameters
    ----------
    target : `astropy.coordinates` or str
        A legitimate target name
    radius : `~astropy.units.Quantity`
        A degree-equivalent radius.
    save_prefix : str
        The prefix for the output files.  Both .reg and .png files will be
        written.  The .reg files will have the band numbers and
        public/private appended, while the .png file will be named
        prefix_almafinderchart.png
    service : function
        The ``get_images`` function of an astroquery service, e.g. SkyView.
    service_kwargs : dict, optional
        The keyword arguments to pass to the specified service.  Defaults
        to ``{'survey': ['2MASS-K'], 'pixels': 500}``.  See the
        documentation for the individual services for more details.
    alma_kwargs : dict, optional
        Keywords to pass to the ALMA archive when querying.  Defaults to
        ``{'public': False, 'science': False}``.
    private_band_colors / public_band_colors : tuple
        A tuple or list of colors to be associated with private/public
        observations in the various bands
    integration_time_contour_levels : list or np.array
        The levels at which to draw contours in units of seconds.  Default is
        log-spaced (2^n) seconds: [  1.,   2.,   4.,   8.,  16.,  32.])
    """
    # The dict defaults used to be mutable default arguments, which are
    # shared between calls (a classic Python pitfall).  Use None sentinels
    # and build a fresh dict per call instead.
    if service_kwargs is None:
        service_kwargs = {'survey': ['2MASS-K'], 'pixels': 500}
    if alma_kwargs is None:
        alma_kwargs = {'public': False, 'science': False}

    log.info("Querying {0} for images".format(service))
    images = service(target, radius=radius, **service_kwargs)

    # Use the primary HDU of the first returned image as the background.
    image0_hdu = images[0][0]

    return make_finder_chart_from_image(image0_hdu,
                                        target=target,
                                        radius=radius,
                                        save_prefix=save_prefix,
                                        alma_kwargs=alma_kwargs,
                                        **kwargs)
Beispiel #8
0
 def wrapper(*args, **kwargs):
     # Benchmark wrapper: call ``function`` ``num_tries`` times (both are
     # closure variables from the enclosing decorator) and return the
     # average wall-clock time per call together with the last result.
     # NOTE(review): if num_tries == 0 the loop never runs and ``result``
     # is unbound -- presumably num_tries >= 1 is enforced by the
     # decorator; confirm at the definition site.
     ts = time.time()
     for i in range(num_tries):
         result = function(*args, **kwargs)
     te = time.time()
     # Average elapsed time per call, in seconds.
     tt = (te - ts) / num_tries
     if verbose:  # pragma: no cover
         log.info('{} took {} s on AVERAGE for {} call(s).'.format(
             function.__name__, tt, num_tries))
     return tt, result
Beispiel #9
0
 def _get_product_filename(self, product_type, filename):
     """Return the final filename for *product_type*, appending the
     suffix expected for science products (.fits.gz) and postcards (.jpg).
     """
     if product_type == "PRODUCT":
         return filename
     if product_type == "SCIENCE_PRODUCT":
         log.info("This is a SCIENCE_PRODUCT, the filename will be "
                  "renamed to " + filename + ".fits.gz")
         return filename + ".fits.gz"
     # Anything else is treated as a postcard.
     log.info("This is a POSTCARD, the filename will be "
              "renamed to " + filename + ".jpg")
     return filename + ".jpg"
Beispiel #10
0
    def login(self, token=None, store_token=False, reenter_token=False):
        """
        Log session into the MAST portal.

        Parameters
        ----------
        token : string, optional
            Default is None.
            The token to authenticate the user.
            This can be generated at
            https://auth.mast.stsci.edu/token?suggested_name=Astroquery&suggested_scope=mast:exclusive_access.
            If not supplied, it will be prompted for if not in the keyring or set via $MAST_API_TOKEN
        store_token : bool, optional
            Default False.
            If true, MAST token will be stored securely in your keyring.
        reenter_token :  bool, optional
            Default False.
            Asks for the token even if it is already stored in the keyring or $MAST_API_TOKEN environment variable.
            This is the way to overwrite an already stored password on the keyring.

        Returns
        -------
        bool
            True if the session is authenticated (not anonymous).
        """

        # Token resolution order: explicit argument, environment variable,
        # keyring, interactive prompt.
        if token is None and "MAST_API_TOKEN" in os.environ:
            token = os.environ["MAST_API_TOKEN"]

        if token is None:
            token = keyring.get_password("astroquery:mast.stsci.edu.token",
                                         "masttoken")

        if token is None or reenter_token:
            info_msg = "If you do not have an API token already, visit the following link to create one: "
            log.info(info_msg + self.AUTH_URL)
            token = getpass("Enter MAST API Token: ")

        # store token if desired
        if store_token:
            keyring.set_password("astroquery:mast.stsci.edu.token",
                                 "masttoken", token)

        self.session.headers["Accept"] = "application/json"
        self.session.cookies["mast_token"] = token
        info = self.session_info()

        if not info["anon"]:
            log.info("MAST API token accepted, welcome {}".format(
                info["attrib"].get("display_name")))
        else:
            # Fixed the garbled warning text ("To make create ... visit to
            # following link") while keeping the same warning category.
            warn_msg = (
                "MAST API token invalid!\n"
                "To create a new API token visit the following link: " +
                self.AUTH_URL)
            warnings.warn(warn_msg, AuthenticationWarning)

        return not info["anon"]
Beispiel #11
0
    def query_target(self, name, filename=None, output_format='votable',
                     verbose=False):
        """
        It executes a query over EHST and download the xml with the results.

        Parameters
        ----------
        name : string
            target name to be requested, mandatory
        filename : string
            file name to be used to store the metadata, optional, default
            None (results are stored as "target.xml")
        output_format : string
            optional, default 'votable'
            output format of the query
        verbose : bool
            optional, default 'False'
            Flag to display information about the process

        Returns
        -------
        Table with the result of the query. It downloads metadata as a file.
        """

        params = {
            "RESOURCE_CLASS": "OBSERVATION",
            # NOTE(review): this value had been redacted to "******" in the
            # source; restored to match the USERNAME shown in the verbose
            # log URL below -- confirm against the EHST service docs.
            "USERNAME": "ehst-astroquery",
            "SELECTED_FIELDS": "OBSERVATION",
            "QUERY": "(TARGET.TARGET_NAME=='" + name + "')",
            "RETURN_TYPE": str(output_format)
        }
        response = self._request('GET',
                                 self.metadata_url,
                                 save=True,
                                 cache=True,
                                 params=params)

        # Apply the default *before* the verbose log so the message shows
        # the actual destination file instead of "None".
        if filename is None:
            filename = "target.xml"

        if verbose:
            log.info(self.metadata_url + "?RESOURCE_CLASS=OBSERVATION&"
                     "SELECTED_FIELDS=OBSERVATION&QUERY=(TARGET.TARGET_NAME"
                     "=='" + name + "')&USERNAME=ehst-astroquery&"
                     "RETURN_TYPE=" + str(output_format))
            log.info(self.copying_string.format(filename))

        shutil.move(response, filename)

        return modelutils.read_results_table_from_file(filename,
                                                       str(output_format))
Beispiel #12
0
    def download_product(self, observation_id, calibration_level="RAW",
                         filename=None, verbose=False):
        """
        Download products from EHST

        Parameters
        ----------
        observation_id : string
            Identifier of the observation (simple or composite) to be
            downloaded, mandatory.
        calibration_level : string, optional
            Default 'RAW'.  Identifier of the data reduction/processing
            applied to the data: RAW, CALIBRATED, PRODUCT or AUXILIARY.
        filename : string, optional
            Default None.  Local file name for the observation; defaults
            to the observation id with a ".tar" suffix.
        verbose : bool, optional
            Default False.  If True, log the request URL and target file.

        Returns
        -------
        None. It downloads the observation indicated
        """

        if filename is None:
            filename = observation_id + ".tar"

        response = self._request('GET', self.data_url, save=True, cache=True,
                                 params={"OBSERVATION_ID": observation_id,
                                         "CALIBRATION_LEVEL": calibration_level})

        if verbose:
            log.info(self.data_url + "?OBSERVATION_ID=" + observation_id +
                     "&CALIBRATION_LEVEL=" + calibration_level)
            log.info(self.copying_string.format(filename))

        # The request was saved to a temp location; move it into place.
        shutil.move(response, filename)
Beispiel #13
0
    def get_files_from_tarballs(self, downloaded_files, *, regex=r'.*\.fits$',
                                path='cache_path', verbose=True):
        """
        Given a list of successfully downloaded tarballs, extract files
        with names matching a specified regular expression.  The default
        is to extract all FITS files

        NOTE: alma now supports direct listing and downloads of tarballs. See
        ``get_data_info`` and ``download_and_extract_files``

        Parameters
        ----------
        downloaded_files : list
            A list of downloaded files.  These should be paths on your local
            machine.
        regex : str
            A valid regular expression
        path : 'cache_path' or str
            If 'cache_path', will use the astroquery.Alma cache directory
            (``Alma.cache_location``), otherwise will use the specified path.
            Note that the subdirectory structure of the tarball will be
            maintained.
        verbose : bool
            If True, log each extracted member.

        Returns
        -------
        filelist : list
            A list of the extracted file locations on disk

        Raises
        ------
        OSError
            If ``path`` is neither 'cache_path' nor an existing directory.
        """

        if path == 'cache_path':
            path = self.cache_location
        elif not os.path.isdir(path):
            raise OSError("Specified an invalid path {0}.".format(path))

        fitsre = re.compile(regex)

        filelist = []

        for fn in downloaded_files:
            # Use a context manager so each tarball is closed even if
            # extraction fails (the original leaked the open file handle).
            with tarfile.open(fn) as tf:
                for member in tf.getmembers():
                    if fitsre.match(member.name):
                        if verbose:
                            log.info("Extracting {0} to {1}".format(member.name,
                                                                    path))
                        tf.extract(member, path)
                        filelist.append(os.path.join(path, member.name))

        return filelist
Beispiel #14
0
def make_finder_chart_from_image(image,
                                 target,
                                 radius,
                                 save_prefix,
                                 alma_kwargs=None,
                                 **kwargs):
    """
    Create a "finder chart" showing where ALMA has pointed in various bands,
    including different color coding for public/private data and each band.

    Contours are set at various integration times.

    Parameters
    ----------
    image : fits.PrimaryHDU or fits.ImageHDU object
        The image to overlay onto
    target : `astropy.coordinates` or str
        A legitimate target name
    radius : `astropy.units.Quantity`
        A degree-equivalent radius
    save_prefix : str
        The prefix for the output files.  Both .reg and .png files will be
        written.  The .reg files will have the band numbers and
        public/private appended, while the .png file will be named
        prefix_almafinderchart.png
    alma_kwargs : dict, optional
        Keywords to pass to the ALMA archive when querying.  Defaults to
        ``{'public': False, 'science': False, 'cache': False}``.
    private_band_colors / public_band_colors : tuple
        A tuple or list of colors to be associated with private/public
        observations in the various bands
    integration_time_contour_levels : list or np.array
        The levels at which to draw contours in units of seconds.  Default is
        log-spaced (2^n) seconds: [  1.,   2.,   4.,   8.,  16.,  32.])
    """
    # ``alma_kwargs`` used to be a mutable default argument, which is
    # shared between calls (a classic Python pitfall).  Use a None
    # sentinel and build a fresh dict per call instead.
    if alma_kwargs is None:
        alma_kwargs = {'public': False, 'science': False, 'cache': False}

    log.info("Querying ALMA around {0}".format(target))
    catalog = Alma.query_region(coordinate=target,
                                radius=radius,
                                get_html_version=True,
                                **alma_kwargs)

    return make_finder_chart_from_image_and_catalog(image,
                                                    catalog=catalog,
                                                    save_prefix=save_prefix,
                                                    **kwargs)
Beispiel #15
0
 def _parse_result(self, response, verbose=False):
     """
     Parse an HTTP response into an astropy Table.

     Falls back to returning the raw response (with a logged message)
     when the payload cannot be parsed as a table.
     """
     # if verbose is False then suppress any VOTable related warnings
     if not verbose:
         commons.suppress_vo_warnings()
     # try to parse the result into an astropy.Table, else
     # return the raw result with an informative error message.
     try:
         data = BytesIO(response.content)
         table = Table.read(data)
         return table
     except ValueError as e:
         # ``log.info`` takes a single message string; passing the
         # exception as a second positional argument (as before) fed it
         # into %-formatting with no placeholder, breaking the log call.
         log.info("Failed to convert query result to table: {0}".format(e))
         return response
Beispiel #16
0
    def get_postcard(self, tdt, *, filename=None, verbose=False):
        """
        Download postcards from ISO Data Archive

        Parameters
        ----------
        tdt : string
            Identifier of the observation (Target Dedicated Time) whose
            postcard should be downloaded, mandatory.
        filename : string, optional
            Default None.  Base name used to store the postcard (".png"
            is appended).  When None, the name advertised by the server's
            Content-Disposition header is used instead.
        verbose : bool, optional
            Default False.  Flag to display information about the process.

        Returns
        -------
        File name to be used to store the postcard
        """

        link = self.get_postcard_link(tdt, filename, verbose)
        local_filepath = self._request('GET', link, cache=True, save=True)

        if filename is not None:
            filename = filename + ".png"
        else:
            # No name given: take the one the server advertises.
            response = self._request('HEAD', link)
            response.raise_for_status()
            filename = re.findall('filename="(.+)"',
                                  response.headers["Content-Disposition"])[0]

        if verbose:
            log.info("Copying file to {0}...".format(filename))

        shutil.move(local_filepath, filename)

        if verbose:
            log.info("Wrote {0} to {1}".format(link, filename))

        return filename
Beispiel #17
0
    def _format_output(self, raw_output):
        """
        Convert the raw server output into a Table or dictionary.

        When ``self.FORMAT`` is 'csv', strip HTML tags and parse the CSV
        payload into an astropy Table; if the payload is not valid CSV,
        fall back to a JSON-compliant dictionary.  For any other format,
        parse the output directly as JSON.
        """
        if self.FORMAT == 'csv':
            # Remove any HTML tags before splitting into CSV lines.
            fixed_raw_output = re.sub('<[^<]+?>', '', raw_output)
            split_output = fixed_raw_output.splitlines()

            columns = list(
                csv.reader([split_output[0]], delimiter=',', quotechar='"'))[0]
            rows = split_output[1:]

            # Header-only payload: return an empty table rather than
            # crashing on rows[0] with an IndexError (as before).
            if not rows:
                return Table(names=columns)

            # Quick test to see if API returned a valid csv file
            # If not, try to return JSON-compliant dictionary.
            test_row = list(csv.reader([rows[0]], delimiter=',',
                                       quotechar='"'))[0]

            if len(columns) != len(test_row):
                log.info("The API did not return a valid CSV output! \n"
                         "Outputting JSON-compliant dictionary instead.")
                return json.loads(raw_output)

            # Initialize and populate dictionary
            output_dict = {key: [] for key in columns}

            for row in rows:
                split_row = list(
                    csv.reader([row], delimiter=',', quotechar='"'))[0]
                for ct, key in enumerate(columns):
                    output_dict[key].append(split_row[ct])

            # Convert dictionary to Astropy Table.
            output = Table(output_dict, names=columns)
        else:
            # Server response is JSON compliant. Simply
            # convert from raw text to dictionary.
            output = json.loads(raw_output)

        return output
Beispiel #18
0
    def logout(self, verbose=False):
        """Performs a logout

        Logs out of the Gaia TAP server and then the Gaia data server;
        failures are logged rather than raised.

        Parameters
        ----------
        verbose : bool, optional, default 'False'
            flag to display information about the process
        """
        try:
            TapPlus.logout(self, verbose=verbose)
        except HTTPError:
            # Removed the unused ``as err`` binding; if the TAP logout
            # fails, the data-server logout is not attempted.
            log.error("Error logging out TAP server")
            return
        log.info("Gaia TAP server logout OK")
        try:
            TapPlus.logout(self.__gaiadata, verbose=verbose)
            log.info("Gaia data server logout OK")
        except HTTPError:
            log.error("Error logging out data server")
Beispiel #19
0
def species_lookuptable(cache=True):
    """
    Get a lookuptable from chemical name + OrdinaryStructuralFormula to VAMDC
    id.

    Parameters
    ----------
    cache : bool
        If True, read the table from (and write it to) a JSON file in the
        cache directory instead of re-querying the CDMS node each time.

    Returns
    -------
    SpeciesLookuptable
        Mapping of "ChemicalName OrdinaryStructuralFormula" to VAMDC
        species ID.
    """

    # Make sure the cache directory exists before reading or writing.
    if not os.path.exists(Conf.cache_location):
        os.makedirs(Conf.cache_location)

    lut_path = os.path.join(Conf.cache_location, 'species_lookuptable.json')

    if cache and os.path.exists(lut_path):
        # Fast path: reuse the previously cached table.
        log.info("Loading cached molecular line ID database")
        with open(lut_path, 'r') as fh:
            species_dict = json.load(fh)
        return SpeciesLookuptable(species_dict)

    log.info("Loading molecular line ID database")

    # Imported lazily so vamdclib is only required when the table has
    # to be (re)built from the remote service.
    from vamdclib import nodes as vnodes
    from vamdclib import request as vrequest

    node_list = vnodes.Nodelist()
    node_list.findnode('cdms')
    cdms_node = node_list.findnode('cdms')

    # Retrieve all species known to the CDMS node.
    species_request = vrequest.Request(node=cdms_node)
    molecules = species_request.getspecies().data['Molecules']

    species_dict = {
        "{0} {1}".format(molecules[key].ChemicalName,
                         molecules[key].OrdinaryStructuralFormula):
        molecules[key].VAMDCSpeciesID
        for key in molecules
    }
    lookuptable = SpeciesLookuptable(species_dict)
    if cache:
        with open(lut_path, 'w') as fh:
            json.dump(lookuptable, fh)

    return lookuptable
Beispiel #20
0
    def get_download_link(self, tdt, retrieval_type, filename, verbose,
                          **kwargs):
        """
        Build the product download link for an ISO observation.

        Parameters
        ----------
        tdt : string
          id of the Target Dedicated Time (observation identifier) to be
          downloaded, mandatory
          The identifier of the observation we want to retrieve, 8 digits
          example: 40001501
        retrieval_type : string
            type of retrieval: OBSERVATION for full observation or STANDALONE
            for single files
        filename : string
            file name to be used to store the file; not used by this method,
            kept for interface compatibility with the download methods
        verbose : bool
            optional, default 'False'
            flag to display information about the process
        **kwargs
            any additional parameters (e.g. ``product_level='ALL'``) are
            appended verbatim to the query string

        Returns
        -------
        str
            The download URL for the requested observation.
        """
        # Assemble the query string; self.data_url is expected to end in
        # '?' or '&' so parameters can be appended directly.
        link = ("{0}retrieval_type={1}"
                "&DATA_RETRIEVAL_ORIGIN=astroquery"
                "&tdt={2}").format(self.data_url, retrieval_type, tdt)

        # Forward any extra parameters unescaped, as the original did.
        link += "".join("&{0}={1}".format(key, val)
                        for key, val in kwargs.items())

        if verbose:
            log.info(link)

        return link
Beispiel #21
0
    def login(self,
              user=None,
              password=None,
              credentials_file=None,
              verbose=False):
        """Log in to both the Gaia TAP server and the Gaia data server.

        Credentials may be passed explicitly, read from a file containing
        the username on the first line and the password on the second one,
        or entered interactively when no arguments are provided.

        Parameters
        ----------
        user : str, default None
            login name
        password : str, default None
            user password
        credentials_file : str, default None
            file containing user and password in two lines
        verbose : bool, optional, default 'False'
            flag to display information about the process
        """
        # Authenticate against the TAP server first; without it there is
        # no point in contacting the data server.
        try:
            log.info("Login to gaia TAP server")
            TapPlus.login(self,
                          user=user,
                          password=password,
                          credentials_file=credentials_file,
                          verbose=verbose)
        except HTTPError:
            log.error("Error logging in TAP server")
            return
        # Reuse the credentials resolved by TapPlus (possibly read from
        # the file or prompted for) to log in to the data server.
        tap_user = self._TapPlus__user
        tap_pwd = self._TapPlus__pwd
        try:
            log.info("Login to gaia data server")
            TapPlus.login(self.__gaiadata, user=tap_user, password=tap_pwd,
                          verbose=verbose)
        except HTTPError:
            # Keep both servers in a consistent state: if the data server
            # rejects the credentials, also drop the TAP session.
            log.error("Error logging in data server")
            log.error("Logging out from TAP server")
            TapPlus.logout(self, verbose=verbose)
Beispiel #22
0
    def _run_job(self, job_location, verbose, poll_interval=20):
        """
        Start an async job (e.g. TAP or SODA) and wait for it to be completed.

        Parameters
        ----------
        job_location: str
            The url to query the job status and details
        verbose: bool
            Should progress be logged periodically
        poll_interval: int, optional
            The number of seconds to wait between checks on the status of the job.

        Returns
        -------
        The single word final status of the job. Normally COMPLETED or ERROR
        """
        # Start the async job
        if verbose:
            log.info("Starting the retrieval job...")
        self._request('POST',
                      job_location + "/phase",
                      data={'phase': 'RUN'},
                      cache=False)

        # Poll until the async job has finished
        prev_status = None
        count = 0
        job_details = self._get_job_details_xml(job_location)
        status = self._read_job_status(job_details, verbose)
        while status == 'EXECUTING' or status == 'QUEUED' or status == 'PENDING':
            count += 1
            if verbose and (status != prev_status or count > 10):
                log.info("Job is %s, polling every %d seconds." %
                         (status, poll_interval))
                count = 0
                prev_status = status
            time.sleep(poll_interval)
            job_details = self._get_job_details_xml(job_location)
            status = self._read_job_status(job_details, verbose)
        return status
Beispiel #23
0
    def get_postcard_link(self, tdt, filename=None, verbose=False):
        """
        Build the postcard download link for an ISO observation.

        Parameters
        ----------
        tdt : string
          id of the Target Dedicated Time (observation identifier) to be
          downloaded, mandatory
          The identifier of the observation we want to retrieve, 8 digits
          example: 40001501
        filename : string, optional
            file name to be used to store the file; not used by this method,
            kept for interface compatibility with the download methods
        verbose : bool
            optional, default 'False'
            flag to display information about the process

        Returns
        -------
        str
            The postcard download URL.
        """
        # self.data_url is expected to end in '?' or '&' so that query
        # parameters can be appended directly.
        link = ("{0}retrieval_type=POSTCARD"
                "&DATA_RETRIEVAL_ORIGIN=astroquery"
                "&tdt={1}").format(self.data_url, tdt)

        if verbose:
            log.info(link)

        return link
Beispiel #24
0
    def login_gui(self, verbose=False):
        """Log in to the Gaia servers using a GUI dialog.

        Parameters
        ----------
        verbose : bool, optional, default 'False'
            flag to display information about the process
        """
        # Authenticate against the TAP server via the GUI dialog first.
        try:
            log.info("Login to gaia TAP server")
            TapPlus.login_gui(self, verbose=verbose)
        except HTTPError:
            log.error("Error logging in TAP server")
            return
        # Propagate the credentials collected by the dialog to the
        # dedicated data server.
        tap_user = self._TapPlus__user
        tap_pwd = self._TapPlus__pwd
        try:
            log.info("Login to gaia data server")
            TapPlus.login(self.__gaiadata, user=tap_user, password=tap_pwd,
                          verbose=verbose)
        except HTTPError:
            # Keep both servers in a consistent state on failure.
            log.error("Error logging in data server")
            log.error("Logging out from TAP server")
            TapPlus.logout(self, verbose=verbose)
Beispiel #25
0
    def get_postcard(self, observation_id, *, image_type="OBS_EPIC",
                     filename=None, verbose=False):
        """
        Download postcards from XSA

        Parameters
        ----------
        observation_id : string
            id of the observation for which download the postcard, mandatory
            The identifier of the observation we want to retrieve, regardless
            of whether it is simple or composite.
        image_type : string
            image type, optional, default 'OBS_EPIC'
            The image_type to be returned. It can be: OBS_EPIC,
            OBS_RGS_FLUXED, OBS_RGS_FLUXED_2, OBS_RGS_FLUXED_3, OBS_EPIC_MT,
            OBS_RGS_FLUXED_MT, OBS_OM_V, OBS_OM_B, OBS_OM_U, OBS_OM_L,
            OBS_OM_M, OBS_OM_S, OBS_OM_W
        filename : string
            file name to be used to store the postcard, optional, default None
        verbose : bool
            optional, default 'False'
            Flag to display information about the process

        Returns
        -------
        filename : str
            The path the postcard was written to.
        """

        params = {'RETRIEVAL_TYPE': 'POSTCARD',
                  'OBSERVATION_ID': observation_id,
                  'OBS_IMAGE_TYPE': image_type,
                  'PROTOCOL': 'HTTP'}

        link = self.data_url + "".join("&{0}={1}".format(key, val)
                                       for key, val in params.items())

        if verbose:
            log.info(link)

        # Fetch the postcard to a temporary local path first.
        local_filepath = self._request('GET', link, params, cache=True, save=True)

        # Only derive a name when the caller did not provide one: prefer the
        # server-supplied Content-Disposition name and fall back to
        # '<observation_id>.png'.  (The previous branch order overwrote a
        # caller-supplied filename with the fallback name.)
        if filename is None:
            response = self._request('HEAD', link)
            response.raise_for_status()
            matches = re.findall('filename="(.+)"',
                                 response.headers.get("Content-Disposition", ""))
            filename = matches[0] if matches else observation_id + ".png"

        log.info(f"Copying file to {filename}...")

        shutil.move(local_filepath, filename)

        if verbose:
            log.info(f"Wrote {link} to {filename}")

        return filename
Beispiel #26
0
    def _read_job_status(self, job_details_xml, verbose):
        """
        Read job status from the job details XML

        Parameters
        ----------
        job_details_xml: `xml.etree.ElementTree`
            The SODA job details
        verbose: bool
            Should additional information be logged for errors

        Returns
        -------
        The single word status of the job. e.g. COMPLETED, EXECUTING, ERROR
        """
        status_node = job_details_xml.find(
            "{http://www.ivoa.net/xml/UWS/v1.0}phase")
        if status_node is None:
            if verbose:
                log.info("Unable to find status in status xml:")
                ElementTree.dump(job_details_xml)
            raise ValueError('Invalid job status xml received.')
        status = status_node.text
        return status
Beispiel #27
0
    def _check_existing_files(self,
                              datasets,
                              continuation=False,
                              destination=None):
        """Detect already downloaded datasets."""

        datasets_to_download = []
        files = []

        for dataset in datasets:
            ext = os.path.splitext(dataset)[1].lower()
            if ext in ('.fits', '.tar'):
                local_filename = dataset
            elif ext == '.fz':
                local_filename = dataset[:-3]
            elif ext == '.z':
                local_filename = dataset[:-2]
            else:
                local_filename = dataset + ".fits"

            if destination is not None:
                local_filename = os.path.join(destination, local_filename)
            elif self.cache_location is not None:
                local_filename = os.path.join(self.cache_location,
                                              local_filename)
            if os.path.exists(local_filename):
                log.info("Found {0}.fits...".format(dataset))
                if continuation:
                    datasets_to_download.append(dataset)
                else:
                    files.append(local_filename)
            elif os.path.exists(local_filename + ".Z"):
                log.info("Found {0}.fits.Z...".format(dataset))
                if continuation:
                    datasets_to_download.append(dataset)
                else:
                    files.append(local_filename + ".Z")
            elif os.path.exists(local_filename + ".fz"):  # RICE-compressed
                log.info("Found {0}.fits.fz...".format(dataset))
                if continuation:
                    datasets_to_download.append(dataset)
                else:
                    files.append(local_filename + ".fz")
            else:
                datasets_to_download.append(dataset)

        return datasets_to_download, files
Beispiel #28
0
    def _print_query_help(self, url, cache=True):
        """
        Download a form and print it in a quasi-human-readable way.

        Fetches the HTML query form at ``url`` and prints every form field,
        grouped under its section title, marking checkbox fields as ``[x]``
        (ticked by default) or ``[ ]``.

        Parameters
        ----------
        url : str
            URL of the HTML query form to fetch.
        cache : bool
            Whether the GET request may be served from the request cache.

        Returns
        -------
        result_string : list of str
            The printed lines (section headers and field descriptions).
        """
        log.info("List of accepted column_filters parameters.")
        log.info("The presence of a column in the result table can be "
                 "controlled if prefixed with a [ ] checkbox.")
        log.info("The default columns in the result table are shown as "
                 "already ticked: [x].")

        result_string = []

        resp = self._request("GET", url, cache=cache)
        doc = BeautifulSoup(resp.content, 'html5lib')
        # NOTE(review): assumes the form fields live in the first
        # <form><pre> element of the page — confirm against the ESO form.
        form = doc.select("html body form pre")[0]
        # Unwrap all paragraphs so fields become direct siblings of their
        # section's <table> header and can be walked with next_siblings.
        paragraph = form.find('p')
        while paragraph:
            paragraph.unwrap()
            paragraph = form.find('p')
        # For all sections: each <table> is a section title, and the
        # section's fields are the siblings up to the next <table>.
        for section in form.select("table"):
            section_title = "".join(section.stripped_strings)
            section_title = "\n".join(["", section_title,
                                       "-" * len(section_title)])
            result_string.append(section_title)
            checkbox_name = ""
            checkbox_value = ""
            for tag in section.next_siblings:
                if tag.name == u"table":
                    break
                elif tag.name == u"input":
                    # A checkbox announces the display state of the field
                    # that follows it; remember it until that field shows up.
                    if tag.get(u'type') == u"checkbox":
                        checkbox_name = tag['name']
                        checkbox_value = u"[x]" if ('checked' in tag.attrs) else u"[ ]"
                        name = ""
                        value = ""
                    else:
                        name = tag['name']
                        value = ""
                elif tag.name == u"select":
                    # Drop-down: list each option as "value (label)".
                    options = []
                    for option in tag.select("option"):
                        options += ["{0} ({1})"
                                    .format(option['value'],
                                            "".join(option.stripped_strings))]
                    name = tag[u"name"]
                    value = ", ".join(options)
                else:
                    name = ""
                    value = ""
                # Checkbox names are the field name prefixed with "tab_";
                # only attach the checkbox marker to its matching field.
                if u"tab_" + name == checkbox_name:
                    checkbox = checkbox_value
                else:
                    checkbox = "   "
                if name != u"":
                    result_string.append("{0} {1}: {2}"
                                         .format(checkbox, name, value))

        print("\n".join(result_string))
        return result_string
Beispiel #29
0
    def retrieve_data(self, datasets, continuation=False, destination=None,
                      with_calib='none', request_all_objects=False,
                      unzip=True, request_id=None):
        """
        Retrieve a list of datasets form the ESO archive.

        Parameters
        ----------
        datasets : list of strings or string
            List of datasets strings to retrieve from the archive.
        destination: string
            Directory where the files are copied.
            Files already found in the destination directory are skipped,
            unless continuation=True.
            Default to astropy cache.
        continuation : bool
            Force the retrieval of data that are present in the destination
            directory.
        with_calib : string
            Retrieve associated calibration files: 'none' (default), 'raw' for
            raw calibrations, or 'processed' for processed calibrations.
        request_all_objects : bool
            When retrieving associated calibrations (``with_calib != 'none'``),
            this allows to request all the objects included the already
            downloaded ones, to be sure to retrieve all calibration files.
            This is useful when the download was interrupted. `False` by
            default.
        unzip : bool
            Unzip compressed files from the archive after download. `True` by
            default.
        request_id : str, int
            Retrieve from an existing request number rather than sending a new
            query, with the identifier from the URL in the email sent from
            the archive from the earlier request as in:

                https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id]

        Returns
        -------
        files : list of strings or string
            List of files that have been locally downloaded from the archive.

        Examples
        --------
        >>> dptbl = Eso.query_instrument('apex', pi_coi='ginsburg')
        >>> dpids = [row['DP.ID'] for row in dptbl if 'Map' in row['Object']]
        >>> files = Eso.retrieve_data(dpids)

        """
        # Map the user-facing calibration mode to the archive form value.
        calib_options = {'none': '', 'raw': 'CalSelectorRaw2Raw',
                         'processed': 'CalSelectorRaw2Master'}

        if with_calib not in calib_options:
            raise ValueError("invalid value for 'with_calib', "
                             "it must be 'none', 'raw' or 'processed'")

        # A single dataset id is accepted; remember to unwrap the result
        # list into a single path at the end.
        if isinstance(datasets, str):
            return_list = False
            datasets = [datasets]
        else:
            return_list = True
        if not isinstance(datasets, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        # First: Detect datasets already downloaded
        # When calibrations are requested with request_all_objects, skip the
        # local check so every object is re-requested; this ensures complete
        # calibration association after an interrupted download.
        if with_calib != 'none' and request_all_objects:
            datasets_to_download, files = list(datasets), []
        else:
            log.info("Detecting already downloaded datasets...")
            datasets_to_download, files = self._check_existing_files(
                datasets, continuation=continuation, destination=destination)

        # Second: Check that the datasets to download are in the archive
        if request_id is None:
            log.info("Checking availability of datasets to download...")
            valid_datasets = [self.verify_data_exists(ds)
                          for ds in datasets_to_download]
        else:
            # Assume all valid if a request_id was provided
            # NOTE(review): these (ds, True) tuples are always truthy, so the
            # all() check below can never fail in this branch.
            valid_datasets = [(ds, True) for ds in datasets_to_download]

        if not all(valid_datasets):
            invalid_datasets = [ds for ds, v in zip(datasets_to_download,
                                                    valid_datasets) if not v]
            raise ValueError("The following data sets were not found on the "
                             "ESO servers: {0}".format(invalid_datasets))

        # Third: Download the other datasets
        log.info("Downloading datasets...")
        if datasets_to_download:
            if not self.authenticated():
                self.login()
            url = "http://archive.eso.org/cms/eso-data/eso-data-direct-retrieval.html"
            with suspend_cache(self):  # Never cache staging operations
                if request_id is None:
                    # Submit a fresh staging request through the archive's
                    # retrieval form.
                    log.info("Contacting retrieval server...")
                    retrieve_data_form = self._request("GET", url,
                                                        cache=False)
                    retrieve_data_form.raise_for_status()
                    log.info("Staging request...")
                    inputs = {"list_of_datasets": "\n".join(datasets_to_download)}
                    data_confirmation_form = self._activate_form(
                        retrieve_data_form, form_index=-1, inputs=inputs,
                        cache=False)

                    data_confirmation_form.raise_for_status()

                    root = BeautifulSoup(data_confirmation_form.content,
                                         'html5lib')
                    # A LOGIN button on the confirmation page means the
                    # session is not actually authenticated.
                    login_button = root.select('input[value=LOGIN]')
                    if login_button:
                        raise LoginError("Not logged in. "
                                    "You must be logged in to download data.")
                    inputs = {}
                    if with_calib != 'none':
                        inputs['requestCommand'] = calib_options[with_calib]

                    # TODO: There may be another screen for Not Authorized;
                    # that should be included too
                    # form name is "retrieve"; no id
                    data_download_form = self._activate_form(
                        data_confirmation_form, form_index=-1, inputs=inputs,
                        cache=False)
                else:
                    # Build URL by hand
                    # Reuse an existing request instead of staging a new one.
                    request_url = 'https://dataportal.eso.org/rh/requests/'
                    request_url += f'{self.USERNAME}/{request_id}'
                    data_download_form = self._request("GET", request_url,
                                                       cache=False)

                    _content = data_download_form.content.decode('utf-8')
                    if ('Request Handler - Error' in _content):
                        # Likely a problem with the request_url
                        msg = (f"The form at {request_url} returned an error."
                                " See your recent requests at "
                                "https://dataportal.eso.org/rh/requests/"
                                f"{self.USERNAME}/recentRequests")

                        raise RemoteServiceError(msg)

                log.info("Staging form is at {0}"
                         .format(data_download_form.url))
                # Poll the staging page every 2 s until the request reaches
                # a terminal state (COMPLETE or ERROR).
                root = BeautifulSoup(data_download_form.content, 'html5lib')
                state = root.select('span[id=requestState]')[0].text
                t0 = time.time()
                while state not in ('COMPLETE', 'ERROR'):
                    time.sleep(2.0)
                    data_download_form = self._request("GET",
                                                       data_download_form.url,
                                                       cache=False)
                    root = BeautifulSoup(data_download_form.content,
                                         'html5lib')
                    state = root.select('span[id=requestState]')[0].text
                    print("{0:20.0f}s elapsed"
                          .format(time.time() - t0), end='\r')
                    sys.stdout.flush()
                if state == 'ERROR':
                    raise RemoteServiceError("There was a remote service "
                                             "error; perhaps the requested "
                                             "file could not be found?")

            if with_calib != 'none':
                # when requested files with calibrations, some javascript is
                # used to display the files, which prevent retrieving the files
                # directly. So instead we retrieve the download script provided
                # in the web page, and use it to extract the list of files.
                # The benefit of this is also that in the download script the
                # list of files is de-duplicated, whereas on the web page the
                # calibration files would be duplicated for each exposure.
                link = root.select('a[href$="/script"]')[0]
                if 'downloadRequest' not in link.text:
                    # Make sure that we found the correct link
                    raise RemoteServiceError(
                        "A link was found in the download file for the "
                        "calibrations that is not a downloadRequest link "
                        "and therefore appears invalid.")

                href = link.attrs['href']
                script = self._request("GET", href, cache=False)
                fileLinks = re.findall(
                    r'"(https://dataportal\.eso\.org/dataPortal/api/requests/.*)"',
                    script.text)

                # urls with api/ require using Basic Authentication, though
                # it's easier for us to reuse the existing requests session (to
                # avoid asking agin for a username/password if it is not
                # stored). So we remove api/ from the urls:
                fileLinks = [
                    f.replace('https://dataportal.eso.org/dataPortal/api/requests',
                              'https://dataportal.eso.org/dataPortal/requests')
                    for f in fileLinks]

                log.info("Detecting already downloaded datasets, "
                         "including calibrations...")
                fileIds = [f.rsplit('/', maxsplit=1)[1] for f in fileLinks]
                filteredIds, files = self._check_existing_files(
                    fileIds, continuation=continuation,
                    destination=destination)

                fileLinks = [f for f, fileId in zip(fileLinks, fileIds)
                             if fileId in filteredIds]
            else:
                # Without calibrations the file list is embedded directly in
                # the staging page as hidden fileId inputs.
                fileIds = root.select('input[name=fileId]')
                fileLinks = ["http://dataportal.eso.org/dataPortal" +
                             fileId.attrs['value'].split()[1]
                             for fileId in fileIds]

            nfiles = len(fileLinks)
            log.info("Downloading {} files...".format(nfiles))
            log.debug("Files:\n{}".format('\n'.join(fileLinks)))
            for i, fileLink in enumerate(fileLinks, 1):
                fileId = fileLink.rsplit('/', maxsplit=1)[1]

                if request_id is not None:
                    # Since we fetched the script directly without sending
                    # a new request, check here that the file in the list
                    # is among those requested in the input list
                    if fileId.split('.fits')[0] not in datasets_to_download:
                        continue

                log.info("Downloading file {}/{}: {}..."
                         .format(i, nfiles, fileId))
                filename = self._request("GET", fileLink, save=True,
                                         continuation=True)

                if filename.endswith(('.gz', '.7z', '.bz2', '.xz', '.Z')) and unzip:
                    log.info("Unzipping file {0}...".format(fileId))
                    filename = system_tools.gunzip(filename)

                if destination is not None:
                    log.info("Copying file {0} to {1}...".format(fileId, destination))
                    destfile = os.path.join(destination, os.path.basename(filename))
                    shutil.move(filename, destfile)
                    files.append(destfile)
                else:
                    files.append(filename)

        # Empty the redirect cache of this request session
        # Only available and needed for requests versions < 2.17
        try:
            self._session.redirect_cache.clear()
        except AttributeError:
            pass
        log.info("Done!")
        # Mirror the input shape: a single id in gives a single path out.
        if (not return_list) and (len(files) == 1):
            files = files[0]
        return files
Beispiel #30
0
    def _login(self, username=None, store_password=False,
               reenter_password=False):
        """
        Login to the ESO User Portal.

        Parameters
        ----------
        username : str, optional
            Username to the ESO Public Portal. If not given, it should be
            specified in the config file.
        store_password : bool, optional
            Stores the password securely in your keyring. Default is False.
        reenter_password : bool, optional
            Asks for the password even if it is already stored in the
            keyring. This is the way to overwrite an already stored password
            on the keyring. Default is False.

        Returns
        -------
        authenticated : bool
            Whether the SSO login succeeded.
        """
        # Resolve the username: explicit argument > config > previous login.
        if username is None:
            if self.USERNAME != "":
                username = self.USERNAME
            elif self.username is not None:
                username = self.username
            else:
                raise LoginError("If you do not pass a username to login(), "
                                 "you should configure a default one!")
        else:
            # store username as we may need it to re-authenticate
            self.username = username

        # Get password from keyring or prompt
        password, password_from_keyring = self._get_password(
            "astroquery:www.eso.org", username, reenter=reenter_password)

        # Authenticate
        log.info("Authenticating {0} on www.eso.org...".format(username))

        # Do not cache pieces of the login process
        login_response = self._request("GET", "https://www.eso.org/sso/login",
                                       cache=False)
        root = BeautifulSoup(login_response.content, 'html5lib')
        # The CAS login form carries a one-time 'execution' token that must
        # be posted back together with the credentials.
        login_input = root.find(name='input', attrs={'name': 'execution'})
        if login_input is None:
            raise ValueError("ESO login page did not have the correct attributes.")
        execution = login_input.get('value')

        login_result_response = self._request("POST", "https://www.eso.org/sso/login",
                                              data={'username': username,
                                                    'password': password,
                                                    'execution': execution,
                                                    '_eventId': 'submit',
                                                    'geolocation': '',
                                                    })
        login_result_response.raise_for_status()
        root = BeautifulSoup(login_result_response.content, 'html5lib')
        # Guard against a result page without an <h4> element instead of
        # crashing with AttributeError on .text.
        result_header = root.find('h4')
        authenticated = (result_header is not None
                         and result_header.text == 'Login successful')

        if authenticated:
            log.info("Authentication successful!")
        else:
            # Not inside an exception handler, so use error() rather than
            # exception(), which would try to report a non-existent traceback.
            log.error("Authentication failed!")

        # When authenticated, save password in keyring if needed
        if authenticated and password_from_keyring is None and store_password:
            keyring.set_password("astroquery:www.eso.org", username, password)
        return authenticated