Example #1
    def logout(self, verbose=False):
        """Performs a logout

        Parameters
        ----------
        verbose : bool, optional, default False
            flag to display information about the process
        """
        try:
            TapPlus.logout(self, verbose=verbose)
        except HTTPError:
            log.error("Error logging out of the TAP server")
            return
        log.info("Gaia TAP server logout OK")
        try:
            TapPlus.logout(self.__gaiadata, verbose=verbose)
            log.info("Gaia data server logout OK")
        except HTTPError:
            log.error("Error logging out of the data server")
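
A minimal sketch of the surrounding session lifecycle, assuming the standard astroquery.gaia entry point (the credentials are placeholders):

    from astroquery.gaia import Gaia

    Gaia.login(user='myuser', password='mypassword')  # placeholder credentials
    # ... authenticated TAP queries and downloads ...
    Gaia.logout(verbose=True)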
Example #2
    def _login(self, username, password):
        """
        Login to the Gemini Archive website.

        This method authenticates the session as a particular user. Depending
        on your credentials, this may grant access to additional information
        or functionality.

        Parameters
        ----------
        username : str
            The username to login as
        password : str
            The password for the given account

        Returns
        -------
        bool
            True if the login succeeded, False otherwise
        """
        params = dict(username=username, password=password)
        r = self._session.request('POST', 'https://archive.gemini.edu/login/', params=params)
        # NB: the misspelling 'sucessfully' matches the banner the server returns
        if b'<P>Welcome, you are sucessfully logged in' not in r.content:
            log.error('Unable to login, please check your credentials')
            return False
        return True
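
Since ``_login`` is private, here is a hypothetical sketch of the same handshake with a bare requests session (the endpoint is taken from the code above; the credentials are placeholders):

    import requests

    session = requests.Session()
    r = session.request('POST', 'https://archive.gemini.edu/login/',
                        params=dict(username='someuser', password='secret'))
    logged_in = b'logged in' in r.content  # loose check for the welcome banner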
Example #3
    def login(self,
              user=None,
              password=None,
              credentials_file=None,
              verbose=False):
        """Performs a login.

        Credentials can be supplied directly via the user and password
        arguments, or via a file containing the username on the first line
        and the password on the second. If no arguments are provided, a
        prompt asking for username and password will appear.

        Parameters
        ----------
        user : str, default None
            login name
        password : str, default None
            user password
        credentials_file : str, default None
            file containing user and password in two lines
        verbose : bool, optional, default False
            flag to display information about the process
        """
        try:
            log.info("Login to Gaia TAP server")
            TapPlus.login(self,
                          user=user,
                          password=password,
                          credentials_file=credentials_file,
                          verbose=verbose)
        except HTTPError:
            log.error("Error logging in to the TAP server")
            return
        # reuse the credentials that TapPlus stored (name-mangled attributes)
        u = self._TapPlus__user
        p = self._TapPlus__pwd
        try:
            log.info("Login to Gaia data server")
            TapPlus.login(self.__gaiadata, user=u, password=p, verbose=verbose)
        except HTTPError:
            log.error("Error logging in to the data server")
            log.error("Logging out from the TAP server")
            TapPlus.logout(self, verbose=verbose)
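
The three supported invocation styles, sketched (credentials and file name are placeholders):

    from astroquery.gaia import Gaia

    Gaia.login(user='myuser', password='mypassword')     # explicit credentials
    Gaia.login(credentials_file='gaia_credentials.txt')  # two lines: user, then password
    Gaia.login()                                         # interactive prompt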
Example #4
    def login_gui(self, verbose=False):
        """Performs a login using a GUI dialog

        Parameters
        ----------
        verbose : bool, optional, default False
            flag to display information about the process
        """
        try:
            log.info("Login to Gaia TAP server")
            TapPlus.login_gui(self, verbose=verbose)
        except HTTPError:
            log.error("Error logging in to the TAP server")
            return
        # reuse the credentials that TapPlus stored (name-mangled attributes)
        u = self._TapPlus__user
        p = self._TapPlus__pwd
        try:
            log.info("Login to Gaia data server")
            TapPlus.login(self.__gaiadata, user=u, password=p, verbose=verbose)
        except HTTPError:
            log.error("Error logging in to the data server")
            log.error("Logging out from the TAP server")
            TapPlus.logout(self, verbose=verbose)
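
A one-line sketch; the dialog collects the credentials, so no arguments are needed:

    from astroquery.gaia import Gaia

    Gaia.login_gui()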
Example #5
    def api_key(self):
        """Return the Astrometry.net API key."""
        if not conf.api_key:
            log.error("Astrometry.net API key not in configuration file")
        return conf.api_key
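
A sketch of supplying the key at runtime instead of via the configuration file (the key string is a placeholder; the settable ``api_key`` attribute follows the astroquery usage pattern):

    from astroquery.astrometry_net import AstrometryNet

    ast = AstrometryNet()
    ast.api_key = 'XXXXXXXXXXXXXXXX'  # placeholder key; normally read from conf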
Example #6
    def get_epic_lightcurve(self, filename, source_number, *,
                            instrument=[], path=""):
        """Extracts the EPIC source light curve products from a TAR file
        obtained with ``XMMNewton.download_data``.

        This function extracts the EPIC source light curve products for a
        given instrument (or instruments) from that TAR file.

        The result is a dictionary containing the paths to the extracted
        products, with the instrument as key.

        If the instrument is not specified, products for all available
        instruments are returned.

        Parameters
        ----------
        filename : string, mandatory
            The name of the tarfile to be processed
        source_number : integer, mandatory
            The source number, in decimal, in the observation
        instrument : array of strings, optional, default []
            An array of strings indicating the desired instruments
        path : string, optional
            If set, extracts the products into the indicated path

        Returns
        -------
        A dictionary with the full paths of the extracted EPIC sources
        light curve products. The key is the instrument

        Notes
        -----
        The filenames will contain the source number in hexadecimal,
        as this is the convention used by the pipeline.

        The structure and the content of the extracted compressed FITS files
        are described in detail in the Pipeline Products Description
        [XMM-SOC-GEN-ICD-0024](https://xmm-tools.cosmos.esa.int/external/xmm_obs_info/odf/data/docs/XMM-SOC-GEN-ICD-0024.pdf).

        """
        _instrument = ["M1", "M2", "PN", "EP"]
        _band = [8]
        _product_type = ["SRCTSR", "FBKTSR"]
        _path = ""

        ret = None

        if instrument == []:
            instrument = _instrument
        else:
            # keep only valid instruments; do not remove items from the list
            # while iterating over it
            valid_instrument = []
            for inst in instrument:
                if inst not in _instrument:
                    log.warning("Invalid instrument %s" % inst)
                else:
                    valid_instrument.append(inst)
            instrument = valid_instrument

        if path != "" and os.path.exists(path):
            _path = path

        try:
            with tarfile.open(filename, "r") as tar:
                ret = {}
                for member in tar.getmembers():
                    paths = os.path.split(member.name)
                    fname = paths[1]
                    paths = os.path.split(paths[0])
                    if paths[1] != "pps":
                        continue
                    fname_info = self._parse_filename(fname)
                    if fname_info["X"] != "P":
                        continue
                    if not fname_info["I"] in instrument:
                        continue
                    if not int(fname_info["S"]) in _band:
                        continue
                    if not fname_info["T"] in _product_type:
                        continue
                    if int(fname_info["X-"], 16) != source_number:
                        continue
                    tar.extract(member, _path)
                    key = fname_info["I"]
                    path_inst_name = os.path.abspath(os.path.join(_path, member.name))
                    if fname_info["T"] == "FBKTSR":
                        key = fname_info["I"] + "_bkg"
                    if ret.get(key) and isinstance(ret.get(key), str):
                        log.warning("More than one file found with the "
                                    "instrument: %s" % key)
                        ret[key] = [ret[key], path_inst_name]
                    elif ret.get(key) and isinstance(ret.get(key), list):
                        ret[key].append(path_inst_name)
                    else:
                        ret[key] = path_inst_name

        except FileNotFoundError:
            log.error("File %s not found" % (filename))
            return None

        if not ret:
            log.info("Nothing to extract with the given parameters:\n"
                     "  PPS: %s\n"
                     "  Source Number: %u\n"
                     "  Instrument: %s\n" % (filename, source_number,
                                             instrument))

        return ret
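
A usage sketch, reusing the download pattern shown in the ``get_epic_spectra`` docstring further below (the observation ID and source number are placeholders):

    from astroquery.esa.xmm_newton import XMMNewton

    XMMNewton.download_data('0405320501', level='PPS', extension='FTZ',
                            filename='tarfile.tar')  # placeholder observation
    lc = XMMNewton.get_epic_lightcurve('tarfile.tar', 83, instrument=['PN'])
    pn_lc = lc.get('PN')       # path to the PN source light curve
    pn_bkg = lc.get('PN_bkg')  # path to the PN background light curve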
Example #7
    def get_epic_images(self, filename, band=[], instrument=[],
                        get_detmask=False, get_exposure_map=False, path="", **kwargs):

        """Extracts the EPIC images from a given TAR file

        This function extracts the EPIC images in a given band (or bands)
        and instrument (or instruments) from the TAR file.

        The result is a dictionary containing the paths to the extracted
        EPIC images, keyed by band and instrument.

        If the band or the instrument is not specified, this function
        returns all available bands and instruments.

        Additionally, ``get_detmask`` and ``get_exposure_map`` can be set to
        True; this function will then also extract the detector masks and
        exposure maps for the specified bands and instruments.

        Parameters
        ----------
        filename : string, mandatory
            The name of the tarfile to be processed
        band : array of integers, optional, default []
            An array of integers indicating the desired bands
        instrument : array of strings, optional, default []
            An array of strings indicating the desired instruments
        get_detmask : bool, optional
            If True, also extracts the detector masks
        get_exposure_map : bool, optional
            If True, also extracts the exposure maps
        path : string, optional
            If set, extracts the EPIC images into the indicated path

        Returns
        -------
        A dictionary of dictionaries with the full paths of the extracted
        EPIC images. The keys of each dictionary are the band for the first
        level dictionary and the instrument for the second level dictionaries

        Notes
        -----
        The structure and the content of the extracted compressed FITS files
        are described in detail in the Pipeline Products Description
        [XMM-SOC-GEN-ICD-0024](https://xmm-tools.cosmos.esa.int/external/xmm_obs_info/odf/data/docs/XMM-SOC-GEN-ICD-0024.pdf).

        """

        _product_type = ["IMAGE_"]
        _instrument = ["M1", "M2", "PN", "EP"]
        _band = [1, 2, 3, 4, 5, 8]
        _path = ""
        if get_detmask:
            _product_type.append("DETMSK")
        if get_exposure_map:
            _product_type.append("EXPMAP")
        if path != "" and os.path.exists(path):
            _path = path

        ret = None
        if band == []:
            band = _band
        else:
            # keep only valid bands; do not remove items from the list
            # while iterating over it
            valid_band = []
            for b in band:
                if b not in _band:
                    log.warning("Invalid band %u" % b)
                else:
                    valid_band.append(b)
            band = valid_band

        if instrument == []:
            instrument = _instrument
        else:
            # same filtering for the instruments
            valid_instrument = []
            for inst in instrument:
                if inst not in _instrument:
                    log.warning("Invalid instrument %s" % inst)
                else:
                    valid_instrument.append(inst)
            instrument = valid_instrument
        try:
            with tarfile.open(filename, "r") as tar:
                ret = {}
                for member in tar.getmembers():
                    paths = os.path.split(member.name)
                    fname = paths[1]
                    paths = os.path.split(paths[0])
                    if paths[1] != "pps":
                        continue
                    fname_info = self._parse_filename(fname)
                    if fname_info["X"] != "P":
                        continue
                    if not fname_info["I"] in instrument:
                        continue
                    if not int(fname_info["S"]) in band:
                        continue
                    if not fname_info["T"] in _product_type:
                        continue
                    tar.extract(member, _path)
                    if not ret.get(int(fname_info["S"])):
                        ret[int(fname_info["S"])] = {}
                    b = int(fname_info["S"])
                    ins = fname_info["I"]
                    path_member_name = os.path.abspath(os.path.join(_path, member.name))
                    if fname_info["T"] == "DETMSK":
                        ins = fname_info["I"] + "_det"
                    elif fname_info["T"] == "EXPMAP":
                        ins = fname_info["I"] + "_expo"
                    if ret[b].get(ins) and isinstance(ret[b].get(ins), str):
                        log.warning("More than one file found with the "
                                    "band %u and "
                                    "the instrument: %s" % (b, ins))
                        ret[b][ins] = [ret[b][ins], path_member_name]
                    elif ret[b].get(ins) and isinstance(ret[b].get(ins), list):
                        ret[b][ins].append(path_member_name)
                    else:
                        ret[b][ins] = path_member_name

        except FileNotFoundError:
            log.error("File %s not found" % (filename))
            return None

        return ret
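
A usage sketch over the same TAR file (bands and instruments restricted for illustration):

    from astroquery.esa.xmm_newton import XMMNewton

    images = XMMNewton.get_epic_images('tarfile.tar', band=[3, 8], instrument=['PN'],
                                       get_detmask=True, get_exposure_map=True)
    pn_img = images[3].get('PN')       # band-3 PN image
    pn_det = images[3].get('PN_det')   # matching detector mask
    pn_exp = images[3].get('PN_expo')  # matching exposure map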
Example #8
    def get_epic_spectra(self, filename, source_number, *,
                         instrument=[], path="", verbose=False):
        """Extracts the EPIC source spectral products from a given TAR file,
        into ``path`` when set.

        This function extracts the EPIC source spectral products for a given
        instrument (or instruments) from the TAR file.
        The result is a dictionary containing the paths to the extracted
        products, with the instrument as key.
        If the instrument is not specified, products for all available
        instruments are returned.

        Parameters
        ----------
        filename : string, mandatory
            The name of the tarfile to be processed
        source_number : integer, mandatory
            The source number, in decimal, in the observation
        instrument : array of strings, optional, default []
            An array of strings indicating the desired instruments
        path : string, optional
            If set, extracts the products into the indicated path
        verbose : bool, optional, default False
            flag to display information about the process

        Returns
        -------
        A dictionary with the full paths of the extracted EPIC sources
        spectral products. The key is the instrument

        Notes
        -----
        The filenames will contain the source number in hexadecimal,
        as this is the convention used by the pipeline.
        The structure and the content of the extracted compressed FITS files
        are described in detail in the Pipeline Products Description
        [XMM-SOC-GEN-ICD-0024](https://xmm-tools.cosmos.esa.int/external/xmm_obs_info/odf/data/docs/XMM-SOC-GEN-ICD-0024.pdf).

        """
        _instrument = ["M1", "M2", "PN", "EP"]
        _product_type = ["SRSPEC", "BGSPEC", "SRCARF"]
        _path = ""
        ret = None
        if instrument == []:
            instrument = _instrument
        else:
            # keep only valid instruments; do not remove items from the list
            # while iterating over it
            valid_instrument = []
            for inst in instrument:
                if inst not in _instrument:
                    log.warning(f"Invalid instrument {inst}")
                else:
                    valid_instrument.append(inst)
            instrument = valid_instrument
        if path != "" and os.path.exists(path):
            _path = path
        try:
            with tarfile.open(filename, "r") as tar:
                ret = {}
                for member in tar.getmembers():
                    paths = os.path.split(member.name)
                    fname = paths[1]
                    paths = os.path.split(paths[0])
                    if paths[1] != "pps":
                        continue
                    fname_info = self._parse_filename(fname)
                    if fname_info["X"] != "P":
                        continue
                    if not fname_info["I"] in instrument:
                        continue
                    if not fname_info["T"] in _product_type:
                        continue
                    if int(fname_info["X-"], 16) != source_number:
                        continue
                    tar.extract(member, _path)
                    key = fname_info["I"]
                    path_inst_name = os.path.abspath(os.path.join(_path, member.name))
                    if fname_info["T"] == "BGSPEC":
                        key = fname_info["I"] + "_bkg"
                    elif fname_info["T"] == "SRCARF":
                        key = fname_info["I"] + "_arf"
                    else:
                        # process the source spectrum
                        with fits.open(path_inst_name) as hdul:
                            # pick up the SAS version, needed for the pn RMF
                            sasver = hdul[0].header["SASVERS"].split("-")[1][:-2]
                            # old RMF folders, going back two versions
                            # (the last bulk reprocessing was with SAS 18.0)
                            hardcoded_old_pn_rmfs = ['2020-10-28_sas19.0.0',
                                                     '2019-07-31_sas18.0.0']
                            old_rmf = False
                            for irmf in hardcoded_old_pn_rmfs:
                                if sasver in irmf:
                                    rmf_path = irmf
                                    old_rmf = True
                                    break
                            for ext in hdul:
                                if ext.name != "SPECTRUM":
                                    continue
                                rmf_fname = ext.header["RESPFILE"]
                                if fname_info["I"] == "M1" or fname_info["I"] == "M2":
                                    inst = "MOS/" + str(ext.header["SPECDELT"]) + "eV/"
                                elif fname_info["I"] == "PN":
                                    inst = "PN/"
                                    file_name, file_ext = os.path.splitext(rmf_fname)
                                    rmf_fname = file_name + "_v" + sasver + file_ext
                                    if old_rmf:
                                        inst = f"old/pn/{rmf_path}/"

                                link = self._rmf_ftp + inst + rmf_fname

                                if verbose:
                                    log.info("rmf link is: %s" % link)

                                response = self._request('GET', link)
                                response.raise_for_status()

                                rsp_filename = os.path.join(_path, paths[0], paths[1], ext.header["RESPFILE"])

                                with open(rsp_filename, 'wb') as f:
                                    f.write(response.content)
                                    ret[fname_info["I"] + "_rmf"] = rsp_filename

                    if ret.get(key) and isinstance(ret.get(key), str):
                        log.warning("More than one file found with the instrument: %s" % key)
                        ret[key] = [ret[key], path_inst_name]
                    elif ret.get(key) and isinstance(ret.get(key), list):
                        ret[key].append(path_inst_name)
                    else:
                        ret[key] = path_inst_name

        except FileNotFoundError:
            log.error("File %s not found" % (filename))
            return

        if not ret:
            log.info("Nothing to extract with the given parameters:\n"
                     "  PPS: %s\n"
                     "  Source Number: %u\n"
                     "  Instrument: %s\n" % (filename, source_number,
                                             instrument))

        return ret
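
A usage sketch mirroring the dictionary keys assigned above (the source number is a placeholder):

    from astroquery.esa.xmm_newton import XMMNewton

    spectra = XMMNewton.get_epic_spectra('tarfile.tar', 83, instrument=['PN'])
    src = spectra.get('PN')       # source spectrum
    bkg = spectra.get('PN_bkg')   # background spectrum
    arf = spectra.get('PN_arf')   # ancillary response file
    rmf = spectra.get('PN_rmf')   # response matrix fetched from the remote server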
Example #9
    def load_data(self, ids, data_release=None, data_structure='INDIVIDUAL', retrieval_type="ALL", valid_data=True,
                  band=None, avoid_datatype_check=False, format="votable", output_file=None,
                  overwrite_output_file=False, verbose=False):
        """Loads the specified data products.
        TAP+ only

        Parameters
        ----------
        ids : str list, mandatory
            list of identifiers
        data_release : str, optional, default None
            data release from which data should be taken. E.g. 'Gaia DR2'
            By default, it takes the current default one.
        data_structure : str, optional, default 'INDIVIDUAL'
            it can be 'INDIVIDUAL', 'COMBINED' or 'RAW':
            'INDIVIDUAL' means products are provided in separate files for
            each sourceId, all zipped in a single bundle;
            'COMBINED' means products are provided in a single file
            concatenating the data of all sourceIds together;
            'RAW' means products are provided following a Data Model similar
            to that used in the MDB, with array parameters kept as arrays and
            one row per sourceId in a single file.
        retrieval_type : str, optional, default 'ALL'
            retrieval type identifier. It can be either 'epoch_photometry'
            for compatibility reasons or 'ALL' to retrieve all data from
            the list of sources.
        valid_data : bool, optional, default True
            By default, the epoch photometry service returns only valid data,
            that is, all data rows where flux is not null and the
            rejected_by_photometry flag is not true. In order to retrieve
            all data associated with a given source without this filter,
            pass valid_data=False.
        band : str, optional, default None, valid values: G, BP, RP
            By default, the epoch photometry service returns all the
            available photometry bands for the requested source.
            This parameter allows filtering the output light curve by band.
        avoid_datatype_check : boolean, optional, default False
            If set to True, the DataLink item tags will not be checked.
        format : str, optional, default 'votable'
            loading format
        output_file : string, optional, default None
            file where the results are saved.
            If it is not provided, the http response contents are returned.
        overwrite_output_file : boolean, optional, default False
            To overwrite the output_file if it already exists.
        verbose : bool, optional, default False
            flag to display information about the process

        Returns
        -------
        A dictionary with the downloaded data products
        """
        if retrieval_type is None:
            raise ValueError("Missing mandatory argument 'retrieval_type'")

        now = datetime.now()
        now_formatted = now.strftime("%Y%m%d_%H%M%S")
        temp_dirname = "temp_" + now_formatted
        downloadname_formatted = "download_" + now_formatted

        output_file_specified = False
        if output_file is None:
            output_file = os.path.join(os.getcwd(), temp_dirname, downloadname_formatted)
        else:
            output_file_specified = True
            output_file = os.path.abspath(output_file)
            if not overwrite_output_file and os.path.exists(output_file):
                raise ValueError(f"{output_file} already exists. Please use "
                                 f"overwrite_output_file=True to overwrite it.")

        path = os.path.dirname(output_file)

        if ids is None:
            raise ValueError("Missing mandatory argument 'ids'")

        if avoid_datatype_check is False:
            # we need to check params
            rt = str(retrieval_type).upper()
            if rt != 'ALL' and rt not in self.VALID_DATALINK_RETRIEVAL_TYPES:
                raise ValueError(f"Invalid mandatory argument 'retrieval_type'. Found {retrieval_type}, "
                                 f"expected: 'ALL' or any of {self.VALID_DATALINK_RETRIEVAL_TYPES}")

        params_dict = {}

        if not valid_data or str(retrieval_type) == 'ALL':
            params_dict['VALID_DATA'] = "false"
        elif valid_data:
            params_dict['VALID_DATA'] = "true"

        if band is not None:
            if band not in ('G', 'BP', 'RP'):
                raise ValueError("Invalid band value '%s' "
                                 "(valid values: 'G', 'BP' and 'RP')" % band)
            else:
                params_dict['BAND'] = band
        if isinstance(ids, str):
            ids_arg = ids
        elif isinstance(ids, int):
            ids_arg = str(ids)
        else:
            ids_arg = ','.join(str(item) for item in ids)
        params_dict['ID'] = ids_arg
        if data_release is not None:
            params_dict['RELEASE'] = data_release
        params_dict['DATA_STRUCTURE'] = data_structure
        params_dict['FORMAT'] = str(format)
        params_dict['RETRIEVAL_TYPE'] = str(retrieval_type)
        params_dict['USE_ZIP_ALWAYS'] = 'true'

        if path != '':
            try:
                os.mkdir(path)
            except FileExistsError:
                log.error("Path %s already exists" % path)
            except OSError:
                log.error("Creation of the directory %s failed" % path)

        try:
            self.__gaiadata.load_data(params_dict=params_dict,
                                      output_file=output_file,
                                      verbose=verbose)
            files = Gaia.__get_data_files(output_file=output_file, path=path)
        finally:
            if not output_file_specified:
                shutil.rmtree(path)

        if verbose and output_file_specified:
            log.info("output_file = %s" % output_file)

        log.debug("List of products available:")
        for item in sorted(files):
            if verbose:
                log.debug("Product = " + item)

        return files
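
A minimal sketch, assuming a prior login and using a retrieval type documented in the variant of this method further below (the source id is a placeholder):

    from astroquery.gaia import Gaia

    Gaia.login(user='myuser', password='mypassword')   # placeholder credentials
    files = Gaia.load_data(ids=[5937083524458270976],  # placeholder source_id
                           data_release='Gaia DR2',
                           retrieval_type='EPOCH_PHOTOMETRY',
                           format='votable')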
Example #10
    def login(self, user=None, password=None, certificate_file=None):
        """
        Authenticate to the service. Both user/password and HTTPS client
        certificates are supported.

        Alternatively, the Cadc class can be instantiated with an
        authenticated session.

        Parameters
        ----------
        user : str, required if certificate_file is None
            username to login with
        password : str, required if user is set
            password to login with
        certificate_file : str, required if user is None
            path to the certificate to use when logging in

        """
        # start with a new session
        if not isinstance(self.cadctap._session,
                          (requests.Session, authsession.AuthSession)):
            raise TypeError('Cannot login with a user-provided session that '
                            'is not a pyvo.authsession.AuthSession or '
                            'requests.Session')
        if not certificate_file and not (user and password):
            raise AttributeError('login credentials missing (user/password '
                                 'or certificate)')
        if certificate_file:
            if isinstance(self.cadctap._session, authsession.AuthSession):
                self.cadctap._session.credentials.\
                    set_client_certificate(certificate_file)
            else:
                # if the session was already used to call CADC, requests caches
                # it without using the cert. Therefore need to close all
                # existing https sessions first.
                https_adapter = self.cadctap._session.adapters['https://']
                if https_adapter:
                    https_adapter.close()
                self.cadctap._session.cert = certificate_file
        if user and password:
            login_url = get_access_url(self.CADCLOGIN_SERVICE_URI,
                                       'ivo://ivoa.net/std/UMS#login-0.1')
            if login_url is None:
                raise RuntimeError("No login URL")
            # need to login and get a cookie
            args = {"username": str(user), "password": str(password)}
            header = {
                "Content-type": "application/x-www-form-urlencoded",
                "Accept": "text/plain"
            }
            response = self._request(method='POST',
                                     url=login_url,
                                     data=args,
                                     headers=header,
                                     cache=False)
            try:
                response.raise_for_status()
            except Exception as e:
                log.error('Login error: {}'.format(e))
                raise
            # extract the cookie and store it in the session
            cookie = '"{}"'.format(response.text)
            if isinstance(self.cadctap._session, authsession.AuthSession):
                self.cadctap._session.credentials.set_cookie(
                    CADC_COOKIE_PREFIX, cookie)
            else:
                self.cadctap._session.cookies.set(CADC_COOKIE_PREFIX, cookie)
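
Both authentication paths in a short sketch (the credentials and certificate path are placeholders):

    from astroquery.cadc import Cadc

    cadc = Cadc()
    cadc.login(user='someuser', password='somepassword')  # cookie-based login
    # or, with an X.509 client certificate:
    cadc.login(certificate_file='/path/to/cert.pem')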
Example #11
    def download_files(self, files, *, savedir=None, cache=True,
                       continuation=True, skip_unauthorized=True,
                       verify_only=False):
        """
        Given a list of file URLs, download them

        Note: Given a list with repeated URLs, each will only be downloaded
        once, so the return may have a different length than the input list

        Parameters
        ----------
        files : list
            List of URLs to download
        savedir : None or str
            The directory to save to.  Default is the cache location.
        cache : bool
            Cache the download?
        continuation : bool
            Attempt to continue where the download left off (if it was broken)
        skip_unauthorized : bool
            If you receive "unauthorized" responses for some of the download
            requests, skip over them.  If this is False, an exception will be
            raised.
        verify_only : bool
            Option to go through the process of checking the files to see if
            they're the right size, but not actually download them.  This
            option may be useful if a previous download run failed partway.
        """

        if self.USERNAME:
            auth = self._get_auth_info(self.USERNAME)
        else:
            auth = None

        downloaded_files = []
        if savedir is None:
            savedir = self.cache_location
        for file_link in unique(files):
            log.debug("Downloading {0} to {1}".format(file_link, savedir))
            try:
                check_filename = self._request('HEAD', file_link, auth=auth)
                check_filename.raise_for_status()
            except requests.HTTPError as ex:
                if ex.response.status_code == 401:
                    if skip_unauthorized:
                        log.info("Access denied to {url}.  Skipping to"
                                 " next file".format(url=file_link))
                        continue
                    else:
                        raise

            try:
                filename = re.search("filename=(.*)",
                                     check_filename.headers['Content-Disposition']).groups()[0]
            except KeyError:
                log.info(f"Unable to find filename for {file_link}  "
                         "(missing Content-Disposition in header).  "
                         "Skipping to next file.")
                continue

            if savedir is not None:
                filename = os.path.join(savedir,
                                        filename)

            if verify_only:
                existing_file_length = (os.stat(filename).st_size
                                        if os.path.exists(filename) else 0)
                if 'content-length' in check_filename.headers:
                    length = int(check_filename.headers['content-length'])
                    if length == 0:
                        warnings.warn('URL {0} has length=0'.format(file_link))
                    elif existing_file_length == length:
                        log.info(f"Found cached file {filename} with expected size {existing_file_length}.")
                    elif existing_file_length < length:
                        log.info(f"Found cached file {filename} with size {existing_file_length} < expected "
                                 f"size {length}.  The download should be continued.")
                    elif existing_file_length > length:
                        warnings.warn(f"Found cached file {filename} with size {existing_file_length} > expected "
                                      f"size {length}.  The download is likely corrupted.",
                                      CorruptDataWarning)
                else:
                    warnings.warn(f"Could not verify {file_link} because it has no 'content-length'")

            try:
                if not verify_only:
                    self._download_file(file_link,
                                        filename,
                                        timeout=self.TIMEOUT,
                                        auth=auth,
                                        cache=cache,
                                        method='GET',
                                        head_safe=False,
                                        continuation=continuation)

                downloaded_files.append(filename)
            except requests.HTTPError as ex:
                if ex.response.status_code == 401:
                    if skip_unauthorized:
                        log.info("Access denied to {url}.  Skipping to"
                                 " next file".format(url=file_link))
                        continue
                    else:
                        raise
                elif ex.response.status_code == 403:
                    log.error("Access denied to {url}".format(url=file_link))
                    if 'dataPortal' in file_link and 'sso' not in file_link:
                        log.error("The URL may be incorrect.  Try using "
                                  "{0} instead of {1}"
                                  .format(file_link.replace('dataPortal/',
                                                            'dataPortal/sso/'),
                                          file_link))
                    raise ex
                elif ex.response.status_code == 500:
                    # empirically, this works the second time most of the time...
                    self._download_file(file_link,
                                        filename,
                                        timeout=self.TIMEOUT,
                                        auth=auth,
                                        cache=cache,
                                        method='GET',
                                        head_safe=False,
                                        continuation=continuation)

                    downloaded_files.append(filename)
                else:
                    raise ex
        return downloaded_files
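
A usage sketch (the URL is a placeholder):

    from astroquery.alma import Alma

    urls = ['https://almascience.org/dataPortal/somefile.fits']  # placeholder URL
    paths = Alma.download_files(urls, savedir='/tmp/alma')
    # second pass: only check the downloaded sizes, do not re-download
    Alma.download_files(urls, savedir='/tmp/alma', verify_only=True)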
Example #12
    def query_async(self, payload, *, public=True, science=True,
                    legacy_columns=False, get_query_payload=None, **kwargs):
        """
        Perform a generic query with user-specified payload

        Parameters
        ----------
        payload : dictionary
            Please consult the `help` method
        public : bool
            True to return only public datasets, False to return private only,
            None to return both
        science : bool
            True to return only science datasets, False to return only
            calibration, None to return both
        legacy_columns : bool
            True to return the columns from the obsolete ALMA advanced query,
            otherwise return the current columns based on ObsCore model.
        get_query_payload : bool
            True to return the generated query payload instead of performing
            the query.

        Returns
        -------
        Table with results. Columns are those in the ALMA ObsCore model
        (see ``help_tap``) unless the ``legacy_columns`` argument is set to True.
        """

        if payload is None:
            payload = {}
        for arg, value in kwargs.items():
            if arg == 'band_list' and isinstance(value, list):
                value = ' '.join(str(_) for _ in value)
            if arg in payload:
                payload[arg] = '{} {}'.format(payload[arg], value)
            else:
                payload[arg] = value

        if science is not None:
            payload['science_observation'] = science
        if public is not None:
            payload['public_data'] = public
        if get_query_payload:
            return payload

        query = _gen_sql(payload)
        result = self.query_tap(query, maxrec=payload.get('maxrec', None))
        if result is not None:
            result = result.to_table()
        else:
            # Should not happen
            raise RuntimeError('BUG: Unexpected result None')
        if legacy_columns:
            legacy_result = Table()
            # add 'Observation date' column

            for col_name in _OBSCORE_TO_ALMARESULT:
                if col_name in result.columns:
                    if col_name == 't_min':
                        legacy_result['Observation date'] = \
                            [Time(_['t_min'], format='mjd').strftime(
                                ALMA_DATE_FORMAT) for _ in result]
                    else:
                        legacy_result[_OBSCORE_TO_ALMARESULT[col_name]] = \
                            result[col_name]
                else:
                    log.error("Invalid column mapping in OBSCORE_TO_ALMARESULT: "
                              "{}:{}.  Please "
                              "report this as an Issue."
                              .format(col_name, _OBSCORE_TO_ALMARESULT[col_name]))
            return legacy_result
        return result
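
A usage sketch; the payload key is an assumption based on the keywords listed by the ``help`` method mentioned above (the target name is a placeholder):

    from astroquery.alma import Alma

    results = Alma.query_async({'source_name_alma': 'NGC253'},  # assumed payload key
                               band_list=[6, 7], science=True)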
Example #13
    def download_and_extract_files(self, urls, *, delete=True, regex=r'.*\.fits$',
                                   include_asdm=False, path='cache_path',
                                   verbose=True):
        """
        Given a list of tarball URLs, extract all the FITS files (or
        whatever matches the regex)

        Parameters
        ----------
        urls : str or list
            A single URL or a list of URLs
        delete : bool
            If True, delete each tarball after its contents have been
            extracted
        regex : str
            Regular expression selecting which archive members to extract
        include_asdm : bool
            Only affects cycle 1+ data.  If set, the ASDM files will be
            downloaded in addition to the script and log files.  By default,
            though, this file will be downloaded and deleted without extracting
            any information: you must change the regex if you want to extract
            data from an ASDM tarball
        path : str
            Directory to download and extract into
        verbose : bool
            Flag to display information about the process
        """

        if isinstance(urls, str):
            urls = [urls]
        if not isinstance(urls, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")
        filere = re.compile(regex)

        all_files = []
        tar_files = []
        expanded_files = []
        for url in urls:
            if not url.endswith('.tar'):
                raise ValueError("URLs should be links to tarballs.")

            tarfile_name = os.path.split(url)[-1]
            if tarfile_name in self._cycle0_tarfile_content['ID']:
                # It is a cycle 0 file: need to check if it contains FITS
                match = (self._cycle0_tarfile_content['ID'] == tarfile_name)
                if not any(re.match(regex, x) for x in
                           self._cycle0_tarfile_content['Files'][match]):
                    log.info("No FITS files found in {0}".format(tarfile_name))
                    continue
            else:
                if 'asdm' in tarfile_name and not include_asdm:
                    log.info("ASDM tarballs do not contain FITS files; "
                             "skipping.")
                    continue

            files = self.get_data_info(tarfile_name)
            if files:
                expanded_files += [x for x in files['access_url'] if
                                   filere.match(x.split('/')[-1])]
            else:
                tar_files.append(url)

        try:
            # get the tar files
            downloaded = self.download_files(tar_files, savedir=path)
            fitsfilelist = self.get_files_from_tarballs(downloaded,
                                                        regex=regex, path=path,
                                                        verbose=verbose)

            if delete:
                for tarball_name in downloaded:
                    log.info("Deleting {0}".format(tarball_name))
                    os.remove(tarball_name)

            all_files += fitsfilelist

            # download the other files
            all_files += self.download_files(expanded_files, savedir=path)

        except requests.ConnectionError as ex:
            self.partial_file_list = all_files
            log.error("There was an error downloading the file. "
                      "A partially completed download list is "
                      "in Alma.partial_file_list")
            raise ex
        except requests.HTTPError as ex:
            if ex.response.status_code == 401:
                log.info("Access denied to {url}.  Skipping to"
                         " next file".format(url=url))
            else:
                raise ex
        return all_files
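
A usage sketch (the tarball URL is a placeholder):

    from astroquery.alma import Alma

    fits_files = Alma.download_and_extract_files(
        ['https://almascience.org/dataPortal/somedata.tar'],  # placeholder URL
        regex=r'.*\.fits$', path='/tmp/alma', delete=True)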
Example #14
    def load_data(self,
                  ids,
                  data_release=None,
                  data_structure='INDIVIDUAL',
                  retrieval_type="ALL",
                  valid_data=False,
                  band=None,
                  avoid_datatype_check=False,
                  format="votable",
                  output_file=None,
                  overwrite_output_file=False,
                  verbose=False):
        """Loads the specified data products.
        TAP+ only

        Parameters
        ----------
        ids : str list, mandatory
            list of identifiers
        data_release: str, optional, default None
            data release from which data should be taken. E.g. 'Gaia DR2'
            By default, it takes the current default one.
        data_structure: str, optional, default 'INDIVIDUAL'
            it can be 'INDIVIDUAL', 'COMBINED', 'RAW':
            'INDIVIDUAL' means products are provided in separate files for each sourceId. All files are zipped in a single
            bundle, even if only one source/file is considered
            'COMBINED' means products are provided in a single file concatenating the data of all sourceIds together.
            How this is organised depends on the chosen format
            'RAW' means products are provided following a Data Model similar to that used in the MDB, meaning in
            particular that parameters stored as arrays will remain as such. Like in the COMBINED structure, a single
            file is provided for the data of all sourceIds together, but in this case there will always be one
            row per sourceId
        retrieval_type : str, optional, default 'ALL' (retrieve all data from the list of sources)
            retrieval type identifier. For Gaia DR2, possible values are ['EPOCH_PHOTOMETRY'].
            For the future Gaia DR3 (once published), possible values will be ['EPOCH_PHOTOMETRY',
            'RVS', 'XP_CONTINUOUS', 'XP_SAMPLED', 'MCMC_GSPPHOT' or 'MCMC_MSC']
        valid_data : bool, optional, default False
            By default, the epoch photometry service returns all available data, including
            data rows where flux is null and/or the rejected_by_photometry flag is set to True.
            In order to retrieve only valid data (data rows where flux is not null and/or the
            rejected_by_photometry flag is set to False) this request parameter should be included
            with valid_data=True.
        band : str, optional, default None, valid values: G, BP, RP
            By default, the epoch photometry service returns all the
            available photometry bands for the requested source.
            This parameter allows filtering the output light curve by band.
        avoid_datatype_check : boolean, optional, default False
            If set to True, the DataLink item tags will not be checked.
        format : str, optional, default 'votable'
            loading format. Other available formats are 'csv', 'ecsv', 'json', 'votable_plain' and 'fits'
        output_file : string, optional, default None
            file where the results are saved.
            If it is not provided, the http response contents are returned.
        overwrite_output_file : boolean, optional, default False
            To overwrite the output_file if it already exists.
        verbose : bool, optional, default False
            flag to display information about the process

        Returns
        -------
        A dictionary with the downloaded data products
        """

        if retrieval_type is None:
            raise ValueError("Missing mandatory argument 'retrieval_type'")

        now = datetime.now()
        now_formatted = now.strftime("%Y%m%d_%H%M%S")
        temp_dirname = "temp_" + now_formatted
        downloadname_formatted = "download_" + now_formatted

        output_file_specified = False
        if output_file is None:
            output_file = os.path.join(os.getcwd(), temp_dirname,
                                       downloadname_formatted)
        else:
            output_file_specified = True
            output_file = os.path.abspath(output_file)
            if not overwrite_output_file and os.path.exists(output_file):
                raise ValueError(
                    f"{output_file} already exists. Please use "
                    f"overwrite_output_file=True to overwrite it.")

        path = os.path.dirname(output_file)

        if ids is None:
            raise ValueError("Missing mandatory argument 'ids'")

        if avoid_datatype_check is False:
            # we need to check params
            rt = str(retrieval_type).upper()
            if rt != 'ALL' and rt not in self.VALID_DATALINK_RETRIEVAL_TYPES:
                raise ValueError(
                    f"Invalid mandatory argument 'retrieval_type'. Found {retrieval_type}, "
                    f"expected: 'ALL' or any of {self.VALID_DATALINK_RETRIEVAL_TYPES}"
                )

        params_dict = {}

        if not valid_data or str(retrieval_type) == 'ALL':
            params_dict['VALID_DATA'] = "false"
        elif valid_data:
            params_dict['VALID_DATA'] = "true"

        if band is not None:
            if band not in ('G', 'BP', 'RP'):
                raise ValueError("Invalid band value '%s' "
                                 "(valid values: 'G', 'BP' and 'RP')" % band)
            else:
                params_dict['BAND'] = band
        if isinstance(ids, str):
            ids_arg = ids
        elif isinstance(ids, int):
            ids_arg = str(ids)
        else:
            ids_arg = ','.join(str(item) for item in ids)
        params_dict['ID'] = ids_arg
        if data_release is not None:
            params_dict['RELEASE'] = data_release
        params_dict['DATA_STRUCTURE'] = data_structure
        params_dict['FORMAT'] = str(format)
        params_dict['RETRIEVAL_TYPE'] = str(retrieval_type)
        params_dict['USE_ZIP_ALWAYS'] = 'true'

        if path != '':
            try:
                os.mkdir(path)
            except FileExistsError:
                log.error("Path %s already exists" % path)
            except OSError:
                log.error("Creation of the directory %s failed" % path)

        try:
            self.__gaiadata.load_data(params_dict=params_dict,
                                      output_file=output_file,
                                      verbose=verbose)
            files = Gaia.__get_data_files(output_file=output_file, path=path)
        finally:
            if not output_file_specified:
                shutil.rmtree(path)

        if verbose and output_file_specified:
            log.info("output_file = %s" % output_file)

        log.debug("List of products available:")
        for item in sorted(files):
            if verbose:
                log.debug("Product = " + item)

        return files
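
This variant defaults to valid_data=False; a sketch that writes the zipped products to an explicit file (the id and output path are placeholders):

    from astroquery.gaia import Gaia

    files = Gaia.load_data(ids='5937083524458270976',  # placeholder source_id
                           retrieval_type='EPOCH_PHOTOMETRY',
                           valid_data=True,
                           output_file='products.zip',  # hypothetical output path
                           overwrite_output_file=True)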
Example #15
    def get_epic_spectra(self, filename, source_number, *,
                         instrument=[], path="", verbose=False):
        """Extracts the EPIC source spectral products from a given TAR file,
        into ``path`` when set.
        For a given TAR file obtained with:
            XMM.download_data(OBS_ID, level="PPS", extension="FTZ", filename=tarfile)
        This function extracts the EPIC source spectral products for a given
        instrument (or instruments) from the TAR file.
        The result is a dictionary containing the paths to the extracted
        products, with the instrument as key.
        If the instrument is not specified, products for all available
        instruments are returned.

        Examples:
        Extracting the products for several instruments:
            result = XMM.get_epic_spectra(tarfile, 83,
                                          instrument=['M1', 'M2', 'PN'])
        To retrieve the source spectrum of the PN instrument:
            fits_image = result['PN']
        fits_image will then hold the full path to the extracted FTZ file.

        Parameters
        ----------
        filename : string, mandatory
            The name of the tarfile to be processed
        source_number : integer, mandatory
            The source number, in decimal, in the observation
        instrument : array of strings, optional, default []
            An array of strings indicating the desired instruments
        path : string, optional
            If set, extracts the products into the indicated path
        verbose : bool, optional, default False
            flag to display information about the process

        Returns
        -------
        A dictionary with the full paths of the extracted EPIC sources
        spectral products. The key is the instrument

        Notes
        -----
        The filenames will contain the source number in hexadecimal,
        as this is the convention used by the pipeline.
        The structure and the content of the extracted compressed FITS files
        are described in detail in the Pipeline Products Description
        [XMM-SOC-GEN-ICD-0024](https://xmm-tools.cosmos.esa.int/external/xmm_obs_info/odf/data/docs/XMM-SOC-GEN-ICD-0024.pdf).
        """
        _instrument = ["M1", "M2", "PN", "EP"]
        _product_type = ["SRSPEC", "BGSPEC", "SRCARF"]
        _path = ""
        ret = None
        if instrument == []:
            instrument = _instrument
        else:
            # keep only valid instruments; do not remove items from the list
            # while iterating over it
            valid_instrument = []
            for inst in instrument:
                if inst not in _instrument:
                    log.warning(f"Invalid instrument {inst}")
                else:
                    valid_instrument.append(inst)
            instrument = valid_instrument
        if path != "" and os.path.exists(path):
            _path = path
        try:
            with tarfile.open(filename, "r") as tar:
                ret = {}
                for member in tar.getmembers():
                    paths = os.path.split(member.name)
                    fname = paths[1]
                    paths = os.path.split(paths[0])
                    if paths[1] != "pps":
                        continue
                    fname_info = self._parse_filename(fname)
                    if fname_info["X"] != "P":
                        continue
                    if not fname_info["I"] in instrument:
                        continue
                    if not fname_info["T"] in _product_type:
                        continue
                    if int(fname_info["X-"], 16) != source_number:
                        continue
                    tar.extract(member, _path)
                    key = fname_info["I"]
                    path_inst_name = os.path.abspath(os.path.join(_path, member.name))
                    if fname_info["T"] == "BGSPEC":
                        key = fname_info["I"] + "_bkg"
                    elif fname_info["T"] == "SRCARF":
                        key = fname_info["I"] + "_arf"
                    else:
                        with fits.open(path_inst_name) as hdul:
                            for ext in hdul:
                                if ext.name != "SPECTRUM":
                                    continue
                                rmf_fname = ext.header["RESPFILE"]
                                if fname_info["I"] == "M1" or fname_info["I"] == "M2":
                                    inst = "MOS/" + str(ext.header["SPECDELT"]) + "eV/"
                                elif fname_info["I"] == "PN":
                                    inst = "PN/"
                                    file_name, file_ext = os.path.splitext(rmf_fname)
                                    rmf_fname = file_name + "_v18.0" + file_ext

                                link = self._rmf_ftp + inst + rmf_fname

                                if verbose:
                                    log.info("rmf link is: %s" % link)

                                response = self._request('GET', link)
                                response.raise_for_status()

                                rsp_filename = os.path.join(_path, paths[0], paths[1], ext.header["RESPFILE"])

                                with open(rsp_filename, 'wb') as f:
                                    f.write(response.content)
                                    ret[fname_info["I"] + "_rmf"] = rsp_filename

                    if ret.get(key) and isinstance(ret.get(key), str):
                        log.warning("More than one file found with the instrument: %s" % key)
                        ret[key] = [ret[key], path_inst_name]
                    elif ret.get(key) and isinstance(ret.get(key), list):
                        ret[key].append(path_inst_name)
                    else:
                        ret[key] = path_inst_name

        except FileNotFoundError:
            log.error("File %s not found" % (filename))
            return

        if not ret:
            log.info("Nothing to extract with the given parameters:\n"
                     "  PPS: %s\n"
                     "  Source Number: %u\n"
                     "  Instrument: %s\n" % (filename, source_number,
                                             instrument))

        return ret
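
A sketch of picking up the response matrix that this variant downloads alongside the PN source spectrum (names follow the Examples block in the docstring above):

    result = XMM.get_epic_spectra(tarfile, 83, instrument=['PN'])
    pn_spectrum = result.get('PN')   # extracted source spectrum
    pn_rmf = result.get('PN_rmf')    # response matrix fetched from the remote folder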