Example #1
    def query_async(self, payload, cache=True, public=True, science=True,
                    view_format='raw'):
        """
        Perform a generic query with user-specified payload

        Parameters
        ----------
        payload : dict
            A dictionary of payload keywords that are accepted by the ALMA
            archive system.  You can look these up by examining the forms at
            http://almascience.org/aq or using the `help` method
        cache : bool
            Cache the query?
        public : bool
            Return only publicly available datasets?
        science : bool
            Return only data marked as "science" in the archive?
        """

        url = urljoin(self._get_dataarchive_url(), 'aq/')

        payload.update({'result_view': view_format, 'format': 'VOTABLE',
                        'download': 'true'})
        if public:
            payload['public_data'] = 'public'
        if science:
            payload['science_observations'] = '=%TARGET%'

        self.validate_query(payload)

        response = self._request('GET', url, params=payload,
                                 timeout=self.TIMEOUT, cache=cache)
        response.raise_for_status()

        return response
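The first version of query_async above decorates the user payload with fixed view/format flags and issues a single GET against the archive's aq/ endpoint. A minimal standalone sketch of the same pattern, using plain requests and a placeholder archive root (the real one comes from _get_dataarchive_url()):

import requests
from urllib.parse import urljoin

# Placeholder archive root; the real code resolves it via _get_dataarchive_url()
DATAARCHIVE_URL = "https://almascience.org/"


def query_sketch(payload, public=True, science=True, timeout=60):
    """Sketch of the GET-based query above; not the astroquery API itself."""
    url = urljoin(DATAARCHIVE_URL, 'aq/')
    payload = dict(payload)  # avoid mutating the caller's dict
    payload.update({'result_view': 'raw', 'format': 'VOTABLE',
                    'download': 'true'})
    if public:
        payload['public_data'] = 'public'
    if science:
        payload['science_observations'] = '=%TARGET%'
    response = requests.get(url, params=payload, timeout=timeout)
    response.raise_for_status()  # surface HTTP errors immediately
    return response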
Example #2
    def _login(self, username, store_password=False):
        # Check if already logged in
        loginpage = self._request("GET", "https://asa.alma.cl/cas/login",
                                  cache=False)
        root = BeautifulSoup(loginpage.content, 'html5lib')
        if root.find('div', class_='success'):
            log.info("Already logged in.")
            return True

        # Get password from keyring or prompt
        password_from_keyring = keyring.get_password("astroquery:asa.alma.cl",
                                                     username)
        if password_from_keyring is None:
            if system_tools.in_ipynb():
                log.warn("You may be using an ipython notebook:"
                         " the password form will appear in your terminal.")
            password = getpass.getpass("{0}, enter your ALMA password:"******"\n".format(username))
        else:
            password = password_from_keyring
        # Authenticate
        log.info("Authenticating {0} on asa.alma.cl ...".format(username))
        # Do not cache pieces of the login process
        data = {kw: root.find('input', {'name': kw})['value']
                for kw in ('lt', '_eventId', 'execution')}
        data['username'] = username
        data['password'] = password

        login_response = self._request("POST", "https://asa.alma.cl/cas/login",
                                       params={'service':
                                               urljoin(self.archive_url,
                                                       'rh/login')},
                                       data=data,
                                       cache=False)

        authenticated = ('You have successfully logged in' in
                         login_response.text)

        if authenticated:
            log.info("Authentication successful!")
            self._username = username
        else:
            log.exception("Authentication failed!")
        # When authenticated, save password in keyring if needed
        if authenticated and password_from_keyring is None and store_password:
            keyring.set_password("astroquery:asa.alma.cl", username, password)
        return authenticated
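The login flow combines a keyring lookup with an interactive getpass prompt and only writes newly entered passwords back to the keyring. The fallback idiom on its own, with a hypothetical service name and nothing ALMA-specific:

import getpass
import keyring


def get_password(service, username, store_password=False):
    # Try the system keyring first; fall back to an interactive prompt
    password = keyring.get_password(service, username)
    from_keyring = password is not None
    if not from_keyring:
        password = getpass.getpass(
            "{0}, enter your password:\n".format(username))
    if store_password and not from_keyring:
        # Mirror the logic above: only store passwords typed in this session
        keyring.set_password(service, username, password)
    return password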
Example #3
    def get_cycle0_uid_contents(self, uid):
        """
        List the file contents of a UID from Cycle 0.  Will raise an error
        if the UID is from cycle 1+, since those data have been released in a
        different and more consistent format.
        See http://almascience.org/documents-and-tools/cycle-2/ALMAQA2Productsv1.01.pdf
        for details.
        """

        # First, check if UID is in the Cycle 0 listing
        if uid in self.cycle0_table['uid']:
            cycle0id = self.cycle0_table[self.cycle0_table['uid'] == uid][0]['ID']
            contents = [row['Files']
                        for row in self._cycle0_tarfile_content
                        if cycle0id in row['ID']]
            return contents
        else:
            info_url = urljoin(self._get_dataarchive_url(),
                               'documents-and-tools/cycle-2/ALMAQA2Productsv1.01.pdf')
            raise ValueError("Not a Cycle 0 UID.  See {0} for details about"
                             " cycle 1+ data release formats.".format(info_url))
Example #4
    def _cycle0_tarfile_content(self):
        """
        In principle, this is a static file, but we'll retrieve it just in case
        """
        if not hasattr(self, '_cycle0_tarfile_content_table'):
            url = urljoin(self._get_dataarchive_url(),
                          'alma-data/archive/cycle-0-tarfile-content')
            response = self._request('GET', url, cache=True)

            # html.parser is needed because some <tr>'s have form:
            # <tr width="blah"> which the default parser does not pick up
            root = BeautifulSoup(response.content, 'html.parser')
            html_table = root.find('table', class_='grid listing')
            data = list(zip(*[(x.findAll('td')[0].text, x.findAll('td')[1].text)
                              for x in html_table.findAll('tr')]))
            columns = [Column(data=data[0], name='ID'),
                       Column(data=data[1], name='Files')]
            tbl = Table(columns)
            assert len(tbl) == response.text.count('<tr') == 8497
            self._cycle0_tarfile_content_table = tbl
        else:
            tbl = self._cycle0_tarfile_content_table
        return tbl
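The table is fetched at most once per instance and then reused through a private attribute. The memoization idiom reduced to its essentials (class and method names here are placeholders, not astroquery API):

class ArchiveSketch:
    def tarfile_content(self):
        # The expensive fetch happens only on the first call; later calls
        # reuse the cached instance attribute, as in the method above.
        if not hasattr(self, '_tarfile_content_table'):
            self._tarfile_content_table = self._fetch_table()
        return self._tarfile_content_table

    def _fetch_table(self):
        # Placeholder for the HTTP request and HTML-table parsing done above
        return []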
Example #5
    def _cycle0_tarfile_content(self):
        """
        In principle, this is a static file, but we'll retrieve it just in case
        """
        if not hasattr(self, '_cycle0_tarfile_content_table'):
            url = urljoin(self._get_dataarchive_url(),
                          'alma-data/archive/cycle-0-tarfile-content')
            response = self._request('GET', url, cache=True)

            # html.parser is needed because some <tr>'s have form:
            # <tr width="blah"> which the default parser does not pick up
            root = BeautifulSoup(response.content, 'html.parser')
            html_table = root.find('table', class_='grid listing')
            data = list(zip(*[(x.findAll('td')[0].text,
                               x.findAll('td')[1].text)
                              for x in html_table.findAll('tr')]))
            columns = [Column(data=data[0], name='ID'),
                       Column(data=data[1], name='Files')]
            tbl = Table(columns)
            assert len(tbl) == 8497
            self._cycle0_tarfile_content_table = tbl
        else:
            tbl = self._cycle0_tarfile_content_table
        return tbl
Example #6
    def get_cycle0_uid_contents(self, uid):
        """
        List the file contents of a UID from Cycle 0.  Will raise an error
        if the UID is from cycle 1+, since those data have been released in
        a different and more consistent format.  See
        http://almascience.org/documents-and-tools/cycle-2/ALMAQA2Productsv1.01.pdf
        for details.
        """

        # First, check if UID is in the Cycle 0 listing
        if uid in self.cycle0_table['uid']:
            cycle0id = self.cycle0_table[self.cycle0_table['uid'] ==
                                         uid][0]['ID']
            contents = [
                row['Files'] for row in self._cycle0_tarfile_content
                if cycle0id in row['ID']
            ]
            return contents
        else:
            info_url = urljoin(
                self._get_dataarchive_url(),
                'documents-and-tools/cycle-2/ALMAQA2Productsv1.01.pdf')
            raise ValueError("Not a Cycle 0 UID.  See {0} for details about "
                             "cycle 1+ data release formats.".format(info_url))
Example #7
    def stage_data(self, uids):
        """
        Stage ALMA data

        Parameters
        ----------
        uids : list or str
            A list of valid UIDs or a single UID.
            UIDs should have the form: 'uid://A002/X391d0b/X7b'

        Returns
        -------
        data_file_table : Table
            A table containing 3 columns: the UID, the file URL (for future
            downloading), and the file size
        """

        """
        With log.set_level(10)
        INFO: Staging files... [astroquery.alma.core]
        DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
        DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
        DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
        DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]
        """

        if isinstance(uids, six.string_types):
            uids = [uids]
        if not isinstance(uids, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        log.info("Staging files...")

        self._get_dataarchive_url()

        url = urljoin(self.dataarchive_url, 'rh/submission')
        log.debug("First request URL: {0}".format(url))
        # 'ALMA+uid___A002_X391d0b_X7b'
        payload = {'dataset': ['ALMA+' + clean_uid(uid) for uid in uids]}
        log.debug("First request payload: {0}".format(payload))

        self._staging_log = {'first_post_url': url}

        # Request staging for the UIDs
        # This component cannot be cached, since the returned data can change
        # if new data are uploaded
        response = self._request('POST', url, data=payload,
                                 timeout=self.TIMEOUT, cache=False)
        self._staging_log['initial_response'] = response
        log.debug("First response URL: {0}".format(response.url))
        response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            time.sleep(1)
            # CANNOT cache this stage: it is not a real data page!  results in
            # infinite loops
            response = self._request('POST', url, data=payload,
                                     timeout=self.TIMEOUT, cache=False)
            self._staging_log['initial_response'] = response
            if 'j_spring_cas_security_check' in response.url:
                log.warn("Staging request was not successful.  Try again?")
            response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            raise RemoteServiceError("Could not access data.  This error "
                                     "can arise if the data are private and "
                                     "you do not have access rights or are "
                                     "not logged in.")

        request_id = response.url.split("/")[-2]
        assert len(request_id) == 36
        self._staging_log['request_id'] = request_id
        log.debug("Request ID: {0}".format(request_id))

        # Submit a request for the specific request ID identified above
        submission_url = urljoin(self.dataarchive_url,
                                 os.path.join('rh/submission', request_id))
        log.debug("Submission URL: {0}".format(submission_url))
        self._staging_log['submission_url'] = submission_url
        staging_submission = self._request('GET', submission_url, cache=True)
        self._staging_log['staging_submission'] = staging_submission
        staging_submission.raise_for_status()

        data_page_url = staging_submission.url
        self._staging_log['data_page_url'] = data_page_url
        dpid = data_page_url.split("/")[-1]
        assert len(dpid) == 9
        self._staging_log['staging_page_id'] = dpid

        # CANNOT cache this step: please_wait will happen infinitely
        data_page = self._request('GET', data_page_url, cache=False)
        self._staging_log['data_page'] = data_page
        data_page.raise_for_status()

        has_completed = False
        while not has_completed:
            time.sleep(1)
            summary = self._request('GET', os.path.join(data_page_url,
                                                        'summary'),
                                    cache=False)
            summary.raise_for_status()
            print(".", end='')
            sys.stdout.flush()
            has_completed = summary.json()['complete']

        self._staging_log['summary'] = summary
        summary.raise_for_status()
        self._staging_log['json_data'] = json_data = summary.json()

        username = self._username if hasattr(self, '_username') else 'anonymous'

        # templates:
        # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
        # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
        # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar

        url_decomposed = urlparse(data_page_url)
        base_url = ('{uri.scheme}://{uri.netloc}/'
                    'dataPortal/requests/{username}/'
                    '{staging_page_id}/ALMA'.format(uri=url_decomposed,
                                                    staging_page_id=dpid,
                                                    username=username,
                                                    ))
        tbl = self._json_summary_to_table(json_data, base_url=base_url)

        # staging_root = BeautifulSoup(data_page.content)
        # downloadFileURL = staging_root.find('form').attrs['action']
        # data_list_url = os.path.split(downloadFileURL)[0]

        # # Old version, unreliable: data_list_url = staging_submission.url
        # log.debug("Data list URL: {0}".format(data_list_url))
        # self._staging_log['data_list_url'] = data_list_url

        # time.sleep(1)
        # data_list_page = self._request('GET', data_list_url, cache=True)
        # self._staging_log['data_list_page'] = data_list_page
        # data_list_page.raise_for_status()

        # if 'Error' in data_list_page.text:
        #     errormessage = staging_root.find(
        #         'div', id='errorContent').string.strip()
        #     raise RemoteServiceError(errormessage)

        # tbl = self._parse_staging_request_page(data_list_page)

        return tbl
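After the staging request is submitted, the method polls the request's summary endpoint once per second until the returned JSON reports completion. A self-contained sketch of that polling loop, using plain requests and a placeholder summary URL:

import sys
import time

import requests


def wait_for_completion(summary_url, poll_interval=1):
    """Poll a (placeholder) summary endpoint until its JSON says 'complete'."""
    while True:
        time.sleep(poll_interval)
        summary = requests.get(summary_url)
        summary.raise_for_status()
        print(".", end='')  # progress indicator, as in stage_data
        sys.stdout.flush()
        if summary.json()['complete']:
            return summary.json()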
Example #8
    def _login(self,
               username=None,
               store_password=False,
               reenter_password=False):
        """
        Login to the ALMA Science Portal.

        Parameters
        ----------
        username : str, optional
            Username to the ALMA Science Portal. If not given, it should be
            specified in the config file.
        store_password : bool, optional
            Stores the password securely in your keyring. Default is False.
        reenter_password : bool, optional
            Asks for the password even if it is already stored in the
            keyring. This is the way to overwrite an already stored password
            on the keyring. Default is False.
        """

        if username is None:
            if not self.USERNAME:
                raise LoginError("If you do not pass a username to login(), "
                                 "you should configure a default one!")
            else:
                username = self.USERNAME

        # Check if already logged in
        loginpage = self._request("GET",
                                  "https://asa.alma.cl/cas/login",
                                  cache=False)
        root = BeautifulSoup(loginpage.content, 'html5lib')
        if root.find('div', class_='success'):
            log.info("Already logged in.")
            return True

        # Get password from keyring or prompt
        if reenter_password is False:
            password_from_keyring = keyring.get_password(
                "astroquery:asa.alma.cl", username)
        else:
            password_from_keyring = None

        if password_from_keyring is None:
            if system_tools.in_ipynb():
                log.warning("You may be using an ipython notebook:"
                            " the password form will appear in your terminal.")
            password = getpass.getpass("{0}, enter your ALMA password:"
                                       "\n".format(username))
        else:
            password = password_from_keyring
        # Authenticate
        log.info("Authenticating {0} on asa.alma.cl ...".format(username))
        # Do not cache pieces of the login process
        data = {
            kw: root.find('input', {'name': kw})['value']
            for kw in ('lt', '_eventId', 'execution')
        }
        data['username'] = username
        data['password'] = password

        login_response = self._request(
            "POST",
            "https://asa.alma.cl/cas/login",
            params={'service': urljoin(self.archive_url, 'rh/login')},
            data=data,
            cache=False)

        authenticated = ('You have successfully logged in'
                         in login_response.text)

        if authenticated:
            log.info("Authentication successful!")
            self.USERNAME = username
        else:
            log.exception("Authentication failed!")
        # When authenticated, save password in keyring if needed
        if authenticated and password_from_keyring is None and store_password:
            keyring.set_password("astroquery:asa.alma.cl", username, password)
        return authenticated
Example #9
    def stage_data(self, uids):
        """
        Stage ALMA data

        Parameters
        ----------
        uids : list or str
            A list of valid UIDs or a single UID.
            UIDs should have the form: 'uid://A002/X391d0b/X7b'

        Returns
        -------
        data_file_table : Table
            A table containing 3 columns: the UID, the file URL (for future
            downloading), and the file size
        """
        """
        With log.set_level(10)
        INFO: Staging files... [astroquery.alma.core]
        DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
        DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
        DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
        DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]
        """

        if isinstance(uids, six.string_types + (np.bytes_, )):
            uids = [uids]
        if not isinstance(uids, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        log.info("Staging files...")

        self._get_dataarchive_url()

        url = urljoin(self.dataarchive_url, 'rh/submission')
        log.debug("First request URL: {0}".format(url))
        # 'ALMA+uid___A002_X391d0b_X7b'
        payload = {'dataset': ['ALMA+' + clean_uid(uid) for uid in uids]}
        log.debug("First request payload: {0}".format(payload))

        self._staging_log = {'first_post_url': url}

        # Request staging for the UIDs
        # This component cannot be cached, since the returned data can change
        # if new data are uploaded
        response = self._request('POST',
                                 url,
                                 data=payload,
                                 timeout=self.TIMEOUT,
                                 cache=False)
        self._staging_log['initial_response'] = response
        log.debug("First response URL: {0}".format(response.url))
        if 'login' in response.url:
            raise ValueError(
                "You must login before downloading this data set.")

        if response.status_code == 405:
            if hasattr(self, '_last_successful_staging_log'):
                log.warning(
                    "Error 405 received.  If you have previously staged "
                    "the same UIDs, the result returned is probably "
                    "correct, otherwise you may need to create a fresh "
                    "astroquery.Alma instance.")
                return self._last_successful_staging_log['result']
            else:
                raise HTTPError(
                    "Received an error 405: this may indicate you "
                    "have already staged the data.  Try downloading "
                    "the file URLs directly with download_files.")
        response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            time.sleep(1)
            # CANNOT cache this stage: it is not a real data page!  results in
            # infinite loops
            response = self._request('POST',
                                     url,
                                     data=payload,
                                     timeout=self.TIMEOUT,
                                     cache=False)
            self._staging_log['initial_response'] = response
            if 'j_spring_cas_security_check' in response.url:
                log.warning("Staging request was not successful.  Try again?")
            response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            raise RemoteServiceError("Could not access data.  This error "
                                     "can arise if the data are private and "
                                     "you do not have access rights or are "
                                     "not logged in.")

        request_id = response.url.split("/")[-2]
        self._staging_log['request_id'] = request_id
        log.debug("Request ID: {0}".format(request_id))

        # Submit a request for the specific request ID identified above
        submission_url = urljoin(self.dataarchive_url,
                                 url_helpers.join('rh/submission', request_id))
        log.debug("Submission URL: {0}".format(submission_url))
        self._staging_log['submission_url'] = submission_url
        staging_submission = self._request('GET', submission_url, cache=True)
        self._staging_log['staging_submission'] = staging_submission
        staging_submission.raise_for_status()

        data_page_url = staging_submission.url
        self._staging_log['data_page_url'] = data_page_url
        dpid = data_page_url.split("/")[-1]
        self._staging_log['staging_page_id'] = dpid

        # CANNOT cache this step: please_wait will happen infinitely
        data_page = self._request('GET', data_page_url, cache=False)
        self._staging_log['data_page'] = data_page
        data_page.raise_for_status()

        has_completed = False
        while not has_completed:
            time.sleep(1)
            summary = self._request('GET',
                                    url_helpers.join(data_page_url, 'summary'),
                                    cache=False)
            summary.raise_for_status()
            print(".", end='')
            sys.stdout.flush()
            has_completed = summary.json()['complete']

        self._staging_log['summary'] = summary
        summary.raise_for_status()
        self._staging_log['json_data'] = json_data = summary.json()

        username = self.USERNAME if self.USERNAME else 'anonymous'

        # templates:
        # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
        # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
        # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar

        url_decomposed = urlparse(data_page_url)
        base_url = ('{uri.scheme}://{uri.netloc}/'
                    'dataPortal/requests/{username}/'
                    '{staging_page_id}/ALMA'.format(
                        uri=url_decomposed,
                        staging_page_id=dpid,
                        username=username,
                    ))
        tbl = self._json_summary_to_table(json_data, base_url=base_url)
        self._staging_log['result'] = tbl
        self._staging_log['file_urls'] = tbl['URL']
        self._last_successful_staging_log = self._staging_log

        return tbl
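Compared with Example #7, this version treats HTTP 405 as "probably already staged" and falls back to the result of the last successful call. The fallback logic in isolation, with illustrative names rather than the real astroquery internals:

class StagerSketch:
    def stage(self, post_request):
        # post_request() is assumed to perform the POST and return a
        # requests.Response-like object with status_code and json()
        response = post_request()
        if response.status_code == 405:
            # The server rejects repeated submissions of the same request,
            # so reuse the cached result of the last successful call if any.
            if hasattr(self, '_last_result'):
                return self._last_result
            raise RuntimeError("405 received and no previous result cached.")
        response.raise_for_status()
        self._last_result = self._parse(response)
        return self._last_result

    def _parse(self, response):
        # Placeholder for _json_summary_to_table in the real code
        return response.json()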
Example #10
    def query_async(self,
                    payload,
                    cache=True,
                    public=True,
                    science=True,
                    max_retries=5,
                    get_html_version=False,
                    get_query_payload=False,
                    **kwargs):
        """
        Perform a generic query with user-specified payload

        Parameters
        ----------
        payload : dict
            A dictionary of payload keywords that are accepted by the ALMA
            archive system.  You can look these up by examining the forms at
            http://almascience.org/aq or using the `help` method
        cache : bool
            Cache the query?
            (note: HTML queries *cannot* be cached using the standard caching
            mechanism because the URLs are different each time)
        public : bool
            Return only publicly available datasets?
        science : bool
            Return only data marked as "science" in the archive?

        """

        url = urljoin(self._get_dataarchive_url(), 'aq/')

        payload.update(kwargs)
        if get_html_version:
            payload.update({
                'result_view': 'observation',
                'format': 'URL',
                'download': 'true'
            })
        else:
            payload.update({
                'result_view': 'raw',
                'format': 'VOTABLE',
                'download': 'true'
            })
        if public:
            payload['public_data'] = 'public'
        if science:
            payload['science_observations'] = '=%TARGET%'

        self.validate_query(payload)

        if get_query_payload:
            return payload

        response = self._request('GET',
                                 url,
                                 params=payload,
                                 timeout=self.TIMEOUT,
                                 cache=cache and not get_html_version)
        self._last_response = response
        response.raise_for_status()

        if get_html_version:
            if 'run' not in response.text:
                if max_retries > 0:
                    log.info(
                        "Failed query.  Retrying up to {0} more times".format(
                            max_retries))
                    return self.query_async(
                        payload=payload,
                        cache=False,
                        public=public,
                        science=science,
                        max_retries=max_retries - 1,
                        get_html_version=get_html_version,
                        get_query_payload=get_query_payload,
                        **kwargs)
                raise RemoteServiceError(
                    "Incorrect return from HTML table query.")
            response2 = self._request(
                'GET',
                "{0}/{1}/{2}".format(self._get_dataarchive_url(), 'aq',
                                     response.text),
                params={'query_url': response.url.split("?")[-1]},
                timeout=self.TIMEOUT,
                cache=False,
            )
            self._last_response = response2
            response2.raise_for_status()
            if len(response2.text) == 0:
                if max_retries > 0:
                    log.info(
                        "Failed (empty) query.  Retrying up to {0} more times".
                        format(max_retries))
                    return self.query_async(
                        payload=payload,
                        cache=cache,
                        public=public,
                        science=science,
                        max_retries=max_retries - 1,
                        get_html_version=get_html_version,
                        get_query_payload=get_query_payload,
                        **kwargs)
                raise RemoteServiceError("Empty return.")
            return response2

        else:
            return response
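The HTML-mode branch retries the whole query recursively, decrementing max_retries on each attempt and raising only once the budget is exhausted. The control flow stripped to a generic skeleton (function names are placeholders):

def fetch_with_retries(fetch, is_valid, max_retries=5):
    # fetch() performs one query attempt; is_valid() checks its result
    result = fetch()
    if not is_valid(result):
        if max_retries > 0:
            # Same recursive retry pattern as in query_async above
            return fetch_with_retries(fetch, is_valid, max_retries - 1)
        raise RuntimeError("Query failed after exhausting all retries.")
    return result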
Example #11
    def _login(self, username=None, store_password=False,
               reenter_password=False):
        """
        Login to the ALMA Science Portal.

        Parameters
        ----------
        username : str, optional
            Username to the ALMA Science Portal. If not given, it should be
            specified in the config file.
        store_password : bool, optional
            Stores the password securely in your keyring. Default is False.
        reenter_password : bool, optional
            Asks for the password even if it is already stored in the
            keyring. This is the way to overwrite an already stored password
            on the keyring. Default is False.
        """

        if username is None:
            if not self.USERNAME:
                raise LoginError("If you do not pass a username to login(), "
                                 "you should configure a default one!")
            else:
                username = self.USERNAME

        # Check if already logged in
        loginpage = self._request("GET", "https://asa.alma.cl/cas/login",
                                  cache=False)
        root = BeautifulSoup(loginpage.content, 'html5lib')
        if root.find('div', class_='success'):
            log.info("Already logged in.")
            return True

        # Get password from keyring or prompt
        if reenter_password is False:
            password_from_keyring = keyring.get_password(
                "astroquery:asa.alma.cl", username)
        else:
            password_from_keyring = None

        if password_from_keyring is None:
            if system_tools.in_ipynb():
                log.warning("You may be using an ipython notebook:"
                            " the password form will appear in your terminal.")
            password = getpass.getpass("{0}, enter your ALMA password:"
                                       "\n".format(username))
        else:
            password = password_from_keyring
        # Authenticate
        log.info("Authenticating {0} on asa.alma.cl ...".format(username))
        # Do not cache pieces of the login process
        data = {kw: root.find('input', {'name': kw})['value']
                for kw in ('lt', '_eventId', 'execution')}
        data['username'] = username
        data['password'] = password

        login_response = self._request("POST", "https://asa.alma.cl/cas/login",
                                       params={'service':
                                               urljoin(self.archive_url,
                                                       'rh/login')},
                                       data=data,
                                       cache=False)

        authenticated = ('You have successfully logged in' in
                         login_response.text)

        if authenticated:
            log.info("Authentication successful!")
            self.USERNAME = username
        else:
            log.exception("Authentication failed!")
        # When authenticated, save password in keyring if needed
        if authenticated and password_from_keyring is None and store_password:
            keyring.set_password("astroquery:asa.alma.cl", username, password)
        return authenticated
Example #12
    def stage_data(self, uids):
        """
        Stage ALMA data

        Parameters
        ----------
        uids : list or str
            A list of valid UIDs or a single UID.
            UIDs should have the form: 'uid://A002/X391d0b/X7b'
        cache : True
            This is *forced* true, because the ALMA servers don't support repeats
            of the same request.
            Whether to cache the staging process.  This should generally be
            left as False when used interactively.

        Returns
        -------
        data_file_table : Table
            A table containing 3 columns: the UID, the file URL (for future
            downloading), and the file size
        """

        """
        With log.set_level(10)
        INFO: Staging files... [astroquery.alma.core]
        DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
        DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
        DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
        DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]
        """

        if isinstance(uids, six.string_types):
            uids = [uids]
        if not isinstance(uids, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        log.info("Staging files...")

        self._get_dataarchive_url()

        url = urljoin(self.dataarchive_url, 'rh/submission')
        log.debug("First request URL: {0}".format(url))
        # 'ALMA+uid___A002_X391d0b_X7b'
        # payload = [('dataset', 'ALMA+' + clean_uid(uid)) for uid in uids]
        payload = {'dataset': ['ALMA+' + clean_uid(uid) for uid in uids]}
        log.debug("First request payload: {0}".format(payload))

        self._staging_log = {'first_post_url': url}

        # Request staging for the UIDs
        # This component cannot be cached, since the returned data can change
        # if new data are uploaded
        response = self._request('POST', url, data=payload,
                                 timeout=self.TIMEOUT, cache=False)
        self._staging_log['initial_response'] = response
        log.debug("First response URL: {0}".format(response.url))
        response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            time.sleep(1)
            # CANNOT cache this stage: it is not a real data page!  results in
            # infinite loops
            response = self._request('POST', url, data=payload,
                                     timeout=self.TIMEOUT, cache=False)
            self._staging_log['initial_response'] = response
            if 'j_spring_cas_security_check' in response.url:
                log.warn("Staging request was not successful.  Try again?")
            response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            raise RemoteServiceError("Could not access data.  This error "
                                     "can arise if the data are private and "
                                     "you do not have access rights or are "
                                     "not logged in.")

        request_id = response.url.split("/")[-2]
        assert len(request_id) == 36
        self._staging_log['request_id'] = request_id
        log.debug("Request ID: {0}".format(request_id))

        # Submit a request for the specific request ID identified above
        submission_url = urljoin(self.dataarchive_url,
                                 os.path.join('rh/submission', request_id))
        log.debug("Submission URL: {0}".format(submission_url))
        self._staging_log['submission_url'] = submission_url
        has_completed = False
        staging_submission = self._request('GET', submission_url, cache=True)
        self._staging_log['staging_submission'] = staging_submission
        staging_submission.raise_for_status()

        data_page_url = staging_submission.url
        dpid = data_page_url.split("/")[-1]
        assert len(dpid) == 9
        self._staging_log['staging_page_id'] = dpid

        while not has_completed:
            time.sleep(1)
            # CANNOT cache this step: please_wait will happen infinitely
            data_page = self._request('GET', data_page_url, cache=False)
            if 'Please wait' not in data_page.text:
                has_completed = True
            print(".",end='')
        self._staging_log['data_page'] = data_page
        data_page.raise_for_status()
        staging_root = BeautifulSoup(data_page.content)
        downloadFileURL = staging_root.find('form').attrs['action']
        data_list_url = os.path.split(downloadFileURL)[0]

        # Old version, unreliable: data_list_url = staging_submission.url
        log.debug("Data list URL: {0}".format(data_list_url))
        self._staging_log['data_list_url'] = data_list_url

        time.sleep(1)
        data_list_page = self._request('GET', data_list_url, cache=True)
        self._staging_log['data_list_page'] = data_list_page
        data_list_page.raise_for_status()

        if 'Error' in data_list_page.text:
            errormessage = staging_root.find(
                'div', id='errorContent').string.strip()
            raise RemoteServiceError(errormessage)

        tbl = self._parse_staging_request_page(data_list_page)

        return tbl
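This older variant scrapes the download form out of the staging page and derives the data-list URL from the form's action attribute. The extraction step on its own, run against an illustrative HTML snippet (the numeric ID below appears in the debug log quoted in the docstring):

import os

from bs4 import BeautifulSoup

# Illustrative HTML; the real markup comes from the staged data page
html = ('<form action="https://almascience.eso.org/rh/requests/'
        'anonymous/786823226/downloadFile"></form>')
staging_root = BeautifulSoup(html, 'html.parser')
download_file_url = staging_root.find('form').attrs['action']
# os.path.split drops the last path component, leaving the request directory
data_list_url = os.path.split(download_file_url)[0]
print(data_list_url)
# https://almascience.eso.org/rh/requests/anonymous/786823226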
Example #13
def _absurl_from_url(url, base_url):
    if url[:4] != 'http':
        return urlparse.urljoin(base_url, url)
    return url
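The helper leaves absolute URLs untouched and resolves relative ones against the request base. A quick Python 3 rendition with example inputs (the original imports urlparse in the Python 2 style):

from urllib.parse import urljoin


def absurl_from_url(url, base_url):
    # Relative URLs are joined onto the base; absolute ones pass through
    if url[:4] != 'http':
        return urljoin(base_url, url)
    return url


print(absurl_from_url('rh/requests/anonymous/786823226',
                      'https://almascience.eso.org/'))
# https://almascience.eso.org/rh/requests/anonymous/786823226
print(absurl_from_url('https://example.org/data.tar',
                      'https://almascience.eso.org/'))
# https://example.org/data.tar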
Example #14
    def stage_data(self, uids):
        """
        Stage ALMA data

        Parameters
        ----------
        uids : list or str
            A list of valid UIDs or a single UID.
            UIDs should have the form: 'uid://A002/X391d0b/X7b'

        Returns
        -------
        data_file_table : Table
            A table containing 3 columns: the UID, the file URL (for future
            downloading), and the file size
        """
        """
        With log.set_level(10)
        INFO: Staging files... [astroquery.alma.core]
        DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
        DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
        DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
        DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]
        """

        if isinstance(uids, six.string_types):
            uids = [uids]
        if not isinstance(uids, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        log.info("Staging files...")

        self._get_dataarchive_url()

        url = urljoin(self.dataarchive_url, 'rh/submission')
        log.debug("First request URL: {0}".format(url))
        # 'ALMA+uid___A002_X391d0b_X7b'
        # payload = [('dataset', 'ALMA+' + clean_uid(uid)) for uid in uids]
        payload = {'dataset': ['ALMA+' + clean_uid(uid) for uid in uids]}
        log.debug("First request payload: {0}".format(payload))

        self._staging_log = {'first_post_url': url}

        # Request staging for the UIDs
        # This component cannot be cached, since the returned data can change
        # if new data are uploaded
        response = self._request('POST',
                                 url,
                                 data=payload,
                                 timeout=self.TIMEOUT,
                                 cache=False)
        self._staging_log['initial_response'] = response
        log.debug("First response URL: {0}".format(response.url))
        response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            time.sleep(1)
            # CANNOT cache this stage: it is not a real data page!  results in
            # infinite loops
            response = self._request('POST',
                                     url,
                                     data=payload,
                                     timeout=self.TIMEOUT,
                                     cache=False)
            self._staging_log['initial_response'] = response
            if 'j_spring_cas_security_check' in response.url:
                log.warn("Staging request was not successful.  Try again?")
            response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            raise RemoteServiceError("Could not access data.  This error "
                                     "can arise if the data are private and "
                                     "you do not have access rights or are "
                                     "not logged in.")

        request_id = response.url.split("/")[-2]
        assert len(request_id) == 36
        self._staging_log['request_id'] = request_id
        log.debug("Request ID: {0}".format(request_id))

        # Submit a request for the specific request ID identified above
        submission_url = urljoin(self.dataarchive_url,
                                 os.path.join('rh/submission', request_id))
        log.debug("Submission URL: {0}".format(submission_url))
        self._staging_log['submission_url'] = submission_url
        staging_submission = self._request('GET', submission_url, cache=True)
        self._staging_log['staging_submission'] = staging_submission
        staging_submission.raise_for_status()

        data_page_url = staging_submission.url
        self._staging_log['data_page_url'] = data_page_url
        dpid = data_page_url.split("/")[-1]
        assert len(dpid) == 9
        self._staging_log['staging_page_id'] = dpid

        # CANNOT cache this step: please_wait will happen infinitely
        data_page = self._request('GET', data_page_url, cache=False)
        self._staging_log['data_page'] = data_page
        data_page.raise_for_status()

        has_completed = False
        while not has_completed:
            time.sleep(1)
            summary = self._request('GET',
                                    os.path.join(data_page_url, 'summary'),
                                    cache=False)
            summary.raise_for_status()
            print(".", end='')
            sys.stdout.flush()
            has_completed = summary.json()['complete']

        self._staging_log['summary'] = summary
        summary.raise_for_status()
        self._staging_log['json_data'] = json_data = summary.json()

        username = self._username if hasattr(self,
                                             '_username') else 'anonymous'

        # templates:
        # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
        # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
        # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar

        url_decomposed = urlparse(data_page_url)
        base_url = ('{uri.scheme}://{uri.netloc}/'
                    'dataPortal/requests/{username}/'
                    '{staging_page_id}/ALMA'.format(
                        uri=url_decomposed,
                        staging_page_id=dpid,
                        username=username,
                    ))
        tbl = self._json_summary_to_table(json_data, base_url=base_url)

        # staging_root = BeautifulSoup(data_page.content)
        # downloadFileURL = staging_root.find('form').attrs['action']
        # data_list_url = os.path.split(downloadFileURL)[0]

        # # Old version, unreliable: data_list_url = staging_submission.url
        # log.debug("Data list URL: {0}".format(data_list_url))
        # self._staging_log['data_list_url'] = data_list_url

        # time.sleep(1)
        # data_list_page = self._request('GET', data_list_url, cache=True)
        # self._staging_log['data_list_page'] = data_list_page
        # data_list_page.raise_for_status()

        # if 'Error' in data_list_page.text:
        #     errormessage = staging_root.find('div', id='errorContent').string.strip()
        #     raise RemoteServiceError(errormessage)

        # tbl = self._parse_staging_request_page(data_list_page)

        return tbl
Example #15
    def stage_data(self, uids):
        """
        Stage ALMA data

        Parameters
        ----------
        uids : list or str
            A list of valid UIDs or a single UID.
            UIDs should have the form: 'uid://A002/X391d0b/X7b'

        Returns
        -------
        data_file_table : Table
            A table containing 3 columns: the UID, the file URL (for future
            downloading), and the file size
        """

        """
        With log.set_level(10)
        INFO: Staging files... [astroquery.alma.core]
        DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
        DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
        DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
        DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]
        """

        if isinstance(uids, six.string_types + (np.bytes_,)):
            uids = [uids]
        if not isinstance(uids, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        log.info("Staging files...")

        self._get_dataarchive_url()

        url = urljoin(self.dataarchive_url, 'rh/submission')
        log.debug("First request URL: {0}".format(url))
        # 'ALMA+uid___A002_X391d0b_X7b'
        payload = {'dataset': ['ALMA+' + clean_uid(uid) for uid in uids]}
        log.debug("First request payload: {0}".format(payload))

        self._staging_log = {'first_post_url': url}

        # Request staging for the UIDs
        # This component cannot be cached, since the returned data can change
        # if new data are uploaded
        response = self._request('POST', url, data=payload,
                                 timeout=self.TIMEOUT, cache=False)
        self._staging_log['initial_response'] = response
        log.debug("First response URL: {0}".format(response.url))
        if response.status_code == 405:
            if hasattr(self, '_last_successful_staging_log'):
                log.warning("Error 405 received.  If you have previously staged "
                            "the same UIDs, the result returned is probably "
                            "correct, otherwise you may need to create a fresh "
                            "astroquery.Alma instance.")
                return self._last_successful_staging_log['result']
            else:
                raise HTTPError("Received an error 405: this may indicate you "
                                "have already staged the data.  Try downloading "
                                "the file URLs directly with download_files.")
        response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            time.sleep(1)
            # CANNOT cache this stage: it is not a real data page!  results in
            # infinite loops
            response = self._request('POST', url, data=payload,
                                     timeout=self.TIMEOUT, cache=False)
            self._staging_log['initial_response'] = response
            if 'j_spring_cas_security_check' in response.url:
                log.warning("Staging request was not successful.  Try again?")
            response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            raise RemoteServiceError("Could not access data.  This error "
                                     "can arise if the data are private and "
                                     "you do not have access rights or are "
                                     "not logged in.")

        request_id = response.url.split("/")[-2]
        self._staging_log['request_id'] = request_id
        log.debug("Request ID: {0}".format(request_id))

        # Submit a request for the specific request ID identified above
        submission_url = urljoin(self.dataarchive_url,
                                 url_helpers.join('rh/submission', request_id))
        log.debug("Submission URL: {0}".format(submission_url))
        self._staging_log['submission_url'] = submission_url
        staging_submission = self._request('GET', submission_url, cache=True)
        self._staging_log['staging_submission'] = staging_submission
        staging_submission.raise_for_status()

        data_page_url = staging_submission.url
        self._staging_log['data_page_url'] = data_page_url
        dpid = data_page_url.split("/")[-1]
        self._staging_log['staging_page_id'] = dpid

        # CANNOT cache this step: please_wait will happen infinitely
        data_page = self._request('GET', data_page_url, cache=False)
        self._staging_log['data_page'] = data_page
        data_page.raise_for_status()

        has_completed = False
        while not has_completed:
            time.sleep(1)
            summary = self._request('GET', url_helpers.join(data_page_url,
                                                            'summary'),
                                    cache=False)
            summary.raise_for_status()
            print(".", end='')
            sys.stdout.flush()
            has_completed = summary.json()['complete']

        self._staging_log['summary'] = summary
        summary.raise_for_status()
        self._staging_log['json_data'] = json_data = summary.json()

        username = self.USERNAME if self.USERNAME else 'anonymous'

        # templates:
        # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
        # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
        # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar

        url_decomposed = urlparse(data_page_url)
        base_url = ('{uri.scheme}://{uri.netloc}/'
                    'dataPortal/requests/{username}/'
                    '{staging_page_id}/ALMA'.format(uri=url_decomposed,
                                                    staging_page_id=dpid,
                                                    username=username,
                                                    ))
        tbl = self._json_summary_to_table(json_data, base_url=base_url)
        self._staging_log['result'] = tbl
        self._staging_log['file_urls'] = tbl['URL']
        self._last_successful_staging_log = self._staging_log

        return tbl
Example #16
def _absurl_from_url(url, base_url):
    if url[:4] != 'http':
        return urlparse.urljoin(base_url, url)
    return url
Example #17
    def query_async(self, payload, cache=True, public=True, science=True,
                    max_retries=5,
                    get_html_version=False, get_query_payload=False, **kwargs):
        """
        Perform a generic query with user-specified payload

        Parameters
        ----------
        payload : dict
            A dictionary of payload keywords that are accepted by the ALMA
            archive system.  You can look these up by examining the forms at
            http://almascience.org/aq or using the `help` method
        cache : bool
            Cache the query?
            (note: HTML queries *cannot* be cached using the standard caching
            mechanism because the URLs are different each time)
        public : bool
            Return only publicly available datasets?
        science : bool
            Return only data marked as "science" in the archive?

        """

        url = urljoin(self._get_dataarchive_url(), 'aq/')

        payload.update(kwargs)
        if get_html_version:
            payload.update({'result_view': 'observation', 'format': 'URL',
                            'download': 'true'})
        else:
            payload.update({'result_view': 'raw', 'format': 'VOTABLE',
                            'download': 'true'})
        if public:
            payload['public_data'] = 'public'
        if science:
            payload['science_observations'] = '=%TARGET%'

        self.validate_query(payload)

        if get_query_payload:
            return payload

        response = self._request('GET', url, params=payload,
                                 timeout=self.TIMEOUT,
                                 cache=cache and not get_html_version)
        self._last_response = response
        response.raise_for_status()

        if get_html_version:
            if 'run' not in response.text:
                if max_retries > 0:
                    log.info("Failed query.  Retrying up to {0} more times"
                             .format(max_retries))
                    return self.query_async(payload=payload, cache=False,
                                            public=public, science=science,
                                            max_retries=max_retries-1,
                                            get_html_version=get_html_version,
                                            get_query_payload=get_query_payload,
                                            **kwargs)
                raise RemoteServiceError("Incorrect return from HTML table query.")
            response2 = self._request('GET',
                                      "{0}/{1}/{2}".format(
                                          self._get_dataarchive_url(), 'aq',
                                          response.text),
                                      params={'query_url':
                                              response.url.split("?")[-1]},
                                      timeout=self.TIMEOUT,
                                      cache=False,
                                      )
            self._last_response = response2
            response2.raise_for_status()
            if len(response2.text) == 0:
                if max_retries > 0:
                    log.info("Failed (empty) query.  Retrying up to {0} more times"
                             .format(max_retries))
                    return self.query_async(payload=payload, cache=cache,
                                            public=public, science=science,
                                            max_retries=max_retries-1,
                                            get_html_version=get_html_version,
                                            get_query_payload=get_query_payload,
                                            **kwargs)
                raise RemoteServiceError("Empty return.")
            return response2

        else:
            return response