def test_cycle1(self, temp_dir):
        """Query, stage and download the NGC4945 data set (about 500 MB)."""
        alma = Alma()
        alma.cache_location = temp_dir

        matches = alma.query(payload={'project_code': '2012.1.00912.S',
                                      'source_name_alma': 'NGC4945'})
        assert len(matches) == 1

        # Every staging request goes through its own object: new Alma()
        # instances are needed each time.
        mous_stager = alma()
        mous_files = mous_stager.stage_data(matches['Member ous id'])
        asdm_stager = alma()
        asdm_files = asdm_stager.stage_data(matches['Asdm uid'])
        # The fixes made as part of #495 appear to have removed a redundancy
        # in the table creation, so a single row is accepted for the ASDM
        # table (it used to have two rows).
        assert len(asdm_files) == 1
        assert len(mous_files) >= 2  # now is len=3 (Nov 17, 2016)

        # Staged URLs are expected to look like one of:
        # https://almascience.eso.org/dataPortal/requests/anonymous/944120962/ALMA/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar
        # https://almascience.eso.org/rh/requests/anonymous/944222597/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar

        # Restrict the download to rows whose 'size' value is below 1.
        is_small = mous_files['size'] < 1
        small_urls = mous_files[is_small]['URL']

        first_small = urlparse(small_urls[0])
        expected_path = (
            '/dataPortal/requests/anonymous/{0}/ALMA/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar'  # noqa
            .format(mous_stager._staging_log['staging_page_id']))
        assert first_small.path == expected_path

        # Original note: "THIS IS FAIL".  The tarball name of the first row
        # must embed that row's UID with ':' and '/' replaced by '_'.
        tar_name = mous_files['URL'][0].split("/")[-1]
        assert tar_name == (
            '2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar')
        first_uid = mous_files['uid'][0]
        assert first_uid == 'uid://A002/X5a9a13/X528'
        flattened_uid = first_uid.replace(":", "_").replace("/", "_")
        assert tar_name[15:-15] == flattened_uid

        downloaded = alma.download_and_extract_files(small_urls)
        assert len(downloaded) == 6
    def test_cycle1(self, temp_dir):
        """Query, stage and download the NGC4945 data set (about 500 MB)."""
        alma = Alma()
        alma.cache_location = temp_dir

        target = "NGC4945"
        project_code = "2012.1.00912.S"

        payload = {"project_code": project_code, "source_name_alma": target}
        result = alma.query(payload=payload)
        assert len(result) == 1

        # Need new Alma() instances each time
        a1 = alma()
        uid_url_table_mous = a1.stage_data(result["Member ous id"])
        a2 = alma()
        uid_url_table_asdm = a2.stage_data(result["Asdm uid"])
        # I believe the fixes as part of #495 have resulted in removal of a
        # redundancy in the table creation, so a 1-row table is OK here.
        # A 2-row table may not be OK any more, but that's what it used to
        # be...
        assert len(uid_url_table_asdm) == 1
        # Only a lower bound is required: the number of rows staged for the
        # member OUS has been observed to grow beyond two (3 rows as of
        # Nov 17, 2016), so an exact-count check fails spuriously.
        assert len(uid_url_table_mous) >= 2

        # URL should look like:
        # https://almascience.eso.org/dataPortal/requests/anonymous/944120962/ALMA/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar
        # https://almascience.eso.org/rh/requests/anonymous/944222597/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar

        # Restrict the download to rows whose "size" value is below 1.
        small = uid_url_table_mous["size"] < 1

        urls_to_download = uid_url_table_mous[small]["URL"]

        uri = urlparse(urls_to_download[0])
        assert uri.path == (
            "/dataPortal/requests/anonymous/{0}/ALMA/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar".format(
                a1._staging_log["staging_page_id"]
            )
        )

        # Original note: "THIS IS FAIL".  The tarball name of the first row
        # ('2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar') must
        # embed that row's UID with ':' and '/' replaced by '_'.
        left = uid_url_table_mous["URL"][0].split("/")[-1]
        assert left == "2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar"
        right = uid_url_table_mous["uid"][0]
        assert right == "uid://A002/X5a9a13/X528"
        assert left[15:-15] == right.replace(":", "_").replace("/", "_")
        data = alma.download_and_extract_files(urls_to_download)

        assert len(data) == 6
Beispiel #3
0
    def stage_data(self, uids):
        """
        Stage ALMA data

        Posts a staging request for the given UIDs to the data archive,
        polls the request summary once per second until it reports
        completion, and converts that summary into a table of file URLs.
        Intermediate responses and identifiers are recorded in
        ``self._staging_log``.

        Parameters
        ----------
        uids : list or str
            A list of valid UIDs or a single UID.
            UIDs should have the form: 'uid://A002/X391d0b/X7b'

        Returns
        -------
        data_file_table : Table
            A table containing 3 columns: the UID, the file URL (for future
            downloading), and the file size

        Raises
        ------
        TypeError
            If ``uids`` is not a string, list, tuple or numpy array.
        RemoteServiceError
            If the archive still redirects to its CAS security check after
            the staging request has been retried once.
        AssertionError
            If the request ID or the staging page ID parsed from the
            response URLs does not have the expected length (36 and 9
            characters respectively).
        """
        # URL path segments must always be joined with forward slashes;
        # os.path.join would use the platform separator (e.g. a backslash
        # on Windows) and produce an invalid URL.
        import posixpath

        # Example of the output produced with log.set_level(10):
        # INFO: Staging files... [astroquery.alma.core]
        # DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
        # DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
        # DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
        # DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        # DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        # .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]

        if isinstance(uids, six.string_types):
            uids = [uids]
        if not isinstance(uids, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        log.info("Staging files...")

        self._get_dataarchive_url()

        url = urljoin(self.dataarchive_url, 'rh/submission')
        log.debug("First request URL: {0}".format(url))
        # Dataset names look like 'ALMA+uid___A002_X391d0b_X7b'
        payload = {'dataset': ['ALMA+' + clean_uid(uid) for uid in uids]}
        log.debug("First request payload: {0}".format(payload))

        self._staging_log = {'first_post_url': url}

        # Request staging for the UIDs
        # This component cannot be cached, since the returned data can change
        # if new data are uploaded
        response = self._request('POST', url, data=payload,
                                 timeout=self.TIMEOUT, cache=False)
        self._staging_log['initial_response'] = response
        log.debug("First response URL: {0}".format(response.url))
        response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            time.sleep(1)
            # CANNOT cache this stage: it is not a real data page!  Caching
            # results in infinite loops
            response = self._request('POST', url, data=payload,
                                     timeout=self.TIMEOUT, cache=False)
            self._staging_log['initial_response'] = response
            if 'j_spring_cas_security_check' in response.url:
                log.warning("Staging request was not successful.  Try again?")
            response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            raise RemoteServiceError("Could not access data.  This error "
                                     "can arise if the data are private and "
                                     "you do not have access rights or are "
                                     "not logged in.")

        # The request ID is the second-to-last component of the response URL
        request_id = response.url.split("/")[-2]
        assert len(request_id) == 36
        self._staging_log['request_id'] = request_id
        log.debug("Request ID: {0}".format(request_id))

        # Submit a request for the specific request ID identified above
        submission_url = urljoin(self.dataarchive_url,
                                 posixpath.join('rh/submission', request_id))
        log.debug("Submission URL: {0}".format(submission_url))
        self._staging_log['submission_url'] = submission_url
        staging_submission = self._request('GET', submission_url, cache=True)
        self._staging_log['staging_submission'] = staging_submission
        staging_submission.raise_for_status()

        # The staging page ID is the last component of the final URL of the
        # submission request
        data_page_url = staging_submission.url
        self._staging_log['data_page_url'] = data_page_url
        dpid = data_page_url.split("/")[-1]
        assert len(dpid) == 9
        self._staging_log['staging_page_id'] = dpid

        # CANNOT cache this step: please_wait will happen infinitely
        data_page = self._request('GET', data_page_url, cache=False)
        self._staging_log['data_page'] = data_page
        data_page.raise_for_status()

        # Poll the summary once per second until it is flagged complete,
        # printing one dot per poll as a progress indicator.
        summary_url = posixpath.join(data_page_url, 'summary')
        while True:
            time.sleep(1)
            summary = self._request('GET', summary_url, cache=False)
            summary.raise_for_status()
            print(".", end='')
            sys.stdout.flush()
            json_data = summary.json()
            if json_data['complete']:
                break

        self._staging_log['summary'] = summary
        self._staging_log['json_data'] = json_data

        username = getattr(self, '_username', 'anonymous')

        # templates:
        # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
        # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
        # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar

        url_decomposed = urlparse(data_page_url)
        base_url = ('{uri.scheme}://{uri.netloc}/'
                    'dataPortal/requests/{username}/'
                    '{staging_page_id}/ALMA'.format(uri=url_decomposed,
                                                    staging_page_id=dpid,
                                                    username=username,
                                                    ))
        tbl = self._json_summary_to_table(json_data, base_url=base_url)

        return tbl
Beispiel #4
0
    def stage_data(self, uids):
        """
        Request staging of ALMA data and list the staged files.

        The staging request is posted to the data archive, the request
        summary is polled once per second until it reports completion, and
        the summary is then converted into a table.  Intermediate responses
        are kept in ``self._staging_log``; on success that log is also
        stored as ``self._last_successful_staging_log``.

        Parameters
        ----------
        uids : list or str
            A single UID or a list of valid UIDs, each of the form
            'uid://A002/X391d0b/X7b'

        Returns
        -------
        data_file_table : Table
            A table containing 3 columns: the UID, the file URL (for future
            downloading), and the file size.  When the archive answers with
            error 405 and a previous staging succeeded on this instance,
            the table from that previous staging is returned instead.

        Raises
        ------
        TypeError
            If ``uids`` is not a string, list, tuple or numpy array.
        ValueError
            If the first response URL contains 'login'.
        HTTPError
            If error 405 is received and no earlier staging result exists.
        RemoteServiceError
            If the archive still redirects to its CAS security check after
            the staging request has been retried once.
        """
        # Example of the output produced with log.set_level(10):
        # INFO: Staging files... [astroquery.alma.core]
        # DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
        # DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
        # DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
        # DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        # DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        # .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]

        single_uid_types = six.string_types + (np.bytes_, )
        if isinstance(uids, single_uid_types):
            uids = [uids]
        if not isinstance(uids, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        log.info("Staging files...")

        self._get_dataarchive_url()

        post_url = urljoin(self.dataarchive_url, 'rh/submission')
        log.debug("First request URL: {0}".format(post_url))
        # Dataset names look like 'ALMA+uid___A002_X391d0b_X7b'
        datasets = ['ALMA+' + clean_uid(uid) for uid in uids]
        form_data = {'dataset': datasets}
        log.debug("First request payload: {0}".format(form_data))

        self._staging_log = {'first_post_url': post_url}

        # The staging request must never be cached: the data it returns can
        # change whenever new data are uploaded.
        reply = self._request('POST', post_url, data=form_data,
                              timeout=self.TIMEOUT, cache=False)
        self._staging_log['initial_response'] = reply
        log.debug("First response URL: {0}".format(reply.url))
        if 'login' in reply.url:
            raise ValueError(
                "You must login before downloading this data set.")

        if reply.status_code == 405:
            if not hasattr(self, '_last_successful_staging_log'):
                raise HTTPError(
                    "Received an error 405: this may indicate you "
                    "have already staged the data.  Try downloading "
                    "the file URLs directly with download_files.")
            log.warning(
                "Error 405 received.  If you have previously staged "
                "the same UIDs, the result returned is probably "
                "correct, otherwise you may need to create a fresh "
                "astroquery.Alma instance.")
            return self._last_successful_staging_log['result']
        reply.raise_for_status()

        cas_marker = 'j_spring_cas_security_check'
        if cas_marker in reply.url:
            time.sleep(1)
            # Retry once.  This stage CANNOT be cached: it is not a real
            # data page, and caching it results in infinite loops.
            reply = self._request('POST', post_url, data=form_data,
                                  timeout=self.TIMEOUT, cache=False)
            self._staging_log['initial_response'] = reply
            retry_redirected = cas_marker in reply.url
            if retry_redirected:
                log.warning("Staging request was not successful.  Try again?")
            reply.raise_for_status()
            if retry_redirected:
                raise RemoteServiceError("Could not access data.  This error "
                                         "can arise if the data are private "
                                         "and you do not have access rights "
                                         "or are not logged in.")

        # The request ID is the second-to-last component of the response URL
        request_id = reply.url.split("/")[-2]
        self._staging_log['request_id'] = request_id
        log.debug("Request ID: {0}".format(request_id))

        # Follow up with a request for that specific request ID
        submission_path = url_helpers.join('rh/submission', request_id)
        submission_url = urljoin(self.dataarchive_url, submission_path)
        log.debug("Submission URL: {0}".format(submission_url))
        self._staging_log['submission_url'] = submission_url
        submission_reply = self._request('GET', submission_url, cache=True)
        self._staging_log['staging_submission'] = submission_reply
        submission_reply.raise_for_status()

        # The staging page ID is the last component of the final URL
        data_page_url = submission_reply.url
        self._staging_log['data_page_url'] = data_page_url
        page_id = data_page_url.split("/")[-1]
        self._staging_log['staging_page_id'] = page_id

        # CANNOT cache this step: please_wait will happen infinitely
        data_page = self._request('GET', data_page_url, cache=False)
        self._staging_log['data_page'] = data_page
        data_page.raise_for_status()

        # Poll the summary once per second until it is flagged complete,
        # printing one dot per poll as a progress indicator.
        while True:
            time.sleep(1)
            summary = self._request(
                'GET', url_helpers.join(data_page_url, 'summary'),
                cache=False)
            summary.raise_for_status()
            print(".", end='')
            sys.stdout.flush()
            if summary.json()['complete']:
                break

        self._staging_log['summary'] = summary
        summary.raise_for_status()
        json_data = summary.json()
        self._staging_log['json_data'] = json_data

        username = self.USERNAME or 'anonymous'

        # templates:
        # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
        # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
        # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar

        parsed_page_url = urlparse(data_page_url)
        base_url_template = ('{uri.scheme}://{uri.netloc}/'
                             'dataPortal/requests/{username}/'
                             '{staging_page_id}/ALMA')
        base_url = base_url_template.format(uri=parsed_page_url,
                                            staging_page_id=page_id,
                                            username=username)
        file_table = self._json_summary_to_table(json_data,
                                                 base_url=base_url)
        self._staging_log['result'] = file_table
        self._staging_log['file_urls'] = file_table['URL']
        self._last_successful_staging_log = self._staging_log

        return file_table
Beispiel #5
0
    def stage_data(self, uids):
        """
        Request staging of ALMA data and list the staged files.

        The staging request is posted to the data archive, the request
        summary is polled once per second until it reports completion, and
        the summary is then converted into a table.  Intermediate responses
        are kept in ``self._staging_log``; on success that log is also
        stored as ``self._last_successful_staging_log``.

        Parameters
        ----------
        uids : list or str
            A single UID or a list of valid UIDs, each of the form
            'uid://A002/X391d0b/X7b'

        Returns
        -------
        data_file_table : Table
            A table containing 3 columns: the UID, the file URL (for future
            downloading), and the file size.  When the archive answers with
            error 405 and a previous staging succeeded on this instance,
            the table from that previous staging is returned instead.

        Raises
        ------
        TypeError
            If ``uids`` is not a string, list, tuple or numpy array.
        HTTPError
            If error 405 is received and no earlier staging result exists.
        RemoteServiceError
            If the archive still redirects to its CAS security check after
            the staging request has been retried once.
        """
        # Example of the output produced with log.set_level(10):
        # INFO: Staging files... [astroquery.alma.core]
        # DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
        # DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
        # DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
        # DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        # DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        # .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]

        single_uid_types = six.string_types + (np.bytes_,)
        if isinstance(uids, single_uid_types):
            uids = [uids]
        if not isinstance(uids, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        log.info("Staging files...")

        self._get_dataarchive_url()

        post_url = urljoin(self.dataarchive_url, 'rh/submission')
        log.debug("First request URL: {0}".format(post_url))
        # Dataset names look like 'ALMA+uid___A002_X391d0b_X7b'
        datasets = ['ALMA+' + clean_uid(uid) for uid in uids]
        form_data = {'dataset': datasets}
        log.debug("First request payload: {0}".format(form_data))

        self._staging_log = {'first_post_url': post_url}

        # The staging request must never be cached: the data it returns can
        # change whenever new data are uploaded.
        reply = self._request('POST',
                              post_url,
                              data=form_data,
                              timeout=self.TIMEOUT,
                              cache=False)
        self._staging_log['initial_response'] = reply
        log.debug("First response URL: {0}".format(reply.url))

        if reply.status_code == 405:
            if not hasattr(self, '_last_successful_staging_log'):
                raise HTTPError(
                    "Received an error 405: this may indicate you "
                    "have already staged the data.  Try downloading "
                    "the file URLs directly with download_files.")
            log.warning(
                "Error 405 received.  If you have previously staged "
                "the same UIDs, the result returned is probably "
                "correct, otherwise you may need to create a fresh "
                "astroquery.Alma instance.")
            return self._last_successful_staging_log['result']
        reply.raise_for_status()

        cas_marker = 'j_spring_cas_security_check'
        if cas_marker in reply.url:
            time.sleep(1)
            # Retry once.  This stage CANNOT be cached: it is not a real
            # data page, and caching it results in infinite loops.
            reply = self._request('POST',
                                  post_url,
                                  data=form_data,
                                  timeout=self.TIMEOUT,
                                  cache=False)
            self._staging_log['initial_response'] = reply
            retry_redirected = cas_marker in reply.url
            if retry_redirected:
                log.warning("Staging request was not successful.  Try again?")
            reply.raise_for_status()
            if retry_redirected:
                raise RemoteServiceError("Could not access data.  This error "
                                         "can arise if the data are private "
                                         "and you do not have access rights "
                                         "or are not logged in.")

        # The request ID is the second-to-last component of the response URL
        request_id = reply.url.split("/")[-2]
        self._staging_log['request_id'] = request_id
        log.debug("Request ID: {0}".format(request_id))

        # Follow up with a request for that specific request ID
        submission_path = url_helpers.join('rh/submission', request_id)
        submission_url = urljoin(self.dataarchive_url, submission_path)
        log.debug("Submission URL: {0}".format(submission_url))
        self._staging_log['submission_url'] = submission_url
        submission_reply = self._request('GET', submission_url, cache=True)
        self._staging_log['staging_submission'] = submission_reply
        submission_reply.raise_for_status()

        # The staging page ID is the last component of the final URL
        data_page_url = submission_reply.url
        self._staging_log['data_page_url'] = data_page_url
        page_id = data_page_url.split("/")[-1]
        self._staging_log['staging_page_id'] = page_id

        # CANNOT cache this step: please_wait will happen infinitely
        data_page = self._request('GET', data_page_url, cache=False)
        self._staging_log['data_page'] = data_page
        data_page.raise_for_status()

        # Poll the summary once per second until it is flagged complete,
        # printing one dot per poll as a progress indicator.
        while True:
            time.sleep(1)
            summary = self._request(
                'GET', url_helpers.join(data_page_url, 'summary'),
                cache=False)
            summary.raise_for_status()
            print(".", end='')
            sys.stdout.flush()
            if summary.json()['complete']:
                break

        self._staging_log['summary'] = summary
        summary.raise_for_status()
        json_data = summary.json()
        self._staging_log['json_data'] = json_data

        username = self.USERNAME or 'anonymous'

        # templates:
        # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
        # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
        # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar

        parsed_page_url = urlparse(data_page_url)
        base_url_template = ('{uri.scheme}://{uri.netloc}/'
                             'dataPortal/requests/{username}/'
                             '{staging_page_id}/ALMA')
        base_url = base_url_template.format(uri=parsed_page_url,
                                            staging_page_id=page_id,
                                            username=username)
        file_table = self._json_summary_to_table(json_data,
                                                 base_url=base_url)
        self._staging_log['result'] = file_table
        self._staging_log['file_urls'] = file_table['URL']
        self._last_successful_staging_log = self._staging_log

        return file_table
Beispiel #6
0
    def stage_data(self, uids):
        """
        Stage ALMA data

        Posts a staging request for the given UIDs to the data archive,
        polls the request summary once per second until it reports
        completion, and converts that summary into a table of file URLs.
        Intermediate responses and identifiers are recorded in
        ``self._staging_log``.

        Parameters
        ----------
        uids : list or str
            A list of valid UIDs or a single UID.
            UIDs should have the form: 'uid://A002/X391d0b/X7b'

        Returns
        -------
        data_file_table : Table
            A table containing 3 columns: the UID, the file URL (for future
            downloading), and the file size

        Raises
        ------
        TypeError
            If ``uids`` is not a string, list, tuple or numpy array.
        RemoteServiceError
            If the archive still redirects to its CAS security check after
            the staging request has been retried once.
        AssertionError
            If the request ID or the staging page ID parsed from the
            response URLs does not have the expected length (36 and 9
            characters respectively).
        """
        # URL path segments must always be joined with forward slashes;
        # os.path.join would use the platform separator (e.g. a backslash
        # on Windows) and produce an invalid URL.
        import posixpath

        # Example of the output produced with log.set_level(10):
        # INFO: Staging files... [astroquery.alma.core]
        # DEBUG: First request URL: https://almascience.eso.org/rh/submission [astroquery.alma.core]
        # DEBUG: First request payload: {'dataset': [u'ALMA+uid___A002_X3b3400_X90f']} [astroquery.alma.core]
        # DEBUG: First response URL: https://almascience.eso.org/rh/checkAuthenticationStatus/3f98de33-197e-4692-9afa-496842032ea9/submission [astroquery.alma.core]
        # DEBUG: Request ID: 3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        # DEBUG: Submission URL: https://almascience.eso.org/rh/submission/3f98de33-197e-4692-9afa-496842032ea9 [astroquery.alma.core]
        # .DEBUG: Data list URL: https://almascience.eso.org/rh/requests/anonymous/786823226 [astroquery.alma.core]

        if isinstance(uids, six.string_types):
            uids = [uids]
        if not isinstance(uids, (list, tuple, np.ndarray)):
            raise TypeError("Datasets must be given as a list of strings.")

        log.info("Staging files...")

        self._get_dataarchive_url()

        url = urljoin(self.dataarchive_url, 'rh/submission')
        log.debug("First request URL: {0}".format(url))
        # Dataset names look like 'ALMA+uid___A002_X391d0b_X7b'
        payload = {'dataset': ['ALMA+' + clean_uid(uid) for uid in uids]}
        log.debug("First request payload: {0}".format(payload))

        self._staging_log = {'first_post_url': url}

        # Request staging for the UIDs
        # This component cannot be cached, since the returned data can change
        # if new data are uploaded
        response = self._request('POST',
                                 url,
                                 data=payload,
                                 timeout=self.TIMEOUT,
                                 cache=False)
        self._staging_log['initial_response'] = response
        log.debug("First response URL: {0}".format(response.url))
        response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            time.sleep(1)
            # CANNOT cache this stage: it is not a real data page!  Caching
            # results in infinite loops
            response = self._request('POST',
                                     url,
                                     data=payload,
                                     timeout=self.TIMEOUT,
                                     cache=False)
            self._staging_log['initial_response'] = response
            if 'j_spring_cas_security_check' in response.url:
                log.warning("Staging request was not successful.  Try again?")
            response.raise_for_status()

        if 'j_spring_cas_security_check' in response.url:
            raise RemoteServiceError("Could not access data.  This error "
                                     "can arise if the data are private and "
                                     "you do not have access rights or are "
                                     "not logged in.")

        # The request ID is the second-to-last component of the response URL
        request_id = response.url.split("/")[-2]
        assert len(request_id) == 36
        self._staging_log['request_id'] = request_id
        log.debug("Request ID: {0}".format(request_id))

        # Submit a request for the specific request ID identified above
        submission_url = urljoin(self.dataarchive_url,
                                 posixpath.join('rh/submission', request_id))
        log.debug("Submission URL: {0}".format(submission_url))
        self._staging_log['submission_url'] = submission_url
        staging_submission = self._request('GET', submission_url, cache=True)
        self._staging_log['staging_submission'] = staging_submission
        staging_submission.raise_for_status()

        # The staging page ID is the last component of the final URL of the
        # submission request
        data_page_url = staging_submission.url
        self._staging_log['data_page_url'] = data_page_url
        dpid = data_page_url.split("/")[-1]
        assert len(dpid) == 9
        self._staging_log['staging_page_id'] = dpid

        # CANNOT cache this step: please_wait will happen infinitely
        data_page = self._request('GET', data_page_url, cache=False)
        self._staging_log['data_page'] = data_page
        data_page.raise_for_status()

        # Poll the summary once per second until it is flagged complete,
        # printing one dot per poll as a progress indicator.
        summary_url = posixpath.join(data_page_url, 'summary')
        while True:
            time.sleep(1)
            summary = self._request('GET', summary_url, cache=False)
            summary.raise_for_status()
            print(".", end='')
            sys.stdout.flush()
            json_data = summary.json()
            if json_data['complete']:
                break

        self._staging_log['summary'] = summary
        self._staging_log['json_data'] = json_data

        username = getattr(self, '_username', 'anonymous')

        # templates:
        # https://almascience.eso.org/dataPortal/requests/keflavich/946895898/ALMA/
        # 2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar/2013.1.00308.S_uid___A001_X196_X93_001_of_001.tar
        # uid___A002_X9ee74a_X26f0/2013.1.00308.S_uid___A002_X9ee74a_X26f0.asdm.sdm.tar

        url_decomposed = urlparse(data_page_url)
        base_url = ('{uri.scheme}://{uri.netloc}/'
                    'dataPortal/requests/{username}/'
                    '{staging_page_id}/ALMA'.format(
                        uri=url_decomposed,
                        staging_page_id=dpid,
                        username=username,
                    ))
        tbl = self._json_summary_to_table(json_data, base_url=base_url)

        return tbl