def _response_hook(self, response, *args, **kwargs):
    loglevel = log.getEffectiveLevel()
    if loglevel <= 10:
        # Log request at DEBUG severity (only build the dump when the
        # effective level is DEBUG or finer)
        request_hdrs = '\n'.join(
            f'{k}: {v}' for k, v in response.request.headers.items())
        request_log = textwrap.indent(
            f"-----------------------------------------\n"
            f"{response.request.method} {response.request.url}\n"
            f"{request_hdrs}\n"
            f"\n"
            f"{response.request.body}\n"
            f"-----------------------------------------", '\t')
        log.debug(f"HTTP request\n{request_log}")
    if loglevel <= 5:
        # Log response at super-DEBUG severity (numeric level 5)
        response_hdrs = '\n'.join(f'{k}: {v}' for k, v
                                  in response.headers.items())
        if kwargs.get('stream'):
            response_log = textwrap.indent(
                f"-----------------------------------------\n"
                f"{response.status_code} {response.reason} {response.url}\n"
                f"{response_hdrs}\n"
                "Streaming Data\n"
                f"-----------------------------------------", '\t')
        else:
            response_log = textwrap.indent(
                f"-----------------------------------------\n"
                f"{response.status_code} {response.reason} {response.url}\n"
                f"{response_hdrs}\n"
                f"\n"
                f"{response.text}\n"
                f"-----------------------------------------", '\t')
        log.log(5, f"HTTP response\n{response_log}")
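# Illustrative sketch (not part of the snippet above): how the numeric guards
# relate to astropy's logger levels, and how such a hook is attached to a
# requests session.  `instance` stands for any object defining _response_hook
# as above; it is an assumption of this example.
import logging
import requests
from astropy import log

log.setLevel(5)                    # 5 = "super-DEBUG": emit both dumps above
assert log.getEffectiveLevel() <= logging.DEBUG   # logging.DEBUG == 10

session = requests.Session()
# requests calls every callable in session.hooks['response'] with the Response:
# session.hooks['response'].append(instance._response_hook)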
def to_cache(response, cache_file):
    log.debug("Caching data to {0}".format(cache_file))
    response = copy.deepcopy(response)
    if hasattr(response, 'request'):
        # Request hooks may hold bound methods that do not pickle,
        # so strip them before caching
        for key in tuple(response.request.hooks.keys()):
            del response.request.hooks[key]
    with open(cache_file, "wb") as f:
        pickle.dump(response, f)
def query_surveys(self, surveys='', cache=True, help=False,
                  open_form=False, **kwargs):
    """
    Query survey Phase 3 data contained in the ESO archive.

    Parameters
    ----------
    surveys : string or list
        Name of the survey(s) to query. Should be one or more of the
        names returned by `~astroquery.eso.EsoClass.list_surveys`. If
        specified as a string, it should be a comma-separated list of
        survey names.
    cache : bool
        Cache the response for faster subsequent retrieval.

    Returns
    -------
    table : `~astropy.table.Table` or `None`
        A table representing the data available in the archive for the
        specified survey, matching the constraints specified in
        ``kwargs``. The number of rows returned is capped by the
        ROW_LIMIT configuration item. `None` is returned when the query
        has no results.
    """
    url = "http://archive.eso.org/wdb/wdb/adp/phase3_main/form"
    if open_form:
        webbrowser.open(url)
    elif help:
        self._print_surveys_help(url, cache=cache)
    else:
        survey_form = self._request("GET", url, cache=cache)
        query_dict = kwargs
        query_dict["wdbo"] = "csv/download"
        if isinstance(surveys, str):
            surveys = surveys.split(",")
        query_dict['collection_name'] = surveys
        if self.ROW_LIMIT >= 0:
            query_dict["max_rows_returned"] = int(self.ROW_LIMIT)
        else:
            query_dict["max_rows_returned"] = 10000

        survey_response = self._activate_form(survey_form, form_index=0,
                                              form_id='queryform',
                                              inputs=query_dict, cache=cache)

        content = survey_response.content
        # First line is always garbage
        content = content.split(b'\n', 1)[1]
        log.debug("Response content:\n{0}".format(content))
        if _check_response(content):
            table = Table.read(BytesIO(content), format="ascii.csv",
                               comment="^#")
            return table
        else:
            warnings.warn("Query returned no results", NoResultsWarning)
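# Usage sketch (assumed, not taken from the snippet above): querying ESO
# Phase 3 collections through query_surveys().  The survey names and the row
# limit are examples only.
from astroquery.eso import Eso

eso = Eso()
eso.ROW_LIMIT = 50                          # cap the number of returned rows
table = eso.query_surveys(surveys='HARPS,VVV', cache=False)
if table is not None:
    print(table.colnames)
    print(len(table), "rows returned")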
def _parse_result(self, response, verbose=False):
    """
    Parses the results from the HTTP response to `~astropy.table.Table`.

    Parameters
    ----------
    response : `requests.Response`
        The HTTP response object
    verbose : bool, optional
        Defaults to `False`. When `True`, it will display warnings whenever
        the VOtable returned from the Service doesn't conform to the
        standard.

    Returns
    -------
    table : `~astropy.table.Table`
    """
    if not verbose:
        commons.suppress_vo_warnings()

    content = response.text
    log.debug(content)

    # Check if results were returned
    if 'The catalog is not in the list' in content:
        raise Exception("Catalogue not found")

    # Check that object name was not malformed
    if 'Either wrong or missing coordinate/object name' in content:
        raise Exception("Malformed coordinate/object name")

    # Check that the results are not of length zero
    if len(content) == 0:
        raise Exception("The LCOGT server sent back an empty reply")

    # Read it in using the astropy VO table reader
    try:
        first_table = votable.parse(six.BytesIO(response.content),
                                    pedantic=False).get_first_table()
    except Exception as ex:
        self.response = response
        self.table_parse_error = ex
        raise TableParseError("Failed to parse LCOGT votable! The raw "
                              "response can be found in self.response, "
                              "and the error in self.table_parse_error.")

    # Convert to astropy.table.Table instance
    table = first_table.to_table()

    # Check if table is empty
    if len(table) == 0:
        warnings.warn("Query returned no results, so the table will "
                      "be empty", NoResultsWarning)

    return table
def from_cache(self, cache_location):
    request_file = self.request_file(cache_location)
    try:
        with open(request_file, "rb") as f:
            response = pickle.load(f)
        if not isinstance(response, requests.Response):
            response = None
    except IOError:  # TODO: change to FileNotFoundError once drop py2 support
        response = None
    if response:
        log.debug("Retrieving data from {0}".format(request_file))
    return response
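# Minimal round-trip sketch for the pickle cache helpers above.  `q` stands
# for a query object exposing request_file()/from_cache(); both it and the
# cache path are assumptions of this example.
import os
import requests

cache_location = os.path.expanduser('~/.astropy/cache/astroquery')
response = requests.get('https://example.org')
# to_cache(response, q.request_file(cache_location))    # write the pickle
# cached = q.from_cache(cache_location)                 # None on a cache miss
# assert cached is None or isinstance(cached, requests.Response)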
def query_tap(self, query, maxrec=None):
    """
    Send an ADQL query to the ALMA TAP service.

    Results are returned as a `pyvo.dal.TAPResults` instance; its
    ``to_table()`` method gives an `~astropy.table.Table`.

    Parameters
    ----------
    query : str
        The ADQL query to run.
    maxrec : int, optional
        Maximum number of records to return.
    """
    log.debug('TAP query: {}'.format(query))
    return self.tap.search(query, language='ADQL', maxrec=maxrec)
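# Usage sketch (assumed): a small ADQL query through the TAP wrapper above.
# The ObsCore table and column names are standard but serve only as an example.
from astroquery.alma import Alma

result = Alma.query_tap(
    "SELECT TOP 10 target_name, band_list, t_exptime FROM ivoa.obscore",
    maxrec=10)
print(result.to_table())        # pyvo TAPResults -> astropy Table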
def _query(self, url, column_filters={}, columns=[], open_form=False, help=False, cache=True, **kwargs): table = None if open_form: webbrowser.open(url) elif help: self._print_query_help(url) else: instrument_form = self._request("GET", url, cache=cache) query_dict = {} query_dict.update(column_filters) # TODO: replace this with individually parsed kwargs query_dict.update(kwargs) query_dict["wdbo"] = "csv/download" # Default to returning the DP.ID since it is needed for header # acquisition query_dict['tab_dp_id'] = kwargs.pop('tab_dp_id', 'on') for k in columns: query_dict["tab_" + k] = True if self.ROW_LIMIT >= 0: query_dict["max_rows_returned"] = int(self.ROW_LIMIT) else: query_dict["max_rows_returned"] = 10000 # used to be form 0, but now there's a new 'logout' form at the top # (form_index = -1 and 0 both work now that form_id is included) instrument_response = self._activate_form(instrument_form, form_index=-1, form_id='queryform', inputs=query_dict, cache=cache) content = instrument_response.content # First line is always garbage content = content.split(b'\n', 1)[1] log.debug("Response content:\n{0}".format(content)) if _check_response(content): table = Table.read(BytesIO(content), format="ascii.csv", comment='^#') return table else: warnings.warn("Query returned no results", NoResultsWarning)
def get_images(self, coordinates, radius, collection=None, get_url_list=False, show_progress=False): """ A coordinate-based query function that returns a list of fits files with cutouts around the passed in coordinates. Parameters ---------- coordinates : str or `astropy.coordinates`. Coordinates around which to query. radius : str or `astropy.units.Quantity` The radius of the cone search AND cutout area. collection : str, optional Name of the CADC collection to query. get_url_list : bool, optional If ``True``, returns the list of data urls rather than the downloaded FITS files. Default is ``False``. show_progress : bool, optional Whether to display a progress bar if the file is downloaded from a remote server. Default is ``False``. Returns ------- list : A list of `~astropy.io.fits.HDUList` objects (or a list of str if returning urls). """ filenames = self.get_images_async(coordinates, radius, collection, get_url_list, show_progress) if get_url_list: return filenames images = [] for fn in filenames: try: images.append(fn.get_fits()) except requests.exceptions.HTTPError as err: # Catch HTTPError if user is unauthorized to access file log.debug("{} - Problem retrieving the file: {}".format( str(err), str(err.url))) pass return images
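# Usage sketch (assumed): a cone-search cutout request through get_images().
# Collection name, position and radius are examples only.
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery.cadc import Cadc

cadc = Cadc()
coords = SkyCoord(10.68, 41.27, unit='deg')
urls = cadc.get_images(coords, 0.01 * u.deg, collection='CFHT',
                       get_url_list=True)   # only the URLs, nothing downloaded
print(urls[:3])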
def _HEADER_data_size(self, files): """ Given a list of file URLs, return the data size. This is useful for assessing how much data you might be downloading! (This is discouraged by the ALMA archive, as it puts unnecessary load on their system) """ totalsize = 0 * u.B data_sizes = {} pb = ProgressBar(len(files)) for index, fileLink in enumerate(files): response = self._request('HEAD', fileLink, stream=False, cache=False, timeout=self.TIMEOUT) filesize = (int(response.headers['content-length']) * u.B).to(u.GB) totalsize += filesize data_sizes[fileLink] = filesize log.debug("File {0}: size {1}".format(fileLink, filesize)) pb.update(index + 1) response.raise_for_status() return data_sizes, totalsize.to(u.GB)
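# Stand-alone sketch of the size-estimation idea above: issue HEAD requests and
# accumulate the Content-Length headers with astropy units.  The URL list is a
# placeholder.
import requests
import astropy.units as u

def estimate_total_size(urls):
    total = 0 * u.B
    for url in urls:
        resp = requests.head(url, allow_redirects=True)
        resp.raise_for_status()
        total += int(resp.headers.get('content-length', 0)) * u.B
    return total.to(u.GB)

# print(estimate_total_size(['https://example.org/some_file.fits']))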
def get_mockreturn(self, method, url, data=None, timeout=10, files=None, params=None, headers=None, **kwargs): log.debug("get_mockreturn url:{} params:{} kwargs:{}".format(url, params, kwargs)) if kwargs and 'auth' in kwargs: auth = kwargs['auth'] if auth and (auth[0] != 'user' or auth[1] != 'password'): log.debug("Rejecting credentials") return create_auth_failure_response() if 'data/async' in str(url): # Responses for an asynchronous SODA job if str(url).endswith('data/async'): self.first_job_pass = True return create_soda_create_response('111-000-111-000') elif str(url).endswith('/phase') and method == 'POST': key = "RUN_JOB" elif str(url).endswith('111-000-111-000') and method == 'GET': key = "RUN_JOB" if self.first_job_pass else "COMPLETED_JOB" self.first_job_pass = False else: raise ValueError("Unexpected SODA async {} call to url {}".format(method, url)) elif 'datalink' in str(url): if 'cube-244' in str(url): key = 'DATALINK' else: key = 'DATALINK_NOACCESS' else: key = params['POS'].split()[0] if params['POS'] else None filename = data_path(DATA_FILES[key]) log.debug('providing ' + filename) content = open(filename, 'rb').read() return MockResponse(content)
def pyregion_subset(region, data, mywcs): """ Return a subset of an image (``data``) given a region. Parameters ---------- region : `~pyregion.Shape` A Shape from a pyregion-parsed region file data : np.ndarray An array with shape described by WCS mywcs : `astropy.wcs.WCS` A world coordinate system describing the data """ import pyregion shapelist = pyregion.ShapeList([region]) if shapelist[0].coord_format not in ('physical', 'image'): celhdr = mywcs.sub([wcs.WCSSUB_CELESTIAL]).to_header() pixel_regions = shapelist.as_imagecoord(celhdr) else: # For this to work, we'd need to change the reference pixel after # cropping. Alternatively, we can just make the full-sized # mask... todo.... raise NotImplementedError("Can't use non-celestial coordinates " "with regions.") pixel_regions = shapelist # This is a hack to use mpl to determine the outer bounds of the regions # (but it's a legit hack - pyregion needs a major internal refactor # before we can approach this any other way, I think -AG) mpl_objs = pixel_regions.get_mpl_patches_texts()[0] # Find the minimal enclosing box containing all of the regions # (this will speed up the mask creation below) extent = mpl_objs[0].get_extents() xlo, ylo = extent.min xhi, yhi = extent.max all_extents = [obj.get_extents() for obj in mpl_objs] for ext in all_extents: xlo = int(np.round(xlo if xlo < ext.min[0] else ext.min[0])) ylo = int(np.round(ylo if ylo < ext.min[1] else ext.min[1])) xhi = int(np.round(xhi if xhi > ext.max[0] else ext.max[0])) yhi = int(np.round(yhi if yhi > ext.max[1] else ext.max[1])) log.debug("Region boundaries: ") log.debug("xlo={xlo}, ylo={ylo}, xhi={xhi}, yhi={yhi}".format(xlo=xlo, ylo=ylo, xhi=xhi, yhi=yhi)) subwcs = mywcs[int(ylo):int(yhi), int(xlo):int(xhi)] subhdr = subwcs.sub([wcs.WCSSUB_CELESTIAL]).to_header() subdata = data[int(ylo):int(yhi), int(xlo):int(xhi)] mask = shapelist.get_mask(header=subhdr, shape=subdata.shape) log.debug("Shapes: data={0}, subdata={2}, mask={1}".format( data.shape, mask.shape, subdata.shape)) return (xlo, xhi, ylo, yhi), mask
def _response_hook(self, response, *args, **kwargs):
    # Log request at DEBUG severity
    request_hdrs = '\n'.join(f'{k}: {v}' for k, v
                             in response.request.headers.items())
    request_log = textwrap.indent(
        f"-----------------------------------------\n"
        f"{response.request.method} {response.request.url}\n"
        f"{request_hdrs}\n"
        f"\n"
        f"{response.request.body}\n"
        f"-----------------------------------------", '\t')
    log.debug(f"HTTP request\n{request_log}")

    # Log response at super-DEBUG severity (numeric level 5)
    response_hdrs = '\n'.join(f'{k}: {v}' for k, v
                              in response.headers.items())
    response_log = textwrap.indent(
        f"-----------------------------------------\n"
        f"{response.status_code} {response.reason} {response.url}\n"
        f"{response_hdrs}\n"
        f"\n"
        f"{response.text}\n"
        f"-----------------------------------------", '\t')
    log.log(5, f"HTTP response\n{response_log}")
def get_mockreturn(self, method, url, data=None, timeout=10, files=None, params=None, headers=None, **kwargs): log.debug("get_mockreturn url:{} params:{} kwargs:{}".format( url, params, kwargs)) if kwargs and 'auth' in kwargs: auth = kwargs['auth'] if auth and (auth[0] != USERNAME or auth[1] != PASSWORD): log.debug("Rejecting credentials") return create_auth_failure_response() if 'data/async' in str(url): # Responses for an asynchronous SODA job if str(url).endswith('data/async'): self.first_job_pass = True self.completed_job_key = "COMPLETED_JOB" return create_soda_create_response('111-000-111-000') elif str(url).endswith('/phase') and method == 'POST': key = "RUN_JOB" elif str(url).endswith( '111-000-111-000/parameters') and method == 'POST': assert "POS" in data print(data['POS']) pos_parts = data['POS'].split(' ') assert len(pos_parts) == 4 self.completed_job_key = 'cutout_{}_{:.4f}_{:.4f}_{:.4f}'.format( pos_parts[0], float(pos_parts[1]), float(pos_parts[2]), float(pos_parts[3])) return create_soda_create_response('111-000-111-000') elif str(url).endswith('111-000-111-000') and method == 'GET': key = "RUN_JOB" if self.first_job_pass else self.completed_job_key self.first_job_pass = False else: raise ValueError("Unexpected SODA async {} call to url {}".format( method, url)) elif 'datalink' in str(url): if 'cube-244' in str(url): key = 'DATALINK' else: key = 'DATALINK_NOACCESS' elif str( url) == 'https://data.csiro.au/casda_vo_proxy/vo/tap/availability': key = 'AVAILABILITY' else: key = params['POS'].split()[0] if params['POS'] else None filename = data_path(DATA_FILES[key]) log.debug('providing ' + filename) content = open(filename, 'rb').read() return MockResponse(content)
def pyregion_subset(region, data, mywcs): """ Return a subset of an image (`data`) given a region. """ shapelist = pyregion.ShapeList([region]) if shapelist[0].coord_format not in ('physical', 'image'): # Requires astropy >0.4... # pixel_regions = shapelist.as_imagecoord(self.wcs.celestial.to_header()) # convert the regions to image (pixel) coordinates celhdr = mywcs.sub([wcs.WCSSUB_CELESTIAL]).to_header() pixel_regions = shapelist.as_imagecoord(celhdr) else: # For this to work, we'd need to change the reference pixel after cropping. # Alternatively, we can just make the full-sized mask... todo.... raise NotImplementedError( "Can't use non-celestial coordinates with regions.") pixel_regions = shapelist # This is a hack to use mpl to determine the outer bounds of the regions # (but it's a legit hack - pyregion needs a major internal refactor # before we can approach this any other way, I think -AG) mpl_objs = pixel_regions.get_mpl_patches_texts()[0] # Find the minimal enclosing box containing all of the regions # (this will speed up the mask creation below) extent = mpl_objs[0].get_extents() xlo, ylo = extent.min xhi, yhi = extent.max all_extents = [obj.get_extents() for obj in mpl_objs] for ext in all_extents: xlo = int(xlo if xlo < ext.min[0] else ext.min[0]) ylo = int(ylo if ylo < ext.min[1] else ext.min[1]) xhi = int(xhi if xhi > ext.max[0] else ext.max[0]) yhi = int(yhi if yhi > ext.max[1] else ext.max[1]) log.debug("Region boundaries: ") log.debug("xlo={xlo}, ylo={ylo}, xhi={xhi}, yhi={yhi}".format(xlo=xlo, ylo=ylo, xhi=xhi, yhi=yhi)) subwcs = mywcs[ylo:yhi, xlo:xhi] subhdr = subwcs.sub([wcs.WCSSUB_CELESTIAL]).to_header() subdata = data[ylo:yhi, xlo:xhi] mask = shapelist.get_mask(header=subhdr, shape=subdata.shape) log.debug("Shapes: data={0}, subdata={2}, mask={1}".format( data.shape, mask.shape, subdata.shape)) return (xlo, xhi, ylo, yhi), mask
def _parse_kwargs(self, min_frequency=None, max_frequency=None, band='any', top20=None, chemical_name=None, chem_re_flags=0, energy_min=None, energy_max=None, energy_type=None, intensity_lower_limit=None, intensity_type=None, transition=None, version=None, exclude=None, only_astronomically_observed=None, only_NRAO_recommended=None, line_lists=None, line_strengths=None, energy_levels=None, export=None, export_limit=None, noHFS=None, displayHFS=None, show_unres_qn=None, show_upper_degeneracy=None, show_molecule_tag=None, show_qn_code=None, show_lovas_labref=None, show_lovas_obsref=None, show_orderedfreq_only=None, show_nrao_recommended=None, parse_chemistry_locally=True): """ The Splatalogue service returns lines with rest frequencies in the range [min_frequency, max_frequency]. Parameters ---------- min_frequency : `astropy.units` Minimum frequency (or any spectral() equivalent) max_frequency : `astropy.units` Maximum frequency (or any spectral() equivalent) band : str The observing band. If it is not 'any', it overrides minfreq/maxfreq. top20: str One of ``'comet'``, ``'planet'``, ``'top20'``, ``'ism_hotcore'``, ``'ism_darkcloud'``, ``'ism_diffusecloud'``. Overrides chemical_name chemical_name : str Name of the chemical to search for. Treated as a regular expression. An empty set ('', (), [], {}) will match *any* species. Examples: ``'H2CO'`` - 13 species have H2CO somewhere in their formula. ``'Formaldehyde'`` - There are 8 isotopologues of Formaldehyde (e.g., H213CO). ``'formaldehyde'`` - Thioformaldehyde,Cyanoformaldehyde. ``'formaldehyde',chem_re_flags=re.I`` - Formaldehyde,thioformaldehyde, and Cyanoformaldehyde. ``' H2CO '`` - Just 1 species, H2CO. The spaces prevent including others. parse_chemistry_locally : bool Attempt to determine the species ID #'s locally before sending the query? This will prevent queries that have no matching species. It also performs a more flexible regular expression match to the species IDs. See the examples in `get_species_ids` chem_re_flags : int See the `re` module energy_min : `None` or float Energy range to include. See energy_type energy_max : `None` or float Energy range to include. See energy_type energy_type : ``'el_cm1'``, ``'eu_cm1'``, ``'eu_k'``, ``'el_k'`` Type of energy to restrict. L/U for lower/upper state energy, cm/K for *inverse* cm, i.e. wavenumber, or K for Kelvin intensity_lower_limit : `None` or float Lower limit on the intensity. See intensity_type intensity_type : `None` or ``'sij'``, ``'cdms_jpl'``, ``'aij'`` The type of intensity on which to place a lower limit transition : str e.g. 1-0 version : ``'v1.0'``, ``'v2.0'``, ``'v3.0'`` or ``'vall'`` Data version exclude : list Types of lines to exclude. Default is: (``'potential'``, ``'atmospheric'``, ``'probable'``) Can also exclude ``'known'``. To exclude nothing, use 'none', not the python object None, since the latter is meant to indicate 'leave as default' only_astronomically_observed : bool Show only astronomically observed species? only_NRAO_recommended : bool Show only NRAO recommended species? line_lists : list Options: Lovas, SLAIM, JPL, CDMS, ToyoMA, OSU, Recomb, Lisa, RFI line_strengths : list * CDMS/JPL Intensity : ls1 * Sij : ls3 * Aij : ls4 * Lovas/AST : ls5 energy_levels : list * E_lower (cm^-1) : el1 * E_lower (K) : el2 * E_upper (cm^-1) : el3 * E_upper (K) : el4 export : bool Set up arguments for the export server (as opposed to the HTML server)? 
export_limit : int Maximum number of lines in output file noHFS : bool No HFS Display displayHFS : bool Display HFS Intensity show_unres_qn : bool Display Unresolved Quantum Numbers show_upper_degeneracy : bool Display Upper State Degeneracy show_molecule_tag : bool Display Molecule Tag show_qn_code : bool Display Quantum Number Code show_lovas_labref : bool Display Lab Ref show_lovas_obsref : bool Display Obs Ref show_orderedfreq_only : bool Display Ordered Frequency ONLY show_nrao_recommended : bool Display NRAO Recommended Frequencies Returns ------- payload : dict Dictionary of the parameters to send to the SPLAT page """ payload = {'submit': 'Search', 'frequency_units': 'GHz', } if band != 'any': if band not in self.FREQUENCY_BANDS: raise ValueError("Invalid frequency band.") if min_frequency is not None or max_frequency is not None: warnings.warn("Band was specified, so the frequency " "specification is overridden") payload['band'] = band elif min_frequency is not None and max_frequency is not None: # allow setting payload without having *ANY* valid frequencies set min_frequency = min_frequency.to(u.GHz, u.spectral()) max_frequency = max_frequency.to(u.GHz, u.spectral()) if min_frequency > max_frequency: min_frequency, max_frequency = max_frequency, min_frequency payload['from'] = min_frequency.value payload['to'] = max_frequency.value if top20 is not None: if top20 in self.TOP20_LIST: payload['top20'] = top20 else: raise ValueError("Top20 is not one of the allowed values") elif chemical_name in ('', {}, (), [], set()): # include all payload['sid[]'] = [] elif chemical_name is not None: if parse_chemistry_locally: species_ids = self.get_species_ids(chemical_name, chem_re_flags) if len(species_ids) == 0: raise ValueError("No matching chemical species found.") payload['sid[]'] = list(species_ids.values()) else: payload['chemical_name'] = chemical_name if energy_min is not None: payload['energy_range_from'] = float(energy_min) if energy_max is not None: payload['energy_range_to'] = float(energy_max) if energy_type is not None: validate_energy_type(energy_type) payload['energy_range_type'] = energy_type if intensity_type is not None: payload['lill'] = 'lill_' + intensity_type if intensity_lower_limit is not None: payload[payload['lill']] = intensity_lower_limit if transition is not None: payload['tran'] = transition if version in self.versions: payload['data_version'] = version elif version is not None: raise ValueError("Invalid version specified. Allowed versions " "are {vers}".format(vers=str(self.versions))) if exclude == 'none': for e in ('potential', 'atmospheric', 'probable', 'known'): # Setting a keyword value to 'None' removes it (see query_lines_async) log.debug("Setting no_{0} to None".format(e)) payload['no_' + e] = None elif exclude is not None: for e in exclude: payload['no_' + e] = 'no_' + e if only_astronomically_observed: payload['include_only_observed'] = 'include_only_observed' if only_NRAO_recommended: payload['include_only_nrao'] = 'include_only_nrao' if line_lists is not None: if type(line_lists) not in (tuple, list): raise TypeError("Line lists should be a list of linelist " "names. 
See Splatalogue.ALL_LINE_LISTS") for L in self.ALL_LINE_LISTS: kwd = 'display' + L if L in line_lists: payload[kwd] = kwd else: payload[kwd] = '' if line_strengths is not None: for LS in line_strengths: payload[LS] = LS if energy_levels is not None: for EL in energy_levels: payload[EL] = EL for b in ("noHFS", "displayHFS", "show_unres_qn", "show_upper_degeneracy", "show_molecule_tag", "show_qn_code", "show_lovas_labref", "show_orderedfreq_only", "show_lovas_obsref", "show_nrao_recommended"): if locals()[b]: payload[b] = b # default arg, unmodifiable... payload['jsMath'] = 'font:symbol,warn:0' payload['__utma'] = '' payload['__utmc'] = '' if export: payload['submit'] = 'Export' payload['export_delimiter'] = 'colon' # or tab or comma payload['export_type'] = 'current' payload['offset'] = 0 payload['range'] = 'on' if export_limit is not None: payload['limit'] = export_limit else: payload['limit'] = self.LINES_LIMIT return payload
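# Usage sketch (assumed): how the keyword parser above is normally exercised
# through the public Splatalogue interface.  Species name, frequency range and
# energy cut are examples only.
import astropy.units as u
from astroquery.splatalogue import Splatalogue

lines = Splatalogue.query_lines(min_frequency=114 * u.GHz,
                                max_frequency=116 * u.GHz,
                                chemical_name=' CO ',   # spaces: exact species
                                energy_max=50, energy_type='eu_k')
print(lines[:5])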
def make_finder_chart_from_image_and_catalog( image, catalog, save_prefix, alma_kwargs={ 'public': False, 'science': False }, bands=(3, 4, 5, 6, 7, 8, 9, 10), private_band_colors=( 'maroon', 'red', 'orange', 'coral', 'brown', 'yellow', 'mediumorchid', 'palegoldenrod', ), public_band_colors=( 'blue', 'cyan', 'green', 'turquoise', 'teal', 'darkslategrey', 'chartreuse', 'lime', ), integration_time_contour_levels=np.logspace(0, 5, base=2, num=6), save_masks=False, use_saved_masks=False, linewidth=1, ): """ Create a "finder chart" showing where ALMA has pointed in various bands, including different color coding for public/private data and each band. Contours are set at various integration times. Parameters ---------- image : fits.PrimaryHDU or fits.ImageHDU object The image to overlay onto catalog : astropy.Table object The catalog of ALMA observations save_prefix : str The prefix for the output files. Both .reg and .png files will be written. The .reg files will have the band numbers and public/private appended, while the .png file will be named prefix_almafinderchart.png alma_kwargs : dict Keywords to pass to the ALMA archive when querying. private_band_colors / public_band_colors : tuple A tuple or list of colors to be associated with private/public observations in the various bands integration_time_contour_levels : list or np.array The levels at which to draw contours in units of seconds. Default is log-spaced (2^n) seconds: [ 1., 2., 4., 8., 16., 32.]) """ import aplpy import pyregion all_bands = bands bands = used_bands = [band for band in np.unique(catalog['band_list'])] log.info("The bands used include: {0}".format(used_bands)) band_colors_priv = dict(zip(all_bands, private_band_colors)) band_colors_pub = dict(zip(all_bands, public_band_colors)) log.info("Color map private: {0}".format(band_colors_priv)) log.info("Color map public: {0}".format(band_colors_pub)) if use_saved_masks: hit_mask_public = {} hit_mask_private = {} for band in bands: pubfile = '{0}_band{1}_public.fits'.format(save_prefix, band) if os.path.exists(pubfile): hit_mask_public[band] = fits.getdata(pubfile) privfile = '{0}_band{1}_private.fits'.format(save_prefix, band) if os.path.exists(privfile): hit_mask_private[band] = fits.getdata(privfile) else: today = np.datetime64('today') # At least temporarily obsolete # private_circle_parameters = { # band: [(row['RA'], row['Dec'], np.mean(rad).to(u.deg).value) # for row, rad in zip(catalog, primary_beam_radii) # if not row['Release date'] or # (np.datetime64(row['Release date']) > today and row['Band'] == band)] # for band in bands} # public_circle_parameters = { # band: [(row['RA'], row['Dec'], np.mean(rad).to(u.deg).value) # for row, rad in zip(catalog, primary_beam_radii) # if row['Release date'] and # (np.datetime64(row['Release date']) <= today and row['Band'] == band)] # for band in bands} # unique_private_circle_parameters = { # band: np.array(list(set(private_circle_parameters[band]))) # for band in bands} # unique_public_circle_parameters = { # band: np.array(list(set(public_circle_parameters[band]))) # for band in bands} release_dates = np.array(catalog['obs_release_date'], dtype=np.datetime64) for band in bands: log.info("BAND {0}".format(band)) privrows = sum((catalog['band_list'] == band) & (release_dates > today)) pubrows = sum((catalog['band_list'] == band) & (release_dates <= today)) log.info("PUBLIC: Number of rows: {0}".format(pubrows, )) log.info("PRIVATE: Number of rows: {0}.".format(privrows)) log.debug('Creating regions') prv_regions = { band: 
pyregion.ShapeList([ add_meta_to_reg(fp, {'integration': row['t_exptime']}) for row in catalog for fp in footprint_to_reg(row['s_region']) if (not row['obs_release_date']) or ( np.datetime64(row['obs_release_date']) > today and row['band_list'] == band) ]) for band in bands } pub_regions = { band: pyregion.ShapeList([ add_meta_to_reg(fp, {'integration': row['t_exptime']}) for row in catalog for fp in footprint_to_reg(row['s_region']) if row['obs_release_date'] and ( np.datetime64(row['obs_release_date']) <= today and row['band_list'] == band) ]) for band in bands } log.debug('Creating masks') prv_mask = { band: fits.PrimaryHDU(prv_regions[band].get_mask(image).astype('int'), header=image.header) for band in bands if prv_regions[band] } pub_mask = { band: fits.PrimaryHDU(pub_regions[band].get_mask(image).astype('int'), header=image.header) for band in bands if pub_regions[band] } hit_mask_public = { band: np.zeros_like(image.data) for band in pub_mask } hit_mask_private = { band: np.zeros_like(image.data) for band in prv_mask } mywcs = wcs.WCS(image.header) for band in bands: log.debug( 'Adding integration-scaled masks for Band: {0}'.format(band)) shapes = prv_regions[band] for shape in shapes: # private: release_date = 'sometime' says when it will be released (xlo, xhi, ylo, yhi), mask = pyregion_subset(shape, hit_mask_private[band], mywcs) log.debug("{0},{1},{2},{3}: {4}".format( xlo, xhi, ylo, yhi, mask.sum())) hit_mask_private[band][ ylo:yhi, xlo:xhi] += shape.meta['integration'] * mask if save_masks: shapes.write('{0}_band{1}_private.reg'.format( save_prefix, band)) shapes = pub_regions[band] for shape in shapes: # public: release_date = '' should mean already released (xlo, xhi, ylo, yhi), mask = pyregion_subset(shape, hit_mask_public[band], mywcs) log.debug("{0},{1},{2},{3}: {4}".format( xlo, xhi, ylo, yhi, mask.sum())) hit_mask_public[band][ ylo:yhi, xlo:xhi] += shape.meta['integration'] * mask if save_masks: shapes.write('{0}_band{1}_public.reg'.format( save_prefix, band)) if save_masks: for band in bands: if band in hit_mask_public: if hit_mask_public[band].any(): hdu = fits.PrimaryHDU(data=hit_mask_public[band], header=image.header) hdu.writeto('{0}_band{1}_public.fits'.format( save_prefix, band), clobber=True) if band in hit_mask_private: if hit_mask_private[band].any(): hdu = fits.PrimaryHDU(data=hit_mask_private[band], header=image.header) hdu.writeto('{0}_band{1}_private.fits'.format( save_prefix, band), clobber=True) fig = aplpy.FITSFigure(fits.HDUList(image), convention='calabretta') fig.show_grayscale(stretch='arcsinh', vmid=np.nanmedian(image.data)) for band in bands: if band in hit_mask_public: if hit_mask_public[band].any(): fig.show_contour(fits.PrimaryHDU(data=hit_mask_public[band], header=image.header), levels=integration_time_contour_levels, colors=[band_colors_pub[int(band)]] * len(integration_time_contour_levels), linewidth=linewidth, convention='calabretta') if band in hit_mask_private: if hit_mask_private[band].any(): fig.show_contour(fits.PrimaryHDU(data=hit_mask_private[band], header=image.header), levels=integration_time_contour_levels, colors=[band_colors_priv[int(band)]] * len(integration_time_contour_levels), linewidth=linewidth, convention='calabretta') fig.save('{0}_almafinderchart.png'.format(save_prefix)) return image, catalog, hit_mask_public, hit_mask_private
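# Usage sketch (assumed): building a finder chart from a FITS image and an ALMA
# footprint catalog with the function above.  The file name and target are
# examples only; query_region is the standard astroquery ALMA search.
import astropy.units as u
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astroquery.alma import Alma

image = fits.open('W51_2MASS_K.fits')[0]
coords = SkyCoord.from_name('W51')
catalog = Alma.query_region(coords, radius=0.25 * u.deg)
make_finder_chart_from_image_and_catalog(image, catalog, save_prefix='W51',
                                          save_masks=True)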
def to_cache(response, cache_file):
    log.debug("Caching data to {0}".format(cache_file))
    with open(cache_file, "wb") as f:
        pickle.dump(response, f)
def parse_lamda_lines(data): """ Extract a LAMDA datafile into a dictionary of tables (non-pythonic! more like, fortranic) """ meta_rad = {} meta_mol = {} meta_coll = {} levels = [] radtrans = [] collider = None ncolltrans = None for ii, line in enumerate(data): if line[0] == '!': continue if 'molecule' not in meta_mol: meta_mol['molecule'] = _cln(line) continue if 'molwt' not in meta_mol: meta_mol['molwt'] = float(_cln(line)) continue if 'nenergylevels' not in meta_mol: meta_mol['nenergylevels'] = int(_cln(line)) continue if len(levels) < meta_mol['nenergylevels']: lev, en, wt = _cln(line).split()[:3] jul = " ".join(_cln(line).split()[3:]) levels.append([int(lev), float(en), int(float(wt)), jul]) continue if 'radtrans' not in meta_rad: meta_rad['radtrans'] = int(_cln(line)) continue if len(radtrans) < meta_rad['radtrans']: # Can have wavenumber at the end. Ignore that. trans, up, low, aval, freq, eu = _cln(line).split()[:6] radtrans.append([ int(trans), int(up), int(low), float(aval), float(freq), float(eu) ]) continue if 'ncoll' not in meta_coll: meta_coll['ncoll'] = int(_cln(line)) collrates = {} continue if collider is None: collider = int(line[0]) collname = collider_ids[collider] collrates[collider] = [] meta_coll[collname] = { 'collider': collname, 'collider_id': collider } continue if ncolltrans is None: ncolltrans = int(_cln(line)) meta_coll[collname]['ntrans'] = ncolltrans continue if 'ntemp' not in meta_coll[collname]: meta_coll[collname]['ntemp'] = int(_cln(line)) continue if 'temperatures' not in meta_coll[collname]: meta_coll[collname]['temperatures'] = [ int(float(x)) for x in _cln(line).split() ] continue if len(collrates[collider]) < meta_coll[collname]['ntrans']: trans, up, low = [int(x) for x in _cln(line).split()[:3]] temperatures = [float(x) for x in _cln(line).split()[3:]] collrates[collider].append([trans, up, low] + temperatures) if len(collrates[collider]) == meta_coll[collname]['ntrans']: # meta_coll[collider_ids[collider]+'_collrates'] = collrates log.debug("{ii} Finished loading collider {0:d}: " "{1}".format(collider, collider_ids[collider], ii=ii)) collider = None ncolltrans = None if len(collrates) == meta_coll['ncoll']: # All done! break if len(levels[0]) == 4: mol_table_names = ['Level', 'Energy', 'Weight', 'J'] elif len(levels[0]) == 5: mol_table_names = ['Level', 'Energy', 'Weight', 'J', 'F'] else: raise ValueError("Unrecognized levels structure.") mol_table_columns = [ table.Column(name=name, data=data) for name, data in zip(mol_table_names, zip(*levels)) ] mol_table = table.Table(data=mol_table_columns, meta=meta_mol) rad_table_names = [ 'Transition', 'Upper', 'Lower', 'EinsteinA', 'Frequency', 'E_u(K)' ] rad_table_columns = [ table.Column(name=name, data=data) for name, data in zip(rad_table_names, zip(*radtrans)) ] rad_table = table.Table(data=rad_table_columns, meta=meta_rad) coll_tables = {collider_ids[collider]: None for collider in collrates} for collider in collrates: collname = collider_ids[collider] coll_table_names = (['Transition', 'Upper', 'Lower'] + [ 'C_ij(T={0:d})'.format(tem) for tem in meta_coll[collname]["temperatures"] ]) coll_table_columns = [ table.Column(name=name, data=data) for name, data in zip(coll_table_names, zip(*collrates[collider])) ] coll_table = table.Table(data=coll_table_columns, meta=meta_coll[collname]) coll_tables[collname] = coll_table return coll_tables, rad_table, mol_table
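# Usage sketch (assumed): feeding a local LAMDA datafile to the parser above.
# The filename is a placeholder; astroquery.lamda can also fetch these files
# remotely.
with open('co.dat') as fh:
    coll_tables, rad_table, mol_table = parse_lamda_lines(fh.readlines())

print(mol_table.meta['molecule'], len(rad_table), 'radiative transitions')
print('collision partners:', list(coll_tables))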
def load_data(self, ids, data_release=None, data_structure='INDIVIDUAL', retrieval_type="ALL", valid_data=True, band=None, avoid_datatype_check=False, format="votable", output_file=None, overwrite_output_file=False, verbose=False): """Loads the specified table TAP+ only Parameters ---------- ids : str list, mandatory list of identifiers data_release: str, optional, default None data release from which data should be taken. E.g. 'Gaia DR2' By default, it takes the current default one. data_structure: str, optional, default 'INDIVIDUAL' it can be 'INDIVIDUAL', 'COMBINED', 'RAW': 'INDIVIDUAL' means... 'COMBINED' means... 'RAW' means... retrieval_type : str, optional, default 'ALL' retrieval type identifier. It can be either 'epoch_photometry' for compatibility reasons or 'ALL' to retrieve all data from the list of sources. valid_data : bool, optional, default True By default, the epoch photometry service returns only valid data, that is, all data rows where flux is not null and rejected_by_photometry flag is not true. In order to retrieve all data associated to a given source without this filter, this request parameter should be included (valid_data=False) band : str, optional, default None, valid values: G, BP, RP By default, the epoch photometry service returns all the available photometry bands for the requested source. This parameter allows to filter the output lightcurve by its band. avoid_datatype_check: boolean, optional, default False. By default, this value will be set to False. If it is set to 'true' the Datalink items tags will not be checked. format : str, optional, default 'votable' loading format output_file : string, optional, default None file where the results are saved. If it is not provided, the http response contents are returned. overwrite_output_file : boolean, optional, default False To overwrite the output_file if it already exists. verbose : bool, optional, default 'False' flag to display information about the process Returns ------- A table object """ if retrieval_type is None: raise ValueError("Missing mandatory argument 'retrieval_type'") now = datetime.now() now_formatted = now.strftime("%Y%m%d_%H%M%S") temp_dirname = "temp_" + now_formatted downloadname_formated = "download_" + now_formatted output_file_specified = False if output_file is None: output_file = os.path.join(os.getcwd(), temp_dirname, downloadname_formated) else: output_file_specified = True output_file = os.path.abspath(output_file) if not overwrite_output_file and os.path.exists(output_file): raise ValueError(f"{output_file} file already exists. Please use overwrite_output_file='False' to " f"overwrite output file.") path = os.path.dirname(output_file) if ids is None: raise ValueError("Missing mandatory argument 'ids'") if avoid_datatype_check is False: # we need to check params rt = str(retrieval_type).upper() if rt != 'ALL' and rt not in self.VALID_DATALINK_RETRIEVAL_TYPES: raise ValueError(f"Invalid mandatory argument 'retrieval_type'. 
Found {retrieval_type}, " f"expected: 'ALL' or any of {self.VALID_DATALINK_RETRIEVAL_TYPES}") params_dict = {} if not valid_data or str(retrieval_type) == 'ALL': params_dict['VALID_DATA'] = "false" elif valid_data: params_dict['VALID_DATA'] = "true" if band is not None: if band != 'G' and band != 'BP' and band != 'RP': raise ValueError("Invalid band value '%s' (Valid values: " + "'G', 'BP' and 'RP)" % band) else: params_dict['BAND'] = band if isinstance(ids, str): ids_arg = ids else: if isinstance(ids, int): ids_arg = str(ids) else: ids_arg = ','.join(str(item) for item in ids) params_dict['ID'] = ids_arg if data_release is not None: params_dict['RELEASE'] = data_release params_dict['DATA_STRUCTURE'] = data_structure params_dict['FORMAT'] = str(format) params_dict['RETRIEVAL_TYPE'] = str(retrieval_type) params_dict['USE_ZIP_ALWAYS'] = 'true' if path != '': try: os.mkdir(path) except FileExistsError: log.error("Path %s already exist" % path) except OSError: log.error("Creation of the directory %s failed" % path) try: self.__gaiadata.load_data(params_dict=params_dict, output_file=output_file, verbose=verbose) files = Gaia.__get_data_files(output_file=output_file, path=path) except Exception as err: raise err finally: if not output_file_specified: shutil.rmtree(path) if verbose: if output_file_specified: log.info("output_file = %s" % output_file) log.debug("List of products available:") # for key, value in files.items(): # print("Product =", key) items = [key for key in files.keys()] items.sort() for item in items: # print(f'* {item}') if verbose: log.debug("Product = " + item) return files
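# Usage sketch (assumed): retrieving DataLink products for a few sources with
# load_data().  The source ids, release string and retrieval type are examples
# only.
from astroquery.gaia import Gaia

files = Gaia.load_data(ids=[1104405489608579584, 1104405489608579585],
                       data_release='Gaia DR3',
                       retrieval_type='EPOCH_PHOTOMETRY',
                       data_structure='INDIVIDUAL',
                       format='votable')
for product in sorted(files):
    print(product)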
def get_access_url(service, capability=None): """ Returns the URL corresponding to a service by doing a lookup in the cadc registry. It returns the access URL corresponding to cookie authentication. :param service: the service the capability belongs to. It can be identified by a CADC uri ('ivo://cadc.nrc.ca/) which is looked up in the CADC registry or by the URL where the service capabilities is found. :param capability: uri representing the capability for which the access url is sought :return: the access url Note ------ This function implements the functionality of a CADC registry as defined by the IVOA. It should be eventually moved to its own directory. Caching should be considered to reduce the number of remote calls to CADC registry """ caps_url = '' if service.startswith('http'): if not capability: return service caps_url = service else: # get caps from the CADC registry if not get_access_url.caps: try: response = requests.get(conf.CADC_REGISTRY_URL) response.raise_for_status() except requests.exceptions.HTTPError as err: log.debug("ERROR getting the CADC registry: {}".format( str(err))) raise err for line in response.text.splitlines(): if len(line) > 0 and not line.startswith('#'): service_id, capabilies_url = line.split('=') get_access_url.caps[service_id.strip()] = \ capabilies_url.strip() # lookup the service service_uri = service if not service.startswith('ivo'): # assume short form of CADC service service_uri = 'ivo://cadc.nrc.ca/{}'.format(service) if service_uri not in get_access_url.caps: raise AttributeError( "Cannot find the capabilities of service {}".format(service)) # look up in the CADC reg for the service capabilities caps_url = get_access_url.caps[service_uri] if not capability: return caps_url try: response2 = requests.get(caps_url) response2.raise_for_status() except Exception as e: log.debug("ERROR getting the service capabilities: {}".format(str(e))) raise e soup = BeautifulSoup(response2.text, features="html5lib") for cap in soup.find_all('capability'): if cap.get("standardid", None) == capability: if len(cap.find_all('interface')) == 1: return cap.find_all('interface')[0].accessurl.text for i in cap.find_all('interface'): if hasattr(i, 'securitymethod'): sm = i.securitymethod if not sm or sm.get("standardid", None) is None or\ sm['standardid'] == "ivo://ivoa.net/sso#cookie": return i.accessurl.text raise RuntimeError("ERROR - capability {} not found or not working with " "anonymous or cookie access".format(capability))
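# Usage sketch (assumed): resolving a CADC TAP endpoint with the registry
# helper above.  The short service name and the IVOA standardID are examples;
# get_access_url.caps is the module-level cache the function expects to exist.
get_access_url.caps = {}
tap_url = get_access_url('tap', capability='ivo://ivoa.net/std/TAP')
print(tap_url)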
def _activate_form(self, response, form_index=0, form_id=None, inputs={}, cache=True, method=None): """ Parameters ---------- method: None or str Can be used to override the form-specified method """ # Extract form from response root = BeautifulSoup(response.content, 'html5lib') if form_id is None: form = root.find_all('form')[form_index] else: form = root.find_all('form', id=form_id)[form_index] # Construct base url form_action = form.get('action') if "://" in form_action: url = form_action elif form_action.startswith('/'): url = '/'.join(response.url.split('/', 3)[:3]) + form_action else: url = response.url.rsplit('/', 1)[0] + '/' + form_action # Identify payload format fmt = None form_method = form.get('method').lower() if form_method == 'get': fmt = 'get' # get(url, params=payload) elif form_method == 'post': if 'enctype' in form.attrs: if form.attrs['enctype'] == 'multipart/form-data': fmt = 'multipart/form-data' # post(url, files=payload) elif form.attrs['enctype'] == 'application/x-www-form-urlencoded': fmt = 'application/x-www-form-urlencoded' # post(url, data=payload) else: raise Exception("enctype={0} is not supported!".format(form.attrs['enctype'])) else: fmt = 'application/x-www-form-urlencoded' # post(url, data=payload) # Extract payload from form payload = [] for form_elem in form.find_all(['input', 'select', 'textarea']): value = None is_file = False tag_name = form_elem.name key = form_elem.get('name') if tag_name == 'input': is_file = (form_elem.get('type') == 'file') value = form_elem.get('value') if form_elem.get('type') in ['checkbox', 'radio']: if form_elem.has_attr('checked'): if not value: value = 'on' else: value = None elif tag_name == 'select': if form_elem.get('multiple') is not None: value = [] if form_elem.select('option[value]'): for option in form_elem.select('option[value]'): if option.get('selected') is not None: value.append(option.get('value')) else: for option in form_elem.select('option'): if option.get('selected') is not None: # bs4 NavigableString types have bad, # undesirable properties that result # in recursion errors when caching value.append(str(option.string)) else: if form_elem.select('option[value]'): for option in form_elem.select('option[value]'): if option.get('selected') is not None: value = option.get('value') # select the first option field if none is selected if value is None: value = form_elem.select( 'option[value]')[0].get('value') else: # survey form just uses text, not value for option in form_elem.select('option'): if option.get('selected') is not None: value = str(option.string) # select the first option field if none is selected if value is None: value = str(form_elem.select('option')[0].string) if key in inputs: if isinstance(inputs[key], list): # list input is accepted (for array uploads) value = inputs[key] else: value = str(inputs[key]) if (key is not None): # and (value is not None): if fmt == 'multipart/form-data': if is_file: payload.append( (key, ('', '', 'application/octet-stream'))) else: if type(value) is list: for v in value: entry = (key, ('', v)) # Prevent redundant key, value pairs # (can happen if the form repeats them) if entry not in payload: payload.append(entry) elif value is None: entry = (key, ('', '')) if entry not in payload: payload.append(entry) else: entry = (key, ('', value)) if entry not in payload: payload.append(entry) else: if type(value) is list: for v in value: entry = (key, v) if entry not in payload: payload.append(entry) else: entry = (key, value) if entry not in payload: 
payload.append(entry) # for future debugging self._payload = payload log.debug("Form: payload={0}".format(payload)) if method is not None: fmt = method log.debug("Method/format = {0}".format(fmt)) # Send payload if fmt == 'get': response = self._request("GET", url, params=payload, cache=cache) elif fmt == 'multipart/form-data': response = self._request("POST", url, files=payload, cache=cache) elif fmt == 'application/x-www-form-urlencoded': response = self._request("POST", url, data=payload, cache=cache) return response
def _download_file(self, url, local_filepath, timeout=None, auth=None, continuation=True, cache=False, method="GET", head_safe=False, **kwargs): """ Download a file. Resembles `astropy.utils.data.download_file` but uses the local ``_session`` Parameters ---------- url : string local_filepath : string timeout : int auth : dict or None continuation : bool If the file has already been partially downloaded *and* the server supports HTTP "range" requests, the download will be continued where it left off. cache : bool method : "GET" or "POST" head_safe : bool """ if head_safe: response = self._session.request("HEAD", url, timeout=timeout, stream=True, auth=auth, **kwargs) else: response = self._session.request(method, url, timeout=timeout, stream=True, auth=auth, **kwargs) response.raise_for_status() if 'content-length' in response.headers: length = int(response.headers['content-length']) if length == 0: log.warn('URL {0} has length=0'.format(url)) else: length = None if ((os.path.exists(local_filepath) and ('Accept-Ranges' in response.headers) and continuation)): open_mode = 'ab' existing_file_length = os.stat(local_filepath).st_size if length is not None and existing_file_length >= length: # all done! log.info( "Found cached file {0} with expected size {1}.".format( local_filepath, existing_file_length)) return elif existing_file_length == 0: open_mode = 'wb' else: log.info("Continuing download of file {0}, with {1} bytes to " "go ({2}%)".format( local_filepath, length - existing_file_length, (length - existing_file_length) / length * 100)) # bytes are indexed from 0: # https://en.wikipedia.org/wiki/List_of_HTTP_header_fields#range-request-header end = "{0}".format(length - 1) if length is not None else "" self._session.headers['Range'] = "bytes={0}-{1}".format( existing_file_length, end) response = self._session.request(method, url, timeout=timeout, stream=True, auth=auth, **kwargs) response.raise_for_status() del self._session.headers['Range'] elif cache and os.path.exists(local_filepath): if length is not None: statinfo = os.stat(local_filepath) if statinfo.st_size != length: log.warning("Found cached file {0} with size {1} that is " "different from expected size {2}".format( local_filepath, statinfo.st_size, length)) open_mode = 'wb' else: log.info( "Found cached file {0} with expected size {1}.".format( local_filepath, statinfo.st_size)) response.close() return else: log.info("Found cached file {0}.".format(local_filepath)) response.close() return else: open_mode = 'wb' if head_safe: response = self._session.request(method, url, timeout=timeout, stream=True, auth=auth, **kwargs) response.raise_for_status() blocksize = astropy.utils.data.conf.download_block_size log.debug( f"Downloading URL {url} to {local_filepath} with size {length} " f"by blocks of {blocksize}") bytes_read = 0 # Only show progress bar if logging level is INFO or lower. if log.getEffectiveLevel() <= 20: progress_stream = None # Astropy default else: progress_stream = io.StringIO() with ProgressBarOrSpinner( length, ('Downloading URL {0} to {1} ...'.format(url, local_filepath)), file=progress_stream) as pb: with open(local_filepath, open_mode) as f: for block in response.iter_content(blocksize): f.write(block) bytes_read += blocksize if length is not None: pb.update( bytes_read if bytes_read <= length else length) else: pb.update(bytes_read) response.close() return response
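# Stand-alone sketch of the resume-by-Range idea used above: when a partial
# file already exists, request only the missing tail via the HTTP Range header.
# URL and path are placeholders.
import os
import requests

def resume_download(url, path, session=None):
    session = session or requests.Session()
    headers = {}
    if os.path.exists(path):
        headers['Range'] = 'bytes={}-'.format(os.stat(path).st_size)
    with session.get(url, headers=headers, stream=True) as resp:
        resp.raise_for_status()
        mode = 'ab' if resp.status_code == 206 else 'wb'  # 206 = Partial Content
        with open(path, mode) as fh:
            for block in resp.iter_content(chunk_size=64 * 1024):
                fh.write(block)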
def load_data(self, ids, data_release=None, data_structure='INDIVIDUAL', retrieval_type="ALL", valid_data=False, band=None, avoid_datatype_check=False, format="votable", output_file=None, overwrite_output_file=False, verbose=False): """Loads the specified table TAP+ only Parameters ---------- ids : str list, mandatory list of identifiers data_release: str, optional, default None data release from which data should be taken. E.g. 'Gaia DR2' By default, it takes the current default one. data_structure: str, optional, default 'INDIVIDUAL' it can be 'INDIVIDUAL', 'COMBINED', 'RAW': 'INDIVIDUAL' means products are provided in separate files for each sourceId. All files are zipped in a single bundle, even if only one source/file is considered 'COMBINED' means products are provided in a single file concatenating the data of all sourceIds together. How this is organised depends on the chosen format 'RAW' means products are provided following a Data Model similar to that used in the MDB, meaning in particular that parameters stored as arrays will remain as such. Like in the COMBINED structure, a single file is provided for the data of all sourceIds together, but in this case there will be always be one row per sourceId retrieval_type : str, optional, default 'ALL' to retrieve all data from the list of sources retrieval type identifier. For GAIA DR2 possible values are ['EPOCH_PHOTOMETRY'] For future GAIA DR3 (Once published), possible values will be ['EPOC_PHOTOMETRY', 'RVS', 'XP_CONTINUOUS', 'XP_SAMPLED', 'MCMC_GSPPHOT' or 'MCMC_MSC'] valid_data : bool, optional, default False By default, the epoch photometry service returns all available data, including data rows where flux is null and/or the rejected_by_photometry flag is set to True. In order to retrieve only valid data (data rows where flux is not null and/or the rejected_by_photometry flag is set to False) this request parameter should be included with valid_data=True. band : str, optional, default None, valid values: G, BP, RP By default, the epoch photometry service returns all the available photometry bands for the requested source. This parameter allows to filter the output lightcurve by its band. avoid_datatype_check: boolean, optional, default False. By default, this value will be set to False. If it is set to 'true' the Datalink items tags will not be checked. format : str, optional, default 'votable' loading format. Other available formats are 'csv', 'ecsv','json','votable_plain' and 'fits' output_file : string, optional, default None file where the results are saved. If it is not provided, the http response contents are returned. overwrite_output_file : boolean, optional, default False To overwrite the output_file if it already exists. verbose : bool, optional, default 'False' flag to display information about the process Returns ------- A table object """ if retrieval_type is None: raise ValueError("Missing mandatory argument 'retrieval_type'") now = datetime.now() now_formatted = now.strftime("%Y%m%d_%H%M%S") temp_dirname = "temp_" + now_formatted downloadname_formated = "download_" + now_formatted output_file_specified = False if output_file is None: output_file = os.path.join(os.getcwd(), temp_dirname, downloadname_formated) else: output_file_specified = True output_file = os.path.abspath(output_file) if not overwrite_output_file and os.path.exists(output_file): raise ValueError( f"{output_file} file already exists. 
Please use overwrite_output_file='False' to " f"overwrite output file.") path = os.path.dirname(output_file) if ids is None: raise ValueError("Missing mandatory argument 'ids'") if avoid_datatype_check is False: # we need to check params rt = str(retrieval_type).upper() if rt != 'ALL' and rt not in self.VALID_DATALINK_RETRIEVAL_TYPES: raise ValueError( f"Invalid mandatory argument 'retrieval_type'. Found {retrieval_type}, " f"expected: 'ALL' or any of {self.VALID_DATALINK_RETRIEVAL_TYPES}" ) params_dict = {} if not valid_data or str(retrieval_type) == 'ALL': params_dict['VALID_DATA'] = "false" elif valid_data: params_dict['VALID_DATA'] = "true" if band is not None: if band != 'G' and band != 'BP' and band != 'RP': raise ValueError("Invalid band value '%s' (Valid values: " + "'G', 'BP' and 'RP)" % band) else: params_dict['BAND'] = band if isinstance(ids, str): ids_arg = ids else: if isinstance(ids, int): ids_arg = str(ids) else: ids_arg = ','.join(str(item) for item in ids) params_dict['ID'] = ids_arg if data_release is not None: params_dict['RELEASE'] = data_release params_dict['DATA_STRUCTURE'] = data_structure params_dict['FORMAT'] = str(format) params_dict['RETRIEVAL_TYPE'] = str(retrieval_type) params_dict['USE_ZIP_ALWAYS'] = 'true' if path != '': try: os.mkdir(path) except FileExistsError: log.error("Path %s already exist" % path) except OSError: log.error("Creation of the directory %s failed" % path) try: self.__gaiadata.load_data(params_dict=params_dict, output_file=output_file, verbose=verbose) files = Gaia.__get_data_files(output_file=output_file, path=path) except Exception as err: raise err finally: if not output_file_specified: shutil.rmtree(path) if verbose: if output_file_specified: log.info("output_file = %s" % output_file) log.debug("List of products available:") # for key, value in files.items(): # print("Product =", key) items = [key for key in files.keys()] items.sort() for item in items: # print(f'* {item}') if verbose: log.debug("Product = " + item) return files
def retrieve_data(self, datasets, continuation=False, destination=None, with_calib='none', request_all_objects=False, unzip=True, request_id=None): """ Retrieve a list of datasets form the ESO archive. Parameters ---------- datasets : list of strings or string List of datasets strings to retrieve from the archive. destination: string Directory where the files are copied. Files already found in the destination directory are skipped, unless continuation=True. Default to astropy cache. continuation : bool Force the retrieval of data that are present in the destination directory. with_calib : string Retrieve associated calibration files: 'none' (default), 'raw' for raw calibrations, or 'processed' for processed calibrations. request_all_objects : bool When retrieving associated calibrations (``with_calib != 'none'``), this allows to request all the objects included the already downloaded ones, to be sure to retrieve all calibration files. This is useful when the download was interrupted. `False` by default. unzip : bool Unzip compressed files from the archive after download. `True` by default. request_id : str, int Retrieve from an existing request number rather than sending a new query, with the identifier from the URL in the email sent from the archive from the earlier request as in: https://dataportal.eso.org/rh/requests/[USERNAME]/[request_id] Returns ------- files : list of strings or string List of files that have been locally downloaded from the archive. Examples -------- >>> dptbl = Eso.query_instrument('apex', pi_coi='ginsburg') >>> dpids = [row['DP.ID'] for row in dptbl if 'Map' in row['Object']] >>> files = Eso.retrieve_data(dpids) """ calib_options = {'none': '', 'raw': 'CalSelectorRaw2Raw', 'processed': 'CalSelectorRaw2Master'} if with_calib not in calib_options: raise ValueError("invalid value for 'with_calib', " "it must be 'none', 'raw' or 'processed'") if isinstance(datasets, str): return_list = False datasets = [datasets] else: return_list = True if not isinstance(datasets, (list, tuple, np.ndarray)): raise TypeError("Datasets must be given as a list of strings.") # First: Detect datasets already downloaded if with_calib != 'none' and request_all_objects: datasets_to_download, files = list(datasets), [] else: log.info("Detecting already downloaded datasets...") datasets_to_download, files = self._check_existing_files( datasets, continuation=continuation, destination=destination) # Second: Check that the datasets to download are in the archive if request_id is None: log.info("Checking availability of datasets to download...") valid_datasets = [self.verify_data_exists(ds) for ds in datasets_to_download] else: # Assume all valid if a request_id was provided valid_datasets = [(ds, True) for ds in datasets_to_download] if not all(valid_datasets): invalid_datasets = [ds for ds, v in zip(datasets_to_download, valid_datasets) if not v] raise ValueError("The following data sets were not found on the " "ESO servers: {0}".format(invalid_datasets)) # Third: Download the other datasets log.info("Downloading datasets...") if datasets_to_download: if not self.authenticated(): self.login() url = "http://archive.eso.org/cms/eso-data/eso-data-direct-retrieval.html" with suspend_cache(self): # Never cache staging operations if request_id is None: log.info("Contacting retrieval server...") retrieve_data_form = self._request("GET", url, cache=False) retrieve_data_form.raise_for_status() log.info("Staging request...") inputs = {"list_of_datasets": "\n".join(datasets_to_download)} 
                data_confirmation_form = self._activate_form(
                    retrieve_data_form, form_index=-1, inputs=inputs,
                    cache=False)
                data_confirmation_form.raise_for_status()

                root = BeautifulSoup(data_confirmation_form.content,
                                     'html5lib')
                login_button = root.select('input[value=LOGIN]')
                if login_button:
                    raise LoginError("Not logged in. "
                                     "You must be logged in to download data.")

                inputs = {}
                if with_calib != 'none':
                    inputs['requestCommand'] = calib_options[with_calib]

                # TODO: There may be another screen for Not Authorized;
                # that should be included too

                # form name is "retrieve"; no id
                data_download_form = self._activate_form(
                    data_confirmation_form, form_index=-1, inputs=inputs,
                    cache=False)
            else:
                # Build the URL by hand
                request_url = 'https://dataportal.eso.org/rh/requests/'
                request_url += f'{self.USERNAME}/{request_id}'
                data_download_form = self._request("GET", request_url,
                                                   cache=False)
                _content = data_download_form.content.decode('utf-8')
                if 'Request Handler - Error' in _content:
                    # Likely a problem with the request_url
                    msg = (f"The form at {request_url} returned an error."
                           " See your recent requests at "
                           "https://dataportal.eso.org/rh/requests/"
                           f"{self.USERNAME}/recentRequests")
                    raise RemoteServiceError(msg)

            log.info("Staging form is at {0}"
                     .format(data_download_form.url))
            root = BeautifulSoup(data_download_form.content, 'html5lib')
            state = root.select('span[id=requestState]')[0].text
            t0 = time.time()
            while state not in ('COMPLETE', 'ERROR'):
                time.sleep(2.0)
                data_download_form = self._request("GET",
                                                   data_download_form.url,
                                                   cache=False)
                root = BeautifulSoup(data_download_form.content, 'html5lib')
                state = root.select('span[id=requestState]')[0].text
                print("{0:20.0f}s elapsed".format(time.time() - t0),
                      end='\r')
                sys.stdout.flush()
            if state == 'ERROR':
                raise RemoteServiceError("There was a remote service "
                                         "error; perhaps the requested "
                                         "file could not be found?")

            if with_calib != 'none':
                # When files are requested with calibrations, some javascript
                # is used to display the files, which prevents retrieving them
                # directly. So instead we retrieve the download script
                # provided in the web page, and use it to extract the list of
                # files. An added benefit is that the list of files in the
                # download script is de-duplicated, whereas on the web page
                # the calibration files would be duplicated for each exposure.
                link = root.select('a[href$="/script"]')[0]
                if 'downloadRequest' not in link.text:
                    # Make sure that we found the correct link
                    raise RemoteServiceError(
                        "A link was found in the download file for the "
                        "calibrations that is not a downloadRequest link "
                        "and therefore appears invalid.")
                href = link.attrs['href']
                script = self._request("GET", href, cache=False)
                fileLinks = re.findall(
                    r'"(https://dataportal\.eso\.org/dataPortal/api/requests/.*)"',
                    script.text)

                # URLs with api/ require Basic Authentication, though it is
                # easier for us to reuse the existing requests session (to
                # avoid asking again for a username/password if it is not
                # stored).
                # So we remove api/ from the urls:
                fileLinks = [
                    f.replace('https://dataportal.eso.org/dataPortal/api/requests',
                              'https://dataportal.eso.org/dataPortal/requests')
                    for f in fileLinks]

                log.info("Detecting already downloaded datasets, "
                         "including calibrations...")
                fileIds = [f.rsplit('/', maxsplit=1)[1] for f in fileLinks]
                filteredIds, files = self._check_existing_files(
                    fileIds, continuation=continuation,
                    destination=destination)
                fileLinks = [f for f, fileId in zip(fileLinks, fileIds)
                             if fileId in filteredIds]
            else:
                fileIds = root.select('input[name=fileId]')
                fileLinks = ["http://dataportal.eso.org/dataPortal"
                             + fileId.attrs['value'].split()[1]
                             for fileId in fileIds]

            nfiles = len(fileLinks)
            log.info("Downloading {} files...".format(nfiles))
            log.debug("Files:\n{}".format('\n'.join(fileLinks)))
            for i, fileLink in enumerate(fileLinks, 1):
                fileId = fileLink.rsplit('/', maxsplit=1)[1]

                if request_id is not None:
                    # Since we fetched the script directly without sending
                    # a new request, check here that the file in the list
                    # is among those requested in the input list
                    if fileId.split('.fits')[0] not in datasets_to_download:
                        continue

                log.info("Downloading file {}/{}: {}..."
                         .format(i, nfiles, fileId))
                filename = self._request("GET", fileLink, save=True,
                                         continuation=True)

                if filename.endswith(('.gz', '.7z', '.bz2', '.xz', '.Z')) and unzip:
                    log.info("Unzipping file {0}...".format(fileId))
                    filename = system_tools.gunzip(filename)

                if destination is not None:
                    log.info("Copying file {0} to {1}...".format(fileId,
                                                                 destination))
                    destfile = os.path.join(destination,
                                            os.path.basename(filename))
                    shutil.move(filename, destfile)
                    files.append(destfile)
                else:
                    files.append(filename)

    # Empty the redirect cache of this request session.
    # Only available and needed for requests versions < 2.17.
    try:
        self._session.redirect_cache.clear()
    except AttributeError:
        pass

    log.info("Done!")
    if (not return_list) and (len(files) == 1):
        files = files[0]
    return files
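# --- A hedged usage sketch (not part of the library code above) ---
# Illustrates the calibration-aware path documented in retrieve_data. The
# username and dataset identifier are placeholders, and a real ESO account is
# required, so treat this as a sketch of the keyword combinations rather than
# a reproducible query.

import os

from astroquery.eso import Eso

eso = Eso()
eso.login("YOUR_USERNAME")                # placeholder username; prompts for a password

os.makedirs("./eso_data", exist_ok=True)  # destination must exist before files are moved there
files = eso.retrieve_data(["MIDI.2014-07-25T02:03:11.561"],  # placeholder dataset ID
                          with_calib="raw",          # also stage the raw calibration files
                          destination="./eso_data",
                          unzip=True)
print(files)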
def download_files(self, files, *, savedir=None, cache=True,
                   continuation=True, skip_unauthorized=True,
                   verify_only=False):
    """
    Given a list of file URLs, download them.

    Note: if the list contains repeated URLs, each will only be downloaded
    once, so the returned list may be shorter than the input list.

    Parameters
    ----------
    files : list
        List of URLs to download
    savedir : None or str
        The directory to save to. Default is the cache location.
    cache : bool
        Cache the download?
    continuation : bool
        Attempt to continue where the download left off (if it was broken)
    skip_unauthorized : bool
        If you receive "unauthorized" responses for some of the download
        requests, skip over them. If this is False, an exception will be
        raised.
    verify_only : bool
        Go through the process of checking the files to see whether they
        have the right size, but do not actually download them. This option
        may be useful if a previous download run failed partway.

    Returns
    -------
    downloaded_files : list
        Local paths of the downloaded (or verified) files.
    """
    if self.USERNAME:
        auth = self._get_auth_info(self.USERNAME)
    else:
        auth = None

    downloaded_files = []
    if savedir is None:
        savedir = self.cache_location
    for file_link in unique(files):
        log.debug("Downloading {0} to {1}".format(file_link, savedir))
        try:
            check_filename = self._request('HEAD', file_link, auth=auth)
            check_filename.raise_for_status()
        except requests.HTTPError as ex:
            if ex.response.status_code == 401:
                if skip_unauthorized:
                    log.info("Access denied to {url}. Skipping to"
                             " next file".format(url=file_link))
                    continue
                else:
                    raise ex

        try:
            filename = re.search("filename=(.*)",
                                 check_filename.headers['Content-Disposition']).groups()[0]
        except KeyError:
            log.info(f"Unable to find filename for {file_link} "
                     "(missing Content-Disposition in header). "
                     "Skipping to next file.")
            continue

        if savedir is not None:
            filename = os.path.join(savedir, filename)

        if verify_only:
            existing_file_length = os.stat(filename).st_size
            if 'content-length' in check_filename.headers:
                length = int(check_filename.headers['content-length'])
                if length == 0:
                    warnings.warn('URL {0} has length=0'.format(file_link))
                elif existing_file_length == length:
                    log.info(f"Found cached file {filename} with expected size {existing_file_length}.")
                elif existing_file_length < length:
                    log.info(f"Found cached file {filename} with size {existing_file_length} < expected "
                             f"size {length}. The download should be continued.")
                elif existing_file_length > length:
                    warnings.warn(f"Found cached file {filename} with size {existing_file_length} > expected "
                                  f"size {length}. The download is likely corrupted.",
                                  CorruptDataWarning)
            else:
                warnings.warn(f"Could not verify {file_link} because it has no 'content-length'")

        try:
            if not verify_only:
                self._download_file(file_link,
                                    filename,
                                    timeout=self.TIMEOUT,
                                    auth=auth,
                                    cache=cache,
                                    method='GET',
                                    head_safe=False,
                                    continuation=continuation)

            downloaded_files.append(filename)
        except requests.HTTPError as ex:
            if ex.response.status_code == 401:
                if skip_unauthorized:
                    log.info("Access denied to {url}. Skipping to"
                             " next file".format(url=file_link))
                    continue
                else:
                    raise ex
            elif ex.response.status_code == 403:
                log.error("Access denied to {url}".format(url=file_link))
                if 'dataPortal' in file_link and 'sso' not in file_link:
                    log.error("The URL may be incorrect. Try using "
                              "{0} instead of {1}"
                              .format(file_link.replace('dataPortal/',
                                                        'dataPortal/sso/'),
                                      file_link))
                raise ex
            elif ex.response.status_code == 500:
                # empirically, this works the second time most of the time...
                self._download_file(file_link,
                                    filename,
                                    timeout=self.TIMEOUT,
                                    auth=auth,
                                    cache=cache,
                                    method='GET',
                                    head_safe=False,
                                    continuation=continuation)
                downloaded_files.append(filename)
            else:
                raise ex

    return downloaded_files
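# --- A hedged usage sketch (not part of the library code above) ---
# Shows the download-then-verify pattern enabled by verify_only. The URL is a
# placeholder, and using an Alma instance here is an assumption based on the
# method signature above; any class exposing this download_files method would
# behave the same way.

import os

from astroquery.alma import Alma

alma = Alma()
os.makedirs("./alma_data", exist_ok=True)
urls = ["https://almascience.org/dataPortal/member.uid___A001_X000_X00.tar"]  # placeholder URL

# First pass: download, resuming any partially downloaded files
local_paths = alma.download_files(urls, savedir="./alma_data", continuation=True)

# Second pass: re-check the local files against the server-reported sizes
# without re-downloading anything
alma.download_files(urls, savedir="./alma_data", verify_only=True)
print(local_paths)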