Example #1
0
    def check_one_url(self, link):
        """Get list of URLs linked to from the current URL (if any).

        Args:
            link (:obj:`Link`): Instance of :class:`Link`. Only the URL in
                link.target is examined.

        Returns:
            Either

                #. None if link has no ``target`` attribute or link.target
                    can't be opened (the failure is logged as an error),
                #. the empty list if link.target is not an html document, or
                #. a list of links contained in link.target, expressed as
                    :class:`Link` objects whose targets are resolved relative
                    to link.target.
        """
        if not hasattr(link, 'target'):
            return None
        url = link.target

        try:
            f = urllib.request.urlopen(url)
        # HTTPError is a subclass of URLError, so it must be handled first.
        except urllib.error.HTTPError as e:
            self.log.error(f'Error code: {e.code}',
                           tags=util.ObjectLogTag.BANNER)
            return None
        except urllib.error.URLError as e:
            # For a missing local file, pull the path out of the OS error
            # message so it can be logged in abbreviated form; any other
            # failure reason is logged verbatim.
            tup = re.split(r"\[Errno 2\] No such file or directory: \'(.*)\'",
                           str(e.reason))
            if len(tup) == 3:
                str_ = util.abbreviate_path(tup[1], self.WK_DIR, '$WK_DIR')
            else:
                str_ = str(e.reason)
            self.log.error("Missing '%s'.",
                           str_,
                           tags=util.ObjectLogTag.BANNER)
            return None

        # Release the response on every exit path: the non-html early return,
        # the normal return, and any exception raised while parsing links.
        try:
            if f.info().get_content_subtype() != 'html':
                return []
            parser = LinkParser()
            return [
                Link(origin=url, target=urllib.parse.urljoin(url, link_out))
                for link_out in self.gen_links(f, parser)
            ]
        finally:
            f.close()
Example #2
0
 def write_ds(self, var, ds):
     """Top-level entry point for writing out a processed dataset.

     Kept separate from :meth:`write_dataset` so that child classes can
     override either step. Logs the dataset size and destination, passes the
     dataset through :meth:`clean_output_attrs` and :meth:`log_history_attr`,
     then calls :meth:`write_dataset`.

     Args:
         var: Variable object supplying ``dest_path``, ``full_name`` and
             the ``log`` used for the size message.
         ds: Dataset to be written; must expose ``nbytes``.

     Raises:
         The exception produced by :func:`util.chain_exc` (constructed with
         ``util.DataPreprocessEvent``) if attribute cleaning or the write
         itself raises.
     """
     dest_label = util.abbreviate_path(var.dest_path, self.WK_DIR, '$WK_DIR')
     size_mb = ds.nbytes / (1024 * 1024)
     var.log.info("Writing %d mb to %s", size_mb, dest_label)

     # Stage 1: tidy up attributes before anything is written to disk.
     try:
         ds = self.clean_output_attrs(var, ds)
         ds = self.log_history_attr(var, ds)
     except Exception as exc:
         raise util.chain_exc(
             exc,
             f"cleaning attributes to write data for {var.full_name}.",
             util.DataPreprocessEvent
         )

     # Stage 2: delegate the actual write to the (overridable) child method.
     try:
         self.write_dataset(var, ds)
     except Exception as exc:
         raise util.chain_exc(
             exc,
             f"writing data for {var.full_name}.",
             util.DataPreprocessEvent
         )

     del ds  # shouldn't be necessary
    def write_dataset(self, var, ds):
        """Write the dataset *ds* to a netCDF file at ``var.dest_path``.

        Creates the destination directory if needed, passes the dataset
        through :meth:`clean_output_encoding`, and writes it with
        ``ds.to_netcdf`` using ``self.save_dataset_kwargs``. The dataset is
        closed afterwards whether or not the write succeeds.

        Args:
            var: Variable object supplying ``dest_path``, ``is_static`` and,
                for non-static variables, ``T.name``.
            ds: Dataset to write.
        """
        # TODO: remove any netcdf Variables that were present in file (and ds)
        # but not needed for request
        path_str = util.abbreviate_path(var.dest_path, self.WK_DIR, '$WK_DIR')
        _log.info("Writing to %s", path_str)

        # os.makedirs('') raises, so only create a directory when dest_path
        # actually has a directory component.
        dest_dir = os.path.dirname(var.dest_path)
        if dest_dir:
            os.makedirs(dest_dir, exist_ok=True)

        _log.debug("xr.Dataset.to_netcdf on %s", var.dest_path)
        ds = self.clean_output_encoding(var, ds)

        # Static variables get no unlimited dimension; otherwise the
        # dimension named by var.T is made unlimited
        # (presumably the time axis -- confirm against the variable class).
        unlimited_dims = [] if var.is_static else [var.T.name]

        try:
            ds.to_netcdf(
                path=var.dest_path,
                mode='w',
                **self.save_dataset_kwargs,
                unlimited_dims=unlimited_dims
            )
        finally:
            # Release the dataset's resources even if the write fails.
            ds.close()
Example #4
0
 def run_msg(self):
     """Return the message to log when execution starts.

     The driver script path (``self.pod.driver``) is abbreviated relative to
     ``self.pod.POD_CODE_DIR`` before being placed in the message.
     """
     pod = self.pod
     driver_label = util.abbreviate_path(
         pod.driver, pod.POD_CODE_DIR, '$POD_CODE_DIR'
     )
     return f"Calling python {driver_label}"