Exemple #1
0
    def _download_file(self, url, local_filepath, timeout=None, auth=None):
        """
        Download a file.  Resembles `astropy.utils.data.download_file` but uses
        the local ``_session``

        Parameters
        ----------
        url : str
            Address handed to ``self._session.get``.
        local_filepath : str
            Path opened in binary write mode; existing content is overwritten.
        timeout : optional
            Forwarded unchanged to ``self._session.get``.
        auth : optional
            Forwarded unchanged to ``self._session.get``.
        """
        response = self._session.get(url, timeout=timeout, stream=True,
                                     auth=auth)
        # The response is released even when writing to disk or reading from
        # the network fails part-way through.
        try:
            if 'content-length' in response.headers:
                length = int(response.headers['content-length'])
            else:
                length = None

            blocksize = astropy.utils.data.conf.download_block_size

            bytes_read = 0

            with ProgressBarOrSpinner(length,
                                      'Downloading URL {0} ...'.format(url)) as pb:
                with open(local_filepath, 'wb') as f:
                    for block in response.iter_content(blocksize):
                        f.write(block)
                        # Count what was actually received: the final block is
                        # usually shorter than ``blocksize``.
                        bytes_read += len(block)
                        if length is not None:
                            # Clamp so the bar never exceeds the advertised
                            # size (the delivered byte count may differ from
                            # the Content-Length header).
                            pb.update(min(bytes_read, length))
                        else:
                            pb.update(bytes_read)
        finally:
            response.close()
Exemple #2
0
    def start(self):
        """
        Read every event from ``self.file_reader``, calibrate it and feed the
        true and measured charges of each selected telescope to
        ``self.calculator``.

        Raises
        ------
        KeyError
            If the event whose ``count`` is 0 has an all-zero
            ``photo_electron_image`` for its first telescope (or the lookup
            itself raises ``KeyError``); the failure is logged before being
            re-raised.
        """
        progress_message = "Filling Charge Resolution"
        with ProgressBarOrSpinner(None, message=progress_message) as pbar:
            for event in self.file_reader.read():
                pbar.update(event.count)
                tels = list(event.dl0.tels_with_data)

                # Only the event with count 0 is inspected for true charge.
                if event.count == 0:
                    try:
                        first_image = event.mc.tel[tels[0]].photo_electron_image
                        if np.all(first_image == 0):
                            raise KeyError
                    except KeyError:
                        self.log.exception('Source does not contain '
                                           'true charge!')
                        raise

                # Calibration chain, applied in this fixed order.
                self.r1.calibrate(event)
                self.dl0.reduce(event)
                self.dl1.calibrate(event)

                # A configured telescope list restricts processing to those
                # telescopes that also have data in this event.
                if self.telescopes:
                    tels = [tel for tel in self.telescopes
                            if tel in event.dl0.tels_with_data]

                for telid in tels:
                    self.calculator.add_charges(
                        event.mc.tel[telid].photo_electron_image,
                        event.dl1.tel[telid].image[0])
Exemple #3
0
    def _download_file(self,
                       url,
                       local_filepath,
                       timeout=None,
                       auth=None,
                       cache=False):
        """
        Download a file.  Resembles `astropy.utils.data.download_file` but uses
        the local ``_session``

        Parameters
        ----------
        url : str
            Address handed to ``self._session.get``.
        local_filepath : str
            Destination path, opened in binary write mode.
        timeout : optional
            Forwarded unchanged to ``self._session.get``.
        auth : optional
            Forwarded unchanged to ``self._session.get``.
        cache : bool
            If True and ``local_filepath`` already exists, the download is
            skipped when the file size matches the ``content-length`` header
            (or when the server sends no such header).  A size mismatch is
            logged as a warning and the file is downloaded again.

        Raises
        ------
        Whatever ``response.raise_for_status()`` raises for an error status.
        """
        response = self._session.get(url,
                                     timeout=timeout,
                                     stream=True,
                                     auth=auth)
        # Every exit path below (early return, error, normal completion)
        # releases the response.
        try:
            response.raise_for_status()
            if 'content-length' in response.headers:
                length = int(response.headers['content-length'])
            else:
                length = None

            if cache and os.path.exists(local_filepath):
                if length is None:
                    # No size to compare against: trust the existing file.
                    log.info("Found cached file {0}.".format(local_filepath))
                    return
                statinfo = os.stat(local_filepath)
                if statinfo.st_size == length:
                    log.info(
                        "Found cached file {0} with expected size {1}.".format(
                            local_filepath, statinfo.st_size))
                    return
                log.warning("Found cached file {0} with size {1} that is "
                            "different from expected size {2}".format(
                                local_filepath, statinfo.st_size, length))

            blocksize = astropy.utils.data.conf.download_block_size

            bytes_read = 0

            with ProgressBarOrSpinner(
                    length,
                    'Downloading URL {0} to {1} ...'.format(
                        url, local_filepath)) as pb:
                with open(local_filepath, 'wb') as f:
                    for block in response.iter_content(blocksize):
                        f.write(block)
                        # Count what was actually received: the final block is
                        # usually shorter than ``blocksize``.
                        bytes_read += len(block)
                        if length is not None:
                            # Clamp so the bar never exceeds the advertised
                            # size.
                            pb.update(min(bytes_read, length))
                        else:
                            pb.update(bytes_read)
        finally:
            response.close()
Exemple #4
0
def _download_file(remote_url, target):
    """
    Accepts a URL, downloads the file to a given open file object.

    This is a modified version of astropy.utils.data.download_file that
    downloads to an open file object instead of a cache directory.

    Parameters
    ----------
    remote_url : str
        The URL to fetch.
    target : file-like
        Open, writable object; each downloaded block is passed to
        ``target.write``.  It is not closed here.

    Raises
    ------
    URLError
        For failures raised by ``urlopen``; a ``socket.timeout`` escaping the
        download is wrapped in a ``URLError`` as well.
    """

    # ``socket`` is needed by the ``except`` clause below; it is imported
    # locally, like every other dependency of this function.
    import socket
    from contextlib import closing
    from six.moves.urllib.request import urlopen, Request
    from six.moves.urllib.error import URLError
    from astropy.utils.console import ProgressBarOrSpinner
    from astropy.utils.data import conf

    timeout = conf.remote_timeout

    try:
        # Pretend to be a web browser. Some servers that we download
        # from forbid access from programs.
        headers = {
            'User-Agent':
            'Mozilla/5.0',
            'Accept': ('text/html,application/xhtml+xml,'
                       'application/xml;q=0.9,*/*;q=0.8')
        }
        req = Request(remote_url, headers=headers)
        with closing(urlopen(req, timeout=timeout)) as remote:

            # get size of remote if available (for use in progress bar)
            info = remote.info()
            size = None
            if 'Content-Length' in info:
                try:
                    size = int(info['Content-Length'])
                except ValueError:
                    # Unparseable header: fall back to a spinner.
                    pass

            dlmsg = "Downloading {0}".format(remote_url)
            with ProgressBarOrSpinner(size, dlmsg) as p:
                bytes_read = 0
                block = remote.read(conf.download_block_size)
                while block:
                    target.write(block)
                    bytes_read += len(block)
                    p.update(bytes_read)
                    block = remote.read(conf.download_block_size)

    # Append a more informative error message to URLErrors.
    except URLError as e:
        append_msg = (hasattr(e, 'reason') and hasattr(e.reason, 'errno')
                      and e.reason.errno == 8)
        if append_msg:
            msg = "{0}. requested URL: {1}".format(e.reason.strerror,
                                                   remote_url)
            e.reason.strerror = msg
            e.reason.args = (e.reason.errno, msg)
        # Bare ``raise`` re-raises the (possibly amended) exception with its
        # original traceback intact.
        raise

    # This isn't supposed to happen, but occasionally a socket.timeout gets
    # through.  It's supposed to be caught in `urllib2` and raised in this
    # way, but for some reason in mysterious circumstances it doesn't. So
    # we'll just re-raise it here instead.
    except socket.timeout as e:
        raise URLError(e)
Exemple #5
0
    def download_file(self, data_product, local_path, cache=True):
        """
        Takes a data product in the form of an  `~astropy.table.Row` and downloads it from the cloud into
        the given directory.

        Parameters
        ----------
        data_product :  `~astropy.table.Row`
            Product to download.
        local_path : str
            The local filename to which the downloaded file will be saved.
        cache : bool
            Default is True. If file is found on disc it will not be downloaded again.

        Raises
        ------
        Exception
            If ``self.get_cloud_uri`` returns no path for ``data_product``.
        """

        s3 = self.boto3.resource('s3', config=self.config)
        s3_client = self.boto3.client('s3', config=self.config)
        bkt = s3.Bucket(self.pubdata_bucket)
        # Warnings emitted while resolving the cloud path are suppressed; a
        # missing path is reported through the exception below instead.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            bucket_path = self.get_cloud_uri(data_product, False)
        if not bucket_path:
            raise Exception("Unable to locate file {}.".format(
                data_product['productFilename']))

        # Ask the webserver (in this case S3) what the expected content length is and use that.
        info_lookup = s3_client.head_object(Bucket=self.pubdata_bucket,
                                            Key=bucket_path)
        length = info_lookup["ContentLength"]

        if cache and os.path.exists(local_path):
            if length is not None:
                statinfo = os.stat(local_path)
                if statinfo.st_size != length:
                    log.warning("Found cached file {0} with size {1} that is "
                                "different from expected size {2}".format(
                                    local_path, statinfo.st_size, length))
                else:
                    log.info(
                        "Found cached file {0} with expected size {1}.".format(
                            local_path, statinfo.st_size))
                    return

        with ProgressBarOrSpinner(
                length, ('Downloading URL s3://{0}/{1} to {2} ...'.format(
                    self.pubdata_bucket, bucket_path, local_path))) as pb:

            # Running total of bytes received.  Kept in a one-element list so
            # the callback can update it without a module-level global, which
            # would be shared between concurrent downloads.
            bytes_read = [0]

            progress_lock = threading.Lock()

            def progress_callback(numbytes):
                # This callback can be called in multiple threads
                # Access to updating the console needs to be locked
                with progress_lock:
                    bytes_read[0] += numbytes
                    pb.update(bytes_read[0])

            bkt.download_file(bucket_path,
                              local_path,
                              Callback=progress_callback)
Exemple #6
0
    def _download_file(self,
                       url,
                       local_filepath,
                       timeout=None,
                       auth=None,
                       continuation=True,
                       cache=False,
                       **kwargs):
        """
        Download a file.  Resembles `astropy.utils.data.download_file` but uses
        the local ``_session``

        Parameters
        ----------
        url : str
            Address handed to ``self._session.get``.
        local_filepath : str
            Destination path.
        timeout : optional
            Forwarded unchanged to ``self._session.get``.
        auth : optional
            Forwarded unchanged to ``self._session.get``.
        continuation : bool
            If True, ``local_filepath`` already exists and the response
            carries an ``Accept-Ranges`` header, a non-empty partial file is
            completed with a ``Range`` request and appended to.
        cache : bool
            If True (and the continuation case does not apply), an existing
            file is reused when its size matches ``content-length`` or when
            no such header is sent; a mismatch triggers a fresh download.
        **kwargs
            Forwarded unchanged to ``self._session.get``.

        Raises
        ------
        Whatever ``response.raise_for_status()`` raises for an error status.
        """
        response = self._session.get(url,
                                     timeout=timeout,
                                     stream=True,
                                     auth=auth,
                                     **kwargs)
        # Bytes already on disk when a download is resumed.
        resume_offset = 0

        # Every exit path below (early return, error, normal completion)
        # releases the response that is current at that time.
        try:
            response.raise_for_status()
            if 'content-length' in response.headers:
                length = int(response.headers['content-length'])
            else:
                length = None

            open_mode = 'wb'

            if ((os.path.exists(local_filepath)
                 and ('Accept-Ranges' in response.headers) and continuation)):
                existing_file_length = os.stat(local_filepath).st_size
                if length is not None and existing_file_length >= length:
                    # all done!
                    log.info(
                        "Found cached file {0} with expected size {1}.".format(
                            local_filepath, existing_file_length))
                    return
                elif existing_file_length > 0:
                    open_mode = 'ab'
                    resume_offset = existing_file_length

                    if length is not None:
                        remaining = length - existing_file_length
                        log.info("Continuing download of file {0}, with {1} "
                                 "bytes to go ({2}%)".format(
                                     local_filepath, remaining,
                                     remaining / length * 100))
                        # bytes are indexed from 0:
                        # https://en.wikipedia.org/wiki/List_of_HTTP_header_fields#range-request-header
                        end = "{0}".format(length - 1)
                    else:
                        # Total size unknown: request everything from the
                        # current offset onwards (open-ended range).
                        log.info("Continuing download of file {0}, starting "
                                 "at byte {1}".format(local_filepath,
                                                      existing_file_length))
                        end = ""

                    # The first response is no longer needed.
                    response.close()

                    self._session.headers['Range'] = "bytes={0}-{1}".format(
                        existing_file_length, end)
                    try:
                        response = self._session.get(url,
                                                     timeout=timeout,
                                                     stream=True,
                                                     auth=auth,
                                                     **kwargs)
                    finally:
                        # The session is shared: never let the Range header
                        # leak into later, unrelated requests.
                        self._session.headers.pop('Range', None)
                    response.raise_for_status()
                # else: the existing file is empty, so start from scratch
                # using the response already in hand.

            elif cache and os.path.exists(local_filepath):
                if length is None:
                    # No size to compare against: trust the existing file.
                    log.info("Found cached file {0}.".format(local_filepath))
                    return
                statinfo = os.stat(local_filepath)
                if statinfo.st_size == length:
                    log.info(
                        "Found cached file {0} with expected size {1}.".format(
                            local_filepath, statinfo.st_size))
                    return
                log.warning("Found cached file {0} with size {1} that is "
                            "different from expected size {2}".format(
                                local_filepath, statinfo.st_size, length))

            blocksize = astropy.utils.data.conf.download_block_size

            # ``length`` is the size of the whole file, so progress starts at
            # whatever is already on disk.
            bytes_read = resume_offset

            # Only show progress bar if logging level is INFO or lower.
            if log.getEffectiveLevel() <= 20:
                progress_stream = None  # Astropy default
            else:
                progress_stream = io.StringIO()

            with ProgressBarOrSpinner(
                    length,
                    ('Downloading URL {0} to {1} ...'.format(url,
                                                             local_filepath)),
                    file=progress_stream) as pb:
                with open(local_filepath, open_mode) as f:
                    for block in response.iter_content(blocksize):
                        f.write(block)
                        # Count what was actually received: the final block is
                        # usually shorter than ``blocksize``.
                        bytes_read += len(block)
                        if length is not None:
                            # Clamp so the bar never exceeds the advertised
                            # size.
                            pb.update(min(bytes_read, length))
                        else:
                            pb.update(bytes_read)
        finally:
            response.close()
Exemple #7
0
def fetch(channels, start, end, type=None, dtype=None, allow_tape=None,
          connection=None, host=None, port=None, pad=None, verbose=False,
          series_class=TimeSeries):
    """Fetch a dict of data series from NDS2

    This method sits underneath `TimeSeries.fetch` and related methods,
    and isn't really designed to be called directly.

    Parameters
    ----------
    channels : list
        Channels to fetch; also used as the keys of the returned dict.
    start, end
        Bounds of the requested span (expanded outwards to multiples of 60
        when a minute trend is requested and they are not already aligned).
    type, dtype
        Per-channel selectors, normalised by ``_parse_nds_enum_dict_param``.
    allow_tape : optional
        If not None, sets the ``ALLOW_DATA_ON_TAPE`` connection parameter.
    connection
        Open NDS2 connection used for every query.
    host, port
        Unused here (consumed by the decorator only).
    pad : optional
        If not None, availability is queried first and gaps are padded with
        this value.
    verbose : bool
        Print progress messages and show the download progress bar.
    series_class : type
        Class providing ``DictClass`` and ``from_nds2_buffer``.

    Returns
    -------
    out : ``series_class.DictClass``
        The series accumulated for every channel.
    """
    # host and port keywords are used by the decorator only
    # pylint: disable=unused-argument

    # set ALLOW_DATA_ON_TAPE
    if allow_tape is not None:
        set_parameter(connection, 'ALLOW_DATA_ON_TAPE', str(allow_tape),
                      verbose=verbose)

    type = _parse_nds_enum_dict_param(channels, 'type', type)
    dtype = _parse_nds_enum_dict_param(channels, 'dtype', dtype)

    # verify channels exist
    print_verbose("Checking channels list against NDS2 database...", end=' ',
                  verbose=verbose)
    utype = reduce(operator.or_, type.values())  # logical OR of types
    udtype = reduce(operator.or_, dtype.values())
    ndschannels = io_nds2.find_channels(channels, connection=connection,
                                        type=utype, dtype=udtype, unique=True,
                                        epoch=(start, end))
    names = ['%s,%s' % (c.name, c.channel_type_to_string(c.channel_type)) for
             c in ndschannels]
    print_verbose('done', verbose=verbose)

    # handle minute trend timing
    if (any(c.endswith('m-trend') for c in names) and
            (start % 60 or end % 60)):
        warnings.warn("Requested at least one minute trend, but "
                      "start and stop GPS times are not multiples of "
                      "60. Times will be expanded outwards to compensate")
        start, end = io_nds2.minute_trend_times(start, end)

    # get data availability
    span = SegmentList([Segment(start, end)])
    if pad is None:
        qsegs = span
        gap = None
    else:
        print_verbose("Querying for data availability...", end=' ',
                      verbose=verbose)
        gap = 'pad'
        qsegs = io_nds2.get_availability(
            ndschannels, start, end, connection=connection).intersection()
        qsegs &= span
        # str.format does not treat '%' specially, so a single '%' is what
        # prints one percent sign.
        print_verbose('done\nFound {0} viable segments of data with {1}% '
                      'coverage'.format(len(qsegs),
                                        abs(qsegs) / abs(span) * 100),
                      verbose=verbose)
        if span - qsegs:
            warnings.warn("Gaps were found in data available from {0}, "
                          "but will be padded with {1}".format(
                              connection.get_host(), pad))

    # progress output goes to the terminal only in verbose mode; otherwise
    # it is swallowed by an in-memory buffer
    stream = sys.stdout if verbose else StringIO()

    # query for each segment
    out = series_class.DictClass()
    for seg in qsegs:
        duration = seg[1] - seg[0]
        # report the segment as start-end
        msg = 'Downloading data ({}-{} | {}s):'.format(
            seg[0], seg[1], duration)
        count = 0
        with ProgressBarOrSpinner(duration, msg, file=stream) as bar:
            for buffers in connection.iterate(int(seg[0]), int(seg[1]), names):
                buffer_ = None
                for buffer_, chan in zip(buffers, channels):
                    series = series_class.from_nds2_buffer(buffer_)
                    out.append({chan: series}, pad=pad, gap=gap)
                # advance by the duration of the last buffer handled in this
                # iteration; skip the update when nothing was returned
                if buffer_ is not None:
                    count += buffer_.length / buffer_.channel.sample_rate
                bar.update(count)

    return out
Exemple #8
0
def download_file(remote_url, cache=False, show_progress=True, timeout=None):
    """
    Accepts a URL, downloads and optionally caches the result
    returning the filename, with a name determined by the file's MD5
    hash. If ``cache=True`` and the file is present in the cache, just
    returns the filename.

    Parameters
    ----------
    remote_url : str
        The URL of the file to download

    cache : bool, optional
        Whether to use the cache

    show_progress : bool, optional
        Whether to display a progress bar during the download (default
        is `True`). Regardless of this setting, the progress bar is only
        displayed when outputting to a terminal.

    timeout : float, optional
        The timeout, in seconds.  Otherwise, use
        `astropy.utils.data.Conf.remote_timeout`.

    Returns
    -------
    local_path : str
        Returns the local path that the file was download to.

    Raises
    ------
    urllib2.URLError, urllib.error.URLError
        Whenever there's a problem getting the remote file.
    """

    from astropy.utils.console import ProgressBarOrSpinner

    if timeout is None:
        timeout = conf.remote_timeout

    missing_cache = False

    if cache:
        try:
            dldir, urlmapfn = _get_download_cache_locs()
        except OSError as e:
            msg = 'Remote data cache could not be accessed due to '
            estr = '' if len(e.args) < 1 else (': ' + str(e))
            warn(CacheMissingWarning(msg + e.__class__.__name__ + estr))
            cache = False
            missing_cache = True  # indicates that the cache is missing to raise a warning later

    url_key = remote_url

    # Check if URL is Astropy data server, which has alias, and cache it.
    if (url_key.startswith(conf.dataurl)
            and conf.dataurl not in _dataurls_to_alias):
        try:
            with urllib.request.urlopen(conf.dataurl,
                                        timeout=timeout) as remote:
                _dataurls_to_alias[conf.dataurl] = [
                    conf.dataurl, remote.geturl()
                ]
        except urllib.error.URLError:  # Host unreachable
            _dataurls_to_alias[conf.dataurl] = [conf.dataurl]
    try:
        if cache:
            # We don't need to acquire the lock here, since we are only reading
            with shelve.open(urlmapfn) as url2hash:
                if url_key in url2hash:
                    return url2hash[url_key]
                # If there is a cached copy from mirror, use it.
                else:
                    for cur_url in _dataurls_to_alias.get(conf.dataurl, []):
                        if url_key.startswith(cur_url):
                            url_mirror = url_key.replace(
                                cur_url, conf.dataurl_mirror)
                            if url_mirror in url2hash:
                                return url2hash[url_mirror]

        with urllib.request.urlopen(remote_url, timeout=timeout) as remote:
            # keep a hash to rename the local file to the hashed name
            md5_hash = hashlib.md5()

            # size of the remote file, if the server reports a usable value
            info = remote.info()
            size = None
            if 'Content-Length' in info:
                try:
                    size = int(info['Content-Length'])
                except ValueError:
                    pass

            if size is not None:
                check_free_space_in_dir(gettempdir(), size)
                if cache:
                    check_free_space_in_dir(dldir, size)

            if show_progress and sys.stdout.isatty():
                progress_stream = sys.stdout
            else:
                # swallow the progress output
                progress_stream = io.StringIO()

            dlmsg = "Downloading {0}".format(remote_url)
            with ProgressBarOrSpinner(size, dlmsg, file=progress_stream) as p:
                with NamedTemporaryFile(delete=False) as f:
                    try:
                        bytes_read = 0
                        block = remote.read(conf.download_block_size)
                        while block:
                            f.write(block)
                            md5_hash.update(block)
                            bytes_read += len(block)
                            p.update(bytes_read)
                            block = remote.read(conf.download_block_size)
                    except BaseException:
                        # do not leave a partial download behind
                        if os.path.exists(f.name):
                            os.remove(f.name)
                        raise

        if cache:
            _acquire_download_cache_lock()
            try:
                with shelve.open(urlmapfn) as url2hash:
                    # We check now to see if another process has
                    # inadvertently written the file underneath us
                    # already
                    if url_key in url2hash:
                        cached_path = url2hash[url_key]
                        # Our own copy is redundant; remove it (best effort)
                        # instead of leaving it in the temp directory.
                        try:
                            os.remove(f.name)
                        except OSError:
                            pass
                        return cached_path
                    local_path = os.path.join(dldir, md5_hash.hexdigest())
                    shutil.move(f.name, local_path)
                    url2hash[url_key] = local_path
            finally:
                _release_download_cache_lock()
        else:
            local_path = f.name
            if missing_cache:
                msg = ('File downloaded to temporary location due to problem '
                       'with cache directory and will not be cached.')
                warn(CacheMissingWarning(msg, local_path))
            if conf.delete_temporary_downloads_at_exit:
                global _tempfilestodel
                _tempfilestodel.append(local_path)
    except urllib.error.URLError as e:
        if hasattr(e, 'reason') and hasattr(e.reason,
                                            'errno') and e.reason.errno == 8:
            e.reason.strerror = e.reason.strerror + '. requested URL: ' + remote_url
            e.reason.args = (e.reason.errno, e.reason.strerror)
        # re-raise the (possibly amended) error with its original traceback
        raise
    except socket.timeout as e:
        # this isn't supposed to happen, but occasionally a socket.timeout gets
        # through.  It's supposed to be caught in `urllib2` and raised in this
        # way, but for some reason in mysterious circumstances it doesn't. So
        # we'll just re-raise it here instead
        raise urllib.error.URLError(e) from e

    return local_path
Exemple #9
0
    def _download_file(self,
                       url,
                       local_filepath,
                       timeout=None,
                       auth=None,
                       continuation=True,
                       cache=False,
                       method="GET",
                       head_safe=False,
                       **kwargs):
        """
        Download a file.  Resembles `astropy.utils.data.download_file` but uses
        the local ``_session``

        Parameters
        ----------
        url : string
        local_filepath : string
        timeout : int
        auth : dict or None
        continuation : bool
            If the file has already been partially downloaded *and* the server
            supports HTTP "range" requests, the download will be continued
            where it left off.
        cache : bool
            If True (and the continuation case does not apply), an existing
            file is reused when its size matches ``content-length`` or when
            no such header is sent; a mismatch triggers a fresh download.
        method : "GET" or "POST"
        head_safe : bool
            If True, the headers are first obtained with a ``HEAD`` request
            and ``method`` is only issued once a download is really needed.
        **kwargs
            Forwarded unchanged to ``self._session.request``.

        Returns
        -------
        response or None
            The (already closed) response the file was read from, or None
            when an existing local file was used instead.

        Raises
        ------
        Whatever ``response.raise_for_status()`` raises for an error status.
        """

        response = self._session.request("HEAD" if head_safe else method,
                                         url,
                                         timeout=timeout,
                                         stream=True,
                                         auth=auth,
                                         **kwargs)
        # True while ``response`` is only the HEAD reply, which carries
        # headers but no body to stream.
        body_pending = head_safe
        # Bytes already on disk when a download is resumed.
        resume_offset = 0

        # Every exit path below (early return, error, normal completion)
        # releases the response that is current at that time.
        try:
            response.raise_for_status()
            if 'content-length' in response.headers:
                length = int(response.headers['content-length'])
                if length == 0:
                    log.warning('URL {0} has length=0'.format(url))
            else:
                length = None

            open_mode = 'wb'

            if ((os.path.exists(local_filepath)
                 and ('Accept-Ranges' in response.headers) and continuation)):
                existing_file_length = os.stat(local_filepath).st_size
                if length is not None and existing_file_length >= length:
                    # all done!
                    log.info(
                        "Found cached file {0} with expected size {1}.".format(
                            local_filepath, existing_file_length))
                    return
                elif existing_file_length > 0:
                    open_mode = 'ab'
                    resume_offset = existing_file_length

                    if length is not None:
                        remaining = length - existing_file_length
                        log.info("Continuing download of file {0}, with {1} "
                                 "bytes to go ({2}%)".format(
                                     local_filepath, remaining,
                                     remaining / length * 100))
                        # bytes are indexed from 0:
                        # https://en.wikipedia.org/wiki/List_of_HTTP_header_fields#range-request-header
                        end = "{0}".format(length - 1)
                    else:
                        # Total size unknown: request everything from the
                        # current offset onwards (open-ended range).
                        log.info("Continuing download of file {0}, starting "
                                 "at byte {1}".format(local_filepath,
                                                      existing_file_length))
                        end = ""

                    # The first response is no longer needed.
                    response.close()

                    self._session.headers['Range'] = "bytes={0}-{1}".format(
                        existing_file_length, end)
                    try:
                        response = self._session.request(method,
                                                         url,
                                                         timeout=timeout,
                                                         stream=True,
                                                         auth=auth,
                                                         **kwargs)
                    finally:
                        # The session is shared: remove the Range header even
                        # when the request fails.
                        self._session.headers.pop('Range', None)
                    response.raise_for_status()
                    body_pending = False
                # else: the existing file is empty, so start from scratch.

            elif cache and os.path.exists(local_filepath):
                if length is None:
                    # No size to compare against: trust the existing file.
                    log.info("Found cached file {0}.".format(local_filepath))
                    return
                statinfo = os.stat(local_filepath)
                if statinfo.st_size == length:
                    log.info(
                        "Found cached file {0} with expected size {1}.".format(
                            local_filepath, statinfo.st_size))
                    return
                log.warning("Found cached file {0} with size {1} that is "
                            "different from expected size {2}".format(
                                local_filepath, statinfo.st_size, length))

            if body_pending:
                # Only the HEAD reply is in hand; issue the real request for
                # every path that goes on to write the file.
                response.close()
                response = self._session.request(method,
                                                 url,
                                                 timeout=timeout,
                                                 stream=True,
                                                 auth=auth,
                                                 **kwargs)
                response.raise_for_status()

            blocksize = astropy.utils.data.conf.download_block_size

            log.debug(
                f"Downloading URL {url} to {local_filepath} with size {length} "
                f"by blocks of {blocksize}")

            # ``length`` is the size of the whole file, so progress starts at
            # whatever is already on disk.
            bytes_read = resume_offset

            # Only show progress bar if logging level is INFO or lower.
            if log.getEffectiveLevel() <= 20:
                progress_stream = None  # Astropy default
            else:
                progress_stream = io.StringIO()

            with ProgressBarOrSpinner(
                    length,
                    ('Downloading URL {0} to {1} ...'.format(url,
                                                             local_filepath)),
                    file=progress_stream) as pb:
                with open(local_filepath, open_mode) as f:
                    for block in response.iter_content(blocksize):
                        f.write(block)
                        # Count what was actually received: the final block is
                        # usually shorter than ``blocksize``.
                        bytes_read += len(block)
                        if length is not None:
                            # Clamp so the bar never exceeds the advertised
                            # size.
                            pb.update(min(bytes_read, length))
                        else:
                            pb.update(bytes_read)
        finally:
            response.close()

        return response