def _download_file(self, url, local_filepath, timeout=None, auth=None):
    """
    Download a file.  Resembles `astropy.utils.data.download_file` but uses
    the local ``_session``
    """
    response = self._session.get(url, timeout=timeout, stream=True,
                                 auth=auth)
    if 'content-length' in response.headers:
        length = int(response.headers['content-length'])
    else:
        length = None

    blocksize = astropy.utils.data.conf.download_block_size

    bytes_read = 0

    with ProgressBarOrSpinner(
            length, 'Downloading URL {0} ...'.format(url)) as pb:
        with open(local_filepath, 'wb') as f:
            for block in response.iter_content(blocksize):
                f.write(block)
                # count the bytes actually received; the final block is
                # usually shorter than ``blocksize``
                bytes_read += len(block)
                if length is not None:
                    pb.update(bytes_read if bytes_read <= length else length)
                else:
                    pb.update(bytes_read)

    response.close()
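# --- Usage sketch (not from the source): the helper above is written as a
# method and only assumes its host object exposes a `requests.Session` as
# ``_session``, as astroquery's BaseQuery does. The ``Downloader`` class and
# the URL below are hypothetical illustrations.
import requests
import astropy.utils.data
from astropy.utils.console import ProgressBarOrSpinner


class Downloader:
    _download_file = _download_file  # reuse the helper defined above

    def __init__(self):
        self._session = requests.Session()


Downloader()._download_file('https://data.astropy.org/intersphinx/README',
                            'README.txt', timeout=30)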
def start(self):
    desc = "Filling Charge Resolution"
    # the total is unknown up front, so pass None to get a spinner; note
    # the astropy keyword for the label is ``msg``, not ``message``
    with ProgressBarOrSpinner(None, msg=desc) as pbar:
        source = self.file_reader.read()
        for event in source:
            pbar.update(event.count)
            tels = list(event.dl0.tels_with_data)

            # Check the first event actually contains true charge
            if event.count == 0:
                try:
                    if np.all(event.mc.tel[tels[0]]
                              .photo_electron_image == 0):
                        raise KeyError
                except KeyError:
                    self.log.exception('Source does not contain '
                                       'true charge!')
                    raise

            self.r1.calibrate(event)
            self.dl0.reduce(event)
            self.dl1.calibrate(event)

            # restrict to the requested telescopes, if any were given
            if self.telescopes:
                tels = [tel for tel in self.telescopes
                        if tel in event.dl0.tels_with_data]

            for telid in tels:
                true_charge = event.mc.tel[telid].photo_electron_image
                measured_charge = event.dl1.tel[telid].image[0]
                self.calculator.add_charges(true_charge, measured_charge)
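# --- Standalone sketch of the spinner pattern used in ``start`` above:
# passing None as the total makes ProgressBarOrSpinner fall back to a
# spinner, and ``update`` is fed a running event counter. The ``range``
# source below is a stand-in for ``self.file_reader.read()``.
from astropy.utils.console import ProgressBarOrSpinner


def process_events(source, desc="Filling Charge Resolution"):
    with ProgressBarOrSpinner(None, desc) as pbar:
        for count, event in enumerate(source):
            pbar.update(count)
            # ... calibrate and analyse ``event`` here ...


process_events(range(1000))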
def _download_file(self, url, local_filepath, timeout=None, auth=None,
                   cache=False):
    """
    Download a file.  Resembles `astropy.utils.data.download_file` but uses
    the local ``_session``
    """
    response = self._session.get(url, timeout=timeout, stream=True,
                                 auth=auth)
    response.raise_for_status()
    if 'content-length' in response.headers:
        length = int(response.headers['content-length'])
    else:
        length = None

    if cache and os.path.exists(local_filepath):
        if length is not None:
            statinfo = os.stat(local_filepath)
            if statinfo.st_size != length:
                # ``log.warn`` is a deprecated alias of ``log.warning``
                log.warning("Found cached file {0} with size {1} that is "
                            "different from expected size {2}".format(
                                local_filepath, statinfo.st_size, length))
            else:
                log.info("Found cached file {0} with expected size {1}."
                         .format(local_filepath, statinfo.st_size))
                response.close()
                return
        else:
            log.info("Found cached file {0}.".format(local_filepath))
            response.close()
            return

    blocksize = astropy.utils.data.conf.download_block_size

    bytes_read = 0

    with ProgressBarOrSpinner(
            length,
            'Downloading URL {0} to {1} ...'.format(
                url, local_filepath)) as pb:
        with open(local_filepath, 'wb') as f:
            for block in response.iter_content(blocksize):
                f.write(block)
                bytes_read += len(block)
                if length is not None:
                    pb.update(bytes_read if bytes_read <= length
                              else length)
                else:
                    pb.update(bytes_read)

    response.close()
def _download_file(remote_url, target):
    """
    Accepts a URL and downloads the file to a given open file object.

    This is a modified version of astropy.utils.data.download_file that
    downloads to an open file object instead of a cache directory.
    """
    import socket
    from contextlib import closing
    from six.moves.urllib.request import urlopen, Request
    from six.moves.urllib.error import URLError
    from astropy.utils.console import ProgressBarOrSpinner
    from astropy.utils.data import conf

    timeout = conf.remote_timeout

    try:
        # Pretend to be a web browser (IE 6.0). Some servers that we
        # download from forbid access from programs.
        headers = {'User-Agent': 'Mozilla/5.0',
                   'Accept': ('text/html,application/xhtml+xml,'
                              'application/xml;q=0.9,*/*;q=0.8')}
        req = Request(remote_url, headers=headers)
        with closing(urlopen(req, timeout=timeout)) as remote:
            # get size of remote if available (for use in progress bar)
            info = remote.info()
            size = None
            if 'Content-Length' in info:
                try:
                    size = int(info['Content-Length'])
                except ValueError:
                    pass

            dlmsg = "Downloading {0}".format(remote_url)
            with ProgressBarOrSpinner(size, dlmsg) as p:
                bytes_read = 0
                block = remote.read(conf.download_block_size)
                while block:
                    target.write(block)
                    bytes_read += len(block)
                    p.update(bytes_read)
                    block = remote.read(conf.download_block_size)

    # Append a more informative error message to URLErrors.
    except URLError as e:
        append_msg = (hasattr(e, 'reason') and hasattr(e.reason, 'errno') and
                      e.reason.errno == 8)
        if append_msg:
            msg = "{0}. requested URL: {1}".format(e.reason.strerror,
                                                   remote_url)
            e.reason.strerror = msg
            e.reason.args = (e.reason.errno, msg)
        raise e

    # This isn't supposed to happen, but occasionally a socket.timeout gets
    # through.  It's supposed to be caught in `urllib2` and raised in this
    # way, but for some reason in mysterious circumstances it doesn't.  So
    # we'll just re-raise it here instead.
    except socket.timeout as e:
        raise URLError(e)
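# --- Usage sketch: because the variant above writes to an open file object,
# the caller controls the destination (a temporary file, a pipe, an
# in-memory buffer, ...). The URL is illustrative only.
import io

buf = io.BytesIO()
_download_file('https://data.astropy.org/intersphinx/README', buf)
print(len(buf.getvalue()), 'bytes downloaded')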
def download_file(self, data_product, local_path, cache=True):
    """
    Takes a data product in the form of an `~astropy.table.Row` and
    downloads it from the cloud to the given local path.

    Parameters
    ----------
    data_product : `~astropy.table.Row`
        Product to download.
    local_path : str
        The local filename to which the downloaded file will be saved.
    cache : bool
        Default is True. If the file is found on disk it will not be
        downloaded again.
    """

    s3 = self.boto3.resource('s3', config=self.config)
    s3_client = self.boto3.client('s3', config=self.config)
    bkt = s3.Bucket(self.pubdata_bucket)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        bucket_path = self.get_cloud_uri(data_product, False)
    if not bucket_path:
        raise Exception("Unable to locate file {}.".format(
            data_product['productFilename']))

    # Ask the webserver (in this case S3) what the expected content
    # length is and use that.
    info_lookup = s3_client.head_object(Bucket=self.pubdata_bucket,
                                        Key=bucket_path)
    length = info_lookup["ContentLength"]

    if cache and os.path.exists(local_path):
        if length is not None:
            statinfo = os.stat(local_path)
            if statinfo.st_size != length:
                log.warning("Found cached file {0} with size {1} that is "
                            "different from expected size {2}".format(
                                local_path, statinfo.st_size, length))
            else:
                log.info("Found cached file {0} with expected size {1}."
                         .format(local_path, statinfo.st_size))
                return

    with ProgressBarOrSpinner(
            length, ('Downloading URL s3://{0}/{1} to {2} ...'.format(
                self.pubdata_bucket, bucket_path, local_path))) as pb:

        # bytes_read tracks how much data has been received so far; it is
        # updated from the multiple boto3 transfer threads below, so use a
        # closure variable (``nonlocal``) rather than a module-level global
        bytes_read = 0

        progress_lock = threading.Lock()

        def progress_callback(numbytes):
            # Boto3 calls this from multiple threads pulling the data from
            # S3, so updates to the counter and the console must be locked
            nonlocal bytes_read
            with progress_lock:
                bytes_read += numbytes
                pb.update(bytes_read)

        bkt.download_file(bucket_path, local_path,
                          Callback=progress_callback)
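# --- Minimal standalone sketch of the lock-protected boto3 progress
# callback used above, detached from the MAST-specific plumbing. Bucket and
# key names are placeholders; a configured boto3 environment is assumed.
import threading

import boto3
from astropy.utils.console import ProgressBarOrSpinner


def s3_download(bucket_name, key, local_path):
    s3 = boto3.resource('s3')
    # HEAD the object so the progress bar knows the total size
    length = s3.Object(bucket_name, key).content_length
    lock = threading.Lock()
    bytes_read = 0

    with ProgressBarOrSpinner(
            length, 'Downloading s3://{0}/{1} ...'.format(
                bucket_name, key)) as pb:

        def callback(numbytes):
            # boto3 invokes this from several transfer threads, so both
            # the counter and the console update are serialized
            nonlocal bytes_read
            with lock:
                bytes_read += numbytes
                pb.update(bytes_read)

        s3.Bucket(bucket_name).download_file(key, local_path,
                                             Callback=callback)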
def _download_file(self, url, local_filepath, timeout=None, auth=None,
                   continuation=True, cache=False, **kwargs):
    """
    Download a file.  Resembles `astropy.utils.data.download_file` but uses
    the local ``_session``
    """
    response = self._session.get(url, timeout=timeout, stream=True,
                                 auth=auth, **kwargs)
    response.raise_for_status()
    if 'content-length' in response.headers:
        length = int(response.headers['content-length'])
    else:
        length = None

    if ((os.path.exists(local_filepath) and
         ('Accept-Ranges' in response.headers) and continuation)):
        open_mode = 'ab'

        existing_file_length = os.stat(local_filepath).st_size
        if length is not None and existing_file_length >= length:
            # all done!
            log.info("Found cached file {0} with expected size {1}."
                     .format(local_filepath, existing_file_length))
            return
        elif existing_file_length == 0:
            open_mode = 'wb'
        else:
            log.info("Continuing download of file {0}, with {1} bytes to "
                     "go ({2}%)".format(
                         local_filepath,
                         length - existing_file_length,
                         (length - existing_file_length) / length * 100))

        # bytes are indexed from 0:
        # https://en.wikipedia.org/wiki/List_of_HTTP_header_fields#range-request-header
        end = "{0}".format(length - 1) if length is not None else ""
        self._session.headers['Range'] = "bytes={0}-{1}".format(
            existing_file_length, end)

        response = self._session.get(url, timeout=timeout, stream=True,
                                     auth=auth, **kwargs)
        response.raise_for_status()
        # remove the Range header again so it does not leak into later
        # requests made with this shared session
        del self._session.headers['Range']

    elif cache and os.path.exists(local_filepath):
        if length is not None:
            statinfo = os.stat(local_filepath)
            if statinfo.st_size != length:
                log.warning("Found cached file {0} with size {1} that is "
                            "different from expected size {2}".format(
                                local_filepath, statinfo.st_size, length))
                open_mode = 'wb'
            else:
                log.info("Found cached file {0} with expected size {1}."
                         .format(local_filepath, statinfo.st_size))
                response.close()
                return
        else:
            log.info("Found cached file {0}.".format(local_filepath))
            response.close()
            return
    else:
        open_mode = 'wb'

    blocksize = astropy.utils.data.conf.download_block_size

    bytes_read = 0

    # Only show progress bar if logging level is INFO or lower.
    if log.getEffectiveLevel() <= 20:
        progress_stream = None  # Astropy default
    else:
        progress_stream = io.StringIO()

    with ProgressBarOrSpinner(
            length,
            ('Downloading URL {0} to {1} ...'.format(url, local_filepath)),
            file=progress_stream) as pb:
        with open(local_filepath, open_mode) as f:
            for block in response.iter_content(blocksize):
                f.write(block)
                bytes_read += len(block)
                if length is not None:
                    pb.update(bytes_read if bytes_read <= length
                              else length)
                else:
                    pb.update(bytes_read)

    response.close()
def fetch(channels, start, end, type=None, dtype=None, allow_tape=None,
          connection=None, host=None, port=None, pad=None, verbose=False,
          series_class=TimeSeries):
    # host and port keywords are used by the decorator only
    # pylint: disable=unused-argument
    """Fetch a dict of data series from NDS2

    This method sits underneath `TimeSeries.fetch` and related methods,
    and isn't really designed to be called directly.
    """
    # set ALLOW_DATA_ON_TAPE
    if allow_tape is not None:
        set_parameter(connection, 'ALLOW_DATA_ON_TAPE', str(allow_tape),
                      verbose=verbose)

    type = _parse_nds_enum_dict_param(channels, 'type', type)
    dtype = _parse_nds_enum_dict_param(channels, 'dtype', dtype)

    # verify channels exist
    print_verbose("Checking channels list against NDS2 database...",
                  end=' ', verbose=verbose)
    utype = reduce(operator.or_, type.values())  # logical OR of types
    udtype = reduce(operator.or_, dtype.values())
    ndschannels = io_nds2.find_channels(channels, connection=connection,
                                        type=utype, dtype=udtype,
                                        unique=True, epoch=(start, end))
    names = ['%s,%s' % (c.name, c.channel_type_to_string(c.channel_type))
             for c in ndschannels]
    print_verbose('done', verbose=verbose)

    # handle minute trend timing
    if (any(c.endswith('m-trend') for c in names) and
            (start % 60 or end % 60)):
        warnings.warn("Requested at least one minute trend, but "
                      "start and stop GPS times are not multiples of "
                      "60. Times will be expanded outwards to compensate")
        start, end = io_nds2.minute_trend_times(start, end)

    # get data availability
    span = SegmentList([Segment(start, end)])
    if pad is None:
        qsegs = span
        gap = None
    else:
        print_verbose("Querying for data availability...", end=' ',
                      verbose=verbose)
        gap = 'pad'
        qsegs = io_nds2.get_availability(
            ndschannels, start, end, connection=connection).intersection()
        qsegs &= span
        print_verbose('done\nFound {0} viable segments of data with {1}% '
                      'coverage'.format(len(qsegs),
                                        abs(qsegs) / abs(span) * 100),
                      verbose=verbose)
        if span - qsegs:
            warnings.warn("Gaps were found in data available from {0}, "
                          "but will be padded with {1}".format(
                              connection.get_host(), pad))

    # query for each segment
    out = series_class.DictClass()
    for seg in qsegs:
        duration = seg[1] - seg[0]
        msg = 'Downloading data ({}-{} | {}s):'.format(seg[0], seg[1],
                                                       duration)
        stream = sys.stdout if verbose else StringIO()
        count = 0
        with ProgressBarOrSpinner(duration, msg, file=stream) as bar:
            for buffers in connection.iterate(int(seg[0]), int(seg[1]),
                                              names):
                for buffer_, chan in zip(buffers, channels):
                    series = series_class.from_nds2_buffer(buffer_)
                    out.append({chan: series}, pad=pad, gap=gap)
                # advance the bar by the duration covered by this batch
                count += buffer_.length / buffer_.channel.sample_rate
                bar.update(count)
    return out
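# --- The verbose-gating idiom above (send the bar to a throwaway StringIO
# unless ``verbose``) generalizes to any ProgressBarOrSpinner; a minimal
# sketch with a fake workload:
import sys
from io import StringIO

from astropy.utils.console import ProgressBarOrSpinner


def count_up(total, verbose=False):
    stream = sys.stdout if verbose else StringIO()
    with ProgressBarOrSpinner(total, 'Downloading data:',
                              file=stream) as bar:
        for done in range(1, total + 1):
            bar.update(done)


count_up(100, verbose=True)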
def download_file(remote_url, cache=False, show_progress=True, timeout=None):
    """
    Accepts a URL, downloads and optionally caches the result, returning
    the filename, with a name determined by the file's MD5 hash. If
    ``cache=True`` and the file is present in the cache, just returns the
    filename.

    Parameters
    ----------
    remote_url : str
        The URL of the file to download

    cache : bool, optional
        Whether to use the cache

    show_progress : bool, optional
        Whether to display a progress bar during the download (default
        is `True`). Regardless of this setting, the progress bar is only
        displayed when outputting to a terminal.

    timeout : float, optional
        The timeout, in seconds.  If not given, use
        `astropy.utils.data.Conf.remote_timeout`.

    Returns
    -------
    local_path : str
        Returns the local path that the file was downloaded to.

    Raises
    ------
    urllib2.URLError, urllib.error.URLError
        Whenever there's a problem getting the remote file.
    """
    from astropy.utils.console import ProgressBarOrSpinner

    if timeout is None:
        timeout = conf.remote_timeout

    missing_cache = False

    if cache:
        try:
            dldir, urlmapfn = _get_download_cache_locs()
        except OSError as e:
            msg = 'Remote data cache could not be accessed due to '
            estr = '' if len(e.args) < 1 else (': ' + str(e))
            warn(CacheMissingWarning(msg + e.__class__.__name__ + estr))
            cache = False
            # indicates that the cache is missing, to raise a warning later
            missing_cache = True

    url_key = remote_url

    # Check if the URL is an Astropy data server URL, which has an alias,
    # and cache it.
    if (url_key.startswith(conf.dataurl) and
            conf.dataurl not in _dataurls_to_alias):
        try:
            with urllib.request.urlopen(conf.dataurl,
                                        timeout=timeout) as remote:
                _dataurls_to_alias[conf.dataurl] = [conf.dataurl,
                                                    remote.geturl()]
        except urllib.error.URLError:  # Host unreachable
            _dataurls_to_alias[conf.dataurl] = [conf.dataurl]

    try:
        if cache:
            # We don't need to acquire the lock here, since we are only
            # reading
            with shelve.open(urlmapfn) as url2hash:
                if url_key in url2hash:
                    return url2hash[url_key]
                # If there is a cached copy from a mirror, use it.
                else:
                    for cur_url in _dataurls_to_alias.get(conf.dataurl, []):
                        if url_key.startswith(cur_url):
                            url_mirror = url_key.replace(
                                cur_url, conf.dataurl_mirror)
                            if url_mirror in url2hash:
                                return url2hash[url_mirror]

        with urllib.request.urlopen(remote_url, timeout=timeout) as remote:
            # keep a hash to rename the local file to the hashed name
            hash = hashlib.md5()

            info = remote.info()
            if 'Content-Length' in info:
                try:
                    size = int(info['Content-Length'])
                except ValueError:
                    size = None
            else:
                size = None

            if size is not None:
                check_free_space_in_dir(gettempdir(), size)
                if cache:
                    check_free_space_in_dir(dldir, size)

            if show_progress and sys.stdout.isatty():
                progress_stream = sys.stdout
            else:
                progress_stream = io.StringIO()

            dlmsg = "Downloading {0}".format(remote_url)
            with ProgressBarOrSpinner(size, dlmsg,
                                      file=progress_stream) as p:
                with NamedTemporaryFile(delete=False) as f:
                    try:
                        bytes_read = 0
                        block = remote.read(conf.download_block_size)
                        while block:
                            f.write(block)
                            hash.update(block)
                            bytes_read += len(block)
                            p.update(bytes_read)
                            block = remote.read(conf.download_block_size)
                    except BaseException:
                        if os.path.exists(f.name):
                            os.remove(f.name)
                        raise

        if cache:
            _acquire_download_cache_lock()
            try:
                with shelve.open(urlmapfn) as url2hash:
                    # We check now to see if another process has
                    # inadvertently written the file underneath us already
                    if url_key in url2hash:
                        return url2hash[url_key]
                    local_path = os.path.join(dldir, hash.hexdigest())
                    shutil.move(f.name, local_path)
                    url2hash[url_key] = local_path
            finally:
                _release_download_cache_lock()
        else:
            local_path = f.name
            if missing_cache:
                msg = ('File downloaded to temporary location due to '
                       'problem with cache directory and will not be '
                       'cached.')
                warn(CacheMissingWarning(msg, local_path))
            if conf.delete_temporary_downloads_at_exit:
                global _tempfilestodel
                _tempfilestodel.append(local_path)

    except urllib.error.URLError as e:
        if (hasattr(e, 'reason') and hasattr(e.reason, 'errno') and
                e.reason.errno == 8):
            e.reason.strerror = (e.reason.strerror +
                                 '. requested URL: ' + remote_url)
            e.reason.args = (e.reason.errno, e.reason.strerror)
        raise e
    except socket.timeout as e:
        # this isn't supposed to happen, but occasionally a socket.timeout
        # gets through.  It's supposed to be caught in `urllib2` and raised
        # in this way, but for some reason in mysterious circumstances it
        # doesn't.  So we'll just re-raise it here instead
        raise urllib.error.URLError(e)

    return local_path
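# --- Typical call of the cache-aware downloader above (the URL is
# illustrative): the return value is a path inside the download cache,
# named after the file's MD5 hash.
path = download_file('https://data.astropy.org/intersphinx/README',
                     cache=True, show_progress=True)
print(path)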
def _download_file(self, url, local_filepath, timeout=None, auth=None,
                   continuation=True, cache=False, method="GET",
                   head_safe=False, **kwargs):
    """
    Download a file.  Resembles `astropy.utils.data.download_file` but uses
    the local ``_session``

    Parameters
    ----------
    url : string
    local_filepath : string
    timeout : int
    auth : dict or None
    continuation : bool
        If the file has already been partially downloaded *and* the server
        supports HTTP "range" requests, the download will be continued
        where it left off.
    cache : bool
    method : "GET" or "POST"
    head_safe : bool
    """

    if head_safe:
        response = self._session.request("HEAD", url,
                                         timeout=timeout, stream=True,
                                         auth=auth, **kwargs)
    else:
        response = self._session.request(method, url,
                                         timeout=timeout, stream=True,
                                         auth=auth, **kwargs)
    response.raise_for_status()

    if 'content-length' in response.headers:
        length = int(response.headers['content-length'])
        if length == 0:
            log.warning('URL {0} has length=0'.format(url))
    else:
        length = None

    if ((os.path.exists(local_filepath) and
         ('Accept-Ranges' in response.headers) and continuation)):
        open_mode = 'ab'

        existing_file_length = os.stat(local_filepath).st_size
        if length is not None and existing_file_length >= length:
            # all done!
            log.info("Found cached file {0} with expected size {1}."
                     .format(local_filepath, existing_file_length))
            return
        elif existing_file_length == 0:
            open_mode = 'wb'
        else:
            log.info("Continuing download of file {0}, with {1} bytes to "
                     "go ({2}%)".format(
                         local_filepath,
                         length - existing_file_length,
                         (length - existing_file_length) / length * 100))

        # bytes are indexed from 0:
        # https://en.wikipedia.org/wiki/List_of_HTTP_header_fields#range-request-header
        end = "{0}".format(length - 1) if length is not None else ""
        self._session.headers['Range'] = "bytes={0}-{1}".format(
            existing_file_length, end)

        response = self._session.request(method, url,
                                         timeout=timeout, stream=True,
                                         auth=auth, **kwargs)
        response.raise_for_status()
        del self._session.headers['Range']

    elif cache and os.path.exists(local_filepath):
        if length is not None:
            statinfo = os.stat(local_filepath)
            if statinfo.st_size != length:
                log.warning("Found cached file {0} with size {1} that is "
                            "different from expected size {2}".format(
                                local_filepath, statinfo.st_size, length))
                open_mode = 'wb'
            else:
                log.info("Found cached file {0} with expected size {1}."
                         .format(local_filepath, statinfo.st_size))
                response.close()
                return
        else:
            log.info("Found cached file {0}.".format(local_filepath))
            response.close()
            return
    else:
        open_mode = 'wb'

        if head_safe:
            response = self._session.request(method, url,
                                             timeout=timeout, stream=True,
                                             auth=auth, **kwargs)
            response.raise_for_status()

    blocksize = astropy.utils.data.conf.download_block_size

    log.debug(f"Downloading URL {url} to {local_filepath} with size "
              f"{length} by blocks of {blocksize}")

    bytes_read = 0

    # Only show progress bar if logging level is INFO or lower.
    if log.getEffectiveLevel() <= 20:
        progress_stream = None  # Astropy default
    else:
        progress_stream = io.StringIO()

    with ProgressBarOrSpinner(
            length,
            ('Downloading URL {0} to {1} ...'.format(url, local_filepath)),
            file=progress_stream) as pb:
        with open(local_filepath, open_mode) as f:
            for block in response.iter_content(blocksize):
                f.write(block)
                bytes_read += len(block)
                if length is not None:
                    pb.update(bytes_read if bytes_read <= length
                              else length)
                else:
                    pb.update(bytes_read)

    response.close()
    return response
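# --- The continuation branch above is plain HTTP range requesting; a
# self-contained sketch with a bare requests.Session. Passing the Range
# header per-request (rather than mutating ``session.headers``) avoids
# having to delete it afterwards, as the method above must. Server support
# ('Accept-Ranges' in the response headers) is assumed.
import os

import requests


def resume_download(url, local_filepath):
    session = requests.Session()
    existing = (os.stat(local_filepath).st_size
                if os.path.exists(local_filepath) else 0)
    headers = {'Range': 'bytes={0}-'.format(existing)} if existing else {}
    with session.get(url, headers=headers, stream=True) as response:
        response.raise_for_status()
        # append to the partial file if one exists, otherwise start fresh
        with open(local_filepath, 'ab' if existing else 'wb') as f:
            for block in response.iter_content(65536):
                f.write(block)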