def get_us_midlatitude_cyclone_abi(base_dir=None, method=None, force=False):
    """Get GOES-16 ABI (CONUS sector) data from 2019-03-14 00:00Z.

    Args:
        base_dir (str): Base directory for downloaded files.
        method (str): Force download method for the data if not already cached.
            Allowed options are: 'gcsfs'. Default of ``None`` will
            choose the best method based on environment settings.
        force (bool): Force re-download of data regardless of its existence on
            the local system. Warning: May delete non-demo files stored in
            download directory.

    Total size: ~110MB

    """
    base_dir = base_dir or config.get('demo_data_dir', '.')
    if method is None:
        method = 'gcsfs'
    if method not in ['gcsfs']:
        raise NotImplementedError("Demo data download method '{}' not "
                                  "implemented yet.".format(method))

    from ._google_cloud_platform import get_bucket_files
    patterns = ['gs://gcp-public-data-goes-16/ABI-L1b-RadC/2019/073/00/*s20190730002*.nc']
    subdir = os.path.join(base_dir, 'abi_l1b', '20190314_us_midlatitude_cyclone')
    os.makedirs(subdir, exist_ok=True)
    filenames = get_bucket_files(patterns, subdir, force=force)
    assert len(filenames) == 16, "Not all files could be downloaded"
    return filenames
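
# A minimal usage sketch (not part of the original module): it assumes satpy is
# installed with the 'abi_l1b' reader available and that the public GOES-16
# Google Cloud Storage bucket is reachable. The helper below is hypothetical
# and is never called by this module.
def _example_load_us_midlatitude_cyclone():
    from satpy import Scene
    filenames = get_us_midlatitude_cyclone_abi()
    scn = Scene(reader='abi_l1b', filenames=filenames)
    scn.load(['C01'])  # 0.47 micron "blue" visible band
    return scn
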
def download_typhoon_surigae_ahi(base_dir=None,
                                 channels=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16),
                                 segments=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)):
    """Download Himawari 8 AHI data from 2021-04-17 05:00Z.

    This scene shows Typhoon Surigae.
    """
    import s3fs
    base_dir = base_dir or config.get('demo_data_dir', '.')
    # Bands 1, 2, and 4 are 1 km (R10) and band 3 is 0.5 km (R05); all other
    # bands are 2 km (R20).
    channel_resolution = {1: 10,
                          2: 10,
                          3: 5,
                          4: 10}
    data_files = []
    for channel in channels:
        resolution = channel_resolution.get(channel, 20)
        for segment in segments:
            data_files.append(
                f"HS_H08_20210417_0500_B{channel:02d}_FLDK_R{resolution:02d}_S{segment:02d}10.DAT.bz2")

    subdir = os.path.join(base_dir, 'ahi_hsd', '20210417_0500_typhoon_surigae')
    os.makedirs(subdir, exist_ok=True)
    fs = s3fs.S3FileSystem(anon=True)

    result = []
    for filename in data_files:
        destination_filename = os.path.join(subdir, filename)
        result.append(destination_filename)
        if os.path.exists(destination_filename):
            # Skip files that have already been downloaded
            continue
        to_get = 'noaa-himawari8/AHI-L1b-FLDK/2021/04/17/0500/' + filename
        fs.get_file(to_get, destination_filename)
    return result
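
# A minimal usage sketch (not part of the original module): it assumes satpy is
# installed with the 'ahi_hsd' reader and that the public 'noaa-himawari8' S3
# bucket is reachable. The helper below is hypothetical and never called.
def _example_load_typhoon_surigae():
    from satpy import Scene
    # Download only band 3 for two full-disk segments to keep the example small.
    filenames = download_typhoon_surigae_ahi(channels=(3,), segments=(4, 5))
    scn = Scene(reader='ahi_hsd', filenames=filenames)
    scn.load(['B03'])  # 0.64 micron visible band
    return scn
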
def get_viirs_sdr_20170128_1229(
        base_dir=None,
        channels=("I01", "I02", "I03", "I04", "I05",
                  "M01", "M02", "M03", "M04", "M05",
                  "M06", "M07", "M08", "M09", "M10",
                  "M11", "M12", "M13", "M14", "M15",
                  "M16", "DNB"),
        granules=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)):
    r"""Get VIIRS SDR files for 2017-01-28 12:29 to 12:43.

    These files are downloaded from Zenodo. You can see the full file
    listing here: https://zenodo.org/record/263296

    Specific channels can be specified with the ``channels`` keyword argument.
    By default, all channels (all I bands, all M bands, and the DNB) will be
    downloaded. Channels are referred to by their band type and channel number
    (e.g. "I01", "M16", or "DNB"). Terrain-corrected geolocation files are
    always downloaded when the corresponding band data is specified.

    The ``granules`` argument controls which granules ("time steps") are
    downloaded. There are 10 available and the keyword argument can be
    specified as a tuple of integers from 1 to 10.

    This full dataset is ~10.1GB.

    Notes:
        File list was retrieved using the zenodo API.

        .. code-block:: python

            import json
            import requests
            viirs_listing = requests.get("https://zenodo.org/api/records/263296")
            viirs_dict = json.loads(viirs_listing.content)
            print("\n".join(sorted(x['links']['self'] for x in viirs_dict['files'])))

    """
    base_dir = base_dir or config.get("demo_data_dir", ".")
    subdir = os.path.join(base_dir, "viirs_sdr", "20170128_1229")
    os.makedirs(subdir, exist_ok=True)
    urls = (ZENODO_BASE_URL + fn for fn in _get_filenames_to_download(channels, granules))

    files = []
    for url in urls:
        target = os.path.join(subdir, os.path.basename(url))
        files.append(target)
        if os.path.isfile(target):
            logger.info(f"File {target} already exists, skipping...")
            continue
        logger.info(f"Downloading file to {target}...")
        download_url(url, target)

    return files
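
# A minimal usage sketch (not part of the original module): it assumes satpy is
# installed with the 'viirs_sdr' reader and that Zenodo is reachable. The helper
# below is hypothetical and never called.
def _example_load_viirs_sdr_subset():
    from satpy import Scene
    # Only download the I04 band for the first two granules, a small subset of
    # the full ~10.1GB dataset.
    filenames = get_viirs_sdr_20170128_1229(channels=("I04",), granules=(1, 2))
    scn = Scene(reader='viirs_sdr', filenames=filenames)
    scn.load(['I04'])
    return scn
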
def download_seviri_hrit_20180228_1500(base_dir=None, subset=None):
    """Download the SEVIRI HRIT files for 2018-02-28T15:00.

    *subset* is a dictionary with the channels as keys and the granules to
    download as values, e.g.::

        {"HRV": [1, 2, 3], "IR_108": [1, 2], "EPI": None}

    """
    files = generate_subset_of_filenames(subset)

    base_dir = base_dir or config.get("demo_data_dir", ".")
    subdir = os.path.join(base_dir, "seviri_hrit", "20180228_1500")
    os.makedirs(subdir, exist_ok=True)
    targets = []
    for the_file in files:
        target = os.path.join(subdir, the_file)
        targets.append(target)
        if os.path.isfile(target):
            continue
        download_url(ZENODO_BASE_URL + the_file, target)
    return targets
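
# A minimal usage sketch (not part of the original module) showing the *subset*
# dictionary format described in the docstring above: channel names map to
# segment lists, and single files such as the epilogue map to ``None``. It
# assumes Zenodo is reachable. The helper below is hypothetical and never called.
def _example_download_seviri_hrit_subset():
    subset = {"HRV": [1, 2, 3], "IR_108": [1, 2], "EPI": None}
    return download_seviri_hrit_20180228_1500(subset=subset)
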
def get_hurricane_florence_abi(base_dir=None, method=None, force=False,
                               channels=None, num_frames=10):
    """Get GOES-16 ABI (Meso sector) data from 2018-09-11 13:00Z to 17:00Z.

    Args:
        base_dir (str): Base directory for downloaded files.
        method (str): Force download method for the data if not already cached.
            Allowed options are: 'gcsfs'. Default of ``None`` will
            choose the best method based on environment settings.
        force (bool): Force re-download of data regardless of its existence on
            the local system. Warning: May delete non-demo files stored in
            download directory.
        channels (list): Channels to include in download. Defaults to all
            16 channels.
        num_frames (int or slice): Number of frames to download. Maximum
            240 frames. Default 10 frames.

    Size per frame (all channels): ~15MB

    Total size (default 10 frames, all channels): ~124MB

    Total size (240 frames, all channels): ~3.5GB

    """
    base_dir = base_dir or config.get('demo_data_dir', '.')
    if channels is None:
        channels = range(1, 17)
    if method is None:
        method = 'gcsfs'
    if method not in ['gcsfs']:
        raise NotImplementedError("Demo data download method '{}' not "
                                  "implemented yet.".format(method))
    if isinstance(num_frames, (int, float)):
        frame_slice = slice(0, num_frames)
    else:
        frame_slice = num_frames

    from ._google_cloud_platform import get_bucket_files
    patterns = []
    for channel in channels:
        # patterns += ['gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/1[3456]/'
        #              '*C{:02d}*s20182541[3456]*.nc'.format(channel)]
        patterns += [(
            'gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/13/*RadM1*C{:02d}*s201825413*.nc'.format(channel),
            'gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/14/*RadM1*C{:02d}*s201825414*.nc'.format(channel),
            'gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/15/*RadM1*C{:02d}*s201825415*.nc'.format(channel),
            'gs://gcp-public-data-goes-16/ABI-L1b-RadM/2018/254/16/*RadM1*C{:02d}*s201825416*.nc'.format(channel),
        )]
    subdir = os.path.join(base_dir, 'abi_l1b', '20180911_hurricane_florence_abi_l1b')
    os.makedirs(subdir, exist_ok=True)
    filenames = get_bucket_files(patterns, subdir, force=force, pattern_slice=frame_slice)

    actual_slice = frame_slice.indices(240)  # 240 max frames
    num_frames = int((actual_slice[1] - actual_slice[0]) / actual_slice[2])
    assert len(filenames) == len(channels) * num_frames, "Not all files could be downloaded"
    return filenames
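
# A minimal usage sketch (not part of the original module): it assumes satpy is
# installed with the 'abi_l1b' reader and MultiScene support, and that the
# public GOES-16 Google Cloud Storage bucket is reachable. The helper below is
# hypothetical and never called.
def _example_animate_hurricane_florence():
    from satpy import MultiScene
    # Download 20 frames of only channel 13 (10.3 micron "clean" IR window).
    filenames = get_hurricane_florence_abi(channels=[13], num_frames=20)
    mscn = MultiScene.from_files(filenames, reader='abi_l1b')
    mscn.load(['C13'])
    return mscn
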
def get_fci_test_data_dir(base_dir=None):
    """Get directory for FCI test data."""
    base_dir = base_dir or config.get("demo_data_dir", ".")
    return pathlib.Path(base_dir) / "fci" / "test_data"