def download_and_update_by_brief_info(self, brief_info):
    """Download SFMR files described by *brief_info*, then refresh and
    persist the brief information.

    Parameters
    ----------
    brief_info : dict
        Mapping of year -> list of SFMRDetail-like records (entries may
        be None for files that could not be described).

    Returns
    -------
    bool
        Always True on completion.
    """
    # Download all SFMR files during period
    utils.setup_signal_handler()
    utils.set_format_custom_text(self.SFMR_CONFIG['data_name_length'])
    self.download_with_brief_info(brief_info)

    # Read all of them to update brief information
    utils.reset_signal_handler()
    brief_info = self.update_brief_info(brief_info)

    # BUGFIX: the original removed None entries from brief_info[year]
    # while iterating over that same list, which skips the element
    # following each removed one (consecutive Nones were kept).
    # Filter first, then replace the list contents in place.
    brief_info_list = []
    for year in brief_info:
        valid = [info for info in brief_info[year] if info is not None]
        # In-place slice assignment keeps the same list object, matching
        # the original's in-place mutation semantics.
        brief_info[year][:] = valid
        brief_info_list.extend(valid)

    utils.bulk_insert_avoid_duplicate_unique(
        brief_info_list,
        self.CONFIG['database']['batch_size']['insert'],
        SFMRDetail, ['filename'], self.session,
        check_self=True)

    self.brief_info = brief_info

    return True
def download(self, basin):
    """Download the IBTrACS NetCDF dataset for *basin*.

    The gzipped source name is normalised (dots replaced by
    underscores) and the resulting ``.nc`` path is stored on
    ``self.ibtracs_file_path`` before downloading.
    """
    self.logger.info('Downloading IBTrACS')
    utils.setup_signal_handler()
    utils.set_format_custom_text(
        self.CONFIG['ibtracs']['data_name_length'])

    url = self.CONFIG['ibtracs']['urls'][basin]

    # Derive a local filename from the URL basename: strip the 3-char
    # archive suffix, swap dots for underscores, append '.nc'.
    basename = url.split('/')[-1]
    local_name = basename[:-3].replace('.', '_') + '.nc'

    save_dir = self.CONFIG['ibtracs']['dirs']
    os.makedirs(save_dir, exist_ok=True)

    self.ibtracs_file_path = f'{save_dir}{local_name}'
    utils.download(url, self.ibtracs_file_path, progress=True)
def _download_all_stdmet_data(self):
    """Download Continuous Wind data into single directory. """
    self.logger.info(self.STDMET_CONFIG['prompt']['info']
                     ['download_data'])
    utils.set_format_custom_text(
        self.STDMET_CONFIG['data_name_length'])

    # Total number of station-year downloads, for the progress counter.
    total = sum(len(self.year_station[year]) for year in self.years)
    done = 0

    for year in self.years:
        for stn in self.year_station[year]:
            self._download_single_stdmet_data(stn, year)
            done += 1
            info = f'Downloading {year} stdmet data of station {stn}'
            self.logger.debug(info)
            # Carriage return keeps the counter on a single console line.
            print(f'\r{info} ({done}/{total})', end='')

    utils.delete_last_lines()
    print('Done')
def _download_sfmr_data(self):
    """Download SFMR data of hurricanes.

    Scrapes each hurricane's AOML page for NetCDF links, keeps the
    files whose embedded date falls inside ``self.period``, and
    downloads them under the configured root directory.

    Parameters
    ----------
    None
        Nothing is required by this function.

    Returns
    -------
    hit_times : dict
        Times of hurricane NetCDF file's date being in period.
    """
    self.logger.info(self.SFMR_CONFIG['prompt']['info']
                     ['download_hurr'])
    utils.set_format_custom_text(self.SFMR_CONFIG['data_name_length'])
    suffix = '.nc'
    save_root_dir = self.SFMR_CONFIG['dirs']['hurr']
    os.makedirs(save_root_dir, exist_ok=True)

    total = 0
    count = 0
    for year in self.year_hurr.keys():
        total += len(self.year_hurr[year])

    for year in self.year_hurr.keys():
        hurrs = list(self.year_hurr[year])
        for hurr in hurrs:
            count += 1
            info = (f'Download SFMR data of hurricane {hurr} '
                    + f'in {year}')
            self.logger.debug(info)
            if count > 1:
                utils.delete_last_lines()
            print(f'\r{info} ({count}/{total})', end='')

            # Create directory to store SFMR files
            dir_path = f'{save_root_dir}{year}/{hurr}/'
            os.makedirs(dir_path, exist_ok=True)

            # Generate keyword to consist url
            keyword = f'{hurr}{year}'
            url = (f'{self.SFMR_CONFIG["urls"]["prefix"]}'
                   + f'{keyword}'
                   + f'{self.SFMR_CONFIG["urls"]["suffix"]}')

            # Get page according to url
            page = requests.get(url)
            data = page.text
            soup = bs4.BeautifulSoup(data, features='lxml')
            anchors = soup.find_all('a')

            for link in anchors:
                href = link.get('href')
                # BUGFIX: anchors without an href attribute return
                # None from Tag.get(); guard before endswith() to
                # avoid an AttributeError.
                if href is None or not href.endswith(suffix):
                    continue

                # Extract file name
                filename = href.split('/')[-1]
                tail_half = filename.split('SFMR')[1]
                try:
                    # There may be NetCDF name format
                    # like 'USAF_SFMR0809221638.nc'
                    # from 'https://www.aoml.noaa.gov/hrd'
                    # '/Storm_pages/kyle2008/sfmr.html'
                    # It is very annoying and there seems
                    # no simple rule to check this problem.
                    # Because it hard to distinguish
                    # 'SFMR20110536' and 'SFMR20110524'.
                    # First one is the case as kyle2008, its
                    # actually date is 2020/11/05.
                    # Second one is a normal case, its
                    # actually date is 2011/05/24.
                    # Before 2020, following rule may work.
                    if (tail_half.startswith('20')
                            or tail_half.startswith('199')):
                        # Four-digit year form: YYYYMMDD
                        date_str = tail_half[:8]
                        date_ = datetime.date(int(date_str[:4]),
                                              int(date_str[4:6]),
                                              int(date_str[6:]))
                    else:
                        # Two-digit year form: YYMMDD, assumed 20xx
                        date_str = tail_half[:6]
                        date_ = datetime.date(
                            int(f'20{date_str[:2]}'),
                            int(date_str[2:4]),
                            int(date_str[4:]))
                        # Normalise the filename to the
                        # four-digit-year form before saving.
                        filename = (
                            f'{filename.split("SFMR")[0]}SFMR20'
                            + f'{filename.split("SFMR")[1]}')
                except Exception as msg:
                    # BUGFIX: removed a leftover breakpoint() that
                    # dropped production runs into the interactive
                    # debugger; log the offending name instead,
                    # then abort as before.
                    self.logger.error(
                        f'Failed to parse date from SFMR filename '
                        f'{filename}')
                    exit(msg)

                if not utils.check_period(date_, self.period):
                    continue
                file_path = dir_path + filename
                utils.download(href, file_path)

    utils.delete_last_lines()
    print('Done')
def __init__(self, CONFIG, period, region, passwd, work_mode):
    """Wire up configuration, database session, CCMP grid geometry,
    and run the action selected by *work_mode*
    ('fetch' / 'compare' / 'fetch_and_compare')."""
    self.CONFIG = CONFIG
    self.period = period
    self.region = region
    self.db_root_passwd = passwd
    self.engine = None
    self.session = None
    self.logger = logging.getLogger(__name__)
    utils.setup_database(self, Base)

    # Region corners: latitudes first, then longitudes.
    (self.lat1, self.lat2) = (region[0], region[1])
    (self.lon1, self.lon2) = (region[2], region[3])

    self.spa_resolu = {
        'ccmp': self.CONFIG['ccmp']['spatial_resolution'],
        'grid': self.CONFIG['grid']['spatial_resolution'],
    }

    # CCMP grid point coordinates derived from resolution and offsets.
    ccmp_step = self.spa_resolu['ccmp']
    n_lat = self.CONFIG['ccmp']['lat_grid_points_number']
    n_lon = self.CONFIG['ccmp']['lon_grid_points_number']
    self.grid_pts = {
        'ccmp': {
            'lat': [i * ccmp_step - 78.375 for i in range(n_lat)],
            'lon': [j * ccmp_step + 0.125 for j in range(n_lon)],
        }
    }

    self.grid_lons = self.grid_lats = None
    self.grid_x = self.grid_y = None
    # Load 4 variables above
    utils.load_grid_lonlat_xy(self)

    self.zorders = self.CONFIG['plot']['zorders']['scs_basemap']

    self._get_region_corners_indices()

    ccmp_conf = self.CONFIG['ccmp']
    self.root_url = ccmp_conf['url']
    self.filename_prefix = ccmp_conf['filename']['prefix']
    self.filename_suffix = ccmp_conf['filename']['suffix']
    self.root_dir = ccmp_conf['dir']

    utils.set_format_custom_text(ccmp_conf['filename_length'])

    # Dispatch on work mode; 'fetch_and_compare' does both actions.
    if work_mode in ('fetch', 'fetch_and_compare'):
        self.download('tc')
        self.read()
    if work_mode in ('compare', 'fetch_and_compare'):
        self.compare_ccmp_with_ibtracs()