Example #1
0
    def download_and_update_by_brief_info(self, brief_info):
        """Download SFMR files described by `brief_info`, re-read them to
        refresh the info records, and bulk-insert the result into the
        database.

        Parameters
        ----------
        brief_info : dict
            Mapping from year to a list of SFMR file info records;
            entries may be None after updating.

        Returns
        -------
        bool
            Always True on completion.

        """
        # Download all SFMR files during period
        utils.setup_signal_handler()
        utils.set_format_custom_text(self.SFMR_CONFIG['data_name_length'])
        self.download_with_brief_info(brief_info)

        # Read all of them to update brief information
        utils.reset_signal_handler()
        brief_info = self.update_brief_info(brief_info)

        # Drop None entries.  NOTE: the original code called
        # `brief_info[year].remove(info)` while iterating the same list,
        # which skips elements; build a filtered list instead.
        brief_info_list = []
        for year in brief_info:
            valid_infos = [info for info in brief_info[year]
                           if info is not None]
            brief_info[year] = valid_infos
            brief_info_list.extend(valid_infos)

        utils.bulk_insert_avoid_duplicate_unique(
            brief_info_list,
            self.CONFIG['database']['batch_size']['insert'],
            SFMRDetail, ['filename'],
            self.session,
            check_self=True)

        self.brief_info = brief_info

        return True
Example #2
0
    def download(self, basin):
        """Download the IBTrACS NetCDF file for a basin.

        Parameters
        ----------
        basin : str
            Key into the configured IBTrACS URL table
            (``self.CONFIG['ibtracs']['urls']``).

        """
        self.logger.info('Downloading IBTrACS')
        utils.setup_signal_handler()
        utils.set_format_custom_text(
            self.CONFIG['ibtracs']['data_name_length'])

        url = self.CONFIG['ibtracs']['urls'][basin]
        # Derive local filename: strip the trailing '.nc', replace the
        # remaining dots so the name is filesystem-friendly, re-add '.nc'.
        # (Renamed locals: `dir` and `file` shadowed builtins.)
        filename = url.split('/')[-1]
        filename = filename[:-3].replace('.', '_') + '.nc'
        # save_dir is assumed to end with a path separator — TODO confirm
        save_dir = self.CONFIG['ibtracs']['dirs']
        os.makedirs(save_dir, exist_ok=True)
        self.ibtracs_file_path = f'{save_dir}{filename}'

        utils.download(url, self.ibtracs_file_path, progress=True)
Example #3
0
    def _download_all_stdmet_data(self):
        """Download stdmet data of all stations into single directory.

        Iterates over every (year, station) pair in `self.year_station`
        and downloads each station's stdmet file, showing a one-line
        console progress indicator.

        """
        self.logger.info(self.STDMET_CONFIG['prompt']['info']\
                         ['download_data'])
        utils.set_format_custom_text(self.STDMET_CONFIG['data_name_length'])
        # Total number of (station, year) downloads, for the progress line
        total = sum(len(self.year_station[year]) for year in self.years)
        count = 0

        for year in self.years:
            for stn in self.year_station[year]:
                self._download_single_stdmet_data(stn, year)
                count += 1
                info = f'Downloading {year} stdmet data of station {stn}'
                self.logger.debug(info)
                # '\r' overwrites the same console line each iteration
                print(f'\r{info} ({count}/{total})', end='')
        utils.delete_last_lines()
        print('Done')
Example #4
0
    def _download_sfmr_data(self):
        """Download SFMR data of hurricanes.

        Scrapes the AOML HRD storm page of each hurricane in
        `self.year_hurr` for links to NetCDF files and downloads those
        whose date falls within `self.period`.

        Returns
        -------
        None
            (The original docstring claimed a `hit_times` dict was
            returned, but the function returns nothing.)

        """
        self.logger.info(self.SFMR_CONFIG['prompt']['info']\
                         ['download_hurr'])
        utils.set_format_custom_text(self.SFMR_CONFIG['data_name_length'])
        suffix = '.nc'
        save_root_dir = self.SFMR_CONFIG['dirs']['hurr']
        os.makedirs(save_root_dir, exist_ok=True)

        # Total number of hurricanes, for the progress line
        total = sum(len(self.year_hurr[year]) for year in self.year_hurr)
        count = 0

        for year in self.year_hurr.keys():
            hurrs = list(self.year_hurr[year])
            for hurr in hurrs:
                count += 1
                info = (f'Download SFMR data of hurricane {hurr} ' +
                        f'in {year}')
                self.logger.debug(info)
                if count > 1:
                    utils.delete_last_lines()
                print(f'\r{info} ({count}/{total})', end='')

                # Create directory to store SFMR files
                dir_path = f'{save_root_dir}{year}/{hurr}/'
                os.makedirs(dir_path, exist_ok=True)
                # Generate keyword to consist url
                keyword = f'{hurr}{year}'
                url = (f'{self.SFMR_CONFIG["urls"]["prefix"]}' + f'{keyword}' +
                       f'{self.SFMR_CONFIG["urls"]["suffix"]}')
                # Get page according to url
                page = requests.get(url)
                data = page.text
                soup = bs4.BeautifulSoup(data, features='lxml')
                anchors = soup.find_all('a')

                for link in anchors:
                    href = link.get('href')
                    # Anchors without an href attribute yield None;
                    # only NetCDF links are of interest
                    if href is None or not href.endswith(suffix):
                        continue
                    # Extract file name
                    filename = href.split('/')[-1]
                    tail_half = filename.split('SFMR')[1]
                    try:
                        # There may be NetCDF name format
                        # like 'USAF_SFMR0809221638.nc'
                        # from 'https://www.aoml.noaa.gov/hrd'
                        # '/Storm_pages/kyle2008/sfmr.html'
                        # It is very annoying and there seems
                        # no simple rule to check this problem.
                        # Because it hard to distinguish
                        # 'SFMR20110536' and 'SFMR20110524'.
                        # First one is the case as kyle2008, its
                        # actually date is 2020/11/05.
                        # Second one is a normal case, its
                        # actually date is 2011/05/24.
                        # Before 2020, following rule may work.
                        if (tail_half.startswith('20')
                                or tail_half.startswith('199')):
                            # Four-digit year form: YYYYMMDD
                            date_str = tail_half[:8]
                            date_ = datetime.date(int(date_str[:4]),
                                                  int(date_str[4:6]),
                                                  int(date_str[6:]))
                        else:
                            # Two-digit year form: YYMMDD, assumed 20xx
                            date_str = tail_half[:6]
                            date_ = datetime.date(int(f'20{date_str[:2]}'),
                                                  int(date_str[2:4]),
                                                  int(date_str[4:]))
                            # Normalize filename to the 4-digit-year form
                            filename = (
                                f'{filename.split("SFMR")[0]}SFMR20' +
                                f'{filename.split("SFMR")[1]}')
                    except ValueError:
                        # Unparseable date in filename: log and skip this
                        # file.  (Original code hit a leftover
                        # breakpoint() and exit(), aborting the run.)
                        self.logger.error(
                            f'Failed to parse date of SFMR file: '
                            f'{filename}')
                        continue
                    if not utils.check_period(date_, self.period):
                        continue
                    file_path = dir_path + filename

                    utils.download(href, file_path)

        utils.delete_last_lines()
        print('Done')
Example #5
0
    def __init__(self, CONFIG, period, region, passwd, work_mode):
        """Initialize configuration, database session and CCMP grid
        metadata, then run the task selected by `work_mode`.

        Parameters
        ----------
        CONFIG : dict
            Global configuration tree.
        period : sequence
            Temporal period of interest.
        region : sequence
            Spatial extent as [lat1, lat2, lon1, lon2].
        passwd : str
            Database root password.
        work_mode : str
            One of 'fetch', 'compare' or 'fetch_and_compare'.

        """
        self.CONFIG = CONFIG
        self.period = period
        self.region = region
        self.db_root_passwd = passwd
        self.engine = None
        self.session = None

        self.logger = logging.getLogger(__name__)
        utils.setup_database(self, Base)

        self.lat1 = region[0]
        self.lat2 = region[1]
        self.lon1 = region[2]
        self.lon2 = region[3]

        self.spa_resolu = {
            'ccmp': self.CONFIG['ccmp']['spatial_resolution'],
            'grid': self.CONFIG['grid']['spatial_resolution'],
        }

        # CCMP grid point coordinates: evenly spaced at the CCMP
        # resolution, anchored at -78.375 (lat) and 0.125 (lon)
        ccmp_resolu = self.spa_resolu['ccmp']
        lat_count = self.CONFIG['ccmp']['lat_grid_points_number']
        lon_count = self.CONFIG['ccmp']['lon_grid_points_number']
        self.grid_pts = {
            'ccmp': {
                'lat': [i * ccmp_resolu - 78.375 for i in range(lat_count)],
                'lon': [i * ccmp_resolu + 0.125 for i in range(lon_count)],
            },
        }

        self.grid_lons = None
        self.grid_lats = None
        self.grid_x = None
        self.grid_y = None
        # Fill the 4 attributes above
        utils.load_grid_lonlat_xy(self)

        self.zorders = self.CONFIG['plot']['zorders']['scs_basemap']
        self._get_region_corners_indices()

        ccmp_conf = self.CONFIG['ccmp']
        self.root_url = ccmp_conf['url']
        self.filename_prefix = ccmp_conf['filename']['prefix']
        self.filename_suffix = ccmp_conf['filename']['suffix']
        self.root_dir = ccmp_conf['dir']

        utils.set_format_custom_text(ccmp_conf['filename_length'])

        # 'fetch_and_compare' performs both the fetch and compare steps
        if work_mode in ('fetch', 'fetch_and_compare'):
            self.download('tc')
            self.read()
        if work_mode in ('compare', 'fetch_and_compare'):
            self.compare_ccmp_with_ibtracs()