Example #1
    def update_brief_info(self, brief_info):
        self.logger.info('Updating brief information of SFMR')
        root_dir = self.SFMR_CONFIG['dirs']['hurr']

        for year in brief_info:
            files_num_in_the_year = len(brief_info[year])
            count = 0

            for idx, info in enumerate(brief_info[year]):
                count += 1
                print(f'\r{count}/{files_num_in_the_year} in {year}', end='')
                file_dir = f'{root_dir}{year}/{info.hurr_name}/'
                file_path = f'{file_dir}{info.filename}'

                updated_info = self.update_single_info_with_nc_file(
                    info, file_path)
                brief_info[year][idx] = updated_info

        # Remove entries that failed to update (set to None above).
        # Rebuild each year's list instead of calling remove() while
        # iterating, which would skip elements.
        for year in brief_info:
            brief_info[year] = [info for info in brief_info[year]
                                if info is not None]

        utils.delete_last_lines()
        print('Done')

        return brief_info
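
A note on the cleanup step above: removing elements from a Python list while iterating over it skips the neighbour of each removed item, which is why the None entries are filtered with a list comprehension instead. A standalone illustration (not project code):

values = [1, None, None, 2]
for v in values:
    if v is None:
        values.remove(v)   # mutating the list mid-iteration
print(values)              # [1, None, 2]: the second None was skipped
values = [v for v in values if v is not None]
print(values)              # [1, 2]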
Example #2
    def gen_sfmr_brief_info(self):
        self.logger.info('Generating brief information of SFMR')
        latest_year = self.get_sfmr_latest_year()
        start_year = max(self.SFMR_CONFIG['period_limit']['start'].year,
                         self.period[0].year)
        end_year = min(self.SFMR_CONFIG['period_limit']['end'].year,
                       self.period[1].year, latest_year)

        if start_year > end_year:
            return None

        brief_info = dict()
        for year in range(start_year, end_year + 1):
            info = f'Finding hurricanes of year {year}'
            self.logger.debug(info)
            print(f'\r{info}', end='')

            if year < 1994:
                year_str = 'prior1994'
            elif year == latest_year:
                year_str = ''
            else:
                year_str = f'{year}'
            url = (f'{self.SFMR_CONFIG["urls"]["hurricane"][:-5]}' +
                   f'{year_str}.html')
            one_year_brief_info = self.get_one_year_sfmr_brief_info(url)
            brief_info[year] = one_year_brief_info

        utils.delete_last_lines()
        print('Done')

        return brief_info
Example #3
    def _download_all_station_info(self):
        """Download all self.stations' information into single directory.

        """
        self.logger.info(self.STDMET_CONFIG['prompt']['info']\
                         ['download_station'])
        total = len(self.stations)
        count = 0
        for stn in self.stations:
            count += 1
            info = f'Downloading information of stdmet station {stn}'
            self.logger.debug(info)
            print((f'\r{info} ({count}/{total})'), end='')

            i = 0
            while True:
                # download self.stations' information
                result = self._download_single_station_info(stn)
                if result != 'error':
                    break
                else:
                    # Only loop when cannot get html of stdmet station
                    # webpage
                    self.logger.error(self.STDMET_CONFIG['prompt']['error'] \
                          ['fail_download_station'] + stn)
                    i += 1
                    if i <= self.STDMET_CONFIG['retry_times']:
                        self.logger.info('reconnect: %d' % i)
                    else:
                        self.logger.critical(
                            self.STDMET_CONFIG['prompt']['info']\
                            ['skip_download_station'])
                        break
        utils.delete_last_lines()
        print('Done')
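
The retry-until-limit loop above (repeated verbatim in Example #12) could be factored into a small helper. A sketch, assuming the download callable signals failure by returning 'error' as _download_single_station_info does here:

def download_with_retry(download_func, station, retry_times, logger):
    """Call download_func(station) until it succeeds or retries run out."""
    for attempt in range(retry_times + 1):
        if download_func(station) != 'error':
            return True
        if attempt < retry_times:
            logger.info('reconnect: %d' % (attempt + 1))
    logger.critical(f'Skip downloading station {station}')
    return False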
Example #4
    def download(self):
        utils.setup_signal_handler()
        self.no_data_count = dict()
        self.no_data_count['shapefile'] = 0
        self.no_data_count['gridded'] = 0

        self.year_tc = self._create_year_tc()

        self.logger.info(f'Downloading HWind data')

        total = 0
        count = 0
        for year in self.year_tc.keys():
            total += len(self.year_tc[year])

        for year in self.year_tc.keys():
            for tc in self.year_tc[year]:
                count += 1
                info = (f'Download HWind data of TC {tc.name} ' + f'in {year}')
                if count > 1:
                    utils.delete_last_lines()
                print(f'\r{info} ({count}/{total})', end='')

                for format in ['gridded']:
                    res = self._download_single_tc(
                        year, tc, self.CONFIG['hwind']['dirs'][format],
                        self.CONFIG['hwind']['data_link_text'][format])
                    if not res:
                        self.no_data_count[format] += 1

        utils.delete_last_lines()
        print('Done')
        print(self.no_data_count)
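
Nearly every snippet pairs a carriage-return progress line with utils.delete_last_lines() before printing 'Done'. That helper's body is not shown here; a plausible minimal version based on ANSI escape sequences (an assumption, not the project's actual implementation) is:

import sys

def delete_last_lines(n=1):
    """Move the cursor up n lines and erase them (ANSI-capable terminals)."""
    for _ in range(n):
        sys.stdout.write('\x1b[1A')   # cursor up one line
        sys.stdout.write('\x1b[2K')   # erase the whole line
    sys.stdout.flush()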
Example #5
    def _add_cwind_station_dis2coast(self):
        self.logger.info(('Adding column of distance to coast to table ' +
                          'of cwind station'))
        col_dis2coast = Column('distance_to_coast', Float())

        cwind_station_class = utils.get_class_by_tablename(
            self.engine, cwind.CwindStation.__tablename__)

        if not hasattr(cwind_station_class, col_dis2coast.name):
            utils.add_column(self.engine, cwind.CwindStation.__tablename__,
                             col_dis2coast)

        # Do NOT query cwind.CwindStation directly: for some reason the
        # value of its newly added column cannot be set that way
        station_query = self.session.query(cwind_station_class)
        total = station_query.count()
        for idx, stn in enumerate(station_query):
            print(f'\r{stn.id} ({idx+1}/{total})', end='')
            stn.distance_to_coast = self._distance_from_coast(
                stn.latitude, stn.longitude)

        self.session.commit()

        utils.delete_last_lines()
        print()
Example #6
    def setup_grid(self):
        # Create grid table
        # Grid = self.create_grid_table()
        Base.metadata.create_all(self.engine)

        lons, lats = self.gen_lons_lats()

        xs = [x for x in range(len(lons))]
        ys = [y for y in range(len(lats))]

        save_pickle = [
            {'name': 'lons', 'var': lons},
            {'name': 'lats', 'var': lats},
            {'name': 'x', 'var': xs},
            {'name': 'y', 'var': ys}
        ]

        for name_var_pair in save_pickle:
            name = name_var_pair['name']
            var = name_var_pair['var']

            pickle_path = self.CONFIG['grid']['pickle'][name]
            os.makedirs(os.path.dirname(pickle_path), exist_ok=True)
            with open(pickle_path, 'wb') as f:
                pickle.dump(var, f)

        total = len(lons)
        half_edge = 0.5 * self.spa_resolu

        grid_pts = []
        self.logger.info(f'Generating grid')
        # Traverse lon
        for lon_idx, lon in enumerate(lons):
            print(f'\r{lon_idx+1}/{total}', end='')
            # Traverse lat
            for lat_idx, lat in enumerate(lats):
                # Cal y and x
                pt = Grid()
                pt.x = lon_idx
                pt.y = lat_idx
                pt.x_y = f'{pt.x}_{pt.y}'
                pt.lon = lon
                pt.lat = lat
                pt.lon1, pt.lon2 = pt.lon - half_edge, pt.lon + half_edge
                pt.lat1, pt.lat2 = pt.lat - half_edge, pt.lat + half_edge
                # Check whether the point is ocean or not
                pt.land = bool(globe.is_land(lat, lon))

                grid_pts.append(pt)

        utils.delete_last_lines()
        print('Done')
        # Bulk insert
        utils.bulk_insert_avoid_duplicate_unique(
            grid_pts, self.CONFIG['database']\
            ['batch_size']['insert'],
            Grid, ['x_y'], self.session,
            check_self=True)
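
utils.bulk_insert_avoid_duplicate_unique is used throughout these examples but never shown. Judging only from its call sites (rows, batch size, ORM class, unique-column list, session, check_self flag), it might look roughly like the sketch below; this is an assumption about its behaviour, not the project's code:

def bulk_insert_avoid_duplicate_unique(rows, batch_size, table_class,
                                       unique_cols, session, check_self=False):
    """Insert ORM objects in batches, skipping rows whose unique key already
    exists in the table (and within this batch of rows when check_self)."""
    key = unique_cols[0]
    existing = {getattr(r, key)
                for r in session.query(getattr(table_class, key))}
    seen = set()
    batch = []
    for row in rows:
        k = getattr(row, key)
        if k in existing or (check_self and k in seen):
            continue
        seen.add(k)
        batch.append(row)
        if len(batch) >= batch_size:
            session.add_all(batch)
            session.commit()
            batch = []
    if batch:
        session.add_all(batch)
        session.commit()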
Example #7
    def _analysis_and_save_relation(self):
        """Analysis and save relation between all years and stations
        from NDBC's Standard Meteorological Data webpage.

        """
        this_year = datetime.datetime.today().year

        if (os.path.exists(self.STDMET_CONFIG['vars_path']\
                           ['all_year_station'])
            and os.path.exists(self.STDMET_CONFIG['vars_path']\
                               ['all_station_year'])):
            relation_modified_datetime = dict()
            relation_modified_datetime['all_year_station'] = \
                    datetime.datetime.fromtimestamp(os.path.getmtime(
                        self.STDMET_CONFIG['vars_path']['all_year_station']))
            relation_modified_datetime['all_station_year'] = \
                    datetime.datetime.fromtimestamp(os.path.getmtime(
                        self.STDMET_CONFIG['vars_path']['all_station_year']))

            latest_relation = True
            for key in relation_modified_datetime.keys():
                if relation_modified_datetime[key].year < this_year:
                    latest_relation = False

            if latest_relation:
                return

        self.all_year_station = dict()
        self.all_station_year = dict()

        start_year = self.STDMET_CONFIG['period_limit']['start'].year
        end_year = self.STDMET_CONFIG['period_limit']['end'].year
        if end_year > this_year:
            end_year = this_year
        self.all_years = [x for x in range(start_year, end_year+1)]
        self.all_stations = set()
        for year in self.all_years:
            info = f'Finding stations of year {year}'
            self.logger.debug(info)
            print(f'\r{info}', end='')

            stns = self._station_in_a_year(year)
            self.all_year_station[year] = stns
            self.all_stations.update(stns)

            for stn in stns:
                if stn not in self.all_station_year:
                    self.all_station_year[stn] = set()
                self.all_station_year[stn].add(year)
        utils.delete_last_lines()
        print('Done')

        # Save two dicts which store the relation between all years and
        # stations
        utils.save_relation(self.STDMET_CONFIG['vars_path']['all_year_station'],
                            self.all_year_station)
        utils.save_relation(self.STDMET_CONFIG['vars_path']['all_station_year'],
                            self.all_station_year)
Example #8
    def how_fast_tcs_intensity_change(self):
        self.logger.info('Calculating how fast TCs\' intensity change')

        # create table for recording
        TCIntensityChange = self.create_tc_intensity_change_table()

        table_rows = []
        for idx, tc in enumerate(self.tc_query):
            print(f'\r{idx+1}/{self.tc_query_num}', end='')
            # find next TC
            if idx < self.tc_query_num and tc.wind is not None:
                next_idx = idx + 1
                while ((next_idx < self.tc_query_num
                        and self.tc_query[next_idx].wind is None)):
                    next_idx += 1
                if next_idx == self.tc_query_num:
                    break

                next_tc = self.tc_query[next_idx]

                if tc.sid != next_tc.sid:
                    continue
            else:
                continue

            duration, shift = self.cal_before_speed(tc, next_tc)
            intensity_change, intensity_change_percent = \
                    self.cal_intensity_change(tc, next_tc)
            hours = duration / 60

            # record into table
            row = TCIntensityChange()
            row.sid = tc.sid
            row.name = tc.name
            row.basin = tc.basin
            row.start_datetime = tc.date_time
            row.duration_in_mins = duration
            row.shift_in_kms = shift
            row.intensity_change = intensity_change
            row.intensity_change_percent = intensity_change_percent
            row.intensity_change_per_hour = intensity_change / hours
            row.intensity_change_percent_per_hour = \
                    intensity_change_percent / hours
            row.sid_start_datetime = f'{tc.sid}_{tc.date_time}'

            table_rows.append(row)

        if len(table_rows):
            utils.bulk_insert_avoid_duplicate_unique(
                table_rows, self.CONFIG['database']\
                ['batch_size']['insert'],
                TCIntensityChange, ['sid_start_datetime'], self.session,
                check_self=True)

        utils.delete_last_lines()
        print('Done')
Example #9
    def _insert_station_info(self, read_all=False):
        self.logger.info(self.STDMET_CONFIG['prompt']['info']\
                         ['read_station'])
        min_lat, max_lat = self.region[0], self.region[1]
        min_lon, max_lon = self.region[2], self.region[3]
        station_info_dir = self.STDMET_CONFIG['dirs']['stations']

        station_files = []
        if not read_all:
            for file in os.listdir(station_info_dir):
                if not file.endswith('.txt'):
                    continue
                for year in self.years:
                    for stn in self.year_station[year]:
                        if file == f'{stn}.txt':
                            station_files.append(file)
                            break
                    if file in station_files:
                        break
        else:
            station_files = [x for x in os.listdir(station_info_dir) if
                             x.endswith('.txt')]

        all_stations = []
        total = len(station_files)
        count = 0
        for filename in station_files:
            count += 1
            station_info_path = station_info_dir + filename

            info = f'Extracting station information from {filename}'
            print((f'\r{info} ({count}/{total})'), end='')

            start = time.process_time()
            station = self._extract_station_info(station_info_path)
            end = time.process_time()

            self.logger.debug(f'{info} in {end-start:.2f} s')
            if station:
                all_stations.append(station)

        utils.delete_last_lines()
        print('Done')

        start = time.process_time()
        utils.bulk_insert_avoid_duplicate_unique(
            all_stations, self.CONFIG['database']['batch_size']['insert'],
            StdmetStation, ['id'], self.session)
        end = time.process_time()

        self.logger.debug(('Bulk inserting stdmet station information into '
                           + f'{StdmetStation.__tablename__} '
                           + f'in {end-start:.2f}s'))
Example #10
    def _insert_data(self, read_all=False):
        self.logger.info(self.CWIND_CONFIG['prompt']['info']['read_data'])
        data_dir = self.CWIND_CONFIG['dirs']['data']
        station_ids = [
            id for id in self.session.query(CwindStation.id).\
            order_by(CwindStation.id)
        ]
        if not read_all:
            data_files = [
                x for x in os.listdir(data_dir)
                if x.endswith('.txt.gz') and int(x[6:10]) in self.years
            ]
        else:
            data_files = [
                x for x in os.listdir(data_dir) if x.endswith('.txt.gz')
            ]

        total = len(data_files)
        count = 0

        for id in station_ids:
            id = id[0]
            DataOfStation = self._create_cwind_data_table(id)
            for file in data_files:
                if file.startswith(id):
                    # cwind data file belongs to a station in the
                    # cwind_station table
                    count += 1
                    data_path = data_dir + file

                    info = f'Extracting cwind data from {file}'
                    print(f'\r{info} ({count}/{total})', end='')

                    start = time.process_time()
                    records = self._extract_data(data_path, DataOfStation)
                    end = time.process_time()

                    self.logger.debug(f'{info} in {end-start:.2f} s')

                    if not records:
                        continue

                    start = time.process_time()
                    utils.bulk_insert_avoid_duplicate_unique(
                        records, int(self.CONFIG['database']\
                                     ['batch_size']['insert']/10),
                        DataOfStation, ['date_time'], self.session,
                        check_self=True)
                    end = time.process_time()

                    self.logger.debug(f'Bulk inserting cwind data into '
                                      f'cwind_{id} in {end-start:.2f} s')
        utils.delete_last_lines()
        print('Done')
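
A small caveat on the timing in this and the previous examples: time.process_time() counts CPU time only and stands still while the process waits on the network, disk, or database, so the logged durations understate I/O-bound work. time.perf_counter() measures wall-clock time:

import time

start = time.perf_counter()        # wall clock, includes I/O waits
time.sleep(0.5)                    # stand-in for an I/O-bound call
wall = time.perf_counter() - start

start = time.process_time()        # CPU time only
time.sleep(0.5)
cpu = time.process_time() - start

print(f'wall: {wall:.2f} s, cpu: {cpu:.2f} s')   # cpu stays near zero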
Example #11
    def add_dist2coast(self):
        lons = [round(x * 0.04 - 179.98, 2) for x in range(9000)]
        lats = [round(y * 0.04 - 89.98, 2) for y in range(4500)]

        dist2coast_table_name = 'dist2coast_na_sfmr'
        Dist2Coast = utils.get_class_by_tablename(self.engine,
                                                  dist2coast_table_name)

        validation_tablename = utils.gen_validation_tablename(
            self, 'sfmr', 'smap_prediction')
        Validation = utils.get_class_by_tablename(self.engine,
                                                  validation_tablename)

        validation_query = self.session.query(Validation).filter(
            Validation.sfmr_datetime > self.period[0],
            Validation.sfmr_datetime < self.period[1])
        validation_count = validation_query.count()

        for validation_idx, validation_row in enumerate(validation_query):
            print(f'\r{validation_idx+1}/{validation_count}', end='')

        indices_to_drop = []
        for src in self.sources:
            length = len(bias[src])

            for i in range(length):
                print(f'\r{i+1}/{length}', end='')

                lookup_lon, lookup_lon_idx = \
                    utils.get_nearest_element_and_index(
                        lons, bias[src]['sfmr_lon'][i]-360)
                lookup_lat, lookup_lat_idx = \
                    utils.get_nearest_element_and_index(
                        lats, bias[src]['sfmr_lat'][i])
                dist_query = self.session.query(Dist2Coast).filter(
                    Dist2Coast.lon > lookup_lon - 0.01,
                    Dist2Coast.lon < lookup_lon + 0.01,
                    Dist2Coast.lat > lookup_lat - 0.01,
                    Dist2Coast.lat < lookup_lat + 0.01,
                )
                if dist_query.count() != 1:
                    self.logger.error('Dist not found')
                    breakpoint()
                    exit(1)

                if dist_query[0].dist2coast > distance_to_land_threshold:
                    indices_to_drop.append(i)

            utils.delete_last_lines()
            print('Done')

            bias[src].drop(indices_to_drop, inplace=True)
Example #12
    def download_all_stations_no_limit(self):
        # There are several stations which can be found in
        # https://www.ndbc.noaa.gov/data/historical/stdmet/
        # but do not have station page:
        # ['46a54', '42a02', '42otp', '42a03', '46a35', '47072', '32st2',
        # '51wh2', '41nt1', '41nt2', '51wh1', '32st1', '46074', '4h364',
        # 'a025w', '4h390', '4h361', 'q004w', '4h394', 'b040z', 'a002e',
        # 'et01z']
        if not hasattr(self, 'all_station_year'):
            with open(self.STDMET_CONFIG['vars_path']['all_station_year'],
                      'rb') as file:
                self.all_station_year = pickle.load(file)

        self.all_stations = set()
        for stn in self.all_station_year.keys():
            self.all_stations.add(stn)

        self.logger.info(self.STDMET_CONFIG['prompt']['info']\
                         ['download_all_station'])
        total = len(self.all_stations)
        count = 0
        for stn in self.all_stations:
            count += 1
            info = f'Downloading information of stdmet station {stn}'
            self.logger.debug(info)
            print((f'\r{info} ({count}/{total})'), end='')

            i = 0
            while True:
                # download self.stations' information
                result = self._download_single_station_info(stn)
                if result != 'error':
                    break
                else:
                    # Only loop when cannot get html of stdmet station
                    # webpage
                    self.logger.error(self.STDMET_CONFIG['prompt']['error'] \
                          ['fail_download_station'] + stn)
                    i += 1
                    if i <= self.STDMET_CONFIG['retry_times']:
                        self.logger.info('reconnect: %d' % i)
                    else:
                        self.logger.critical(
                            self.STDMET_CONFIG['prompt']['info']\
                            ['skip_download_station'])
                        break
        utils.delete_last_lines()
        print('Done')
Example #13
    def read(self):
        self._load_year_tc()

        for year in self.year_tc.keys():
            for tc in self.year_tc[year]:
                # Get gridded file_path
                gridded_paths = self._get_gridded_path(year, tc)
                for gridded_file in gridded_paths:
                    # Get TC center locale
                    lon, lat, x, y = self._get_tc_center(gridded_file)
                    dt = self._get_dt_of_hwind_file(gridded_file)
                    # Get HWind table
                    table_name, sa_table, hwind_table = self.get_hwind_class(
                        tc.sid, dt)

                    # Read gridded file
                    data = self._read_tc_gridded(gridded_file, hwind_table)

                    # Skip this turn of loop if not getting data matrix
                    if not len(data):
                        continue

                    # When the HWind table does not exist yet, sa_table is
                    # not None and needs to be created first.
                    if sa_table is not None:
                        # Create table of ERA5 data cube
                        sa_table.create(self.engine)
                        self.session.commit()

                    # Insert into HWind table
                    start = time.process_time()
                    utils.bulk_insert_avoid_duplicate_unique(
                        data,
                        int(self.CONFIG['database']['batch_size']['insert'] /
                            10),
                        hwind_table, ['x_y'],
                        self.session,
                        check_self=True)
                    end = time.process_time()

                    self.logger.debug(f'Bulk inserting HWind data into '
                                      f'{table_name} in {end-start:.2f} s')
        utils.delete_last_lines()
        print('Done')
Example #14
    def _gen_all_year_hurr(self):
        this_year = datetime.datetime.today().year

        self.all_year_hurr = {}

        start_year = self.SFMR_CONFIG['period_limit']['start'].year
        end_year = self.SFMR_CONFIG['period_limit']['end'].year
        if this_year < end_year:
            end_year = this_year

        for year in range(start_year, end_year + 1):
            info = f'Finding hurricanes of year {year}'
            self.logger.debug(info)
            print(f'\r{info}', end='')

            if year < 1994:
                year_str = 'prior1994'
            elif year == this_year:
                year_str = ''
            else:
                year_str = str(year)
            url = (f'{self.SFMR_CONFIG["urls"]["hurricane"][:-5]}' +
                   f'{year_str}.html')
            page = requests.get(url)
            data = page.text
            soup = bs4.BeautifulSoup(data, features='lxml')
            anchors = soup.find_all('a')

            self.all_year_hurr[year] = set()

            for link in anchors:
                if not link.contents:
                    continue
                text = link.contents[0]
                if text != 'SFMR':
                    continue
                href = link.get('href')
                hurr = href.split('/')[-2][:-4]
                self.all_year_hurr[year].add(hurr)
        utils.delete_last_lines()
        print('Done')

        utils.save_relation(self.SFMR_CONFIG['vars_path']['all_year_hurr'],
                            self.all_year_hurr)
Example #15
    def how_fast_tcs_move(self):
        self.logger.info('Calculating how fast TCs move')

        # create table for recording
        TCMovingSpeed = self.create_tc_moving_speed_table()

        table_rows = []
        for idx, tc in enumerate(self.tc_query):
            print(f'\r{idx+1}/{self.tc_query_num}', end='')
            # find next TC
            if idx + 1 < self.tc_query_num:
                next_tc = self.tc_query[idx + 1]
                if tc.sid != next_tc.sid:
                    continue
            else:
                break

            duration, shift = self.cal_before_speed(tc, next_tc)
            speed = shift / (duration / 60)

            # record into table
            row = TCMovingSpeed()
            row.sid = tc.sid
            row.name = tc.name
            row.basin = tc.basin
            row.start_datetime = tc.date_time
            row.duration_in_mins = duration
            row.shift_in_kms = shift
            row.speed_kmph = speed
            row.sid_start_datetime = f'{tc.sid}_{tc.date_time}'

            table_rows.append(row)

        if len(table_rows):
            utils.bulk_insert_avoid_duplicate_unique(
                table_rows, self.CONFIG['database']\
                ['batch_size']['insert'],
                TCMovingSpeed, ['sid_start_datetime'], self.session,
                check_self=True)

        utils.delete_last_lines()
        print('Done')
Example #16
    def read_scs_oriented(self, vars_mode, file_path, dt_cursor):
        # Open ERA5 grib data and read 6-hourly
        grbidx = pygrib.index(file_path, 'dataTime')

        for hourtime in range(0, 2400,
                              self.CONFIG['product']\
                              ['temporal_resolution']):

            # Create ERA5 table for SCS
            SCSERA5 = self.create_scs_era5_table(dt_cursor, hourtime)

            selected_grbs = grbidx.select(dataTime=hourtime)
            # Generate frame of one 6-hour SCS ERA5 table
            table_entity = self.gen_scs_era5_entity(SCSERA5)

            total = len(selected_grbs)

            for idx, grb in enumerate(selected_grbs):
                info = (f"""\rReading grbs on hour """
                        f"""{int(hourtime/100)} {idx+1}/{total}""")
                print(f'\r{info}', end='')
                # Traverse all data point in ERA5 grib message,
                # find corresponding row in SCS ERA5 table frame
                # and fill its environmental variables
                table_entity = self.fill_scs_era5_table_entity(
                    grb, table_entity)

                # Temporarily not interpolate the space between
                # ERA5 0.25 degree grid points

                # Method_1: conventional interpolation methods

                # Method_2: GAN

            # Insert entity into database
            utils.bulk_insert_avoid_duplicate_unique(
                table_entity, self.CONFIG['database']\
                ['batch_size']['insert'],
                SCSERA5, ['x_y'], self.session,
                check_self=True)
            utils.delete_last_lines()
        print('Done')
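
fill_scs_era5_table_entity is not shown above; with pygrib, the data and coordinates of a selected message are typically read as below. The file name is a placeholder and the snippet is only a sketch of the library calls, not the project's helper:

import pygrib

grbs = pygrib.open('era5_sample.grib')   # hypothetical file name
grb = grbs.message(1)                    # messages are 1-indexed
lats, lons = grb.latlons()               # 2-D coordinate arrays
values = grb.values                      # 2-D data array of the same shape
print(grb.name, grb.dataDate, grb.dataTime, values.shape)
grbs.close()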
Example #17
    def _download_all_stdmet_data(self):
        """Download Continuous Wind data into single directory.

        """
        self.logger.info(self.STDMET_CONFIG['prompt']['info']\
                         ['download_data'])
        utils.set_format_custom_text(self.STDMET_CONFIG['data_name_length'])
        total = 0
        count = 0
        for year in self.years:
            total += len(self.year_station[year])

        for year in self.years:
            for stn in self.year_station[year]:
                self._download_single_stdmet_data(stn, year)
                count += 1
                info = f'Downloading {year} stdmet data of station {stn}'
                self.logger.debug(info)
                print((f'\r{info} ({count}/{total})'), end='')
        utils.delete_last_lines()
        print('Done')
Example #18
    def download_and_read_scs_data(self):
        self.logger.info(f'Downloading ISD data')
        ISDStation = self.create_isd_station_table()

        year_csv_paths = dict()
        for year in self.years:
            year_csv_paths[year] = []
            year_dir = f"{self.CONFIG['isd']['dirs']['csvs']}{year}/"
            os.makedirs(year_dir, exist_ok=True)

            stn_query = self.session.query(ISDStation).filter(
                extract('year', ISDStation.begin_date) <= year,
                extract('year', ISDStation.end_date) >= year)
            total = stn_query.count()
            count = 0

            ISDWind = self.create_isd_wind_table(year)

            for stn in stn_query:
                count += 1
                if self.work_mode == 'fetch_and_read':
                    print((f"""\rDownloading and reading """
                           f"""{stn.station_id} """
                           f"""in {year} {count}/{total}"""),
                          end='')
                else:
                    print((f"""\rDownloading {stn.station_id} """
                           f"""in {year} {count}/{total}"""),
                          end='')
                csv_path = self.download_stn_data_in_a_year(
                    stn, year, year_dir)
                year_csv_paths[year].append(csv_path)

                if self.work_mode == 'fetch_and_read':
                    self.read_isd_csv(ISDWind, csv_path, year)

            utils.delete_last_lines()
            print(f'{year} done')

        return year_csv_paths
Example #19
    def read(self):
        utils.reset_signal_handler()
        self.logger.info('Reading CCMP files')

        # Traverse file path
        for file_path in self.files_path:
            date_str = file_path.split('_')[3]
            vars = Dataset(file_path).variables
            date_ = datetime.datetime.strptime(date_str, '%Y%m%d').date()
            CCMP = self.create_scs_ccmp_table(date_)
            info = f"""Reading {file_path.split('/')[-1]}"""
            # Traverse the 4 synoptic times (00, 06, 12, 18 UTC) in one day
            for hour_idx, hour in enumerate(range(0, 24, 6)):
                print(f"""\r{info} on {str(hour).zfill(2)}:00""", end='')
                one_hour_scs_ccmp = []
                time = datetime.time(hour, 0, 0)
                dt = datetime.datetime.combine(date_, time)

                subset = dict()
                var_names = ['nobs', 'uwnd', 'vwnd']

                for var_name in var_names:
                    subset[var_name] = vars[var_name][hour_idx][
                        self.lat1_index:self.lat2_index + 1,
                        self.lon1_index:self.lon2_index + 1]

                one_hour_scs_ccmp = self.get_ccmp_of_one_hour(
                    dt, CCMP, subset, var_names)

                # Insert into table
                utils.bulk_insert_avoid_duplicate_unique(
                    one_hour_scs_ccmp, self.CONFIG['database']\
                    ['batch_size']['insert'],
                    CCMP, ['datetime_x_y'], self.session,
                    check_self=True)
            utils.delete_last_lines()
            print(f"""{info}: Done""")
Example #20
    def download_with_brief_info(self, brief_info):
        self.logger.info('Downloading SFMR files')
        root_dir = self.SFMR_CONFIG['dirs']['hurr']

        for year in brief_info:
            if year < self.period[0].year or year > self.period[1].year:
                continue
            files_num_in_the_year = len(brief_info[year])
            count = 0

            for info in brief_info[year]:
                count += 1
                print(f'\r{count}/{files_num_in_the_year} in {year}', end='')
                file_dir = f'{root_dir}{year}/{info.hurr_name}/'
                os.makedirs(file_dir, exist_ok=True)
                file_path = f'{file_dir}{info.filename}'
                file_url = info.file_url

                utils.download(file_url, file_path, True)

        utils.delete_last_lines()
        print('Done')

        return
Example #21
output_path = ('/Users/lujingze/Programming/SWFusion/data/'
               'dist2coast/dist2coast_na_sfmr.txt')

with open(input_path, 'r') as f:
    txt_lines = f.readlines()

north = 50
south = 0
west = 254 - 360
east = 325 - 360
focus_lines = []
total = len(txt_lines)

for idx, line in enumerate(txt_lines):
    print(f'\r{idx+1}/{total}', end='')
    numbers_str = line.split('\t')
    lon = float(numbers_str[0])
    lat = float(numbers_str[1])

    if lon < west or lon > east:
        continue
    if lat < south or lat > north:
        continue
    focus_lines.append(line)

with open(output_path, 'w') as f:
    f.writelines(focus_lines)

utils.delete_last_lines()
print('Done')
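
The bounding box converts 0-360 degree longitudes into the -180 to 180 convention that the dist2coast text file apparently uses: west = 254 - 360 = -106 and east = 325 - 360 = -35, i.e. only points between 0 N and 50 N and between 106 W and 35 W (presumably the North Atlantic region of interest for SFMR, hence the '_na_sfmr' suffix) are kept.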
Example #22
    def get_satel_coverage(self, satel_name, SatelERA5, this_hour, next_hour):
        self.logger.info((f"""Getting coverge of {satel_name} """
                          f"""from {this_hour} to {next_hour}"""))

        satel_coverage = dict()
        satel_coverage['lon'] = []
        satel_coverage['lat'] = []
        satel_coverage['windspd'] = []

        satel_windspd_col_name = {
            'ascat': 'windspd',
            'wsat': 'w_aw',
            'amsr2': 'wind_lf',
            'smap': 'windspd',
            'sentinel_1': 'windspd'
        }

        grid_lons_lats = dict()

        for name in ['lons', 'lats']:
            pickle_path = self.CONFIG['grid']['pickle'][name]
            with open(pickle_path, 'rb') as f:
                grid_lons_lats[name] = pickle.load(f)

        query_for_count = self.session.query(SatelERA5).filter(
            SatelERA5.satel_datetime >= this_hour,
            SatelERA5.satel_datetime < next_hour)
        total = query_for_count.count()
        del query_for_count
        count = 0

        if not total:
            return 0, [], [], 0

        min_lon, max_lon = 999, -999
        min_lat, max_lat = 999, -999

        for row in self.session.query(SatelERA5).filter(
                SatelERA5.satel_datetime >= this_hour,
                SatelERA5.satel_datetime < next_hour).yield_per(
                    self.CONFIG['database']['batch_size']['query']):

            count += 1
            print(f'\rTraversing data: {count}/{total}', end='')

            lon = grid_lons_lats['lons'][row.x]
            satel_coverage['lon'].append(lon)
            if lon < min_lon:
                min_lon = lon
            if lon > max_lon:
                max_lon = lon

            lat = grid_lons_lats['lats'][row.y]
            satel_coverage['lat'].append(lat)
            if lat < min_lat:
                min_lat = lat
            if lat > max_lat:
                max_lat = lat

            satel_coverage['windspd'].append(
                getattr(row, satel_windspd_col_name[satel_name]))

        utils.delete_last_lines()
        print('Done')

        if min_lon > max_lon or min_lat > max_lat:
            return 0, [], [], 0

        grid_spa_resolu = self.CONFIG['grid']['spatial_resolution']
        # DO NOT use np.linspace, because the round error is larger than
        # 0.01
        lons = list(
            np.arange(min_lon, max_lon + 0.5 * grid_spa_resolu,
                      grid_spa_resolu))
        lats = list(
            np.arange(min_lat, max_lat + 0.5 * grid_spa_resolu,
                      grid_spa_resolu))
        lons = [round(x, 2) for x in lons]
        lats = [round(y, 2) for y in lats]

        windspd = np.zeros(shape=(len(lats), len(lons)), dtype=float)

        if satel_name != 'sentinel_1':
            for i in range(total):
                count += 1
                try:
                    lon_idx = lons.index(satel_coverage['lon'][i])
                    lat_idx = lats.index(satel_coverage['lat'][i])
                except Exception as msg:
                    breakpoint()
                    exit(msg)

                # Only for display wind cell according to satellite's
                # spatial resolution
                for y_offset in range(-2, 3):
                    sub_lat_idx = lat_idx + y_offset
                    if sub_lat_idx < 0 or sub_lat_idx >= len(lats):
                        continue

                    for x_offset in range(-2, 3):
                        sub_lon_idx = lon_idx + x_offset
                        if sub_lon_idx < 0 or sub_lon_idx >= len(lons):
                            continue

                        windspd[sub_lat_idx][sub_lon_idx] = \
                                satel_coverage['windspd'][i]
        else:
            for i in range(total):
                count += 1
                lon_idx = lons.index(satel_coverage['lon'][i])
                lat_idx = lats.index(satel_coverage['lat'][i])

                windspd[lat_idx][lon_idx] = satel_coverage['windspd'][i]

        return total, lons, lats, windspd
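
The rounding to two decimals is not cosmetic: the later lons.index(...) and lats.index(...) lookups rely on exact float equality, so the axis values built here must match the rounded grid coordinates exactly. A standalone check:

import numpy as np

grid_spa_resolu = 0.25
lons = list(np.arange(100.0, 110.0 + 0.5 * grid_spa_resolu, grid_spa_resolu))
lons = [round(x, 2) for x in lons]
print(lons.index(104.75))   # exact-equality lookup on the rounded axis works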
Example #23
    def _read_detail(self, basin, region_restriction, vars, storm_num,
                     date_time_num, have_read, info):
        """Read detail of IBTrACS data.

        """
        total = storm_num
        # List to record all details
        tc_list = []
        IBTrACSTable = self.create_tc_table(basin)

        season_check_offset = self.CONFIG['ibtracs']\
                ['season_check_offset']
        for i in range(storm_num):
            print(f'\r{info} {i+1}/{total}', end='')
            # Season is not exactly the calendar year, so to skip storms
            # correctly by checking season, we apply an offset on both
            # sides of the period
            if int(vars['season'][i]) < (self.period[0].year -
                                         season_check_offset):
                continue
            if int(vars['season'][i]) > (self.period[1].year +
                                         season_check_offset):
                continue

            # Skip this storm if its last record is earlier than the
            # start of the period, or its first record is later than
            # the end of the period
            iso_times = vars['iso_time'][i]
            not_masked_count = np.count_nonzero(iso_times.count(1))

            if not not_masked_count:
                self.logger.debug((f'Skipping No.{i+1} TC because its ' +
                                   f'iso_time field is all masked'))
                continue

            last_iso_time = iso_times[not_masked_count - 1]
            last_datetime = datetime.datetime.strptime(
                last_iso_time.tostring().decode('utf-8'), '%Y-%m-%d %H:%M:%S')
            if last_datetime < self.period[0]:
                self.logger.debug(
                    (f'Skipping No.{i+1} TC because its ' +
                     f'last datetime is earlier than ' +
                     f'starting datetime of period: ' + f'{last_datetime}'))
                continue

            first_iso_time = iso_times[0]
            first_datetime = datetime.datetime.strptime(
                first_iso_time.tostring().decode('utf-8'), '%Y-%m-%d %H:%M:%S')
            if first_datetime > self.period[1]:
                self.logger.debug(
                    (f'Skipping No.{i+1} TC because its ' +
                     f'first datetime is later than ' +
                     f'ending datetime of period: ' + f'{first_datetime}'))
                continue

            self.logger.debug((f'Reading No.{i+1} TC which lived from ' +
                               f'{first_datetime} to {last_datetime}'))

            sid = vars['sid'][i].tostring().decode('utf-8')
            name = vars['name'][i]
            name = name[name.mask == False].tostring().decode('utf-8')

            for j in range(date_time_num):
                row = IBTrACSTable()

                # Read ISO time and check whether record is in period
                iso_time = vars['iso_time'][i][j]
                if iso_time[0] is MASKED:
                    break

                iso_time_str = iso_time.tostring().decode('utf-8')
                row.date_time = datetime.datetime.strptime(
                    iso_time_str, '%Y-%m-%d %H:%M:%S')
                if not utils.check_period(row.date_time, self.period):
                    continue

                # Insert rows which have read to TC table until
                # find next unread month
                # year, month = row.date_time.year, row.date_time.month
                # if not have_read[year][month]:
                #     if len(tc_list):
                #         utils.bulk_insert_avoid_duplicate_unique(
                #             tc_list, self.CONFIG['database']\
                #             ['batch_size']['insert'],
                #             IBTrACSTable, ['sid_date_time'], self.session,
                #             check_self=True)
                #         tc_list = []
                #     self.logger.debug((f'Reading WMO records of '
                #                       + f'{year}-{str(month).zfill(2)}'))
                #     have_read[year][month] = True

                # Read basin of TC
                row.basin = vars['basin'][i][j].tostring().decode('utf-8')

                # Read latitude, longitude, minimum central pressure,
                # and maximum sustained wind speed from the official WMO agency
                lat = vars['lat'][i][j]
                lon = (vars['lon'][i][j] + 360) % 360
                # breakpoint()
                if lat is MASKED or lon is MASKED:
                    continue
                if region_restriction:
                    if (lat < self.lat1 or lat > self.lat2 or lon < self.lon1
                            or lon > self.lon2):
                        continue

                pres = vars['wmo_pres'][i][j]
                wind = vars['wmo_wind'][i][j]
                # if pres is MASKED or wind is MASKED:
                #     continue

                # Set attributes of row
                row.sid = sid
                if name != 'NOT_NAMED':
                    row.name = name
                row.lat = float(lat)
                row.lon = float(lon)
                row.pres = int(pres) if pres is not MASKED else None
                row.wind = int(wind) if wind is not MASKED else None
                row.sid_date_time = f'{sid}_{row.date_time}'

                # Average radius of 34/50/64 knot winds in four
                # directions (ne, se, sw, nw) from three agencies
                # (bom, reunion, usa)
                dirs = ['ne', 'se', 'sw', 'nw']
                radii = dict()
                for r in ['r34', 'r50', 'r64']:
                    radii[r] = dict()
                    for d in range(4):
                        radii[r][d] = []
                        for a in ['bom', 'reunion', 'usa']:
                            r_d_a = vars[f'{a}_{r}'][i][j][d]
                            if r_d_a is not MASKED:
                                radii[r][d].append(int(r_d_a))
                        if len(radii[r][d]):
                            setattr(row, f'{r}_{dirs[d]}',
                                    int(sum(radii[r][d]) / len(radii[r][d])))

                tc_list.append(row)
                # breakpoint()

        if len(tc_list):
            utils.bulk_insert_avoid_duplicate_unique(
                tc_list, self.CONFIG['database']\
                ['batch_size']['insert'],
                IBTrACSTable, ['sid_date_time'], self.session,
                check_self=True)

        utils.delete_last_lines()
        print('Done')
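
The MASKED comparisons above use numpy's masked-array machinery: indexing a masked element of a numpy.ma array returns the numpy.ma.masked singleton, which is why identity checks with is work. (The tostring() calls are the older spelling of numpy's tobytes().) A minimal illustration:

import numpy as np

arr = np.ma.array([10, 20, 30], mask=[False, True, False])
print(arr[1] is np.ma.masked)   # True: this element is masked
print(arr[0] is np.ma.masked)   # False
print(int(arr[0]))              # unmasked values convert normally, prints 10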
Example #24
    def _insert_sfmr(self, read_all=False):
        self.logger.info(self.SFMR_CONFIG['prompt']['info']['read_hurr_sfmr'])
        # Create SFMR table
        table_name_prefix = self.SFMR_CONFIG['table_names']['prefix']
        skip_vars = ['DATE', 'TIME']
        notnull_vars = ['LAT', 'LON', 'SRR', 'SWS']
        unique_vars = []
        custom_cols = {
            1: Column('DATETIME', DateTime(), nullable=False, unique=False),
            21: Column('SPACE_TIME', String(255), nullable=False, unique=True)
        }
        total = 0
        for year in self.year_hurr_file_path.keys():
            for hurr in self.year_hurr_file_path[year].keys():
                total += len(self.year_hurr_file_path[year][hurr])
        count = 0

        for year in self.year_hurr_file_path.keys():
            for hurr in self.year_hurr_file_path[year].keys():
                if not len(self.year_hurr_file_path[year][hurr]):
                    continue

                table_name = f'{table_name_prefix}{year}_{hurr}'
                nc_template_path = self.year_hurr_file_path\
                        [year][hurr][0]
                SfmrTable = utils.create_table_from_netcdf(
                    self.engine, nc_template_path, table_name, self.session,
                    skip_vars, notnull_vars, unique_vars, custom_cols)

                for file_path in self.year_hurr_file_path[year][hurr]:
                    count += 1
                    info = (f'Extracting SFMR data from ' +
                            f'{file_path.split("/")[-1]}')
                    if count > 1:
                        utils.delete_last_lines()
                    print(f'\r{info} ({count}/{total})', end='')

                    start = time.process_time()
                    one_day_records, min_lat, max_lat,\
                            min_lon, max_lon = \
                            self._extract_sfmr_from_netcdf(file_path,
                                                           SfmrTable)
                    end = time.process_time()
                    self.logger.debug(f'{info} in {end-start:.2f} s')

                    start = time.process_time()
                    utils.bulk_insert_avoid_duplicate_unique(
                        one_day_records,
                        self.CONFIG['database']['batch_size']['insert'],
                        SfmrTable, ['SPACE_TIME'],
                        self.session,
                        check_self=True)
                    end = time.process_time()
                    self.logger.debug(
                        (f'Bulk inserting sfmr data into {table_name} ' +
                         f'in {end-start:.2f} s'))
                    # Update SFMR records of hurricanes
                    date_ = datetime.datetime.strptime(
                        file_path.split('/')[-1].\
                        split('SFMR')[1][:8]+'000000',
                        '%Y%m%d%H%M%S').date()
                    self._update_hurr_record(hurr, date_, min_lat, max_lat,
                                             min_lon, max_lon)
        utils.delete_last_lines()
        print('Done')
Example #25
    def _compare_with_cwind(self, ccmp_file_path):
        file = ccmp_file_path.split('/')[-1]
        base_datetime = datetime.datetime(year=int(file[19:23]),
                                          month=int(file[23:25]),
                                          day=int(file[25:27]),
                                          hour=0,
                                          minute=0,
                                          second=0)

        dis2coast_array = []
        wspd_absolute_error = []
        wdir_absolute_error = []

        vars = netCDF4.Dataset(ccmp_file_path).variables
        ccmp_lat = vars['latitude']
        ccmp_lon = vars['longitude']

        lat_padding = np.zeros(92)
        ccmp_lat = np.append(ccmp_lat, lat_padding, axis=0)
        ccmp_lat = np.roll(ccmp_lat, 46, axis=0)

        cwind_station_class = utils.get_class_by_tablename(
            self.engine, cwind.CwindStation.__tablename__)

        cwind_station_query = self.session.query(cwind_station_class)
        total = cwind_station_query.count()
        count = 0

        for stn in cwind_station_query:
            count += 1
            info = f'Comparing CCMP with cwind station {stn.id}'
            print(f'\r{info} ({count}/{total})', end='')
            # extract cwind speed and direction
            cwind_data_table_name = f'cwind_{stn.id}'
            CwindData = utils.get_class_by_tablename(self.engine,
                                                     cwind_data_table_name)
            if CwindData is None:
                return None, None

            for h in self.hours:
                target_datetime = (base_datetime +
                                   datetime.timedelta(hours=self.hours[h]))
                cwind_match = self.session.query(CwindData).\
                        filter_by(datetime=target_datetime).first()
                if cwind_match is None:
                    continue

                map_padding = np.zeros((92, 1440))

                uwnd = vars['uwnd'][h, :, :]
                vwnd = vars['vwnd'][h, :, :]

                uwnd = np.append(uwnd, map_padding, axis=0)
                vwnd = np.append(vwnd, map_padding, axis=0)
                uwnd = np.roll(uwnd, 46, axis=0)
                vwnd = np.roll(vwnd, 46, axis=0)

                ccmp_wspd, ccmp_wdir = self._ccmp_near_cwind(
                    stn, ccmp_lat, ccmp_lon, uwnd, vwnd)

                if ccmp_wspd is None or ccmp_wdir is None:
                    continue

                cwind_wspd = cwind_match.wspd_10
                cwind_wdir = cwind_match.wdir

                dis2coast_array.append(stn.distance_to_coast)
                wspd_absolute_error.append(abs(cwind_wspd - ccmp_wspd))
                wdir_absolute_error.append(abs(cwind_wdir - ccmp_wdir))

        utils.delete_last_lines()
        print('Done')
        print('MAE of wind speed: ' +
              str(sum(wspd_absolute_error) / len(wspd_absolute_error)))
        print('MAE of wind direction: ' +
              str(sum(wdir_absolute_error) / len(wdir_absolute_error)))

        dis2coast_array = np.array(dis2coast_array)
        wspd_absolute_error = np.array(wspd_absolute_error)
        wdir_absolute_error = np.array(wdir_absolute_error)

        plt.subplot(2, 1, 1)
        ax_1 = sns.regplot(x=dis2coast_array, y=wspd_absolute_error, color='b')
        plt.xlabel('Distance to coast (km)')
        plt.ylabel('Wind speed absolute error (m/s)')
        plt.grid(True)

        plt.subplot(2, 1, 2)
        ax_2 = sns.regplot(x=dis2coast_array, y=wdir_absolute_error, color='g')
        plt.xlabel('Distance to coast (km)')
        plt.ylabel('Wind direction absolute error (degree)')
        plt.grid(True)

        plt.tight_layout()
        fig_path = (f'{self.CONFIG["result"]["dirs"]["fig"]}' +
                    f'ccmp_cwind_absolute_error_dis2coast.png')
        os.makedirs(os.path.dirname(fig_path), exist_ok=True)
        plt.savefig(fig_path)
        plt.show()
Example #26
    def _download_sfmr_data(self):
        """Download SFMR data of hurricanes.

        Parameters
        ----------
        None
            Nothing is required by this function.

        Returns
        -------
        None
            This function does not return anything.

        """
        self.logger.info(self.SFMR_CONFIG['prompt']['info']\
                         ['download_hurr'])
        utils.set_format_custom_text(self.SFMR_CONFIG['data_name_length'])
        suffix = '.nc'
        save_root_dir = self.SFMR_CONFIG['dirs']['hurr']
        os.makedirs(save_root_dir, exist_ok=True)

        total = 0
        count = 0
        for year in self.year_hurr.keys():
            total += len(self.year_hurr[year])

        for year in self.year_hurr.keys():
            hurrs = list(self.year_hurr[year])
            for hurr in hurrs:
                count += 1
                info = (f'Download SFMR data of hurricane {hurr} ' +
                        f'in {year}')
                self.logger.debug(info)
                if count > 1:
                    utils.delete_last_lines()
                print(f'\r{info} ({count}/{total})', end='')

                # Create directory to store SFMR files
                dir_path = f'{save_root_dir}{year}/{hurr}/'
                os.makedirs(dir_path, exist_ok=True)
                # Generate keyword to consist url
                keyword = f'{hurr}{year}'
                url = (f'{self.SFMR_CONFIG["urls"]["prefix"]}' + f'{keyword}' +
                       f'{self.SFMR_CONFIG["urls"]["suffix"]}')
                # Get page according to url
                page = requests.get(url)
                data = page.text
                soup = bs4.BeautifulSoup(data, features='lxml')
                anchors = soup.find_all('a')

                # Times of NetCDF file's date being in period
                for link in anchors:
                    href = link.get('href')
                    # Find href of netcdf file
                    if href.endswith(suffix):
                        # Extract file name
                        filename = href.split('/')[-1]
                        tail_half = filename.split('SFMR')[1]
                        try:
                            # There may be NetCDF names in a format
                            # like 'USAF_SFMR0809221638.nc'
                            # from 'https://www.aoml.noaa.gov/hrd'
                            # '/Storm_pages/kyle2008/sfmr.html'.
                            # It is very annoying and there seems to be
                            # no simple rule to detect this problem,
                            # because it is hard to distinguish
                            # 'SFMR20110536' from 'SFMR20110524'.
                            # The first is a case like kyle2008 and its
                            # actual date is 2020/11/05.
                            # The second is a normal case and its
                            # actual date is 2011/05/24.
                            # Before 2020, the following rule may work.
                            if (tail_half.startswith('20')
                                    or tail_half.startswith('199')):
                                date_str = tail_half[:8]
                                date_ = datetime.date(int(date_str[:4]),
                                                      int(date_str[4:6]),
                                                      int(date_str[6:]))
                            else:
                                date_str = tail_half[:6]
                                date_ = datetime.date(int(f'20{date_str[:2]}'),
                                                      int(date_str[2:4]),
                                                      int(date_str[4:]))
                                filename = (
                                    f'{filename.split("SFMR")[0]}SFMR20' +
                                    f'{filename.split("SFMR")[1]}')
                        except Exception as msg:
                            breakpoint()
                            exit(msg)
                        if not utils.check_period(date_, self.period):
                            continue
                        file_path = dir_path + filename

                        utils.download(href, file_path)

        utils.delete_last_lines()
        print('Done')
Example #27
    def read_tc_oriented(self, vars_mode, file_path):
        # load grib file
        grbs = pygrib.open(file_path)

        # Get TC table and count its row number
        tc_table_name = self.CONFIG['ibtracs']['table_name']
        TCTable = utils.get_class_by_tablename(self.engine, tc_table_name)
        tc_query = self.session.query(TCTable)
        total = tc_query.count()
        del tc_query
        count = 0
        info = f'Reading reanalysis data of TC records'
        self.logger.info(info)

        # Loop all row of TC table
        for row in self.session.query(TCTable).yield_per(
                self.CONFIG['database']['batch_size']['query']):

            # Get TC datetime
            tc_datetime = row.date_time

            # Get hit result and range of ERA5 data matrix near
            # TC center
            hit, lat1, lat2, lon1, lon2 = \
                    utils.get_subset_range_of_grib(
                        row.lat, row.lon, self.lat_grid_points,
                        self.lon_grid_points, self.edge, vars_mode='era5',
                        spatial_resolution=self.spa_resolu)
            if not hit:
                continue

            count += 1
            print(f'\r{info} {count}/{total}', end='')

            dirs = ['nw', 'sw', 'se', 'ne']
            r34 = dict()
            r34['nw'], r34['sw'], r34['se'], r34['ne'] = \
                    row.r34_nw, row.r34_sw, row.r34_se, row.r34_ne
            skip_compare = False
            for dir in dirs:
                if r34[dir] is None:
                    skip_compare = True
                    break
            if skip_compare:
                continue

            # Get name, sqlalchemy Table class and python original class
            # of ERA5 table
            table_name, sa_table, ERA5Table = self.get_era5_table_class(
                vars_mode, row.sid, tc_datetime)

            # Create entity of ERA5 table
            era5_table_entity = self._gen_whole_era5_table_entity(
                vars_mode, ERA5Table, lat1, lat2, lon1, lon2)

            # Record number of successfully reading data matrix of ERA5
            # grib file near TC center
            read_hit_count = 0

            # Loop all messages of grib file which consists of
            # all variables in all pressure levels
            for m in range(grbs.messages):
                grb = grbs.message(m + 1)

                # Generate datetime of message and compare it with TC's
                grb_date, grb_time = str(grb.dataDate), str(grb.dataTime)
                if grb_time == '0':
                    grb_time = '000'
                grb_datetime = datetime.datetime.strptime(
                    f'{grb_date}{grb_time}', '%Y%m%d%H%M%S')
                if tc_datetime != grb_datetime:
                    continue

                # extract corresponding data matrix in ERA5 reanalysis
                read_hit = self._read_grb_matrix(vars_mode, era5_table_entity,
                                                 grb, lat1, lat2, lon1, lon2)
                if read_hit:
                    read_hit_count += 1

            # Skip this turn of loop if not getting data matrix
            if not read_hit_count:
                continue

            # When the ERA5 table does not exist yet, sa_table is
            # not None and needs to be created first.
            if sa_table is not None:
                # Create table of ERA5 data cube
                sa_table.create(self.engine)
                self.session.commit()

            # Write extracted data matrix into DB
            start = time.process_time()
            if vars_mode == 'threeD':
                utils.bulk_insert_avoid_duplicate_unique(
                    era5_table_entity,
                    int(self.CONFIG['database']['batch_size']['insert'] / 10),
                    ERA5Table, ['x_y_z'],
                    self.session,
                    check_self=True)
            elif vars_mode == 'surface_wind' or vars_mode == 'surface_all_vars':
                utils.bulk_insert_avoid_duplicate_unique(
                    era5_table_entity,
                    int(self.CONFIG['database']['batch_size']['insert'] / 10),
                    ERA5Table, ['x_y'],
                    self.session,
                    check_self=True)
            end = time.process_time()

            self.logger.debug(f'Bulk inserting ERA5 data into '
                              f'{table_name} in {end-start:.2f} s')

            self.compare_ibtracs_era5(vars_mode,
                                      row,
                                      ERA5Table,
                                      draw=True,
                                      draw_map=True,
                                      draw_bar=False)
        utils.delete_last_lines()
        print('Done')