def get_global_phy_hourly(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi):
    """
    Retrieve the hourly physical (PHY) variables for a given time and
    latitude/longitude range, taking the temporal resolution of the dataset
    into account so that interpolated values can be calculated.
    """
    # the hourly analysis/forecast product only covers dates from 2019 onwards
    if date_lo < datetime(2019, 1, 1):
        return None
    logger.debug(
        'obtaining GLOBAL_ANALYSIS_FORECAST_PHY Hourly dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]' % (
            str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo), str(lon_hi)))
    CheckConnection.set_url('nrt.cmems-du.eu')
    base_url = 'https://nrt.cmems-du.eu/motu-web/Motu?action=productdownload&service=GLOBAL_ANALYSIS_FORECAST_PHY_001_024-TDS'
    products = ['global-analysis-forecast-phy-001-024-hourly-t-u-v-ssh',
                'global-analysis-forecast-phy-001-024-hourly-merged-uv']
    dataset_temporal_resolution = 60

    # snap date_lo down to the previous available timestamp;
    # available times are at minute 30 of each hour
    time_in_min = (date_lo.hour * 60) + date_lo.minute
    rest = time_in_min % dataset_temporal_resolution
    if date_lo.minute >= 30:
        t_lo = date_lo - timedelta(minutes=rest) + timedelta(minutes=30)
    else:
        t_lo = date_lo - timedelta(minutes=rest) - timedelta(minutes=30)

    # snap date_hi up to the next available timestamp
    time_in_min = (date_hi.hour * 60) + date_hi.minute
    rest = time_in_min % dataset_temporal_resolution
    if date_hi.minute >= 30:
        t_hi = date_hi + timedelta(minutes=(dataset_temporal_resolution - rest)) + timedelta(minutes=30)
    else:
        t_hi = date_hi + timedelta(minutes=(dataset_temporal_resolution - rest)) - timedelta(minutes=30)

    # coordinates
    y_lo = float(lat_lo)
    y_hi = float(lat_hi)
    x_lo = float(lon_lo)
    x_hi = float(lon_hi)

    # depth (surface layer only)
    z_hi = 0.50
    z_lo = 0.49

    url = base_url + '&product=' + products[0] + \
          '&x_lo={0}&x_hi={1}&y_lo={2}&y_hi={3}&t_lo={4}&t_hi={5}&z_lo={6}&z_hi={7}&mode=console'.format(
              x_lo, x_hi, y_lo, y_hi, date_to_str(t_lo), date_to_str(t_hi), z_lo, z_hi)
    return try_get_data(url)
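# A minimal usage sketch of get_global_phy_hourly; the time window and
# bounding box below are illustrative values, not from the original module.
from datetime import datetime

phy = get_global_phy_hourly(date_lo=datetime(2020, 6, 1, 10, 15),
                            date_hi=datetime(2020, 6, 1, 14, 45),
                            lat_lo=53.0, lat_hi=54.0,
                            lon_lo=7.0, lon_hi=8.0)
if phy is not None:
    print(phy)  # xarray.Dataset covering the snapped time window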
def get_global_phy_daily(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points, lat_points, lon_points):
    logger.debug(
        'obtaining GLOBAL_ANALYSIS_FORECAST_PHY Daily dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]' % (
            str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo), str(lon_hi)))
    # pick the analysis/forecast (NRT) or reanalysis service depending on the date
    if date_lo >= datetime(2019, 1, 2):
        CheckConnection.set_url('nrt.cmems-du.eu')
        base_url = 'https://nrt.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'GLOBAL_ANALYSIS_FORECAST_PHY_001_024-TDS'
        product = 'global-analysis-forecast-phy-001-024'
    elif date_lo >= datetime(1993, 1, 2):
        CheckConnection.set_url('my.cmems-du.eu')
        base_url = 'https://my.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'GLOBAL_REANALYSIS_PHY_001_030-TDS'
        product = 'global-reanalysis-phy-001-030-daily'
    else:
        # no daily product covers earlier dates; mirror get_global_phy_hourly
        return None

    # pad the window by one day on each side so edge values can be
    # interpolated; daily values are timestamped at 12:00
    t_lo = datetime(date_lo.year, date_lo.month, date_lo.day, 12) - timedelta(days=1)
    t_hi = datetime(date_hi.year, date_hi.month, date_hi.day, 12) + timedelta(days=1)

    # coordinates
    y_lo = float(lat_lo)
    y_hi = float(lat_hi)
    x_lo = float(lon_lo)
    x_hi = float(lon_hi)

    # depth (surface layer only)
    z_hi = 0.50
    z_lo = 0.49

    url = base_url + '&service=' + service + '&product=' + product + \
          '&x_lo={0}&x_hi={1}&y_lo={2}&y_hi={3}&t_lo={4}&t_hi={5}&z_lo={6}&z_hi={7}&mode=console'.format(
              x_lo, x_hi, y_lo, y_hi, date_to_str(t_lo), date_to_str(t_hi), z_lo, z_hi)
    dataset = try_get_data(url)
    return dataset.interp(longitude=lon_points, latitude=lat_points,
                          time=time_points).to_dataframe()[DAILY_PHY_VAR_LIST].reset_index(drop=True)
def get_global_wave(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points, lat_points, lon_points):
    """
    Retrieve all wave variables for a given time and latitude/longitude range,
    considering the temporal resolution of the dataset to calculate
    interpolated values.
    """
    logger.debug(
        'obtaining GLOBAL_REANALYSIS_WAV dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]' % (
            str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo), str(lon_hi)))
    dataset_temporal_resolution = 180
    if date_lo >= datetime(2019, 1, 1, 6):
        CheckConnection.set_url('nrt.cmems-du.eu')
        base_url = 'https://nrt.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'GLOBAL_ANALYSIS_FORECAST_WAV_001_027-TDS'
        product = 'global-analysis-forecast-wav-001-027'
    elif date_lo >= datetime(1993, 1, 1, 6):
        CheckConnection.set_url('my.cmems-du.eu')
        base_url = 'https://my.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'GLOBAL_REANALYSIS_WAV_001_032-TDS'
        product = 'global-reanalysis-wav-001-032'
    else:
        # no wave product covers earlier dates
        return None

    # coordinates
    y_lo = float(lat_lo)
    y_hi = float(lat_hi)
    x_lo = float(lon_lo)
    x_hi = float(lon_hi)

    # snap date_lo down to the previous available timestamp (3-hourly grid)
    time_in_min = (date_lo.hour * 60) + date_lo.minute
    rest = time_in_min % dataset_temporal_resolution
    t_lo = date_lo - timedelta(minutes=rest)

    # snap date_hi up to the next available timestamp
    time_in_min = (date_hi.hour * 60) + date_hi.minute
    rest = time_in_min % dataset_temporal_resolution
    t_hi = date_hi + timedelta(minutes=dataset_temporal_resolution - rest)

    url = base_url + '&service=' + service + '&product=' + product + \
          '&x_lo={0}&x_hi={1}&y_lo={2}&y_hi={3}&t_lo={4}&t_hi={5}&mode=console'.format(
              x_lo, x_hi, y_lo, y_hi, date_to_str(t_lo), date_to_str(t_hi))
    dataset = try_get_data(url)
    return dataset.interp(longitude=lon_points, latitude=lat_points, time=time_points).to_dataframe()[WAVE_VAR_LIST]
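# The snap-to-grid arithmetic above recurs in get_global_phy_hourly,
# get_global_wave, and get_global_wind with different resolutions and grid
# offsets. A hypothetical helper (not part of the original module) could
# centralize it:
from datetime import datetime, timedelta

def snap_to_grid(ts, resolution_min, offset_min=0, up=False):
    """Snap a timestamp to a time grid with the given resolution in minutes.

    offset_min shifts the grid (e.g. 30 for datasets sampled at minute 30);
    up=True snaps to the next grid point, otherwise to the previous one.
    """
    minutes = ts.hour * 60 + ts.minute - offset_min
    rest = minutes % resolution_min  # Python's modulo keeps this non-negative
    snapped = ts - timedelta(minutes=rest, seconds=ts.second, microseconds=ts.microsecond)
    if up and snapped < ts:
        snapped += timedelta(minutes=resolution_min)
    return snapped

# e.g. for the 3-hourly wave grid:
#   snap_to_grid(datetime(2020, 6, 1, 10, 15), 180)          -> 2020-06-01 09:00
#   snap_to_grid(datetime(2020, 6, 1, 10, 15), 180, up=True) -> 2020-06-01 12:00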
def get_files_list(year: int, resume_download: typing.List[str]) -> typing.List[str]:
    # url link to the AIS data for the requested year
    url = "https://coast.noaa.gov/htdata/CMSP/AISDataHandler/{0}/".format(year)
    CheckConnection.set_url('coast.noaa.gov')
    # request the html file
    html_text = requests.get(url).text
    # parse the html
    soup = BeautifulSoup(html_text, 'html.parser')
    # iterate over the <a> tags and collect the zip file links, skipping
    # files that were already downloaded and extracted (resume support)
    files = []
    for a in soup.find_all('a', href=True):
        if a.text and a.text.endswith('zip'):
            name = a['href'].split('.')[0]
            name = name.split('/')[-1] if len(name.split('/')) > 1 else name
            if name + '.csv' in resume_download or name + '.gdb' in resume_download:
                continue
            files.append(a['href'])
    return files
def get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points, lat_points, lon_points):
    logger.debug(
        'obtaining GFS 0.50 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]' % (
            str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo), str(lon_hi)))
    base_url = 'https://www.ncei.noaa.gov/thredds/model-gfs-g4-anl-files-old/'
    CheckConnection.set_url('ncei.noaa.gov')
    x_arr_list = []
    # start a day earlier so values around midnight can be interpolated
    start_date = datetime(date_lo.year, date_lo.month, date_lo.day) - timedelta(days=1)
    for day in range((date_hi - start_date).days + 1):
        dt = start_date + timedelta(days=day)
        catalog = TDSCatalog(
            '%s%s%.2d/%s%.2d%.2d/catalog.xml' % (base_url, dt.year, dt.month, dt.year, dt.month, dt.day))
        for hour in [3, 6]:
            for cycle in [0, 6, 12, 18]:
                attempts = 0
                while True:
                    try:
                        attempts += 1
                        name = 'gfsanl_4_%s%.2d%.2d_%.2d00_00%s.grb2' % (dt.year, dt.month, dt.day, cycle, hour)
                        if name in list(catalog.datasets):
                            ds_subset = catalog.datasets[name].subset()
                            query = ds_subset.query().lonlat_box(
                                north=lat_hi, south=lat_lo, east=lon_hi, west=lon_lo).variables(*GFS_50_VAR_LIST)
                            CheckConnection.is_online()
                            data = ds_subset.get_data(query)
                            x_arr = xr.open_dataset(NetCDF4DataStore(data))
                            if 'time1' in list(x_arr.coords):
                                x_arr = x_arr.rename({'time1': 'time'})
                            x_arr_list.append(x_arr)
                        else:
                            logger.warning('dataset %s is not found' % name)
                        break
                    except Exception as e:
                        logger.error(traceback.format_exc())
                        CheckConnection.is_online()
                        logger.error(e)
                        logger.error('Filename %s - Failed connecting to GFS Server - number of attempts: %d' % (
                            name, attempts))
                        time.sleep(2)
    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # the GFS grid uses 0-360 longitudes, so convert the query points from -180..180
    lon_points = lon_points % 360
    res = dataset.interp(lon=lon_points, lat=lat_points, time=time_points).to_dataframe()[GFS_50_VAR_LIST]
    # these variables are unavailable in the 0.50-degree product, so fill them with NaN
    res[['Wind_speed_gust_surface', 'Dewpoint_temperature_height_above_ground']] = [[np.nan, np.nan]] * len(res)
    return res
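# The inline retry loop above can be factored out. A sketch of that pattern
# for any zero-argument callable; the helper name, attempt cap, and delay are
# illustrative choices, not part of the original module (which retries
# indefinitely):
import time

def retry(fn, max_attempts=5, delay_s=2):
    """Call fn() until it succeeds, sleeping between failed attempts."""
    for attempt in range(1, max_attempts + 1):
        try:
            return fn()
        except Exception:
            logger.error('attempt %d/%d failed' % (attempt, max_attempts))
            if attempt == max_attempts:
                raise
            time.sleep(delay_s)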
def download_file(zipped_file: str, download_dir: Path, year: int) -> str:
    # url link to the AIS data for the requested year
    url = "https://coast.noaa.gov/htdata/CMSP/AISDataHandler/{0}/".format(year)
    CheckConnection.is_online()
    logger.info('downloading AIS file: %s' % zipped_file)
    # stream the zip file to disk in chunks
    with requests.get(os.path.join(url, zipped_file), stream=True) as req:
        req.raise_for_status()
        zipped_file = zipped_file.split('/')[-1] if len(zipped_file.split('/')) > 1 else zipped_file
        with open(zipped_file, "wb") as handle:
            for chunk in req.iter_content(chunk_size=8192):
                handle.write(chunk)
    # extract the zip file into the output directory, then delete it
    file_name = None
    with zipfile.ZipFile(zipped_file, 'r') as zip_ref:
        for f in zip_ref.infolist():
            if f.filename.endswith('.csv'):
                f.filename = os.path.basename(f.filename)
                file_name = f.filename
                zip_ref.extract(f, download_dir)
            if str(Path(f.filename).parent).endswith('.gdb'):
                zip_ref.extractall(download_dir)
                name = str(Path(f.filename).parent)
                gdb_file = Path(download_dir, name)
                file_name = name.split('.')[0] + '.csv'
                file_path = Path(download_dir, file_name)
                try:
                    # convert the geodatabase to csv in chunks
                    chunkify_gdb(gdb_file, file_path, chunkSize=100000)
                except Exception:
                    # discard the partial file so the download can resume cleanly later
                    file_path.unlink(missing_ok=True)
                    raise
                shutil.rmtree(gdb_file)
                break
    os.remove(zipped_file)
    return file_name
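# A minimal sketch of a resumable download loop combining get_files_list and
# download_file; the year and directory layout are illustrative.
from pathlib import Path

year = 2020
download_dir = Path(str(year))
download_dir.mkdir(parents=True, exist_ok=True)

# files already extracted are passed along so get_files_list can skip them
resume = [p.name for p in download_dir.iterdir()]
for zipped in get_files_list(year, resume_download=resume):
    csv_name = download_file(zipped, download_dir, year)
    logger.info('extracted %s' % csv_name)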
def get_global_wind(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points, lat_points, lon_points):
    logger.debug(
        'obtaining WIND_GLO_WIND_L4_NRT_OBSERVATIONS dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]' % (
            str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo), str(lon_hi)))
    dataset_temporal_resolution = 360
    if date_lo >= datetime(2018, 1, 1, 6):
        CheckConnection.set_url('nrt.cmems-du.eu')
        base_url = 'https://nrt.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'WIND_GLO_WIND_L4_NRT_OBSERVATIONS_012_004-TDS'
        product = 'CERSAT-GLO-BLENDED_WIND_L4-V6-OBS_FULL_TIME_SERIE'
    elif date_lo >= datetime(1992, 1, 1, 6):
        CheckConnection.set_url('my.cmems-du.eu')
        base_url = 'https://my.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'WIND_GLO_WIND_L4_REP_OBSERVATIONS_012_006-TDS'
        product = 'CERSAT-GLO-BLENDED_WIND_L4_REP-V6-OBS_FULL_TIME_SERIE'
    else:
        # no wind product covers earlier dates
        return None

    # snap date_lo down to the previous available timestamp (6-hourly grid)
    time_in_min = (date_lo.hour * 60) + date_lo.minute
    rest = time_in_min % dataset_temporal_resolution
    t_lo = date_lo - timedelta(minutes=rest)

    # snap date_hi up to the next available timestamp
    time_in_min = (date_hi.hour * 60) + date_hi.minute
    rest = time_in_min % dataset_temporal_resolution
    t_hi = date_hi + timedelta(minutes=dataset_temporal_resolution - rest)

    # coordinates
    y_lo = float(lat_lo)
    y_hi = float(lat_hi)
    x_lo = float(lon_lo)
    x_hi = float(lon_hi)

    url = base_url + '&service=' + service + '&product=' + product + \
          '&x_lo={0}&x_hi={1}&y_lo={2}&y_hi={3}&t_lo={4}&t_hi={5}&mode=console'.format(
              x_lo, x_hi, y_lo, y_hi, date_to_str(t_lo), date_to_str(t_hi))
    dataset = try_get_data(url)
    return dataset.interp(lon=lon_points, lat=lat_points, time=time_points).to_dataframe()[WIND_VAR_LIST]
def try_get_data(url):
    # initialize so the except block never hits unbound names
    response = None
    read_bytes = None
    try:
        CheckConnection.is_online()
        url_auth = authenticate_CAS_for_URL(url, config['UN_CMEMS'], config['PW_CMEMS'])
        response = open_url(url_auth)
        CheckConnection.is_online()
        read_bytes = response.read()
        CheckConnection.is_online()
        return xr.open_dataset(read_bytes)
    except Exception:
        logger.error(traceback.format_exc())
        # the server reports errors as an html page; extract the message if possible
        error = BeautifulSoup(read_bytes, 'html.parser').find('p', {"class": "error"}) if read_bytes else None
        raise ValueError('Error:', error, 'Request: ', url, response)
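# Callers can let the ValueError propagate or catch it; a short defensive
# sketch (the url value is a placeholder for any Motu request built by the
# get_global_* helpers above):
url = '...'  # a Motu request URL
try:
    ds = try_get_data(url)
except ValueError as err:
    logger.error('CMEMS request failed: %s' % (err,))
    ds = None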
                    required=True,
                    type=int,
                    choices=range(1, 1440))
parser.add_argument('-s', '--step',
                    help='Select the specific step to perform.',
                    required=False,
                    type=int,
                    choices=range(0, 4),
                    default=0)
parser.add_argument('-d', '--dir',
                    help='The output directory to collect csv files. By default the project directory is used.',
                    default='',
                    type=str,
                    required=False)
parser.add_argument('-c', '--clear',
                    help='Clears the raw output directory in order to free memory.',
                    action='store_true')
parser.add_argument('-f', '--depth-first',
                    help='Process the files depth-first.',
                    action='store_true')
args, unknown = parser.parse_known_args()

# initialize a thread that periodically checks the connection
connectionChecker = CheckConnection(check_interval=8)
connectionChecker.daemon = True
connectionChecker.start()

logger.info('Starting a task for year %s with subsampling of %d minutes. The output files will be saved to %s' % (
    str(args.year), int(args.minutes), args.dir if args.dir != '' else 'project directory'))

# initialize directories
download_dir = Path(args.dir, str(args.year))
merged_dir = Path(args.dir, str(args.year) + '_merged_%s' % args.minutes)
filtered_dir = Path(args.dir, '{0}_filtered_{1}'.format(str(args.year), args.minutes))

download_dir.mkdir(parents=True, exist_ok=True)
merged_dir.mkdir(parents=True, exist_ok=True)
filtered_dir.mkdir(parents=True, exist_ok=True)
interval = 10
import os
from flask import Flask, render_template, request, redirect, url_for
from flask_sqlalchemy import SQLAlchemy
from check_connection import CheckConnection

db_connection = "postgresql://*****:*****@172.17.0.2:5432/postgres"  # container test
print(db_connection)
# db_connection = "postgresql://*****:*****@localhost:5432/ac5"  # local test

sql_connection = db_connection.replace("/postgres", "")
conn_check = CheckConnection(connection_test=db_connection, sql_connection=sql_connection)
conn_check.check_connection()

app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = db_connection
db = SQLAlchemy(app)

class Aluno(db.Model):
    __tablename__ = "tbaluno_rafael_belmonte_izukawa"
    idAluno = db.Column(db.Integer, primary_key=True, autoincrement=True)
    ra = db.Column(db.Integer, unique=True, nullable=False)
    nome = db.Column(db.String(50), unique=True, nullable=False)
    email = db.Column(db.String(50), nullable=False)
    logradouro = db.Column(db.String(50), nullable=False)
    numero = db.Column(db.String(5))
    cep = db.Column(db.String(10))
    complemento = db.Column(db.String(20))

    def __init__(self, ra, nome, email, logradouro, numero, cep, complemento):
        self.ra = ra
        self.nome = nome
        self.email = email
        self.logradouro = logradouro
        self.numero = numero
        self.cep = cep
        self.complemento = complemento
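# A hypothetical route showing how the Aluno model can be queried; this is
# not part of the original file, and the template name is an assumption.
@app.route('/alunos')
def list_alunos():
    # fetch all students and render them with an (assumed) Jinja template
    alunos = Aluno.query.all()
    return render_template('alunos.html', alunos=alunos)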
def get_GFS(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points, lat_points, lon_points):
    logger.debug(
        'obtaining GFS 0.25 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]' % (
            str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo), str(lon_hi)))
    # calculate a day prior for midnight interpolation
    start_date = datetime(date_lo.year, date_lo.month, date_lo.day) - timedelta(days=1)

    # fall back to the 0.50-degree product outside the supported time range
    if start_date < datetime(2015, 1, 15):
        logger.debug('GFS 0.25 DATASET is out of supported range')
        return get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points, lat_points, lon_points)

    x_arr_list = []
    base_url = 'https://rda.ucar.edu/thredds/catalog/files/g/ds084.1'
    CheckConnection.set_url('rda.ucar.edu')
    http_util.session_manager.set_session_options(auth=(config['UN_RDA'], config['PW_RDA']))

    # fetch the 18z f006 forecast of the prior day to cover midnight
    start_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" % (
        base_url, start_date.year, start_date.year, start_date.month, start_date.day))
    ds_subset = start_cat.datasets[
        'gfs.0p25.%s%.2d%.2d18.f006.grib2' % (start_date.year, start_date.month, start_date.day)].subset()
    query = ds_subset.query().lonlat_box(
        north=lat_hi, south=lat_lo, east=lon_hi, west=lon_lo).variables(*GFS_25_VAR_LIST)
    CheckConnection.is_online()
    data = ds_subset.get_data(query)
    x_arr = xr.open_dataset(NetCDF4DataStore(data))
    if 'time1' in list(x_arr.coords):
        x_arr = x_arr.rename({'time1': 'time'})
    x_arr_list.append(x_arr)

    for day in range((date_hi - date_lo).days + 1):
        end_date = datetime(date_lo.year, date_lo.month, date_lo.day) + timedelta(days=day)
        end_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" % (
            base_url, end_date.year, end_date.year, end_date.month, end_date.day))
        for cycle in [0, 6, 12, 18]:
            for hours in [3, 6]:
                name = 'gfs.0p25.%s%.2d%.2d%.2d.f0%.2d.grib2' % (
                    end_date.year, end_date.month, end_date.day, cycle, hours)
                if name in list(end_cat.datasets):
                    ds_subset = end_cat.datasets[name].subset()
                    query = ds_subset.query().lonlat_box(
                        north=lat_hi, south=lat_lo, east=lon_hi, west=lon_lo).variables(*GFS_25_VAR_LIST)
                    CheckConnection.is_online()
                    data = ds_subset.get_data(query)
                    x_arr = xr.open_dataset(NetCDF4DataStore(data))
                    if 'time1' in list(x_arr.coords):
                        x_arr = x_arr.rename({'time1': 'time'})
                    x_arr_list.append(x_arr)
                else:
                    logger.warning('dataset %s is not found' % name)

    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # the GFS grid uses 0-360 longitudes, so convert the query points from -180..180
    lon_points = lon_points % 360
    # select along the 'bounds_dim' dimension present in the GRIB subset
    b = xr.DataArray([1] * len(lon_points))
    res = dataset.interp(longitude=lon_points, latitude=lat_points, time=time_points,
                         bounds_dim=b).to_dataframe()[GFS_25_VAR_LIST]
    return res
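# The GFS grids use the 0-360 longitude convention while query points are in
# -180..180; Python's modulo handles the conversion directly, since it always
# returns a non-negative result for a positive divisor. A quick check:
import numpy as np

lons = np.array([-180.0, -90.0, 0.0, 90.0, 179.75])
print(lons % 360)  # -> [180.   270.     0.    90.   179.75]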