Example #1
def get_global_phy_hourly(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi):
    """
        retrieve <phy> including ... variables for a specific timestamp, latitude, longitude considering
        the temporal resolution of the dataset to calculate interpolated values
    """
    if date_lo < datetime(2019, 1, 1): return None
    logger.debug(
        'obtaining GLOBAL_ANALYSIS_FORECAST_PHY Hourly dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))

    CheckConnection.set_url('nrt.cmems-du.eu')
    base_url = 'https://nrt.cmems-du.eu/motu-web/Motu?action=productdownload&service=GLOBAL_ANALYSIS_FORECAST_PHY_001_024-TDS'
    products = [
        'global-analysis-forecast-phy-001-024-hourly-t-u-v-ssh',
        'global-analysis-forecast-phy-001-024-hourly-merged-uv'
    ]
    dataset_temporal_resolution = 60
    time_in_min = (date_lo.hour * 60) + date_lo.minute
    rest = time_in_min % dataset_temporal_resolution

    # available times are at min 30 of each hour
    if date_lo.minute >= 30:
        t_lo = date_lo - timedelta(minutes=rest) + timedelta(minutes=30)
    else:
        t_lo = date_lo - timedelta(minutes=rest) - timedelta(minutes=30)

    time_in_min = (date_hi.hour * 60) + date_hi.minute
    rest = time_in_min % dataset_temporal_resolution

    if date_hi.minute >= 30:
        t_hi = date_hi + timedelta(minutes=(dataset_temporal_resolution -
                                            rest)) + timedelta(minutes=30)
    else:
        t_hi = date_hi + timedelta(minutes=(dataset_temporal_resolution -
                                            rest)) - timedelta(minutes=30)

    # coordinates
    y_lo = float(lat_lo)
    y_hi = float(lat_hi)
    x_lo = float(lon_lo)
    x_hi = float(lon_hi)

    # depth
    z_hi = 0.50
    z_lo = 0.49

    url = base_url + '&product=' + products[0] + \
          '&x_lo={0}&x_hi={1}&y_lo={2}&y_hi={3}&t_lo={4}&t_hi={5}&z_lo={6}&z_hi={7}&mode=console'.format(
              x_lo, x_hi, y_lo, y_hi, date_to_str(t_lo), date_to_str(t_hi), z_lo, z_hi)
    data = try_get_data(url)
    return data
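A minimal, standalone sketch of the same half-hour snapping (standard library only; the helper names below are illustrative and not part of the original module):

from datetime import datetime, timedelta

def snap_down_to_half_hour(ts):
    # lower bound: previous :30 mark of the 60-minute grid
    rest = ((ts.hour * 60) + ts.minute) % 60
    if ts.minute >= 30:
        return ts - timedelta(minutes=rest) + timedelta(minutes=30)
    return ts - timedelta(minutes=rest) - timedelta(minutes=30)

def snap_up_to_half_hour(ts):
    # upper bound: next :30 mark of the 60-minute grid
    rest = ((ts.hour * 60) + ts.minute) % 60
    if ts.minute >= 30:
        return ts + timedelta(minutes=60 - rest) + timedelta(minutes=30)
    return ts + timedelta(minutes=60 - rest) - timedelta(minutes=30)

print(snap_down_to_half_hour(datetime(2021, 3, 1, 10, 12)))  # 2021-03-01 09:30:00
print(snap_up_to_half_hour(datetime(2021, 3, 1, 10, 12)))    # 2021-03-01 10:30:00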
Example #2
def get_global_phy_daily(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi,
                         time_points, lat_points, lon_points):
    logger.debug(
        'obtaining GLOBAL_ANALYSIS_FORECAST_PHY Daily dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))

    if date_lo >= datetime(2019, 1, 2):
        CheckConnection.set_url('nrt.cmems-du.eu')
        base_url = 'https://nrt.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'GLOBAL_ANALYSIS_FORECAST_PHY_001_024-TDS'
        product = 'global-analysis-forecast-phy-001-024'
    elif date_lo >= datetime(1993, 1, 2):
        CheckConnection.set_url('my.cmems-du.eu')
        base_url = 'https://my.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'GLOBAL_REANALYSIS_PHY_001_030-TDS'
        product = 'global-reanalysis-phy-001-030-daily'

    t_lo = datetime(date_lo.year, date_lo.month, date_lo.day,
                    12) - timedelta(days=1)
    t_hi = datetime(date_hi.year, date_hi.month, date_hi.day,
                    12) + timedelta(days=1)

    # coordinates
    y_lo = float(lat_lo)
    y_hi = float(lat_hi)
    x_lo = float(lon_lo)
    x_hi = float(lon_hi)

    # depth
    z_hi = 0.50
    z_lo = 0.49

    url = base_url + '&service=' + service + '&product=' + product + \
          '&x_lo={0}&x_hi={1}&y_lo={2}&y_hi={3}&t_lo={4}&t_hi={5}&z_lo={6}&z_hi={7}&mode=console'.format(
              x_lo, x_hi, y_lo, y_hi, date_to_str(t_lo), date_to_str(t_hi), z_lo, z_hi)
    dataset = try_get_data(url)
    # interpolate to the requested points and return a flat DataFrame
    return dataset.interp(
        longitude=lon_points, latitude=lat_points,
        time=time_points).to_dataframe()[DAILY_PHY_VAR_LIST].reset_index(drop=True)
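The per-point interpolation in the return statement can be exercised offline on a synthetic dataset; this is only a sketch with made-up coordinates and a placeholder variable name ('thetao'), not the CMEMS product itself:

import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset(
    {'thetao': (('time', 'latitude', 'longitude'), np.random.rand(2, 3, 3))},
    coords={'time': pd.date_range('2021-01-01', periods=2, freq='D'),
            'latitude': [10.0, 10.5, 11.0],
            'longitude': [20.0, 20.5, 21.0]})

# pointwise interpolation: one value per (time, latitude, longitude) triple
points = ds.interp(
    longitude=xr.DataArray([20.1, 20.7]),
    latitude=xr.DataArray([10.2, 10.9]),
    time=xr.DataArray(pd.to_datetime(['2021-01-01T06', '2021-01-01T18'])))
print(points.to_dataframe()[['thetao']].reset_index(drop=True))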
Example #3
def get_global_wave(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi,
                    time_points, lat_points, lon_points):
    """
        retrieve all wave variables for a specific timestamp, latitude, longitude concidering
        the temporal resolution of the dataset to calculate interpolated values
    """
    logger.debug(
        'obtaining GLOBAL_REANALYSIS_WAV dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))

    dataset_temporal_resolution = 180
    if date_lo >= datetime(2019, 1, 1, 6):
        CheckConnection.set_url('nrt.cmems-du.eu')
        base_url = 'https://nrt.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'GLOBAL_ANALYSIS_FORECAST_WAV_001_027-TDS'
        product = 'global-analysis-forecast-wav-001-027'
    elif date_lo >= datetime(1993, 1, 1, 6):
        CheckConnection.set_url('my.cmems-du.eu')
        base_url = 'https://my.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'GLOBAL_REANALYSIS_WAV_001_032-TDS'
        product = 'global-reanalysis-wav-001-032'

    y_lo = float(lat_lo)
    y_hi = float(lat_hi)
    x_lo = float(lon_lo)
    x_hi = float(lon_hi)

    # time lower
    time_in_min = (date_lo.hour * 60) + date_lo.minute
    rest = time_in_min % dataset_temporal_resolution
    t_lo = date_lo - timedelta(minutes=rest)

    # time upper
    time_in_min = (date_hi.hour * 60) + date_hi.minute
    rest = time_in_min % dataset_temporal_resolution
    t_hi = date_hi + timedelta(minutes=dataset_temporal_resolution - rest)

    url = base_url + '&service=' + service + '&product=' + product + '&x_lo={0}&x_hi={1}&y_lo={2}&y_hi={3}&t_lo={4}&t_hi={5}&mode=console'.format(
        x_lo, x_hi, y_lo, y_hi, date_to_str(t_lo), date_to_str(t_hi))

    dataset = try_get_data(url)
    return dataset.interp(longitude=lon_points,
                          latitude=lat_points,
                          time=time_points).to_dataframe()[WAVE_VAR_LIST]
Example #4
def get_files_list(year: int, resume_download: typing.List[str]) -> typing.List[str]:
    # url link to data
    url = "https://coast.noaa.gov/htdata/CMSP/AISDataHandler/{0}/".format(year)
    # register the data host with the connection checker; files already downloaded
    # (listed in resume_download) are skipped when building the list below
    CheckConnection.set_url('coast.noaa.gov')

    # request the html file
    html_text = requests.get(url).text

    # parse the html
    soup = BeautifulSoup(html_text, 'html.parser')

    # iterate over the <a> tags and save each in a list
    files = []
    for a in soup.find_all('a', href=True):
        if a.text and a.text.endswith('zip'):
            name = a['href'].split('.')[0]
            name = name.split('/')[-1] if len(name.split('/')) > 1 else name
            if name + '.csv' in resume_download or name + '.gdb' in resume_download:
                continue
            files.append(a['href'])
    return files
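The anchor-scraping logic can be tested offline with an inline HTML snippet (the file names and resume list below are invented):

from bs4 import BeautifulSoup

html_text = '''
<a href="AIS_2019_01_01.zip">AIS_2019_01_01.zip</a>
<a href="AIS_2019_01_02.zip">AIS_2019_01_02.zip</a>
<a href="index.html">index</a>
'''
resume_download = ['AIS_2019_01_01.csv']  # pretend this file was already extracted

files = []
for a in BeautifulSoup(html_text, 'html.parser').find_all('a', href=True):
    if a.text and a.text.endswith('zip'):
        name = a['href'].split('.')[0].split('/')[-1]
        if name + '.csv' in resume_download or name + '.gdb' in resume_download:
            continue
        files.append(a['href'])

print(files)  # ['AIS_2019_01_02.zip']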
Example #5
def get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points,
               lat_points, lon_points):
    logger.debug(
        'obtaining GFS 0.50 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))
    base_url = 'https://www.ncei.noaa.gov/thredds/model-gfs-g4-anl-files-old/'
    CheckConnection.set_url('ncei.noaa.gov')

    x_arr_list = []
    start_date = datetime(date_lo.year, date_lo.month,
                          date_lo.day) - timedelta(days=1)
    for day in range((date_hi - start_date).days + 1):
        dt = datetime(start_date.year, start_date.month,
                      start_date.day) + timedelta(days=day)
        catalog = TDSCatalog(
            '%s%s%.2d/%s%.2d%.2d/catalog.xml' %
            (base_url, dt.year, dt.month, dt.year, dt.month, dt.day))
        for hour in [3, 6]:
            for cycle in [0, 6, 12, 18]:
                attempts = 0
                while True:
                    try:
                        attempts += 1
                        name = 'gfsanl_4_%s%.2d%.2d_%.2d00_00%s.grb2' % (
                            dt.year, dt.month, dt.day, cycle, hour)
                        if name in list(catalog.datasets):
                            ds_subset = catalog.datasets[name].subset()
                            query = ds_subset.query().lonlat_box(
                                north=lat_hi,
                                south=lat_lo,
                                east=lon_hi,
                                west=lon_lo).variables(*GFS_50_VAR_LIST)
                            CheckConnection.is_online()
                            data = ds_subset.get_data(query)
                            x_arr = xr.open_dataset(NetCDF4DataStore(data))
                            if 'time1' in list(x_arr.coords):
                                x_arr = x_arr.rename({'time1': 'time'})
                            x_arr_list.append(x_arr)
                        else:
                            logger.warning('dataset %s is not found' % name)
                        break
                    except Exception as e:
                        logger.error(traceback.format_exc())
                        CheckConnection.is_online()
                        logger.error(e)
                        logger.error(
                            'Filename %s - Failed connecting to GFS Server - number of attempts: %d'
                            % (name, attempts))
                        time.sleep(2)

    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # shift query longitudes into the dataset's 0-360 convention (valid for western-hemisphere longitudes)
    lon_points = ((lon_points + 180) % 360) + 180
    res = dataset.interp(lon=lon_points, lat=lat_points,
                         time=time_points).to_dataframe()[GFS_50_VAR_LIST]
    # columns expected downstream but not part of GFS_50_VAR_LIST are filled with NaN
    res[[
        'Wind_speed_gust_surface', 'Dewpoint_temperature_height_above_ground'
    ]] = [[np.nan, np.nan]] * len(res)
    return res
Example #6
def download_file(zipped_file: str, download_dir: Path, year: int) -> str:
    # url link to data
    url = "https://coast.noaa.gov/htdata/CMSP/AISDataHandler/{0}/".format(year)
    CheckConnection.is_online()
    logger.info('downloading AIS file: %s' % zipped_file)

    # stream the zip file to disk with requests
    with requests.get(os.path.join(url, zipped_file), stream=True) as req:
        req.raise_for_status()
        zipped_file = zipped_file.split('/')[-1] if len(zipped_file.split('/')) > 1 else zipped_file
        with open(zipped_file, "wb") as handle:
            for chunk in req.iter_content(chunk_size=8192):
                handle.write(chunk)
    # extract each zip file into output directory then delete it
    with zipfile.ZipFile(zipped_file, 'r') as zip_ref:
        for f in zip_ref.infolist():
            if f.filename.endswith('.csv'):
                f.filename = os.path.basename(f.filename)
                file_name = f.filename
                zip_ref.extract(f, download_dir)
            if str(Path(f.filename).parent).endswith('.gdb'):
                zip_ref.extractall(download_dir)
                name = str(Path(f.filename).parent)
                gdb_file = Path(download_dir, name)
                file_name = name.split('.')[0] + '.csv'
                file_path = Path(download_dir, file_name)
                try:
                    chunkify_gdb(gdb_file, file_path, chunkSize=100000)
                except Exception as e:
                    # discard the file in case of an error to resume later properly
                    if file_path:
                        file_path.unlink(missing_ok=True)
                    raise e
                shutil.rmtree(gdb_file)
                break
    os.remove(zipped_file)
    return file_name
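The extract-only-CSV behaviour can be checked without any download by building a small zip archive in memory (paths and contents below are invented):

import io
import os
import zipfile
from pathlib import Path

buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as zf:
    zf.writestr('nested/dir/AIS_2019_01_01.csv', 'MMSI,LAT,LON\n')
    zf.writestr('readme.txt', 'ignore me')

download_dir = Path('extracted_demo')
download_dir.mkdir(exist_ok=True)
with zipfile.ZipFile(buf, 'r') as zip_ref:
    for f in zip_ref.infolist():
        if f.filename.endswith('.csv'):
            # flatten the member path before extracting, as download_file does
            f.filename = os.path.basename(f.filename)
            zip_ref.extract(f, download_dir)

print(sorted(p.name for p in download_dir.iterdir()))  # ['AIS_2019_01_01.csv']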
Example #7
def get_global_wind(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi,
                    time_points, lat_points, lon_points):
    logger.debug(
        'obtaining WIND_GLO_WIND_L4_NRT_OBSERVATIONS dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))

    dataset_temporal_resolution = 360
    if date_lo >= datetime(2018, 1, 1, 6):
        CheckConnection.set_url('nrt.cmems-du.eu')
        base_url = 'https://nrt.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'WIND_GLO_WIND_L4_NRT_OBSERVATIONS_012_004-TDS'
        product = 'CERSAT-GLO-BLENDED_WIND_L4-V6-OBS_FULL_TIME_SERIE'
    elif date_lo >= datetime(1992, 1, 1, 6):
        CheckConnection.set_url('my.cmems-du.eu')
        base_url = 'https://my.cmems-du.eu/motu-web/Motu?action=productdownload'
        service = 'WIND_GLO_WIND_L4_REP_OBSERVATIONS_012_006-TDS'
        product = 'CERSAT-GLO-BLENDED_WIND_L4_REP-V6-OBS_FULL_TIME_SERIE'

    time_in_min = (date_lo.hour * 60) + date_lo.minute
    rest = time_in_min % dataset_temporal_resolution
    t_lo = date_lo - timedelta(minutes=rest)  # extract the lower bound

    time_in_min = (date_hi.hour * 60) + date_hi.minute
    rest = time_in_min % dataset_temporal_resolution
    t_hi = date_hi + timedelta(minutes=dataset_temporal_resolution - rest)

    y_lo = float(lat_lo)
    y_hi = float(lat_hi)
    x_lo = float(lon_lo)
    x_hi = float(lon_hi)

    url = base_url + '&service=' + service + '&product=' + product + '&x_lo={0}&x_hi={1}&y_lo={2}&y_hi={3}&t_lo={4}&t_hi={5}&mode=console'.format(
        x_lo, x_hi, y_lo, y_hi, date_to_str(t_lo), date_to_str(t_hi))
    dataset = try_get_data(url)
    return dataset.interp(lon=lon_points, lat=lat_points,
                          time=time_points).to_dataframe()[WIND_VAR_LIST]
Example #8
def try_get_data(url):
    response, read_bytes = None, None
    try:
        CheckConnection.is_online()
        url_auth = authenticate_CAS_for_URL(url, config['UN_CMEMS'],
                                            config['PW_CMEMS'])
        response = open_url(url_auth)
        CheckConnection.is_online()
        read_bytes = response.read()
        CheckConnection.is_online()
        return xr.open_dataset(read_bytes)
    except Exception:
        logger.error(traceback.format_exc())
        # extract the server-side error message only if a response body was received
        error_msg = BeautifulSoup(read_bytes, 'html.parser').find(
            'p', {"class": "error"}) if read_bytes else None
        raise ValueError('Error:', error_msg, 'Request: ', url, response)
Example #9
                        required=True, type=int, choices=range(1, 1440))
    parser.add_argument('-s', '--step', help='Select the specific step to perform.',
                        required=False, type=int, choices=range(0, 4), default=0)
    parser.add_argument('-d', '--dir',
                        help='The output directory to collect csv files. By default the root directory is used.',
                        default='', type=str, required=False)
    parser.add_argument('-c', '--clear',
                        help='Clears the raw output directory in order to free memory.',
                        action='store_true')
    parser.add_argument('-f', '--depth-first',
                        help='Clears the raw output directory in order to free memory.',
                        action='store_true')
    args, unknown = parser.parse_known_args()

    # initialize a Thread to check connection
    connectionChecker = CheckConnection(check_interval=8)
    connectionChecker.daemon = True
    connectionChecker.start()

    logger.info('Starting a task for year %s with subsampling of %d minutes. The output files will be saved to %s' % (
        str(args.year), int(args.minutes), args.dir if args.dir != '' else 'project directory'))

    # initialize directories
    download_dir = Path(args.dir, str(args.year))
    merged_dir = Path(args.dir, str(args.year) + '_merged_%s' % args.minutes)
    filtered_dir = Path(args.dir, '{0}_filtered_{1}'.format(str(args.year), args.minutes))
    download_dir.mkdir(parents=True, exist_ok=True)
    merged_dir.mkdir(parents=True, exist_ok=True)
    filtered_dir.mkdir(parents=True, exist_ok=True)

    interval = 10
Example #10
import os
from flask import Flask, render_template, request, redirect, url_for
from flask_sqlalchemy import SQLAlchemy
from check_connection import CheckConnection

db_connection = "postgresql://*****:*****@172.17.0.2:5432/postgres"  # container test
print(db_connection)
# db_connection = "postgresql://*****:*****@localhost:5432/ac5"  # local test
sql_connection = db_connection.replace("/postgres", "")

conn_check = CheckConnection(connection_test= db_connection, sql_connection=sql_connection)
conn_check.check_connection()


app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = db_connection
db = SQLAlchemy(app)

class Aluno(db.Model):
    __tablename__ = "tbaluno_rafael_belmonte_izukawa"
    idAluno = db.Column(db.Integer, primary_key=True, autoincrement=True)
    ra = db.Column(db.Integer, unique=True, nullable=False)
    nome = db.Column(db.String(50), unique=True, nullable=False)
    email = db.Column(db.String(50), nullable=False)
    logradouro = db.Column(db.String(50), nullable=False)
    numero = db.Column(db.String(5))
    cep = db.Column(db.String(10))
    complemento = db.Column(db.String(20))
    def __init__(self, ra, nome, email, logradouro, numero, cep, complemento):
        self.ra = ra
        self.nome = nome
        self.email = email
        self.logradouro = logradouro
        self.numero = numero
        self.cep = cep
        self.complemento = complemento
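A minimal sketch of persisting one Aluno row with the model above, assuming the configured database is reachable (the row values are made up):

with app.app_context():
    db.create_all()
    aluno = Aluno(ra=12345, nome='Maria Silva', email='maria@example.com',
                  logradouro='Rua A', numero='10', cep='01000-000', complemento='apto 1')
    db.session.add(aluno)
    db.session.commit()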
Example #11
def get_GFS(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points,
            lat_points, lon_points):
    logger.debug(
        'obtaining GFS 0.25 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))
    # include the previous day so values around midnight can be interpolated
    start_date = datetime(date_lo.year, date_lo.month,
                          date_lo.day) - timedelta(days=1)
    # consider the supported time range
    if start_date < datetime(2015, 1, 15):
        logger.debug('GFS 0.25 DATASET is out of supported range')
        return get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi,
                          time_points, lat_points, lon_points)
    x_arr_list = []
    base_url = 'https://rda.ucar.edu/thredds/catalog/files/g/ds084.1'
    CheckConnection.set_url('rda.ucar.edu')
    # authenticate the RDA THREDDS session
    http_util.session_manager.set_session_options(auth=(config['UN_RDA'],
                                                        config['PW_RDA']))
    start_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" %
                           (base_url, start_date.year, start_date.year,
                            start_date.month, start_date.day))
    ds_subset = start_cat.datasets['gfs.0p25.%s%.2d%.2d18.f006.grib2' %
                                   (start_date.year, start_date.month,
                                    start_date.day)].subset()
    query = ds_subset.query().lonlat_box(
        north=lat_hi, south=lat_lo, east=lon_hi,
        west=lon_lo).variables(*GFS_25_VAR_LIST)
    CheckConnection.is_online()
    data = ds_subset.get_data(query)
    x_arr = xr.open_dataset(NetCDF4DataStore(data))
    if 'time1' in list(x_arr.coords):
        x_arr = x_arr.rename({'time1': 'time'})
    x_arr_list.append(x_arr)

    for day in range((date_hi - date_lo).days + 1):
        end_date = datetime(date_lo.year, date_lo.month,
                            date_lo.day) + timedelta(days=day)
        end_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" %
                             (base_url, end_date.year, end_date.year,
                              end_date.month, end_date.day))
        for cycle in [0, 6, 12, 18]:
            for hours in [3, 6]:
                name = 'gfs.0p25.%s%.2d%.2d%.2d.f0%.2d.grib2' % (
                    end_date.year, end_date.month, end_date.day, cycle, hours)
                if name in list(end_cat.datasets):
                    ds_subset = end_cat.datasets[name].subset()
                    query = ds_subset.query().lonlat_box(
                        north=lat_hi, south=lat_lo, east=lon_hi,
                        west=lon_lo).variables(*GFS_25_VAR_LIST)
                    CheckConnection.is_online()
                    data = ds_subset.get_data(query)
                    x_arr = xr.open_dataset(NetCDF4DataStore(data))
                    if 'time1' in list(x_arr.coords):
                        x_arr = x_arr.rename({'time1': 'time'})
                    x_arr_list.append(x_arr)
                else:
                    logger.warning('dataset %s is not found' % name)
    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # shift query longitudes into the dataset's 0-360 convention (valid for western-hemisphere longitudes)
    lon_points = ((lon_points + 180) % 360) + 180
    b = xr.DataArray([1] * len(lon_points))
    res = dataset.interp(longitude=lon_points,
                         latitude=lat_points,
                         time=time_points,
                         bounds_dim=b).to_dataframe()[GFS_25_VAR_LIST]
    return res
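xr.combine_by_coords merges the per-cycle subsets along their shared coordinates before interpolation; a toy illustration with synthetic data and a made-up variable name:

import numpy as np
import pandas as pd
import xarray as xr

def make_chunk(t0):
    # one synthetic 'forecast chunk' covering two 3-hourly time steps
    return xr.Dataset(
        {'Temperature_surface': (('time', 'lat', 'lon'), np.random.rand(2, 2, 2))},
        coords={'time': pd.date_range(t0, periods=2, freq='3H'),
                'lat': [40.0, 40.5], 'lon': [250.0, 250.5]})

combined = xr.combine_by_coords([make_chunk('2021-01-01T00'), make_chunk('2021-01-01T06')]).squeeze()
print(combined.sizes)  # time: 4, lat: 2, lon: 2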