Code Example #1: Argo profile importer
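The importers below are excerpted from a larger codebase and all lean on the same imports plus a few module-level names (META_FIELDS, VARIABLES, DATATYPE_MAPPING, the ORM models) that the excerpts never define. A minimal sketch of what Example #1 assumes follows; the exact field names and module layout are assumptions, not the original definitions.

import glob
import os

import numpy as np
import pandas as pd
import seawater
import xarray as xr

import data.observational
from data.observational import DataType, Platform, Sample, Station

# Assumed: per-profile Argo metadata fields to copy onto the Platform.
# main() overwrites the values with the corresponding arrays for each file.
META_FIELDS = {
    "PROJECT_NAME": None,
    "PI_NAME": None,
    "DATA_CENTRE": None,
}

# Assumed: the per-level variables to import as Samples.
VARIABLES = ["TEMP", "PSAL"]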
def main(uri: str, filename: str):
    """Import Argo Profiles

    :param str uri: Database URI
    :param str filename: Argo NetCDF Filename, or directory of files
    """
    data.observational.init_db(uri, echo=False)
    data.observational.create_tables()

    session = data.observational.db.session

    # Cache of DataType objects, keyed by variable name.
    datatype_map = {}

    if os.path.isdir(filename):
        filenames = sorted(glob.glob(os.path.join(filename, "*.nc")))
    else:
        filenames = [filename]

    for fname in filenames:
        print(fname)
        with xr.open_dataset(fname) as ds:
            times = pd.to_datetime(ds.JULD.values)

            # Cache this file's per-profile metadata arrays, keyed by field
            # name, for use when building Platform attributes below.
            for f in META_FIELDS:
                META_FIELDS[f] = ds[f].values.astype(str)

            for prof in ds.N_PROF.values:
                plat_number = ds.PLATFORM_NUMBER.values.astype(str)[prof]
                unique_id = f"argo_{plat_number}"

                # Grab the platform from the db based on the unique id
                platform = (session.query(Platform).filter(
                    Platform.unique_id == unique_id,
                    Platform.type == Platform.Type.argo,
                ).first())
                if platform is None:
                    # ... or make a new platform
                    platform = Platform(type=Platform.Type.argo,
                                        unique_id=unique_id)
                    attrs = {}
                    for f in META_FIELDS:
                        attrs[ds[f].long_name] = META_FIELDS[f][prof].strip()

                    platform.attrs = attrs
                    session.add(platform)

                # Make a new Station
                station = Station(
                    time=times[prof],
                    latitude=ds.LATITUDE.values[prof],
                    longitude=ds.LONGITUDE.values[prof],
                )
                platform.stations.append(station)
                # We need to commit the station here so that it'll have an id
                session.commit()

                depth = seawater.dpth(ds.PRES[prof].dropna("N_LEVELS").values,
                                      ds.LATITUDE.values[prof])

                for variable in VARIABLES:
                    # First check our local cache for the DataType object, if
                    # that comes up empty, check the db, and failing that,
                    # create a new one from the variable's attributes
                    if variable not in datatype_map:
                        dt = DataType.query.get(ds[variable].standard_name)
                        if dt is None:
                            dt = DataType(
                                key=ds[variable].standard_name,
                                name=ds[variable].long_name,
                                unit=ds[variable].units,
                            )

                            data.observational.db.session.add(dt)
                            # Commit the DataType right away. This might lead
                            # to a few extra commits on the first import, but
                            # reduces overall complexity in having to
                            # 'remember' if we added a new one later.
                            data.observational.db.session.commit()
                        # Cache the DataType whether it was found or newly
                        # created, so each variable is only queried once.
                        datatype_map[variable] = dt
                    else:
                        dt = datatype_map[variable]

                    values = ds[variable][prof].dropna("N_LEVELS").values

                    # Using station_id and datatype_key here instead of the
                    # actual objects so that we can use bulk_save_objects--this
                    # is much faster, but it doesn't follow any relationships.
                    samples = [
                        Sample(
                            depth=pair[0],
                            datatype_key=dt.key,
                            value=pair[1],
                            station_id=station.id,
                        ) for pair in zip(depth, values)
                    ]

                    data.observational.db.session.bulk_save_objects(samples)

                session.commit()
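The importers query and persist four ORM models that the excerpts never define. The real schema lives in data.observational (the .query property suggests a Flask-SQLAlchemy setup); a minimal plain-SQLAlchemy sketch consistent with how the code uses the models, with all table names and column types assumed, might look like this:

import enum

from sqlalchemy import (JSON, Column, DateTime, Enum, Float, ForeignKey,
                        Integer, String)
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()


class Platform(Base):
    __tablename__ = "platforms"

    class Type(enum.Enum):
        argo = 0
        drifter = 1
        animal = 2
        mission = 3
        glider = 4

    id = Column(Integer, primary_key=True)
    type = Column(Enum(Type))
    unique_id = Column(String, unique=True)
    attrs = Column(JSON)
    stations = relationship("Station", backref="platform")


class Station(Base):
    __tablename__ = "stations"

    id = Column(Integer, primary_key=True)
    platform_id = Column(Integer, ForeignKey("platforms.id"))
    time = Column(DateTime)
    latitude = Column(Float)
    longitude = Column(Float)


class DataType(Base):
    __tablename__ = "datatypes"

    key = Column(String, primary_key=True)
    name = Column(String)
    unit = Column(String)


class Sample(Base):
    __tablename__ = "samples"

    id = Column(Integer, primary_key=True)
    station_id = Column(Integer, ForeignKey("stations.id"))
    datatype_key = Column(String, ForeignKey("datatypes.key"))
    depth = Column(Float)
    value = Column(Float)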
Code Example #2: Glider NetCDF importer
def main(uri: str, filename: str):
    """Import Glider NetCDF

    :param str uri: Database URI
    :param str filename: Glider Filename, or directory of NetCDF files
    """
    data.observational.init_db(uri, echo=False)
    data.observational.create_tables()

    if os.path.isdir(filename):
        filenames = sorted(glob.glob(os.path.join(filename, "*.nc")))
    else:
        filenames = [filename]

    datatype_map = {}
    for fname in filenames:
        print(fname)
        with xr.open_dataset(fname) as ds:
            variables = [v for v in VARIABLES if v in ds.variables]
            df = ds[['TIME', 'LATITUDE', 'LONGITUDE', 'PRES',
                     *variables]].to_dataframe().reset_index().dropna()

            df['DEPTH'] = seawater.dpth(df.PRES, df.LATITUDE)

            for variable in variables:
                if variable not in datatype_map:
                    dt = DataType.query.get(ds[variable].standard_name)
                    if dt is None:
                        dt = DataType(key=ds[variable].standard_name,
                                      name=ds[variable].long_name,
                                      unit=ds[variable].units)
                        data.observational.db.session.add(dt)

                    datatype_map[variable] = dt

            data.observational.db.session.commit()

            p = Platform(type=Platform.Type.glider,
                         unique_id=f"glider_{ds.deployment_label}")
            attrs = {
                'Glider Platform': ds.platform_code,
                'WMO': ds.wmo_platform_code,
                'Deployment': ds.deployment_label,
                'Institution': ds.institution,
                'Contact': ds.contact,
            }
            p.attrs = attrs
            data.observational.db.session.add(p)
            data.observational.db.session.commit()

            stations = [
                Station(
                    platform_id=p.id,
                    time=row.TIME,
                    latitude=row.LATITUDE,
                    longitude=row.LONGITUDE,
                ) for idx, row in df.iterrows()
            ]

            # Using return_defaults=True here so that the stations will get
            # updated with id's. It's slower, but it means that we can just
            # put all the station ids into a pandas series to use when
            # constructing the samples.
            data.observational.db.session.bulk_save_objects(
                stations, return_defaults=True)
            df["STATION_ID"] = [s.id for s in stations]

            samples = [[
                Sample(station_id=row.STATION_ID,
                       depth=row.DEPTH,
                       value=row[variable],
                       datatype_key=datatype_map[variable].key)
                for variable in variables
            ] for idx, row in df.iterrows()]
            data.observational.db.session.bulk_save_objects(
                [item for sublist in samples for item in sublist])
            data.observational.db.session.commit()

        data.observational.db.session.commit()
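Examples #2 and #4 build a nested list of Samples and then flatten it with a nested comprehension. itertools.chain.from_iterable is an equivalent, arguably clearer spelling of the same step:

import itertools

# Equivalent to: [item for sublist in samples for item in sublist]
flat_samples = list(itertools.chain.from_iterable(samples))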
Code Example #3: CONCEPTS drifter importer
def main(uri: str, filename: str):
    """Import CONCEPTS drifter NetCDF

    :param str uri: Database URI
    :param str filename: Drifter Filename, or directory of NetCDF files
    """
    data.observational.init_db(uri, echo=False)
    data.observational.create_tables()

    if os.path.isdir(filename):
        filenames = sorted(glob.glob(os.path.join(filename, "*.nc")))
    else:
        filenames = [filename]

    for fname in filenames:
        print(fname)
        with xr.open_dataset(fname) as ds:
            df = ds.to_dataframe().drop(['wmo', 'deployment', 'imei'], axis=1)
            columns = [c for c in df.columns if c in DATATYPE_MAPPING]

            dt_map = {}
            for c in columns:
                # First check our local cache for the DataType object, if
                # that comes up empty, check the db, and failing that,
                # create a new one.
                if c not in dt_map:
                    dt = DataType.query.get(DATATYPE_MAPPING[c][0])
                    if dt is None:
                        dt = DataType(key=DATATYPE_MAPPING[c][0],
                                      name=DATATYPE_MAPPING[c][1],
                                      unit=DATATYPE_MAPPING[c][2])

                        data.observational.db.session.add(dt)

                    dt_map[c] = dt

            # Commit to make sure all the variables are in the db so we don't
            # get any foreign key errors
            data.observational.db.session.commit()

            p = Platform(type=Platform.Type.drifter)
            attrs = dict(ds.attrs)
            attrs['wmo'] = ds.wmo.values[0]
            attrs['deployment'] = ds.deployment.values[0]
            attrs['imei'] = ds.imei.values[0]
            p.attrs = attrs
            data.observational.db.session.add(p)
            data.observational.db.session.commit()

            samples = []
            for index, row in df.iterrows():
                time = index[0]
                lat = row['latitude']
                lon = row['longitude']

                station = Station(time=time,
                                  latitude=lat,
                                  longitude=lon,
                                  platform_id=p.id)
                data.observational.db.session.bulk_save_objects(
                    [station], return_defaults=True)

                for c in columns:
                    value = row[c]
                    if isinstance(value, pd.Timestamp):
                        value = value.value / 10**9

                    if np.isfinite(value):
                        samples.append(
                            Sample(depth=0,
                                   datatype_key=DATATYPE_MAPPING[c][0],
                                   value=value,
                                   station_id=station.id))

                # Commit every 1000 samples; that's a decent balance between
                # locking the db for too long and overall performance.
                if len(samples) > 1000:
                    data.observational.db.session.bulk_save_objects(samples)
                    data.observational.db.session.commit()
                    samples = []

            # If there are any samples that haven't been committed yet, do so
            # now.
            if samples:
                data.observational.db.session.bulk_save_objects(samples)
                data.observational.db.session.commit()

        data.observational.db.session.commit()
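Example #3 converts any timestamp-valued sample to epoch seconds before storing it. pd.Timestamp.value is nanoseconds since 1970-01-01 UTC, so dividing by 10**9 gives seconds:

import pandas as pd

ts = pd.Timestamp("2020-01-01T00:00:00Z")
seconds = ts.value / 10**9  # .value is nanoseconds since the epoch
assert seconds == 1577836800.0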
Code Example #4: Seal profile importer
def main(uri: str, filename: str):
    """Import Seal Profiles

    :param str uri: Database URI
    :param str filename: Seal NetCDF Filename, or directory of files
    """
    data.observational.init_db(uri, echo=False)
    data.observational.create_tables()

    if os.path.isdir(filename):
        filenames = sorted(glob.glob(os.path.join(filename, "*.nc")))
    else:
        filenames = [filename]

    for fname in filenames:
        print(fname)
        # We're only loading Temperature and Salinity from these files, so
        # we'll just make sure the DataTypes are in the db now.
        if DataType.query.get("sea_water_temperature") is None:
            dt = DataType(
                key="sea_water_temperature",
                name="Water Temperature",
                unit="degree_Celsius",
            )
            data.observational.db.session.add(dt)

        if DataType.query.get("sea_water_salinity") is None:
            dt = DataType(key="sea_water_salinity",
                          name="Water Salinity",
                          unit="PSU")
            data.observational.db.session.add(dt)

        data.observational.db.session.commit()

        with xr.open_dataset(fname) as ds:
            ds["TIME"] = ds.JULD.to_index().to_datetimeindex()
            ds["TIME"] = ds.TIME.swap_dims({"TIME": "N_PROF"})
            depth = seawater.dpth(
                ds.PRES_ADJUSTED,
                np.tile(ds.LATITUDE, (ds.PRES.shape[1], 1)).transpose(),
            )
            ds["DEPTH"] = (["N_PROF", "N_LEVELS"], depth)

            # This is a single platform, so we can construct it here.
            p = Platform(type=Platform.Type.animal,
                         unique_id=ds.reference_file_name)
            p.attrs = {
                "Principle Investigator": ds.pi_name,
                "Platform Code": ds.platform_code,
                "Species": ds.species,
            }

            data.observational.db.session.add(p)
            data.observational.db.session.commit()

            # Generate Stations
            df = ds[["LATITUDE", "LONGITUDE", "TIME"]].to_dataframe()
            stations = [
                Station(
                    platform_id=p.id,
                    latitude=row.LATITUDE,
                    longitude=row.LONGITUDE,
                    time=row.TIME,
                ) for idx, row in df.iterrows()
            ]

            # Using return_defaults=True here so that the stations will get
            # updated with id's. It's slower, but it means that we can just
            # put all the station ids into a pandas series to use when
            # constructing the samples.
            data.observational.db.session.bulk_save_objects(
                stations, return_defaults=True)
            df["STATION_ID"] = [s.id for s in stations]

            # Generate Samples
            df_samp = (ds[["TEMP_ADJUSTED", "PSAL_ADJUSTED",
                           "DEPTH"]].to_dataframe().reorder_levels(
                               ["N_PROF", "N_LEVELS"]))

            samples = [[
                Sample(
                    station_id=df.STATION_ID[idx[0]],
                    datatype_key="sea_water_temperature",
                    value=row.TEMP_ADJUSTED,
                    depth=row.DEPTH,
                ),
                Sample(
                    station_id=df.STATION_ID[idx[0]],
                    datatype_key="sea_water_salinity",
                    value=row.PSAL_ADJUSTED,
                    depth=row.DEPTH,
                ),
            ] for idx, row in df_samp.iterrows()]
            samples = [item for sublist in samples for item in sublist]
            samples = [s for s in samples if not pd.isna(s.value)]

            data.observational.db.session.bulk_save_objects(samples)
            data.observational.db.session.commit()
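The JULD handling in Example #4 assumes the time coordinate decodes to an xarray CFTimeIndex (i.e. a non-standard calendar), whose to_datetimeindex() method yields a plain pandas DatetimeIndex. The conversion in isolation, with a hypothetical filename:

import xarray as xr

with xr.open_dataset("seal_profiles.nc") as ds:  # hypothetical filename
    # to_index() returns a CFTimeIndex for non-standard calendars;
    # to_datetimeindex() converts it to a pandas DatetimeIndex.
    times = ds.JULD.to_index().to_datetimeindex()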
Code Example #5: NAFC CTD importer
def main(uri: str, filename: str):
    """Import NAFC CTD

    :param str uri: Database URI
    :param str filename: NetCDF file, or directory of files
    """
    data.observational.init_db(uri, echo=False)
    data.observational.create_tables()

    datatype_map = {}

    if os.path.isdir(filename):
        filenames = sorted(glob.glob(os.path.join(filename, "*.nc")))
    else:
        filenames = [filename]

    for fname in filenames:
        print(fname)
        with xr.open_dataset(fname) as ds:
            if not datatype_map:
                # Generate the DataTypes; only consider variables that have a
                # depth ('level') coordinate.
                for var in [v for v in ds.data_vars
                            if 'level' in ds[v].coords]:
                    dt = DataType.query.get(ds[var].standard_name)
                    if dt is None:
                        dt = DataType(key=ds[var].standard_name,
                                      name=ds[var].long_name,
                                      unit=ds[var].units)
                    datatype_map[var] = dt

                data.observational.db.session.add_all(datatype_map.values())

            # Query or generate the platform
            # The files I worked off of were not finalized -- in this case the
            # trip id also included the cast number, so I strip off the last 3
            # digits.
            unique_id = f"nafc_ctd_{ds.trip_id[:-3]}"
            p = Platform.query.filter(
                Platform.unique_id == unique_id).one_or_none()
            if p is None:
                p = Platform(type=Platform.Type.mission, unique_id=unique_id)
                p.attrs = {
                    'Institution': ds.institution,
                    'Trip ID': ds.trip_id[:-3],
                    'Ship Name': ds.shipname,
                }
                data.observational.db.session.add(p)

            # Generate the station
            s = Station(
                latitude=ds.latitude.values[0],
                longitude=ds.longitude.values[0],
                time=pd.Timestamp(ds.time.values[0]),
            )
            p.stations.append(s)
            data.observational.db.session.commit()

            ds['level'] = seawater.dpth(ds.level.values, ds.latitude[0].values)

            # Generate the samples
            for var, dt in datatype_map.items():
                da = ds[var].dropna('level')
                samples = [
                    Sample(value=d.item(),
                           depth=d.level.item(),
                           datatype_key=dt.key,
                           station_id=s.id) for d in da
                ]
                data.observational.db.session.bulk_save_objects(samples)

            data.observational.db.session.commit()
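All of these importers share the main(uri, filename) signature, so any of them can be driven by the same thin command-line wrapper. A minimal argparse sketch (the import path is a placeholder, not the project's real layout):

import argparse

from load_argo import main  # placeholder import path

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Import observational data")
    parser.add_argument("uri", help="Database URI")
    parser.add_argument("filename", help="Data file, or directory of files")
    args = parser.parse_args()
    main(args.uri, args.filename)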
Code Example #6: CIOOS CSV importer
def main(uri: str, filename: str):
    """Import Seal Profiles
    :param str uri: Database URI
    :param str filename: CIOOS csv Filename, or directory of files
    """
    data.observational.init_db(uri, echo=False)
    data.observational.create_tables()

    if os.path.isdir(filename):
        filenames = sorted(glob.glob(os.path.join(filename, "*.csv")))
    else:
        filenames = [filename]

    for fname in filenames:
        print(fname)
        ds = pd.read_csv(fname)

        # We're only loading wind speed from these files, so we'll just make
        # sure the DataType is in the db now.
        if DataType.query.get('wind_speed') is None:
            dt = DataType(key='wind_speed', name='Wind Speed', unit='m s-1')
            data.observational.db.session.add(dt)

        data.observational.db.session.commit()

        # This is a single platform, so we can construct it here.
        # NOTE: Platform.Type.animal mirrors the seal importer; there may be
        # a better Type for CIOOS stations.
        p = Platform(type=Platform.Type.animal, unique_id=fname)

        # The CSV carries no platform-level metadata, so attrs stays empty.
        p.attrs = {}

        data.observational.db.session.add(p)
        data.observational.db.session.commit()

        # Generate Stations
        df = ds[['latitude', 'longitude', 'time']].copy()
        df['time'] = pd.to_datetime(df['time'])

        stations = [
            Station(
                platform_id=p.id,
                latitude=float(row.latitude),
                longitude=float(row.longitude),
                time=row.time,
            ) for idx, row in df.iterrows()
        ]

        # Using return_defaults=True here so that the stations will get
        # updated with id's. It's slower, but it means that we can just
        # put all the station ids into a pandas series to use when
        # constructing the samples.
        data.observational.db.session.bulk_save_objects(stations,
                                                        return_defaults=True)
        df['STATION_ID'] = [s.id for s in stations]

        # Generate Samples. Each CSV row holds a single surface wind-speed
        # reading, so every sample gets depth 0; rows and stations share the
        # same index, which lets us look up the station id directly.
        samples = [
            Sample(
                station_id=df.STATION_ID[idx],
                datatype_key='wind_speed',
                value=row.wind_spd_avg,
                depth=0,
            ) for idx, row in ds.iterrows()
        ]
        samples = [s for s in samples if not pd.isna(s.value)]

        data.observational.db.session.bulk_save_objects(samples)
        data.observational.db.session.commit()
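Example #6 expects the CSV to provide at least latitude, longitude, time, and wind_spd_avg columns; a hypothetical minimal input:

# Hypothetical minimal input for the importer above:
csv_text = """latitude,longitude,time,wind_spd_avg
47.56,-52.71,2020-01-01T00:00:00Z,12.3
47.57,-52.70,2020-01-01T01:00:00Z,10.8
"""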