Example #1
def check_find_person(session):
    p0 = api.find_person(session, last_name='Reeves')
    p1 = api.find_person(session, attribution='Curie, Marie, Awesome.')
    p2 = api.find_person(session, uuid='1e8402f8-4d11-41dd-85c7-93f9d95225e1')

    assert p0[0].first_name == 'Keanu'
    assert p1[0].uuid == 'e05c3360-1120-4c9b-a181-659da986b061'
    assert p2.organisation_name == 'University of Non-existent people'

    return True
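The find_* helpers above return a plain list of matches by default, while later examples pass return_iterator=True and chain .one() or .first() on the result. A minimal sketch of both modes, reusing the session and api from above:

# default mode: a list of matching records
persons = api.find_person(session, last_name='Hughes')
first = persons[0]

# iterator mode: the underlying query is returned instead, so
# SQLAlchemy's .one() / .first() can be chained directly
hughes = api.find_person(session, last_name='Hughes',
                         return_iterator=True).one()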
Example #2
def add_partial_invention(session):
    """
    Add a partial invention to 'Inventions made by David Edward Hughes'.
    This is used to test the find_entry behavior.
    """
    hughes = api.find_person(session, last_name='Hughes',
                             return_iterator=True).one()

    e3 = models.Entry(title='Warp drive',
                      abstract='Started, but never finished',
                      location="SRID=4326;POINT (51.505946 -0.132951)",
                      license_id=5,
                      variable_id=1,
                      is_partial=True)

    e3.contributors.append(
        models.PersonAssociation(relationship_type_id=1,
                                 person=hughes,
                                 order=1))

    inventions = api.find_group(session,
                                title='Awesome inventions',
                                return_iterator=True).one()
    inventions.entries.append(e3)

    try:
        session.add(e3)
        session.commit()
    except Exception as e:
        session.rollback()
        raise e

    assert len(inventions.entries) == 3

    return True
Example #3
def add_split_dataset(session):
    # create dummy data
    data = pd.DataFrame(
        data={
            'value': np.random.normal(10, 1, size=350),
            'tstamp': pd.date_range('201309241100', periods=350, freq='15min')
        })
    data.set_index('tstamp', inplace=True)

    # add two entries as split datasets
    kit = api.find_person(session, organisation_abbrev='KIT')[0]
    historical_entry = api.add_entry(session,
                                     title='Historical data',
                                     abstract='Long description',
                                     location=(4.2, 42),
                                     variable=1,
                                     license=6,
                                     author=kit.id)
    recent_entry = api.add_entry(
        session,
        title='Recent data',
        abstract='something bad happened that needs description',
        location=(4.2, 42),
        variable=1,
        license=6,
        author=kit.id)

    # create datasource
    historical_entry.create_datasource(type=1,
                                       path='timeseries',
                                       datatype='timeseries')
    recent_entry.create_datasource(type=1,
                                   path='timeseries',
                                   datatype='timeseries')

    # split the data
    historical_entry.import_data(data=data.iloc[:300, :])
    recent_entry.import_data(data=data.iloc[300:, :])

    full_dataset = api.add_group(session, 'Split dataset',
                                 [historical_entry.id, recent_entry.id])

    # checkout
    result = api.find_entry(session, id=recent_entry.id, as_result=True)[0]

    # recover data
    db_data = result.get_data()

    # search for the checksum - result.checksums lists the member checksums, of which there is only one here
    assert len(result.checksums) == 1
    checksum = result.checksums[0]
    assert checksum in db_data

    recovered_data = db_data[checksum].values
    assert_array_almost_equal(data.values, recovered_data)

    return True
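A result's get_data() returns one data block per member checksum, so a split dataset grouped as above can be stitched back together. A hedged sketch, assuming the returned values are DataFrames indexed by timestamp as in this example:

# 'result' obtained via api.find_entry(..., as_result=True) as above
db_data = result.get_data()

# one frame per member checksum; concatenating and sorting along the
# time index recovers the originally split series in order
full = pd.concat(db_data.values()).sort_index()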
Example #4
def auto_force_data_names(session, df_1D_wind, df_3D_prec):
    """
    If len(data_columns) != len(entry.variable.column_names) force_data_names
    should automatically become True and the column names of the imported data
    should be saved in datasource.data_names.
    To test this, we add 1D wind data (with 3D precision) to the 3D wind
    variable with variable.column_names=['u', 'v', 'w'].
    """
    # find the variable
    var_3D_wind = api.find_variable(session, name='3D-wind')[0]

    # find the previously added person
    kit = api.find_person(session, organisation_abbrev='KIT')[0]

    # add the entry
    entry_1D_precision = api.add_entry(
        session,
        title='1-dimensional windspeed data, precision',
        abstract='1-dimensional windspeed data',
        location=(8, 52),
        variable=var_3D_wind.id,
        comment='after double rotation',
        license=6,
        author=kit.id,
        embargo=False,
        is_partial=False)
    # create datasource and scale
    entry_1D_precision.create_datasource(type=1,
                                         path='timeseries',
                                         datatype='timeseries')

    entry_1D_precision.datasource.create_scale(resolution='30min',
                                               extent=(df_1D_wind.index[0],
                                                       df_1D_wind.index[-1]),
                                               support=1.0,
                                               scale_dimension='temporal')

    # add data
    entry_1D_precision.import_data(data=df_1D_wind,
                                   precision=df_3D_prec,
                                   force_data_names=False)

    # load data
    dat = entry_1D_precision.get_data()

    # assert
    assert dat.columns.tolist() == [
        'u_ms', 'precision1', 'precision2', 'precision3'
    ]
    assert dat['u_ms'].mean() == pytest.approx(3.1, 0.05)

    return True
Example #5
def precision_test(session, df_3D_wind, df_3D_prec):
    """
    Test if precision columns are handled correctly.
    We use the 3D eddy wind data with 3 precision columns for this.
    """
    # find the variable
    var_3D_wind = api.find_variable(session, name='3D-wind')[0]

    # find the previously added person
    kit = api.find_person(session, organisation_abbrev='KIT')[0]

    # add the entry
    entry_3D_precision = api.add_entry(
        session,
        title='3-dimensional windspeed data, precision',
        abstract='3-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=var_3D_wind.id,
        comment='after double rotation',
        license=6,
        author=kit.id,
        embargo=False,
        is_partial=False)

    # create datasource and scale
    entry_3D_precision.create_datasource(type=1,
                                         path='timeseries',
                                         datatype='timeseries')

    entry_3D_precision.datasource.create_scale(resolution='30min',
                                               extent=(df_3D_wind.index[0],
                                                       df_3D_wind.index[-1]),
                                               support=1.0,
                                               scale_dimension='temporal')

    # add data
    entry_3D_precision.import_data(data=df_3D_wind,
                                   precision=df_3D_prec,
                                   force_data_names=False)

    # load data
    dat = entry_3D_precision.get_data()

    # assert
    assert dat.columns.tolist() == [
        'u', 'v', 'w', 'precision1', 'precision2', 'precision3'
    ]  # note: input was 'precision_1'
    assert dat['u'].mean() == pytest.approx(3.1, 0.05)

    return True
Example #6
def force_data_names_true(session, df_3D_wind):
    """
    Test force_data_names=True when loading the data into the database.
    In this case, datasource.data_names will be overwritten with the column
    names of the imported data; when the data is exported, these column
    names are displayed.
    We use the 3D eddy wind data for this again.
    """
    # find the variable
    var_3D_wind = api.find_variable(session, name='3D-wind')[0]

    # find the previously added author
    kit = api.find_person(session, organisation_abbrev='KIT')[0]

    # add the entry
    entry_3D_force_data_names = api.add_entry(
        session,
        title='3-dimensional windspeed data, force_data_names',
        abstract='3-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=var_3D_wind.id,
        comment='after double rotation',
        license=6,
        author=kit.id,
        embargo=False,
        is_partial=False)

    # create datasource and scale
    entry_3D_force_data_names.create_datasource(type=1,
                                                path='timeseries',
                                                datatype='timeseries')

    entry_3D_force_data_names.datasource.create_scale(
        resolution='30min',
        extent=(df_3D_wind.index[0], df_3D_wind.index[-1]),
        support=1.0,
        scale_dimension='temporal')

    # add data
    entry_3D_force_data_names.import_data(df_3D_wind, force_data_names=True)

    # load data
    dat = entry_3D_force_data_names.get_data()

    # assert
    assert dat.columns.tolist() == ['u_ms', 'v_ms', 'w_ms']
    assert dat['u_ms'].mean() == pytest.approx(3.1, 0.05)

    return True
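Taken together, the last three examples pin down the column-naming rules on export; a short summary in code-comment form, with the names taken from the tests above:

# counts match, force_data_names=False  -> variable.column_names win:
#   ['u', 'v', 'w'] (+ precision columns)
# counts differ, force_data_names=False -> the flag flips to True and
#   the source column names are stored: ['u_ms', 'precision1', ...]
# force_data_names=True                 -> source column names always win:
#   ['u_ms', 'v_ms', 'w_ms']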
Example #7
def one_dim_data(session, df_1D_wind):
    """
    Do the same as above, but with one-dimensional data instead.
    """
    # add the variable
    var_1D_wind = api.add_variable(session,
                                   name='1D-wind',
                                   symbol='u',
                                   column_names=['u'],
                                   unit=107)

    # find the previously added author
    kit = api.find_person(session, organisation_abbrev='KIT')[0]

    # add the entry
    entry_1D_wind = api.add_entry(
        session,
        title='1-dimensional windspeed data',
        abstract='1-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=var_1D_wind.id,
        license=6,
        author=kit.id,
        embargo=False,
        is_partial=False)

    # create datasource and scale
    entry_1D_wind.create_datasource(type=1,
                                    path='timeseries',
                                    datatype='timeseries')

    entry_1D_wind.datasource.create_scale(resolution='30min',
                                          extent=(df_1D_wind.index[0],
                                                  df_1D_wind.index[-1]),
                                          support=1.0,
                                          scale_dimension='temporal')

    # add data
    entry_1D_wind.import_data(df_1D_wind)

    # read data
    dat = entry_1D_wind.get_data()

    # assert
    assert dat.columns.tolist() == ['u']
    assert dat['u'].mean() == pytest.approx(3.1, 0.05)

    return True
Example #8
def get_uuid(session: Session, uuid: str, not_found='raise'):
    """
    .. versionadded:: 0.1.13

    Return the metacatalog object matching the given
    version 4 UUID. The supported objects are:

    - Entry
    - EntryGroup
    - Keyword
    - Person

    .. versionadded:: 0.2.7
        ``Person`` can now be found by UUID.

    """
    # check if an Entry exists
    entry = api.find_entry(session, uuid=uuid)
    if entry is not None:
        return entry

    # check if an EntryGroup exists
    group = api.find_group(session, uuid=uuid)
    if group is not None:
        return group

    # check if a Person exists
    person = api.find_person(session, uuid=uuid)
    if person is not None:
        return person

    # handle keyword
    keyword = api.find_keyword(session, uuid=uuid)
    if keyword is not None:
        return keyword

    if not_found == 'raise':
        raise NoResultFound("The UUID='%s' was not found." % uuid)
    else:
        return None
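A short usage sketch for get_uuid; the second UUID is a placeholder, and 'ignore' stands in for any value other than 'raise':

# resolves to whichever supported object carries this UUID
person = get_uuid(session, uuid='e05c3360-1120-4c9b-a181-659da986b061')

# any not_found value other than 'raise' turns a miss into None
maybe = get_uuid(session,
                 uuid='00000000-0000-4000-8000-000000000000',
                 not_found='ignore')
if maybe is None:
    print('nothing found for this UUID')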
Example #9
def add_persons_to_entries(session, entries, persons, roles, order):
    r"""Add person(s) to entrie(s)

    Adds associations between entries and persons. The Entry and Person
    instances have to already exist in the database. Each association
    has to further define the role of the person for the respective entry.

    Parameters
    ----------
    session : sqlalchemy.Session
        SQLAlchemy session connected to the database.
    entries : list
        List of identifiers, or a single identifier, of the entries to load.
        If int, Entry.id is assumed. If str, the title is assumed.
        A metacatalog.Entry object can also be passed.
    persons : list
        List of identifiers, or a single identifier, of the persons to load.
        If int, Person.id is assumed. If str, Person.last_name is assumed.
        A metacatalog.Person object can also be passed.
    roles : list
        List of roles, or a single role. The shape has to match the
        persons parameter. A role is identified by id (int) or
        role name (str).
    order : list
        List of orders, or a single order. The shape has to match the
        persons parameter. The order gives the ascending position of the
        contributors on the respective entry (after the author).

    Returns
    -------
    void

    See Also
    --------
    metacatalog.Entry
    metacatalog.Person
    metacatalog.PersonRole

    """
    # check the input shapes
    if not isinstance(entries, list):
        entries = [entries]
    if not isinstance(persons, list):
        persons = [persons]
    if not isinstance(roles, list):
        roles = [roles] * len(persons)
    if not isinstance(order, list):
        order = [order] * len(persons)

    # add for each entry
    for entry_id in entries:
        # load the entry
        if isinstance(entry_id, models.Entry):
            entry = entry_id
        elif isinstance(entry_id, int):
            # TODO sort by version descending to get the latest
            entry = api.find_entry(session=session,
                                   id=entry_id,
                                   return_iterator=True).first()
        elif isinstance(entry_id, str):
            # TODO sort by version descending to get the latest
            entry = api.find_entry(session=session,
                                   title=entry_id,
                                   return_iterator=True).first()
        else:
            raise AttributeError("Value '%s' not allowed for entries" %
                                 str(type(entry_id)))

        # add each person
        assocs = []
        for person_id, role_id, order_num in zip(persons, roles, order):
            # load the person
            if isinstance(person_id, models.Person):
                person = person_id
            elif isinstance(person_id, int):
                person = api.find_person(session=session,
                                         id=person_id,
                                         return_iterator=True).one()
            elif isinstance(person_id, str):
                person = api.find_person(session=session,
                                         last_name=person_id,
                                         return_iterator=True).first()
            else:
                raise AttributeError(
                    'Persons can only be identified by id or last_name')

            # load the role
            if isinstance(role_id, models.PersonRole):
                role = role_id
            elif isinstance(role_id, int):
                role = api.find_role(session=session,
                                     id=role_id,
                                     return_iterator=True).one()
            elif isinstance(role_id, str):
                role = api.find_role(session=session,
                                     name=role_id,
                                     return_iterator=True).first()
            else:
                raise AttributeError(
                    'Roles can only be identified by id or name')

            # create the new association
            assocs.append(
                models.PersonAssociation(entry=entry,
                                         person=person,
                                         role=role,
                                         order=order_num))

        # add each person to entry
        try:
            entry.contributors.extend(assocs)
            session.add(entry)
            session.commit()
        except Exception as e:
            session.rollback()
            raise e
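A hedged usage sketch of the broadcasting behaviour above; the entry ids and the role name are illustrative:

# a single person, role and order are broadcast over both entries;
# the person is looked up by last name, the role by its name
add_persons_to_entries(session,
                       entries=[12, 13],
                       persons='Hughes',
                       roles='coAuthor',
                       order=2)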
Example #10
def add_entry(session,
              title,
              author,
              location,
              variable,
              abstract=None,
              external_id=None,
              geom=None,
              license=None,
              embargo=False,
              **kwargs):
    r"""Add new Entry

    Adds a new metadata Entry to the database. This method will create the core
    entry. Usually, more steps are necessary, which will need the newly created
    database ID. Such steps are:

    * adding contributors   (mandatory)
    * adding data           (extremely useful)
    * adding keywords       (recommended)

    Parameters
    ----------
    session : sqlalchemy.Session
        SQLAlchemy session connected to the database.
    title : str
        Title of the Entry
    author : int, str
        First author of the Entry. The Person record has to exist already in the
        database and can be found by exact match on id (int) or last_name (str).
    location : str, tuple
        Can be either a WKT of an EPSG:4326 location, or the coordinates as a
        tuple. It has to be (X, Y), i.e. (longitude, latitude).
    variable : int, str
        **Full** variable name (str) or ID (int) of the data described by the Entry.
    abstract : str
        Description of the data. Be as detailed as possible
    external_id : str
        If the data described by Entry has another unique identifier,
        usually supplied by the data provider, it can be stored here for reference.
    comment : str
        General purpose comment that should not contain any vital information to
        understand the entry. If it's vital, it should go into the abstract.
    geom : str
        WKT of any additional geoinformation in EPSG:4326
    license : str, int
        Either the id or **full** name of the license to be linked to this Entry.
    embargo : bool
        If True, this Entry will **not** be publicly available until the embargo ends.
        The embargo period is usually 2 years but can be modified using the kwargs.

    Returns
    -------
    entry: metacatalog.Entry
        Entry instance of the added entry entity


    """
    # create the attribute dict
    attr = dict(title=title,
                abstract=abstract,
                external_id=external_id,
                embargo=embargo)
    attr.update(kwargs)

    # parse the author
    if isinstance(author, int):
        author = api.find_person(session=session,
                                 id=author,
                                 return_iterator=True).one()
    elif isinstance(author, str):
        author = api.find_person(session=session,
                                 last_name=author,
                                 return_iterator=True).first()
    else:
        raise AttributeError('author has to be of type int or str')

    # parse the location and geom
    if isinstance(location, str):
        attr['location'] = location
    elif isinstance(location, (tuple, list)):
        attr['location'] = 'SRID=4326;POINT (%f %f)' % (location[0],
                                                        location[1])

    if geom is not None and isinstance(geom, str):
        attr['geom'] = geom

    # handle variable
    if isinstance(variable, int):
        variable = api.find_variable(session=session,
                                     id=variable,
                                     return_iterator=True).one()
    elif isinstance(variable, str):
        variable = api.find_variable(session=session,
                                     name=variable,
                                     return_iterator=True).first()
    else:
        raise AttributeError('variable has to be of type int or str.')
    attr['variable_id'] = variable.id

    # handle license
    if isinstance(license, int):
        license = api.find_license(session=session,
                                   id=license,
                                   return_iterator=True).one()
    elif isinstance(license, str):
        license = api.find_license(session=session,
                                   short_title=license,
                                   return_iterator=True).first()
    if license is not None:
        attr['license_id'] = license.id

    # add the entry
    entry = add_record(session=session, tablename='entries', **attr)

    # reference the person using 'First Author' (ID=1) Role
    add_persons_to_entries(session, entry, author, 1, 1)

    return entry
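A minimal, hedged call of add_entry; all values are illustrative, and the variable and license ids are assumed to exist:

# author by last name, variable and license by id,
# location as an (X, Y) = (longitude, latitude) tuple
entry = add_entry(session,
                  title='Air temperature at site X',
                  author='Hughes',
                  location=(8.9, 50.1),
                  variable=1,
                  abstract='2 m air temperature, 10 min means',
                  license=6)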
Example #11
def find(args):
    # get the session
    session = connect(args)

    # get the entity
    entity = args.entity

    # set by to an empty list if not given
    if args.by is None:
        args.by = []

    # parse out the BY arguments
    kwargs = dict()
    for by in args.by:
        # each BY argument is a (column, value) pair
        kwargs[by[0]] = by[1]

    # switch entity (case-insensitive)
    ename = entity.lower()
    if ename in ('units', 'unit'):
        results = api.find_unit(session, **kwargs)
    elif ename in ('variables', 'variable'):
        results = api.find_variable(session, **kwargs)
    elif ename in ('licenses', 'license'):
        results = api.find_license(session, **kwargs)
    elif ename in ('keywords', 'keyword'):
        results = api.find_keyword(session, **kwargs)
    elif ename in ('roles', 'role'):
        results = api.find_role(session, **kwargs)
    elif ename in ('persons', 'person'):
        results = api.find_person(session, **kwargs)
    elif ename in ('group_types', 'group_type'):
        results = api.find_group_type(session, **kwargs)
    elif ename in ('groups', 'group'):
        results = api.find_group(session, **kwargs)
    elif ename in ('entries', 'entry'):
        results = api.find_entry(session, **kwargs)
    elif ename == 'thesaurus':
        results = api.find_thesaurus(session, **kwargs)
    else:
        cprint(args, 'Oops. Finding %s is not supported.' % entity)
        exit(0)

    # switch the output
    if args.json:
        obj = [serialize(r) for r in results]
        cprint(args, json.dumps(obj, indent=4))
    elif args.csv:
        obj = [flatten(serialize(r)) for r in results]
        f = io.StringIO(newline='')
        # collect all column names (sorted for a stable header order)
        colnames = sorted({n for o in obj for n in o.keys()})
        writer = csv.DictWriter(f,
                                fieldnames=colnames,
                                quotechar='"',
                                quoting=csv.QUOTE_NONNUMERIC,
                                lineterminator='\r')
        writer.writeheader()
        for o in obj:
            writer.writerow(o)

        f.seek(0)
        cprint(args, f.getvalue())
    else:  # stdout
        for result in results:
            cprint(args, result)
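find() is written against an argparse namespace; a hedged sketch of driving it directly, with the attribute names taken from the code above and whatever connection arguments connect() expects omitted:

import argparse

# hypothetical invocation: list all persons named Hughes as JSON
args = argparse.Namespace(
    entity='persons',
    by=[('last_name', 'Hughes')],  # each pair becomes a find_* kwarg
    json=True,
    csv=False,
)
find(args)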
Example #12
def find(args):
    # get the session
    session = connect(args)

    # get the entity
    entity = args.entity

    # set by to an empty list if not given
    if args.by is None:
        args.by = []

    # parse out the BY arguments
    kwargs = dict()
    for by in args.by:
        # each BY argument is a (column, value) pair
        kwargs[by[0]] = by[1]

    # switch entity (case-insensitive)
    ename = entity.lower()
    if ename in ('units', 'unit'):
        results = api.find_unit(session, **kwargs)
    elif ename in ('variables', 'variable'):
        results = api.find_variable(session, **kwargs)
    elif ename in ('licenses', 'license'):
        results = api.find_license(session, **kwargs)
    elif ename in ('keywords', 'keyword'):
        results = api.find_keyword(session, **kwargs)
    elif ename in ('roles', 'role'):
        results = api.find_role(session, **kwargs)
    elif ename in ('persons', 'person'):
        results = api.find_person(session, **kwargs)
    elif ename in ('group_types', 'group_type'):
        results = api.find_group_type(session, **kwargs)
    elif ename in ('groups', 'group'):
        results = api.find_group(session, **kwargs)
    elif ename in ('entries', 'entry'):
        if args.include_partial:
            kwargs['include_partial'] = True
        results = api.find_entry(session, **kwargs)
    elif ename == 'thesaurus':
        results = api.find_thesaurus(session, **kwargs)
    else:
        cprint(args, 'Oops. Finding %s is not supported.' % entity)
        exit(0)

    if args.export is not None and args.export != '':
        # only entries and groups can be exported
        if ename not in ('entry', 'entries', 'group', 'groups'):
            cprint(args, 'Can only export entity=Entry and entity=Group')
            return

        # derive the format from the file extension
        path = args.export
        fmt = args.export.split('.')[-1]
        fmt = 'netCDF' if fmt == 'nc' else fmt

        # check the number of results
        if len(results) == 1:
            results[0].export(path=path, fmt=fmt)
            cprint(args, f'Wrote {path}.')
        else:
            stem, ext = args.export.rsplit('.', 1)
            for i, result in enumerate(results):
                # write one numbered file per result
                result.export(path=f'{stem}_{i}.{ext}', fmt=fmt)
            cprint(args, f'Wrote {len(results)} files.')

        return

    # switch the output
    if args.json:
        obj = [serialize(r) for r in results]
        cprint(args, json.dumps(obj, indent=4))
    elif args.csv:
        obj = [flatten(serialize(r)) for r in results]
        f = io.StringIO(newline='')
        # collect all column names (sorted for a stable header order)
        colnames = sorted({n for o in obj for n in o.keys()})
        writer = csv.DictWriter(f,
                                fieldnames=colnames,
                                quotechar='"',
                                quoting=csv.QUOTE_NONNUMERIC,
                                lineterminator='\r')
        writer.writeheader()
        for o in obj:
            writer.writerow(o)

        f.seek(0)
        cprint(args, f.getvalue())
    else:  # stdout
        for result in results:
            cprint(args, result)
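The export branch can be exercised the same way; again a hedged sketch with illustrative values and the connection arguments omitted:

import argparse

# hypothetical invocation: export entry 42 to netCDF ('.nc' maps to 'netCDF')
args = argparse.Namespace(
    entity='entry',
    by=[('id', 42)],
    include_partial=False,
    export='entry_42.nc',
    json=False,
    csv=False,
)
find(args)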