def add_details(session):
    """Attach detail metadata to the two dummy entries and verify lookups."""
    # load both dummy entries by title
    first = api.find_entry(session, title="Dummy 1")[0]
    second = api.find_entry(session, title="Dummy 2")[0]

    # the pre-v0.1.8 keyword-argument interface still has to work
    api.add_details_to_entries(session, [first], **{'foo': 'bar 1'})
    api.add_details_to_entries(session, [first, second], **{'banana': 'both love it'})

    # the newer list-of-dict interface with an optional description
    api.add_details_to_entries(
        session, [first],
        details=[dict(key='baz', value=42, description='Baz is the best kw')])

    # nested detail values are supported as well
    second.add_details(
        foo=dict(bar=['list', 'of', 'strings'], baz=42),
        answer=42,
        commit=True)

    # rendering the details table just has to run without error
    second.details_table(fmt='markdown')

    # flat detail search
    hit = api.find_entry(session, details=dict(answer=42))[0]
    assert second.id == hit.id

    # nested detail search
    nested_hit = api.find_entry(session, details=dict(foo=dict(baz=42)))[0]
    assert second.id == nested_hit.id

    return True
def add_details_to_entries(session, entries, details=None, **kwargs): """Associate detail(s) to entrie(s) Add key-value pair details to one, or many Entry(s). The Entry(s) have to already exist in the database. Parameters ---------- session : sqlalchemy.Session SQLAlchemy session connected to the database. entries : list List of identifier or single identifier to load entries. If int, the Entry.id is assumed. If str, title is assumed. Can also pass a metacatalog.Entry object. details : list, None .. versionadded:: 0.1.8 List of dict of structure: .. code-block:: [{ 'key': '', 'value': '', 'description': '' }] where the ``description`` is optional and can be omitted. If no descriptions are passed at all, you can also use `**kwargs` to pass ``key=value`` pairs. You can mix `details` and `kwargs` kwargs : keyword arguments Each keyword argument will be added as a py:class:`metacatalog.models.Detail` and linked to each entry """ # check the input shapes if not isinstance(entries, list): entries = [entries] # add for each entry for entry_id in entries: # load the entry if isinstance(entry_id, models.Entry): entry = entry_id elif isinstance(entry_id, int): # TODO sort by version descending to get the lastest entry = api.find_entry(session=session, id=entry_id, return_iterator=True).first() elif isinstance(entry_id, str): # TODO sort by version descending to get the lastest entry = api.find_entry(session=session, title=entry_id, return_iterator=True).first() else: raise AttributeError("Value '%s' not allowed for entries" % str(type(entry_id))) # add the details entry.add_details(details=details, commit=True, **kwargs)
def neighbors(self, distance, unit='meter', buffer_epsg=3857, as_sql=False, **kwargs): """ Find neighboring :class:`Entries <metacatalog.models.Entry>` around the location of this instance. You can return the result, or the sqlalchemy Query object, which can be printed as plain SQL. Parameters ---------- distance : int, float The maximum distance at which another Entry is still considered to be a neighbor. unit : str Has to be one of ['meter', 'km', 'mile', 'nautic'] to specify the unit of the given distance. Note that the distance will always be transformed into meter. buffer_epsg : int The EPSG identification number of any projected cartesian coordinate reference system that uses meter as unit. This CRS will be used to apply the search distance (in meter). .. note:: The default system is the transversal Mercartor projection, which is a global system. Thus, it can always be applied, but may introduce large uncertainties in small areas. Replace this attribute by a local CRS wherever possible. as_sql : bool If False (default) the SQL query for neighbors will be executed and the result is returned. Else, the SQL query itself will be returned. kwargs : keyword arguments Any passed keyword argument will be passed down to the :func:`api.find_entry <metacatalog.api.find_entry>` function to further filter the results. See Also -------- :func:`around <metacatalog.util.location.around>` :func:`find_entry <metacatalog.api.find_entry>` """ # open a session session = object_session(self) # get the base filter query kwargs['return_iterator'] = True query = api.find_entry(session, **kwargs) # get the area filter_query = around(self, distance=distance, unit=unit, query=query, buffer_use_epsg=buffer_epsg) if as_sql: return filter_query else: return filter_query.all()
def associate_persons(session):
    """Register additional contributors on the dummy entries."""
    first = api.find_entry(session, title='Dummy 1')[0]
    second = api.find_entry(session, external_id='foobar')[0]
    assert first.id != second.id

    # Homer becomes a coauthor on both entries
    api.add_persons_to_entries(session, [first, second], ['Simpson'], 13, 2)

    # Keanu contributed to the second entry only, as a contributor
    api.add_persons_to_entries(session, [second], ['Reeves'], 15, 3)

    return True
def add_group(session, group_type, entry_ids, title=None, description=None):
    """
    .. versionadded:: 0.2

    Adds a new EntryGroup to the database. The Entry(s) have to exist in
    the database to be associated correctly.

    Parameters
    ----------
    session : sqlalchemy.Session
        SQLAlchemy session connected to the database.
    group_type : int, str
        Either :class:`EntryGroupType <metacatalog.models.EntryGroupType>`
        id or name to be used.
    entry_ids : list of int
        List of :class:`Entry.id <metacatalog.models.Entry>` to be
        associated tp this Group.
    title : str
        Optional title of this Group. Mandatory, if the type is a
        'Project'.
    description : str
        Optional description of this Group. Mandatory is the type is a
        'Project'.

    Returns
    -------
    entry: metacatalog.models.EntryGroup
        EntryGroup instance of the added group

    """
    # normalize and validate the entry id list
    if isinstance(entry_ids, int):
        entry_ids = [entry_ids]
    if not all(isinstance(eid, int) for eid in entry_ids):
        raise AttributeError("entry_ids has to be a list of integers.")

    # resolve the group type from id, name, or pass an instance through
    if isinstance(group_type, int):
        type_ = api.find_group_type(session, id=group_type)[0]
    elif isinstance(group_type, str):
        type_ = api.find_group_type(session, name=group_type)[0]
    else:
        type_ = group_type
    if not isinstance(type_, models.EntryGroupType):
        raise AttributeError("The group_type has to be int or str.")

    # Projects require both a title and a description
    if type_.name == 'Project' and (title is None or description is None):
        raise AttributeError("Projects must not omit title and description.")

    # load the Entry records, including partial entries
    entries = [
        api.find_entry(session, include_partial=True, id=eid)[0]
        for eid in entry_ids
    ]

    # persist the new group
    attr = dict(title=title, description=description, type=type_, entries=entries)
    return add_record(session, 'entry_groups', **attr)
def add_split_dataset(session):
    """Create two entries sharing one time series, group them as a
    'Split dataset', and verify the data can be recovered through the
    ImmutableResultSet of either member.
    """
    # create dummy data: 350 values at 15min resolution
    data = pd.DataFrame(
        data={
            'value': np.random.normal(10, 1, size=350),
            'tstamp': pd.date_range('201309241100', periods=350, freq='15min')
        })
    data.set_index('tstamp', inplace=True)

    # add two entries as split datasets
    kit = api.find_person(session, organisation_abbrev='KIT')[0]
    historical_entry = api.add_entry(
        session,
        title='Historical data',
        abstract='Long descirption',
        location=(4.2, 42),
        variable=1,
        license=6,
        author=kit.id)
    # FIX: was api.add.add_entry - use the same public api.add_entry
    # endpoint as for the historical entry above
    recent_entry = api.add_entry(
        session,
        title='Recent data',
        abstract='something bad happended that needs description',
        location=(4.2, 42),
        variable=1,
        license=6,
        author=kit.id)

    # create a timeseries datasource for both entries
    historical_entry.create_datasource(type=1, path='timeseries', datatype='timeseries')
    recent_entry.create_datasource(type=1, path='timeseries', datatype='timeseries')

    # split the data between the two entries
    historical_entry.import_data(data=data.iloc[:300, :])
    recent_entry.import_data(data=data.iloc[300:, :])

    # group the two entries (return value not needed here)
    api.add_group(session, 'Split dataset', [historical_entry.id, recent_entry.id])

    # checkout one member as an ImmutableResultSet
    result = api.find_entry(session, id=recent_entry.id, as_result=True)[0]

    # recover data
    db_data = result.get_data()

    # search for checksum - result.checksum is a checksum of member checksums,
    # which is only one here
    assert len(result.checksums) == 1
    checksum = result.checksums[0]
    assert checksum in db_data

    # the recovered data has to match the full original series
    recovered_data = db_data[checksum].values
    assert_array_almost_equal(data.values, recovered_data)

    return True
def add_3D_data(session, df_3D_wind):
    """Import the Eddy 3D windspeed dataframe into its entry."""
    target = api.find_entry(session, title='3-dimensional windspeed data')[0]
    target.import_data(df_3D_wind)
    return True
def check_find_with_wildcard(session):
    """Verify the wildcard patterns introduced in version 0.1.8."""
    # '*' matches anything - all three dummy titles
    hits = api.find_entry(session, title='Dummy *')
    assert len(hits) == 3

    # SQL-style '%' wildcard - only the two matching abstracts
    hits = api.find_entry(session, abstract='%entry%')
    assert len(hits) == 2
    assert hits[0].author.last_name == 'Curie'

    # leading '!' negates the pattern
    hits = api.find_entry(session, abstract='!*entry*')
    assert not any(e.title in ('Dummy 2', 'Dummy 3') for e in hits)
    assert any(e.author.first_name == 'Keanu' for e in hits)

    return True
def check_related_information(session):
    """Assert contributor counts and detail contents on the dummy entries."""
    first = api.find_entry(session, title="Dummy 1")[0]
    second = api.find_entry(session, title="Dummy 2")[0]

    # contributor counts
    assert len(first.contributors) == 2
    assert len(second.contributors) == 3

    # details of the first entry
    first_details = first.details_dict(full=True)
    assert 'banana' in first_details.keys()
    assert first_details['foo']['value'] == 'bar 1'

    return True
def find_composite_entry(session):
    """The Telegraph is a composite entry and has to be findable by title."""
    telegraph = api.find_entry(session, title='Telegraph')[0]
    assert 'telegraph from 1855' in telegraph.abstract
    return telegraph
def find_partial_invention(session):
    """Partial entries stay hidden unless include_partial is set."""
    # without include_partial the partial entry is invisible
    warp = api.find_entry(session, title='Warp drive', return_iterator=True).first()
    assert warp is None

    # with include_partial it shows up
    warp = api.find_entry(
        session,
        title='Warp drive',
        include_partial=True,
        return_iterator=True).first()
    assert warp is not None
    assert len(warp.associated_groups) == 1

    return True
def check_has_uuid(session):
    """Every Entry carries a UUID and no two UUIDs collide."""
    all_uuids = [entry.uuid for entry in api.find_entry(session)]
    assert len(all_uuids) > 0
    # converting to a set drops duplicates - the length must not change
    assert len(all_uuids) == len(set(all_uuids))
    return True
def add_project_group(session):
    """Bundle the two dummy entries into a 'Composite' EntryGroup."""
    # load the member entries
    first = api.find_entry(session, title="Dummy 1")[0]
    second = api.find_entry(session, title="Dummy 2")[0]

    # load the group type
    composite_type = api.find_group_type(session, name='Composite')[0]

    # TODO this needs an api endpoint. until then the models are used
    group = models.EntryGroup(title='Dummies', type=composite_type, description='empty')
    group.entries.append(first)
    group.entries.append(second)

    session.add(group)
    session.commit()

    return True
def check_result_set(session):
    """Loading a single member has to yield the full ImmutableResultSet."""
    result = api.find_entry(session, title='Microphone', as_result=True)[0]
    assert isinstance(result, ImmutableResultSet)

    # the set spans three member uuids ...
    assert len(result.uuids) == 3
    # ... but resolves to exactly one author
    assert isinstance(result.get('author'), dict)

    return True
def get_entry_neighbors(session):
    """Spatial neighbor search around the Van Gogh Museum."""
    gogh = api.find_entry(session, title="Van Gogh", return_iterator=True).one()

    # within 200 m only the Stedelijk is found
    close_by = gogh.neighbors(distance=200)
    assert len(close_by) == 1
    assert close_by[0].title == 'Stedelijk'

    # within 550 m the Rijksmuseum joins as well
    wider = gogh.neighbors(distance=550)
    assert len(wider) == 2

    return True
def read_3D_data(session):
    """Read the 3D windspeed data back and check its structure."""
    source_entry = api.find_entry(session, title='3-dimensional windspeed data')[0]
    frame = source_entry.get_data()

    # column layout: the three wind components
    assert frame.columns[1] == 'v'
    assert frame.columns.tolist() == ['u', 'v', 'w']

    # index values and data statistics
    assert frame.index[2] == pd.to_datetime("2018-01-01 01:30:00", format='%Y-%m-%d %H:%M:%S')
    assert frame['u'].mean() == pytest.approx(3.1, 0.05)

    return True
def find_by_project(session):
    """Entries are findable by project instance, project id, or wildcard title."""
    dummies = api.find_group(session, title="Dumm%")[0]

    # create a project around the dummy entries
    project = api.add_project(
        session,
        entry_ids=[entry.id for entry in dummies.entries],
        title='Awesome Project',
        description='Nice project for testing purposes')

    # all three identifier styles have to yield the same two entries
    for params in (
            dict(project=project),
            dict(project=project.id),
            dict(project='Awesome%')):
        hits = api.find_entry(session, **params)
        assert len(hits) == 2
        assert set(e.title for e in hits) == set(['Dummy 1', 'Dummy 2'])

    return True
def mutate_details(session):
    """
    Check detail mutability.

    Update a Detail value in place, commit, then re-load the record from
    the database and verify the change was actually persisted.
    """
    # get the same entry as before
    e2 = api.find_entry(session, title="Dummy 2")[0]
    detail = [d for d in e2.details if d.key == 'answer'][0]

    # store detail id and check current value
    detail_id = detail.id
    assert detail.value == 42

    # update and persist
    detail.value = 1312
    session.add(e2)
    session.commit()

    # reload from database
    updated = session.query(
        models.Detail).filter(models.Detail.id == detail_id).one()

    # FIX: assert on the freshly loaded instance, not the in-memory one -
    # otherwise the round-trip through the database is never verified
    assert updated.value == 1312

    return True
def create_3D_datasource(session, df_3D_wind):
    """Attach a timeseries datasource with a temporal scale to the eddy entry."""
    wind_entry = api.find_entry(session, title='3-dimensional windspeed data')[0]

    wind_entry.create_datasource(type=1, path='timeseries', datatype='timeseries')
    # temporal scale spans the full dataframe index at 30min resolution
    wind_entry.datasource.create_scale(
        resolution='30min',
        extent=(df_3D_wind.index[0], df_3D_wind.index[-1]),
        support=1.0,
        scale_dimension='temporal')
    session.commit()

    # the variable has to expose the three wind components
    assert wind_entry.variable.column_names == ['u', 'v', 'w']

    return True
def get_uuid(session: Session, uuid: str, not_found='raise'): """ .. versionadded:: 0.1.13 Return the Metacatalog object of given version 4 UUID. The supported objects are: - Entry - EntryGroup - Keyword .. versionadded:: 0.2.7 - Person """ # check if an Entry exists entry = api.find_entry(session, uuid=uuid) if entry is not None: return entry # check if Entrygroup exists group = api.find_group(session, uuid=uuid) if group is not None: return group # check if a Person exists person = api.find_person(session, uuid=uuid) if person is not None: return person # handle keyword keyword = api.find_keyword(session, uuid=uuid) if keyword is not None: return keyword if not_found == 'raise': raise NoResultFound("The UUID='%s' was not found." % uuid) else: return None
def check_composite_raises_error(session):
    """Passing a non-Project group as project= has to raise a TypeError."""
    with pytest.raises(TypeError) as excinfo:
        composite = api.find_group(session, title="Dumm%")[0]
        api.find_entry(session, project=composite)
    return "has to be of type 'Project'" in str(excinfo.value)
def add_persons_to_entries(session, entries, persons, roles, order):
    r"""Add person(s) to entrie(s)

    Adds associations between entries and persons. The Entry and Person
    instances have to already exist in the database. Each association
    has to further define the role of the person for the respective entry.

    Parameters
    ----------
    session : sqlalchemy.Session
        SQLAlchemy session connected to the database.
    entries : list
        List of identifier or single identifier to load entries.
        If int, the Entry.id is assumed. If str, title is assumed.
        Can also pass a metacatalog.Entry object.
    persons : list
        List of identifier or single identifier to load persons.
        If int, Person.id is assumed, If str, Person.last_name is assumed.
        Can also pass a metacatalog.Person object.
    roles : list
        List of, or single role. The shape has to match the persons
        parameter. The role has to be identified by id (int) or
        role name (str).
    order : list
        List of, or single order. The shape has to match the persons
        parameter. The order gives the ascending order of contributors
        on the respective entry (after the author).

    Returns
    -------
    void

    See Also
    --------
    metacatalog.Entry
    metacatalog.Person
    metacatalog.PersonRole

    """
    # check the input shapes - broadcast scalars to lists
    if not isinstance(entries, list):
        entries = [entries]
    if not isinstance(persons, list):
        persons = [persons]
    if not isinstance(roles, list):
        roles = [roles] * len(persons)
    if not isinstance(order, list):
        order = [order] * len(persons)

    # add for each entry
    for entry_id in entries:
        # load the entry: accept a model instance, an id (int) or a title (str)
        if isinstance(entry_id, models.Entry):
            entry = entry_id
        elif isinstance(entry_id, int):
            # TODO sort by version descending to get the lastest
            entry = api.find_entry(session=session, id=entry_id, return_iterator=True).first()
        elif isinstance(entry_id, str):
            # TODO sort by version descending to get the lastest
            # FIX: was api.find_variable - a title identifies an Entry and
            # has to be resolved with find_entry (consistent with
            # add_details_to_entries and add_keywords_to_entries)
            entry = api.find_entry(session=session, title=entry_id, return_iterator=True).first()
        else:
            raise AttributeError("Value '%s' not allowed for entries" % str(type(entry_id)))

        # build one association per person
        assocs = []
        for person_id, role_id, order_num in zip(persons, roles, order):
            # load the person
            if isinstance(person_id, models.Person):
                person = person_id
            elif isinstance(person_id, int):
                person = api.find_person(session=session, id=person_id, return_iterator=True).one()
            elif isinstance(person_id, str):
                person = api.find_person(session=session, last_name=person_id, return_iterator=True).first()
            else:
                raise AttributeError(
                    'Persons can only be identified by id or last_name')

            # load the role
            if isinstance(role_id, models.PersonRole):
                role = role_id
            elif isinstance(role_id, int):
                role = api.find_role(session=session, id=role_id, return_iterator=True).one()
            elif isinstance(role_id, str):
                role = api.find_role(session=session, name=role_id, return_iterator=True).first()
            else:
                raise AttributeError(
                    'Roles can only be identified by id or name')

            # create the new association
            assocs.append(
                models.PersonAssociation(entry=entry, person=person,
                                         role=role, order=order_num))

        # persist all associations for this entry; roll back on any error
        try:
            entry.contributors.extend(assocs)
            session.add(entry)
            session.commit()
        except Exception as e:
            session.rollback()
            raise e
def add_keywords_to_entries(session, entries, keywords, alias=None): r"""Associate keyword(s) to entrie(s) Adds associations between entries and keywords. The Entry and Keyword instances have to already exist in the database. Keywords are usually prepopulated. You might want to alias an keyword or associate a value to it. Use the alias and value lists for this. Parameters ---------- session : sqlalchemy.Session SQLAlchemy session connected to the database. entries : list List of identifier or single identifier to load entries. If int, the Entry.id is assumed. If str, title is assumed. Can also pass a metacatalog.Entry object. keywords : list List of identifier or single identifier to load keywords. If int, Keyword.id is assumed, If str, Keyword.value is assumed. Can also pass a metacatalog.Keyword object. alias : list List of, or single alias names. The shape has to match the keywords parameter. These alias will rename the keywords on association. In case one instance should not recive an alias, pass None instead. .. 
deprecated:: 0.4.5 'alias' will be removed with a future release Returns ------- void See Also -------- metacatalog.Entry metacatalog.Keyword """ # check the input shapes if not isinstance(entries, list): entries = [entries] if not isinstance(keywords, list): keywords = [keywords] # if not isinstance(alias, list): # alias = [alias] * len(keywords) # add for each entry for entry_id in entries: # load the entry if isinstance(entry_id, models.Entry): entry = entry_id elif isinstance(entry_id, int): # TODO sort by version descending to get the lastest entry = api.find_entry(session=session, id=entry_id, return_iterator=True).first() elif isinstance(entry_id, str): # TODO sort by version descending to get the lastest entry = api.find_entry(session=session, title=entry_id, return_iterator=True).first() else: raise AttributeError("Value '%s' not allowed for entries" % str(type(entry_id))) # add each keyword assocs = [] for keyword_id in keywords: # load the keyword if isinstance(keyword_id, models.Keyword): keyword = keyword_id elif isinstance(keyword_id, int): keyword = api.find_keyword(session=session, id=keyword_id, return_iterator=True).first() elif isinstance(keyword_id, str): keyword = api.find_keyword(session=session, value=keyword_id, return_iterator=True).first() else: raise AttributeError("Value '%s' not allowed for keywords" % str(type(keyword_id))) # create a new keyword association assocs.append( models.KeywordAssociation(entry=entry, keyword=keyword)) # add keyword to current entry try: entry.keywords.extend(assocs) session.add(entry) session.commit() except Exception as e: session.rollback() raise e
def find(args):
    """CLI handler: find records of the requested entity and print or
    export them.

    Reads the entity name and BY filters from the parsed CLI `args`,
    dispatches to the matching ``api.find_*`` function, and then either
    exports the results to a file (entries/groups only) or prints them
    as JSON, CSV, or plain text.
    """
    # get the session
    session = connect(args)

    # get the entity
    entity = args.entity

    # set by to an empty list if not given
    if args.by is None:
        args.by = []

    # parse out the BY arguments into keyword filters
    kwargs = dict()
    for by in args.by:
        # if len(by) != 2:
        kwargs[by[0]] = by[1]

    # switch entity - each branch accepts singular and plural spelling
    if entity.lower() == 'units' or entity.lower() == 'unit':
        results = api.find_unit(session, **kwargs)
    elif entity.lower() == 'variables' or entity.lower() == 'variable':
        results = api.find_variable(session, **kwargs)
    elif entity.lower() == 'licenses' or entity.lower() == 'license':
        results = api.find_license(session, **kwargs)
    elif entity.lower() == 'keywords' or entity.lower() == 'keyword':
        results = api.find_keyword(session, **kwargs)
    elif entity.lower() == 'roles' or entity.lower() == 'role':
        results = api.find_role(session, **kwargs)
    elif entity.lower() == 'persons' or entity.lower() == 'person':
        results = api.find_person(session, **kwargs)
    elif entity.lower() == 'group_types' or entity.lower() == 'group_type':
        results = api.find_group_type(session, **kwargs)
    elif entity.lower() == 'groups' or entity.lower() == 'group':
        results = api.find_group(session, **kwargs)
    elif entity.lower() == 'entries' or entity.lower() == 'entry':
        # only entries support the --include-partial flag
        if args.include_partial:
            kwargs['include_partial'] = True
        results = api.find_entry(session, **kwargs)
    elif entity.lower() == 'thesaurus':
        results = api.find_thesaurus(session, **kwargs)
    else:
        cprint(args, 'Oops. Finding %s is not supported.' % entity)
        exit(0)

    # export mode: write result(s) to file instead of printing
    if args.export is not None and args.export != '':
        # only entry and group can be exported
        if entity.lower() not in ('entry', 'group'):
            cprint(args, 'Can only export entity=Entry and entity=Group')
            return

        # get the fmt and path; the format is derived from the file suffix
        path = args.export
        fmt = args.export.split('.')[-1]
        fmt = 'netCDF' if fmt == 'nc' else fmt

        # check amount of results - multiple results get an index suffix
        if len(results) == 1:
            results[0].export(path=path, fmt=fmt)
            cprint(args, f'Wrote {path}.')
        else:
            for i, result in enumerate(results):
                path = '.'.join([*args.export.split('.')[:-1], f'_{i}', args.export.split('.')[-1]])
                result.export(path=path, fmt=fmt)
            cprint(args, f'Wrote {len(results)} files.')
        return

    # switch the output format
    if args.json:
        obj = [serialize(r) for r in results]
        cprint(args, json.dumps(obj, indent=4))
    elif args.csv:
        obj = [flatten(serialize(r)) for r in results]
        f = io.StringIO(newline='')
        # union of all keys over all result rows becomes the header
        colnames = set([n for o in obj for n in o.keys()])
        writer = csv.DictWriter(f, fieldnames=colnames, quotechar='"', quoting=csv.QUOTE_NONNUMERIC, lineterminator='\r')
        writer.writeheader()
        for o in obj:
            writer.writerow(o)
        f.seek(0)
        cprint(args, f.getvalue())
    else:
        # stdOut
        for result in results:
            cprint(args, result)
def find(args):
    """CLI handler: find records of the requested entity and print them.

    Reads the entity name and BY filters from the parsed CLI `args`,
    dispatches to the matching ``api.find_*`` function, and prints the
    results as JSON, CSV, or plain text.
    """
    # get the session
    session = connect(args)

    # get the entity
    entity = args.entity

    # set by to an empty list if not given
    if args.by is None:
        args.by = []

    # parse out the BY arguments into keyword filters
    kwargs = dict()
    for by in args.by:
        # if len(by) != 2:
        kwargs[by[0]] = by[1]

    # switch entity - each branch accepts singular and plural spelling
    if entity.lower() == 'units' or entity.lower() == 'unit':
        results = api.find_unit(session, **kwargs)
    elif entity.lower() == 'variables' or entity.lower() == 'variable':
        results = api.find_variable(session, **kwargs)
    elif entity.lower() == 'licenses' or entity.lower() == 'license':
        results = api.find_license(session, **kwargs)
    elif entity.lower() == 'keywords' or entity.lower() == 'keyword':
        results = api.find_keyword(session, **kwargs)
    elif entity.lower() == 'roles' or entity.lower() == 'role':
        results = api.find_role(session, **kwargs)
    elif entity.lower() == 'persons' or entity.lower() == 'person':
        results = api.find_person(session, **kwargs)
    elif entity.lower() == 'group_types' or entity.lower() == 'group_type':
        results = api.find_group_type(session, **kwargs)
    elif entity.lower() == 'groups' or entity.lower() == 'group':
        results = api.find_group(session, **kwargs)
    elif entity.lower() == 'entries' or entity.lower() == 'entry':
        results = api.find_entry(session, **kwargs)
    elif entity.lower() == 'thesaurus':
        results = api.find_thesaurus(session, **kwargs)
    else:
        cprint(args, 'Oops. Finding %s is not supported.' % entity)
        exit(0)

    # switch the output format
    if args.json:
        obj = [serialize(r) for r in results]
        cprint(args, json.dumps(obj, indent=4))
    elif args.csv:
        obj = [flatten(serialize(r)) for r in results]
        f = io.StringIO(newline='')
        # union of all keys over all result rows becomes the header
        colnames = set([n for o in obj for n in o.keys()])
        writer = csv.DictWriter(f, fieldnames=colnames, quotechar='"', quoting=csv.QUOTE_NONNUMERIC, lineterminator='\r')
        writer.writeheader()
        for o in obj:
            writer.writerow(o)
        f.seek(0)
        cprint(args, f.getvalue())
    else:
        # stdOut
        for result in results:
            cprint(args, result)
def find_by_author(session):
    """Author wildcard search resolves to exactly the expected entries."""
    hits = api.find_entry(session, author='Reev*')
    assert len(hits) == 2
    assert set(e.title for e in hits) == set(['Dummy 1', 'Dummy 3'])
    return True