def check_find_person(session):
    p0 = api.find_person(session, last_name='Reeves')
    p1 = api.find_person(session, attribution='Curie, Marie, Awesome.')
    p2 = api.find_person(session, uuid='1e8402f8-4d11-41dd-85c7-93f9d95225e1')

    assert p0[0].first_name == 'Keanu'
    assert p1[0].uuid == 'e05c3360-1120-4c9b-a181-659da986b061'
    assert p2.organisation_name == 'University of Non-existent people'

    return True
def add_partial_invention(session):
    """
    Add a partial invention to 'Inventions made by David Edward Hughes'.
    This is used to test the find_entry behavior.
    """
    hughes = api.find_person(session, last_name='Hughes', return_iterator=True).one()

    e3 = models.Entry(
        title='Warp drive',
        abstract='Started, but never finished',
        location="SRID=4326;POINT (51.505946 -0.132951)",
        license_id=5,
        variable_id=1,
        is_partial=True
    )
    e3.contributors.append(
        models.PersonAssociation(relationship_type_id=1, person=hughes, order=1)
    )

    inventions = api.find_group(session, title='Awesome inventions', return_iterator=True).one()
    inventions.entries.append(e3)

    try:
        session.add(e3)
        session.commit()
    except Exception as e:
        session.rollback()
        raise e

    assert len(inventions.entries) == 3

    return True
def add_split_dataset(session):
    # create dummy data
    data = pd.DataFrame(data={
        'value': np.random.normal(10, 1, size=350),
        'tstamp': pd.date_range('201309241100', periods=350, freq='15min')
    })
    data.set_index('tstamp', inplace=True)

    # add two entries as split datasets
    kit = api.find_person(session, organisation_abbrev='KIT')[0]
    historical_entry = api.add_entry(
        session,
        title='Historical data',
        abstract='Long description',
        location=(4.2, 42),
        variable=1,
        license=6,
        author=kit.id
    )
    recent_entry = api.add_entry(
        session,
        title='Recent data',
        abstract='something bad happened that needs description',
        location=(4.2, 42),
        variable=1,
        license=6,
        author=kit.id
    )

    # create the datasources
    historical_entry.create_datasource(type=1, path='timeseries', datatype='timeseries')
    recent_entry.create_datasource(type=1, path='timeseries', datatype='timeseries')

    # split the data between the two entries
    historical_entry.import_data(data=data.iloc[:300, :])
    recent_entry.import_data(data=data.iloc[300:, :])

    # group the two entries into one composite dataset
    full_dataset = api.add_group(session, 'Split dataset', [historical_entry.id, recent_entry.id])

    # check the result
    result = api.find_entry(session, id=recent_entry.id, as_result=True)[0]

    # recover the data
    db_data = result.get_data()

    # search for the checksum - result.checksum is a checksum of member checksums,
    # of which there is only one here
    assert len(result.checksums) == 1
    checksum = result.checksums[0]
    assert checksum in db_data

    recovered_data = db_data[checksum].values
    assert_array_almost_equal(data.values, recovered_data)

    return True
def auto_force_data_names(session, df_1D_wind, df_3D_prec):
    """
    If len(data_columns) != len(entry.variable.column_names),
    force_data_names should automatically become True and the column
    names of the imported data should be saved in datasource.data_names.
    To test this, we add 1D wind data (with 3D precision) to the 3D
    wind variable with variable.column_names=['u', 'v', 'w'].
    """
    # find the variable
    var_3D_wind = api.find_variable(session, name='3D-wind')[0]

    # find the previously added person
    kit = api.find_person(session, organisation_abbrev='KIT')[0]

    # add the entry
    entry_1D_precision = api.add_entry(
        session,
        title='1-dimensional windspeed data, precision',
        abstract='1-dimensional windspeed data',
        location=(8, 52),
        variable=var_3D_wind.id,
        comment='after double rotation',
        license=6,
        author=kit.id,
        embargo=False,
        is_partial=False
    )

    # create datasource and scale
    entry_1D_precision.create_datasource(type=1, path='timeseries', datatype='timeseries')
    entry_1D_precision.datasource.create_scale(
        resolution='30min',
        extent=(df_1D_wind.index[0], df_1D_wind.index[-1]),
        support=1.0,
        scale_dimension='temporal'
    )

    # add data
    entry_1D_precision.import_data(data=df_1D_wind, precision=df_3D_prec, force_data_names=False)

    # load data
    dat = entry_1D_precision.get_data()

    # assert
    assert dat.columns.tolist() == ['u_ms', 'precision1', 'precision2', 'precision3']
    assert dat['u_ms'].mean() == pytest.approx(3.1, 0.05)

    return True
def precision_test(session, df_3D_wind, df_3D_prec):
    """
    Test if precision columns are handled correctly.
    We use the 3D eddy wind data with 3 precision columns for this.
    """
    # find the variable
    var_3D_wind = api.find_variable(session, name='3D-wind')[0]

    # find the previously added person
    kit = api.find_person(session, organisation_abbrev='KIT')[0]

    # add the entry
    entry_3D_precision = api.add_entry(
        session,
        title='3-dimensional windspeed data, precision',
        abstract='3-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=var_3D_wind.id,
        comment='after double rotation',
        license=6,
        author=kit.id,
        embargo=False,
        is_partial=False
    )

    # create datasource and scale
    entry_3D_precision.create_datasource(type=1, path='timeseries', datatype='timeseries')
    entry_3D_precision.datasource.create_scale(
        resolution='30min',
        extent=(df_3D_wind.index[0], df_3D_wind.index[-1]),
        support=1.0,
        scale_dimension='temporal'
    )

    # add data
    entry_3D_precision.import_data(data=df_3D_wind, precision=df_3D_prec, force_data_names=False)

    # load data
    dat = entry_3D_precision.get_data()

    # assert
    assert dat.columns.tolist() == [
        'u', 'v', 'w', 'precision1', 'precision2', 'precision3'
    ]  # note: input was 'precision_1'
    assert dat['u'].mean() == pytest.approx(3.1, 0.05)

    return True
def force_data_names_true(session, df_3D_wind):
    """
    Test force_data_names=True when loading the data into the database.
    In this case, datasource.data_names will be overwritten with the
    column names of the imported data; when exporting the data, these
    column names will be displayed.
    We use the 3D eddy wind data for this again.
    """
    # find the variable
    var_3D_wind = api.find_variable(session, name='3D-wind')[0]

    # find the previously added author
    kit = api.find_person(session, organisation_abbrev='KIT')[0]

    # add the entry
    entry_3D_force_data_names = api.add_entry(
        session,
        title='3-dimensional windspeed data, force_data_names',
        abstract='3-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=var_3D_wind.id,
        comment='after double rotation',
        license=6,
        author=kit.id,
        embargo=False,
        is_partial=False
    )

    # create datasource and scale
    entry_3D_force_data_names.create_datasource(type=1, path='timeseries', datatype='timeseries')
    entry_3D_force_data_names.datasource.create_scale(
        resolution='30min',
        extent=(df_3D_wind.index[0], df_3D_wind.index[-1]),
        support=1.0,
        scale_dimension='temporal'
    )

    # add data
    entry_3D_force_data_names.import_data(df_3D_wind, force_data_names=True)

    # load data
    dat = entry_3D_force_data_names.get_data()

    # assert
    assert dat.columns.tolist() == ['u_ms', 'v_ms', 'w_ms']
    assert dat['u_ms'].mean() == pytest.approx(3.1, 0.05)

    return True
def one_dim_data(session, df_1D_wind):
    """
    Do the same as above, but with one-dimensional data instead.
    """
    # add the variable
    var_1D_wind = api.add_variable(session, name='1D-wind', symbol='u', column_names=['u'], unit=107)

    # find the previously added author
    kit = api.find_person(session, organisation_abbrev='KIT')[0]

    # add the entry
    entry_1D_wind = api.add_entry(
        session,
        title='1-dimensional windspeed data',
        abstract='1-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=var_1D_wind.id,
        license=6,
        author=kit.id,
        embargo=False,
        is_partial=False
    )

    # create datasource and scale
    entry_1D_wind.create_datasource(type=1, path='timeseries', datatype='timeseries')
    entry_1D_wind.datasource.create_scale(
        resolution='30min',
        extent=(df_1D_wind.index[0], df_1D_wind.index[-1]),
        support=1.0,
        scale_dimension='temporal'
    )

    # add data
    entry_1D_wind.import_data(df_1D_wind)

    # read data
    dat = entry_1D_wind.get_data()

    # assert
    assert dat.columns == 'u'
    assert dat['u'].mean() == pytest.approx(3.1, 0.05)

    return True
def get_uuid(session: Session, uuid: str, not_found='raise'):
    """
    .. versionadded:: 0.1.13

    Return the Metacatalog object of the given version 4 UUID.
    The supported objects are:

    - Entry
    - EntryGroup
    - Keyword

    .. versionadded:: 0.2.7

    - Person

    """
    # check if an Entry exists
    entry = api.find_entry(session, uuid=uuid)
    if entry is not None:
        return entry

    # check if an EntryGroup exists
    group = api.find_group(session, uuid=uuid)
    if group is not None:
        return group

    # check if a Person exists
    person = api.find_person(session, uuid=uuid)
    if person is not None:
        return person

    # handle keyword
    keyword = api.find_keyword(session, uuid=uuid)
    if keyword is not None:
        return keyword

    # nothing matched
    if not_found == 'raise':
        raise NoResultFound("The UUID='%s' was not found." % uuid)
    return None
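# Usage sketch for get_uuid, assuming a populated metacatalog database and
# that get_uuid is exposed on the api module like the finders above. The
# first UUID is borrowed from the tests; whether it exists in any given
# database is an assumption for illustration.
from metacatalog import api

session = api.connect_database()  # connection settings come from the environment

# returns whichever object matches first: Entry, EntryGroup, Person, or Keyword
obj = api.get_uuid(session, uuid='1e8402f8-4d11-41dd-85c7-93f9d95225e1')
print(type(obj).__name__)

# with not_found set to anything other than 'raise', a miss returns None
missing = api.get_uuid(session, uuid='00000000-0000-4000-8000-000000000000', not_found='ignore')
assert missing is None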
def add_persons_to_entries(session, entries, persons, roles, order):
    r"""Add person(s) to entry(ies)

    Adds associations between entries and persons. The Entry and Person
    instances have to already exist in the database. Each association
    has to further define the role of the person for the respective entry.

    Parameters
    ----------
    session : sqlalchemy.Session
        SQLAlchemy session connected to the database.
    entries : list
        List of identifier or single identifier to load entries.
        If int, the Entry.id is assumed. If str, the title is assumed.
        Can also pass a metacatalog.Entry object.
    persons : list
        List of identifier or single identifier to load persons.
        If int, Person.id is assumed. If str, Person.last_name is assumed.
        Can also pass a metacatalog.Person object.
    roles : list
        List of, or single role. The shape has to match the persons
        parameter. The role has to be identified by id (int) or role
        name (str).
    order : list
        List of, or single order. The shape has to match the persons
        parameter. The order gives the ascending order of contributors
        on the respective entry (after the author).

    Returns
    -------
    void

    See Also
    --------
    metacatalog.Entry
    metacatalog.Person
    metacatalog.PersonRole

    """
    # check the input shapes
    if not isinstance(entries, list):
        entries = [entries]
    if not isinstance(persons, list):
        persons = [persons]
    if not isinstance(roles, list):
        roles = [roles] * len(persons)
    if not isinstance(order, list):
        order = [order] * len(persons)

    # add for each entry
    for entry_id in entries:
        # load the entry
        if isinstance(entry_id, models.Entry):
            entry = entry_id
        elif isinstance(entry_id, int):
            # TODO: sort by version descending to get the latest
            entry = api.find_entry(session=session, id=entry_id, return_iterator=True).first()
        elif isinstance(entry_id, str):
            # TODO: sort by version descending to get the latest
            entry = api.find_entry(session=session, title=entry_id, return_iterator=True).first()
        else:
            raise AttributeError("Value '%s' not allowed for entries" % str(type(entry_id)))

        # add each person
        assocs = []
        for person_id, role_id, order_num in zip(persons, roles, order):
            # load the person
            if isinstance(person_id, models.Person):
                person = person_id
            elif isinstance(person_id, int):
                person = api.find_person(session=session, id=person_id, return_iterator=True).one()
            elif isinstance(person_id, str):
                person = api.find_person(session=session, last_name=person_id, return_iterator=True).first()
            else:
                raise AttributeError('Persons can only be identified by id or last_name')

            # load the role
            if isinstance(role_id, models.PersonRole):
                role = role_id
            elif isinstance(role_id, int):
                role = api.find_role(session=session, id=role_id, return_iterator=True).one()
            elif isinstance(role_id, str):
                role = api.find_role(session=session, name=role_id, return_iterator=True).first()
            else:
                raise AttributeError('Roles can only be identified by id or name')

            # create the new association
            assocs.append(models.PersonAssociation(entry=entry, person=person, role=role, order=order_num))

        # add the associations to the entry
        try:
            entry.contributors.extend(assocs)
            session.add(entry)
            session.commit()
        except Exception as e:
            session.rollback()
            raise e
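# Usage sketch for add_persons_to_entries. The entry id, last names and role
# name below are hypothetical values for illustration; the corresponding
# records must already exist in the database, as the docstring requires.
from metacatalog import api

session = api.connect_database()

# a scalar role or order is broadcast to every person in the list;
# here both persons get the same role, with an explicit contributor order
api.add_persons_to_entries(
    session,
    entries=42,                   # hypothetical Entry.id
    persons=['Curie', 'Hughes'],  # matched on Person.last_name
    roles='coAuthor',             # hypothetical role name, repeated for both persons
    order=[2, 3]                  # position after the first author
)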
def add_entry(session, title, author, location, variable, abstract=None,
              external_id=None, geom=None, license=None, embargo=False, **kwargs):
    r"""Add new Entry

    Adds a new metadata Entry to the database. This method will create
    the core entry. Usually, more steps are necessary, which will need
    the newly created database ID. Such steps are:

    * adding contributors (mandatory)
    * adding data (extremely useful)
    * adding keywords (recommended)

    Parameters
    ----------
    session : sqlalchemy.Session
        SQLAlchemy session connected to the database.
    title : str
        Title of the Entry.
    author : int, str
        First author of the Entry. The Person record has to exist
        already in the database and can be found by exact match on
        id (int) or last_name (str).
    location : str, tuple
        Can be either a WKT of an EPSG:4326 location, or the
        coordinates as a tuple. It has to be (X, Y), i.e.
        (longitude, latitude).
    variable : int, str
        **Full** variable name (str) or ID (int) of the data described
        by the Entry.
    abstract : str
        Description of the data. Be as detailed as possible.
    external_id : str
        If the data described by the Entry has another unique
        identifier, usually supplied by the data provider, it can be
        stored for reference reasons.
    comment : str
        General purpose comment that should not contain any vital
        information to understand the entry. If it's vital, it should
        go into the abstract.
    geom : str
        WKT of any additional geoinformation in EPSG:4326.
    license : str, int
        Either the id or the **short title** of the license to be
        linked to this Entry.
    embargo : bool
        If True, this Entry will **not** be publicly available until
        the embargo ends. The embargo period is usually 2 years but
        can be modified using the kwargs.

    Returns
    -------
    entry : metacatalog.Entry
        Entry instance of the added entry entity.

    """
    # create the attribute dict
    attr = dict(title=title, abstract=abstract, external_id=external_id, embargo=embargo)
    attr.update(kwargs)

    # parse the author
    if isinstance(author, int):
        author = api.find_person(session=session, id=author, return_iterator=True).one()
    elif isinstance(author, str):
        author = api.find_person(session=session, last_name=author, return_iterator=True).first()
    else:
        raise AttributeError('author has to be of type int or str')

    # parse the location and geom
    if isinstance(location, str):
        attr['location'] = location
    elif isinstance(location, (tuple, list)):
        attr['location'] = 'SRID=4326;POINT (%f %f)' % (location[0], location[1])
    if geom is not None and isinstance(geom, str):
        attr['geom'] = geom

    # handle the variable
    if isinstance(variable, int):
        variable = api.find_variable(session=session, id=variable, return_iterator=True).one()
    elif isinstance(variable, str):
        variable = api.find_variable(session=session, name=variable, return_iterator=True).first()
    else:
        raise AttributeError('variable has to be of type integer or string.')
    attr['variable_id'] = variable.id

    # handle the license
    if isinstance(license, int):
        license = api.find_license(session=session, id=license, return_iterator=True).one()
    elif isinstance(license, str):
        license = api.find_license(session=session, short_title=license, return_iterator=True).first()
    if license is not None:
        attr['license_id'] = license.id

    # add the entry
    entry = add_record(session=session, tablename='entries', **attr)

    # reference the person using the 'First Author' (ID=1) role
    add_persons_to_entries(session, entry, author, 1, 1)

    return entry
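# Usage sketch for add_entry, mirroring the ids used in the tests above
# (variable=1, license=6) on the assumption that such records exist; any
# existing Person, Variable and License records would work the same way.
from metacatalog import api

session = api.connect_database()
author = api.find_person(session, last_name='Hughes')[0]

entry = api.add_entry(
    session,
    title='Example windspeed data',
    author=author.id,                 # exact match on Person.id
    location=(8, 52),                 # (longitude, latitude), stored as an EPSG:4326 POINT
    variable=1,                       # Variable.id; the full variable name would also work
    abstract='Illustrative entry for the sketch above, not real data',
    license=6,                        # License.id; the license short title would also work
    embargo=False
)
print(entry.id, entry.title)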
def find(args):
    # get the session
    session = connect(args)

    # get the entity
    entity = args.entity

    # set by to an empty list if not given
    if args.by is None:
        args.by = []

    # parse out the BY arguments; each is a (key, value) pair
    kwargs = dict()
    for by in args.by:
        kwargs[by[0]] = by[1]

    # switch entity
    if entity.lower() in ('units', 'unit'):
        results = api.find_unit(session, **kwargs)
    elif entity.lower() in ('variables', 'variable'):
        results = api.find_variable(session, **kwargs)
    elif entity.lower() in ('licenses', 'license'):
        results = api.find_license(session, **kwargs)
    elif entity.lower() in ('keywords', 'keyword'):
        results = api.find_keyword(session, **kwargs)
    elif entity.lower() in ('roles', 'role'):
        results = api.find_role(session, **kwargs)
    elif entity.lower() in ('persons', 'person'):
        results = api.find_person(session, **kwargs)
    elif entity.lower() in ('group_types', 'group_type'):
        results = api.find_group_type(session, **kwargs)
    elif entity.lower() in ('groups', 'group'):
        results = api.find_group(session, **kwargs)
    elif entity.lower() in ('entries', 'entry'):
        results = api.find_entry(session, **kwargs)
    elif entity.lower() == 'thesaurus':
        results = api.find_thesaurus(session, **kwargs)
    else:
        cprint(args, 'Oops. Finding %s is not supported.' % entity)
        exit(0)

    # switch the output
    if args.json:
        obj = [serialize(r) for r in results]
        cprint(args, json.dumps(obj, indent=4))
    elif args.csv:
        obj = [flatten(serialize(r)) for r in results]
        f = io.StringIO(newline='')
        colnames = set([n for o in obj for n in o.keys()])
        writer = csv.DictWriter(f, fieldnames=colnames, quotechar='"',
                                quoting=csv.QUOTE_NONNUMERIC, lineterminator='\r')
        writer.writeheader()
        for o in obj:
            writer.writerow(o)
        f.seek(0)
        cprint(args, f.getvalue())
    else:
        # stdOut
        for result in results:
            cprint(args, result)
def find(args):
    # get the session
    session = connect(args)

    # get the entity
    entity = args.entity

    # set by to an empty list if not given
    if args.by is None:
        args.by = []

    # parse out the BY arguments; each is a (key, value) pair
    kwargs = dict()
    for by in args.by:
        kwargs[by[0]] = by[1]

    # switch entity
    if entity.lower() in ('units', 'unit'):
        results = api.find_unit(session, **kwargs)
    elif entity.lower() in ('variables', 'variable'):
        results = api.find_variable(session, **kwargs)
    elif entity.lower() in ('licenses', 'license'):
        results = api.find_license(session, **kwargs)
    elif entity.lower() in ('keywords', 'keyword'):
        results = api.find_keyword(session, **kwargs)
    elif entity.lower() in ('roles', 'role'):
        results = api.find_role(session, **kwargs)
    elif entity.lower() in ('persons', 'person'):
        results = api.find_person(session, **kwargs)
    elif entity.lower() in ('group_types', 'group_type'):
        results = api.find_group_type(session, **kwargs)
    elif entity.lower() in ('groups', 'group'):
        results = api.find_group(session, **kwargs)
    elif entity.lower() in ('entries', 'entry'):
        if args.include_partial:
            kwargs['include_partial'] = True
        results = api.find_entry(session, **kwargs)
    elif entity.lower() == 'thesaurus':
        results = api.find_thesaurus(session, **kwargs)
    else:
        cprint(args, 'Oops. Finding %s is not supported.' % entity)
        exit(0)

    if args.export is not None and args.export != '':
        # only entry and group can be exported
        if entity.lower() not in ('entry', 'group'):
            cprint(args, 'Can only export entity=Entry and entity=Group')
            return

        # get the fmt and path
        path = args.export
        fmt = args.export.split('.')[-1]
        fmt = 'netCDF' if fmt == 'nc' else fmt

        # check the number of results
        if len(results) == 1:
            results[0].export(path=path, fmt=fmt)
            cprint(args, f'Wrote {path}.')
        else:
            for i, result in enumerate(results):
                path = '.'.join([*args.export.split('.')[:-1], f'_{i}', args.export.split('.')[-1]])
                result.export(path=path, fmt=fmt)
            cprint(args, f'Wrote {len(results)} files.')
        return

    # switch the output
    if args.json:
        obj = [serialize(r) for r in results]
        cprint(args, json.dumps(obj, indent=4))
    elif args.csv:
        obj = [flatten(serialize(r)) for r in results]
        f = io.StringIO(newline='')
        colnames = set([n for o in obj for n in o.keys()])
        writer = csv.DictWriter(f, fieldnames=colnames, quotechar='"',
                                quoting=csv.QUOTE_NONNUMERIC, lineterminator='\r')
        writer.writeheader()
        for o in obj:
            writer.writerow(o)
        f.seek(0)
        cprint(args, f.getvalue())
    else:
        # stdOut
        for result in results:
            cprint(args, result)
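# Hedged CLI sketch: find() is the handler behind the command line `find`
# subcommand, and the args attributes read above (entity, by, json, csv,
# include_partial, export) correspond to its flags. The exact flag
# spellings below are assumptions for illustration:
#
#   metacatalog find person --by last_name Reeves --json
#   metacatalog find entry --by title 'Historical data' --export output.nc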