예제 #1
0
def _index(doc, metadata, op_type='index', _addon=MappingProxyType({})):
    """
    Validate and save a dataset document to Elasticsearch.

    Args:
        doc: the dataset document body (ValidatedDict).
        metadata: registry metadata, stored under the '_meta' field
            (ValidatedDict).
        op_type: passed through to ESDataset.save(); presumably
            'index' (upsert) or 'create' (fail on existing doc).
        _addon: read-only mapping of internal carry-over fields
            (N3C ticket info and the original creation timestamp).

    Returns:
        The saved ESDataset instance.

    Raises:
        DatasetValidationError: the document fails mapping validation.
        ConflictError: Elasticsearch reports a version/op-type conflict
            (e.g. the document already exists).
    """
    assert isinstance(doc, ValidatedDict)
    assert isinstance(metadata, ValidatedDict)

    _now = datetime.now(timezone.utc)

    dataset = ESDataset(**doc)
    dataset['_meta'] = metadata
    dataset['_n3c'] = {
        "url": _addon.get("n3c_url"),
        "status": _addon.get("n3c_status"),
        "timestamp": _addon.get("n3c_timestamp")
    }
    dataset['_ts'] = {
        # keep the original creation time on updates; fall back to now
        # when this is the first time the document is indexed
        "date_created": _addon.get("date_created") or _now,
        "last_updated": _now
    }

    try:
        dataset.save(op_type=op_type)
    except elasticsearch_dsl.ValidationException as exc:
        # chain the cause so the underlying validation detail is kept
        raise DatasetValidationError(str(exc)) from exc
    except elasticsearch.exceptions.ConflictError as exc:
        raise ConflictError("document already exists.") from exc

    return dataset
예제 #2
0
def update_url():
    """
    Backfill the N3C ticket URL on datasets matched by title.

    For every (title, url) pair yielded by get_titles(), run a match
    query on the dataset name and, for exact-title hits, overwrite the
    '_n3c' field with the url. Progress is printed to stdout.
    """
    for title, url in get_titles():
        print(title)
        print(url)
        hits = Dataset.search().query("match", name=title)
        for hit in hits:
            # the match query runs on an analyzed text field, so
            # require exact name equality before touching the document
            if hit.name != title:
                continue
            print(hit.meta.id)
            record = Dataset.get(hit.meta.id)
            record["_n3c"] = {"url": url}
            record.save()
        print()
예제 #3
0
def _clean(metadict, defdict=None):
    """
    Normalize a metadata dict against the fields declared on the
    ESDataset '_meta' inner document.

    Alias keys (e.g. 'user', 'schema') are translated to their
    canonical database field names. Entries from defdict (cleaned the
    same way) fill in fields still missing afterwards.

    Returns:
        An AliasDict (ValidatedDict subclass) that also resolves alias
        keys on item access.

    Raises:
        RegistryError: a field is supplied more than once (via aliases)
            or an unknown field is present.
    """
    defdict = defdict or {}  # defaults
    assert isinstance(metadict, dict)
    assert isinstance(defdict, dict)

    # declared fields in database

    _meta = ESDataset._ObjectBase__get_field('_meta')._doc_class()
    fields = {field for field, _, _ in _meta._ObjectBase__list_fields()}

    # auto-correct

    field_to_aliases = {
        # database key: (aliases, )
        "username": ("user", "owner"),
        "class_id": ("schema", "schema_class", "schema_class_id", "type"),
    }
    alias_to_field = {
        alias: field
        for field, aliases in field_to_aliases.items()
        for alias in aliases
    }

    E01 = "repeated metadata field '{}'."
    E02 = "unsupported metadata field '{}'."
    _metadata = {}
    for key, val in metadict.items():
        if key in fields:
            canonical = key
        elif key in alias_to_field:
            canonical = alias_to_field[key]
        else:  # undefined key
            raise RegistryError(E02.format(key))
        if canonical in _metadata:
            raise RegistryError(E01.format(key))
        _metadata[canonical] = val

    # default values

    if defdict:
        defdict = _clean(defdict)
    for key, val in defdict.items():
        _metadata.setdefault(key, val)

    # result

    class AliasDict(ValidatedDict):
        # translate alias keys to canonical field names on lookup
        def __getitem__(self, key):
            return super().__getitem__(alias_to_field.get(key, key))

    return AliasDict(_metadata)
예제 #4
0
def exists(anyid=None, **multi_match):  # TODO multimatch
    """
    Check if a document exists by its id fields,
    or optionally provide other criterions.

    Examples:
        dataset.exists('83dc3401f86819de')
        dataset.exists('EGAD00001003941')
        dataset.exists(name="Wellderly Dataset from Scripps CTSA center")

    Raises:
        RegistryError: neither an id nor any keyword criterion given.
    """
    if not anyid and not multi_match:
        raise RegistryError("specify at least one condition.")

    if anyid:
        # the id may be either the document _id or the identifier field
        found = ESDataset.exists(_id=anyid)
        return found or ESDataset.exists(identifier=anyid)

    return ESDataset.exists(**multi_match)
예제 #5
0
def get_meta(_id):
    """
    Retrieve a dataset file's metadata by document _id.

    Raises:
        NoEntityError: no dataset exists under this _id.
    """
    # only fetch the '_meta' portion of the document
    dataset = ESDataset.get(id=_id, ignore=404, _source="_meta")

    if not dataset:
        raise NoEntityError(f"dataset {_id} does not exist.")

    return RegistryDocument.wraps(dataset).meta
예제 #6
0
def get(_id):
    """
    Retrieve a dataset document with its _id.

    The identifier field cannot be used here, which gives
    a weak privacy assurance.

    Raises:
        NoEntityError: no dataset exists under this _id.
    """
    dataset = ESDataset.get(id=_id, ignore=404)

    if not dataset:
        raise NoEntityError(f"dataset {_id} does not exist.")

    return RegistryDocument.wraps(dataset)
예제 #7
0
def delete(_id):
    """
    Delete a dataset metadata document and return its name.

    If you only have the identifier, use the get function to look up
    the _id first, then delete with that _id.

    Raises:
        NoEntityError: no dataset exists under this _id.
    """
    dataset = ESDataset.get(id=_id, ignore=404)

    if not dataset:
        raise NoEntityError(f"dataset {_id} does not exist.")

    # capture the name to confirm which document was removed
    name = dataset.name
    dataset.delete()
    return name
예제 #8
0
def _build(metafilter):
    """
    Build an ESDataset search from a cleaned metadata filter.

    The 'private' flag selects between private-only and public-only
    results; every remaining entry is passed to ESDataset.find() as a
    '_meta' field filter.

    Args:
        metafilter: a ValidatedDict of metadata filter fields.

    Returns:
        An elasticsearch-dsl Search object.
    """
    assert isinstance(metafilter, ValidatedDict)

    # work on a shallow copy so pop() below does not mutate the
    # caller's dict (the original popped 'private' off the argument)
    metafilter = dict(metafilter)

    # special consideration for field 'private'
    private = metafilter.pop('private', None)
    # pass the rest as _meta field filters
    search = ESDataset.find(**metafilter)

    if private:  # if explicitly want private datasets, only return private ones
        search = search.filter('match', _meta__private=True)
    else:  # if not, only return public datasets of that criterion
        search = search.exclude('match', _meta__private=True)
    # private datasets and public ones are never returned together

    return search
예제 #9
0
def update(_id, new_doc, **metadata):
    """
    Update a dataset metadata document.
    Return the version after update. (1, 2, ...)

    Args:
        _id: the Elasticsearch document _id of the dataset.
        new_doc: the replacement dataset document; its 'identifier'
            must equal the stored one.
        **metadata: partial '_meta' overrides, applied on top of the
            stored metadata and any '_meta' carried inside new_doc.

    Raises:
        NoEntityError: no dataset exists under this _id.
        ConflictError: new_doc changes the identifier field.
    """
    # NOTE
    # Internally, the update is performed by
    # Revalidating and replacing the original document.

    new_doc = ensure_document(new_doc)
    dataset = ESDataset.get(id=_id, ignore=404)

    if not dataset:
        raise NoEntityError(f"dataset {_id} does not exist.")

    # Cannot change the identifier field, because it would result
    # in changing the document _id. Delete and add again instead.
    if new_doc.get('identifier') != dataset.identifier:
        raise ConflictError("cannot change identifier field.")

    # NOTE **important**
    # Patch the original document metadata with the partial update.
    # Precedence (later wins): stored _meta < new_doc['_meta'] < **metadata.
    _meta = dataset['_meta'].to_dict()
    _meta.update(_clean(new_doc.pop('_meta', {})))
    _meta.update(_clean(metadata))
    _meta = _clean(_meta)  # re-clean the merged result as a whole

    # 'schema' is an alias key; _clean's AliasDict resolves it on access
    new_doc = validate(new_doc, _meta['schema'])

    dataset = _index(
        new_doc,
        _meta,
        _addon={
            # Carry over our internal metadata like
            # N3C ticket info and creation timestamp.
            "date_created": dataset._ts.date_created,
            "n3c_url": dataset._n3c.url,
            "n3c_status": dataset._n3c.status,
            "n3c_timestamp": dataset._n3c.timestamp
        })

    return dataset.meta.version