def get_metadata(self, nid: str=None, relation: str=None, nid_is_source: bool=True): """ :param nid: node id :param relation: the relation to search for :param nid_is_source: true iff nid is the source of the relation :return: metadata that reference the nid with the given relation, or all metadata if fields are empty """ match_source_id = {"term": {"source": nid}} match_target_id = {"nested": {"path": "target", "query": { "bool": {"should": [{"term": {"target.id": nid}}]} }}} if nid is None: body = {"query": {"match_all": {}}} elif relation is None: body = {"query": {"bool": {"should": [ match_source_id, match_target_id ]}}} else: match_id = match_source_id if nid_is_source else match_target_id body = {"query": {"bool": {"must": [ match_id, {"nested": {"path": "target", "query": { "bool": {"should": [{"term": {"target.type": relation}}]} }}} ]}}} res = client.search(index='metadata', doc_type="annotation", body=body, scroll="10m", filter_path=[ 'hits.hits._id', 'hits.total', 'hits.hits._source.author', 'hits.hits._source.class', 'hits.hits._source.source', 'hits.hits._source.target', 'hits.hits._source.text']) if res["hits"]["total"] == 0: return md_hits = [] for md in res["hits"]["hits"]: md_hit = MDHit(md["_id"], md["_source"]["author"], md["_source"]["class"], md["_source"]["text"], md["_source"]["source"], md["_source"]["target"]["id"], md["_source"]["target"]["type"]) md_hits.append(md_hit) yield md_hit for hit in md_hits: for comment in self.get_comments(hit.id): yield comment
def search_keywords_md(self, keywords: list, max_hits=15): """ Performs a search query on metadata to match the provided keywords :param keywords: the list of keywords to match :param max_hits: max number of results to return :return: the metadata that contain the keywords """ index = "metadata" body = { "from": 0, "size": max_hits, "query": { "bool": { "should": [{ "match": { "text": keywords } }, { "nested": { "path": "tags", "query": { "bool": { "should": [{ "match": { "tags.tag": keywords } }] } } } }] } } } filter_path = [ 'hits.total', 'hits.hits._type', 'hits.hits._id', 'hits.hits._parent', 'hits.hits._source.author', 'hits.hits._source.class', 'hits.hits._source.source', 'hits.hits._source.target', 'hits.hits._source.text' ] res = client.search(index=index, body=body, filter_path=filter_path) if res['hits']['total'] == 0: return [] for el in res['hits']['hits']: if el["_type"] == "comment": yield MDComment(el["_id"], el["_source"]["author"], el["_source"]["text"], el["_parent"]) elif el["_type"] == "annotation": yield MDHit(el["_id"], el["_source"]["author"], el["_source"]["class"], el["_source"]["text"], el["_source"]["source"], el["_source"]["target"]["id"], el["_source"]["target"]["type"])
def add_annotation(self, author: str, text: str, md_class: str, source: str, target={ "id": None, "type": None }, tags=[]): """ Adds annotation document to the elasticsearch graph. :param author: user or process who wrote the metadata :param text: free text annotation :param md_class: metadata class :param source: nid of column source :param target: (optional) { "id": nid of column target, "type": metadata relation } :param tags: (optional) keyword tags :return: an MDHit of the new annotation """ timestamp = self._current_time() mapped_tags = [] for tag in tags: mapped_tags.append({ "author": author, "creation_date": timestamp, "tag": tag }) body = { "author": author, "text": text, "class": md_class, "source": source, "target": target, "tags": mapped_tags, "creation_date": timestamp, "updated_date": timestamp } res = client.create(index='metadata', doc_type='annotation', body=body) hit = MDHit(res["_id"], author, md_class, text, source, target["id"], target["type"]) return hit
def add_tags(self, author: str, tags: list, md_id: str): """ Add tags to the annotation with the given md_id. :param author: identifiable name of user or process :param tags: list of tags :param md_id: metadata id :return: an MDHit of the updated annotation """ timestamp = self._current_time() res = client.search(index='metadata', doc_type='annotation', body={"query": { "terms": { "_id": [md_id] } }}) if res["hits"]["total"] == 0: raise ValueError("Given md_id does not exist.") source = res["hits"]["hits"][0]["_source"] new_tags = [] for tag in tags: new_tags.append({ "author": author, "creation_date": timestamp, "tag": tag }) new_tags.extend(source["tags"]) body = {"doc": {"updated_date": timestamp, "tags": new_tags}} res = client.update(index='metadata', doc_type='annotation', id=md_id, body=body) return MDHit(res["_id"], author, source["class"], source["text"], source["source"], source["target"]["id"], source["target"]["type"])