def match_filter(self, uristr):
    """Build a query dict matching the ``uri`` field against ``uristr``.

    ``uristr`` is passed through ``uri.expand`` and one ``match`` clause
    is created per returned URI (presumably the URIs considered
    equivalent to ``uristr`` -- confirm against ``uri.expand``).

    :param uristr: the URI string to search for.
    :returns: ``{"query": <match clause>}`` when exactly one URI comes
        back, otherwise ``{"query": {"bool": {"should": [...]}}}``
        wrapping every clause.
    """
    match_clauses = []
    for expanded in uri.expand(uristr):
        match_clauses.append({"match": {"uri": expanded}})

    # Only wrap in a bool/should when there is not exactly one clause.
    if len(match_clauses) != 1:
        return {"query": {"bool": {"should": match_clauses}}}
    return {"query": match_clauses[0]}
def __call__(self, params):
    """Turn the ``uri`` search parameter into a ``terms`` filter.

    The ``uri`` key is removed from ``params`` (the mapping is mutated
    in place), so it is no longer present for any later consumer.

    :param params: mutable mapping of search parameters.
    :returns: ``None`` when ``params`` has no ``uri`` entry (or it is
        ``None``); otherwise ``{"terms": {"target.scope": [...]}}``
        listing the normalized form of every URI returned by
        ``uri.expand``.
    """
    requested = params.pop('uri', None)
    if requested is None:
        return None

    normalized_scopes = []
    for candidate in uri.expand(requested):
        normalized_scopes.append(uri.normalize(candidate))

    return {"terms": {"target.scope": normalized_scopes}}
def _match_clause_for_uri(uristr):
    """Return an Elasticsearch match clause dict for the given URI.

    One ``match`` clause on the ``uri`` field is built per URI returned
    by ``uri.expand(uristr)``. A single clause is returned bare; any
    other number of clauses is wrapped in a ``bool`` query requiring at
    least one ``should`` clause to match.
    """
    candidates = [
        {"match": {"uri": expanded}} for expanded in uri.expand(uristr)
    ]

    if len(candidates) != 1:
        return {"bool": {"minimum_should_match": 1, "should": candidates}}

    (only_clause,) = candidates
    return only_clause
def test_expand_document_uris(document_model):
    """``uri.expand`` returns the URIs reported by the looked-up document."""
    # Arrange: the document found for the URI reports two URIs.
    document = document_model.get_by_uri.return_value
    document.uris.return_value = [
        "http://foo.com/",
        "http://bar.com/",
    ]

    # Act
    expanded = uri.expand("http://example.com/")

    # Assert
    assert expanded == [
        "http://foo.com/",
        "http://bar.com/",
    ]
def _expand_uris(self, clause):
    """Replace ``clause['value']`` with the expansion of its URI(s).

    ``clause['value']`` may be a list or a single value; a non-list is
    treated as a one-element list. Every item is passed through
    ``uri.expand`` and the results are merged with duplicates removed.
    ``clause`` is modified in place and nothing is returned.

    Because the results are collected in a set, the order of the
    resulting list is not defined.
    """
    raw_value = clause['value']
    items = raw_value if isinstance(raw_value, list) else [raw_value]

    unique_uris = {
        variant
        for item in items
        for variant in uri.expand(item)
    }
    clause['value'] = list(unique_uris)
def _term_clause_for_uri(uristr):
    """Return an Elasticsearch term clause for the given URI.

    For each URI returned by ``uri.expand(uristr)`` a ``term`` filter on
    ``target.scope`` is built from its ``uri.normalize``-d form. A
    single filter is returned as is; otherwise all filters are combined
    under an ``or`` filter.
    """
    term_clauses = []
    for expanded in uri.expand(uristr):
        scope = uri.normalize(expanded)
        term_clauses.append({"term": {"target.scope": scope}})

    if len(term_clauses) != 1:
        return {"or": term_clauses}
    return term_clauses[0]
def _term_clause_for_uri(uristr):
    """Return an Elasticsearch term clause for the given URI.

    Each URI returned by ``uri.expand(uristr)`` is normalized with
    ``uri.normalize`` and matched against the
    ``target.source_normalized`` field. One resulting filter is returned
    bare; any other number is grouped under an ``or`` filter.
    """
    normalized_uris = [
        uri.normalize(expanded) for expanded in uri.expand(uristr)
    ]
    term_clauses = [
        {"term": {"target.source_normalized": normalized}}
        for normalized in normalized_uris
    ]

    if len(term_clauses) != 1:
        return {"or": term_clauses}
    return term_clauses[0]
def test_expand_document_doesnt_expand_canonical_uris(document_model):
    """Expanding a URI marked ``rel="canonical"`` yields only that URI."""
    document = document_model.get_by_uri.return_value

    # The document reports three URIs in total...
    document.uris.return_value = [
        "http://foo.com/",
        "http://bar.com/",
        "http://example.com/",
    ]
    # ...and its link entries flag the one being expanded as canonical.
    document.get.return_value = [
        {"href": "http://foo.com/"},
        {"href": "http://bar.com/"},
        {"href": "http://example.com/", "rel": "canonical"},
    ]

    expanded = uri.expand("http://example.com/")

    assert expanded == ["http://example.com/"]
def _match_clause_for_uri(uristr):
    """Return an Elasticsearch match clause dict for the given URI.

    Builds one ``match`` clause on ``uri`` for every URI that
    ``uri.expand(uristr)`` returns. Exactly one clause is returned
    unwrapped; otherwise the clauses become the ``should`` list of a
    ``bool`` query with ``minimum_should_match`` set to 1.
    """
    match_clauses = []
    for expanded in uri.expand(uristr):
        match_clauses.append({"match": {"uri": expanded}})

    if len(match_clauses) == 1:
        return match_clauses[0]

    bool_body = {
        "minimum_should_match": 1,
        "should": match_clauses,
    }
    return {"bool": bool_body}
def test_expand_no_document(document_model):
    """With no document found, ``uri.expand`` returns just the input URI."""
    # Simulate the lookup finding nothing for the URI.
    document_model.get_by_uri.return_value = None

    target = "http://example.com/"
    expanded = uri.expand(target)

    assert expanded == ["http://example.com/"]
def term_filter(self, uristr):
    """Build a ``terms`` filter on ``target.scope`` for ``uristr``.

    :param uristr: the URI string to filter by.
    :returns: ``{"terms": {"target.scope": [...]}}`` where the list holds
        the ``uri.normalize``-d form of each URI returned by
        ``uri.expand(uristr)``.
    """
    expanded_uris = uri.expand(uristr)
    normalized_scopes = [
        uri.normalize(candidate) for candidate in expanded_uris
    ]
    scope_terms = {"target.scope": normalized_scopes}
    return {"terms": scope_terms}