def get_records(self, ids):
    """Query for the records matching a collection of identifiers.

    Every identifier is converted with ``str`` and the resulting list is
    wrapped in an ``Ids`` query that is handed to ``self.query``.

    :param ids: An iterable of record identifiers.
    :returns: Whatever ``self.query`` returns for the ``Ids`` query.
    """
    record_ids = list(map(str, ids))
    ids_query = Ids(values=record_ids)
    return self.query(ids_query)
def get_record(self, id_):
    """Query for the single record matching an identifier.

    The identifier is converted with ``str`` and placed in a one-element
    ``Ids`` query that is handed to ``self.query``.

    :param id_: The record identifier.
    :returns: Whatever ``self.query`` returns for the ``Ids`` query.
    """
    single_id = [str(id_)]
    return self.query(Ids(values=single_id))
def test_basic_stats(app, db, es, locations, event_queues, minimal_record):
    """Check index document counts and per-record ``_stats`` on a small set."""
    search = Search(using=es)

    # Fixture size: 10 records * 2 versions -> 20 records,
    # and 20 records * 3 files -> 60 files.
    # The date range and interval yield 4 event timestamps.
    fixtures = create_stats_fixtures(
        metadata=minimal_record,
        n_records=10,
        n_versions=2,
        n_files=3,
        event_data={'user_id': '1'},
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30))

    prefix = app.config['SEARCH_INDEX_PREFIX']

    # (index name suffix, expected document count), checked in this order.
    expected_counts = (
        # Events indices
        # 2 versions * 10 records * 3 files * 4 events -> 240
        ('events-stats-file-download', 240),
        # 2 versions * 10 records * 4 events -> 80
        ('events-stats-record-view', 80),
        # Aggregations indices
        # (2 versions + 1 concept) * 10 records -> 30 documents
        ('stats-file-download', 30),
        ('stats-record-view', 30),
        # 2 bookmarks + 2 bookmarks
        ('stats-bookmarks', 4),
    )
    for suffix, expected in expected_counts:
        assert search.index(prefix + suffix).count() == expected

    # Every record is expected to carry the same statistics. The
    # ``version_*`` values are twice the per-record ones, presumably because
    # they span both versions -- confirm against the aggregation code.
    expected_stats = {
        # 4 view events
        'views': 4.0,
        'version_views': 8.0,
        # 4 view events over 2 different hours
        'unique_views': 2.0,
        'version_unique_views': 2.0,
        # 4 download events * 3 files
        'downloads': 12.0,
        'version_downloads': 24.0,
        # 4 download events * 3 files over 2 different hours
        'unique_downloads': 2.0,
        'version_unique_downloads': 2.0,
        # 4 download events * 3 files * 10 bytes
        'volume': 120.0,
        'version_volume': 240.0,
    }

    # Records index
    for _, record, _ in fixtures:
        stats_query = search.index(prefix + '*')
        stats_query = stats_query.query(Ids(values=[str(record.id)]))
        stats_query = stats_query.source(include='_stats')
        first_hit = stats_query.execute()[0]
        assert first_hit['_stats'] == expected_stats
def lookup(self, ad_id, field):
    """Collect the values of ``field`` from the documents matching ``ad_id``.

    :param ad_id: A single identifier or a ``list`` of identifiers. Any
        value that is not a ``list`` is wrapped into a one-element list.
    :param field: Key to read from the ``_source`` of each hit; hits whose
        ``_source`` lacks the key are ignored.
    :returns: The ``set`` built from ``flatten`` applied to the list of
        collected field values.
    """
    id_values = ad_id if isinstance(ad_id, list) else [ad_id]
    response = self.elastic.query(Ids(values=id_values)).execute()

    field_values = []
    for hit in response.hits.hits:
        source = hit['_source']
        if field in source:
            field_values.append(source[field])

    return set(flatten(field_values))
def test_large_stats(app, db, es, locations, event_queues, minimal_record):
    """Check index document counts and ``_stats`` over two months of events."""
    search = Search(using=es)

    # Fixture size: 3 records * 4 versions -> 12 records,
    # and 12 records * 2 files -> 24 files.
    # (31 + 30) * 2 -> 122 event timestamps (61 days and 2 events/day).
    fixtures = create_stats_fixtures(
        metadata=minimal_record,
        n_records=3,
        n_versions=4,
        n_files=2,
        event_data={'user_id': '1'},
        start_date=datetime(2018, 3, 1),
        end_date=datetime(2018, 5, 1),
        interval=timedelta(hours=12))

    prefix = app.config['SEARCH_INDEX_PREFIX']

    # Events indices
    # 4 versions * 3 records * 2 files * 122 events -> 2928
    assert search.index(prefix + 'events-stats-file-download').count() == 2928
    # 4 versions * 3 records * 122 events -> 1464
    assert search.index(prefix + 'events-stats-record-view').count() == 1464

    # Aggregations indices
    # (4 versions + 1 concept) * 3 records -> 15 documents per day,
    # 61 days * 15 -> 915 daily aggregation documents.
    download_aggs = search.index(prefix + 'stats-file-download') \
        .doc_type('file-download-day-aggregation')
    assert download_aggs.count() == 915
    view_aggs = search.index(prefix + 'stats-record-view') \
        .doc_type('record-view-day-aggregation')
    assert view_aggs.count() == 915

    # Every record is expected to carry the same statistics. The
    # ``version_*`` totals are 4x the per-record ones, presumably because
    # they span all 4 versions -- confirm against the aggregation code.
    expected_stats = {
        # 122 view events
        'views': 122.0,
        'version_views': 488.0,
        # One per event timestamp; the 12-hour spacing presumably keeps
        # every event unique -- confirm.
        'unique_views': 122.0,
        'version_unique_views': 122.0,
        # 122 download events * 2 files
        'downloads': 244.0,
        'version_downloads': 976.0,
        # One per event timestamp, as for the unique views.
        'unique_downloads': 122.0,
        'version_unique_downloads': 122.0,
        # 244 downloads * 10 (presumably 10 bytes per file -- confirm)
        'volume': 2440.0,
        'version_volume': 9760.0,
    }

    # Records index
    for _, record, _ in fixtures:
        stats_query = search.index(prefix + '*')
        stats_query = stats_query.query(Ids(values=[str(record.id)]))
        stats_query = stats_query.source(include='_stats')
        first_hit = stats_query.execute()[0]
        assert first_hit['_stats'] == expected_stats
def test_aclrecordsearch_explicit_user(app, db, es, es_acl_prepare, test_users):
    """Check ``ACLRecordsSearch`` against an ACL granting 'get' to one user.

    The test creates a ``DefaultACL`` with a ``UserActor`` for
    ``test_users.u1``, indexes one record, and then verifies:

    * the dictionary form of an ``Ids`` query issued through the search,
    * that ``test_users.u1`` finds the record via ``get_record``,
    * that ``test_users.u2`` gets no hits for the same record.
    """
    current_explicit_acls.prepare(RECORD_SCHEMA)

    # NOTE(review): the extent of this nested transaction was reconstructed
    # from whitespace-mangled source; confirm that only the ACL/actor
    # creation belongs inside it.
    with db.session.begin_nested():
        user_acl = DefaultACL(name='test', schemas=[RECORD_SCHEMA],
                              priority=0, operation='get',
                              originator=test_users.u1)
        user_actor = UserActor(name='auth', acl=user_acl,
                               users=[test_users.u1],
                               originator=test_users.u1)
        db.session.add(user_acl)
        db.session.add(user_actor)

    current_explicit_acls.reindex_acl(user_acl, delayed=False)

    # Create and index the record the searches below should match.
    record_uuid = uuid.uuid4()
    metadata = {'title': 'blah', 'contributors': [], 'keywords': ['blah']}
    recid_minter(record_uuid, metadata)
    rec = SchemaEnforcingRecord.create(metadata, id_=record_uuid)
    RecordIndexer().index(rec)
    current_search_client.indices.refresh()
    current_search_client.indices.flush()

    acl_search = ACLRecordsSearch(user=test_users.u1, context={
        'system_roles': ['authenticated_user']
    })
    rec_id = str(rec.id)

    # Building blocks of the expected query; the role and user clauses each
    # appear twice in the expected ``should`` list.
    role_clause = {"terms": {"_invenio_explicit_acls.role": [1]}}
    user_clause = {"term": {"_invenio_explicit_acls.user": 1}}
    system_role_clause = {
        "terms": {
            "_invenio_explicit_acls.system_role": ["authenticated_user"]
        }
    }
    operation_clause = {"term": {"_invenio_explicit_acls.operation": "get"}}
    actor_clause = {
        "bool": {
            "minimum_should_match": 1,
            "should": [
                role_clause,
                user_clause,
                role_clause,
                system_role_clause,
                user_clause,
            ],
        }
    }
    acl_filter = {
        "bool": {
            "should": [
                {
                    "nested": {
                        "path": "_invenio_explicit_acls",
                        "_name": "invenio_explicit_acls_match_get",
                        "query": {
                            "bool": {
                                "must": [operation_clause, actor_clause]
                            }
                        },
                    }
                }
            ],
            "minimum_should_match": 1,
        }
    }
    expected_query = {
        "bool": {
            "minimum_should_match": "100%",
            "filter": [acl_filter],
            "must": [{"ids": {"values": [rec_id]}}],
        }
    }

    print(json.dumps(
        acl_search.query(Ids(values=[rec_id])).query.to_dict(), indent=4))
    assert acl_search.query(Ids(values=[rec_id])).query.to_dict() == \
        expected_query

    def hits_for(user):
        # NOTE(review): ``authenticated_user`` is a bare name here while the
        # search above passes the string 'authenticated_user'; kept as in
        # the original -- confirm it is an imported object.
        user_search = ACLRecordsSearch(user=user, context={
            'system_roles': [authenticated_user]
        })
        return list(user_search.get_record(rec.id).execute())

    hits = hits_for(test_users.u1)
    assert len(hits) == 1
    assert hits[0].meta.id == rec_id
    print(hits)

    hits = hits_for(test_users.u2)
    assert hits == []
query_info = {'query': query_dict, 'candidates': candidates} jsonl_file.write(json.dumps(query_info) + '\n') # query_text = ["Johnson, 55, was admitted to St. Thomas' Hospital on April 5"] # query_text = ["Addressing the nation via a video conference , Prime Minister Narendra Modi made it clear that the threat could only be combatted with the full cooperation of the people , a token demonstration of which , he said , would be to light candles , lamps and mobile torches for nine minutes on April 5 at 9 pm ."] if __name__ == "__main__": connections.create_connection(hosts=["localhost"], timeout=100, alias="default") # q_match_all = MatchAll() # match all documents # q_basic = Match( # context={"query": "doctor"} # ) # match "D.C" in the title field of the index, using BM25 as default q_match_ids = Ids(values=[170054]) # match ids # es = Elasticsearch([{'host': 'localhost', 'port': '9200', 'timeout': 60}]) # print(es.cat.count("covid_events_index", params={"format": "json"})) # query_vector = encoder.encode(['This is a precautionary step , as the Prime Minister continues to have persistent symptoms of coronavirus ten days after testing positive for the virus , " a Downing Street spokesperson said .']).tolist()[ # 0 # ] # get the embedding for the query text # q_vector = generate_script_score_query( # query_vector, "context_vec" # ) # score documents based on cosine similarity # search("covid_events_index", q_match_ids) # search sorted_trigger_id_mapping = get_queries() get_candidates(sorted_trigger_id_mapping) # e = Event.get(0, index='covid_events_index') # print(type(e.meta.id))