Beispiel #1
0
    def get_records(self, ids):
        """Build a search restricted to the given record identifiers.

        Every identifier is converted to ``str`` before being placed in the
        ``Ids`` query.

        :param ids: An iterable of record identifiers.
        :returns: The result of ``self.query`` called with the ``Ids`` query.
        """
        id_values = list(map(str, ids))
        ids_query = Ids(values=id_values)
        return self.query(ids_query)
Beispiel #2
0
    def get_record(self, id_):
        """Build a search restricted to a single record identifier.

        The identifier is converted to ``str`` before being placed in the
        ``Ids`` query.

        :param id_: The record identifier.
        :returns: The result of ``self.query`` called with the ``Ids`` query.
        """
        single_id_query = Ids(values=[str(id_)])
        return self.query(single_id_query)
Beispiel #3
0
def test_basic_stats(app, db, es, locations, event_queues, minimal_record):
    """Check index counts and per-record ``_stats`` for a small fixture set."""
    es_search = Search(using=es)
    # 10 records * 2 versions -> 20 records, 3 files each -> 60 files.
    # Events are generated every 30 minutes from 13:00 up to 15:00; the
    # counts asserted below correspond to 4 event timestamps.
    fixtures = create_stats_fixtures(
        metadata=minimal_record,
        n_records=10,
        n_versions=2,
        n_files=3,
        event_data={'user_id': '1'},
        start_date=datetime(2018, 1, 1, 13),
        end_date=datetime(2018, 1, 1, 15),
        interval=timedelta(minutes=30),
    )

    index_prefix = app.config['SEARCH_INDEX_PREFIX']

    def doc_count(index_name):
        """Return the document count of the prefixed index."""
        return es_search.index(index_prefix + index_name).count()

    # --- Events indices ---
    # 2 versions * 10 records * 3 files * 4 events -> 240
    assert doc_count('events-stats-file-download') == 240
    # 2 versions * 10 records * 4 events -> 80
    assert doc_count('events-stats-record-view') == 80

    # --- Aggregations indices ---
    # (2 versions + 1 concept) * 10 records -> 30 documents per aggregation
    assert doc_count('stats-file-download') == 30
    assert doc_count('stats-record-view') == 30
    # 2 bookmarks for each of the two aggregations
    assert doc_count('stats-bookmarks') == 4

    # --- Records index ---
    expected_stats = {
        # 4 view events (x2 versions for the version-level counter)
        'views': 4.0, 'version_views': 8.0,
        # 4 view events spread over 2 different hours
        'unique_views': 2.0, 'version_unique_views': 2.0,
        # 4 download events * 3 files (x2 versions)
        'downloads': 12.0, 'version_downloads': 24.0,
        # 4 download events * 3 files spread over 2 different hours
        'unique_downloads': 2.0, 'version_unique_downloads': 2.0,
        # 4 download events * 3 files * 10 bytes (x2 versions)
        'volume': 120.0, 'version_volume': 240.0,
    }
    for _, record, _ in fixtures:
        record_query = es_search.index(index_prefix + '*')
        record_query = record_query.query(Ids(values=[str(record.id)]))
        record_query = record_query.source(include='_stats')

        first_hit = record_query.execute()[0]
        assert first_hit['_stats'] == expected_stats
Beispiel #4
0
    def lookup(self, ad_id, field):
        """
        Collect the values of ``field`` from the documents matching ``ad_id``.

        :param ad_id: str or list
            A single document id, or a list of ids, to be queried. Any
            value that is not a ``list`` is wrapped into a one-element list.
        :param field: str
            Key to read from each hit's ``_source``; hits whose ``_source``
            does not contain it are skipped.
        :returns: set
            The flattened set of the collected field values.
        """
        id_values = ad_id if isinstance(ad_id, list) else [ad_id]

        # NOTE(review): no explicit result size is set on the search, so
        # presumably only the backend's default number of hits comes back --
        # confirm this is acceptable for long id lists.
        response = self.elastic.query(Ids(values=id_values)).execute()

        collected = []
        for hit in response.hits.hits:
            source = hit['_source']
            if field in source:
                collected.append(source[field])

        return set(flatten(collected))
Beispiel #5
0
def test_large_stats(app, db, es, locations, event_queues, minimal_record):
    """Check index counts and per-record ``_stats`` over a two-month range."""
    es_search = Search(using=es)
    # 3 records * 4 versions -> 12 records, 2 files each -> 24 files.
    # March (31 days) + April (30 days) at one event every 12 hours
    # -> 61 days * 2 events/day = 122 event timestamps.
    fixtures = create_stats_fixtures(
        metadata=minimal_record,
        n_records=3,
        n_versions=4,
        n_files=2,
        event_data={'user_id': '1'},
        start_date=datetime(2018, 3, 1),
        end_date=datetime(2018, 5, 1),
        interval=timedelta(hours=12),
    )

    index_prefix = app.config['SEARCH_INDEX_PREFIX']

    # --- Events indices ---
    # 4 versions * 3 records * 2 files * 122 events -> 2928
    download_events = es_search.index(
        index_prefix + 'events-stats-file-download')
    assert download_events.count() == 2928
    # 4 versions * 3 records * 122 events -> 1464
    view_events = es_search.index(index_prefix + 'events-stats-record-view')
    assert view_events.count() == 1464

    # --- Aggregations indices ---
    # (4 versions + 1 concept) * 3 records -> 15 aggregated objects,
    # each with one daily document for every one of the 61 days.
    download_aggs = es_search.index(index_prefix + 'stats-file-download')
    download_aggs = download_aggs.doc_type('file-download-day-aggregation')
    assert download_aggs.count() == 915  # 61 days * 15 records
    view_aggs = es_search.index(index_prefix + 'stats-record-view')
    view_aggs = view_aggs.doc_type('record-view-day-aggregation')
    assert view_aggs.count() == 915  # 61 days * 15 records

    # --- Records index ---
    expected_stats = {
        # 122 view events per record (x4 versions -> 488)
        'views': 122.0, 'version_views': 488.0,
        # events are 12 hours apart, so each of the 122 falls in a
        # different hour
        'unique_views': 122.0, 'version_unique_views': 122.0,
        # 122 download events * 2 files -> 244 (x4 versions -> 976)
        'downloads': 244.0, 'version_downloads': 976.0,
        # 122 download timestamps, each in a different hour
        'unique_downloads': 122.0, 'version_unique_downloads': 122.0,
        # 244 downloads * 10 bytes -> 2440 (x4 versions -> 9760);
        # presumably every fixture file is 10 bytes -- TODO confirm
        'volume': 2440.0, 'version_volume': 9760.0,
    }
    for _, record, _ in fixtures:
        record_query = es_search.index(index_prefix + '*')
        record_query = record_query.query(Ids(values=[str(record.id)]))
        record_query = record_query.source(include='_stats')
        first_hit = record_query.execute()[0]
        assert first_hit['_stats'] == expected_stats
Beispiel #6
0
def test_aclrecordsearch_explicit_user(app, db, es, es_acl_prepare, test_users):
    """Test ACLRecordsSearch filtering for an ACL granted to an explicit user.

    Creates a ``DefaultACL`` for the ``get`` operation with a ``UserActor``
    listing only ``test_users.u1``, indexes a record, and then checks:

    * the exact query dict produced when an ``Ids`` query is added to an
      ``ACLRecordsSearch`` built for ``u1``;
    * that ``u1`` gets exactly the indexed record back from ``get_record``;
    * that ``u2`` gets no hits for the same record.
    """
    current_explicit_acls.prepare(RECORD_SCHEMA)

    # Create the ACL and its actor inside a nested transaction.
    with db.session.begin_nested():
        acl1 = DefaultACL(name='test', schemas=[RECORD_SCHEMA],
                          priority=0, operation='get', originator=test_users.u1)
        actor1 = UserActor(name='auth', acl=acl1, users=[test_users.u1], originator=test_users.u1)
        db.session.add(acl1)
        db.session.add(actor1)

    # Reindex the ACL immediately (delayed=False) rather than deferring it.
    current_explicit_acls.reindex_acl(acl1, delayed=False)

    # Create and index the record the searches below are expected to find.
    record_uuid = uuid.uuid4()
    data = {'title': 'blah', 'contributors': [], 'keywords': ['blah']}
    recid_minter(record_uuid, data)
    rec = SchemaEnforcingRecord.create(data, id_=record_uuid)
    RecordIndexer().index(rec)

    # Refresh and flush the indices before searching.
    current_search_client.indices.refresh()
    current_search_client.indices.flush()

    # NOTE(review): system_roles is given here as the string
    # 'authenticated_user', while the searches further down pass the name
    # ``authenticated_user`` -- confirm that both forms are intended.
    rs = ACLRecordsSearch(user=test_users.u1, context={
        'system_roles': ['authenticated_user']
    })
    rec_id = str(rec.id)
    # Debug output of the generated query (same expression as asserted below).
    print(json.dumps(rs.query(Ids(values=[rec_id])).query.to_dict(), indent=4))
    # Expected shape: the ACL restriction ends up in "filter" and the Ids
    # query in "must". The user/role terms appear more than once in the
    # nested "should" list; the assertion pins that exact output.
    assert rs.query(Ids(values=[rec_id])).query.to_dict() == {
        "bool": {
            "minimum_should_match": "100%",
            "filter": [
                {
                    "bool": {
                        "should": [
                            {
                                "nested": {
                                    "path": "_invenio_explicit_acls",
                                    "_name": "invenio_explicit_acls_match_get",
                                    "query": {
                                        "bool": {
                                            "must": [
                                                {
                                                    "term": {
                                                        "_invenio_explicit_acls.operation": "get"
                                                    }
                                                },
                                                {
                                                    "bool": {
                                                        "minimum_should_match": 1,
                                                        "should": [
                                                            {
                                                                "terms": {
                                                                    "_invenio_explicit_acls.role": [
                                                                        1
                                                                    ]
                                                                }
                                                            },
                                                            {
                                                                "term": {
                                                                    "_invenio_explicit_acls.user": 1
                                                                }
                                                            },
                                                            {
                                                                "terms": {
                                                                    "_invenio_explicit_acls.role": [
                                                                        1
                                                                    ]
                                                                }
                                                            },
                                                            {
                                                                "terms": {
                                                                    "_invenio_explicit_acls.system_role": [
                                                                        "authenticated_user"
                                                                    ]
                                                                }
                                                            },
                                                            {
                                                                "term": {
                                                                    "_invenio_explicit_acls.user": 1
                                                                }
                                                            }
                                                        ]
                                                    }
                                                }
                                            ]
                                        }
                                    }
                                }
                            }
                        ],
                        "minimum_should_match": 1
                    }
                }
            ],
            "must": [
                {
                    "ids": {
                        "values": [
                            rec_id
                        ]
                    }
                }
            ]
        }
    }

    # u1 is listed in the actor, so the record must be returned.
    hits = list(ACLRecordsSearch(user=test_users.u1, context={
        'system_roles': [authenticated_user]
    }).get_record(rec.id).execute())

    assert len(hits) == 1
    assert hits[0].meta.id == rec_id
    print(hits)

    # u2 is not listed in the actor, so the same lookup must return nothing.
    hits = list(ACLRecordsSearch(user=test_users.u2, context={
        'system_roles': [authenticated_user]
    }).get_record(rec.id).execute())
    assert hits == []
            query_info = {'query': query_dict, 'candidates': candidates}
            jsonl_file.write(json.dumps(query_info) + '\n')

    # query_text = ["Johnson, 55, was admitted to St. Thomas' Hospital on April 5"]
    # query_text = ["Addressing the nation via a video conference , Prime Minister Narendra Modi made it clear that the threat could only be combatted with the full cooperation of the people , a token demonstration of which , he said , would be to light candles , lamps and mobile torches for nine minutes on April 5 at 9 pm ."]


# Script entry point: open the default connection, then build the queries
# and pass them to get_candidates().
if __name__ == "__main__":
    # Register the connection under the "default" alias, pointing at
    # localhost with a timeout of 100.
    connections.create_connection(hosts=["localhost"],
                                  timeout=100,
                                  alias="default")
    # q_match_all = MatchAll()  # match all documents
    # q_basic = Match(
    #     context={"query": "doctor"}
    # )  # match "D.C" in the title field of the index, using BM25 as default
    # NOTE(review): q_match_ids is only referenced by the commented-out
    # search(...) call further down; it is unused in the active code path.
    q_match_ids = Ids(values=[170054])  # match ids
    # es = Elasticsearch([{'host': 'localhost', 'port': '9200', 'timeout': 60}])
    # print(es.cat.count("covid_events_index", params={"format": "json"}))
    # query_vector = encoder.encode(['This is a precautionary step , as the Prime Minister continues to have persistent symptoms of coronavirus ten days after testing positive for the virus , " a Downing Street spokesperson said .']).tolist()[
    #     0
    # ]  # get the embedding for the query text
    # q_vector = generate_script_score_query(
    #     query_vector, "context_vec"
    # )  # score documents based on cosine similarity
    # search("covid_events_index", q_match_ids)  # search

    # Active code path: the result of get_queries() is handed straight to
    # get_candidates().
    sorted_trigger_id_mapping = get_queries()
    get_candidates(sorted_trigger_id_mapping)
    # e = Event.get(0, index='covid_events_index')
    # print(type(e.meta.id))