Ejemplo n.º 1
0
def jobs_similar(id):
    """Render the HTML snippets of the jobs most similar to a given job.

    Runs a ``more_like_this`` query against the ``records-jobs`` index,
    seeded with the document whose ``_id`` is ``id``, and keeps only the
    first two hits.

    :param id: Elasticsearch ``_id`` of the reference job document.
    :return: concatenation of the ``similar_jobs.html`` template rendered
        once per hit; an empty string when there are no hits.
    """
    like_this = {
        "more_like_this": {
            "docs": [{"_id": id}],
            "min_term_freq": 0,
            "min_doc_freq": 0,
        }
    }
    search = RecordsSearch(index='records-jobs', doc_type='jobs')
    hits = search.query(like_this)[0:2].execute()

    rendered = ''
    for hit in hits:
        rendered += render_template_to_string(
            "inspirehep_theme/similar_jobs.html", record=hit)

    return rendered
Ejemplo n.º 2
0
def test_es_preference_param_no_request():
    """Outside of a request, no ``preference`` parameter must be added."""
    # Swap in the spy base class; presumably it is what provides the
    # ``exposed_params`` attribute inspected below.
    RecordsSearch.__bases__ = (SpySearch, )

    search_with_preference = RecordsSearch().with_preference_param()
    assert search_with_preference.exposed_params == {}
Ejemplo n.º 3
0
def test_cleanup_indexed_deposits(app, db, es, locations, users,
                                  deposit_metadata, sip_metadata_types):
    """Check that a deposit indexed in ES but absent from the DB is removed."""

    def _depid_count():
        # Number of PIDs stored in the database for the minted deposit PID.
        return PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == depid.pid_type,
            PersistentIdentifier.pid_value == depid.pid_value).count()

    def _indexed_deposits():
        # Hits found in the 'deposits' index for the deposit UUID.
        return RecordsSearch(index='deposits').get_record(id_).execute()

    with app.test_request_context():
        datastore = app.extensions['security'].datastore
        login_user(datastore.get_user(users[0]['email']))
        id_ = uuid4()
        depid = zenodo_deposit_minter(id_, deposit_metadata)
        ZenodoDeposit.create(deposit_metadata, id_=id_)

    # Emulate a database "failure", which would wipe any models in the session
    db.session.remove()
    current_search.flush_and_refresh(index='deposits')

    # Deposit has been indexed in ES, but not committed in DB
    assert _depid_count() == 0
    assert _indexed_deposits()[0]._deposit.id == depid.pid_value

    cleanup_indexed_deposits.apply()
    current_search.flush_and_refresh(index='deposits')

    # After the cleanup the deposit is gone from both the DB and ES
    assert _depid_count() == 0
    assert len(_indexed_deposits()) == 0
Ejemplo n.º 4
0
def test_es_preference_param(app):
    """Test the preference param is correctly added in a request."""
    BaseRecordsSearch.__bases__ = (SpySearch, )

    user_agent = 'Chrome'
    with app.test_request_context('/',
                                  headers={'User-Agent': user_agent},
                                  environ_base={'REMOTE_ADDR': '212.54.1.8'}):
        search_with_preference = RecordsSearch().with_preference_param()

        # Rebuild the expected value: MD5 hex digest of "<ip>-<user agent>".
        # The user agent is formatted in its encoded form, exactly as the
        # digest is expected to be computed.
        user_string = '{ip}-{ua}'.format(ip=request.remote_addr,
                                         ua=user_agent.encode('utf8'))
        expected = hashlib.md5(user_string.encode('utf8')).hexdigest()

        assert search_with_preference.exposed_params == dict(
            preference=expected)

    # Note: V2 does not require a request context
    BaseRecordsSearchV2.__bases__ = (SpySearch, )

    search_v2 = RecordsSearchV2().with_preference_param(preference=1234)
    assert search_v2.exposed_params == {'preference': 1234}
Ejemplo n.º 5
0
def cleanup_indexed_deposits():
    """Delete indexed deposits that do not exist in the database.

    Scans every ``draft`` deposit of the ``deposits`` index, looks up which
    of their deposit IDs have a ``depid`` persistent identifier in the
    database, and deletes from Elasticsearch the documents that have none.

    .. note:: This task exists because of deposit REST API calls sometimes
        failing after the deposit has already been sent for indexing to ES,
        leaving an inconsistent state of a deposit existing in ES and not in
        the database. It should be removed once a proper signal mechanism has
        been implemented in the ``invenio-records-rest`` and
        ``invenio-deposit`` modules.
    """
    drafts = RecordsSearch(index='deposits').query(
        'term', **{'_deposit.status': 'draft'}
    ).source(['_deposit.id'])

    # One (depid, document id, index, doc type) tuple per draft hit.
    es_depids_info = []
    for hit in drafts.scan():
        # NOTE(review): this reads the flat key '_deposit.id' and takes its
        # first element. If the hit is instead nested as
        # {'_deposit': {'id': ...}}, the lookup yields None for every hit and
        # all drafts would be treated as missing - confirm against the
        # Elasticsearch / elasticsearch-dsl version in use.
        depid = hit.to_dict().get('_deposit.id', [None])[0]
        es_depids_info.append(
            (depid, hit.meta.id, hit.meta.index, hit.meta.doc_type))

    es_depids = {info[0] for info in es_depids_info}
    db_depids = {
        pid.pid_value
        for pid in PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == 'depid',
            PersistentIdentifier.pid_value.in_(es_depids))
    }

    indexer = RecordIndexer()

    for depid, deposit_id, index, doc_type in es_depids_info:
        if depid in db_depids:
            # The deposit exists in the database: keep it indexed.
            continue
        indexer.client.delete(id=str(deposit_id),
                              index=index,
                              doc_type=doc_type)
Ejemplo n.º 6
0
def test_elasticsearch_query_min_score(app):
    """Check that the configured minimum score ends up in the query body."""
    app.config.update(SEARCH_RESULTS_MIN_SCORE=0.1)

    search = RecordsSearch().query(Q('match', title='Higgs'))
    body = search.to_dict()

    assert 'min_score' in body
    assert body['min_score'] == app.config['SEARCH_RESULTS_MIN_SCORE']
Ejemplo n.º 7
0
def perform_es_search(q, index, start=0, size=10, sort=None, fields=None):
    """Run a Spires/Invenio syntax query through elasticsearch_dsl.

    :param q: query string, parsed with ``IQ``.
    :param index: index to search.
    :param start: offset of the first hit to return.
    :param size: maximum number of hits to return.
    :param sort: optional sort specification, passed to ``sort()`` when
        truthy.
    :param fields: optional list of fields to include in ``_source``;
        ignored unless it is a non-empty ``list``.
    :return: the result of executing the search.
    """
    search = RecordsSearch(index=index).query(IQ(q))
    if sort:
        search = search.sort(sort)
    if fields and isinstance(fields, list):
        search = search.extra(_source={'include': fields})

    page = slice(start, start + size)
    return search[page].execute()
Ejemplo n.º 8
0
 def _build_query(id_):
     """Build the "more like this" search for a job.

     :param id_: Elasticsearch ``_id`` of the reference job document.
     :return: unexecuted search on ``records-jobs``, sliced to the first two
         hits.
     """
     similar_to = {
         'docs': [{'_id': id_}],
         'min_term_freq': 0,
         'min_doc_freq': 0,
     }
     search = RecordsSearch(index='records-jobs', doc_type='jobs')
     return search.query({'more_like_this': similar_to})[0:2]
Ejemplo n.º 9
0
def get_experiment_publications(experiment_name):
    """
    Get paper count for a given experiment.

    Runs a ``term`` query on ``accelerator_experiments.experiment`` against
    the ``records-hep`` index with ``search_type="count"`` and returns the
    total number of hits.

    :param experiment_name: canonical name of the experiment.
    :type experiment_name: string
    """
    by_experiment = {
        "term": {"accelerator_experiments.experiment": experiment_name}
    }
    response = (
        RecordsSearch(index="records-hep")
        .query(by_experiment)
        .params(search_type="count")
        .execute()
    )
    return response.hits.total
Ejemplo n.º 10
0
    def get_expired_embargos(cls):
        """Get records for which the embargo period have expired.

        Searches the index configured for the ``recid`` REST endpoint for
        records whose ``access_right`` is ``cls.EMBARGOED`` and whose
        ``embargo_date`` falls in the range ``{* TO <current UTC timestamp>}``.

        :return: list with the ``meta.id`` of every matching hit.
        """
        recid_endpoint = current_app.config['RECORDS_REST_ENDPOINTS']['recid']
        search = RecordsSearch(
            using=current_search_client,
            index=recid_endpoint['search_index'])

        # Uses timestamp instead of date on purpose.
        now = datetime.utcnow().isoformat()
        expired = 'access_right:{0} AND embargo_date:{{* TO {1}}}'.format(
            cls.EMBARGOED, now)

        search = search.query(
            'query_string', query=expired, allow_leading_wildcard=False)
        search = search.fields([])

        return [hit.meta.id for hit in search.scan()]
Ejemplo n.º 11
0
def directly_list_v2_record_ids():
    """Yield the raw hits that belong to the ``records-records`` index.

    Pages through an unfiltered ``RecordsSearch`` 100 hits at a time and
    yields every hit whose ``_index`` is ``records-records``. Note that the
    whole hit is yielded, not only its identifier.
    """
    page_size = 100
    offset = 0
    while True:
        window = RecordsSearch().params(version=True)
        window = window[offset:offset + page_size]
        response = window.execute()

        for hit in response.hits.hits:
            if hit.get('_index') == 'records-records':
                yield hit

        # Stop once the current window reaches the reported total.
        if not (offset + page_size < response.hits.total):
            break
        offset += page_size
Ejemplo n.º 12
0
def delete_group_relations(group_id):
    """Delete all relations for given group ID from ES.

    Relations are removed from the ``relationships`` index when the group is
    on either side of them: first those matching ``Source__ID``, then those
    matching ``Target__ID``.
    """
    for side in ('Source__ID', 'Target__ID'):
        relations = RecordsSearch(index='relationships').query(
            'term', **{side: group_id})
        # Ignore versioning conflicts when deleting
        relations.params(conflicts='proceed').delete()
Ejemplo n.º 13
0
def delete(user_id):
    """Delete spam.

    Admin-only view. Without a valid form submission it renders a
    confirmation page showing the user's communities, the number of records
    the user owns and a preview of up to 10 of them. With a valid submission
    it performs the selected actions (remove communities, deactivate the
    user, delete records) and redirects back to this view.

    :param user_id: ID of the user whose content is treated as spam.
    :return: a redirect after a successful submission, otherwise the rendered
        ``zenodo_spam/delete.html`` page.
    """
    # Only admin can access this view
    if not Permission(ActionNeed('admin-access')).can():
        abort(403)

    user = User.query.get(user_id)
    if user is None:
        # Unknown user ID: answer 404 instead of failing on ``user.id``.
        abort(404)

    deleteform = DeleteSpamForm()
    communities = Community.query.filter_by(id_user=user.id)

    rs = RecordsSearch(index='records').query(
        Q('query_string', query="owners: {0}".format(user.id)))
    rec_count = rs.count()

    ctx = {
        'user': user,
        'form': deleteform,
        'is_new': False,
        'communities': communities,
        'rec_count': rec_count,
    }

    if deleteform.validate_on_submit():

        if deleteform.remove_all_communities.data:
            for c in communities:
                if not c.deleted_at:
                    # Tag the description once so the community is
                    # recognisable as spam after deletion.
                    if not c.description.startswith('--SPAM--'):
                        c.description = '--SPAM--' + c.description
                    if c.oaiset:
                        db.session.delete(c.oaiset)
                    c.delete()
            db.session.commit()
        if deleteform.deactivate_user.data:
            _datastore.deactivate_user(user)
            db.session.commit()
        # delete_record function commits the session internally
        # for each deleted record
        if deleteform.remove_all_records.data:
            for r in rs.scan():
                delete_record(r.meta.id, 'spam', int(current_user.get_id()))

        flash("Spam removed", category='success')
        return redirect(url_for('.delete', user_id=user.id))
    else:
        # Only preview the first 10 records on the confirmation page.
        records = islice(rs.scan(), 10)
        ctx.update(records=records)
        return render_template('zenodo_spam/delete.html', **ctx)
Ejemplo n.º 14
0
def delete(user_id):
    """Delete spam.

    Admin-only view. Without a valid form submission it renders a
    confirmation page showing the user's communities, the number of records
    the user owns and a preview of up to 10 of them. With a valid submission
    it performs the selected actions (remove communities, deactivate the
    user, delete records) and redirects back to this view.

    :param user_id: ID of the user whose content is treated as spam.
    :return: a redirect after a successful submission, otherwise the rendered
        ``zenodo_spam/delete.html`` page.
    """
    # Only admin can access this view
    if not Permission(ActionNeed('admin-access')).can():
        abort(403)

    user = User.query.get(user_id)
    if user is None:
        # Unknown user ID: answer 404 instead of failing on ``user.id``.
        abort(404)

    deleteform = DeleteSpamForm()
    communities = Community.query.filter_by(id_user=user.id)

    rs = RecordsSearch(index='records').query(
        Q('query_string', query="owners: {0}".format(user.id)))
    rec_count = rs.count()

    ctx = {
        'user': user,
        'form': deleteform,
        'is_new': False,
        'communities': communities,
        'rec_count': rec_count,
    }

    if deleteform.validate_on_submit():

        if deleteform.remove_all_communities.data:
            for c in communities:
                if not c.deleted_at:
                    # Tag the description once so the community is
                    # recognisable as spam after deletion.
                    if not c.description.startswith('--SPAM--'):
                        c.description = '--SPAM--' + c.description
                    if c.oaiset:
                        db.session.delete(c.oaiset)
                    c.delete()
            db.session.commit()
        if deleteform.deactivate_user.data:
            _datastore.deactivate_user(user)
            db.session.commit()
        # delete_record function commits the session internally
        # for each deleted record
        if deleteform.remove_all_records.data:
            for r in rs.scan():
                delete_record(r.meta.id, 'spam', int(current_user.get_id()))

        flash("Spam removed", category='success')
        return redirect(url_for('.delete', user_id=user.id))
    else:
        # Only preview the first 10 records on the confirmation page.
        records = islice(rs.scan(), 10)
        ctx.update(records=records)
        return render_template('zenodo_spam/delete.html', **ctx)
Ejemplo n.º 15
0
def test_deposit_index(db, es):
    """Check that the deposit index document carries the record's title."""
    deposit_index_name = 'deposits-records-record-v1.0.0'

    def _published_recid_1():
        # Build a fresh ``_deposit`` section on every call so the record and
        # the deposit never share the same dict objects.
        return {
            'status': 'published',
            'pid': {
                'type': 'recid',
                'value': '1'
            }
        }

    record = Record.create({
        'title': 'One',
        '_deposit': _published_recid_1()
    })
    PersistentIdentifier.create(pid_type='recid',
                                pid_value='1',
                                status=PIDStatus.REGISTERED,
                                object_uuid=record.id,
                                object_type='rec')
    Deposit.create({'_deposit': _published_recid_1()})
    db.session.commit()
    current_search.flush_and_refresh(deposit_index_name)

    response = RecordsSearch(index=deposit_index_name).execute()
    # Make sure the 'title' was indexed from record
    first_hit = response['hits']['hits'][0]
    assert first_hit['_source']['title'] == 'One'
Ejemplo n.º 16
0
    def loans_of_transaction_library_by_item_location(self, libraries_map,
                                                      library_pid, trigger):
        """Count circulation operations per item location.

        Considers the loan operation logs of the configured date range whose
        transaction location is one of the locations of ``library_pid`` and
        groups them by owning library and location of the item.

        :param libraries_map: dict - map of library pid and name
        :param library_pid: string - the library to filter with
        :param trigger: action names (checkin, checkout); used in a ``terms``
            filter, so presumably a list - confirm against callers
        :return: one entry per ``"<library pid>: <library name> - <location
            name>"`` key, holding ``location_name`` and the ``checkin`` and
            ``checkout`` counters
        :rtype: dict
        """
        location_pids = self._get_locations_pid(library_pid)
        operations = (
            RecordsSearch(index=LoanOperationLog.index_name)
            .filter('range', date=self.date_range)
            .filter('terms', loan__trigger=trigger)
            .filter('terms', loan__transaction_location__pid=location_pids)
            .source('loan')
            .scan()
        )

        stats = {}
        for operation in operations:
            item_library_pid = operation.loan.item.library_pid
            item_library_name = libraries_map[item_library_pid]
            location_name = operation.loan.item.holding.location_name

            key = f'{item_library_pid}: {item_library_name} - {location_name}'
            if key not in stats:
                stats[key] = {
                    'location_name': location_name,
                    'checkin': 0,
                    'checkout': 0
                }
            # The trigger name selects which counter is incremented.
            stats[key][operation.loan.trigger] += 1

        return stats
Ejemplo n.º 17
0
def get_institution_experiments_from_es(icn):
    """
    Get experiments from a given institution.

    To avoid killing ElasticSearch the number of experiments is limited
    to the first 100 hits, sorted by descending ``earliest_date``.

    :param icn: Institution canonical name.
    :type icn: string
    """
    by_affiliation = {"term": {"affiliation": icn}}

    search = RecordsSearch(index="records-experiments")
    search = search.query(by_affiliation)[:100].sort('-earliest_date')

    return search.execute().hits
Ejemplo n.º 18
0
def check_and_handle_spam(community=None, deposit=None):
    """Checks community/deposit metadata for spam.

    When a spam model is configured, the metadata is scored by the
    ``check_metadata_for_spam`` task. If the score exceeds
    ``ZENODO_SPAM_THRESHOLD`` and the current user is not an admin, the
    configured ``ZENODO_SPAM_HANDLING_ACTIONS`` callable is invoked, but only
    when the owner has no records and no other communities.

    :param community: community to check, if any.
    :param deposit: deposit to check, if any.
    :raises HTTPException: re-raised untouched; any other error is logged
        and swallowed.
    """
    try:
        if current_app.config.get('ZENODO_SPAM_MODEL_LOCATION'):
            if community:
                task = check_metadata_for_spam.delay(community_id=community.id)
            if deposit:
                task = check_metadata_for_spam.delay(dep_id=str(deposit.id))
            spam_proba = task.get(
                timeout=current_app.config['ZENODO_SPAM_CHECK_TIMEOUT'])
        else:
            spam_proba = 0

        if spam_proba > current_app.config['ZENODO_SPAM_THRESHOLD']:
            if not Permission(ActionNeed('admin-access')).can():
                # Resolve the owner from whichever object was given, so a
                # deposit-only check no longer fails on ``community.id_user``.
                if community:
                    user_id = community.id_user
                else:
                    user_id = deposit['owners'][0]

                has_records = RecordsSearch(index='records').query(
                    Q('query_string',
                      query="owners:{}".format(user_id))).count()
                has_communities = Community.query.filter_by(
                    id_user=user_id).count()
                if community:
                    # Do not count the community that is being checked.
                    has_communities -= 1

                if not (has_records or has_communities):
                    current_app.config['ZENODO_SPAM_HANDLING_ACTIONS'](
                        community=community, deposit=deposit)
    except HTTPException:
        raise
    except Exception:
        current_app.logger.exception(u'Could not check for spam')
Ejemplo n.º 19
0
def delete_group_relations(group_ids: Iterable[str]):
    """Delete all relations for given group IDs from ES.

    Removes from the ``relationships`` index every document whose
    ``Source__ID`` or ``Target__ID`` is one of ``group_ids``.

    :param group_ids: IDs of the groups whose relations are deleted; any
        iterable, including one-shot iterators and generators.
    """
    # Materialise once: consuming ``group_ids`` twice would leave the second
    # ``terms`` clause empty when a one-shot iterator is passed.
    ids = list(group_ids)
    RecordsSearch(index='relationships').query(
        'bool',
        should=[
            Q('terms', Source__ID=ids),
            Q('terms', Target__ID=ids),
        ]).params(conflicts='proceed').delete()  # ignore versioning conflicts
Ejemplo n.º 20
0
def dump_operation_logs(outfile_name, year):
    """Dumps operation log records in a given file.

    :param outfile_name: JSON operation log output file.
    :param year: when not ``None``, records are read from the
        ``<index name>-<year>`` index instead of the base index.
    """
    click.secho('Dumps operation log records:', fg='green')
    base_index = OperationLog.index_name
    index_name = base_index if year is None else f'{base_index}-{year}'
    search = RecordsSearch(index=index_name)

    writer = JsonWriter(outfile_name)
    written = 0
    with click.progressbar(search.scan(), length=search.count()) as bar:
        for written, log in enumerate(bar, start=1):
            # NOTE(review): each record is written as the ``str()`` of its
            # dict - confirm this is the input ``JsonWriter.write`` expects.
            writer.write(str(log.to_dict()))
    click.echo(f'created {written} operation logs.')
Ejemplo n.º 21
0
    def get_expired_embargos(cls):
        """Get records for which the embargo period have expired.

        Queries the search index of the ``recid`` REST endpoint for records
        with ``access_right`` equal to ``cls.EMBARGOED`` and an
        ``embargo_date`` in the range ``{* TO <current UTC timestamp>}``.

        :return: list with the ``meta.id`` of every matching hit.
        """
        endpoint = current_app.config['RECORDS_REST_ENDPOINTS']['recid']
        base_search = RecordsSearch(
            using=current_search_client, index=endpoint['search_index'])

        # Uses timestamp instead of date on purpose.
        upper_bound = datetime.utcnow().isoformat()
        query_string = (
            'access_right:{0} AND embargo_date:{{* TO {1}}}'
            .format(cls.EMBARGOED, upper_bound)
        )

        expired = base_search.query(
            'query_string',
            query=query_string,
            allow_leading_wildcard=False,
        ).fields([])

        return [hit.meta.id for hit in expired.scan()]
Ejemplo n.º 22
0
def check_and_handle_spam(community=None, deposit=None, retry=True):
    """Checks community/deposit metadata for spam.

    When a spam model is configured, the metadata is scored by the
    ``check_metadata_for_spam`` task. If the score exceeds
    ``ZENODO_SPAM_THRESHOLD`` and the current user is not an admin, a warning
    is logged and the ``ZENODO_SPAM_HANDLING_ACTIONS`` callable is invoked,
    unless the owner already has more than ``ZENODO_SPAM_SKIP_CHECK_NUM``
    records and communities.

    :param community: community to check, if any.
    :param deposit: deposit to check, if any.
    :param retry: when true, a ``TimeoutError`` triggers one more attempt
        (made with ``retry=False``) instead of being logged.
    :raises HTTPException: re-raised untouched; other errors are logged.
    """

    def _log_extra():
        # Identifiers attached to the log records; built lazily so nothing
        # is read from the arguments before it is actually needed.
        return {
            'depid': deposit.id if deposit else None,
            'comid': community.id if community else None
        }

    try:
        config = current_app.config
        if config.get('ZENODO_SPAM_MODEL_LOCATION'):
            if community:
                task = check_metadata_for_spam.delay(community_id=community.id)
                user_id = community.id_user
            if deposit:
                task = check_metadata_for_spam.delay(dep_id=str(deposit.id))
                user_id = deposit['owners'][0]
            spam_proba = task.get(timeout=config['ZENODO_SPAM_CHECK_TIMEOUT'])
        else:
            spam_proba = 0

        if (spam_proba > config['ZENODO_SPAM_THRESHOLD'] and
                not Permission(ActionNeed('admin-access')).can()):
            owned_by_user = Q('query_string',
                              query="owners:{}".format(user_id))
            user_records = RecordsSearch(index='records').query(
                owned_by_user).count()
            user_communities = Community.query.filter_by(
                id_user=user_id).count()
            if community:
                # Ignore the newly created community
                user_communities -= 1

            current_app.logger.warning(u'Found spam upload',
                                       extra=_log_extra())

            owned_total = user_records + user_communities
            if not (owned_total > config['ZENODO_SPAM_SKIP_CHECK_NUM']):
                config['ZENODO_SPAM_HANDLING_ACTIONS'](
                    community=community, deposit=deposit)
    except HTTPException:
        raise
    except TimeoutError:
        if retry:
            # Single retry: the nested call runs with ``retry=False``.
            check_and_handle_spam(community=community,
                                  deposit=deposit,
                                  retry=False)
        else:
            current_app.logger.exception(u'Could not check for spam',
                                         extra=_log_extra())
    except Exception:
        current_app.logger.exception(u'Could not check for spam',
                                     extra=_log_extra())
Ejemplo n.º 23
0
def test_filter_by_patron(app, patron_pid, qs, should_raise):
    """Test the function filter_by_patron.

    Depending on ``should_raise`` the call must either raise
    ``UnauthorizedSearchError`` or add a ``term`` filter on ``patron_pid``.
    """
    search = RecordsSearch()

    if not should_raise:
        filtered, _ = _filter_by_patron(patron_pid, search, qs)
        first_filter = filtered.to_dict()["query"]["bool"]["filter"][0]
        assert first_filter["term"] == {"patron_pid": patron_pid}
        return

    with pytest.raises(UnauthorizedSearchError):
        _filter_by_patron(patron_pid, search, qs)
Ejemplo n.º 24
0
def test():
    """Render the ``test.html`` page.

    The HTML file is a jinja template whose sections are generated from the
    records passed to it as ``invenio_records``. Those records are the result
    of a ``RecordsSearch`` query sorted by descending ``created``.
    """
    newest_first = RecordsSearch().sort("-created").execute()
    return render_template("gkhext/test.html", invenio_records=newest_first)
Ejemplo n.º 25
0
def test_prefix_index_from_kwargs(app):
    """Test that index is prefixed when pass it through kwargs."""
    prefix, index_name = 'myprefix-', 'myindex'
    app.config['SEARCH_INDEX_PREFIX'] = prefix

    search = RecordsSearch(index=index_name)
    _test_original_index_is_stored_when_prefixing(
        search, [prefix + index_name], [index_name])
Ejemplo n.º 26
0
def suggest():
    """Power typeahead.js search bar suggestions.

    Reads ``field`` and ``query`` from the request values and runs a
    completion suggester on ``records-hep``. For the
    ``authors.name_suggest`` field the options are grouped by the ``bai`` in
    their payload and the longest name of each group is returned; for any
    other field the option texts are returned as they are.

    :return: JSON response with a ``results`` list.
    """
    field = request.values.get('field')
    query = request.values.get('query')

    search = RecordsSearch(index='records-hep', doc_type='hep').suggest(
        'suggestions', query, completion={"field": field}
    )
    options = search.execute_suggest()['suggestions'][0]['options']

    if field != "authors.name_suggest":
        return jsonify({
            'results': [{'value': option['text']} for option in options]
        })

    # Collect every suggested name variant under its author identifier.
    names_by_bai = {}
    for option in options:
        names_by_bai.setdefault(
            option['payload']['bai'], []).append(option['text'])

    return jsonify({
        'results': [
            {
                'name': max(names, key=len),
                'value': bai,
                'template': 'author'
            }
            for bai, names in six.iteritems(names_by_bai)
        ]
    })
Ejemplo n.º 27
0
 def spam_check(self):
     """Checks deposit metadata for spam content.

     When a spam model is configured, the deposit is scored by the
     ``check_metadata_for_spam`` task. If the score exceeds
     ``ZENODO_SPAM_THRESHOLD``, the current user is not an admin and the
     first owner has no records, ``ZENODO_SPAM_HANDLING_ACTIONS`` is called
     with this deposit. ``HTTPException`` is re-raised; any other error is
     logged and swallowed.
     """
     try:
         config = current_app.config
         if config.get('ZENODO_SPAM_MODEL_LOCATION'):
             task = check_metadata_for_spam.delay(str(self.id))
             spam_proba = task.get(
                 timeout=config['ZENODO_SPAM_CHECK_TIMEOUT'])
         else:
             spam_proba = 0

         if (spam_proba > config['ZENODO_SPAM_THRESHOLD'] and
                 not Permission(ActionNeed('admin-access')).can()):
             owner_records = RecordsSearch(index='records').query(
                 Q('query_string',
                   query="owners:{}".format(self['owners'][0])))
             if not owner_records.count():
                 config['ZENODO_SPAM_HANDLING_ACTIONS'](self)
     except HTTPException:
         raise
     except Exception:
         current_app.logger.exception(u'Could not check deposit for spam')
Ejemplo n.º 28
0
def get_record_stats(recordid, throws=True):
    """Fetch record statistics from Elasticsearch.

    :param recordid: identifier passed to ``RecordsSearch.get_record``.
    :param throws: when true (default) any error is re-raised; when false
        errors are swallowed and ``None`` is returned.
    :return: the ``_stats`` of the first hit as a dict, or ``None`` when the
        search returns nothing.
    """
    try:
        # only include "_stats" field
        search = RecordsSearch().source(include='_stats')
        hits = search.get_record(recordid).execute()
        if not hits:
            return None
        return hits[0]._stats.to_dict()
    except Exception:
        if throws:
            raise
        # Best-effort mode: report "no statistics" instead of failing.
        return None
Ejemplo n.º 29
0
def test_prefix_index_list(app):
    """Test that every index is prefixed when a list of indices is passed."""
    prefix = 'myprefix-'
    indices = ['myindex', 'myanotherindex']
    app.config['SEARCH_INDEX_PREFIX'] = prefix

    expected = [prefix + name for name in indices]

    search = RecordsSearch(index=indices)
    _test_original_index_is_stored_when_prefixing(search, expected, indices)
Ejemplo n.º 30
0
    def number_of_circ_operations(self, library_pid, trigger):
        """Number of circulation operation during the specified timeframe.

        Counts the loan operation logs within ``self.date_range`` that have
        the given trigger and whose item belongs to the given library.

        :param library_pid: string - the library to filter with
        :param trigger: string - action name
        :return: the number of matched circulation operation
        :rtype: integer
        """
        operations = (
            RecordsSearch(index=LoanOperationLog.index_name)
            .filter('range', date=self.date_range)
            .filter('term', loan__trigger=trigger)
            .filter('term', loan__item__library_pid=library_pid)
        )
        return operations.count()
Ejemplo n.º 31
0
    def new_documents(self, library_pid):
        """Number of new documents per library for given time interval.

        Counts the operation logs within ``self.date_range`` with operation
        ``create``, record type ``doc`` and ``library.value`` equal to the
        given library.

        :param library_pid: string - the library to filter with
        :return: the number of matched documents
        :rtype: integer
        """
        created_documents = (
            RecordsSearch(index=LoanOperationLog.index_name)
            .filter('range', date=self.date_range)
            .filter('term', operation='create')
            .filter('term', record__type='doc')
            .filter('term', library__value=library_pid)
        )
        return created_documents.count()
Ejemplo n.º 32
0
    def number_of_deleted_items(self, library_pid):
        """Number of deleted items during the specified timeframe.

        Counts the operation logs within ``self.date_range`` with operation
        ``delete``, record type ``item`` and ``library.pid`` equal to the
        given library.

        :param library_pid: string - the library to filter with
        :return: the number of matched deleted items
        :rtype: integer
        """
        deleted_items = (
            RecordsSearch(index=LoanOperationLog.index_name)
            .filter('range', date=self.date_range)
            .filter('term', operation='delete')
            .filter('term', record__type='item')
            .filter('term', library__pid=library_pid)
        )
        return deleted_items.count()
Ejemplo n.º 33
0
def test_large_stats(app, db, es, locations, event_queues, minimal_record):
    """Test record page view event import."""
    search = Search(using=es)
    records = create_stats_fixtures(
        # (3 * 4) -> 12 records and (3 * 4 * 2) -> 24 files
        metadata=minimal_record,
        n_records=3,
        n_versions=4,
        n_files=2,
        event_data={'user_id': '1'},
        # (31 + 30) * 2 -> 122 event timestamps (61 days and 2 events/day)
        start_date=datetime(2018, 3, 1),
        end_date=datetime(2018, 5, 1),
        interval=timedelta(hours=12))

    # Events indices
    # 4 versions * 3 records * 2 files * 122 events -> 2928
    assert search.index('events-stats-file-download').count() == 2928
    # 4 versions * 3 records * 122 events -> 1464
    assert search.index('events-stats-record-view').count() == 1464

    # Aggregations indices
    # (4 versions + 1 concept) * 3 records -> 15 documents + 2 bookmarks
    download_aggregations = search.index('stats-file-download').doc_type(
        'file-download-day-aggregation')
    assert download_aggregations.count() == 915  # 61 days * 15 records
    view_aggregations = search.index('stats-record-view').doc_type(
        'record-view-day-aggregation')
    assert view_aggregations.count() == 915  # 61 days * 15 records

    # Records index: every record must carry the same statistics
    expected_stats = {
        # views
        'views': 122.0,
        'version_views': 488.0,
        'unique_views': 122.0,
        'version_unique_views': 122.0,
        # downloads
        'downloads': 244.0,
        'version_downloads': 976.0,
        'unique_downloads': 122.0,
        'version_unique_downloads': 122.0,
        # volume
        'volume': 2440.0,
        'version_volume': 9760.0,
    }
    for _, record, _ in records:
        hits = RecordsSearch().get_record(
            record.id).source(include='_stats').execute()
        doc = hits[0]
        assert doc['_stats'] == expected_stats
Ejemplo n.º 34
0
    def renewals(self, library_pid, trigger):
        """Number of items with loan extended.

        Number of items with loan extended per library for given time
        interval: counts the loan operation logs within ``self.date_range``
        matching the triggers and whose item belongs to the given library.

        :param library_pid: string - the library to filter with
        :param trigger: action name extend; used in a ``terms`` filter, so
            presumably a list - confirm against callers
        :return: the number of matched documents
        :rtype: integer
        """
        extended_loans = (
            RecordsSearch(index=LoanOperationLog.index_name)
            .filter('range', date=self.date_range)
            .filter('terms', loan__trigger=trigger)
            .filter('term', loan__item__library_pid=library_pid)
        )
        return extended_loans.count()
Ejemplo n.º 35
0
    def checkouts_for_owning_library(self, library_pid, trigger):
        """Number of circulation operation during the specified timeframe.

        Number of loans of items per library when the item is owned by
        the library: counts the loan operation logs within
        ``self.date_range`` matching the triggers and whose item belongs to
        the given library.

        :param library_pid: string - the library to filter with
        :param trigger: action name (checkout); used in a ``terms`` filter,
            so presumably a list - confirm against callers
        :return: the number of matched circulation operation
        :rtype: integer
        """
        checkouts = (
            RecordsSearch(index=LoanOperationLog.index_name)
            .filter('range', date=self.date_range)
            .filter('terms', loan__trigger=trigger)
            .filter('term', loan__item__library_pid=library_pid)
        )
        return checkouts.count()
Ejemplo n.º 36
0
def test_prefix_multi_index_string(app):
    """Test that each index of a comma-separated string gets the prefix."""
    prefix = 'myprefix-'
    indices = 'myindex,myanotherindex'
    app.config['SEARCH_INDEX_PREFIX'] = prefix

    # Every comma-separated name is prefixed, then joined back together.
    expected = ','.join(prefix + name for name in indices.split(','))

    search = RecordsSearch(index=indices)
    _test_original_index_is_stored_when_prefixing(
        search, [expected], [indices])
Ejemplo n.º 37
0
def assert_es_equals_db():
    """Assert that the relationships in ES match the GroupRelationships in DB.

    NOTE: This test takes the state of the DB as the reference for comparison.
    """
    # Wait for ES to be available
    current_search.flush_and_refresh('relationships')

    # Fetch all ES objects and all DB objects
    es_relations = list(RecordsSearch(index='relationships').query().scan())
    db_relations = GroupRelationship.query.all()

    # Normalize both sides and compare them as sets
    normalized_es = {normalize_es_result(hit) for hit in es_relations}
    normalized_db = {normalize_db_result(rel) for rel in db_relations}
    assert normalized_es == normalized_db
Ejemplo n.º 38
0
    def validated_requests(self, library_pid, trigger):
        """Number of validated requests.

        Number of validated requests per library for given time interval.
        Match is done on the library of the librarian.
        Note: trigger is 'validate' and not 'validate_request'

        :param library_pid: string - the library to filter with
        :param trigger: action name validate; used in a ``terms`` filter, so
            presumably a list - confirm against callers
        :return: the number of matched documents
        :rtype: integer
        """
        validations = (
            RecordsSearch(index=LoanOperationLog.index_name)
            .filter('range', date=self.date_range)
            .filter('terms', loan__trigger=trigger)
            .filter('term', library__value=library_pid)
        )
        return validations.count()
Ejemplo n.º 39
0
def test_empty_query(app):
    """Test building an empty query, then slicing, sorting and highlighting."""
    with app.app_context():
        # A bare search matches everything.
        assert RecordsSearch().to_dict()['query'] == {'match_all': {}}

        faceted = RecordsSearch.faceted_search('')
        assert faceted._s.to_dict()['query'] == {'match_all': {}}

        # Integer indexing selects a single hit at that offset.
        search = RecordsSearch()[10]
        body = search.to_dict()
        assert body['from'] == 10
        assert body['size'] == 1

        # Slicing sets both the offset and the page size.
        search = search[10:20]
        body = search.to_dict()
        assert body['from'] == 10
        assert body['size'] == 10

        search = search.sort({'field1': {'order': 'asc'}})
        assert search.to_dict()['sort'][0] == {'field1': {'order': 'asc'}}

        # sort() without arguments drops the sort clause.
        search = search.sort()
        assert 'sort' not in search.to_dict()

        # A leading dash means descending order.
        search = search.sort('-field1')
        assert search.to_dict()['sort'][0] == {'field1': {'order': 'desc'}}

        search = search.sort('field2', {'field3': {'order': 'asc'}})
        sort_clause = search.to_dict()['sort']
        assert sort_clause[0] == 'field2'
        assert sort_clause[1] == {'field3': {'order': 'asc'}}
        # Result deliberately unused: only checks that the call succeeds.
        search.sort()

        highlighted = RecordsSearch().highlight(
            'field1', index_options='offsets')
        fields = highlighted.to_dict()['highlight']['fields']
        assert len(fields) == 1
        assert fields['field1'] == {'index_options': 'offsets'}

        # Adding a second field keeps the options of the first one.
        highlighted = highlighted.highlight('field2')
        fields = highlighted.to_dict()['highlight']['fields']
        assert len(fields) == 2
        assert fields['field1'] == {'index_options': 'offsets'}
        assert fields['field2'] == {}

        highlighted = highlighted.highlight()
        # NOTE(review): the key below is spelled 'highligth', so this
        # assertion is trivially true. Before correcting the spelling,
        # confirm that highlight() without arguments really clears the
        # previously configured fields - otherwise the fixed check fails.
        assert 'highligth' not in highlighted.to_dict()