def jobs_similar(id):
    """Render the jobs records that Elasticsearch reports as similar to ``id``.

    A ``more_like_this`` query seeded with the document ``id`` is run against
    the ``records-jobs`` index (sliced to ``[0:2]``), and every hit is rendered
    with the ``inspirehep_theme/similar_jobs.html`` template.

    :param id: Elasticsearch ``_id`` of the job used as the seed document.
    :returns: the rendered fragments concatenated in hit order; an empty
        string when the search yields no hits.
    """
    similarity_clause = {
        "more_like_this": {
            "docs": [{"_id": id}],
            # Zero thresholds: no minimum term/document frequency is imposed.
            "min_term_freq": 0,
            "min_doc_freq": 0,
        }
    }
    search = RecordsSearch(index='records-jobs', doc_type='jobs')
    search = search.query(similarity_clause)[0:2]

    rendered = ''
    for hit in search.execute():
        rendered += render_template_to_string(
            "inspirehep_theme/similar_jobs.html", record=hit)
    return rendered
def _deposit_id_from_hit(hit_dict):
    """Return the deposit id found in an ES hit dictionary, or ``None``.

    Two response shapes are understood:

    * ``_source`` filtering, which keeps the document nested:
      ``{'_deposit': {'id': '123'}}``
    * ``fields`` retrieval, which flattens the path and wraps the value in a
      list: ``{'_deposit.id': ['123']}``
    """
    value = hit_dict.get('_deposit.id')
    if value is None:
        deposit = hit_dict.get('_deposit')
        value = deposit.get('id') if isinstance(deposit, dict) else None
    if isinstance(value, (list, tuple)):
        return value[0] if value else None
    return value


def cleanup_indexed_deposits():
    """Delete indexed deposits that do not exist in the database.

    Draft deposits are scanned in the ``deposits`` index, their deposit ids
    are looked up as ``depid`` persistent identifiers, and every indexed
    document whose deposit id has no matching identifier is removed from
    Elasticsearch.

    Documents whose deposit id cannot be read from the hit are left alone:
    without an id there is no way to prove that the deposit is missing from
    the database, and deleting on a failed lookup would wipe valid drafts.

    .. note::

        This task exists because of deposit REST API calls sometimes failing
        after the deposit has already been sent for indexing to ES, leaving an
        inconsistent state of a deposit existing in ES and not in the database.
        It should be removed once a proper signal mechanism has been
        implemented in the ``invenio-records-rest`` and ``invenio-deposit``
        modules.
    """
    search = RecordsSearch(index='deposits')
    query = (search.query('term', **{'_deposit.status': 'draft'})
             .source(['_deposit.id']))

    # (deposit id, ES document id, ES index, ES doc type) for every draft hit.
    es_depids_info = [
        (_deposit_id_from_hit(hit.to_dict()), hit.meta.id, hit.meta.index,
         hit.meta.doc_type)
        for hit in query.scan()
    ]

    es_depids = {info[0] for info in es_depids_info if info[0] is not None}
    if not es_depids:
        # Nothing to compare against the database, hence nothing to delete.
        return

    db_depids_query = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_type == 'depid',
        PersistentIdentifier.pid_value.in_(es_depids))
    db_depids = {pid.pid_value for pid in db_depids_query}

    indexer = RecordIndexer()
    for depid, es_id, index, doc_type in es_depids_info:
        if depid is None or depid in db_depids:
            continue
        indexer.client.delete(id=str(es_id), index=index, doc_type=doc_type)
def test_elasticsearch_query_min_score(app):
    """Check that the configured minimum score appears in the built query.

    ``SEARCH_RESULTS_MIN_SCORE`` is set on the application config, a simple
    ``match`` query is built, and the serialized search body is expected to
    carry a ``min_score`` entry equal to the configured value.
    """
    app.config.update(SEARCH_RESULTS_MIN_SCORE=0.1)

    search = RecordsSearch().query(Q('match', title='Higgs'))
    body = search.to_dict()

    assert 'min_score' in body
    assert body['min_score'] == app.config['SEARCH_RESULTS_MIN_SCORE']
def _build_query(id_):
    """Build the ``more_like_this`` search seeded with the jobs record ``id_``.

    :param id_: Elasticsearch ``_id`` of the document to find neighbours for.
    :returns: a ``RecordsSearch`` on the ``records-jobs`` index carrying the
        ``more_like_this`` clause and sliced to ``[0:2]``; the search is not
        executed here.
    """
    more_like_this = {
        'docs': [{'_id': id_}],
        # Zero thresholds: no minimum term/document frequency is imposed.
        'min_term_freq': 0,
        'min_doc_freq': 0,
    }
    search = RecordsSearch(index='records-jobs', doc_type='jobs')
    return search.query({'more_like_this': more_like_this})[0:2]