def jobs_similar(id):
    """Render the jobs that Elasticsearch considers most similar to a job.

    Runs a ``more_like_this`` query against the ``records-jobs`` index,
    seeded with the document whose ``_id`` is ``id``, keeps at most the
    first two hits and renders each of them with the
    ``inspirehep_theme/similar_jobs.html`` template.

    :param id: ``_id`` of the job document used as the similarity seed.
    :return: the rendered fragments concatenated in hit order; an empty
        string when there are no hits.
    """
    similarity_query = {
        "more_like_this": {
            "docs": [
                {
                    "_id": id
                }
            ],
            # Zero thresholds: no term is discarded for being rare.
            "min_term_freq": 0,
            "min_doc_freq": 0,
        }
    }
    search = RecordsSearch(index='records-jobs', doc_type='jobs')
    # Only the two best matches are requested.
    search = search.query(similarity_query)[0:2]

    rendered = ''
    for hit in search.execute():
        rendered += render_template_to_string(
            "inspirehep_theme/similar_jobs.html", record=hit)
    return rendered
def get_experiment_publications(experiment_name):
    """Return how many ``records-hep`` documents reference an experiment.

    A ``term`` query on ``accelerator_experiments.experiment`` is executed
    with ``search_type="count"``; the reported total of hits is returned.

    :param experiment_name: canonical name of the experiment.
    :type experiment_name: string
    :return: value of ``hits.total`` from the search response.
    """
    experiment_filter = {
        "term": {"accelerator_experiments.experiment": experiment_name}
    }
    count_search = (
        RecordsSearch(index="records-hep")
        .query(experiment_filter)
        .params(search_type="count")
    )
    response = count_search.execute()
    return response.hits.total
def directly_list_v2_record_ids():
    """Iterate over raw search hits stored in the ``records-records`` index.

    The default ``RecordsSearch`` is queried with ``version=True`` in
    windows of 100 results. Every raw hit whose ``_index`` equals
    ``'records-records'`` is yielded as-is (the whole hit, not just an
    identifier). Paging stops once the window end reaches the total
    number of hits reported by the latest response.
    """
    page_size = 100
    offset = 0
    while True:
        window = RecordsSearch().params(version=True)
        window = window[offset:offset + page_size]
        response = window.execute()

        for hit in response.hits.hits:
            if hit.get('_index') == 'records-records':
                yield hit

        # Advance to the next window; stop when it would start at or past
        # the total reported by the response just processed.
        offset += page_size
        if offset >= response.hits.total:
            break
def get_institution_experiments_from_es(icn):
    """Fetch the experiments affiliated with an institution.

    Searches the ``records-experiments`` index with a ``term`` query on
    ``affiliation``, sorted by ``earliest_date`` descending. The result is
    capped at 100 entries to avoid putting too much load on ElasticSearch.

    :param icn: Institution canonical name.
    :type icn: string
    :return: the ``hits`` of the executed search response.
    """
    max_experiments = 100
    affiliation_filter = {
        "term": {"affiliation": icn}
    }
    experiments = RecordsSearch(index="records-experiments")
    experiments = experiments.query(affiliation_filter)
    experiments = experiments[:max_experiments].sort('-earliest_date')
    response = experiments.execute()
    return response.hits