def execute_query(scopus_queries):
    """
    runs every Scopus search string and collects the distinct EIDs
    :param scopus_queries: object whose ``search_strings`` attribute is an
        iterable of Scopus search strings
    :return: a set with the EIDs returned by all searches (duplicates removed)
    """
    # accumulate directly into a set: duplicates are dropped on insertion and
    # the collected EIDs are not copied into a new list for every search
    eids = set()
    for search_string in scopus_queries.search_strings:
        search = scopus.ScopusSearch(search_string, refresh=True)
        print(search)
        eids.update(search.get_eids())
    return eids
# Example no. 2
# 0
def retrieve_publications_sample(project_id, query_id):
    """
    returns a sample of publications as JSON

    The EID list stored for the session is used when it can be loaded;
    otherwise a new sample list is generated. The request arguments
    ``session`` (default ``'default_session_'``) and ``sample_size``
    (default 100) are read from the query string.
    :param project_id: the ID of the current project
    :param query_id: not used by this function
    :return: a Response with the JSON-serialized search results and status 200
    :raises ValueError: if ``sample_size`` is given but is not an integer
    """
    session_id = request.args.get('session')
    if session_id is None:
        session_id = 'default_session_'

    # check for a missing parameter BEFORE converting: int(None) raises a
    # TypeError, which would make the default of 100 unreachable
    raw_sample_size = request.args.get('sample_size')
    sample_size = 100 if raw_sample_size is None else int(raw_sample_size)

    try:
        random_sample_eids = eids_service.load_eid_list(project_id, session_id)
    except Exception:
        # the stored list could not be loaded, so a new sample is generated;
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate
        random_sample_eids = generate_sample_publication_list(
            project_id, sample_size, session_id)

    # build one search string from the sampled EIDs and run it in Scopus
    search_string = utils.generate_scopus_search_from_eid_list(
        random_sample_eids)
    search = scopus.ScopusSearch(search_string,
                                 refresh=True,
                                 project_id=project_id)
    sample_publications_json = json.dumps(search.results, cls=PropertyEncoder)
    return Response(sample_publications_json,
                    status=200,
                    mimetype='application/json')
# Example no. 3
# 0
def prepare_fig(w=1, h=None):
    """
    creates a new, cleared figure whose size is derived from two multipliers
    :param w: width multiplier, the figure width is ``6 * w``
    :param h: height multiplier, the figure height is ``3 * h``;
        falls back to ``w`` when it is None
    :return: the figure returned by ``plt.figure``
    """
    height_factor = w if h is None else h
    dimensions = (6 * w, 3 * height_factor)

    # the same size is passed to sns.set (as the 'figure.figsize' rc entry)
    # and to the figure itself
    sns.set(rc={'figure.figsize': dimensions})
    figure = plt.figure(figsize=dimensions)
    plt.clf()
    return figure


def top_k(mapping, k=10):
    """
    returns the keys with the largest values, largest first
    :param mapping: mapping from keys to comparable values
    :param k: maximum number of keys to return
    :return: a list with at most k keys ordered by descending value
    """
    # sort ascending and reverse afterwards (instead of sorting with
    # reverse=True) so that keys with equal values keep the same relative
    # order as before
    ranked = sorted(mapping.keys(), key=mapping.__getitem__)
    ranked.reverse()
    return ranked[:k]


# Script section: runs a Scopus search for `query` (defined outside this
# section), then retrieves one abstract per EID while updating a progress bar.

# let pandas display up to 250 rows
pd.set_option('display.max_rows', 250)

eids = scopus.ScopusSearch(query).get_eids()
# fixed seed, so the shuffled order is the same on every run
random.seed(0)
# shuffles in place (assumes get_eids() returns a mutable list -- TODO confirm)
random.shuffle(eids)

# progress bar with one step per EID
bar = widgets.IntProgress(min=0, max=len(eids), description='Loading')
display(bar)

print('query: {} ({} results)'.format(query, len(eids)))

# retrieve the abstract of every EID, one request object per EID
papers = []
for eid in eids:
    papers.append(scopus.AbstractRetrieval(eid, view='FULL'))
    # advance the bar and show the current count as its description
    bar.value += 1
    bar.description = str(bar.value)

print('scopus returned {} results'.format(len(papers)))
def query_execution(project_id):
    """
    executes the defined and saved queries in scopus and stores the found EIDs

    For every entry of the saved queries the EIDs of its search strings are
    saved under a prefix built from the matching search ID; the distinct EIDs
    of all searches are saved as the project's EID list.
    :param project_id: the ID of the current project
    :return: an empty response with a status of 204 when the queries were executed successfully
    """
    app.logger.info('project {}: running queries'.format(project_id))
    # reads the saved Scopus search strings from disk
    scopus_queries = query_service.load_scopus_queries(project_id)

    # retrieve the project from disk, set the booleans and save the project
    project = project_service.load_project(project_id)
    project.isEidsCollected = False
    project.isEidsCollecting = True
    project_service.save_project(project)

    # prepares the status file
    status = Status("EIDS_COLLECTING")
    status_service.save_status(project_id, status)

    # distinct EIDs over all searches; a set removes duplicates on insertion
    eids = set()

    for index, search_strings in enumerate(scopus_queries.search_strings):
        # distinct EIDs found by the search strings of this entry
        individual_eids = set()
        for search_string in search_strings:
            app.logger.info('project {}: executing search {} - {}'.format(
                project_id, index, search_string))
            search = scopus.ScopusSearch(search_string,
                                         refresh=True,
                                         field='eid',
                                         view='STANDARD')
            # results is None when the search found nothing to iterate over
            if search.results is not None:
                app.logger.info(
                    'project {}: result search {} - {} entries found'.format(
                        project_id, index, len(search.results)))
                individual_eids.update(
                    result.eid for result in search.results)

        eids |= individual_eids
        # persist the EIDs of this entry, prefixed with its search ID
        eids_service.save_eid_list(
            project_id=project_id,
            eids=individual_eids,
            prefix=(str(scopus_queries.search_ids[index]) + '_'))

    # print the results to the command line for logging
    app.logger.info('project {}: found {} eids in Scopus'.format(
        project_id, len(eids)))

    # persist EIDs to file
    eids_service.save_eid_list(project_id=project_id, eids=eids)

    # set the total number of results to the relevance measure and save it to disk
    relevance_measure = RelevanceMeasure(number_of_search_results=len(eids))
    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)

    # set the total number of results to the status and save it to disk
    status.total = relevance_measure.number_of_search_results
    status_service.save_status(project_id, status)

    # set the status and save it to disk
    status = Status("EIDS_COLLECTED")
    status_service.save_status(project_id, status)

    # set the project booleans and save the project
    project.isEidslist = True
    project.isEidsCollected = True
    project.isEidsCollecting = False
    project_service.save_project(project)

    # 204 (No Content) carries no body, so none is passed to the response
    return Response(status=204)