コード例 #1
0
def execute_query(project_id, query_id):
    """
    Run one stored Scopus query of a project and persist the identifiers found.

    The project's EID flags are saved as "collecting" before the search is
    executed and as "collected" after the identifier list has been written.

    :param project_id: the ID of the current project
    :param query_id: the ID of the stored query to execute
    :return: a response with a status of 204 once the query was executed
    """
    start_message = 'project {}: running query {}'.format(project_id, query_id)
    app.logger.info(start_message)

    # the stored Scopus queries are read before the project state is touched
    stored_queries = query_service.load_scopus_queries(project_id, query_id)

    # flag the project as "collection running" and persist that state
    current_project = project_service.load_project(project_id)
    current_project.isEidsCollected = False
    current_project.isEidsCollecting = True
    project_service.save_project(current_project)

    found_ids = scopus_service.execute_query(stored_queries)

    # report the size of the result set
    result_message = 'project {}: found {} entries in Scopus'.format(
        project_id, len(found_ids))
    app.logger.info(result_message)

    # write the identifiers to the project's storage
    identifier_service.save_id_list(
        project_id=project_id,
        query_id=query_id,
        identifiers=found_ids,
    )

    # flag the project as "collection done" and persist that state
    current_project.isEidslist = True
    current_project.isEidsCollected = True
    current_project.isEidsCollecting = False
    project_service.save_project(current_project)

    return Response({"status": "FINISHED"}, status=204)
コード例 #2
0
def count_keywords(project_id):
    """
    Export the abstracts stored in the project's Elasticsearch index to disk.

    Every hit is wrapped in an ``AbstractText`` (document ID plus abstract)
    and the resulting list is written as JSON to
    ``<LIBINTEL_DATA_DIR>/out/<project_id>/abstracts.json``.

    :param project_id: the ID of the current project
    :return: a response with a status of 204 once the file has been written
    """
    project = project_service.load_project(project_id)
    with app.app_context():
        location = app.config.get("LIBINTEL_DATA_DIR")
    # NOTE(review): the project is subscripted here while most sibling
    # handlers use attribute access (project.project_id) - confirm that the
    # loaded project supports both.
    out_dir = location + '/out/' + project['project_id'] + '/'
    # NOTE(review): the field name is spelled "scopus_abtract_retrieval";
    # presumably this has to match the field name used when indexing - verify.
    result = es.search(
        index=project['project_id'],
        doc_type='all_data',
        filter_path=[
            "hits.hits._source.scopus_abtract_retrieval.abstract",
            "hits.hits._id"
        ],
        request_timeout=600)
    abstracts = [
        AbstractText(
            hit['_id'],
            hit["_source"]["scopus_abtract_retrieval"]["abstract"])
        for hit in result["hits"]["hits"]
    ]
    # the context manager closes the file; no explicit close() is needed
    with open(out_dir + 'abstracts.json', 'w') as json_file:
        json_file.write(json.dumps([ob.__dict__ for ob in abstracts]))
    return Response({"status": "FINISHED"}, status=204)
コード例 #3
0
def set_survey_id(project_id):
    """
    Store the survey ID submitted in the request form on the given project.

    :param project_id: the ID of the current project
    :return: a response with a status of 204 once the project has been saved
    """
    submitted_survey_id = request.form['survey_id']

    target_project = project_service.load_project(project_id)
    target_project.survey_id = submitted_survey_id
    project_service.save_project(target_project)

    log_line = 'project {}: connecting with survey {}'.format(
        project_id, submitted_survey_id)
    app.logger.info(log_line)
    return Response('survey ID saved', status=204)
コード例 #4
0
def upload_sample_judgement_file(project_id):
    """
    Save the uploaded sample judgement file into the project's output folder.

    The file is taken from the ``sample-judgement-file`` field of the request
    and stored as ``sample_judgement_eids_list.csv``; afterwards the project's
    ``isSampledata`` flag is set and the project is saved.

    :param project_id: the ID of the current project
    :return: a response with a status of 204 once the file has been saved
    """
    with app.app_context():
        location = app.config.get("LIBINTEL_DATA_DIR")
    print("saving sample test file for " + project_id)
    project = project_service.load_project(project_id)
    uploaded_file = request.files['sample-judgement-file']
    path_to_save = location + '/out/' + project_id + '/'
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(path_to_save, exist_ok=True)
    uploaded_file.save(path_to_save + 'sample_judgement_eids_list.csv')
    project.isSampledata = True
    project_service.save_project(project)
    return Response('list saved', status=204)
コード例 #5
0
def upload_test_file(project_id):
    """
    Save the uploaded test file into the project's output folder.

    The file is taken from the ``test-file`` field of the request and stored
    as ``test_eids_list.txt``; afterwards the project's ``isTestdata`` entry
    is set and the project is saved.

    :param project_id: the ID of the current project
    :return: a response with a status of 204 once the file has been saved
    """
    with app.app_context():
        location = app.config.get("LIBINTEL_DATA_DIR")
    print("saving test file for " + project_id)
    project = project_service.load_project(project_id)
    uploaded_file = request.files['test-file']
    path_to_save = location + '/out/' + project_id + '/'
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(path_to_save, exist_ok=True)
    uploaded_file.save(path_to_save + 'test_eids_list.txt')
    # NOTE(review): sibling upload handlers set such flags via attribute
    # access (e.g. project.isSampledata) - confirm the project object
    # supports item assignment.
    project['isTestdata'] = True
    project_service.save_project(project)
    return Response('list saved', status=204)
コード例 #6
0
def collect_survey_results_data(project_id):
    """
    Collect the survey data belonging to a project, store it and return it.

    The collected survey is saved through the survey service, its
    Elasticsearch index is deleted and the survey is saved to Elasticsearch
    again.

    :param project_id: the ID of the current project
    :return: a response with status 200 whose body is the JSON-encoded
             survey results
    """
    current_project = project_service.load_project(project_id)
    collected_survey = survey_service.collect_survey_data(current_project)

    # persist the survey through the survey service
    survey_service.save_survey(collected_survey)

    # replace the survey held in Elasticsearch
    elasticsearch_service.delete_survey_index(collected_survey)
    elasticsearch_service.save_survey(collected_survey)

    log_line = 'project {}: collecting survey data'.format(project_id)
    app.logger.info(log_line)

    payload = json.dumps(collected_survey.survey_results, cls=HiddenEncoder)
    return Response(payload, status=200)
コード例 #7
0
def get_project(project_id):
    """
    Load a project by its ID and return it as a JSON string.

    :param project_id: the ID of the project to be loaded
    :return: the JSON-serialized project, or a 404 response when loading the
             project raises FileNotFoundError
    """
    # only the load itself is guarded, so errors raised while serializing the
    # project are not reported as "file not found"
    try:
        project = project_service.load_project(project_id)
    except FileNotFoundError:
        # Logger.warn is a deprecated alias; use warning()
        app.logger.warning(
            'project {}: could not load project'.format(project_id))
        return Response("File not found", status=404)
    app.logger.info('project {}: loaded'.format(project_id))
    # objects the JSON encoder cannot handle are serialized via __getstate__
    return json.dumps(project, default=lambda o: o.__getstate__())
コード例 #8
0
def upload_xml_file(project_id):
    """
    Retrieve the query XML file from the request and save it to disk.

    For POST requests the file from the ``query_xml`` field is stored as
    ``query.xml`` in the project's output folder and the project's
    ``isQueryDefined`` flag is set. For any other method nothing is saved,
    but the same 204 response is returned.

    :param project_id: the ID of the current project
    :return: a response with a status of 204
    """
    with app.app_context():
        location = app.config.get("LIBINTEL_DATA_DIR")
    app.logger.info("project {}: saving uploaded xml file".format(project_id))
    if request.method == 'POST':
        project = project_service.load_project(project_id)
        uploaded_file = request.files['query_xml']
        path_to_save = location + '/out/' + project_id + '/'
        # exist_ok avoids the check-then-create race of a separate exists()
        # test
        os.makedirs(path_to_save, exist_ok=True)
        uploaded_file.save(path_to_save + 'query.xml')
        project.isQueryDefined = True
        project_service.save_project(project)
    return Response("OK", status=204)
コード例 #9
0
def save_query_as_xml(project_id):
    """
    Save the query sent as JSON in the request body as an XML document.

    After the XML has been saved, the Scopus queries are created from the
    query, the project's ``isQueryDefined`` flag is set and the project is
    saved.

    :param project_id: the ID of the current project
    :return: the saved query as a JSON string, or a 500 response when saving
             the XML raises IOError
    """
    project = project_service.load_project(project_id)
    # silent=True: a body that cannot be parsed yields None instead of an
    # error response
    query_json = request.get_json(silent=True)
    query = query_service.from_json(query_json)
    try:
        query_service.save_query_to_xml(project_id, query)
    except IOError:
        # Logger.warn is a deprecated alias; use warning()
        app.logger.warning(
            'project {}: could not save query to xml'.format(project_id))
        return Response("could not save query", status=500)
    app.logger.info(
        'project {}: successfully saved query to xml'.format(project_id))
    query_service.create_scopus_queries(project_id, query)
    project.isQueryDefined = True
    app.logger.info('project {}: scopus queried defined'.format(project_id))
    project_service.save_project(project)
    # objects the JSON encoder cannot handle are serialized via __getstate__
    return json.dumps(query, default=lambda o: o.__getstate__())
コード例 #10
0
def data_collection_execution(project_id):
    """
    Run the data collection for all EIDs of a project.

    The optional ``mode`` query argument is passed to the EID list loader.
    Unless ``mode`` is ``'missed'`` the project's Elasticsearch index is
    deleted before collecting; for ``'missed'`` the stored missed EIDs are
    deleted instead.

    When the configured Scopus keys are a tuple, the EID list is split into
    one chunk per key, each chunk is handed to ``collect_data`` in its own
    thread, and the function returns right after starting the threads.
    Otherwise the EIDs are processed with the single configured key and each
    result is sent to the Elasticsearch index.

    :param project_id: the ID of the current project
    :return: a response with a status of 204
    """
    # an absent 'mode' query argument is treated as the empty string
    mode = request.args.get('mode', '')

    app.logger.info('project {}: collecting data with mode {}'.format(
        project_id, mode))

    # load project, set status bools, and load the eid list. initialize the
    # missed eid list
    project = project_service.load_project(project_id)
    project.isDataCollecting = True
    project.isDataCollected = False
    eids = eids_service.load_eid_list(project_id, mode)
    missed_eids = []

    with app.app_context():
        keys = app.config.get("LIBINTEL_SCOPUS_KEYS")

    # initialize status, set to collecting and save status
    status = Status("DATA_COLLECTING")
    status.total = len(eids)
    status_service.save_status(project_id, status)

    if status.total > 0:
        if mode != 'missed':
            elasticsearch_service.delete_index(project.project_id)
        else:
            eids_service.deleteMissedEids()
        if isinstance(keys, tuple):

            # the number of threads is given by the number of available API
            # keys
            number_of_threads = len(keys)
            app.logger.info('project {}: collecting data in {} threads'.format(
                project_id, number_of_threads))

            # gather the individual chunks provided to each thread
            length_of_chunks = math.ceil(status.total / number_of_threads)
            list_chunks = list(chunks(eids, length_of_chunks))
            # make asynchronous calls and delegate the individual collection
            # to the individual threads; there may be fewer chunks than keys
            for key_index, key in enumerate(keys):
                if len(list_chunks) > key_index:
                    thread = Thread(target=collect_data,
                                    args=(list_chunks[key_index],
                                          project.project_id, project.name,
                                          key_index, key,
                                          app._get_current_object()))
                    thread.start()
            # NOTE(review): this returns without saving the project flags or
            # a final status - presumably collect_data takes care of that;
            # verify.
            return Response('finished', status=204)

        # NOTE(review): collect_data is called for the full EID list here and
        # the loop below then processes the same EIDs again - confirm that
        # both passes are intended.
        collect_data(eids=eids,
                     project_id=project.project_id,
                     project_name=project.name,
                     i=0,
                     key=keys,
                     app=app._get_current_object())

        # if only one API-Key is given, collect data sequentially
        for idx, eid in enumerate(eids):

            # set scopus api-key to the provided key
            scopus.config['Authentication']['APIKEy'] = keys

            # update the progress status and save the status to disk
            status.progress = idx + 1
            status_service.save_status(project_id, status)

            # log progress; the project id is formatted into the message
            # instead of logging a literal '{}' placeholder
            app.logger.info(
                'project {}: processing entry {} of {} entries: {}%'.format(
                    project_id, idx, status.total, idx / status.total * 100))

            # retrieve data from scopus; any failure marks the EID as missed
            # so the remaining EIDs are still processed
            try:
                scopus_abstract = scopus.AbstractRetrieval(identifier=eid,
                                                           id_type='eid',
                                                           view="FULL",
                                                           refresh=True)
                app.logger.info(
                    'project {}: collected scopus data for EID {}'.format(
                        project_id, eid))
            except Exception as inst:
                app.logger.error(
                    'project {}: could not collect scopus data for EID {}, exception: {}'
                    .format(project_id, eid, type(inst)))
                missed_eids.append(eid)
                continue

            # create new AllResponses object to hold the individual information
            response = AllResponses(eid, project.name, project.project_id)

            # add scopus abstract to AllResponses object
            response.scopus_abstract_retrieval = scopus_abstract

            # get doi and collect unpaywall data and Altmetric data
            doi = scopus_abstract.doi
            if doi is not None and doi != "":
                response.unpaywall_response = Unpaywall(doi)
                response.altmetric_response = Altmetric(doi)
                response.scival_data = Scival([])

            # send response to elastic search index
            elasticsearch_service.send_to_index(response, project.project_id)
            app.logger.info('project {}: saved EID {} to elasticsearch'.format(
                project_id, eid))

    # persist the EIDs that could not be collected, then mark the collection
    # as finished in both the status and the project
    eids_service.save_eid_list(project_id=project.project_id,
                               eids=missed_eids,
                               prefix='missed_')
    app.logger.info('project {}: all EID data collected'.format(project_id))
    status.status = "DATA_COLLECTED"
    status_service.save_status(project_id, status)
    project.isDataCollecting = False
    project.isDataCollected = True
    project_service.save_project(project)
    return Response({"status": "FINISHED"}, status=204)
コード例 #11
0
def references_collection_execution(project_id):
    """
    Collect the references for the publications of a project.

    For every EID of the project the Scopus abstract is retrieved and its
    references are gathered. The ``sample_size`` most frequently occurring
    references are saved as a counter with the prefix ``references_``; EIDs
    whose retrieval raised IOError are saved with the prefix ``missed_``.

    :param project_id: the ID of the current project
    :return: a response with a status of 204 if successful
    """
    # initialize lists, read sample size from request and load eid list
    sample_size = int(request.args.get('sample_size'))
    missed_eids = []
    references_eids = []
    eids = eids_service.load_eid_list(project_id)

    # load project and set booleans
    project = project_service.load_project(project_id)
    project.isReferencesCollecting = True
    project.isReferencesCollected = False
    project_service.save_project(project)

    # prepare status
    status = Status("REFERENCES_COLLECTING")
    status.total = len(eids)
    status_service.save_status(project_id, status)

    # if eids are given, cycle through all of them
    if status.total > 0:
        for idx, eid in enumerate(eids):
            # update the progress status and save the status to disk
            status.progress = idx + 1
            status_service.save_status(project_id, status)

            # log progress; the project id is formatted into the message
            # instead of logging a literal '{}' placeholder
            app.logger.info(
                'project {}: processing entry {} of {} entries: {}%'.format(
                    project_id, idx, status.total, idx / status.total * 100))

            # retrieve references from scopus
            try:
                scopus_abstract = scopus.AbstractRetrieval(eid, view="FULL")
                app.logger.info(
                    'project {}: collected scopus data for EID {}'.format(
                        project_id, eid))
                if scopus_abstract.references is not None:
                    # extend in place instead of re-building the list for
                    # every publication
                    references_eids.extend(scopus_abstract.references)
                else:
                    # Logger.warn is a deprecated alias; use warning()
                    app.logger.warning(
                        'project {}: no references given in scopus export for EID {}.'
                        .format(project_id, eid))
            except IOError:
                app.logger.error(
                    'project {}: could not collect scopus data for EID {}'.
                    format(project_id, eid))
                missed_eids.append(eid)
                continue

    # count how often each reference occurs and keep the most frequent ones
    occurences = Counter(references_eids)
    most_occurences = occurences.most_common(sample_size)

    # save the counter with the most occurences to disk
    counter_service.save_counter(project_id, most_occurences, 'references_')
    eids_service.save_eid_list(project_id, missed_eids, prefix='missed_')

    # set the status and save it to disk
    # NOTE(review): the final status is "DATA_COLLECTED" although the run
    # started as "REFERENCES_COLLECTING" - confirm this is the intended value.
    status.status = "DATA_COLLECTED"
    status_service.save_status(project_id, status)

    # set the project booleans and save it to disk
    project.isReferencesCollecting = False
    project.isReferencesCollected = True
    project_service.save_project(project)

    return Response({"status": "FINISHED"}, status=204)
コード例 #12
0
def query_execution(project_id):
    """
    Execute all stored Scopus searches of a project and persist the EIDs.

    For every group of search strings the EIDs found are saved under the
    prefix ``<search_id>_``; the de-duplicated union of all EIDs is saved as
    the project's EID list. The number of unique EIDs is stored as relevance
    measure and as the total of the status.

    :param project_id: the ID of the current project
    :return: a response with a status of 204 when the queries were executed
    """
    app.logger.info('project {}: running queries'.format(project_id))
    # reads the saved Scopus search strings from disk
    scopus_queries = query_service.load_scopus_queries(project_id)

    # retrieve the project from disk, set the booleans and save the project
    project = project_service.load_project(project_id)
    project.isEidsCollected = False
    project.isEidsCollecting = True
    project_service.save_project(project)

    # prepares the status file
    status = Status("EIDS_COLLECTING")
    status_service.save_status(project_id, status)

    # sets remove duplicates found by more than one search
    eids = set()

    for index, search_strings in enumerate(scopus_queries.search_strings):
        individual_eids = set()
        for search_string in search_strings:
            app.logger.info('project {}: executing search {} - {}'.format(
                project_id, index, search_string))
            search = scopus.ScopusSearch(search_string,
                                         refresh=True,
                                         field='eid',
                                         view='STANDARD')
            if search.results is not None:
                app.logger.info(
                    'project {}: result search {} - {} entries found'.format(
                        project_id, index, len(search.results)))
                found = [result.eid for result in search.results]
                eids.update(found)
                individual_eids.update(found)
        # persist the EIDs of this search group under its search id
        eids_service.save_eid_list(
            project_id=project_id,
            eids=individual_eids,
            prefix=(str(scopus_queries.search_ids[index]) + '_'))

    # print the results to the command line for logging
    app.logger.info('project {}: found {} eids in Scopus'.format(
        project_id, len(eids)))

    # persist EIDs to file
    eids_service.save_eid_list(project_id=project_id, eids=eids)

    # store the total number of results as relevance measure
    relevance_measure = RelevanceMeasure(number_of_search_results=len(eids))
    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)

    # update the existing status instead of replacing it with a fresh Status
    # object, so that the total just set is kept in the saved final status
    status.total = relevance_measure.number_of_search_results
    status.status = "EIDS_COLLECTED"
    status_service.save_status(project_id, status)

    # set the project boolean and save the project
    project.isEidslist = True
    project.isEidsCollected = True
    project.isEidsCollecting = False
    project_service.save_project(project)

    return Response({"status": "FINISHED"}, status=204)