def execute_query(project_id, query_id):
    """
    Executes the previously defined and saved query in Scopus.
    :param project_id: the ID of the current project
    :param query_id: the ID of the saved query to execute
    :return: 'finished' with a status of 204 when the query was executed successfully
    """
    app.logger.info(f'project {project_id}: running query {query_id}')

    # read the saved Scopus search string back from disk
    scopus_queries = query_service.load_scopus_queries(project_id, query_id)

    # flag the project as "collecting" before the long-running search starts
    project = project_service.load_project(project_id)
    project.isEidsCollected = False
    project.isEidsCollecting = True
    project_service.save_project(project)

    # run the actual Scopus search
    eids = scopus_service.execute_query(scopus_queries)

    # log the result size
    app.logger.info(f'project {project_id}: found {len(eids)} entries in Scopus')

    # persist the collected identifiers to file
    identifier_service.save_id_list(project_id=project_id,
                                    query_id=query_id,
                                    identifiers=eids)

    # flip the project flags back to "collected" and persist
    project.isEidslist = True
    project.isEidsCollected = True
    project.isEidsCollecting = False
    project_service.save_project(project)

    return Response({"status": "FINISHED"}, status=204)
# Example #2
def set_survey_id(project_id):
    """
    Reads the survey ID from the request form and attaches it to the project.
    :param project_id: the ID of the current project
    :return: 204 once the survey ID has been stored
    """
    project = project_service.load_project(project_id)
    survey_id = request.form['survey_id']
    project.survey_id = survey_id
    project_service.save_project(project)
    app.logger.info(f'project {project_id}: connecting with survey {survey_id}')
    return Response('survey ID saved', status=204)
def upload_sample_judgement_file(project_id):
    """
    Stores the uploaded sample judgement file in the project's output folder
    and marks the project as having sample data.
    :param project_id: the ID of the current project
    :return: 204 when the file has been saved
    """
    with app.app_context():
        location = app.config.get("LIBINTEL_DATA_DIR")
    # use the application logger instead of print() for consistency with the
    # other handlers in this module
    app.logger.info('saving sample test file for {}'.format(project_id))
    project = project_service.load_project(project_id)
    file = request.files['sample-judgement-file']
    path_to_save = location + '/out/' + project_id + '/'
    # exist_ok avoids the race between the existence check and makedirs
    os.makedirs(path_to_save, exist_ok=True)
    file.save(path_to_save + 'sample_judgement_eids_list.csv')
    project.isSampledata = True
    project_service.save_project(project)
    return Response('list saved', status=204)
def upload_test_file(project_id):
    """
    Stores the uploaded test file in the project's output folder and marks
    the project as having test data.
    :param project_id: the ID of the current project
    :return: 204 when the file has been saved
    """
    with app.app_context():
        location = app.config.get("LIBINTEL_DATA_DIR")
    # use the application logger instead of print() for consistency with the
    # other handlers in this module
    app.logger.info('saving test file for {}'.format(project_id))
    project = project_service.load_project(project_id)
    file = request.files['test-file']
    path_to_save = location + '/out/' + project_id + '/'
    # exist_ok avoids the race between the existence check and makedirs
    os.makedirs(path_to_save, exist_ok=True)
    file.save(path_to_save + 'test_eids_list.txt')
    # bug fix: the original used dict subscription (project['isTestdata']),
    # while every other handler sets attributes on the loaded project object
    project.isTestdata = True
    project_service.save_project(project)
    return Response('list saved', status=204)
# Example #5
# File: decidr.py  Project: lbracken/decidr
def save_project():
    """
    Persists a project supplied by the client. The operation is a "create"
    when the project has no '_id' yet, otherwise a "save" (update).
    :return: a JSON response containing either the saved project or an error
    """
    # Get the client provided project
    project = get_project_from_req(request)
    response_body = {}

    if project:
        operation = "save" if "_id" in project else "create"
        project = project_service.save_project(project)

        if project:
            populate_response_with_project(response_body, project)
        else:
            response_body["error"] = True
            response_body["message"] = "Error saving the project"

        now = str(datetime.now())
        # bug fix: only dereference _id when the save actually returned a
        # project — the original crashed here on the failure path
        project_id = project["_id"] if project else "n/a"
        # bug fix: Python 3 requires print as a function; the original used
        # the Python 2 print statement (a syntax error under Python 3)
        print(">> %s\t%s\t%s_project()\t[%s]"
              % (request.remote_addr, now, operation, project_id))
    else:
        response_body["error"] = True
        response_body["message"] = "No project provided to save"

    # Create the response object
    response = make_response(jsonify(response_body))
    return response
def upload_xml_file(project_id):
    """
    Retrieves the query XML file from the request and saves it to disc.
    :param project_id: the ID of the current project
    :return: returns a status of 204 when the file could be saved
    """
    with app.app_context():
        location = app.config.get("LIBINTEL_DATA_DIR")
    app.logger.info("project {}: saving uploaded xml file".format(project_id))
    # only POST requests carry a file payload; other methods fall through
    if request.method == 'POST':
        project = project_service.load_project(project_id)
        uploaded_file = request.files['query_xml']
        target_dir = location + '/out/' + project_id + '/'
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        uploaded_file.save(target_dir + 'query.xml')
        project.isQueryDefined = True
        project_service.save_project(project)
    return Response("OK", status=204)
def save_query_as_xml(project_id):
    """
    Saves the query as an XML document (query.xml) in the working directory.
    Creates a Scopus search string and saves it as scopus_search_string.txt.
    Sets project.isQueryDefined = True.
    :param project_id: the ID of the current project
    :return: the saved query serialized as JSON
    """
    project = project_service.load_project(project_id)
    query_json = request.get_json(silent=True)
    query = query_service.from_json(query_json)
    try:
        query_service.save_query_to_xml(project_id, query)
        app.logger.info(
            'project {}: successfully saved query to xml'.format(project_id))
    except IOError:
        # logger.warning replaces the deprecated logger.warn alias
        app.logger.warning(
            'project {}: could not save query to xml'.format(project_id))
        return Response("could not save query", status=500)
    query_service.create_scopus_queries(project_id, query)
    project.isQueryDefined = True
    app.logger.info('project {}: scopus queried defined'.format(project_id))
    project_service.save_project(project)
    # __getstate__ is assumed to expose the query's serializable fields
    return json.dumps(query, default=lambda o: o.__getstate__())
# Example #8
def data_collection_execution(project_id):
    """
    Runs the data collection for a project: loads the saved EID list,
    retrieves the full Scopus record (plus Unpaywall / Altmetric data) for
    each EID and pushes the results into the Elasticsearch index.

    :param project_id: the id of the current project
    :return: 'finished' with a status of 204
    """

    # optional ?mode=... request parameter ('missed' re-collects failed EIDs)
    mode = ''
    if request.args.get('mode') is not None:
        mode = request.args.get('mode')

    app.logger.info('project {}: collecting data with mode {}'.format(
        project_id, mode))

    # load project, set status bools, and load the eid list. initialize missed eid list
    project = project_service.load_project(project_id)
    project.isDataCollecting = True
    project.isDataCollected = False
    eids = eids_service.load_eid_list(project_id, mode)
    missed_eids = []

    with app.app_context():
        keys = app.config.get("LIBINTEL_SCOPUS_KEYS")

    # initialize status, set to collecting and save status
    status = Status("DATA_COLLECTING")
    status.total = len(eids)
    status_service.save_status(project_id, status)

    if status.total > 0:
        if mode != 'missed':
            # full run: start from an empty index
            elasticsearch_service.delete_index(project.project_id)
        else:
            eids_service.deleteMissedEids()
        if type(keys) is tuple:

            # the number of threads is given by the number of available API keys
            number_of_threads = len(keys)
            app.logger.info('project {}: collecting data in {} threads'.format(
                project_id, number_of_threads))

            # gather the individual chunks provided to each process
            length_of_chunks = math.ceil(status.total / number_of_threads)
            list_chunks = list(chunks(eids, length_of_chunks))
            # make asynchronous calls and delegate the individual collection to the individual threads
            for key_index, key in enumerate(keys):
                if len(list_chunks) > key_index:
                    thread = Thread(target=collect_data,
                                    args=(list_chunks[key_index],
                                          project.project_id, project.name,
                                          key_index, key,
                                          app._get_current_object()))
                    thread.start()
            return Response('finished', status=204)

        # single API key: delegate the whole list to collect_data
        collect_data(eids=eids,
                     project_id=project.project_id,
                     project_name=project.name,
                     i=0,
                     key=keys,
                     app=app._get_current_object())

        # NOTE(review): the loop below collects the same EIDs a second time
        # after collect_data() above has already processed them — one of the
        # two paths looks like a leftover; confirm which one should remain.
        for idx, eid in enumerate(eids):

            # set scopus api-key to the provided key
            # NOTE(review): the key casing 'APIKEy' looks suspicious — verify
            # against the scopus configuration schema before changing it.
            scopus.config['Authentication']['APIKEy'] = keys

            # update the progress status and save the status to disk
            status.progress = idx + 1
            status_service.save_status(project_id, status)

            # print progress (bug fix: the original concatenated the format
            # arguments onto the template, so the '{}' placeholder was never
            # filled and the message read e.g. "project {}: ... entry 0of")
            app.logger.info(
                'project {}: processing entry {} of {} entries: {}%'.format(
                    project_id, idx, status.total, idx / status.total * 100))

            # retrieve data from scopus
            try:
                scopus_abstract = scopus.AbstractRetrieval(identifier=eid,
                                                           id_type='eid',
                                                           view="FULL",
                                                           refresh=True)
                app.logger.info(
                    'project {}: collected scopus data for EID {}'.format(
                        project_id, eid))
            except Exception as inst:
                # remember the failed EID so a later 'missed' run can retry it
                app.logger.error(
                    'project {}: could not collect scopus data for EID {}, exception: {}'
                    .format(project_id, eid, type(inst)))
                missed_eids.append(eid)
                continue

            # create new AllResponses object to hold the individual information
            response = AllResponses(eid, project.name, project.project_id)

            # add scopus abstract to AllResponses object
            response.scopus_abstract_retrieval = scopus_abstract

            # get doi and collect unpaywall data and Altmetric data
            doi = scopus_abstract.doi
            if doi is not None and doi != "":
                response.unpaywall_response = Unpaywall(doi)
                response.altmetric_response = Altmetric(doi)
                response.scival_data = Scival([])

            # send response to elastic search index
            elasticsearch_service.send_to_index(response, project.project_id)
            app.logger.info('project {}: saved EID {} to elasticsearch'.format(
                project_id, eid))

    # persist the EIDs that could not be retrieved for a later 'missed' run
    eids_service.save_eid_list(project_id=project.project_id,
                               eids=missed_eids,
                               prefix='missed_')
    app.logger.info('project {}: all EID data collected'.format(project_id))
    status.status = "DATA_COLLECTED"
    status_service.save_status(project_id, status)
    project.isDataCollecting = False
    project.isDataCollected = True
    project_service.save_project(project)
    return Response({"status": "FINISHED"}, status=204)
# Example #9
def references_collection_execution(project_id):
    """
    collects the references for a given collection of publications
    :param project_id: the ID of the current project
    :return: 204 if successful
    """
    # initialize lists, read sample size from request and load eid list
    sample_size = int(request.args.get('sample_size'))
    missed_eids = []
    references_eids = []
    eids = eids_service.load_eid_list(project_id)

    # load project and set booleans
    project = project_service.load_project(project_id)
    project.isReferencesCollecting = True
    project.isReferencesCollected = False
    project_service.save_project(project)

    # prepare status (len() over the dunder call __len__())
    status = Status("REFERENCES_COLLECTING")
    status.total = len(eids)
    status_service.save_status(project_id, status)

    # if eids are given, cycle through all of them
    if status.total > 0:
        for idx, eid in enumerate(eids):
            # update the progress status and save the status to disk
            status.progress = idx + 1
            status_service.save_status(project_id, status)

            # print progress (bug fix: the original concatenated the format
            # arguments onto the template, so the '{}' placeholder was never
            # filled and the message read e.g. "project {}: ... entry 0of")
            app.logger.info(
                'project {}: processing entry {} of {} entries: {}%'.format(
                    project_id, idx, status.total, idx / status.total * 100))

            # retrieve references from scopus
            try:
                scopus_abstract = scopus.AbstractRetrieval(eid, view="FULL")
                app.logger.info(
                    'project {}: collected scopus data for EID {}'.format(
                        project_id, eid))
                if scopus_abstract.references is not None:
                    references_eids = references_eids + scopus_abstract.references
                else:
                    # logger.warning replaces the deprecated logger.warn alias
                    app.logger.warning(
                        'project {}: no references given in scopus export for EID {}.'
                        .format(project_id, eid))
            # NOTE(review): only IOError is caught here while the data
            # collection handler catches Exception — confirm this is intended
            except IOError:
                app.logger.error(
                    'project {}: could not collect scopus data for EID {}'.
                    format(project_id, eid))
                missed_eids.append(eid)
                continue
    # transform references eids into tuple and calculate the occurences
    references_eids_tuple = tuple(references_eids)
    occurences = Counter(references_eids_tuple)
    most_occurences = occurences.most_common(sample_size)

    # save the counter with the most occurences to disk
    counter_service.save_counter(project_id, most_occurences, 'references_')
    eids_service.save_eid_list(project_id, missed_eids, prefix='missed_')

    # set the status and save it to disk
    status.status = "DATA_COLLECTED"
    status_service.save_status(project_id, status)

    # set the project booleans and save it to disk
    project.isReferencesCollecting = False
    project.isReferencesCollected = True
    project_service.save_project(project)

    return Response({"status": "FINISHED"}, status=204)
def query_execution(project_id):
    """
    executes the defined and saved query in scopus
    :param project_id: the ID of the current project
    :return: 'finished' with a status of 204 when the query was executed successfully
    """
    app.logger.info('project {}: running queries'.format(project_id))
    # reads the saved Scopus search string from disk
    scopus_queries = query_service.load_scopus_queries(project_id)

    # retrieve the project from disk, set the booleans and save the project
    project = project_service.load_project(project_id)
    project.isEidsCollected = False
    project.isEidsCollecting = True
    project_service.save_project(project)

    # prepares the status file
    status = Status("EIDS_COLLECTING")
    status_service.save_status(project_id, status)

    # prepare EIDs list
    eids = []

    for index, search_strings in enumerate(scopus_queries.search_strings):
        individual_eids = []
        for search_string in search_strings:
            app.logger.info('project {}: executing search {} - {}'.format(
                project_id, index, search_string))
            search = scopus.ScopusSearch(search_string,
                                         refresh=True,
                                         field='eid',
                                         view='STANDARD')
            if search.results is not None:
                app.logger.info(
                    'project {}: result search {} - {} entries found'.format(
                        project_id, index, len(search.results)))
                for result in search.results:
                    # collect every EID; duplicates across searches are
                    # removed later by converting the lists to sets
                    eids.append(result.eid)
                    individual_eids.append(result.eid)
        eids_service.save_eid_list(
            project_id=project_id,
            eids=set(individual_eids),
            prefix=(str(scopus_queries.search_ids[index]) + '_'))

    # convert to set in order to remove duplicates
    eids = set(eids)

    # print the results to the command line for logging
    app.logger.info('project {}: found {} eids in Scopus'.format(
        project_id, len(eids)))

    # persist EIDs to file
    eids_service.save_eid_list(project_id=project_id, eids=eids)

    # set the total number of results to the relevance_measures measure and
    # save it to disk (len() over the dunder call __len__())
    relevance_measure = RelevanceMeasure(number_of_search_results=len(eids))
    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)

    # set the total number of results to the status save it to disk
    status.total = relevance_measure.number_of_search_results
    status_service.save_status(project_id, status)

    # set the status and save it to disk
    status = Status("EIDS_COLLECTED")
    status_service.save_status(project_id, status)

    # set the project boolean and save the project
    project.isEidslist = True
    project.isEidsCollected = True
    project.isEidsCollecting = False
    project_service.save_project(project)

    return Response({"status": "FINISHED"}, status=204)