def getPrecision(project_id):
    """
    calculates the precision from the judgements stored for a project: the share of sample entries
    that were judged relevant. The updated relevance measure is saved before it is returned.
    :param project_id: the ID of the current project
    :return: the relevance measure object passed through jsonify
    """
    # the collected EIDs are only needed for the total number of search results
    collected_eids = eids_service.load_eid_list(project_id)
    relevance_measure = relevance_measure_service.load_relevance_measure(
        project_id)
    if relevance_measure is None:
        relevance_measure = RelevanceMeasure()
    relevance_measure.number_of_search_results = len(collected_eids)

    judgement_list = eids_service.load_judgement_file(project_id)
    app.logger.info('project {}: loaded judgements'.format(project_id))

    sample_size = len(judgement_list)
    positives = sum(1 for judgement in judgement_list if judgement.isRelevant)
    relevance_measure.number_sample_entries = sample_size
    relevance_measure.number_positive_sample_entries = positives
    # an empty sample yields a precision of 0 instead of a division by zero
    relevance_measure.precision = positives / sample_size if sample_size > 0 else 0

    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)
    app.logger.info(
        'project {}: calculated relevance measure precision'.format(
            project_id))
    return jsonify(relevance_measure)
def check_test_eids(project_id):
    """
    calculates the recall by comparing the list of EIDs retrieved from the query against the list of
    test EIDs (loaded with the 'test_' prefix). The updated relevance measure is saved before it is returned.
    :param project_id: the ID of the current project
    :return: a JSON formatted relevance measure object.
    """
    test_eids = eids_service.load_eid_list(project_id, 'test_')
    app.logger.info('project {}: loaded test eids'.format(project_id))

    # load collected eids
    eids = eids_service.load_eid_list(project_id)
    relevance_measure = relevance_measure_service.load_relevance_measure(
        project_id)
    if relevance_measure is None:
        relevance_measure = RelevanceMeasure()
    relevance_measure.number_of_search_results = len(eids)
    relevance_measure.number_test_entries = len(test_eids)

    # set membership avoids rescanning the full result list for every test EID
    collected = set(eids)
    relevance_measure.number_test_entries_found = sum(
        1 for test_eid in test_eids if test_eid in collected)

    # guard on the divisor: an empty test list must not raise ZeroDivisionError.
    # With no search results nothing can be found, so the recall is still 0.
    if relevance_measure.number_test_entries > 0:
        relevance_measure.recall = (relevance_measure.number_test_entries_found
                                    / relevance_measure.number_test_entries)
    else:
        relevance_measure.recall = 0

    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)
    app.logger.info(
        'project {}: calculated relevance measure recall'.format(project_id))
    return jsonify(relevance_measure)
def check_sample_eids(project_id):
    """
    counts the sample entries and the positively judged sample entries of a project, stores both numbers
    in the relevance measure, saves it and returns it.
    :param project_id: the ID of the current project
    :return: the attribute dictionary of the relevance measure passed through jsonify
    """
    # load collected eids
    eids = eids_service.load_eid_list(project_id)
    relevance_measure = relevance_measure_service.load_relevance_measure(
        project_id)
    if relevance_measure is None:
        relevance_measure = RelevanceMeasure()
    relevance_measure.number_of_search_results = len(eids)

    judgement_list = eids_service.load_judgement_file(project_id)
    # attribute access throughout: the object is written by attribute above and
    # serialised through __dict__ below, so subscript access was inconsistent
    relevance_measure.number_sample_entries = len(judgement_list)
    # recount from zero, otherwise a measure loaded from disk keeps growing
    # with every call
    # NOTE(review): judgements are indexed like mappings here - confirm against
    # what eids_service.load_judgement_file returns
    relevance_measure.number_positive_sample_entries = sum(
        1 for judgement in judgement_list if judgement['isRelevant'])

    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)
    return jsonify(relevance_measure.__dict__)
def load_relevance_measure(project_id):
    """
    loads the relevance measures of a project from
    <LIBINTEL_DATA_DIR>/out/<project_id>/relevance_measures.json
    :param project_id: the ID of the project
    :return: a RelevanceMeasure built from the stored JSON object, or None if the file does not exist
             or does not contain valid JSON
    """
    with app.app_context():
        location = app.config.get("LIBINTEL_DATA_DIR")
        path_to_file = location + '/out/' + project_id + '/relevance_measures.json'
        # open() has to be inside the try block, otherwise a missing file
        # propagates instead of being handled; the with statement closes the file
        try:
            with open(path_to_file) as json_file:
                relevance_measure = json.load(json_file)
        except (FileNotFoundError, JSONDecodeError):
            # None (not an empty dict) so that callers testing "is None" can
            # fall back to a fresh RelevanceMeasure
            return None
        return RelevanceMeasure(**relevance_measure)
def check_test_eids(project_id):
    """
    calculates the recall by comparing the collected EIDs of a project against its test EIDs
    (loaded with the 'test_' prefix). The updated relevance measure is saved before it is returned.
    :param project_id: the ID of the current project
    :return: the attribute dictionary of the relevance measure passed through jsonify
    """
    # load test eids
    test_eids = eids_service.load_eid_list(project_id, 'test_')

    # load collected eids
    eids = eids_service.load_eid_list(project_id)
    relevance_measure = relevance_measure_service.load_relevance_measure(
        project_id)
    if relevance_measure is None:
        relevance_measure = RelevanceMeasure()
    relevance_measure.number_of_search_results = len(eids)
    relevance_measure.number_test_entries = len(test_eids)

    # recount from zero so a measure loaded from disk does not accumulate hits
    # over repeated calls; set membership avoids rescanning the result list
    collected = set(eids)
    relevance_measure.number_test_entries_found = sum(
        1 for test_eid in test_eids if test_eid in collected)

    # guard on the divisor: an empty test list must not raise ZeroDivisionError.
    # With no search results nothing can be found, so the recall is still 0.
    if relevance_measure.number_test_entries > 0:
        relevance_measure.recall = (relevance_measure.number_test_entries_found
                                    / relevance_measure.number_test_entries)
    else:
        relevance_measure.recall = 0

    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)
    return jsonify(relevance_measure.__dict__)
def query_execution(project_id):
    """
    runs the saved Scopus queries of a project, stores the EIDs found per search group and in total,
    and updates the project flags, the status and the relevance measure on the way.
    :param project_id: the ID of the current project
    :return: a response with HTTP status 204 once all searches have been executed
    """
    app.logger.info('project {}: running queries'.format(project_id))

    # the saved Scopus search strings of the project
    scopus_queries = query_service.load_scopus_queries(project_id)

    # flag the project as "collecting" before the searches start
    project = project_service.load_project(project_id)
    project.isEidsCollected = False
    project.isEidsCollecting = True
    project_service.save_project(project)

    status = Status("EIDS_COLLECTING")
    status_service.save_status(project_id, status)

    all_eids = []
    for group_index, group in enumerate(scopus_queries.search_strings):
        group_eids = []
        for query in group:
            app.logger.info('project {}: executing search {} - {}'.format(
                project_id, group_index, query))
            search = scopus.ScopusSearch(query,
                                         refresh=True,
                                         field='eid',
                                         view='STANDARD')
            hits = search.results
            if hits is None:
                continue
            app.logger.info(
                'project {}: result search {} - {} entries found'.format(
                    project_id, group_index, len(hits)))
            found = [hit.eid for hit in hits]
            # duplicates are kept here and removed by the set conversions below
            all_eids.extend(found)
            group_eids.extend(found)
        # one EID file per search group, prefixed with the id of that group
        group_prefix = str(scopus_queries.search_ids[group_index]) + '_'
        eids_service.save_eid_list(project_id=project_id,
                                   eids=set(group_eids),
                                   prefix=group_prefix)

    # convert to set in order to remove duplicates across all searches
    unique_eids = set(all_eids)
    app.logger.info('project {}: found {} eids in Scopus'.format(
        project_id, len(unique_eids)))

    # persist the combined EIDs
    eids_service.save_eid_list(project_id=project_id, eids=unique_eids)

    # start a fresh relevance measure holding only the number of results
    relevance_measure = RelevanceMeasure(
        number_of_search_results=len(unique_eids))
    relevance_measure_service.save_relevance_measures(project_id,
                                                      relevance_measure)

    # store the total on the running status, then replace it with the final status
    status.total = relevance_measure.number_of_search_results
    status_service.save_status(project_id, status)
    status = Status("EIDS_COLLECTED")
    status_service.save_status(project_id, status)

    # flag the project as "collected"
    project.isEidslist = True
    project.isEidsCollected = True
    project.isEidsCollecting = False
    project_service.save_project(project)

    return Response({"status": "FINISHED"}, status=204)