def analyse(api_key, endpoint, doi, project_id, result_id, project_short_name,
            path, throttle, **kwargs):
    """Analyse a single Convert-a-Card result and store the outcome.

    The task runs for the result's task are loaded into a dataframe that is
    restricted to the columns listed in ``VALID_KEYS`` and passed through
    ``helpers.drop_empty_rows``. When there are answers and
    ``helpers.has_n_matches`` reports enough agreement, the most frequent
    value of each column is written to ``result.info``. When there are
    answers but not enough agreement, ``analysis_complete`` is set to
    ``False``. The result is always pushed back to the server, after which
    the function sleeps for ``throttle`` seconds.

    Extra keyword arguments are accepted and ignored.
    """
    client = enki.Enki(api_key, endpoint, project_short_name, all=1)
    matches = enki.pbclient.find_results(project_id, id=result_id, limit=1,
                                         all=1)
    result = matches[0]

    runs = helpers.get_task_run_df(client, result.task_id)
    runs = runs.loc[:, runs.columns.isin(VALID_KEYS)]
    runs = helpers.drop_empty_rows(runs)
    run_count = len(runs.index)

    # Start from a result whose answer fields are all empty strings.
    blank_answers = dict.fromkeys(runs.keys(), "")
    result.info = helpers.init_result_info(doi, path, blank_answers)

    has_answers = not runs.empty
    has_matches = helpers.has_n_matches(runs, run_count, MATCH_PERCENTAGE)

    if has_answers:
        if has_matches:
            # Matching answers: keep the most common value per column.
            for column in runs.keys():
                result.info[column] = runs[column].value_counts().idxmax()
        else:
            # Varied answers: flag the result as not yet complete.
            result.info['analysis_complete'] = False

    enki.pbclient.update_result(result)
    time.sleep(throttle)
def download_task_runs():
    """Return the task-run dataframe of the local 'opentaal' project.

    The API key is read verbatim from the file at ``KEY_PATH``; the server
    endpoint and project short name are fixed in this function.
    """
    with open(KEY_PATH, "r") as key_file:
        api_key = key_file.read()

    client = enki.Enki(
        api_key=api_key,
        endpoint='http://localhost:5000',
        project_short_name='opentaal',
        all=1,
    )
    client.get_all()
    return client.task_runs_df
def test_basic(self):
    """Check that ``basic`` loads tasks and task runs and returns 'OK'."""
    with patch('enki.Enki', autospec=True):
        # ``enki.Enki`` is patched for the duration of this block, so the
        # object built here is a mock rather than a real client.
        client = enki.Enki(endpoint='server', api_key='api',
                           project_short_name='project')
        # No tasks: the analyzer has nothing to iterate over.
        client.tasks = []

        outcome = basic(**self.payload)

        assert client.get_tasks.called
        assert client.get_task_runs.called
        assert outcome == 'OK', outcome
def analyse_all(**kwargs):
    """Analyse all Convert-a-Card results.

    Reads ``api_key``, ``endpoint`` and ``project_short_name`` from
    ``kwargs``, loads every result of the project and calls ``analyse`` once
    per result with ``project_id`` and ``result_id`` added to the keyword
    arguments.

    The keyword arguments are echoed to stdout as a debug trace, with the
    API key masked so the credential never ends up in console output or
    captured logs.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(dict(kwargs, api_key='***'))

    e = enki.Enki(kwargs['api_key'], kwargs['endpoint'],
                  kwargs['project_short_name'], all=1)
    results = object_loader.load(enki.pbclient.find_results,
                                 project_id=e.project.id, all=1)
    for result in results:
        kwargs['project_id'] = e.project.id
        kwargs['result_id'] = result.id
        print(dict(kwargs, api_key='***'))
        # Pass a copy so the callee cannot alter the dict reused next round.
        analyse(**kwargs.copy())
def analyse_all_selections(**kwargs):
    """Analyse every In the Spotlight selection result, then send an email.

    Reads ``api_key``, ``endpoint``, ``project_short_name`` and
    ``mail_recipients`` from ``kwargs``. Each result of the project is handed
    to ``analyse_selections`` together with the project and result ids; once
    all of them are processed a summary mail is sent through
    ``helpers.send_mail``.
    """
    client = enki.Enki(kwargs['api_key'], kwargs['endpoint'],
                       kwargs['project_short_name'], all=1)
    results = object_loader.load(enki.pbclient.find_results,
                                 project_id=client.project.id, all=1)

    for result in results:
        # Fresh argument dict per result, extended with the ids to analyse.
        call_args = dict(kwargs, project_id=client.project.id,
                         result_id=result.id)
        analyse_selections(**call_args)

    summary = ''' All {0} results for {1} have been analysed. '''.format(
        len(results), client.project.name)
    helpers.send_mail({
        'recipients': kwargs['mail_recipients'],
        'subject': 'Analysis complete',
        'body': summary
    })
def analyse_selections(api_key, endpoint, project_id, result_id, path, doi,
                       project_short_name, throttle, **kwargs):
    """Analyse one In the Spotlight selection result.

    All annotations from the task runs of the result's task are collected.
    Annotations with motivation ``'commenting'`` are kept as they are.
    Annotations with motivation ``'tagging'`` are compared with the clusters
    found so far: every cluster whose overlap ratio with the annotation
    exceeds ``MERGE_RATIO`` is updated with the merged rectangle, and an
    annotation that matches no cluster becomes a new cluster. The clusters
    followed by the comments are stored under ``result.info['annotations']``,
    the result is updated on the server and the function sleeps for
    ``throttle`` seconds.

    Raises:
        ValueError: if an annotation has any other motivation.
    """
    client = enki.Enki(api_key, endpoint, project_short_name, all=1)
    result = enki.pbclient.find_results(project_id, id=result_id, limit=1,
                                        all=1)[0]
    runs = helpers.get_task_run_df(client, result.task_id)

    # One flat list holding the annotations of every task run.
    annotations = [anno for run_annos in runs['info'].tolist()
                   for anno in run_annos]

    result.info = helpers.init_result_info(doi, path, {'annotations': []})
    clusters = []
    comments = []

    for anno in annotations:
        motivation = anno['motivation']

        if motivation == 'commenting':
            comments.append(anno)
            continue

        if motivation != 'tagging':  # pragma: no cover
            raise ValueError('Unhandled motivation')

        # Cluster similar regions
        rect = get_rect_from_selection(anno)
        matched = False
        for cluster in clusters:
            cluster_rect = get_rect_from_selection(cluster)
            if get_overlap_ratio(rect, cluster_rect) > MERGE_RATIO:
                matched = True
                update_selector(cluster, merge_rects(rect, cluster_rect))

        if not matched:
            # Still update the selector so the rect params get rounded.
            update_selector(anno, rect)
            clusters.append(anno)

    result.info['annotations'] = clusters + comments
    enki.pbclient.update_result(result)
    time.sleep(throttle)
def basic(**kwargs):
    """Summarise the answers of a task and store them on its result.

    Reads ``project_short_name``, ``task_id``, ``project_id`` and
    ``result_id`` from ``kwargs``. For each loaded task the ``describe()``
    summary and the ``value_counts()`` of the task runs' ``info`` column are
    saved as ``result.info``. The keyword arguments are then dumped as JSON
    to ``./static/results.json``.

    Returns:
        The string ``"OK"``.
    """
    client = enki.Enki(endpoint=settings.endpoint,
                       api_key=settings.api_key,
                       project_short_name=kwargs['project_short_name'])
    client.get_tasks(task_id=kwargs['task_id'])
    client.get_task_runs()

    for task in client.tasks:  # pragma: no cover
        answers = client.task_runs_df[task.id]['info']
        summary = dict(answers.describe())
        analysis = dict(client.task_runs_df[task.id]['info'].value_counts())

        found = enki.pbclient.find_results(project_id=kwargs['project_id'],
                                           id=kwargs['result_id'])
        result = found[0]
        result.info = dict(summary=summary, analysis=analysis)
        enki.pbclient.update_result(result)

    # Keep a record of the arguments this analysis was invoked with.
    serialised = json.dumps(kwargs)
    with open('./static/results.json', 'w') as record:
        record.write(serialised)
    return "OK"
def relevance(**kwargs):
    """Compute the relevance ratio of a task's image and push it to e2mc.

    Reads ``project_short_name`` and ``task_id`` from ``kwargs``. The task
    runs of the task are fetched from Pybossa, the share of answers whose
    first value equals ``True`` is computed, and that ratio is posted as a
    ``relevance`` tag to the e2mc data API. The keyword arguments are then
    dumped as JSON to ``./static/results.json``.

    The data API credentials are read from ``settings.e2mc_username`` and
    ``settings.e2mc_password``.
    NOTE(review): these two setting names are an assumption; the original
    left both values blank. Confirm them against the settings module.

    Returns:
        The response object returned by ``requests.post``.

    Raises:
        ValueError: if the data API credentials are not configured.
    """
    # e2mc data API credentials: fail before doing any network work.
    username = getattr(settings, 'e2mc_username', None)
    password = getattr(settings, 'e2mc_password', None)
    if username is None or password is None:
        raise ValueError('e2mc data API credentials are not configured '
                         'in settings')

    headers = {
        'Content-Type': 'application/json',
    }

    # Obtain task run results from Pybossa:
    e = enki.Enki(api_key=settings.api_key,
                  endpoint=settings.endpoint,
                  project_short_name=kwargs['project_short_name'],
                  all=1)
    e.get_tasks(task_id=kwargs['task_id'])
    post_id = e.tasks_df['_id'][kwargs['task_id']]
    e.get_task_runs()
    df = e.task_runs_df[kwargs['task_id']]

    # Calculate total number of answers for the task and create a subset DF:
    answers = df[df.columns[0]]
    total = len(answers)
    subset = answers.apply(pd.Series)[0].apply(pd.Series)

    # Count the number of times the image was considered relevant.
    # The explicit comparison with True is an element-wise pandas mask.
    true_count = len(subset[subset[subset.columns[0]] == True])  # noqa: E712

    # Force true division: on Python 2 int / int would truncate the ratio.
    score = float(true_count) / total

    # Prepare payload:
    payload = {
        "name": "relevance",
        "post": post_id,
        "value": str(score)
    }

    # Push payload into data API
    r = requests.post('http://131.175.120.92:5555/e2mc/datapi/v1.1/tags',
                      auth=(username, password),
                      headers=headers,
                      data=json.dumps(payload))

    # Create record file:
    with open('./static/results.json', 'w') as f:
        f.write(json.dumps(kwargs))
    return r
def basic(**kwargs):
    """A basic analyzer.

    Reads ``project_short_name``, ``task_id``, ``project_id`` and
    ``result_id`` from ``kwargs``. Unless ``task_id`` is 95049, the task and
    its task runs are loaded and every item of each run's
    ``tr.info['answer']`` is collected into a dataframe. Depending on the
    counts returned by ``get_count_nan`` and ``get_consensus`` the function
    either creates a result via ``create_result``, bumps ``n_answers`` on the
    task to request one more answer, or stores the consensus answers on the
    result.

    Returns the value of ``create_result`` or ``enki.pbclient.update_task``
    when one of those branches is taken, otherwise the string ``'OK'``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; in particular the pairing of each ``else`` with its
    ``if`` should be confirmed against the original file.
    """
    e = enki.Enki(endpoint=settings.endpoint,
                  api_key=settings.api_key,
                  project_short_name=kwargs['project_short_name'],
                  all=1)
    # Task 95049 is skipped entirely; the reason is not visible in this block.
    if kwargs['task_id'] != 95049:
        e.get_tasks(task_id=kwargs['task_id'])
        e.get_task_runs()
        # NOTE(review): ``labels`` is never read in this function.
        labels = ['task_run_id', 'speciesID', 'speciesScientificName',
                  'speciesCommonName', 'animalCount']
        for t in e.tasks:
            # Flatten the answers of every task run into one list of rows.
            data = []
            project_id = t.project_id
            for tr in e.task_runs[t.id]:
                for datum in tr.info['answer']:
                    data.append(datum)
            df = pd.DataFrame(data)
            # If 5 first answers is nan (nothing here) mark task
            # as completed
            vc = get_count_nan(df)
            # Debug output of the first index/count pair.
            print vc.index[0]
            print vc.values[0]
            if len(e.task_runs[t.id]) == 5:
                # NOTE(review): ``msg`` is assigned in several branches but
                # never read in this function.
                msg = "The five taskruns reported no animal"
                # First entry is 'nan' or -1 with a count of exactly 5.
                if type(vc) == pd.Series and ((str(vc.index[0]) == 'nan' or vc.index[0] == -1) and vc.values[0] == 5):
                    result = enki.pbclient.find_results(project_id=kwargs['project_id'],
                                                        id=kwargs['result_id'],all=1)
                    if len(result) > 0:
                        return create_result(t, settings.no_animal, result[0])
                else:
                    # Re-open the task and ask for one more answer.
                    task = get_task(t.project_id, t.id)
                    task.n_answers += 1
                    task.state = 'ongoing'
                    return enki.pbclient.update_task(task)
            else:
                # First entry is 'nan' or -1 with a count of at least 10.
                if (str(vc.index[0]) == 'nan' or vc.index[0] == -1) and vc.values[0] >= 10:
                    msg = "10 taskruns reported no animal"
                    result = enki.pbclient.find_results(project_id=kwargs['project_id'],
                                                        id=kwargs['result_id'],all=1)
                    if len(result) > 0:
                        return create_result(t, settings.no_animal, result[0])
                else:
                    answers = get_consensus(df, th=10)
                    if len(answers) == 0:
                        # No consensus: keep asking until 25 task runs exist,
                        # then record a "no consensus" result.
                        if len(e.task_runs[t.id]) < 25:
                            msg = "No consensus. Asking for one more answer."
                            task = get_task(t.project_id, t.id)
                            task.n_answers += 1
                            task.state = 'ongoing'
                            return enki.pbclient.update_task(task)
                        else:
                            result = enki.pbclient.find_results(project_id=kwargs['project_id'],
                                                                id=kwargs['result_id'],all=1)
                            if len(result) > 0:
                                return create_result(t, settings.no_consensus, result[0])
                    else:
                        # Enrich each consensus answer with red list status
                        # and fields copied from the task info.
                        for a in answers:
                            iucn_red_list_status, species = get_red_list_status(a['speciesScientificName'], project_id)
                            a['speciesCommonName'] = species
                            a['iucn_red_list_status'] = iucn_red_list_status
                            a['imageURL'] = t.info.get('image', None)
                            a['deploymentID'] = t.info.get('deploymentID', None)
                            a['deploymentLocationID'] = t.info.get('deploymentLocationID', None)
                            a['Create_time'] = t.info.get('Create_time')
                        result = enki.pbclient.find_results(project_id=kwargs['project_id'],
                                                            id=kwargs['result_id'],all=1)
                        if len(result) > 0:
                            result = result[0]
                            # One answer is stored directly; two or more are
                            # wrapped under the 'answers' key.
                            if len(answers) == 1:
                                result.info = answers[0]
                            if len(answers) >= 2:
                                result.info = dict(answers=answers)
                            give_badges(e, t, answers, result)
                            result = enki.pbclient.update_result(result)
                            return 'OK'
    return "OK"