def _get_stmt_row(stmt, source, model, cur_counts, test_corpus=None,
                  path_counts=None, cur_dict=None, with_evid=False):
    """Build a single display row (a one-tuple list) for a statement.

    Parameters
    ----------
    stmt : statement object providing ``get_hash()`` and ``evidence``
    source : source tag forwarded into the /evidence link query string
    model : model name forwarded into the /evidence link query string
    cur_counts : dict keyed by statement hash (str) with curation counts
    test_corpus : optional test corpus name added to the /evidence link
    path_counts : optional dict mapping statement hash (str) -> path count
    cur_dict : curation dict needed to format embedded evidence text
    with_evid : if True (and cur_dict is provided), embed up to 10
        formatted evidence entries directly in the row

    Returns
    -------
    list
        A single-element list of
        ``(hash, english, evidence, evidence count, badges)``.
    """
    stmt_hash = str(stmt.get_hash())
    english = _format_stmt_text(stmt)
    evid_count = len(stmt.evidence)

    # Only embed evidence when explicitly requested; cap at 10 entries to
    # keep the row payload small.
    evid = []
    if with_evid and cur_dict is not None:
        evid = _format_evidence_text(
            stmt, cur_dict, ['correct', 'act_vs_amt', 'hypothesis'])[:10]

    params = {
        'stmt_hash': stmt_hash,
        'source': source,
        'model': model,
        'format': 'json'
    }
    if test_corpus:
        params['test_corpus'] = test_corpus
    url_param = parse.urlencode(params)
    json_link = f'/evidence?{url_param}'

    # FIX: default to 0 (not None) when the hash has no recorded paths;
    # previously `path_counts.get(stmt_hash)` could overwrite the 0
    # initialization with None.
    path_count = path_counts.get(stmt_hash, 0) if path_counts else 0

    badges = _make_badges(evid_count, json_link, path_count,
                          cur_counts.get(stmt_hash))
    stmt_row = [(stmt.get_hash(), english, evid, evid_count, badges)]
    return stmt_row
def _make_english_from_meta(interaction): stmt_type = interaction.get('type') agent_json = interaction['agents'] if stmt_type is None: if len(agent_json) == 0: eng = '' else: ag_list = list(agent_json.values()) eng = f'<b>{ag_list[0]}</b>' if len(agent_json) > 1: eng += ' affects ' + f'<b>{ag_list[1]}</b>' if len(agent_json) > 3: eng += ', ' \ + ', '.join(f'<b>{ag}</b>' for ag in ag_list[2:-1]) if len(agent_json) > 2: eng += ', and ' + f'<b>{ag_list[-1]}</b>' else: eng += ' is modified' else: eng = _format_stmt_text(stmt_from_interaction(interaction)) return eng
def decorator(*args, **kwargs):
    """Wrap a query-building function as a full REST endpoint handler.

    Parses pagination/formatting options from the request query string,
    applies authorization-based content restrictions (elsevier text
    redaction, medscan removal), runs the wrapped ``get_db_query``
    function, optionally folds in curation counts, and renders the result
    as JSON or HTML.
    """
    tracker = LogTracker()
    start_time = datetime.now()
    logger.info("Got query for %s at %s!"
                % (get_db_query.__name__, start_time))

    # Pull the generic options out of the query string.
    web_query = request.args.copy()
    offs = _pop(web_query, 'offset', type_cast=int)
    ev_lim = _pop(web_query, 'ev_limit', type_cast=int)
    best_first = _pop(web_query, 'best_first', True, bool)
    max_stmts = min(_pop(web_query, 'max_stmts', MAX_STATEMENTS, int),
                    MAX_STATEMENTS)
    fmt = _pop(web_query, 'format', 'json')
    w_english = _pop(web_query, 'with_english', False, bool)
    w_cur_counts = _pop(web_query, 'with_cur_counts', False, bool)

    # Figure out authorization.
    has = dict.fromkeys(['elsevier', 'medscan'], False)
    # FIX: `user` must be defined on every path; previously it was only
    # assigned in the `not TESTING` branch, so the 'html' format below
    # raised a NameError when TESTING was set.
    user = None
    if not TESTING:
        user, roles = resolve_auth(web_query)
        for role in roles:
            for resource in has.keys():
                has[resource] |= role.permissions.get(resource, False)
        logger.info('Auths: %s' % str(has))
    else:
        web_query.pop('api_key', None)
        has['elsevier'] = False
        has['medscan'] = False

    # Actually run the function.
    logger.info("Running function %s after %s seconds."
                % (get_db_query.__name__, sec_since(start_time)))
    db_query = get_db_query(web_query, *args, **kwargs)
    if isinstance(db_query, Response):
        # The query builder may short-circuit with a ready-made response.
        return db_query
    elif not isinstance(db_query, QueryCore):
        raise RuntimeError("Result should be a child of QueryCore.")

    # Hash-based lookups get a much larger default evidence limit.
    if ev_lim is None:
        if get_db_query is get_statement_by_hash:
            ev_lim = 10000
        else:
            ev_lim = 10

    # Users without medscan access get medscan-only results filtered out
    # at the query level, plus an evidence filter for what remains.
    if not has['medscan']:
        minus_q = ~HasOnlySource('medscan')
        db_query &= minus_q
        ev_filter = minus_q.ev_filter()
    else:
        ev_filter = None

    result = db_query.get_statements(offset=offs, limit=max_stmts,
                                     ev_limit=ev_lim,
                                     best_first=best_first,
                                     evidence_filter=ev_filter)
    logger.info("Finished function %s after %s seconds."
                % (get_db_query.__name__, sec_since(start_time)))

    # Handle any necessary redactions
    res_json = result.json()
    stmts_json = res_json.pop('results')
    elsevier_redactions = 0
    source_counts = result.source_counts
    if not all(has.values()) or fmt == 'json-js' or w_english:
        # Iterate over a copy so entries may be modified in place.
        for h, stmt_json in stmts_json.copy().items():
            if w_english:
                stmt = stmts_from_json([stmt_json])[0]
                stmt_json['english'] = _format_stmt_text(stmt)
                stmt_json['evidence'] = _format_evidence_text(stmt)

            # Fully privileged users need no per-evidence pass unless the
            # format or english rendering requires one.
            if has['elsevier'] and fmt != 'json-js' and not w_english:
                continue

            if not has['medscan']:
                source_counts[h].pop('medscan', 0)

            for ev_json in stmt_json['evidence'][:]:
                if fmt == 'json-js':
                    # JavaScript cannot represent 64-bit ints exactly.
                    ev_json['source_hash'] = str(ev_json['source_hash'])

                # Check for elsevier and redact if necessary
                if not has['elsevier'] and \
                        get_source(ev_json) == 'elsevier':
                    text = ev_json['text']
                    if len(text) > 200:
                        ev_json['text'] = text[:200] + REDACT_MESSAGE
                        elsevier_redactions += 1
    logger.info(f"Redacted {elsevier_redactions} pieces of elsevier "
                f"evidence.")
    logger.info("Finished redacting evidence for %s after %s seconds."
                % (get_db_query.__name__, sec_since(start_time)))

    # Get counts of the curations for the resulting statements.
    if w_cur_counts:
        curations = get_curations(pa_hash=set(stmts_json.keys()))
        logger.info("Found %d curations" % len(curations))
        cur_counts = {}
        for curation in curations:
            # Update the overall counts.
            if curation.pa_hash not in cur_counts:
                cur_counts[curation.pa_hash] = 0
            cur_counts[curation.pa_hash] += 1

            # Work these counts into the evidence dict structure.
            for ev_json in stmts_json[curation.pa_hash]['evidence']:
                if str(ev_json['source_hash']) == str(
                        curation.source_hash):
                    ev_json['num_curations'] = \
                        ev_json.get('num_curations', 0) + 1
                    break
        res_json['num_curations'] = cur_counts

    # Add derived values to the res_json.
    res_json['offset'] = offs
    res_json['evidence_limit'] = ev_lim
    res_json['statement_limit'] = MAX_STATEMENTS
    res_json['statements_returned'] = len(stmts_json)
    res_json['end_of_statements'] = (len(stmts_json) < MAX_STATEMENTS)
    res_json['statements_removed'] = 0
    res_json['evidence_returned'] = result.returned_evidence

    if fmt == 'html':
        title = TITLE + ': ' + 'Results'
        ev_totals = res_json.pop('evidence_totals')
        stmts = stmts_from_json(stmts_json.values())
        html_assembler = HtmlAssembler(stmts, res_json, ev_totals,
                                       source_counts, title=title,
                                       db_rest_url=request.url_root[:-1])
        idbr_template = env.get_template('idbr_statements_view.html')
        identity = user.identity() if user else None
        content = html_assembler.make_model(idbr_template,
                                            identity=identity)
        if tracker.get_messages():
            level_stats = [
                '%d %ss' % (n, lvl.lower())
                for lvl, n in tracker.get_level_stats().items()
            ]
            msg = ' '.join(level_stats)
            content = html_assembler.append_warning(msg)
        mimetype = 'text/html'
    else:
        # Return JSON for all other values of the format argument
        res_json.update(tracker.get_level_stats())
        res_json['statements'] = stmts_json
        res_json['source_counts'] = source_counts
        content = json.dumps(res_json)
        mimetype = 'application/json'

    resp = Response(content, mimetype=mimetype)
    logger.info("Exiting with %d statements with %d/%d evidence of size "
                "%f MB after %s seconds."
                % (res_json['statements_returned'],
                   res_json['evidence_returned'],
                   res_json['total_evidence'],
                   sys.getsizeof(resp.data) / 1e6,
                   sec_since(start_time)))
    return resp
def process_entries(self, result):
    """Post-process the entries of a query ``result`` in place.

    Per entry, applies (as needed): english/evidence rendering when
    ``self.w_english`` is set, strict agent-set filtering when
    ``self.strict`` is set, medscan censoring for unauthorized users,
    elsevier text redaction, and hash stringification for 'json-js'
    output.

    Parameters
    ----------
    result : object with ``result_type`` and ``results`` attributes, plus
        ``source_counts`` (statements) or ``evidence_counts`` (other
        types). Modified in place; nothing is returned.
    """
    if result.result_type == 'hashes':
        # There is really nothing to do for hashes.
        return

    elsevier_redactions = 0
    if not all(self.has.values()) or self.fmt == 'json-js' \
            or self.w_english:
        # Iterate over a copy so entries can be dropped while looping.
        for key, entry in result.results.copy().items():
            # Build english reps of each result (unless they're just
            # hashes).
            if self.w_english and result.result_type != 'hashes':
                stmt = None
                # Fix the agent order
                if self.strict:
                    if result.result_type == 'statements':
                        stmt = stmts_from_json([entry])[0]
                        if type(stmt) == Complex:
                            id_lookup = {v: int(k)
                                         for k, v
                                         in self.agent_dict.items()}
                            stmt.members.sort(
                                key=lambda ag: id_lookup.get(ag.name, 10)
                            )
                        agent_set = {ag.name for ag in stmt.agent_list()
                                     if ag is not None}
                    else:
                        agent_set = set(entry['agents'].values())
                        if result.result_type == 'relations' \
                                and entry['type'] == 'Complex':
                            entry['agents'] = self.agent_dict
                    # Drop entries that do not cover the queried agents.
                    if agent_set < self.agent_set:
                        result.results.pop(key, None)
                        continue

                # Construct the english.
                if result.result_type == 'statements':
                    if stmt is None:
                        stmt = stmts_from_json([entry])[0]
                    eng = _format_stmt_text(stmt)
                    entry['evidence'] = _format_evidence_text(stmt)
                else:
                    # FIX: _make_english_from_meta takes a single
                    # interaction dict (with 'agents' and 'type' keys);
                    # passing (entry['agents'], entry.get('type')) as two
                    # positional args raised a TypeError.
                    eng = _make_english_from_meta(entry)
                if not eng:
                    logger.warning(f"English not formed for {key}:\n"
                                   f"{entry}")
                entry['english'] = eng

            # Filter out medscan if user does not have medscan privileges.
            if not self.has['medscan']:
                if result.result_type == 'statements':
                    result.source_counts[key].pop('medscan', 0)
                else:
                    result.evidence_counts[key] -= \
                        entry['source_counts'].pop('medscan', 0)
                    entry['total_count'] = result.evidence_counts[key]
                    if not entry['source_counts']:
                        logger.warning("Censored content present.")

            # In most cases we can stop here
            if self.has['elsevier'] and self.fmt != 'json-js' \
                    and not self.w_english:
                continue

            if result.result_type == 'statements':
                # If there is evidence, loop through it if necessary.
                for ev_json in entry['evidence'][:]:
                    if self.fmt == 'json-js':
                        # JavaScript cannot represent 64-bit ints exactly.
                        ev_json['source_hash'] = \
                            str(ev_json['source_hash'])

                    # Check for elsevier and redact if necessary
                    if not self.has['elsevier'] and \
                            get_source(ev_json) == 'elsevier':
                        text = ev_json['text']
                        if len(text) > 200:
                            ev_json['text'] = text[:200] + REDACT_MESSAGE
                            elsevier_redactions += 1
            elif result.result_type != 'hashes' \
                    and self.fmt == 'json-js':
                # Stringify lists of hashes.
                if 'hashes' in entry and entry['hashes'] is not None:
                    entry['hashes'] = [str(h) for h in entry['hashes']]
                elif 'hash' in entry:
                    entry['hash'] = str(entry['hash'])

    if result.result_type == 'statements':
        logger.info(f"Redacted {elsevier_redactions} pieces of elsevier "
                    f"evidence.")

    logger.info(f"Process entries for {self.__class__.__name__} after "
                f"{sec_since(self.start_time)} seconds.")
    return