Beispiel #1
0
def test_assembler():
    """Exercise HtmlAssembler: model creation, grouping levels, belief
    badges, source URLs, warnings, and saving to file."""
    import os
    import tempfile

    stmt = make_stmt()
    ha = HtmlAssembler([stmt])
    result = ha.make_model()
    assert isinstance(result, str)
    # Read from the template file and make sure the beginning of the
    # content matches
    template, _, _ = loader.get_source(None, 'indra/template.html')
    assert result.startswith(template[0:100])
    # Make sure assembler works with other parameters provided
    stmt2 = make_bad_stmt()
    ha = HtmlAssembler(
        source_counts={
            stmt.get_hash(): {'test': 1},
            stmt2.get_hash(): {'test': 1},
        },
        ev_counts={
            stmt.get_hash(): 1,
            stmt2.get_hash(): 1,
        },
        db_rest_url='test.db.url')
    ha.add_statements([stmt, stmt2])
    result = ha.make_model(grouping_level='agent-pair')
    assert isinstance(result, str)
    result = ha.make_model(grouping_level='statement')
    assert isinstance(result, str)
    # Check simple=False
    result = ha.make_model(grouping_level='statement', simple=False)
    assert isinstance(result, str)
    # Test belief badges. The very same markup is used for the positive and
    # the negative check so that the negative check cannot pass vacuously
    # because of a formatting difference (e.g. "1" vs "1.0").
    belief_badge = ('<small\n'
                    '      class="badge badge-pill badge-belief"\n'
                    '      title="Belief score for this statement">1.0</small>')
    result = ha.make_model(grouping_level='statement', show_belief=True)
    assert isinstance(result, str)
    assert belief_badge in result
    result = ha.make_model(grouping_level='statement', show_belief=False)
    assert isinstance(result, str)
    assert belief_badge not in result
    # Test if source URL exists
    assert 'http://www.causalbionet.com/' in result
    # Make sure warning can be appended
    ha.append_warning('warning')
    assert ('\t<span style="color:red;">(CAUTION: warning occurred when '
            'creating this page.)</span>' in ha.model)
    # Make sure model is created before saving. The file is written into a
    # temporary directory so the test leaves nothing behind.
    ha = HtmlAssembler([stmt])
    assert not ha.model
    with tempfile.TemporaryDirectory() as tmp_dir:
        ha.save_model(os.path.join(tmp_dir, 'tempfile.html'))
    assert ha.model
def test_assembler():
    """Exercise HtmlAssembler: model creation, grouped and ungrouped
    rendering, warnings, and saving to file."""
    import os
    import tempfile

    stmt = make_stmt()
    ha = HtmlAssembler([stmt])
    result = ha.make_model()
    assert isinstance(result, str)
    # Read from the template file and make sure the beginning of the
    # content matches
    template, _, _ = loader.get_source(None, 'indra/template.html')
    assert result.startswith(template[0:100])
    # Make sure assembler works with other parameters provided
    stmt2 = make_bad_stmt()
    ha = HtmlAssembler(
        source_counts={
            stmt.get_hash(): {'test': 1},
            stmt2.get_hash(): {'test': 1},
        },
        ev_totals={
            stmt.get_hash(): 1,
            stmt2.get_hash(): 1,
        },
        db_rest_url='test.db.url')
    ha.add_statements([stmt, stmt2])
    result = ha.make_model(with_grouping=True)
    assert isinstance(result, str)
    result = ha.make_model(with_grouping=False)
    assert isinstance(result, str)
    # Make sure warning can be appended
    ha.append_warning('warning')
    assert ('\t<span style="color:red;">(CAUTION: warning occurred when '
            'creating this page.)</span>' in ha.model)
    # Make sure model is created before saving. The file is written into a
    # temporary directory so the test leaves nothing behind.
    ha = HtmlAssembler([stmt])
    assert not ha.model
    with tempfile.TemporaryDirectory() as tmp_dir:
        ha.save_model(os.path.join(tmp_dir, 'tempfile.html'))
    assert ha.model
Beispiel #3
0
    def decorator(*args, **kwargs):
        """Handle a statement query request and build the HTTP response.

        Query-string options (``offset``, ``ev_limit``, ``best_first``,
        ``max_stmts``, ``format``, ``with_english``, ``with_cur_counts``) are
        popped from a copy of ``request.args``; what remains is passed, along
        with ``*args`` and ``**kwargs``, to ``get_db_query`` (a name taken
        from the enclosing scope -- presumably the wrapped view function).

        Returns
        -------
        Response
            Whatever ``get_db_query`` returned if that is already a
            ``Response``; otherwise an HTML page when ``format`` is ``html``
            and a JSON document for any other format.

        Raises
        ------
        RuntimeError
            If ``get_db_query`` returns something that is neither a
            ``Response`` nor a ``QueryCore``.
        """
        tracker = LogTracker()
        start_time = datetime.now()
        logger.info("Got query for %s at %s!" %
                    (get_db_query.__name__, start_time))

        web_query = request.args.copy()
        offs = _pop(web_query, 'offset', type_cast=int)
        ev_lim = _pop(web_query, 'ev_limit', type_cast=int)
        best_first = _pop(web_query, 'best_first', True, bool)
        max_stmts = min(_pop(web_query, 'max_stmts', MAX_STATEMENTS, int),
                        MAX_STATEMENTS)
        fmt = _pop(web_query, 'format', 'json')
        w_english = _pop(web_query, 'with_english', False, bool)
        w_cur_counts = _pop(web_query, 'with_cur_counts', False, bool)

        # Figure out authorization.
        # `user` has to be bound on every path: the HTML branch below reads
        # it even when TESTING skips the auth lookup.
        user = None
        has = dict.fromkeys(['elsevier', 'medscan'], False)
        if not TESTING:
            user, roles = resolve_auth(web_query)
            for role in roles:
                for resource in has.keys():
                    has[resource] |= role.permissions.get(resource, False)
            logger.info('Auths: %s' % str(has))
        else:
            web_query.pop('api_key', None)
            has['elsevier'] = False
            has['medscan'] = False

        # Actually run the function.
        logger.info("Running function %s after %s seconds." %
                    (get_db_query.__name__, sec_since(start_time)))
        db_query = get_db_query(web_query, *args, **kwargs)
        if isinstance(db_query, Response):
            return db_query
        elif not isinstance(db_query, QueryCore):
            raise RuntimeError("Result should be a child of QueryCore.")

        # Default evidence limit: much larger for single-statement lookups.
        if ev_lim is None:
            if get_db_query is get_statement_by_hash:
                ev_lim = 10000
            else:
                ev_lim = 10

        # Without medscan access, exclude medscan-only statements from the
        # query and filter the evidence with the same constraint.
        if not has['medscan']:
            minus_q = ~HasOnlySource('medscan')
            db_query &= minus_q
            ev_filter = minus_q.ev_filter()
        else:
            ev_filter = None

        result = db_query.get_statements(offset=offs,
                                         limit=max_stmts,
                                         ev_limit=ev_lim,
                                         best_first=best_first,
                                         evidence_filter=ev_filter)

        logger.info("Finished function %s after %s seconds." %
                    (get_db_query.__name__, sec_since(start_time)))

        # Handle any necessary redactions
        res_json = result.json()
        stmts_json = res_json.pop('results')
        elsevier_redactions = 0
        source_counts = result.source_counts
        if not all(has.values()) or fmt == 'json-js' or w_english:
            for h, stmt_json in stmts_json.copy().items():
                if w_english:
                    stmt = stmts_from_json([stmt_json])[0]
                    stmt_json['english'] = _format_stmt_text(stmt)
                    stmt_json['evidence'] = _format_evidence_text(stmt)

                # Remove medscan counts before the early `continue` below;
                # otherwise a caller with elsevier but without medscan
                # access would still be shown the medscan counts.
                if not has['medscan']:
                    source_counts[h].pop('medscan', 0)

                # Nothing to do per evidence: no elsevier redaction, no
                # hash stringification, no english formatting.
                if has['elsevier'] and fmt != 'json-js' and not w_english:
                    continue

                for ev_json in stmt_json['evidence'][:]:
                    if fmt == 'json-js':
                        ev_json['source_hash'] = str(ev_json['source_hash'])

                    # Check for elsevier and redact if necessary
                    if not has['elsevier'] and \
                            get_source(ev_json) == 'elsevier':
                        text = ev_json['text']
                        if len(text) > 200:
                            ev_json['text'] = text[:200] + REDACT_MESSAGE
                            elsevier_redactions += 1

        logger.info(f"Redacted {elsevier_redactions} pieces of elsevier "
                    f"evidence.")

        logger.info("Finished redacting evidence for %s after %s seconds." %
                    (get_db_query.__name__, sec_since(start_time)))

        # Get counts of the curations for the resulting statements.
        if w_cur_counts:
            curations = get_curations(pa_hash=set(stmts_json.keys()))
            logger.info("Found %d curations" % len(curations))
            cur_counts = {}
            for curation in curations:
                # Update the overall counts.
                if curation.pa_hash not in cur_counts:
                    cur_counts[curation.pa_hash] = 0
                cur_counts[curation.pa_hash] += 1

                # Work these counts into the evidence dict structure.
                for ev_json in stmts_json[curation.pa_hash]['evidence']:
                    if str(ev_json['source_hash']) == str(
                            curation.source_hash):
                        ev_json['num_curations'] = \
                            ev_json.get('num_curations', 0) + 1
                        break
            res_json['num_curations'] = cur_counts

        # Add derived values to the res_json.
        # NOTE(review): the limit reported and used for `end_of_statements`
        # is MAX_STATEMENTS, not the (possibly smaller) `max_stmts` that was
        # applied to the query -- confirm this is what clients expect.
        res_json['offset'] = offs
        res_json['evidence_limit'] = ev_lim
        res_json['statement_limit'] = MAX_STATEMENTS
        res_json['statements_returned'] = len(stmts_json)
        res_json['end_of_statements'] = (len(stmts_json) < MAX_STATEMENTS)
        res_json['statements_removed'] = 0
        res_json['evidence_returned'] = result.returned_evidence

        if fmt == 'html':
            title = TITLE + ': ' + 'Results'
            ev_totals = res_json.pop('evidence_totals')
            stmts = stmts_from_json(stmts_json.values())
            html_assembler = HtmlAssembler(stmts,
                                           res_json,
                                           ev_totals,
                                           source_counts,
                                           title=title,
                                           db_rest_url=request.url_root[:-1])
            idbr_template = env.get_template('idbr_statements_view.html')
            identity = user.identity() if user else None
            content = html_assembler.make_model(idbr_template,
                                                identity=identity)
            # Surface logged problems on the page itself.
            if tracker.get_messages():
                level_stats = [
                    '%d %ss' % (n, lvl.lower())
                    for lvl, n in tracker.get_level_stats().items()
                ]
                msg = ' '.join(level_stats)
                content = html_assembler.append_warning(msg)
            mimetype = 'text/html'
        else:  # Return JSON for all other values of the format argument
            res_json.update(tracker.get_level_stats())
            res_json['statements'] = stmts_json
            res_json['source_counts'] = source_counts
            content = json.dumps(res_json)
            mimetype = 'application/json'

        resp = Response(content, mimetype=mimetype)
        logger.info("Exiting with %d statements with %d/%d evidence of size "
                    "%f MB after %s seconds." %
                    (res_json['statements_returned'],
                     res_json['evidence_returned'], res_json['total_evidence'],
                     sys.getsizeof(resp.data) / 1e6, sec_since(start_time)))
        return resp
Beispiel #4
0
    def produce_response(self, result):
        """Turn a query result into a Response, depending on its type.

        'hashes' results are delegated to the parent class. 'statements'
        results are rendered as an HTML page when ``self.fmt`` is 'html' and
        as JSON otherwise. Every other result type is returned as JSON with
        the results listed under 'relations', annotated with curation counts
        when ``self.w_cur_counts`` is set.
        """
        kind = result.result_type

        # Plain hash results are handled entirely by the parent class.
        if kind == 'hashes':
            return super(StatementApiCall, self).produce_response(result)

        if kind == 'statements':
            res_json = result.json()

            # Attach the derived summary values.
            if self.w_cur_counts:
                res_json['num_curations'] = self.get_curation_counts(result)
            n_returned = len(result.results)
            res_json.update({
                'statement_limit': MAX_STMTS,
                'statements_returned': n_returned,
                'end_of_statements': n_returned < MAX_STMTS,
                'statements_removed': 0,
                'evidence_returned': result.returned_evidence,
            })

            statements_json = result.results
            if self.fmt != 'html':
                # Every non-HTML format gets the JSON payload.
                res_json.update(self.tracker.get_level_stats())
                res_json['statements'] = statements_json
                body = json.dumps(res_json)
                mimetype = 'application/json'
            else:
                ev_counts = res_json.pop('evidence_counts')
                beliefs = res_json.pop('belief_scores')
                stmts = stmts_from_json(statements_json.values())
                url_base = request.url_root[:-1]
                root_path = self._env.globals['url_for']('root')
                assembler = HtmlAssembler(
                    stmts,
                    summary_metadata=res_json,
                    ev_counts=ev_counts,
                    beliefs=beliefs,
                    sort_by=self.sort_by,
                    title=TITLE,
                    source_counts=result.source_counts,
                    db_rest_url=url_base + root_path[:-1],
                )
                page_template = \
                    self._env.get_template('idbr_statements_view.html')

                # The user identity is only looked up outside of testing.
                identity = None
                if not TESTING['status'] and self.user:
                    identity = self.user.identity()

                source_info, source_colors = get_html_source_info()
                body = assembler.make_model(
                    page_template,
                    identity=identity,
                    source_info=source_info,
                    source_colors=source_colors,
                    simple=False,
                )

                # Surface any logged problems on the page itself.
                if self.tracker.get_messages():
                    stats = self.tracker.get_level_stats()
                    warning = ' '.join('%d %ss' % (count, level.lower())
                                       for level, count in stats.items())
                    body = assembler.append_warning(warning)
                mimetype = 'text/html'

            resp = Response(body, mimetype=mimetype)
            logger.info("Exiting with %d statements with %d/%d evidence of "
                        "size %f MB after %s seconds."
                        % (res_json['statements_returned'],
                           res_json['evidence_returned'],
                           res_json['total_evidence'],
                           sys.getsizeof(resp.data) / 1e6,
                           sec_since(self.start_time)))
            return resp

        # All remaining result types are relation-like.
        if self.w_cur_counts:
            # Map each statement hash to the relations that include it.
            rels_by_hash = defaultdict(list)
            if kind == 'interactions':
                for hash_key, relation in result.results.items():
                    relation['cur_count'] = 0
                    rels_by_hash[int(hash_key)].append(relation)
            else:
                for relation in result.results.values():
                    for hash_val in relation['hashes']:
                        relation['cur_count'] = 0
                        rels_by_hash[int(hash_val)].append(relation)
                    if not self.special['with_hashes']:
                        relation['hashes'] = None

            # Count one curation for every relation sharing its hash.
            for curation in get_curations(pa_hash=set(rels_by_hash.keys())):
                for relation in rels_by_hash[curation['pa_hash']]:
                    relation['cur_count'] += 1

        logger.info("Returning with %s results after %.2f seconds."
                    % (len(result.results), sec_since(self.start_time)))

        res_json = result.json()
        # Swap the keyed 'results' mapping for a flat 'relations' list.
        res_json['relations'] = list(res_json.pop('results').values())
        if kind == 'agents' and self.fmt == 'json-js':
            res_json['complexes_covered'] = \
                [str(h) for h in res_json['complexes_covered']]
        res_json['query_str'] = str(self.db_query)
        resp = Response(json.dumps(res_json), mimetype='application/json')

        logger.info("Result prepared after %.2f seconds."
                    % sec_since(self.start_time))
        return resp