def author_name(claimReview, defValue="unknown author"):
    """Return a display name for the author of a claim review.

    The value at `author.name` is used when it is not None. Otherwise
    the name is derived from the domain of `author.url`, with a leading
    `www.` and a trailing `.com` removed.

    :param claimReview: dict which may hold an `author` dict with a
      `name` and/or a `url`
    :param defValue: value returned when no (non-empty) name is found
    :returns: the author name or `defValue`
    :rtype: str
    """
    name = dictu.get_in(claimReview, ['author', 'name'])
    if name is None:
        url = dictu.get_in(claimReview, ['author', 'url'])
        # without a url there is no domain to derive a name from
        name = content.domain_from_url(url) if url else None
        if name:
            # only strip a real prefix/suffix: `str.replace` would also
            # remove matches from the middle of the domain
            if name.startswith('www.'):
                name = name[len('www.'):]
            if name.endswith('.com'):
                name = name[:-len('.com')]
    return name or defValue
Exemple #2
0
def from_old_DomainCredibility(dom_cred, cfg):
    """Build a `WebSiteCredReview` out of a legacy `DomainCredibility`.

    :param dom_cred: the `DomainCredibility` dict to convert; it must
      have an `assessments` entry
    :param cfg: configuration options (not read by this function)
    :returns: the corresponding `WebSiteCredReview`
    :rtype: dict
    """
    # the legacy format references the site by a plain string, so the
    # WebSite item has to be rebuilt from it
    website = content.str_as_website(
        dom_cred.get('itemReviewed', 'missing_website'))

    cred_value = dictu.get_in(dom_cred, ['credibility', 'value'], 0.0)
    assessments = dom_cred['assessments']
    n_assessments = len(assessments)
    explanation = 'based on %d review(s) by external rater(s)%s' % (
        n_assessments, example_raters_markdown(dom_cred))

    return {
        '@context': 'http://coinform.eu',
        '@type': 'WebSiteCredReview',
        'additionalType': content.super_types('WebSiteCredReview'),
        'itemReviewed': website,
        'text': 'Site `%s` seems *%s* %s' % (
            website.get('name', '??'),
            credlabel.describe_credval(cred_value, None),
            explanation),
        'author': misinfoMeSourceCredReviewer(),
        'reviewRating': {
            '@type': 'AggregateRating',
            'reviewAspect': 'credibility',
            'ratingValue': cred_value,
            'confidence': dictu.get_in(
                dom_cred, ['credibility', 'confidence'], 0.5),
            'ratingExplanation': explanation,
            # one review and one rating are counted per assessment
            'reviewCount': n_assessments,
            'ratingCount': n_assessments
        },
        'dateCreated': dom_cred.get('dateCreated',
                                    isodate.now_utc_timestamp()),
        'reviewAspect': 'credibility',
        'isBasedOn': [],  # TODO:
        'isBasedOn_assessments': assessments,
        'timings': dom_cred.get('timings', {})
    }
Exemple #3
0
def aggregate_sub_creds(sub_creds, scope_name, cfg):
    """Reduce a list of credibility dicts to one credibility dict.

    Sub credibilities whose confidence is not above the configured
    threshold are discarded; of the remaining ones, the one with the
    lowest credibility value is reported.

    *deprecated* prefer `aggregate_subReviews`, which works on
     schema.org compliant Reviews and Ratings.

    :param sub_creds: list of dicts, each with a `credibility` dict
      holding `value`, `confidence` and `explanation`
    :param scope_name: string naming the scope the sub_creds come
      from, e.g. `document`; only used in explanations
    :param cfg: config options; `cred_conf_threshold` is read
      (default 0.7)
    :returns: the aggregate credibility dict
    :rtype: dict
    """
    def not_verifiable(explanation):
        # shared shape of both "nothing to report" results
        return {
            'credibility': 0.0,
            'confidence': 0.0,
            'credibility_label': 'not verifiable',
            'explanation': explanation
        }

    # nothing to aggregate
    if sub_creds is None or len(sub_creds) == 0:
        return not_verifiable("No textual content found")

    # keep only the sub credibilities we are confident enough about
    min_conf = float(cfg.get('cred_conf_threshold', 0.7))
    confident = [
        cred for cred in sub_creds
        if dictu.get_in(cred, ['credibility', 'confidence'], 0.0) > min_conf
    ]
    if not confident:
        scope_str = '%d sentences in %s' % (len(sub_creds), scope_name)
        return not_verifiable(
            'Could not assess credibility of %s with %s' % (
                scope_str, 'sufficient confidence'))

    # the least credible (but confident) sub credibility wins; on ties
    # the first one in the input order is picked
    least_credible = min(confident,
                         key=lambda cred: cred['credibility']['value'])
    cred = least_credible['credibility']
    explanation = 'Sentence in %s: %s' % (scope_name, cred['explanation'])
    value = cred['value']
    return {
        'credibility': value,
        'confidence': cred['confidence'],
        'credibility_label': credlabel.describe_credval(value,
                                                        cred_dict=None),
        'explanation': explanation
    }
Exemple #4
0
def sa_resp_to_aw_facts(sem_analysis):
    """Map the `facts` of a semantic analysis onto flat field values.

    For each fact three values are appended (via `append_field_val`):
    to `fact_<taxonomy>_tax`, to `fact_<taxonomy>_tax_hl` and to
    `facts_domain_<taxonomy>_tax`, where `<taxonomy>` is the lower-cased
    fact `type`.

    :param sem_analysis: dict which may have a `facts` list; each fact
      must have `type`, `factName` and `hl`
    :returns: dict with the collected field values; empty when there
      are no facts
    :rtype: dict
    """
    fields = {}
    for fact in sem_analysis.get('facts', []):
        fact_type = fact['type']
        code_path = fact['factName']
        entity_type = dictu.get_in(fact, ['entity', 'type'])
        entity_value = dictu.get_in(fact, ['entity', 'value'])
        highlight = fact['hl']

        tax_key = fact_type.lower()
        tax_field = 'fact_%s_tax' % tax_key
        entity_path = '%s/%s/%s' % (code_path, norm_title(entity_type),
                                    entity_value)
        append_field_val(fields, tax_field, entity_path)
        append_field_val(fields, '%s_hl' % tax_field, code_path + highlight)
        append_field_val(fields, 'facts_domain_%s_tax' % tax_key, code_path)
    return fields
Exemple #5
0
def predict_stances(qclaim_doc_bodies):
    """Request stance predictions from the stance prediction service.

    POSTs `qclaim_doc_bodies` as JSON to
    `<stance_pred_url>/predict_stance`.

    :param qclaim_doc_bodies: JSON-serialisable request payload
    :returns: tuple `(labels, confidences, model_info)` read from the
      `labels`, `confidences` and `meta.model_info` fields of the
      response
    :rtype: tuple
    :raises requests.HTTPError: when the service answers with an error
      status code
    """
    url = stance_pred_url + '/predict_stance'
    # NOTE(review): verify=False disables TLS certificate validation;
    #  left untouched as the service may rely on it -- confirm whether
    #  this is still needed
    resp = requests.post(url, json=qclaim_doc_bodies, verify=False)
    logger.info("Response from %s %s", url, resp)
    # surface HTTP errors here instead of an opaque JSON/KeyError below
    resp.raise_for_status()
    jresp = resp.json()
    return jresp['labels'], jresp['confidences'], dictu.get_in(
        jresp, ['meta', 'model_info'])
Exemple #6
0
def websiteCredRev_as_qclaimCredRating(websiteCredRev, cfg):
    """Derive a credibility rating from the review of a website.

    Counts and rating value are copied from the `reviewRating` of the
    website review. For sites recognised by `is_by_factchecker` the
    confidence is multiplied by a penalty factor and the explanation
    mentions that the site is a factchecker.

    :param websiteCredRev: a website credibility review dict
    :param cfg: config options; reads
      `factchecker_website_to_qclaim_confidence_penalty_factor`
      (default 0.5) for factchecker sites
    :returns: an `AggregateRating` dict
    :rtype: dict
    """
    def site_rating_field(name, default):
        return dictu.get_in(websiteCredRev, ['reviewRating', name], default)

    rating = {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'reviewCount': site_rating_field('reviewCount', 0),
        'ratingCount': site_rating_field('ratingCount', 0),
        'ratingValue': site_rating_field('ratingValue', 0.0),
        'dateCreated': isodate.now_utc_timestamp()
    }

    if not is_by_factchecker(websiteCredRev, cfg):
        rating['confidence'] = site_rating_field('confidence', 0.0)
        rating['ratingExplanation'] = (
            "as it was published on site `%s`. %s" % (
                dictu.get_in(websiteCredRev, ['itemReviewed', 'name']),
                websiteCredRev.get(
                    'text', '(Explanation for website credibility missing)')))
        return rating

    # reduce domain credibility for fact-checkers, as we want to
    #  focus on their claim reviews even if their confidence is
    #  relatively low.
    #  Refactoring of website_credrev.penalise_credibility
    penalty = float(cfg.get(
        'factchecker_website_to_qclaim_confidence_penalty_factor', 0.5))
    rating['confidence'] = site_rating_field('confidence', 0.0) * penalty
    rating['ratingExplanation'] = (
        "as it was published in site `%s`. %s %s" % (
            dictu.get_in(websiteCredRev, ['itemReviewed', 'name']),
            websiteCredRev.get(
                'text', '(Explanation for website credibility missing)'),
            "However, the site is a factchecker so it publishes sentences with different credibility values."))
    return rating
Exemple #7
0
def stancePredictor(cfg):
    """Return the `bots.stancePred` entry published by the claim search
    service.

    When `cfg` has a `dev_mock_stancePredictor` entry, that value is
    returned and no request is made.

    :param cfg: config options, also used to read the claim search
      request parameters
    :returns: the mocked value, or the value at `bots.stancePred` of
      the JSON response
    :raises requests.HTTPError: on an error status from the service
    """
    mock_key = 'dev_mock_stancePredictor'
    if mock_key in cfg:
        return cfg[mock_key]

    search_url, credentials, verify_cert = read_claim_search_req_params(cfg)
    # an empty query is posted; only the `bots` part of the answer is used
    response = requests.post(search_url,
                             json={},
                             verify=verify_cert,
                             auth=credentials)
    response.raise_for_status()
    payload = response.json()
    return dictu.get_in(payload, ['bots', 'stancePred'])
Exemple #8
0
def semSentenceEncoder(cfg):
    """Return the first `bots.simReviewer.isBasedOn` entry published by
    the claim search service.

    When `cfg` has a `dev_mock_semSentenceEncoder` entry, that value is
    returned and no request is made.

    :param cfg: config options, also used to read the claim search
      request parameters
    :returns: the mocked value, or element 0 of the value at
      `bots.simReviewer.isBasedOn` of the JSON response
    :raises requests.HTTPError: on an error status from the service
    """
    mock_key = 'dev_mock_semSentenceEncoder'
    if mock_key in cfg:
        return cfg[mock_key]

    search_url, credentials, verify_cert = read_claim_search_req_params(cfg)
    # an empty query is posted; only the `bots` part of the answer is used
    response = requests.post(search_url,
                             json={},
                             verify=verify_cert,
                             auth=credentials)
    response.raise_for_status()
    based_on = dictu.get_in(response.json(),
                            ['bots', 'simReviewer', 'isBasedOn'])
    return based_on[0]
Exemple #9
0
def partition_factual_sentences(items, cfg):
    """Review the worthiness of `items` and split them accordingly.

    An item counts as factual when its worthiness rating value is
    `worthy` (also the value assumed when the lookup falls back to its
    default) and as non-factual when the rating value is `unworthy`.

    :param items: list of items
    :type items: list of dicts
    :param cfg: config options, passed on to `rev_item_worthiness`
    :return: a list of factual statements and a list of non-factual
    :rtype: lists
    """
    reviewed = rev_item_worthiness(items, cfg)
    logger.info("Reviewed sentence worthiness")

    def worthiness_of(item, *default):
        return dictu.get_in(
            item, ['worthinessReview', 'reviewRating', 'ratingValue'],
            *default)

    factual, non_factual = [], []
    for item in reviewed:
        if worthiness_of(item, 'worthy') == 'worthy':
            factual.append(item)
        if worthiness_of(item) == 'unworthy':
            non_factual.append(item)

    # every incoming item must end up in exactly one of the two lists
    assert len(items) == len(factual) + len(non_factual), '%s' % (
        'The total number of factual and non factual items '
        'must be the same as the initial number of items sent to the process')
    return factual, non_factual
Exemple #10
0
def route_template(item_or_typename):
    """Look up the route template registered for an item or type name.

    :param item_or_typename: either an item (dict with `@type` field)
      or a typename (str)
    :returns: a "new style" python string template
    :rtype: str
    :raises ValueError: if the type name is not in the schema registry
    """
    typename = item_or_typename
    # items are resolved to their type name first
    while is_item(typename):
        typename = typename['@type']
    assert type(typename) is str, 'Not a type name: %s %s' % (type(typename),
                                                              typename)
    if typename not in _acred_schema:
        raise ValueError('Type name %s has not been registered' % typename)
    return dictu.get_in(_acred_schema, [typename, 'route_template'])
def test_similarSent_as_SentStanceReview_05():
    """The review built for `relSent05` has exactly the expected fields,
    reviews the `stance` aspect of a sentence pair and is authored by a
    `SentStanceReviewer`."""
    review = sscr.similarSent_as_SentStanceReview(relSent05, mockSimResult, {})
    assert review is not None

    is_valid, reason = dictu.is_value(review)
    assert is_valid, reason

    # with open('test/SentStanceReview/ssr05.json', 'w') as f:
    #     json.dump(review, f, indent=2)

    expected_keys = {
        '@context', '@type', 'additionalType', 'reviewAspect', 'itemReviewed',
        'reviewRating', 'dateCreated', 'author'
    }
    assert expected_keys == set(review.keys())
    assert 'stance' == review['reviewAspect']
    assert content.is_sentence_pair(review['itemReviewed'])
    author_type = dictu.get_in(review, ['author', '@type'])
    assert author_type == 'SentStanceReviewer'
Exemple #12
0
def super_types(item_or_typename):
    """Look up the super type names registered for an item or type name.

    An unregistered type name is not an error here: a warning is
    logged and an empty list is returned.

    :param item_or_typename: either an item (dict with `@type` field)
      or a typename (str)
    :returns: a list of type names
    :rtype: list
    """
    typename = item_or_typename
    # items are resolved to their type name first
    while is_item(typename):
        typename = typename['@type']
    assert type(typename) is str, 'Not a type name: %s %s' % (type(typename),
                                                              typename)
    if typename not in _acred_schema:
        logger.warning('Type name %s has not been registered' % typename)
        return []
    return dictu.get_in(_acred_schema, [typename, 'super_types'])
Exemple #13
0
def ident_keys(item_or_typename):
    """Look up the ident keys registered for an item or type name.

    :param item_or_typename: either an item (dict with `@type` field)
      or a typename (str)
    :returns: a list of keys whose values uniquely identify the given
      item or typename
    :rtype: list
    :raises ValueError: if the type name is not in the schema registry
    """
    typename = item_or_typename
    # items are resolved to their type name first
    while is_item(typename):
        typename = typename['@type']
    assert type(typename) is str, 'Not a type name: %s %s' % (type(typename),
                                                              typename)
    if typename not in _acred_schema:
        raise ValueError('Type name %s has not been registered' % typename)
    return dictu.get_in(_acred_schema, [typename, 'ident_keys'])
Exemple #14
0
def select_most_confident_review(reviews, cfg):
    """Return the review whose rating has the highest confidence.

    The `reviewRating.confidence` lookup uses -1.0 as its default. When
    several reviews share the highest confidence, the first one in
    `reviews` is returned.

    :param reviews: list of review dicts; each one must satisfy
      `content.is_review`
    :param cfg: configuration options (not read by this function)
    :returns: the most confident review, or None when `reviews` is
      empty or None
    :rtype: dict
    """
    if not reviews:
        return None

    for rev in reviews:
        assert content.is_review(rev), rev

    # `max` keeps the first maximal element, which is what the former
    #  stable descending sort followed by `[0]` selected.
    # Note: we can compare by multiple values by returning a tuple
    #  with confidence first; it may be a good idea to do this to
    #  make this less random if there are multiple maxima
    return max(
        reviews,
        key=lambda rev: dictu.get_in(rev, ['reviewRating', 'confidence'],
                                     -1.0))
Exemple #15
0
def itemref_keys(item_or_typename):
    """Look up the itemRef keys registered for an item or type name.

    An itemRef key is a key whose value is another item (or a list of
    items). Such values can be given as expanded items, but also as
    references to those items, typically a string with the identifier
    (but also possibly a url).

    :param item_or_typename: either an item (dict with `@type` field)
      or a typename (str)
    :returns: a list of keys for the type which refer to other items
    :rtype: list
    :raises ValueError: if the type name is not in the schema registry
    """
    typename = item_or_typename
    # items are resolved to their type name first
    while is_item(typename):
        typename = typename['@type']
    assert type(typename) is str, 'Not a type name: %s %s' % (type(typename),
                                                              typename)
    if typename not in _acred_schema:
        raise ValueError('Type name %s has not been registered' % typename)
    return dictu.get_in(_acred_schema, [typename, 'itemref_keys'])
Exemple #16
0
def sa_resp_to_aw_relations(sem_analysis):
    """Map the `relations` of a semantic analysis onto flat field values.

    The normalised relations are stored as a JSON string under
    `relations`. In addition, for each relation a `<type>/<value>`
    string is appended to `relations_entities` for its source and its
    destination, and a `<classification>/<value>` string to
    `relations_actions` for its action.

    :param sem_analysis: dict which may have a `relations` list
    :returns: dict with the collected fields; empty when there are no
      relations
    :rtype: dict
    """
    relations = [
        normalise_relation(rel) for rel in sem_analysis.get('relations', [])
    ]
    if not relations:
        return {}

    fields = {'relations': json.dumps(relations)}
    for rel in relations:
        entity_refs = [
            '%s/%s' % (dictu.get_in(rel, [end, 'type']),
                       dictu.get_in(rel, [end, 'value']))
            for end in ('source', 'destination')
        ]
        action_ref = '%s/%s' % (
            dictu.get_in(rel, ['action', 'classification']),
            dictu.get_in(rel, ['action', 'value']))

        for entity_ref in entity_refs:
            append_field_val(fields, 'relations_entities', entity_ref)
        append_field_val(fields, 'relations_actions', action_ref)
    return fields
Exemple #17
0
def backward_compatible_tweetcred_predictions(preds):
    """Add the fields needed for backward compatibility to predictions.

    The co-inform rule-engine relies on `tweet_id`, `credibility`,
    `confidence` and `explanation`. Those are expected to be present
    already for the `cred_assessment` reviewFormat, but missing for
    `schema.org` (the new, recommended output), in which case they are
    filled in from the schema.org fields. `ratingExplanation` and
    `ratingExplanationFormat` are filled in as well when absent.

    Prediction dicts are updated in place; existing fields are never
    overwritten.

    :param preds: a list of (or an individual) prediction dicts
    :returns: the same predictions, completed with any missing field
    :rtype: list or dict
    """
    if type(preds) is list:
        return list(map(backward_compatible_tweetcred_predictions, preds))
    assert type(preds) is dict
    pred = preds  # a single prediction

    def rating_field(name, *default):
        return dictu.get_in(pred, ['reviewRating', name], *default)

    # (missing field, how to compute it from the schema.org format),
    # applied in this order
    fallbacks = (
        ('tweet_id',
         lambda: dictu.get_in(pred, ['itemReviewed', 'tweet_id'])),
        ('credibility', lambda: rating_field('ratingValue')),
        ('confidence', lambda: rating_field('confidence', 0.0)),
        ('explanation', lambda: rating_field('ratingExplanation')),
        ('ratingExplanation',
         lambda: dictu.get_in(pred, ['text'],
                              rating_field('ratingExplanation'))),
        ('ratingExplanationFormat', lambda: 'markdown'),
    )
    for field, compute in fallbacks:
        if field not in pred:
            pred[field] = compute()
    return pred
def aggregate_subReviews(simple_sentSimReview, stanceReview, cfg):
    """Aggregates a similarity and stance review into a polar similarity review

    :param simple_sentSimReview: a (non-polar) `SentSimilarityReview`
      for a `sentPair`. Must not be None.
    :param stanceReview: a `SentStanceReview` for the same `sentPair`
      as `simple_sentSimReview`. When None, `simple_sentSimReview` is
      returned unchanged.
    :param cfg: configuration options
    :returns: a `SentPolarSimilarityReview` (or `simple_sentSimReview`
      when there is no stance review)
    :rtype: dict
    """
    assert simple_sentSimReview is not None
    if stanceReview is None:
        return simple_sentSimReview
    sim = dictu.get_in(simple_sentSimReview, ['reviewRating', 'ratingValue'])
    sent_stance = dictu.get_in(stanceReview, ['reviewRating', 'ratingValue'],
                               'unrelated')
    # the default must be a number: it is used in the polar similarity
    #  calculation and reported as the `confidence` of the rating
    stance_conf = dictu.get_in(stanceReview, ['reviewRating', 'confidence'],
                               0.5)

    sent_pair = simple_sentSimReview['itemReviewed']
    assert stanceReview['itemReviewed'] == sent_pair, '%s != %s' % (
        stanceReview['itemReviewed'], sent_pair)

    agg_sim = calc_agg_polarsim(sim=sim,
                                sent_stance=sent_stance,
                                sent_stance_conf=stance_conf,
                                cfg=cfg)
    # both reviews are known to be non-None at this point
    sub_reviews = [simple_sentSimReview, stanceReview]
    sub_ratings = [
        srev.get('reviewRating') for srev in sub_reviews
        if srev.get('reviewRating') is not None
    ]
    headline = simlabel.claim_rel_str(sim, sent_stance)
    # TODO: more than an explanation this is the review body
    #  the explanation would be that one model said the sentences were x similar
    #  while another said they were (stance)
    explanation = 'Sentence `%s` %s `%s`' % (
        dictu.get_in(sent_pair, ['sentA', 'text']),
        headline,
        dictu.get_in(sent_pair, ['sentB', 'text']))
    sub_bots = [
        simple_sentSimReview.get('author', {}),
        stanceReview.get('author', {})
    ]
    return {
        '@context': 'http://coinform.eu',
        '@type': 'SentPolarSimilarityReview',
        'additionalType': content.super_types('SentPolarSimilarityReview'),
        'itemReviewed': sent_pair,
        'headline': headline,
        'reviewAspect': 'polarSimilarity',
        'reviewBody': explanation,
        'reviewRating': {
            '@type': 'AggregateRating',
            'reviewAspect': 'polarSimilarity',
            'ratingValue': agg_sim,
            'confidence': stance_conf,
            'reviewCount': len(sub_reviews),
            'ratingCount': agg.total_ratingCount(sub_ratings),
            'ratingExplanation': explanation
        },
        'isBasedOn': sub_reviews,
        'dateCreated': isodate.now_utc_timestamp(),
        'author': bot_info(sub_bots, cfg)
    }
Exemple #19
0
def aggregate_subReviews(db_Sentence, claimReview, webSiteCred, cfg):
    """Combine the claim and WebSite reviews about a DB Sentence into a
       credibility review

    The rating reported is the most confident one among the rating
    derived from the website review and the rating of the normalised
    claim review; a zero-confidence rating is used when neither is
    available.

    :param db_Sentence: a `Sentence` in the Co-inform database

    :param claimReview: a `ClaimReview` for the db_Sentence. May be
      None if no claim review is available for the sentence. In
      general, the claim review will not have been normalised
      (i.e. mapped onto the co-inform accuracy/credibility scales)

    :param webSiteCred: a `WebSiteCredReview` for a webSite where the
    `db_Sentence` was published. Ignored when falsy.

    :param cfg: configuration options
    :returns: a `DBSentCredReview`
    :rtype: dict
    """
    norm_claim_rev = crn.normalise(claimReview, cfg)
    if norm_claim_rev is None:
        norm_claim_rev = {}

    site_rating = None
    if webSiteCred:
        site_rating = websiteCredRev_as_qclaimCredRating(webSiteCred, cfg)
        assert type(site_rating['confidence']) is float

    claim_conf = dictu.get_in(norm_claim_rev,
                              ['reviewRating', 'confidence'], 0.0)
    assert type(claim_conf) is float

    candidate_ratings = [
        rating
        for rating in (site_rating, norm_claim_rev.get('reviewRating', None))
        if rating is not None
    ]
    chosen = agg.select_most_confident_rating(candidate_ratings) or {
        'ratingValue': 0.0,
        'confidence': 0.0,
        'ratingExplanation': 'No website or claimReview associated with this sentence'
    }

    # only reviews that actually exist are reported as the basis
    based_on = [
        rev for rev in (webSiteCred, norm_claim_rev)
        if rev is not None and rev != {}
    ]

    review_count = agg.total_reviewCount(candidate_ratings) + len(based_on)
    rating_count = agg.total_ratingCount(candidate_ratings)

    # should be a superset of [ibo.get('author') for ibo in isBasedOn]
    sub_bots = default_sub_bots(cfg)

    # link to the first document (if any) the sentence appears in
    appearances = db_Sentence.get('appearance', [])
    first_doc = appearances[0] if appearances else None
    doc_link = md_link_to_doc(first_doc)
    where = ', in %s, ' % (doc_link) if doc_link else ''

    rev_rating = {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'reviewCount': review_count,
        'ratingCount': rating_count,
        'ratingValue': chosen.get('ratingValue', 0.0),
        'confidence': chosen.get('confidence', 0.0),
        'ratingExplanation': chosen.get('ratingExplanation')
    }
    return {
        '@context': "http://coinform.eu",
        '@type': "DBSentCredReview",
        'additionalType': content.super_types('DBSentCredReview'),
        'itemReviewed': db_Sentence,
        'text': 'Sentence `%s` %sseems *%s* %s' % (
            db_Sentence.get('text', '??'),
            where,
            credlabel.rating_label(rev_rating, cfg),
            chosen.get('ratingExplanation')
        ),
        'reviewRating': rev_rating,
        'reviewAspect': 'credibility',
        'isBasedOn': based_on,
        'dateCreated': isodate.now_utc_timestamp(),
        'author': bot_info(sub_bots, cfg)
    }
Exemple #20
0
def aggregate_sentReviews(sentReviews, adoc, cfg):
    """Aggregate the CredReviews of sentences in adoc into an
    ArticleCredReview

    Refactoring of `aggregate_sub_creds`

    Reviews rejected by the minimum-confidence filter are ignored for
    the rating; of the remaining reviews, the one with the lowest
    rating value determines the rating of the article.

    :param sentReviews: list of sentence CredibilityReviews. In
      practice, we expect a list of `AggQSentCredReview`s.
    :param adoc: an analysed document. The item to be reviewed.
    :param cfg: config options. Currently for the `cred_conf_threshold`
    :returns: an `ArticleCredReview` aggregating the credibility
      reviews of sentences in the article.
    :rtype: dict
    """
    doc_mdref = markdown_ref_for_article(adoc, cfg)
    # TODO: extract sub_bots from sentReviews and make sure they match
    #  the default sub_bots?
    author = default_bot_info(cfg)
    base_review = {
        **base_ArticleCredReview(cfg),
        'author': author,
        'itemReviewed': adoc,
        'isBasedOn': sentReviews
    }

    # simplest case: no sentence reviews at all
    if sentReviews is None or len(sentReviews) == 0:
        explanation = 'we could not find any relevant claims in it.'
        return {
            **base_review,
            'text': '%s is *not verifiable* as %s' % (doc_mdref, explanation),
            'reviewRating': {
                '@type': 'Rating',
                'reviewAspect': 'credibility',
                'ratingValue': 0.0,
                'confidence': 0.0,
                'ratingExplanation': explanation
            }
        }

    sub_ratings = [
        rev.get('reviewRating') for rev in sentReviews
        if rev.get('reviewRating') is not None
    ]
    for rating in sub_ratings:  # really, just validating
        assert 'ratingValue' in rating, '%s' % (rating)
        assert rating['ratingValue'] is not None, '%s' % (rating)
        assert 'confidence' in rating, '%s' % (rating)
        assert rating['confidence'] is not None, '%s' % (rating)

    # split the reviews by confidence
    min_conf = float(cfg.get('cred_conf_threshold', 0.7))
    is_confident = agg.filter_review_by_min_confidence(min_conf)
    confident_revs, ignored_revs = [], []
    for rev in sentReviews:
        if is_confident(rev):
            confident_revs.append(rev)
        else:
            ignored_revs.append(rev)

    # not enough confidence in any of the sentence reviews
    if not confident_revs:
        example = (' An example: %s ' % ignored_revs[0]['text']
                   if ignored_revs else '')
        msg = 'we could not assess credibility of %d of its sentences with %s.%s' % (
            len(sentReviews), 'sufficient confidence', example)
        return {
            **base_review,
            'text': '%s is *not verifiable* as %s.' % (doc_mdref, msg),
            'reviewRating': {
                '@type': 'AggregateRating',
                'reviewAspect': 'credibility',
                'ratingValue': 0.0,
                'confidence': 0.0,
                'ratingExplanation': msg,
                'ratingCount': agg.total_ratingCount(sub_ratings),
                'reviewCount':
                agg.total_reviewCount(sub_ratings) + len(sentReviews)
            }
        }

    # the least credible review above the confidence threshold decides
    revs_by_value = sorted(
        confident_revs,
        key=lambda rev: dictu.get_in(rev, ['reviewRating', 'ratingValue'],
                                     0.0))
    least_credible = revs_by_value[0]
    msg = 'like its least credible Sentence `%s` which %s' % (
        dictu.get_in(least_credible, ['itemReviewed', 'text'],
                     '(missing sentence)'),
        dictu.get_in(least_credible, ['reviewRating', 'ratingExplanation'],
                     '(missing explanation)'))
    rev_rating = {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'ratingValue': dictu.get_in(
            least_credible, ['reviewRating', 'ratingValue'], 0.0),
        'confidence': dictu.get_in(
            least_credible, ['reviewRating', 'confidence'], 0.0),
        'ratingExplanation': msg,
        'ratingCount': agg.total_ratingCount(sub_ratings),
        'reviewCount': agg.total_reviewCount(sub_ratings) + len(sentReviews)
    }
    return {
        **base_review,
        # confident reviews first (least credible first), then the rest
        'isBasedOn': revs_by_value + ignored_revs,
        'text': '%s is *%s* %s' % (
            doc_mdref, credlabel.rating_label(rev_rating, cfg), msg),
        'reviewRating': rev_rating
    }
Exemple #21
0
def aggregate_subReviews(domcredReview, content_credReview, adoc, cfg):
    """Combines the domain and content credibility reviews for adoc into
      an AggregateRating.

    Refactoring of `aggregate_article_cred`

    The content review is used when its confidence reaches the
    threshold; otherwise the website review is used when its confidence
    reaches the threshold; otherwise a zero-confidence rating is
    returned which lists whatever weak signals are available.

    :param domcredReview: a `WebsiteCredReview` for the domain/url of adoc
    :param content_credReview: a credibility review dict for the content
      of adoc; its `reviewRating` and `text` are read
    :param adoc: the article being rated, useful for generating explanations
    :param cfg: config options; reads `cred_conf_threshold` (default
      0.7), `article_from_website_conf_factor` (default 0.9) and
      `article_from_website_cred_threshold_penalise` (default 0.2)
    :returns: an `AggregateRating`
    :rtype: dict
    """
    doc_mdref = markdown_ref_for_article(adoc, cfg)
    # cast like the other config values: a threshold configured as a
    #  string would otherwise fail when compared to the confidences
    thresh = float(cfg.get('cred_conf_threshold', 0.7))
    content_conf = dictu.get_in(content_credReview,
                                ['reviewRating', 'confidence'], 0.0)
    domcred_conf = dictu.get_in(domcredReview, ['reviewRating', 'confidence'],
                                0.0)

    def website_details():
        # (site name, falling back to its url; text of the site review)
        return (dictu.get_in(
            domcredReview, ['itemReviewed', 'name'],
            dictu.get_in(domcredReview, ['itemReviewed', 'url'],
                         '(missing)')),
                domcredReview.get(
                    'text', '(Explanation for site credibility missing)'))

    if content_conf >= thresh:
        credval = dictu.get_in(content_credReview,
                               ['reviewRating', 'ratingValue'], 0.0)
        cred_conf = content_conf
        explanation = dictu.get_in(content_credReview,
                                   ['reviewRating', 'ratingExplanation'], '')
        if domcred_conf >= thresh:
            explanation += '\nTake into account that it appeared in website `%s`. %s' % (
                website_details())
    elif domcred_conf >= thresh:
        credval = dictu.get_in(domcredReview, ['reviewRating', 'ratingValue'],
                               0.0)
        penalty_factor = float(cfg.get('article_from_website_conf_factor',
                                       0.9))
        webcred_thresh = float(
            cfg.get('article_from_website_cred_threshold_penalise', 0.2))
        # penalise confidence if above a threshold
        #  credible website can still publish false claims
        #  but all claims in non-credible website should be questioned
        cred_conf = domcred_conf * penalty_factor if credval >= webcred_thresh else domcred_conf
        explanation = "as it appeared in website `%s`. %s" % (
            website_details())
    else:
        credval = 0.0
        cred_conf = 0.0
        explanation = 'we have insufficient credibility signals from text and website analyses.'
        contentExpl = dictu.get_in(content_credReview, ['text'])
        websiteExpl = dictu.get_in(domcredReview, ['text'])
        if contentExpl or websiteExpl:
            # leading space separates this sentence from the previous one
            explanation += ' In case it is useful, we include the **weak** credibility signals we found:%s%s' % (
                '\n * %s' % contentExpl if contentExpl else '',
                '\n * %s' % websiteExpl if websiteExpl else '')
    subRatings = [
        r['reviewRating'] for r in [domcredReview, content_credReview]
    ]
    return {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'ratingValue': credval,
        'confidence': cred_conf,
        'ratingExplanation': explanation,
        'ratingCount': agg.total_ratingCount(subRatings),
        # the two sub reviews themselves are counted as well
        'reviewCount': agg.total_reviewCount(subRatings) + 2
    }
Exemple #22
0
    for f, df in f2df.items():
        clef_pred = []
        handled_ids = []
        claims = df.to_dict(orient='records')
        for ci, claim in enumerate(claims):
            logger.info('Claim %d of %d in %s' % (ci, len(claims), f))
            cid = int(claim['claim_number'])
            if cid in handled_ids:
                logger.info('Skipping as previously handled')
                continue
            url = '%s/api/v1/claim/predict/credibility?claim=%s' % (
                acredapi_url, claim['normalized_claim'])
            resp = requests.get(url, verify=False)
            resp.raise_for_status()
            claimcreds = resp.json()
            credRating = dictu.get_in(claimcreds[0], cred_path)
            clef_pred.append({
                'id':
                cid,
                'label':
                acred_as_clef_label(credRating, cred_thresh)
            })
            handled_ids.append(cid)

            out_dir = '%s/reviews' % (args.outFolder)
            if not os.path.exists(out_dir):
                print('Creating dir %s for the reviews' % (out_dir))
                os.makedirs(out_dir)

            # write CredibilityReview to outFolder
            fname = f.replace('.txt', '_%s.json' % cid)
Exemple #23
0
def aggregate_subReviews(subReviews, tweet, cfg):
    """Creates an aggregate review based on subReviews for tweet

    Refactoring of `aggregate_tweet_cred`

    The resulting rating copies the value and confidence of the least
    credible sub-review among those accepted by the confidence filter.
    When no sub-review is accepted, a rating with value 0.0 and
    confidence 0.0 is produced instead.

    :param subReviews: list of credibility reviews for (parts of) the
      tweet to review. `None` is treated as an empty list.
    :param tweet: the tweet to review; stored as `itemReviewed` and
      passed to `markdown_ref_for_tweet` to build the review text
    :param cfg: config options. `cred_conf_threshold` (default 0.7) is
      the value handed to `agg.filter_review_by_min_confidence`
    :returns: a credibility review for the `tweet` to review that
      contains an `AggregateRating` based on the `subReviews` (a plain
      `Rating` when there are no `subReviews` at all)
    :rtype: dict
    """
    # normalise first so that `isBasedOn` below never ends up being None
    if subReviews is None:
        subReviews = []

    partial_TweetCredReview = {
        '@context': ci_context,
        '@type': 'TweetCredReview',
        'itemReviewed': tweet,
        'isBasedOn': subReviews,
        'dateCreated': isodate.now_utc_timestamp(),
        'author': default_bot_info(cfg)
    }
    tweet_mdref = markdown_ref_for_tweet(tweet, cfg)

    subRatings = [
        sr.get('reviewRating') for sr in subReviews
        if sr.get('reviewRating') is not None
    ]

    # filter by min confidence (each review is evaluated only once)
    conf_threshold = float(cfg.get('cred_conf_threshold', 0.7))
    filter_fn = agg.filter_review_by_min_confidence(conf_threshold)
    conf_subRevs = []
    igno_subRevs = []
    for sr in subReviews:
        if filter_fn(sr):
            conf_subRevs.append(sr)
        else:
            igno_subRevs.append(sr)

    # no (confident) subReviews
    if not conf_subRevs:
        part_rating = {
            '@type': 'Rating',
            'ratingValue': 0.0,
            'confidence': 0.0,
            'reviewAspect': 'credibility'
        }
        if not subReviews:
            # The tweet reference is added when the final 'text' is built
            # below; this message has no placeholder, so it must not be
            # %-formatted (doing so raises TypeError).
            msg = "we could not extract (or assess credibility of) its sentences or linked documents"
            rating = {**part_rating, 'ratingExplanation': msg}
        else:
            # conf_subRevs is empty and subReviews is not, hence
            # igno_subRevs has at least one element here
            example_text = dictu.get_in(igno_subRevs[0], ['text'],
                                        '(missing explanation for part)')
            msg = 'we could not assess the credibility of its %d sentences or linked documents.%s' % (
                len(subReviews), '\nFor example:\n * %s' % (example_text))
            rating = {
                **part_rating, '@type': 'AggregateRating',
                'ratingExplanation': msg,
                'ratingCount': agg.total_ratingCount(subRatings),
                'reviewCount':
                agg.total_reviewCount(subRatings) + len(subReviews)
            }
        return {
            **partial_TweetCredReview, 'text':
            '%s seems *%s* as %s' %
            (tweet_mdref, credlabel.rating_label(rating, cfg), msg),
            'reviewRating':
            rating
        }

    # select least credible subReview (lowest ratingValue comes first)
    subRevs_by_val = sorted(conf_subRevs,
                            key=lambda rev: dictu.get_in(
                                rev, ['reviewRating', 'ratingValue'], 0.0))
    least_cred_rev = subRevs_by_val[0]
    msg = 'based on its least credible part:\n%s' % (dictu.get_in(
        least_cred_rev, ['text'], '(missing explanation for part)'))
    revRating = {
        '@type':
        'AggregateRating',
        'reviewAspect':
        'credibility',
        'ratingValue':
        dictu.get_in(least_cred_rev, ['reviewRating', 'ratingValue'], 0.0),
        'confidence':
        dictu.get_in(least_cred_rev, ['reviewRating', 'confidence'], 0.0),
        'ratingExplanation':
        msg,
        'ratingCount':
        agg.total_ratingCount(subRatings),
        'reviewCount':
        agg.total_reviewCount(subRatings) + len(subReviews)
    }
    return {
        **partial_TweetCredReview,
        'isBasedOn':
        subRevs_by_val + igno_subRevs,  # just a re-ordering
        'text':
        '%s seems *%s* %s' %
        (tweet_mdref, credlabel.rating_label(revRating, cfg), msg),
        'reviewRating':
        revRating
    }
Exemple #24
0
def claimsim_result_as_aggQSentCredReview(claimsim_result, worth_rev, cfg):
    """Build an `AggQSentCredReview` out of a `SemanticClaimSimilarityResult`

    Refactored version of `claimsim_result_as_claimcred`.

    :param claimsim_result: similarity result; its `q_claim` entry is the
      query sentence and its `results` entry the similar sentences found
    :param worth_rev: dict with check worthiness review; left out of the
      result when falsy
    :param cfg: config options
    :returns: a `AggQSentCredReview`
    :rtype: dict
    """
    query_sentence = claimsim_result['q_claim']
    similar_sents = claimsim_result['results']
    # zero or one element, depending on whether a worthiness review was given
    worth_revs = [worth_rev] if worth_rev else []

    itemReviewed = content.as_sentence(query_sentence, cfg=cfg)

    # nothing similar was found: fall back to the default rating
    if len(similar_sents) == 0:
        fallback_rating = default_rating()
        base_review = base_AggQSentCredReview(cfg)
        review_text = 'Sentence `%s` seems *not verifiable* as it %s' % (
            itemReviewed['text'], fallback_rating['ratingExplanation'])
        rating_id = itnorm.calc_identifier(fallback_rating, cfg)
        return {
            **base_review,
            'itemReviewed': itemReviewed,
            'text': review_text,
            'reviewRating': {**fallback_rating, 'identifier': rating_id},
            'isBasedOn': worth_revs
        }

    # one QSentCredReview per similar sentence
    qsent_credrevs = []
    for simSent in similar_sents:
        qsent_credrevs.append(
            qsent_credrev.similarSent_as_QSentCredReview(
                simSent, claimsim_result, cfg))
    # TODO: remove subReviews if based on websiteCredRev for a factchecker (but not a claimReview)
    # sanity checks: same reviewed sentence, and all are credibility ratings
    for credrev in qsent_credrevs:
        assert credrev['itemReviewed'] == itemReviewed
        aspect = dictu.get_in(credrev, ['reviewRating', 'reviewAspect'])
        assert aspect == 'credibility'

    # ratings of all sub reviews (worthiness review last), skipping missing ones
    subRatings = [
        rev.get('reviewRating') for rev in qsent_credrevs + worth_revs
        if rev.get('reviewRating') is not None
    ]

    # the aggregate mirrors the rating of the most confident sub review
    most_confident = agg.select_most_confident_review(qsent_credrevs, cfg)
    top_rating = most_confident.get('reviewRating', {})
    reviewRating = {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'ratingValue': top_rating.get('ratingValue', 0.0),
        'confidence': top_rating.get('confidence', 0.0),
        'ratingExplanation': top_rating.get('ratingExplanation', None),
        'ratingCount': agg.total_ratingCount(subRatings),
        'reviewCount': agg.total_reviewCount(subRatings) +
        len(qsent_credrevs) + len(worth_revs)
    }

    base_review = base_AggQSentCredReview(cfg)
    review_text = 'Sentence `%s` seems *%s* as it %s' % (
        itemReviewed.get('text', '??'),
        credlabel.rating_label(reviewRating, cfg),
        reviewRating['ratingExplanation'])
    rating_id = itnorm.calc_identifier(reviewRating, cfg)
    return {
        **base_review,
        'itemReviewed': itemReviewed,
        'text': review_text,
        'reviewRating': {**reviewRating, 'identifier': rating_id},
        'isBasedOn': qsent_credrevs + worth_revs
    }
def author_url(claimReview, defValue="unknownUrl"):
    """Look up the `url` of the `author` of a claim review

    :param claimReview: the claim review to read from
    :param defValue: fallback value handed to `dictu.get_in`
    :returns: whatever `dictu.get_in` yields for the path `author` -> `url`
    """
    url_path = ['author', 'url']
    return dictu.get_in(claimReview, url_path, defValue)
Exemple #26
0
 def filter_fn(review):
     """Tell whether the confidence of `review` reaches `threshold`

     `threshold` is not defined here; it is resolved from an outer scope.

     :param review: review whose `reviewRating` -> `confidence` is read,
       with 0.0 passed to `dictu.get_in` as the default
     :returns: the result of comparing that confidence `>= threshold`
     """
     confidence = dictu.get_in(review, ['reviewRating', 'confidence'], 0.0)
     return confidence >= threshold