コード例 #1
0
ファイル: predictor.py プロジェクト: expertailab/acred
def assess_doc_cred(doc, cfg):
    """Main credibility assessment for a single doc

    Tweets are delegated to `tweet_credrev.review` and articles to
    `article_credrev.review`. Any other document produces a result with
    zero credibility and zero confidence that carries an error message.

    :param doc: a validated and normalised document, ready for credibility
      assessment
    :param cfg: any configs we need to execute/customise the assessment.
      For unsupported documents, `acred_review_format` (default
      `'schema.org'`) selects the shape of the result: the value
      `'cred_assessment'` yields a `DocumentCredibilityAssessment`, any
      other value a `DocumentCredReview`.
    :returns: a credibility assessment for the doc
    :rtype: dict
    """
    start = citimings.start()
    if content.is_tweet_doc(doc):
        return tweet_credrev.review(doc, cfg)
    if content.is_article_doc(doc):
        return article_credrev.review(doc, cfg)

    rev_format = cfg.get('acred_review_format', 'schema.org')
    # Wrap the value in a 1-tuple: a bare tuple on the right of `%` would
    # be unpacked as several format arguments and raise a TypeError.
    msg = 'Unsupported document (not a %s)' % (supported_doc_types,)
    if rev_format == 'cred_assessment':
        return {
            '@context': ci_context,
            '@type': 'DocumentCredibilityAssessment',
            'doc_url': doc['url'],
            'item_assessed': doc,
            'cred_assessment_error': msg,
            'date_assessed': isodate.now_utc_timestamp(),
            'timings': citimings.timing('assess_doc_cred', start),
            'credibility': 0,
            'confidence': 0,
            'explanation': msg}

    rating = {
        '@type': 'Rating',
        'ratingValue': 0.0,
        'confidence': 0.0,
        'ratingExplanation': msg}
    result = {
        '@context': ci_context,
        '@type': 'DocumentCredReview',
        'reviewAspect': 'credibility',
        'itemReviewed': doc,
        'dateCreated': isodate.now_utc_timestamp(),
        'author': bot_info([], cfg),
        # the rating identifier is calculated from the rating without it
        'reviewRating': {
            **rating,
            'identifier': itnorm.calc_identifier(rating, cfg)}
    }
    # likewise, the review identifier is calculated before it is attached
    return {
        **result,
        'identifier': itnorm.calc_identifier(result, cfg)
    }
コード例 #2
0
ファイル: semsent_simrev.py プロジェクト: expertailab/acred
def similarSent_as_SentSimilarityReview(simSent, simResult, cfg):
    """Builds a `SentSimilarityReview` for one similar sentence

    :param simSent: dict providing the matched `sentence` and its
      `similarity` value
    :param simResult: dict providing the query claim (`q_claim`), the
      `simReviewer` and, optionally, a `dateCreated`
    :param cfg: configuration options, passed on to
      `content.as_dbq_sentpair`
    :returns: a `SentSimilarityReview`
    :rtype: dict
    """
    query_sent = simResult['q_claim']
    reviewer = simResult['simReviewer']
    similarity = simSent['similarity']

    sent_pair = content.as_dbq_sentpair(
        dbSent=simSent['sentence'], qSent=query_sent, cfg=cfg)
    headline = simlabel.claim_rel_str(similarity, None)
    rating = {
        '@type': 'Rating',
        'reviewAspect': 'similarity',
        'ratingValue': similarity,
    }
    # note: the fallback timestamp is computed even when `simResult`
    # already provides a `dateCreated`
    created = simResult.get('dateCreated', isodate.now_utc_timestamp())

    return {
        '@context': 'http://coinform.eu',
        '@type': 'SentSimilarityReview',
        'itemReviewed': sent_pair,
        'headline': headline,
        'reviewRating': rating,
        'dateCreated': created,
        'author': reviewer,
    }
コード例 #3
0
ファイル: website_credrev.py プロジェクト: expertailab/acred
def calc_domain_credibility(domain, cfg=None):
    """Calculates a `DomainCredibility` for a domain via MisinfoMe

    Note that `DomainCredibility` is deprecated, use the `review` method
    which produces a `WebSiteCredReview` instead.

    If `domain` is `None`, or the MisinfoMe lookup raises, a default
    credibility (as built by `default_domain_crediblity`) is returned
    instead; lookup failures are logged.

    :param domain: str e.g. `www.snopes.com`
    :param cfg: configuration options; not read by this function
    :returns: a `DomainCredibility`
    :rtype: dict
    :raises AssertionError: if `domain` is neither `None` nor a `str`
    """
    if domain is None:
        return default_domain_crediblity(
            domain, "Default credibility for unknown domain")

    # The original message lacked the `%` operator, so a failing assert
    # raised "TypeError: 'str' object is not callable" instead.
    assert isinstance(domain, str), 'Expecting str, but was %s' % (
        type(domain),)
    start = citimings.start()
    try:
        # keys listed after the unpacking override those returned by
        # MisinfoMe
        return {
            **misinfome_source_credibility(domain),
            '@context': 'DomainCredibility',
            '@type': 'DomainCredibility',
            'dateCreated': isodate.now_utc_timestamp(),
            'timings': citimings.timing('misinfome_source_credibility',
                                        start)
        }
    except Exception as e:
        # deliberate best-effort: any failure falls back to the default
        logger.error("Failed misinfome source credibility. " + str(e))
        return default_domain_crediblity(
            domain, "Unable to retrieve credibility assessment")
コード例 #4
0
def review_article(adoc, cfg):
    """Main credibility review for a single article

    Refactoring of `assess_article_cred`. Reviews the article's website
    and its content separately, aggregates both sub-reviews into a
    rating and wraps everything in an `ArticleCredReview`.

    :param adoc: analyzed doc as returned by `analyzed_doc`
    :param cfg: config to guide this assessment
    :returns: a `ArticleCredReview`
    :rtype: dict
    """
    # TODO: ? start = citimings.start()
    website_review = adoc_to_website_credReview(adoc, cfg)
    content_review = review_doc_content_cred(adoc, cfg)

    # TODO: ? extract sub_bots from website_cr and aggqsent_cr and make sure it matches default_sub_bots?
    rating = aggregate_subReviews(website_review, content_review, adoc, cfg)

    review = {
        **base_ArticleCredReview(cfg),
        'author': default_bot_info(cfg),
        'dateCreated': isodate.now_utc_timestamp(),
        'itemReviewed': adoc,  # maybe just return ref?
    }

    article_ref = markdown_ref_for_article(adoc, cfg)
    label = credlabel.rating_label(rating, cfg)
    explanation = rating.get('ratingExplanation', '(missing explanation)')
    review['text'] = '%s seems *%s* %s' % (article_ref, label, explanation)
    review['reviewRating'] = rating
    review['isBasedOn'] = [website_review, content_review]
    return review
コード例 #5
0
ファイル: dbsent_credrev.py プロジェクト: expertailab/acred
def websiteCredRev_as_qclaimCredRating(websiteCredRev, cfg):
    """Derives a credibility `AggregateRating` from a website review

    Counts and rating value are copied from the `reviewRating` of
    `websiteCredRev` (defaulting to zero). For fact-checker websites the
    confidence is multiplied by a penalty factor.

    :param websiteCredRev: the website credibility review to convert
    :param cfg: configuration options;
      `factchecker_website_to_qclaim_confidence_penalty_factor`
      (default 0.5) is the factor applied for fact-checker sites
    :returns: an `AggregateRating`
    :rtype: dict
    """
    site_rev = websiteCredRev
    base_rating = {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'reviewCount': dictu.get_in(
            site_rev, ['reviewRating', 'reviewCount'], 0),
        'ratingCount': dictu.get_in(
            site_rev, ['reviewRating', 'ratingCount'], 0),
        'ratingValue': dictu.get_in(
            site_rev, ['reviewRating', 'ratingValue'], 0.0),
        'dateCreated': isodate.now_utc_timestamp()
    }
    missing_text = '(Explanation for website credibility missing)'

    if not is_by_factchecker(websiteCredRev, cfg):
        confidence = dictu.get_in(
            site_rev, ['reviewRating', 'confidence'], 0.0)
        site_name = dictu.get_in(websiteCredRev, ['itemReviewed', 'name'])
        site_text = websiteCredRev.get('text', missing_text)
        return {
            **base_rating,
            'confidence': confidence,
            'ratingExplanation': "as it was published on site `%s`. %s" % (
                site_name, site_text)
        }

    # reduce domain credibility for fact-checkers, as we want to
    #  focus on their claim reviews even if their confidence is
    #  relatively low.
    #  Refactoring of website_credrev.penalise_credibility
    penalty = float(cfg.get(
        'factchecker_website_to_qclaim_confidence_penalty_factor', 0.5))
    confidence = dictu.get_in(
        site_rev, ['reviewRating', 'confidence'], 0.0) * penalty
    site_name = dictu.get_in(websiteCredRev, ['itemReviewed', 'name'])
    site_text = websiteCredRev.get('text', missing_text)
    caveat = "However, the site is a factchecker so it publishes sentences with different credibility values."
    return {
        **base_rating,
        'confidence': confidence,
        'ratingExplanation': "as it was published in site `%s`. %s %s" % (
            site_name, site_text, caveat)
    }
コード例 #6
0
def base_ArticleCredReview(cfg):
    """Creates the fields shared by every `ArticleCredReview`

    :param cfg: configuration options; not read by this function
    :returns: a partial `ArticleCredReview` with context, type,
      super-types and creation date
    :rtype: dict
    """
    review_type = 'ArticleCredReview'
    review = {'@context': content.ci_context}
    review['@type'] = review_type
    review['additionalType'] = content.super_types(review_type)
    review['dateCreated'] = isodate.now_utc_timestamp()
    return review
コード例 #7
0
def search_claim(q_claim):
    """finds similar claims or sentences in a claim database

    Finding similar claims or sentences in the co-inform claim database # noqa: E501

    :param q_claim: an English sentence or claim, or a list of them;
      each entry is searched separately.
    :type q_claim: str or list
    :returns: a dict with the `results` (one
      `SemanticClaimSimilarityResult` per query claim) and a
      `resultsHeader` holding timings and the original query
    :rtype: dict
    :raises InvalidUsage: if `q_claim` is `None` or is neither a `str`
      nor a `list`
    """
    if q_claim is None:
        raise InvalidUsage("Claim is mandatory")
    if isinstance(q_claim, str):
        q_claims = [q_claim]
    elif isinstance(q_claim, list):
        q_claims = q_claim
    else:
        # previously any other type left `q_claims` unbound and failed
        # later with an UnboundLocalError
        raise InvalidUsage(
            "Claim must be a str or a list of str, but was %s" % (
                type(q_claim).__name__,))

    start = citimings.start()
    logger.info('Searching semantic vector space for %s claim(s)',
                len(q_claims))
    topn = 5
    preds, claim_ids, simReviewer = search_semantic_vecspace(
        q_claims, topn=topn)
    search_semspace_t = citimings.timing('search_semantic_vecspace', start)

    assert len(preds) == len(claim_ids)
    assert len(q_claims) == len(preds)
    q_resp, claim_retrieve_t = retrieve_result_claims(
        claim_ids, q_claims, topn)

    start3 = citimings.start()
    results, sub_build_ts = [], []
    # the asserts above guarantee the three sequences have equal length
    for qclaim, ids, scores in zip(q_claims, claim_ids, preds):
        start4 = citimings.start()
        claim_id2pred = {idx: float(pred) for idx, pred in zip(ids, scores)}
        relsents, sub_ts = q_resp_to_related_sent(q_resp, claim_id2pred)
        results.append({
            '@context': ci_context,
            '@type': 'SemanticClaimSimilarityResult',
            'dateCreated': isodate.now_utc_timestamp(),
            'q_claim': qclaim,
            'simReviewer': simReviewer,
            'results': relsents})
        sub_build_ts.append(citimings.timing('build_result', start4, sub_ts))
    result_build_t = citimings.timing('build_results', start3, sub_build_ts)

    results, stance_pred_t = add_stance_detection(
        results, sim_threshold=stance_min_sim_threshold)

    timing = citimings.timing(
        'search_claim', start,
        [search_semspace_t, claim_retrieve_t,
         result_build_t, stance_pred_t])
    return {
        'results': results,
        'resultsHeader': {
            'QTime': timing['total_ms'],
            'timings': timing,
            'params': {
                # echo the query exactly as received (str or list)
                'claim': q_claim
            }}}
コード例 #8
0
def base_AggQSentCredReview(cfg):
    """Creates the fields shared by every `AggQSentCredReview`

    :param cfg: configuration options, passed on to `default_bot_info`
    :returns: a partial `AggQSentCredReview` with context, type,
      super-types, creation date and author
    :rtype: dict
    """
    review = {'@context': ci_context}
    review['@type'] = 'AggQSentCredReview'
    review['additionalType'] = ['CredibilityReview', 'Review']
    review['dateCreated'] = isodate.now_utc_timestamp()
    review['author'] = default_bot_info(cfg)  # default sub_bots
    return review
コード例 #9
0
def claimsim_result_as_claimcred(claimsim_result, cfg):
    """Convert a `SemanticClaimSimilarityResult` into a `ClaimCredibility`

    Related sentences lacking a `domain_credibility` get one assigned
    in place, based on their `domain`, before the credibilities are
    aggregated.

    :param claimsim_result: dict providing the query claim (`q_claim`)
      and its related sentences (`results`)
    :param cfg: configuration options
    :returns: a `ClaimCredibility`
    :rtype: dict
    """
    # TODO: delegate to reviewers to convert claimsim_result into
    # QSentCredReview, DBClaimCredibilityReview, WebSiteCredReview, etc.
    agg_start = citimings.start()
    query_sent = claimsim_result['q_claim']  # qsent
    related = claimsim_result['results']  # simsents

    # sentSimReviews = [ # TODO: remove, just for feedback during refactoring
    #     semsent_simrev.similarSent_as_SentSimilarityReview(simSent, claimsim_result, cfg)
    #     for simSent in relsents]

    for rel_sent in related:
        if 'domain_credibility' in rel_sent:
            continue
        # claim search no longer does domain credibility, so we have to do it here
        dom_cred = website_credrev.calc_domain_credibility(
            rel_sent['domain'])
        rel_sent['domain_credibility'] = dom_cred

    related = [add_relative_credibility(rel_sent, cfg)
               for rel_sent in related]
    credibility = aggregate_credibility(related, cfg)
    credibility['source'] = 'credibility of %d related claims ' % len(related)
    agg_timing = citimings.timing('claim_relsent_agg', agg_start)

    claim_item = {
        '@context': ci_context,
        '@type': 'Claim',
        'claim': query_sent
    }
    return {
        '@context': ci_context,
        '@type': 'ClaimCredibility',
        'claim': query_sent,
        'item_assessed': claim_item,
        # 'sentenceSimilarityReview': sentSimReviews,
        'aggQSentCredReview': claimsim_result_as_aggQSentCredReview(
            claimsim_result, cfg),
        'related_claims': _partition_related_sents(related, cfg),
        'date_assessed': isodate.now_utc_timestamp(),
        'assessor': {
            '@context': ci_context,
            '@type': 'CredibilityAssessor',
            'name': 'SemanticSimilarityClaimCredibilityAssessor',
            'version': '20200208'
        },
        'credibility': credibility,
        'timings': agg_timing
    }
コード例 #10
0
def normalise(claimReview, cfg):
    """Normalises a `ClaimReview` into a `NormalisedClaimReview`

    The most confident of the normalised sub-ratings becomes the
    aggregate rating. If no rating can be selected, the aggregate
    rating has zero value and confidence and explains the failure.

    :param claimReview: the `ClaimReview` to normalise, may be `None`
    :param cfg: configuration options
    :returns: a `NormalisedClaimReview`, or `None` if `claimReview` is
      `None`
    :rtype: dict
    """
    if claimReview is None:
        return None
    assert content.is_claimReview(claimReview), "%s" % (claimReview,)
    sub_ratings = normalised_claimReview_ratings(claimReview)
    most_confident = agg.select_most_confident_rating(sub_ratings)
    if most_confident is None:
        # The URL used to be left uninterpolated inside the literal
        # (`claimReview,get(...)` as plain text); format it in so the
        # markdown link points at the original review.
        explanation = 'Failed to interpret original [review](%s)' % (
            claimReview.get("url", "missing_url"),)
        agg_rating = {
            '@type': 'AggregateRating',
            'reviewAspect': 'credibility',
            'reviewCount': 1,  # the original claimReview
            'ratingCount': len(sub_ratings),
            'ratingValue': 0.0,
            'confidence': 0.0,
            'ratingExplanation': explanation
        }
    else:
        agg_rating = {
            **most_confident,
            '@type': 'AggregateRating',
            'reviewCount': 1,
            'ratingCount': len(sub_ratings)
        }
        assert type(agg_rating['confidence']) == float
        assert 'ratingExplanation' in agg_rating, '%s' % (most_confident,)
    return {
        '@context': ci_context,
        '@type': 'NormalisedClaimReview',
        'additionalType': content.super_types('NormalisedClaimReview'),
        'author': bot_info(cfg),
        'text': 'Claim `%s` is *%s* %s' % (
            claimReview.get('claimReviewed'),
            credlabel.rating_label(agg_rating, cfg),
            agg_rating.get('ratingExplanation', '(missing explanation)')),
        'claimReviewed': claimReview.get('claimReviewed'),
        'dateCreated': isodate.now_utc_timestamp(),
        'isBasedOn': [claimReview] + sub_ratings,
        'reviewAspect': 'credibility',
        'reviewRating': agg_rating
    }
コード例 #11
0
ファイル: website_credrev.py プロジェクト: expertailab/acred
def from_old_DomainCredibility(dom_cred, cfg):
    """Converts a `DomainCredibility` into a `WebSiteCredReview`

    :param dom_cred: a `DomainCredibility` dict; its `assessments`
      entry is mandatory
    :param cfg: configuration options; not read by this function
    :returns: a `WebSiteCredReview`
    :rtype: dict
    """
    site_url = dom_cred.get('itemReviewed', 'missing_website')  # str
    website = content.str_as_website(site_url)  # reconstruct WebSite

    cred_value = dictu.get_in(dom_cred, ['credibility', 'value'], 0.0)
    assessments = dom_cred['assessments']
    n_assessments = len(assessments)
    explanation = 'based on %d review(s) by external rater(s)%s' % (
        n_assessments, example_raters_markdown(dom_cred))

    site_name = website.get('name', '??')
    cred_label = credlabel.describe_credval(cred_value, None)
    text = 'Site `%s` seems *%s* %s' % (site_name, cred_label, explanation)
    author = misinfoMeSourceCredReviewer()
    rating = {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'ratingValue': cred_value,
        'confidence': dictu.get_in(
            dom_cred, ['credibility', 'confidence'], 0.5),
        'ratingExplanation': explanation,
        'reviewCount': n_assessments,
        'ratingCount': n_assessments
    }

    return {
        '@context': 'http://coinform.eu',
        '@type': 'WebSiteCredReview',
        'additionalType': content.super_types('WebSiteCredReview'),
        'itemReviewed': website,
        'text': text,
        'author': author,
        'reviewRating': rating,
        'dateCreated': dom_cred.get(
            'dateCreated', isodate.now_utc_timestamp()),
        'reviewAspect': 'credibility',
        'isBasedOn': [],  # TODO:
        'isBasedOn_assessments': assessments,
        'timings': dom_cred.get('timings', {})
    }
コード例 #12
0
def assess_article_cred(article, cfg):
    """Main credibility assessment for a single article

    *Deprecated* you should move to `review_article`

    The assessment date is emitted under `date_assessed`, the key used
    by the other assessments, and also under the historical misspelling
    `date_asessed` so that existing consumers keep working.

    :param article: valid and normalised article
    :param cfg: config to guide this assessment
    :returns: a credibility assessment for the article
    :rtype: dict
    """
    start = citimings.start()

    adoc = analyzed_doc(article, cfg)
    adoc_t = adoc['timings']

    domcred = adoc_to_domain_cred(adoc, cfg)
    content_cred = assess_doc_content_cred(adoc, cfg)

    agg_cred = aggregate_article_cred(domcred, content_cred, cfg)
    date_assessed = isodate.now_utc_timestamp()

    return {
        '@context': content.ci_context,
        '@type': 'ArticleCredibilityAssessment',
        'doc_url': article['url'],
        'item_assessed': article,
        # legacy (misspelled) key, kept for backwards compatibility
        'date_asessed': date_assessed,
        'date_assessed': date_assessed,
        'assessor': {
            '@context': content.ci_context,
            '@type': 'CredibilityAssessor',
            'name': 'ArticleCredibilityAssessor',
            'version': '20200207'
        },
        'doc_resolved_url': adoc.get('resolved_url', adoc.get('url')),
        'analyzed_doc': adoc,
        # the aggregated fields override any of the keys above
        **agg_cred,
        'sub_assessments': [domcred, content_cred],
        'timings': citimings.timing('assess_article_cred', start, [
            adoc_t,
            domcred.get('timings', None),
            content_cred.get('timings', None)
        ])
        # 'claims_in_doc': claim_creds,
        # 'domain_credibility': domcred,
        # 'content_credibility': content_cred
    }
コード例 #13
0
ファイル: predictor.py プロジェクト: expertailab/acred
def dummyPrediction(tweet):
    """Creates a placeholder `TweetCredibilityAssessment` for a tweet

    No analysis is performed: the credibility is a random number and
    the confidence is zero.

    :param tweet: dict with a `tweet_id` that can be converted to int
    :returns: a dummy `TweetCredibilityAssessment`
    :rtype: dict
    """
    start = citimings.start()
    prediction = {
        '@context': ci_context,
        '@type': 'TweetCredibilityAssessment',
    }
    prediction['tweet_id'] = int(tweet['tweet_id'])
    prediction['item_assessed'] = tweet
    prediction['credibility'] = random.random()
    prediction['confidence'] = 0.0
    prediction['explanation'] = (
        'Dummy prediction, no actual analysis performed.')
    prediction['sub_assessments'] = []
    prediction['date_assessed'] = isodate.now_utc_timestamp()
    prediction['assessor'] = {
        '@context': ci_context,
        'name': 'dummyCredibilityPredictor',
    }
    prediction['timings'] = citimings.timing('dummyPrediction', start)
    # deprecated, now as sub_assessments
    # 'sentences_in_tweets': [],
    # 'sentences_linked': []
    return prediction
コード例 #14
0
ファイル: sent_worthrev.py プロジェクト: expertailab/acred
def worthinesspreds_as_SentCheckWorthinessReview(mapped_pred, config):
    """Wraps a check-worthiness prediction as a `SentCheckWorthinessReview`

    :param mapped_pred: dict providing the `sentence`, its
      `ratingValue` and the `confidence` of the prediction
    :param config: configuration options, passed on to
      `checkWorthinessReviewer`
    :returns: a `SentCheckWorthinessReview` including its `identifier`
    :rtype: dict
    """
    super_types = content.super_types('SentCheckWorthinessReview')
    sentence = mapped_pred['sentence']
    item = content.as_sentence(sentence)
    rating_value = mapped_pred['ratingValue']
    rating = {
        '@type': 'Rating',
        'reviewAspect': 'checkworthiness',
        'ratingValue': rating_value,
        'confidence': mapped_pred['confidence'],
        'ratingExplanation': rating_exp(rating_value, sentence),
    }
    review = {
        "@context": ci_context,
        "@type": "SentCheckWorthinessReview",
        "additionalType": super_types,
        'reviewAspect': 'checkworthiness',
        'itemReviewed': item,
        'reviewRating': rating,
        'dateCreated': isodate.now_utc_timestamp(),
        "author": checkWorthinessReviewer(config),
    }
    # the identifier is calculated from the review without an identifier
    review['identifier'] = calc_worth_review_id(review)
    return review
コード例 #15
0
ファイル: dbsent_credrev.py プロジェクト: expertailab/acred
def aggregate_subReviews(db_Sentence, claimReview, webSiteCred, cfg):
    """Aggregates (claim and WebSite) reviews about a DB Sentence into a
       credibility review

    The most confident rating among the website rating and the
    normalised claim review rating is selected as the rating of the
    resulting review.

    :param db_Sentence: a `Sentence` in the Co-inform database

    :param claimReview: a `ClaimReview` for the db_Sentence. May be
      None if no claim review is available for the sentence. In
      general, the claim review will not have been normalised
      (i.e. mapped onto the co-inform accuracy/credibility scales)

    :param webSiteCred: a `WebSiteCredReview` for a webSite where the
    `db_Sentence` was published.

    :param cfg: configuration options
    :returns: a `DBSentCredReview`
    :rtype: dict
    """
    norm_claim_review = crn.normalise(claimReview, cfg)
    norm_claim_review = (
        {} if norm_claim_review is None else norm_claim_review)

    site_rating = None
    if webSiteCred:
        site_rating = websiteCredRev_as_qclaimCredRating(webSiteCred, cfg)
        assert type(site_rating['confidence']) == float

    claim_conf = dictu.get_in(
        norm_claim_review, ['reviewRating', 'confidence'], 0.0)
    assert type(claim_conf) == float

    candidates = (site_rating, norm_claim_review.get('reviewRating', None))
    subRatings = [rating for rating in candidates if rating is not None]

    chosen = agg.select_most_confident_rating(subRatings)
    if not chosen:
        chosen = {
            'ratingValue': 0.0,
            'confidence': 0.0,
            'ratingExplanation': 'No website or claimReview associated with this sentence'
        }

    # only keep the reviews that are actually available
    isBasedOn = [
        review for review in (webSiteCred, norm_claim_review)
        if review is not None and review != {}
    ]

    reviewCount = agg.total_reviewCount(subRatings) + len(isBasedOn)
    ratingCount = agg.total_ratingCount(subRatings)

    # should be a superset of [ibo.get('author') for ibo in isBasedOn]
    sub_bots = default_sub_bots(cfg)

    # link to the first document the sentence appears in, if any
    appearances = db_Sentence.get('appearance', [])
    first_doc = appearances[0] if appearances else None
    link_to_doc = md_link_to_doc(first_doc)

    revRating = {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'reviewCount': reviewCount,
        'ratingCount': ratingCount,
        'ratingValue': chosen.get('ratingValue', 0.0),
        'confidence': chosen.get('confidence', 0.0),
        'ratingExplanation': chosen.get('ratingExplanation')
    }

    review = {
        '@context': "http://coinform.eu",
        '@type': "DBSentCredReview",
        'additionalType': content.super_types('DBSentCredReview'),
        'itemReviewed': db_Sentence,
    }
    sent_text = db_Sentence.get('text', '??')
    if link_to_doc:
        doc_part = ', in %s, ' % (link_to_doc)
    else:
        doc_part = ''
    review['text'] = 'Sentence `%s` %sseems *%s* %s' % (
        sent_text,
        doc_part,
        credlabel.rating_label(revRating, cfg),
        chosen.get('ratingExplanation'))
    review['reviewRating'] = revRating
    review['reviewAspect'] = 'credibility'
    review['isBasedOn'] = isBasedOn
    review['dateCreated'] = isodate.now_utc_timestamp()
    review['author'] = bot_info(sub_bots, cfg)
    return review
コード例 #16
0
ファイル: tweet_credrev.py プロジェクト: expertailab/acred
def aggregate_subReviews(subReviews, tweet, cfg):
    """Creates an aggregate review based on subReviews for tweet

    Refactoring of `aggregate_tweet_cred`

    Sub-reviews are split by a minimum confidence threshold. If none is
    confident enough, the tweet gets a rating with zero value and
    confidence; otherwise the rating of the least credible confident
    sub-review is used.

    :param subReviews: list of credibility reviews for (parts of) the
      tweet to review. `None` is treated as an empty list.
    :param tweet: the tweet being reviewed
    :param cfg: config options; `cred_conf_threshold` (default 0.7) is
      the minimum confidence for a sub-review to be taken into account
    :returns: a credibility review for the `tweet` to review that
      contains an `AggregateRating` based on the `subReviews`
    :rtype: dict
    """
    # Normalise first, so `isBasedOn` below is a list and never `None`.
    if subReviews is None:
        subReviews = []

    # TODO: extract sub_bots and compare to default_sub_bots
    partial_TweetCredReview = {
        '@context': ci_context,
        '@type': 'TweetCredReview',
        'itemReviewed': tweet,
        'isBasedOn': subReviews,
        'dateCreated': isodate.now_utc_timestamp(),
        'author': default_bot_info(cfg)
    }
    tweet_mdref = markdown_ref_for_tweet(tweet, cfg)

    subRatings = [
        sr.get('reviewRating') for sr in subReviews
        if sr.get('reviewRating') is not None
    ]

    # filter by min confidence (single pass, one filter call per review)
    conf_threshold = float(cfg.get('cred_conf_threshold', 0.7))
    filter_fn = agg.filter_review_by_min_confidence(conf_threshold)
    conf_subRevs, igno_subRevs = [], []
    for sr in subReviews:
        (conf_subRevs if filter_fn(sr) else igno_subRevs).append(sr)

    # no (confident) subReviews
    if not conf_subRevs:
        part_rating = {
            '@type': 'Rating',
            'ratingValue': 0.0,
            'confidence': 0.0,
            'reviewAspect': 'credibility'
        }
        if not subReviews:
            # This literal has no placeholder: applying `% tweet_mdref`
            # to it raised a TypeError. The tweet reference is already
            # part of the review `text` built below.
            msg = "we could not extract (or assess credibility of) its sentences or linked documents"
            rating = {**part_rating, 'ratingExplanation': msg}
        else:
            # every sub-review was ignored here, so there is a first one
            example = igno_subRevs[0].get(
                'text', '(missing explanation for part)')
            msg = 'we could not assess the credibility of its %d sentences or linked documents.%s' % (
                len(subReviews), '\nFor example:\n * %s' % (example,))
            rating = {
                **part_rating,
                '@type': 'AggregateRating',
                'ratingExplanation': msg,
                'ratingCount': agg.total_ratingCount(subRatings),
                'reviewCount':
                agg.total_reviewCount(subRatings) + len(subReviews)
            }
        return {
            **partial_TweetCredReview,
            'text': '%s seems *%s* as %s' % (
                tweet_mdref, credlabel.rating_label(rating, cfg), msg),
            'reviewRating': rating
        }

    # select least credible subReview
    subRevs_by_val = sorted(
        conf_subRevs,
        key=lambda rev: dictu.get_in(
            rev, ['reviewRating', 'ratingValue'], 0.0))
    least_cred_rev = subRevs_by_val[0]
    msg = 'based on its least credible part:\n%s' % (dictu.get_in(
        least_cred_rev, ['text'], '(missing explanation for part)'),)
    revRating = {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'ratingValue': dictu.get_in(
            least_cred_rev, ['reviewRating', 'ratingValue'], 0.0),
        'confidence': dictu.get_in(
            least_cred_rev, ['reviewRating', 'confidence'], 0.0),
        'ratingExplanation': msg,
        'ratingCount': agg.total_ratingCount(subRatings),
        'reviewCount': agg.total_reviewCount(subRatings) + len(subReviews)
    }
    return {
        **partial_TweetCredReview,
        'isBasedOn': subRevs_by_val + igno_subRevs,  # just a re-ordering
        'text': '%s seems *%s* %s' % (
            tweet_mdref, credlabel.rating_label(revRating, cfg), msg),
        'reviewRating': revRating
    }
コード例 #17
0
def aggregate_subReviews(simple_sentSimReview, stanceReview, cfg):
    """Aggregates a similarity and stance review into a polar similarity review

    If there is no stance review, the similarity review is returned
    unchanged.

    :param simple_sentSimReview: a (non-polar) `SentSimilarityReview`
      for a `sentPair`
    :param stanceReview: a `SentStanceReview` for the same `sentPair`
      as `simple_sentSimReview`. May be `None`.
    :param cfg: configuration options
    :returns: a `SentPolarSimilarityReview`, or `simple_sentSimReview`
      itself when `stanceReview` is `None`
    :rtype: dict
    """
    assert simple_sentSimReview is not None
    if stanceReview is None:
        return simple_sentSimReview
    sim = dictu.get_in(simple_sentSimReview, ['reviewRating', 'ratingValue'])
    sent_stance = dictu.get_in(stanceReview, ['reviewRating', 'ratingValue'],
                               'unrelated')
    # The default used to be the *string* '0.5'; confidences are floats
    # elsewhere and this value is both passed to `calc_agg_polarsim` and
    # emitted as the rating's `confidence`.
    stance_conf = dictu.get_in(stanceReview, ['reviewRating', 'confidence'],
                               0.5)

    sent_pair = simple_sentSimReview['itemReviewed']
    assert stanceReview['itemReviewed'] == sent_pair, '%s != %s' % (
        stanceReview['itemReviewed'], sent_pair)

    agg_sim = calc_agg_polarsim(sim=sim,
                                sent_stance=sent_stance,
                                sent_stance_conf=stance_conf,
                                cfg=cfg)
    sub_reviews = [
        sr for sr in [simple_sentSimReview, stanceReview] if sr is not None
    ]
    sub_ratings = [
        srev.get('reviewRating') for srev in sub_reviews
        if srev.get('reviewRating') is not None
    ]
    headline = simlabel.claim_rel_str(sim, sent_stance)
    # TODO: more than an explanation this is the review body
    #  the explanation would be that one model said the sentences were x similar
    #  while another said they were (stance)
    explanation = 'Sentence `%s` %s `%s`' % (
        dictu.get_in(sent_pair, ['sentA', 'text']),
        headline,
        dictu.get_in(sent_pair, ['sentB', 'text']))
    sub_bots = [
        simple_sentSimReview.get('author', {}),
        stanceReview.get('author', {})
    ]
    return {
        '@context': 'http://coinform.eu',
        '@type': 'SentPolarSimilarityReview',
        'additionalType': content.super_types('SentPolarSimilarityReview'),
        'itemReviewed': sent_pair,
        'headline': headline,
        'reviewAspect': 'polarSimilarity',
        'reviewBody': explanation,
        'reviewRating': {
            '@type': 'AggregateRating',
            'reviewAspect': 'polarSimilarity',
            'ratingValue': agg_sim,
            'confidence': stance_conf,
            'reviewCount': len(sub_reviews),
            'ratingCount': agg.total_ratingCount(sub_ratings),
            'ratingExplanation': explanation
        },
        'isBasedOn': sub_reviews,
        'dateCreated': isodate.now_utc_timestamp(),
        'author': bot_info(sub_bots, cfg)
    }