def bot_info(sub_bots, cfg):
    """Describe the tweet credibility reviewer bot.

    :param sub_bots: list of bot items this reviewer builds on; stored
      as-is under ``isBasedOn``
    :param cfg: config options (not read by this function)
    :returns: a `TweetCredReviewer` item whose ``identifier`` is a hash
      of the fields selected by ``content.ident_keys``
    :rtype: dict
    """
    bot_type = 'TweetCredReviewer'
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Tweet Credibility Reviewer',
        'description': (
            'Reviews the credibility of a tweet by reviewing the sentences '
            'in the tweet and the (textual) documents linked by the tweet'),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-02T18:00:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python', 'nltk', 'Cogito'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
        'taskConfiguration': {},
    }
    # The identifier is derived only from the identifying subset of fields
    id_fields = dictu.select_keys(description,
                                  content.ident_keys(description))
    return dict(description, identifier=hashu.hash_dict(id_fields))
def base_ArticleCredReview(cfg):
    """Create the base fields of an `ArticleCredReview` item.

    :param cfg: config options (not read by this function)
    :returns: a dict with the context, type, super types and a
      ``dateCreated`` timestamp taken at call time
    :rtype: dict
    """
    review_type = 'ArticleCredReview'
    skeleton = {'@context': content.ci_context}
    skeleton['@type'] = review_type
    skeleton['additionalType'] = content.super_types(review_type)
    skeleton['dateCreated'] = isodate.now_utc_timestamp()
    return skeleton
def bot_info(sub_bots, cfg):
    """Describe the aggregate query-sentence credibility reviewer bot.

    :param sub_bots: list of bot items this reviewer builds on; stored
      as-is under ``isBasedOn``
    :param cfg: config options; ``acred_pred_claim_search_url`` is read
      (with a localhost default) into the launch configuration
    :returns: an `AggQSentCredReviewer` item whose ``identifier`` is a
      hash of the fields selected by ``content.ident_keys``
    :rtype: dict
    """
    bot_type = 'AggQSentCredReviewer'
    claim_search_url = cfg.get(
        'acred_pred_claim_search_url',
        'http://localhost:8070/test/api/v1/claim/internal-search')
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Aggregate Query Sentence Credibility Reviewer',
        # NOTE: the 'setence' typo is part of the emitted value and is
        # deliberately left untouched here
        'description': (
            'Reviews the credibility of a query setence by comparing it to '
            'semantically similar sentences in the Co-inform DB and the '
            'credibility of those.'),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-03-19T15:09:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {
            'acred_pred_claim_search_url': claim_search_url,
        },
    }
    id_fields = dictu.select_keys(description,
                                  content.ident_keys(description))
    return dict(description, identifier=hashu.hash_dict(id_fields))
def bot_info(sub_bots, cfg):
    """Describe the DB sentence credibility reviewer bot.

    :param sub_bots: list of bot items this reviewer builds on; stored
      as-is under ``isBasedOn``
    :param cfg: config options; reads
      ``factchecker_website_to_qclaim_confidence_penalty_factor``
      (coerced to float, default 0.5) and ``acred_factchecker_urls``
      (default empty list)
    :returns: a `DBSentCredReviewer` item including its ``identifier``
    :rtype: dict
    """
    bot_type = 'DBSentCredReviewer'
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'name': 'ESI DB Sentence Credibility Reviewer',
        'description': (
            'Estimates the credibility of a sentence in the Co-inform DB '
            'based on known ClaimReviews or websites where the sentence has '
            'been published.'),
        'additionalType': content.super_types(bot_type),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': dateCreated,
        'softwareVersion': version,
        'url': 'http://coinform.eu/bot/DBSentCredReviewer/%s' % version,
        'applicationSuite': 'Co-inform',
        'isBasedOn': sub_bots,
        'launchConfiguration': {
            'factchecker_website_to_qclaim_confidence_penalty_factor': float(
                cfg.get(
                    'factchecker_website_to_qclaim_confidence_penalty_factor',
                    0.5)),
            'acred_factchecker_urls': cfg.get('acred_factchecker_urls', []),
        },
    }
    id_fields = dictu.select_keys(description,
                                  content.ident_keys(description))
    description['identifier'] = hashu.hash_dict(id_fields)
    return description
def from_old_DomainCredibility(dom_cred, cfg):
    """Map a legacy `DomainCredibility` dict onto a `WebSiteCredReview`.

    :param dom_cred: a `DomainCredibility` dict; must contain an
      ``assessments`` entry (a ``KeyError`` is raised otherwise)
    :param cfg: configuration options (not read by this function)
    :returns: a `WebSiteCredReview`
    :rtype: dict
    """
    # 'itemReviewed' holds the domain as a plain string; rebuild a WebSite
    site = content.str_as_website(
        dom_cred.get('itemReviewed', 'missing_website'))
    cred_value = dictu.get_in(dom_cred, ['credibility', 'value'], 0.0)

    assessments = dom_cred['assessments']
    n_assessments = len(assessments)
    rating_explanation = 'based on %d review(s) by external rater(s)%s' % (
        n_assessments, example_raters_markdown(dom_cred))

    review = {
        '@context': 'http://coinform.eu',
        '@type': 'WebSiteCredReview',
        'additionalType': content.super_types('WebSiteCredReview'),
        'itemReviewed': site,
    }
    review['text'] = 'Site `%s` seems *%s* %s' % (
        site.get('name', '??'),
        credlabel.describe_credval(cred_value, None),
        rating_explanation)
    review['author'] = misinfoMeSourceCredReviewer()
    review['reviewRating'] = {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'ratingValue': cred_value,
        'confidence': dictu.get_in(
            dom_cred, ['credibility', 'confidence'], 0.5),
        'ratingExplanation': rating_explanation,
        'reviewCount': n_assessments,
        'ratingCount': n_assessments,
    }
    review['dateCreated'] = dom_cred.get(
        'dateCreated', isodate.now_utc_timestamp())
    review['reviewAspect'] = 'credibility'
    review['isBasedOn'] = []  # TODO:
    review['isBasedOn_assessments'] = assessments
    review['timings'] = dom_cred.get('timings', {})
    return review
def normalise(claimReview, cfg):
    """Normalise a `ClaimReview` into a `NormalisedClaimReview`.

    The candidate ratings produced by ``normalised_claimReview_ratings``
    are reduced to the most confident one, which becomes the aggregate
    rating of the result. When no rating can be selected, a fallback
    rating with value and confidence ``0.0`` is used; its explanation
    links to the ``url`` of the original review (or ``missing_url``).

    :param claimReview: a `ClaimReview` dict, or None
    :param cfg: configuration options, passed on to ``bot_info`` and
      ``credlabel.rating_label``
    :returns: a `NormalisedClaimReview`, or None when `claimReview` is None
    :rtype: dict
    :raises AssertionError: if `claimReview` is not recognised by
      ``content.is_claimReview`` or the selected rating is malformed
    """
    if claimReview is None:
        return None
    assert content.is_claimReview(claimReview), "%s" % (claimReview)

    sub_ratings = normalised_claimReview_ratings(claimReview)
    most_confident = agg.select_most_confident_rating(sub_ratings)
    if most_confident is None:
        # The original literal embedded the un-evaluated expression
        # `claimReview,get("url", "missing_url")` in the text; format the
        # actual url into the markdown link instead.
        review_url = claimReview.get('url', 'missing_url')
        agg_rating = {
            '@type': 'AggregateRating',
            'reviewAspect': 'credibility',
            'reviewCount': 1,  # the original claimReview
            'ratingCount': len(sub_ratings),
            'ratingValue': 0.0,
            'confidence': 0.0,
            'ratingExplanation':
                'Failed to interpret original [review](%s)' % (review_url,)
        }
    else:
        agg_rating = {
            **most_confident,
            '@type': 'AggregateRating',
            'reviewCount': 1,
            'ratingCount': len(sub_ratings)
        }
    assert type(agg_rating['confidence']) == float
    assert 'ratingExplanation' in agg_rating, '%s' % (most_confident)

    return {
        '@context': ci_context,
        '@type': 'NormalisedClaimReview',
        'additionalType': content.super_types('NormalisedClaimReview'),
        'author': bot_info(cfg),
        'text': 'Claim `%s` is *%s* %s' % (
            claimReview.get('claimReviewed'),
            credlabel.rating_label(agg_rating, cfg),
            agg_rating.get('ratingExplanation', '(missing explanation)')),
        'claimReviewed': claimReview.get('claimReviewed'),
        'dateCreated': isodate.now_utc_timestamp(),
        'isBasedOn': [claimReview] + sub_ratings,
        'reviewAspect': 'credibility',
        'reviewRating': agg_rating
    }
def bot_info(sub_bots, cfg):
    """Describe the article credibility reviewer bot.

    :param sub_bots: bot items this reviewer builds on; stored as-is
      under ``isBasedOn``
    :param cfg: config options; the task configuration is read from it,
      falling back to the defaults spelled out below
    :returns: an `ArticleCredReviewer` item whose ``identifier`` is a
      hash of the fields selected by ``content.ident_keys``
    :rtype: dict
    """
    bot_type = 'ArticleCredReviewer'
    default_colls = [
        'generic', 'pilot-se', 'pilot-gr', 'pilot-at',
        'factcheckers', 'fc-dev'
    ]
    # The target collection falls back to the default collection, if any
    fallback_coll = cfg.get('default_url_collect_coll', None)
    task_config = {
        'cred_conf_threshold': cfg.get('cred_conf_threshold', 0.7),
        'max_claims_in_doc': int(cfg.get('max_claims_in_doc', 5)),
        'relsents_in_colls': cfg.get('relsents_in_colls', default_colls),
        'target_url_collect_coll': cfg.get('target_url_collect_coll',
                                           fallback_coll),
        'acred_review_format': cfg.get('acred_review_format', 'schema.org'),
    }
    description = {
        '@context': content.ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Article Credibility Reviewer',
        'description': (
            'Reviews the credibility of an article by (i) semantically '
            'analysing it to detect relevant claims (ii) getting credibility '
            'reviews for the claims and (iii) getting a credibility reviews '
            'for the site(s) that published the article.'),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-01T17:02:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python', 'Cogito'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},  # no launch configs so far
        'taskConfiguration': task_config,
    }
    id_fields = dictu.select_keys(description,
                                  content.ident_keys(description))
    return dict(description, identifier=hashu.hash_dict(id_fields))
def bot_info(sub_bots, cfg):
    """Describe the sentence polarity reviewer bot.

    :param sub_bots: list of bot items this reviewer builds on; stored
      as-is under ``isBasedOn``
    :param cfg: config options (not read by this function)
    :returns: a `SentPolarityReviewer` item including its ``identifier``
    :rtype: dict
    """
    bot_type = 'SentPolarityReviewer'
    bot_url = 'http://coinform.eu/bot/SentencePolarSimilarityReviewer/%s' % version
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'name': 'ESI Sentence Polarity Reviewer',
        'description': 'Estimates the polar similarity between two sentences',
        'additionalType': content.super_types(bot_type),
        'softwareVersion': version,
        'dateCreated': '2020-03-27T22:54:00Z',
        'url': bot_url,
        'applicationSuite': 'Co-inform',
        'author': bot_describer.esiLab_organization(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
    }
    id_fields = dictu.select_keys(description,
                                  content.ident_keys(description))
    description['identifier'] = hashu.hash_dict(id_fields)
    return description
def bot_info(cfg):
    """Describe the ClaimReview normalizer bot.

    :param cfg: config options (not read by this function)
    :returns: a `ClaimReviewNormalizer` item including its ``identifier``
    :rtype: dict
    """
    bot_type = 'ClaimReviewNormalizer'
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'name': 'ESI ClaimReview Credibility Normalizer',
        'description': (
            'Analyses the alternateName and numerical rating value for a '
            'ClaimReview and tries to convert that into a normalised '
            'credibility rating'),
        'additionalType': content.super_types(bot_type),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': dateCreated,
        'softwareVersion': version,
        'url': 'http://coinform.eu/bot/ClaimReviewNormalizer/%s' % version,
        'applicationSuite': 'Co-inform',
        'isBasedOn': [],  # this bot has no dependencies
        'launchConfiguration': {},  # nothing configurable (yet?)
    }
    id_fields = dictu.select_keys(description,
                                  content.ident_keys(description))
    description['identifier'] = hashu.hash_dict(id_fields)
    return description
def misinfoMeSourceCredReviewer():
    """Describe the (external) MisinfoMe source credibility reviewer bot.

    :returns: a `MisinfoMeSourceCredReviewer` item whose ``identifier``
      is a hash of the fields selected by ``content.ident_keys``
    :rtype: dict
    """
    bot_type = 'MisinfoMeSourceCredReviewer'
    # We do not control this bot, so we assume its version (and thus its
    # results) may change on a weekly basis: the start of the current
    # week stands in for the software version.
    week_start = isodate.start_of_week_utc_timestamp(
        datetime.datetime.utcnow())
    description = {
        '@context': 'http://coinform.eu',
        '@type': bot_type,
        'softwareVersion': week_start,
        'additionalType': content.super_types(bot_type),
        'url': misinfome_url,
        'applicationSuite': 'MisinfoMe',
    }
    id_fields = dictu.select_keys(description,
                                  content.ident_keys(description))
    return dict(description, identifier=hashu.hash_dict(id_fields))
def bot_info(sub_bots, cfg):
    """Describe the top-level credibility reviewer bot.

    :param sub_bots: list of bot items this reviewer delegates to; stored
      as-is under ``isBasedOn``
    :param cfg: config options (not read by this function)
    :returns: a `CredReviewer` item whose ``identifier`` is a hash of the
      fields selected by ``content.itemref_keys``
    :rtype: dict
    """
    bot_type = 'CredReviewer'
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Top-level Credibility Reviewer',
        'description': (
            'Reviews the credibility of various supported content items, '
            'mainly by delegating to the appropriate content-level reviewer'),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-02T18:05:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
        'taskConfiguration': {},
    }
    # NOTE(review): the hash input is selected with `content.itemref_keys`,
    # while the other reviewers' `bot_info` functions select it with
    # `content.ident_keys` -- confirm this difference is intended.
    id_fields = dictu.select_keys(description,
                                  content.itemref_keys(description))
    return dict(description, identifier=hashu.hash_dict(id_fields))
def worthinesspreds_as_SentCheckWorthinessReview(mapped_pred, config):
    """Wrap a check-worthiness prediction as a `SentCheckWorthinessReview`.

    :param mapped_pred: dict providing at least the keys ``sentence``,
      ``ratingValue`` and ``confidence``
    :param config: configuration options, passed on to
      ``checkWorthinessReviewer``
    :returns: a `SentCheckWorthinessReview` including an ``identifier``
      computed by ``calc_worth_review_id``
    :rtype: dict
    """
    review_type = 'SentCheckWorthinessReview'
    aspect = 'checkworthiness'
    sentence = mapped_pred['sentence']

    review = {
        '@context': ci_context,
        '@type': review_type,
        'additionalType': content.super_types(review_type),
        'reviewAspect': aspect,
        'itemReviewed': content.as_sentence(sentence),
        'reviewRating': {
            '@type': 'Rating',
            'reviewAspect': aspect,
            'ratingValue': mapped_pred['ratingValue'],
            'confidence': mapped_pred['confidence'],
            'ratingExplanation': rating_exp(mapped_pred['ratingValue'],
                                            sentence),
        },
        'dateCreated': isodate.now_utc_timestamp(),
        'author': checkWorthinessReviewer(config),
    }
    # The identifier is computed from the otherwise complete review
    review['identifier'] = calc_worth_review_id(review)
    return review
def aggregate_subReviews(simple_sentSimReview, stanceReview, cfg):
    """Aggregates a similarity and stance review into a polar similarity review

    When no stance review is available, the similarity review is returned
    unchanged. Otherwise the similarity value, the stance label and the
    stance confidence are combined by ``calc_agg_polarsim``; the stance
    confidence is also reported as the confidence of the aggregate rating.

    :param simple_sentSimReview: a (non-polar) `SentSimilarityReview` for a
      `sentPair`. Must not be None.
    :param stanceReview: a `SentStanceReview` for the same `sentPair` as
      `simple_sentSimReview`, or None. A missing stance value defaults to
      ``'unrelated'`` and a missing confidence to ``0.5``.
    :param cfg: configuration options
    :returns: a `SentPolarSimilarityReview`, or `simple_sentSimReview`
      itself when `stanceReview` is None
    :rtype: dict
    :raises AssertionError: if `simple_sentSimReview` is None or the two
      reviews are about different sentence pairs
    """
    assert simple_sentSimReview is not None
    if stanceReview is None:
        return simple_sentSimReview

    sim = dictu.get_in(simple_sentSimReview, ['reviewRating', 'ratingValue'])
    sent_stance = dictu.get_in(stanceReview,
                               ['reviewRating', 'ratingValue'],
                               'unrelated')
    # The default must be numeric: it feeds `calc_agg_polarsim` and is
    # emitted as the rating confidence (it used to be the string '0.5').
    stance_conf = dictu.get_in(stanceReview,
                               ['reviewRating', 'confidence'],
                               0.5)

    sent_pair = simple_sentSimReview['itemReviewed']
    assert stanceReview['itemReviewed'] == sent_pair, '%s != %s' % (
        stanceReview['itemReviewed'], sent_pair)

    agg_sim = calc_agg_polarsim(sim=sim,
                                sent_stance=sent_stance,
                                sent_stance_conf=stance_conf,
                                cfg=cfg)

    # Both reviews are known to be non-None at this point
    sub_reviews = [simple_sentSimReview, stanceReview]
    sub_ratings = [
        srev.get('reviewRating') for srev in sub_reviews
        if srev.get('reviewRating') is not None
    ]

    headline = simlabel.claim_rel_str(sim, sent_stance)
    # TODO: more than an explanation this is the review body
    #  the explanation would be that one model said the sentences were x
    #  similar while another said they were (stance)
    explanation = 'Sentence `%s` %s `%s`' % (
        dictu.get_in(sent_pair, ['sentA', 'text']),
        headline,
        dictu.get_in(sent_pair, ['sentB', 'text']))

    sub_bots = [
        simple_sentSimReview.get('author', {}),
        stanceReview.get('author', {})
    ]
    return {
        '@context': 'http://coinform.eu',
        '@type': 'SentPolarSimilarityReview',
        'additionalType': content.super_types('SentPolarSimilarityReview'),
        'itemReviewed': sent_pair,
        'headline': headline,
        'reviewAspect': 'polarSimilarity',
        'reviewBody': explanation,
        'reviewRating': {
            '@type': 'AggregateRating',
            'reviewAspect': 'polarSimilarity',
            'ratingValue': agg_sim,
            'confidence': stance_conf,
            'reviewCount': len(sub_reviews),
            'ratingCount': agg.total_ratingCount(sub_ratings),
            'ratingExplanation': explanation
        },
        'isBasedOn': sub_reviews,
        'dateCreated': isodate.now_utc_timestamp(),
        'author': bot_info(sub_bots, cfg)
    }
def aggregate_subReviews(db_Sentence, claimReview, webSiteCred, cfg):
    """Combine claim and website reviews of a DB sentence into one review.

    The claim review is normalised first; the website review, when given,
    is converted into a rating for the sentence. The more confident of the
    available ratings is reported. When neither is available, a rating
    with value and confidence ``0.0`` is reported.

    :param db_Sentence: a `Sentence` in the Co-inform database
    :param claimReview: a `ClaimReview` for the db_Sentence. May be None
      if no claim review is available for the sentence. In general, the
      claim review will not have been normalised (i.e. mapped onto the
      co-inform accuracy/credibility scales)
    :param webSiteCred: a `WebSiteCredReview` for a webSite where the
      `db_Sentence` was published; skipped when falsy
    :param cfg: configuration options
    :returns: a `DBSentCredReview`
    :rtype: dict
    """
    norm_claim_rev = crn.normalise(claimReview, cfg)
    if norm_claim_rev is None:
        norm_claim_rev = {}

    site_rating = None
    if webSiteCred:
        site_rating = websiteCredRev_as_qclaimCredRating(webSiteCred, cfg)
        assert type(site_rating['confidence']) == float
    assert type(dictu.get_in(norm_claim_rev,
                             ['reviewRating', 'confidence'],
                             0.0)) == float

    # Keep only the ratings that are actually available
    sub_ratings = []
    for candidate in (site_rating, norm_claim_rev.get('reviewRating', None)):
        if candidate is not None:
            sub_ratings.append(candidate)

    chosen = agg.select_most_confident_rating(sub_ratings)
    if not chosen:
        chosen = {
            'ratingValue': 0.0,
            'confidence': 0.0,
            'ratingExplanation':
                'No website or claimReview associated with this sentence'
        }

    # Reviews backing the result: drop missing and empty ones
    based_on = []
    for sub_review in (webSiteCred, norm_claim_rev):
        if sub_review is not None and sub_review != {}:
            based_on.append(sub_review)

    n_reviews = agg.total_reviewCount(sub_ratings) + len(based_on)
    n_ratings = agg.total_ratingCount(sub_ratings)

    # should be a superset of [ibo.get('author') for ibo in based_on]
    sub_bots = default_sub_bots(cfg)

    # Only the first document the sentence appears in is linked
    appearances = db_Sentence.get('appearance', [])
    first_doc = None
    if appearances:
        first_doc = appearances[0]
    doc_link = md_link_to_doc(first_doc)

    rating = {
        '@type': 'AggregateRating',
        'reviewAspect': 'credibility',
        'reviewCount': n_reviews,
        'ratingCount': n_ratings,
        'ratingValue': chosen.get('ratingValue', 0.0),
        'confidence': chosen.get('confidence', 0.0),
        'ratingExplanation': chosen.get('ratingExplanation')
    }

    location = ''
    if doc_link:
        location = ', in %s, ' % (doc_link)

    review = {
        '@context': "http://coinform.eu",
        '@type': "DBSentCredReview",
        'additionalType': content.super_types('DBSentCredReview'),
        'itemReviewed': db_Sentence,
    }
    review['text'] = 'Sentence `%s` %sseems *%s* %s' % (
        db_Sentence.get('text', '??'),
        location,
        credlabel.rating_label(rating, cfg),
        chosen.get('ratingExplanation'))
    review['reviewRating'] = rating
    review['reviewAspect'] = 'credibility'
    review['isBasedOn'] = based_on
    review['dateCreated'] = isodate.now_utc_timestamp()
    review['author'] = bot_info(sub_bots, cfg)
    return review