def bot_info(sub_bots, cfg):
    """Build the description item for the AggQSentCredReviewer bot.

    :param sub_bots: value stored unchanged under ``isBasedOn``
    :param cfg: config options; ``acred_pred_claim_search_url`` is read
      into the launch configuration (a localhost URL is the fallback)
    :returns: the description plus an ``identifier``, which is the hash
      of the fields selected by ``content.ident_keys``
    :rtype: dict
    """
    fallback_search_url = (
        'http://localhost:8070/test/api/v1/claim/internal-search')
    bot = {
        '@context': ci_context,
        '@type': 'AggQSentCredReviewer',
        'additionalType': content.super_types('AggQSentCredReviewer'),
        'name': 'ESI Aggregate Query Sentence Credibility Reviewer',
        'description': 'Reviews the credibility of a query setence by comparing it to semantically similar sentences in the Co-inform DB and the credibility of those.',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-03-19T15:09:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {
            'acred_pred_claim_search_url': cfg.get(
                'acred_pred_claim_search_url', fallback_search_url)
        }
    }
    # The identifier is computed from the identifying subset of fields
    # only; the returned dict is a copy so `bot` itself is left as built.
    ident_fields = dictu.select_keys(bot, content.ident_keys(bot))
    described = dict(bot)
    described['identifier'] = hashu.hash_dict(ident_fields)
    return described
def bot_info(sub_bots, cfg):
    """Build the description item for the TweetCredReviewer bot.

    :param sub_bots: a list of bot items used by this TweetCredReviewer;
      stored unchanged under ``isBasedOn``
    :param cfg: config options (not read by this function)
    :returns: a `TweetCredReviewer` item including an ``identifier``
      hashed from the fields selected by ``content.ident_keys``
    :rtype: dict
    """
    bot = {
        '@context': ci_context,
        '@type': 'TweetCredReviewer',
        'additionalType': content.super_types('TweetCredReviewer'),
        'name': 'ESI Tweet Credibility Reviewer',
        'description': 'Reviews the credibility of a tweet by reviewing the sentences in the tweet and the (textual) documents linked by the tweet',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-02T18:00:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python', 'nltk', 'Cogito'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
        'taskConfiguration': {}
    }
    ident_fields = dictu.select_keys(bot, content.ident_keys(bot))
    described = dict(bot)
    described['identifier'] = hashu.hash_dict(ident_fields)
    return described
def bot_info(sub_bots, cfg):
    """Build the description item for the DBSentCredReviewer bot.

    :param sub_bots: value stored unchanged under ``isBasedOn``
    :param cfg: config options; reads
      ``factchecker_website_to_qclaim_confidence_penalty_factor``
      (converted to float, default 0.5) and ``acred_factchecker_urls``
      (default empty list) into the launch configuration
    :returns: the description plus an ``identifier`` hashed from the
      fields selected by ``content.ident_keys``
    :rtype: dict
    """
    penalty_key = 'factchecker_website_to_qclaim_confidence_penalty_factor'
    bot = {
        '@context': ci_context,
        '@type': 'DBSentCredReviewer',
        'name': 'ESI DB Sentence Credibility Reviewer',
        'description': 'Estimates the credibility of a sentence in the Co-inform DB based on known ClaimReviews or websites where the sentence has been published.',
        'additionalType': content.super_types('DBSentCredReviewer'),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': dateCreated,
        'softwareVersion': version,
        'url': 'http://coinform.eu/bot/DBSentCredReviewer/%s' % version,
        'applicationSuite': 'Co-inform',
        'isBasedOn': sub_bots,
        'launchConfiguration': {
            penalty_key: float(cfg.get(penalty_key, 0.5)),
            'acred_factchecker_urls': cfg.get('acred_factchecker_urls', [])
        }
    }
    return {
        **bot,
        'identifier': hashu.hash_dict(
            dictu.select_keys(bot, content.ident_keys(bot)))
    }
def calc_worth_review_id(worth_review):
    """Compute an id code for a worth review.

    Only the keys listed in ``sentWorthReview_schema['ident_keys']`` are
    taken into account when hashing.

    :param worth_review: a `SentWorthReview` dict
    :returns: a hashcode that tries to capture the identity of the
      worth review
    :rtype: str
    """
    id_keys = sentWorthReview_schema['ident_keys']
    id_fields = dictu.select_keys(worth_review, id_keys)
    return hashu.hash_dict(id_fields)
def calc_stance_reviewer_id(stance_reviewer):
    """Compute an id code for a stance reviewer.

    Only the keys listed in ``sentStanceReviewer_schema['ident_keys']``
    are taken into account when hashing.

    :param stance_reviewer: a `SentStanceReviewer` dict
    :returns: a hashcode that tries to capture the identity of the
      stance reviewer
    :rtype: str
    """
    id_keys = sentStanceReviewer_schema['ident_keys']
    id_fields = dictu.select_keys(stance_reviewer, id_keys)
    return hashu.hash_dict(id_fields)
def test_select_keys_02():
    """select_keys keeps nested dict values of the selected keys as they are."""
    source = {
        'a': 1,
        'b': 2,
        'c': {'d': 'e', 'f': 'g'},
    }
    expected = {
        'a': 1,
        'c': {'d': 'e', 'f': 'g'},
    }
    selected = dictu.select_keys(source, ['a', 'c'])
    assert expected == selected
def calc_sim_reviewer_id(sim_reviewer):
    """Compute an id code for a sim_reviewer.

    The hash is taken over a fixed list of fields of the reviewer (type,
    name, creation date, version, dependencies and launch configuration).

    :param sim_reviewer: a `SemSentSimReviewer` dict
    :returns: a hashcode that tries to capture the identity of the
      sim_reviewer
    :rtype: str
    """
    id_keys = [
        '@type',
        'name',
        'dateCreated',
        'softwareVersion',
        'isBasedOn',
        'launchConfiguration',
    ]
    id_fields = dictu.select_keys(sim_reviewer, id_keys)
    return hashu.hash_dict(id_fields)
def calc_identifier(item, cfg):
    """Calculate the identifier for a data item.

    The identifier is the hash of a subset of the item's fields (those
    given by ``ident_keys``), after that subset has been passed through
    ``item_with_refs``.  Any nested items must already have an
    identifier.

    :param item: the item for which to calculate the identifier; it must
      satisfy ``content.is_item`` and must not yet have an
      ``identifier`` key (both are asserted)
    :param cfg: config options
    :returns: a unique identifier within acred.
    :rtype: str
    """
    assert content.is_item(item)
    assert 'identifier' not in item
    keys = ident_keys(item, cfg)
    identifying_part = dictu.select_keys(item, keys)
    return hashu.hash_dict(item_with_refs(identifying_part, cfg))
def bot_info(sub_bots, cfg):
    """Build the description item for the ArticleCredReviewer bot.

    :param sub_bots: bot items used by this ArticleCredReviewer; stored
      unchanged under ``isBasedOn``
    :param cfg: config options; the task configuration is filled from
      ``cred_conf_threshold``, ``max_claims_in_doc``,
      ``relsents_in_colls``, ``target_url_collect_coll`` (falling back
      to ``default_url_collect_coll``) and ``acred_review_format``
    :returns: an `ArticleCredReviewer` item including an ``identifier``
      hashed from the fields selected by ``content.ident_keys``
    :rtype: dict
    """
    bot = {
        '@context': content.ci_context,
        '@type': 'ArticleCredReviewer',
        'additionalType': content.super_types('ArticleCredReviewer'),
        'name': 'ESI Article Credibility Reviewer',
        'description': 'Reviews the credibility of an article by (i) semantically analysing it to detect relevant claims (ii) getting credibility reviews for the claims and (iii) getting a credibility reviews for the site(s) that published the article.',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-01T17:02:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python', 'Cogito'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {
            # any launch configs?
        },
        'taskConfiguration': {
            'cred_conf_threshold': cfg.get('cred_conf_threshold', 0.7),
            'max_claims_in_doc': int(cfg.get('max_claims_in_doc', 5)),
            'relsents_in_colls': cfg.get('relsents_in_colls', [
                'generic',
                'pilot-se',
                'pilot-gr',
                'pilot-at',
                'factcheckers',
                'fc-dev'
            ]),
            # Falls back to the default collection when no target one is set.
            'target_url_collect_coll': cfg.get(
                'target_url_collect_coll',
                cfg.get('default_url_collect_coll', None)),
            'acred_review_format': cfg.get(
                'acred_review_format', 'schema.org')
        }
    }
    ident_fields = dictu.select_keys(bot, content.ident_keys(bot))
    described = dict(bot)
    described['identifier'] = hashu.hash_dict(ident_fields)
    return described
def bot_info(sub_bots, cfg):
    """Build the description item for the SentPolarityReviewer bot.

    :param sub_bots: value stored unchanged under ``isBasedOn``
    :param cfg: config options (not read by this function)
    :returns: the description plus an ``identifier`` hashed from the
      fields selected by ``content.ident_keys``
    :rtype: dict
    """
    bot = {
        '@context': ci_context,
        '@type': 'SentPolarityReviewer',
        'name': 'ESI Sentence Polarity Reviewer',
        'description': 'Estimates the polar similarity between two sentences',
        'additionalType': content.super_types('SentPolarityReviewer'),
        'softwareVersion': version,
        'dateCreated': '2020-03-27T22:54:00Z',
        'url': 'http://coinform.eu/bot/SentencePolarSimilarityReviewer/%s' % version,
        'applicationSuite': 'Co-inform',
        'author': bot_describer.esiLab_organization(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {}
    }
    return {
        **bot,
        'identifier': hashu.hash_dict(
            dictu.select_keys(bot, content.ident_keys(bot)))
    }
def bot_info(cfg):
    """Build the description item for the ClaimReviewNormalizer bot.

    :param cfg: config options (not read by this function)
    :returns: the description plus an ``identifier`` hashed from the
      fields selected by ``content.ident_keys``
    :rtype: dict
    """
    bot = {
        '@context': ci_context,
        '@type': 'ClaimReviewNormalizer',
        'name': 'ESI ClaimReview Credibility Normalizer',
        'description': 'Analyses the alternateName and numerical rating value for a ClaimReview and tries to convert that into a normalised credibility rating',
        'additionalType': content.super_types('ClaimReviewNormalizer'),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': dateCreated,
        'softwareVersion': version,
        'url': 'http://coinform.eu/bot/ClaimReviewNormalizer/%s' % version,
        'applicationSuite': 'Co-inform',
        'isBasedOn': [],  # no dependencies
        'launchConfiguration': {}  # no configs?
    }
    return {
        **bot,
        'identifier': hashu.hash_dict(
            dictu.select_keys(bot, content.ident_keys(bot)))
    }
def misinfoMeSourceCredReviewer():
    """Build the description item for the external MisinfoMe source reviewer.

    :returns: the description plus an ``identifier`` hashed from the
      fields selected by ``content.ident_keys``
    :rtype: dict
    """
    # Since we don't control this bot, we assume versions,
    # and thus results, may change on a weekly basis.
    # So we use the start of the current week as the version.
    week_start = isodate.start_of_week_utc_timestamp(
        datetime.datetime.utcnow())
    bot = {
        '@context': 'http://coinform.eu',
        '@type': 'MisinfoMeSourceCredReviewer',
        'softwareVersion': week_start,
        'additionalType': content.super_types('MisinfoMeSourceCredReviewer'),
        'url': misinfome_url,
        'applicationSuite': 'MisinfoMe'
    }
    ident_fields = dictu.select_keys(bot, content.ident_keys(bot))
    described = dict(bot)
    described['identifier'] = hashu.hash_dict(ident_fields)
    return described
def bot_info(sub_bots, cfg):
    """Build the description item for the top-level CredReviewer bot.

    :param sub_bots: value stored unchanged under ``isBasedOn``
    :param cfg: config options (not read by this function)
    :returns: a `CredReviewer` item including an ``identifier`` hashed
      from the fields selected by ``content.ident_keys``
    :rtype: dict
    """
    result = {
        '@context': ci_context,
        '@type': 'CredReviewer',
        'additionalType': content.super_types('CredReviewer'),
        'name': 'ESI Top-level Credibility Reviewer',
        'description': 'Reviews the credibility of various supported content items, mainly by delegating to the appropriate content-level reviewer',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-02T18:05:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
        'taskConfiguration': {}
    }
    # The identifier is hashed over the item's identity keys, the same
    # way the other bot descriptors compute theirs (this previously
    # selected `content.itemref_keys`, unlike every sibling descriptor).
    ident = hashu.hash_dict(
        dictu.select_keys(result, content.ident_keys(result)))
    return {**result, 'identifier': ident}
def do_add_stance_labels(claim_sim_results, sim_threshold=0.7, max_len=128):
    """Add predicted stance labels to sufficiently similar result sentences.

    For every query claim, the ``sentence`` of each result whose
    ``similarity`` is at least ``sim_threshold`` is collected and sent,
    together with the query claim, to ``predict_stances``.  The returned
    label and confidence are written onto the corresponding result dict
    as ``sent_stance`` and ``sent_stance_confidence``, and the returned
    stance reviewer is stored under ``stanceReviewer`` on every entry of
    ``claim_sim_results``.  The input dicts are modified in place.

    :param claim_sim_results: list of dicts, each with a ``q_claim``
      string and a ``results`` list of dicts (each with a ``similarity``
      and optionally a ``sentence``)
    :param sim_threshold: results with a lower similarity get no stance
    :param max_len: query claims with more than ``2*max_len/3``
      space-separated tokens are skipped (a warning is logged)
    :returns: a tuple of ``claim_sim_results`` and the timing object
      produced by ``citimings.timing``
    :rtype: tuple
    """
    start = citimings.start()
    max_qclaim_toks = 2 * max_len / 3  # loop-invariant token budget

    stance_reqs = []
    for cresult in claim_sim_results:
        q_claim = cresult['q_claim']
        n_toks = len(q_claim.split(' '))
        if n_toks > max_qclaim_toks:
            logger.warning(
                'Skip stance_pred: q_claim is too large %d ', n_toks)
            continue
        bods, rs_targets = [], []
        for rs in cresult['results']:
            if rs['similarity'] < sim_threshold:
                continue
            sent = rs.get('sentence', None)
            if sent is not None:
                bods.append(sent)
                # 'field' names the key on `rs` that receives the label.
                rs_targets.append({'rs': rs, 'field': 'sent_stance'})
        if bods:
            stance_reqs.append({
                'qclaim': q_claim,
                'doc_bodies': bods,
                'rs_targets': rs_targets})

    if not stance_reqs:
        return claim_sim_results, citimings.timing(
            'predict_stances', start)

    labels, confs, stanceRev = predict_stances(
        # don't send the rs_targets to server
        [dictu.select_keys(sr, ['qclaim', 'doc_bodies'])
         for sr in stance_reqs])
    for csr in claim_sim_results:
        csr['stanceReviewer'] = stanceRev
    stance_docs_t = citimings.timing('doc_stance_pred', start)
    logger.info("Predicted stances %s with scores %s", labels, confs)

    # Flatten the targets in request order so they line up one-to-one
    # with the flat label/confidence lists (checked by the asserts).
    rs_targets = [rs_target
                  for req in stance_reqs
                  for rs_target in req['rs_targets']]
    assert len(rs_targets) == len(labels)
    assert len(confs) == len(labels)
    for rs_target, label, conf in zip(rs_targets, labels, confs):
        rs = rs_target['rs']
        field = rs_target['field']
        rs[field] = label
        rs['%s_confidence' % field] = conf
    return claim_sim_results, citimings.timing(
        'predict_stances', start, [stance_docs_t])
def test_select_keys_01():
    """select_keys returns only the requested key of a flat dict."""
    source = {'a': 1, 'b': 2}
    expected = {'a': 1}
    selected = dictu.select_keys(source, ['a'])
    assert expected == selected
def test_select_keys_04():
    """select_keys yields an empty dict when the requested key is absent."""
    source = {'a': 1, 'b': 2}
    selected = dictu.select_keys(source, ['e'])
    expected = {}
    assert expected == selected