def test_hash_dict_02():
    """Check that ``hashu.hash_dict`` ignores key order in nested dicts."""
    nested = {'a1': 'b1', 'a2': 'b2', 'a3': 8.12541}
    # same entries as `nested`, only inserted in a different order
    reordered = {'a3': 8.12541, 'a1': 'b1', 'a2': 'b2'}
    assert hashu.hash_dict({'a': nested}) == hashu.hash_dict({'a': reordered})
def bot_info(sub_bots, cfg):
    """Build the description item for the TweetCredReviewer bot.

    The ``identifier`` of the returned item is the hash of the subset of
    its fields selected by ``content.ident_keys``.

    :param sub_bots: a list of bot items used by this TweetCredReviewer;
      stored as-is under ``isBasedOn``
    :param cfg: config options (not read by this function)
    :returns: a `TweetCredReviewer` item
    :rtype: dict
    """
    bot_type = 'TweetCredReviewer'
    summary = (
        'Reviews the credibility of a tweet by reviewing the sentences '
        'in the tweet and the (textual) documents linked by the tweet'
    )
    bot = {
        '@context': ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Tweet Credibility Reviewer',
        'description': summary,
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-02T18:00:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python', 'nltk', 'Cogito'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
        'taskConfiguration': {},
    }
    # hash only the identity-defining fields, then attach the result
    id_fields = dictu.select_keys(bot, content.ident_keys(bot))
    return dict(bot, identifier=hashu.hash_dict(id_fields))
def bot_info(sub_bots, cfg):
    """Build the description item for the AggQSentCredReviewer bot.

    The ``identifier`` of the returned item is the hash of the subset of
    its fields selected by ``content.ident_keys``.

    :param sub_bots: bot items this reviewer is based on; stored as-is
      under ``isBasedOn``
    :param cfg: config options; ``acred_pred_claim_search_url`` is read
      (with a localhost default) into the launch configuration
    :returns: an `AggQSentCredReviewer` item
    :rtype: dict
    """
    bot_type = 'AggQSentCredReviewer'
    fallback_search_url = (
        'http://localhost:8070/test/api/v1/claim/internal-search')
    # NOTE: the spelling 'setence' is part of the emitted description and
    # is deliberately left untouched here.
    summary = (
        'Reviews the credibility of a query setence by comparing it to '
        'semantically similar sentences in the Co-inform DB and the '
        'credibility of those.'
    )
    bot = {
        '@context': ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Aggregate Query Sentence Credibility Reviewer',
        'description': summary,
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-03-19T15:09:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {
            'acred_pred_claim_search_url': cfg.get(
                'acred_pred_claim_search_url', fallback_search_url),
        },
    }
    id_fields = dictu.select_keys(bot, content.ident_keys(bot))
    bot_id = hashu.hash_dict(id_fields)
    return {**bot, 'identifier': bot_id}
def bot_info(sub_bots, cfg):
    """Build the description item for the DBSentCredReviewer bot.

    The ``identifier`` of the returned item is the hash of the subset of
    its fields selected by ``content.ident_keys``.

    :param sub_bots: bot items this reviewer is based on; stored as-is
      under ``isBasedOn``
    :param cfg: config options; the penalty factor (coerced to float,
      default 0.5) and the list of factchecker urls (default empty) are
      copied into the launch configuration
    :returns: a `DBSentCredReviewer` item
    :rtype: dict
    """
    bot_type = 'DBSentCredReviewer'
    penalty_key = 'factchecker_website_to_qclaim_confidence_penalty_factor'
    summary = (
        'Estimates the credibility of a sentence in the Co-inform DB '
        'based on known ClaimReviews or websites where the sentence has '
        'been published.'
    )
    bot = {
        '@context': ci_context,
        '@type': bot_type,
        'name': 'ESI DB Sentence Credibility Reviewer',
        'description': summary,
        'additionalType': content.super_types(bot_type),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': dateCreated,
        'softwareVersion': version,
        'url': 'http://coinform.eu/bot/DBSentCredReviewer/%s' % version,
        'applicationSuite': 'Co-inform',
        'isBasedOn': sub_bots,
        'launchConfiguration': {
            penalty_key: float(cfg.get(penalty_key, 0.5)),
            'acred_factchecker_urls': cfg.get('acred_factchecker_urls', []),
        },
    }
    id_fields = dictu.select_keys(bot, content.ident_keys(bot))
    return dict(bot, identifier=hashu.hash_dict(id_fields))
def calc_worth_review_id(worth_review):
    """Compute an id code for a worth review.

    Only the fields listed under ``ident_keys`` in
    ``sentWorthReview_schema`` contribute to the hash.

    :param worth_review: a `SentWorthReview` dict
    :returns: a hashcode that tries to capture the identity of the worth
      review
    :rtype: str
    """
    identity_keys = sentWorthReview_schema['ident_keys']
    identity_part = dictu.select_keys(worth_review, identity_keys)
    return hashu.hash_dict(identity_part)
def calc_stance_reviewer_id(stance_reviewer):
    """Compute an id code for a stance reviewer.

    Only the fields listed under ``ident_keys`` in
    ``sentStanceReviewer_schema`` contribute to the hash.

    :param stance_reviewer: a `SentStanceReviewer` dict
    :returns: a hashcode that tries to capture the identity of the stance
      reviewer
    :rtype: str
    """
    identity_keys = sentStanceReviewer_schema['ident_keys']
    identity_part = dictu.select_keys(stance_reviewer, identity_keys)
    return hashu.hash_dict(identity_part)
def calc_sim_reviewer_id(sim_reviewer):
    """Compute an id code for a sim_reviewer.

    The hash covers a fixed set of fields: type, name, creation date,
    software version, sub-bots and launch configuration.

    :param sim_reviewer: a `SemSentSimReviewer` dict
    :returns: a hashcode that tries to capture the identity of the
      sim_reviewer
    :rtype: str
    """
    identity_keys = [
        '@type',
        'name',
        'dateCreated',
        'softwareVersion',
        'isBasedOn',
        'launchConfiguration',
    ]
    identity_part = dictu.select_keys(sim_reviewer, identity_keys)
    return hashu.hash_dict(identity_part)
def calc_identifier(item, cfg):
    """Calculate the identifier for a data item.

    The identifier is the hash of a subset of the item's fields (those
    returned by ``ident_keys``), after that subset has been passed
    through ``item_with_refs``. Any nested items must already have an
    identifier.

    :param item: the item for which to calculate the identifier; it must
      satisfy ``content.is_item`` and must not yet have an ``identifier``
    :param cfg: config options
    :returns: a unique identifier within acred.
    :rtype: str
    """
    # preconditions (as plain asserts, they vanish under ``python -O``)
    assert content.is_item(item)
    assert 'identifier' not in item
    identity_keys = ident_keys(item, cfg)
    identity_part = dictu.select_keys(item, identity_keys)
    referenced = item_with_refs(identity_part, cfg)
    return hashu.hash_dict(referenced)
def bot_info(sub_bots, cfg):
    """Build the description item for the ArticleCredReviewer bot.

    The ``identifier`` of the returned item is the hash of the subset of
    its fields selected by ``content.ident_keys``.

    :param sub_bots: bot items used by this ArticleCredReviewer; stored
      as-is under ``isBasedOn``
    :param cfg: config options; several entries are copied (with
      defaults) into the task configuration
    :returns: an `ArticleCredReviewer`
    :rtype: dict
    """
    bot_type = 'ArticleCredReviewer'
    summary = (
        'Reviews the credibility of an article by (i) semantically '
        'analysing it to detect relevant claims (ii) getting credibility '
        'reviews for the claims and (iii) getting a credibility reviews '
        'for the site(s) that published the article.'
    )
    fallback_colls = [
        'generic', 'pilot-se', 'pilot-gr', 'pilot-at',
        'factcheckers', 'fc-dev',
    ]
    # the target collection falls back to the general default collection
    fallback_target_coll = cfg.get('default_url_collect_coll', None)
    task_config = {
        'cred_conf_threshold': cfg.get('cred_conf_threshold', 0.7),
        'max_claims_in_doc': int(cfg.get('max_claims_in_doc', 5)),
        'relsents_in_colls': cfg.get('relsents_in_colls', fallback_colls),
        'target_url_collect_coll': cfg.get(
            'target_url_collect_coll', fallback_target_coll),
        'acred_review_format': cfg.get('acred_review_format', 'schema.org'),
    }
    bot = {
        '@context': content.ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Article Credibility Reviewer',
        'description': summary,
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-01T17:02:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python', 'Cogito'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},  # any launch configs?
        'taskConfiguration': task_config,
    }
    id_fields = dictu.select_keys(bot, content.ident_keys(bot))
    return dict(bot, identifier=hashu.hash_dict(id_fields))
def bot_info(sub_bots, cfg):
    """Build the description item for the SentPolarityReviewer bot.

    The ``identifier`` of the returned item is the hash of the subset of
    its fields selected by ``content.ident_keys``.

    :param sub_bots: bot items this reviewer is based on; stored as-is
      under ``isBasedOn``
    :param cfg: config options (not read by this function)
    :returns: a `SentPolarityReviewer` item
    :rtype: dict
    """
    bot_type = 'SentPolarityReviewer'
    bot = {
        '@context': ci_context,
        '@type': bot_type,
        'name': 'ESI Sentence Polarity Reviewer',
        'description': 'Estimates the polar similarity between two sentences',
        'additionalType': content.super_types(bot_type),
        'softwareVersion': version,
        'dateCreated': '2020-03-27T22:54:00Z',
        'url': (
            'http://coinform.eu/bot/SentencePolarSimilarityReviewer/%s'
            % version),
        'applicationSuite': 'Co-inform',
        'author': bot_describer.esiLab_organization(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
    }
    id_fields = dictu.select_keys(bot, content.ident_keys(bot))
    return dict(bot, identifier=hashu.hash_dict(id_fields))
def bot_info(cfg):
    """Build the description item for the ClaimReviewNormalizer bot.

    The ``identifier`` of the returned item is the hash of the subset of
    its fields selected by ``content.ident_keys``.

    :param cfg: config options (not read by this function)
    :returns: a `ClaimReviewNormalizer` item
    :rtype: dict
    """
    bot_type = 'ClaimReviewNormalizer'
    summary = (
        'Analyses the alternateName and numerical rating value for a '
        'ClaimReview and tries to convert that into a normalised '
        'credibility rating'
    )
    bot = {
        '@context': ci_context,
        '@type': bot_type,
        'name': 'ESI ClaimReview Credibility Normalizer',
        'description': summary,
        'additionalType': content.super_types(bot_type),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': dateCreated,
        'softwareVersion': version,
        'url': 'http://coinform.eu/bot/ClaimReviewNormalizer/%s' % version,
        'applicationSuite': 'Co-inform',
        'isBasedOn': [],  # no dependencies
        'launchConfiguration': {},  # no configs?
    }
    id_fields = dictu.select_keys(bot, content.ident_keys(bot))
    return dict(bot, identifier=hashu.hash_dict(id_fields))
def misinfoMeSourceCredReviewer():
    """Build the description item for the MisinfoMeSourceCredReviewer bot.

    The ``identifier`` of the returned item is the hash of the subset of
    its fields selected by ``content.ident_keys``.

    :returns: a `MisinfoMeSourceCredReviewer` item
    :rtype: dict
    """
    bot_type = 'MisinfoMeSourceCredReviewer'
    # Since we don't control this bot, we assume versions, and thus
    # results, may change on a weekly basis. So we use the start of the
    # current week as the version.
    now_utc = datetime.datetime.utcnow()
    weekly_version = isodate.start_of_week_utc_timestamp(now_utc)
    bot = {
        '@context': 'http://coinform.eu',
        '@type': bot_type,
        'softwareVersion': weekly_version,
        'additionalType': content.super_types(bot_type),
        'url': misinfome_url,
        'applicationSuite': 'MisinfoMe',
    }
    id_fields = dictu.select_keys(bot, content.ident_keys(bot))
    bot_id = hashu.hash_dict(id_fields)
    return {**bot, 'identifier': bot_id}
def bot_info(sub_bots, cfg):
    """Build the description item for the top-level CredReviewer bot.

    The ``identifier`` of the returned item is the hash of the subset of
    its fields selected by ``content.ident_keys``.

    :param sub_bots: bot items this reviewer delegates to; stored as-is
      under ``isBasedOn``
    :param cfg: config options (not read by this function)
    :returns: a `CredReviewer` item
    :rtype: dict
    """
    result = {
        '@context': ci_context,
        '@type': 'CredReviewer',
        'additionalType': content.super_types('CredReviewer'),
        'name': 'ESI Top-level Credibility Reviewer',
        'description': (
            'Reviews the credibility of various supported content items, '
            'mainly by delegating to the appropriate content-level reviewer'),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-02T18:05:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
        'taskConfiguration': {},
    }
    # The identifier must be derived from the item's identity keys
    # (content.ident_keys), the same way the other reviewer bot
    # descriptions do; the item-reference keys are not the identity keys.
    ident = hashu.hash_dict(
        dictu.select_keys(result, content.ident_keys(result)))
    return {**result, 'identifier': ident}
def test_hash_dict_01():
    """Check that ``hashu.hash_dict`` ignores top-level key order."""
    forward = {'a': 'b', 'c': 'd'}
    # same entries as `forward`, only inserted in the opposite order
    backward = {'c': 'd', 'a': 'b'}
    assert hashu.hash_dict(forward) == hashu.hash_dict(backward)