예제 #1
0
def bot_info(sub_bots, cfg):
    """Build the description item for the AggQSentCredReviewer bot.

    :param sub_bots: value stored under ``isBasedOn``; presumably the bot
      items this reviewer relies on (TODO confirm against callers)
    :param cfg: config options offering ``get``; only
      ``acred_pred_claim_search_url`` is read here
    :returns: the description plus an ``identifier`` hashed from the fields
      selected through ``content.ident_keys``
    :rtype: dict
    """
    bot_type = 'AggQSentCredReviewer'
    # NOTE(review): the description text contains the typo "setence"; it is
    # left untouched because it is runtime data and may feed the identifier.
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Aggregate Query Sentence Credibility Reviewer',
        'description': (
            'Reviews the credibility of a query setence by comparing it to semantically similar sentences in the Co-inform DB and the credibility of those.'
        ),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-03-19T15:09:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
    }
    # Added after the literal so the key order and evaluation order stay the
    # same as when everything is written in a single dict display.
    description['launchConfiguration'] = {
        'acred_pred_claim_search_url': cfg.get(
            'acred_pred_claim_search_url',
            'http://localhost:8070/test/api/v1/claim/internal-search'),
    }
    ident_fields = dictu.select_keys(
        description, content.ident_keys(description))
    return {**description, 'identifier': hashu.hash_dict(ident_fields)}
예제 #2
0
def bot_info(sub_bots, cfg):
    """Build the description item for the TweetCredReviewer bot.

    :param sub_bots: a list of bot items used by this TweetCredReviewer
    :param cfg: config options (not read by this function)
    :returns: a `TweetCredReviewer` item, including its ``identifier``
    :rtype: dict
    """
    bot_type = 'TweetCredReviewer'
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Tweet Credibility Reviewer',
        'description': (
            'Reviews the credibility of a tweet by reviewing the sentences in the tweet and the (textual) documents linked by the tweet'
        ),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-02T18:00:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python', 'nltk', 'Cogito'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
        'taskConfiguration': {},
    }
    # The identifier is a hash over the subset of fields that
    # content.ident_keys reports for this item.
    ident_fields = dictu.select_keys(
        description, content.ident_keys(description))
    identifier = hashu.hash_dict(ident_fields)
    return dict(description, identifier=identifier)
예제 #3
0
def bot_info(sub_bots, cfg):
    """Build the description item for the DBSentCredReviewer bot.

    :param sub_bots: value stored under ``isBasedOn``; presumably the bot
      items this reviewer relies on (TODO confirm against callers)
    :param cfg: config options offering ``get``; the keys
      ``factchecker_website_to_qclaim_confidence_penalty_factor`` (default
      0.5, coerced to float) and ``acred_factchecker_urls`` (default ``[]``)
      are read
    :returns: the description plus an ``identifier`` hashed from the fields
      selected through ``content.ident_keys``
    :rtype: dict
    """
    bot_type = 'DBSentCredReviewer'
    penalty_key = 'factchecker_website_to_qclaim_confidence_penalty_factor'
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'name': 'ESI DB Sentence Credibility Reviewer',
        'description': 'Estimates the credibility of a sentence in the Co-inform DB based on known ClaimReviews or websites where the sentence has been published.',
        'additionalType': content.super_types(bot_type),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': dateCreated,
        'softwareVersion': version,
        'url': 'http://coinform.eu/bot/DBSentCredReviewer/%s' % version,
        'applicationSuite': 'Co-inform',
        'isBasedOn': sub_bots,
    }
    # Added last so that the key order matches a single dict display.
    description['launchConfiguration'] = {
        penalty_key: float(cfg.get(penalty_key, 0.5)),
        'acred_factchecker_urls': cfg.get('acred_factchecker_urls', []),
    }
    ident_fields = dictu.select_keys(
        description, content.ident_keys(description))
    return {**description, 'identifier': hashu.hash_dict(ident_fields)}
예제 #4
0
def calc_worth_review_id(worth_review):
    """Compute the identifier code of a worth review.

    :param worth_review: a `SentWorthReview` dict
    :returns: hash over the fields of ``worth_review`` that are listed in
      ``sentWorthReview_schema['ident_keys']``
    :rtype: str
    """
    identity_keys = sentWorthReview_schema['ident_keys']
    identity_fields = dictu.select_keys(worth_review, identity_keys)
    return hashu.hash_dict(identity_fields)
예제 #5
0
def calc_stance_reviewer_id(stance_reviewer):
    """Compute the identifier code of a stance reviewer.

    :param stance_reviewer: a `SentStanceReviewer` dict
    :returns: hash over the fields of ``stance_reviewer`` that are listed in
      ``sentStanceReviewer_schema['ident_keys']``
    :rtype: str
    """
    identity_keys = sentStanceReviewer_schema['ident_keys']
    identity_fields = dictu.select_keys(stance_reviewer, identity_keys)
    return hashu.hash_dict(identity_fields)
예제 #6
0
def test_select_keys_02():
    """select_keys keeps a nested dict value of a selected key unchanged."""
    source = {
        'a': 1,
        'b': 2,
        'c': {'d': 'e', 'f': 'g'},
    }
    expected = {
        'a': 1,
        'c': {'d': 'e', 'f': 'g'},
    }
    selected = dictu.select_keys(source, ['a', 'c'])
    assert expected == selected
예제 #7
0
def calc_sim_reviewer_id(sim_reviewer):
    """Compute the identifier code of a sim_reviewer.

    :param sim_reviewer: a `SemSentSimReviewer` dict
    :returns: a hashcode that tries to capture the identity of the
      sim_reviewer, computed over a fixed list of its fields
    :rtype: str
    """
    # Fields that make up the identity of the reviewer.
    identity_keys = [
        '@type',
        'name',
        'dateCreated',
        'softwareVersion',
        'isBasedOn',
        'launchConfiguration',
    ]
    identity_fields = dictu.select_keys(sim_reviewer, identity_keys)
    return hashu.hash_dict(identity_fields)
예제 #8
0
파일: itnorm.py 프로젝트: expertailab/acred
def calc_identifier(item, cfg):
    """Calculate the identifier of a data item.

    The identifier is a hash over the subset of the item's fields given by
    ``ident_keys(item, cfg)``, after that subset has been passed through
    ``item_with_refs``. Any nested items must already have an identifier.

    :param item: the item for which to calculate the identifier; it must
      satisfy ``content.is_item`` and must not yet have an ``identifier``
    :param cfg: config options
    :returns: a unique identifier within acred.
    :rtype: str
    """
    assert content.is_item(item)
    assert 'identifier' not in item
    identity_keys = ident_keys(item, cfg)
    identity_fields = dictu.select_keys(item, identity_keys)
    hashable = item_with_refs(identity_fields, cfg)
    return hashu.hash_dict(hashable)
예제 #9
0
def bot_info(sub_bots, cfg):
    """Build the description item for the ArticleCredReviewer bot.

    :param sub_bots: bot items used by this ArticleCredReviewer
    :param cfg: config options offering ``get``; the task configuration is
      read from it, falling back to the defaults written below
    :returns: an `ArticleCredReviewer`, including its ``identifier``
    :rtype: dict
    """
    bot_type = 'ArticleCredReviewer'
    description = {
        '@context': content.ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Article Credibility Reviewer',
        'description': (
            'Reviews the credibility of an article by (i) semantically analysing it to detect relevant claims (ii) getting credibility reviews for the claims and (iii) getting a credibility reviews for the site(s) that published the article.'
        ),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-01T17:02:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python', 'Cogito'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        # any launch configs?
        'launchConfiguration': {},
    }

    default_colls = [
        'generic', 'pilot-se', 'pilot-gr', 'pilot-at', 'factcheckers',
        'fc-dev'
    ]
    # Added after the literal so the key order and evaluation order stay the
    # same as when everything is written in a single dict display.
    description['taskConfiguration'] = {
        'cred_conf_threshold': cfg.get('cred_conf_threshold', 0.7),
        'max_claims_in_doc': int(cfg.get('max_claims_in_doc', 5)),
        'relsents_in_colls': cfg.get('relsents_in_colls', default_colls),
        # Falls back to the default collection when no target is configured.
        'target_url_collect_coll': cfg.get(
            'target_url_collect_coll',
            cfg.get('default_url_collect_coll', None)),
        'acred_review_format': cfg.get('acred_review_format', 'schema.org'),
    }

    ident_fields = dictu.select_keys(
        description, content.ident_keys(description))
    return {**description, 'identifier': hashu.hash_dict(ident_fields)}
예제 #10
0
def bot_info(sub_bots, cfg):
    """Build the description item for the SentPolarityReviewer bot.

    :param sub_bots: value stored under ``isBasedOn``; presumably the bot
      items this reviewer relies on (TODO confirm against callers)
    :param cfg: config options (not read by this function)
    :returns: the description plus an ``identifier`` hashed from the fields
      selected through ``content.ident_keys``
    :rtype: dict
    """
    bot_type = 'SentPolarityReviewer'
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'name': 'ESI Sentence Polarity Reviewer',
        'description': 'Estimates the polar similarity between two sentences',
        'additionalType': content.super_types(bot_type),
        'softwareVersion': version,
        'dateCreated': '2020-03-27T22:54:00Z',
        'url': (
            'http://coinform.eu/bot/SentencePolarSimilarityReviewer/%s'
            % version),
        'applicationSuite': 'Co-inform',
        'author': bot_describer.esiLab_organization(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
    }
    ident_fields = dictu.select_keys(
        description, content.ident_keys(description))
    return dict(description, identifier=hashu.hash_dict(ident_fields))
예제 #11
0
def bot_info(cfg):
    """Build the description item for the ClaimReviewNormalizer bot.

    :param cfg: config options (not read by this function)
    :returns: the description plus an ``identifier`` hashed from the fields
      selected through ``content.ident_keys``
    :rtype: dict
    """
    bot_type = 'ClaimReviewNormalizer'
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'name': 'ESI ClaimReview Credibility Normalizer',
        'description': (
            'Analyses the alternateName and numerical rating value for a ClaimReview and tries to convert that into a normalised credibility rating'
        ),
        'additionalType': content.super_types(bot_type),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': dateCreated,
        'softwareVersion': version,
        'url': 'http://coinform.eu/bot/ClaimReviewNormalizer/%s' % version,
        'applicationSuite': 'Co-inform',
        # no dependencies
        'isBasedOn': [],
        # no configs?
        'launchConfiguration': {},
    }
    ident_fields = dictu.select_keys(
        description, content.ident_keys(description))
    return dict(description, identifier=hashu.hash_dict(ident_fields))
예제 #12
0
def misinfoMeSourceCredReviewer():
    """Build the description item for the MisinfoMe source credibility bot.

    :returns: the description plus an ``identifier`` hashed from the fields
      selected through ``content.ident_keys``
    :rtype: dict
    """
    bot_type = 'MisinfoMeSourceCredReviewer'
    # Since we don't control this bot, we assume versions, and thus results,
    # may change on a weekly basis. So we use the start of the current week
    # as the version.
    # NOTE(review): datetime.utcnow() yields a naive datetime; confirm what
    # isodate.start_of_week_utc_timestamp expects before changing it.
    week_version = isodate.start_of_week_utc_timestamp(
        datetime.datetime.utcnow())
    description = {
        '@context': 'http://coinform.eu',
        '@type': bot_type,
        'softwareVersion': week_version,
        'additionalType': content.super_types(bot_type),
        'url': misinfome_url,
        'applicationSuite': 'MisinfoMe',
    }
    ident_fields = dictu.select_keys(
        description, content.ident_keys(description))
    return {**description, 'identifier': hashu.hash_dict(ident_fields)}
예제 #13
0
def bot_info(sub_bots, cfg):
    """Build the description item for the top-level CredReviewer bot.

    :param sub_bots: value stored under ``isBasedOn``; presumably the bot
      items this reviewer delegates to (TODO confirm against callers)
    :param cfg: config options (not read by this function)
    :returns: the description plus an ``identifier`` hashed from the fields
      selected through ``content.itemref_keys``
    :rtype: dict
    """
    bot_type = 'CredReviewer'
    description = {
        '@context': ci_context,
        '@type': bot_type,
        'additionalType': content.super_types(bot_type),
        'name': 'ESI Top-level Credibility Reviewer',
        'description': 'Reviews the credibility of various supported content items, mainly by delegating to the appropriate content-level reviewer',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-02T18:05:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
        'taskConfiguration': {},
    }
    # NOTE(review): the identifier is hashed over content.itemref_keys, not
    # content.ident_keys -- confirm this is intended before changing it, as
    # switching would alter the identifiers produced.
    hashed_fields = dictu.select_keys(
        description, content.itemref_keys(description))
    return dict(description, identifier=hashu.hash_dict(hashed_fields))
예제 #14
0
def do_add_stance_labels(claim_sim_results, sim_threshold=0.7, max_len=128):
    """Annotate similar-sentence results with predicted stance labels.

    For each entry of ``claim_sim_results``, the ``sentence`` of every result
    whose ``similarity`` is at least ``sim_threshold`` is collected and sent,
    together with the entry's ``q_claim``, to ``predict_stances``. The
    returned labels and confidences are written back in place on the
    matching result dicts as ``sent_stance`` and ``sent_stance_confidence``,
    and every entry gets a ``stanceReviewer`` field.

    Entries whose ``q_claim`` has more than ``2 * max_len / 3``
    space-separated tokens are skipped with a warning. When no sentence
    qualifies at all, ``predict_stances`` is not called and the input is
    returned without modification.

    :param claim_sim_results: list of dicts with keys ``q_claim`` and
      ``results`` (a list of dicts with ``similarity`` and, optionally,
      ``sentence``); modified in place
    :param sim_threshold: minimum similarity a result needs in order to be
      sent for stance prediction
    :param max_len: token budget; only used to decide whether ``q_claim`` is
      too long
    :returns: ``claim_sim_results`` and the value produced by
      ``citimings.timing('predict_stances', ...)``
    :rtype: tuple
    :raises AssertionError: when the number of labels or confidences returned
      by ``predict_stances`` differs from the number of sentences sent
    """
    start = citimings.start()

    stance_reqs = []
    for cresult in claim_sim_results:
        q_claim = cresult['q_claim']
        n_qclaim_toks = len(q_claim.split(' '))
        if n_qclaim_toks > (2 * max_len / 3):
            logger.warning('Skip stance_pred: q_claim is too large %d ',
                           n_qclaim_toks)
            continue
        bods, rs_targets = [], []
        for rs in cresult['results']:
            if rs['similarity'] < sim_threshold:
                continue
            # Only the matched sentence is sent; the document body
            # ('doc_content') is not used for stance prediction here.
            sent = rs.get('sentence', None)
            if sent is not None:
                bods.append(sent)
                # Remember where the prediction for this body must be stored.
                rs_targets.append({'rs': rs, 'field': 'sent_stance'})
        if bods:
            stance_reqs.append({
                'qclaim': q_claim,
                'doc_bodies': bods,
                'rs_targets': rs_targets})

    if not stance_reqs:
        return claim_sim_results, citimings.timing('predict_stances', start)

    labels, confs, stanceRev = predict_stances(
        # don't send the rs_targets to server
        [dictu.select_keys(sr, ['qclaim', 'doc_bodies'])
         for sr in stance_reqs])

    for csr in claim_sim_results:
        csr['stanceReviewer'] = stanceRev
    stance_docs_t = citimings.timing('doc_stance_pred', start)
    logger.info("Predicted stances %s with scores %s", labels, confs)

    # Flatten the targets in request order so they line up one-to-one with
    # the flat label and confidence lists.
    rs_targets = [rs_target for req in stance_reqs
                  for rs_target in req['rs_targets']]
    assert len(rs_targets) == len(labels)
    assert len(confs) == len(labels)
    for rs_target, label, conf in zip(rs_targets, labels, confs):
        rs = rs_target['rs']
        field = rs_target['field']
        rs[field] = label
        rs['%s_confidence' % field] = conf

    return claim_sim_results, citimings.timing(
        'predict_stances', start,
        [stance_docs_t])
0
def test_select_keys_01():
    """select_keys keeps only the requested key of a flat dict."""
    source = {'a': 1, 'b': 2}
    expected = {'a': 1}
    selected = dictu.select_keys(source, ['a'])
    assert expected == selected
예제 #16
0
def test_select_keys_04():
    """select_keys yields an empty dict when the requested key is absent."""
    source = {'a': 1, 'b': 2}
    selected = dictu.select_keys(source, ['e'])
    assert {} == selected