Ejemplo n.º 1
0
def bot_info(sub_bots, cfg):
    """Returns a description for this AggQSentCredReviewer

    :param sub_bots: a list of bot items used by this AggQSentCredReviewer
    :param cfg: config options
    :returns: an `AggQSentCredReviewer` item describing this bot
    :rtype: dict
    """
    result = {
        '@context': ci_context,
        '@type': 'AggQSentCredReviewer',
        'additionalType': content.super_types('AggQSentCredReviewer'),
        'name': 'ESI Aggregate Query Sentence Credibility Reviewer',
        # typo fix: "query setence" -> "query sentence".
        # NOTE(review): if 'description' is among content.ident_keys, this
        # changes the computed 'identifier' — confirm downstream consumers
        # tolerate a new id for this bot description.
        'description':
        'Reviews the credibility of a query sentence by comparing it to semantically similar sentences in the Co-inform DB and the credibility of those.',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-03-19T15:09:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {
            'acred_pred_claim_search_url':
            cfg.get('acred_pred_claim_search_url',
                    'http://localhost:8070/test/api/v1/claim/internal-search')
        }
    }
    # identifier is a content hash over the identifying fields only
    return {
        **result, 'identifier':
        hashu.hash_dict(dictu.select_keys(result, content.ident_keys(result)))
    }
Ejemplo n.º 2
0
def bot_info(sub_bots, cfg):
    """Build the self-description item for this TweetCredReviewer bot.

    :param sub_bots: a list of bot items this reviewer delegates to
    :param cfg: config options (no launch/task options are read here)
    :returns: a `TweetCredReviewer` item including its content-hash identifier
    :rtype: dict
    """
    desc = {
        '@context': ci_context,
        '@type': 'TweetCredReviewer',
        'additionalType': content.super_types('TweetCredReviewer'),
        'name': 'ESI Tweet Credibility Reviewer',
        'description':
        'Reviews the credibility of a tweet by reviewing the sentences in the tweet and the (textual) documents linked by the tweet',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-02T18:00:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python', 'nltk', 'Cogito'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
        'taskConfiguration': {}
    }
    # identifier is a content hash over the identifying fields only
    ident = hashu.hash_dict(
        dictu.select_keys(desc, content.ident_keys(desc)))
    return {**desc, 'identifier': ident}
Ejemplo n.º 3
0
def bot_info(sub_bots, cfg):
    """Returns a description for this DBSentCredReviewer

    :param sub_bots: a list of bot items used by this DBSentCredReviewer
    :param cfg: config options; provides the confidence penalty factor and
        the list of known fact-checker URLs
    :returns: a `DBSentCredReviewer` item
    :rtype: dict
    """
    result = {
        '@context': ci_context,
        '@type': 'DBSentCredReviewer',
        'name': 'ESI DB Sentence Credibility Reviewer',
        'description': 'Estimates the credibility of a sentence in the Co-inform DB based on known ClaimReviews or websites where the sentence has been published.',
        'additionalType': content.super_types('DBSentCredReviewer'),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': dateCreated,
        'softwareVersion': version,
        'url': 'http://coinform.eu/bot/DBSentCredReviewer/%s' % version,
        'applicationSuite': 'Co-inform',
        'isBasedOn': sub_bots, 
        'launchConfiguration': {
            # penalty applied when credibility comes from a fact-checker
            # website rather than a direct ClaimReview (default 0.5)
            'factchecker_website_to_qclaim_confidence_penalty_factor': float(
                cfg.get('factchecker_website_to_qclaim_confidence_penalty_factor', 0.5)),
            'acred_factchecker_urls': cfg.get('acred_factchecker_urls', [])
        }
    }
    # identifier is a content hash over the identifying fields only,
    # so equal configurations yield a stable id
    ident = hashu.hash_dict(dictu.select_keys(
        result, content.ident_keys(result)))
    return {
        **result,
        'identifier': ident
    }
Ejemplo n.º 4
0
def bot_info(sub_bots, cfg):
    """Build the self-description item for this ArticleCredReviewer bot.

    :param sub_bots: bot items used by this ArticleCredReviewer
    :param cfg: config options; supplies the task configuration values below
    :returns: an `ArticleCredReviewer` item including its identifier
    :rtype: dict
    """
    task_config = {
        'cred_conf_threshold': cfg.get('cred_conf_threshold', 0.7),
        'max_claims_in_doc': int(cfg.get('max_claims_in_doc', 5)),
        'relsents_in_colls': cfg.get('relsents_in_colls', [
            'generic', 'pilot-se', 'pilot-gr', 'pilot-at', 'factcheckers',
            'fc-dev'
        ]),
        'target_url_collect_coll': cfg.get(
            'target_url_collect_coll',
            cfg.get('default_url_collect_coll', None)),
        'acred_review_format': cfg.get('acred_review_format', 'schema.org')
    }
    result = {
        '@context': content.ci_context,
        '@type': 'ArticleCredReviewer',
        'additionalType': content.super_types('ArticleCredReviewer'),
        'name': 'ESI Article Credibility Reviewer',
        'description':
        'Reviews the credibility of an article by (i) semantically analysing it to detect relevant claims (ii) getting credibility reviews for the claims and (iii) getting a credibility reviews for the site(s) that published the article.',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-01T17:02:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python', 'Cogito'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},  # no launch-time options yet
        'taskConfiguration': task_config
    }
    # identifier is a content hash over the identifying fields only
    ident = hashu.hash_dict(
        dictu.select_keys(result, content.ident_keys(result)))
    return {**result, 'identifier': ident}
Ejemplo n.º 5
0
def stance_reviewer(model_meta, in_dir):
    """Build the bot item describing the sentence stance reviewer.

    :param model_meta: metadata item for the trained stance model
    :param in_dir: directory containing `config.json` and `pytorch_model.bin`
    :returns: a `SentStanceReviewer` item with its computed identifier
    :rtype: dict
    """
    exec_env = {
        **bot_describer.inspect_execution_env(),
        'cuda': torch.cuda.is_available()
    }
    launch_config = {
        'model': model_meta,
        'model_config': bot_describer.path_as_media_object(
            os.path.join(in_dir, 'config.json')),
        'pytorch_model': bot_describer.path_as_media_object(
            os.path.join(in_dir, 'pytorch_model.bin'))
    }
    result = {
        '@context': 'http://coinform.eu',
        '@type': 'SentStanceReviewer',
        'additionalType': sentStanceReviewer_schema['super_types'],
        'name': 'ESI Sentence Stance Reviewer',
        'description':
        'Assesses the stance between two sentences (e.g. agree, disagree, discuss) it was trained and evaluated on FNC-1 achieving 92% accuracy.',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-01-13T15:18:00Z',
        'applicationCategory': ['NLP'],
        'applicationSubCategory': ['Stance Detection'],
        'applicationSuite': ['Co-inform'],
        'softwareRequirements': [
            'python', 'pytorch', 'transformers', 'RoBERTaModel',
            'RoBERTaTokenizer'
        ],
        'softwareVersion': '0.1.1',
        'executionEnvironment': exec_env,
        'isBasedOn': [],
        'launchConfiguration': launch_config
    }
    result['identifier'] = calc_stance_reviewer_id(result)
    return result
Ejemplo n.º 6
0
def bot_info(sub_bots, cfg):
    """Build the self-description item for this SentPolarityReviewer bot.

    :param sub_bots: a list of bot items used by this SentPolarityReviewer
    :param cfg: config options (not read by this function)
    :returns: a `SentPolarityReviewer` item including its identifier
    :rtype: dict
    """
    desc = {
        '@context': ci_context,
        '@type': 'SentPolarityReviewer',
        'name': 'ESI Sentence Polarity Reviewer',
        'description': 'Estimates the polar similarity between two sentences',
        'additionalType': content.super_types('SentPolarityReviewer'),
        'softwareVersion': version,
        'dateCreated': '2020-03-27T22:54:00Z',
        'url':
        'http://coinform.eu/bot/SentencePolarSimilarityReviewer/%s' % version,
        'applicationSuite': 'Co-inform',
        'author': bot_describer.esiLab_organization(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {}
    }
    # identifier is a content hash over the identifying fields only
    return {
        **desc,
        'identifier': hashu.hash_dict(
            dictu.select_keys(desc, content.ident_keys(desc)))
    }
Ejemplo n.º 7
0
def worth_reviewer(model_meta, in_dir):
    """Build the bot item describing the sentence check-worthiness reviewer.

    :param model_meta: metadata item for the trained check-worthiness model
    :param in_dir: directory containing `config.json` and `pytorch_model.bin`
    :returns: a `SentCheckWorthinessReviewer` item with its identifier
    :rtype: dict
    """
    result = {
        '@context':
        'http://coinform.eu',
        '@type':
        'SentCheckWorthinessReviewer',
        'additionalType':
        sentWorthReviewer_schema['super_types'],
        'name':
        'ESI Sentence Worth Reviewer',
        # typo fix: "whorty"/"unwhorty" -> "worthy"/"unworthy".
        # NOTE(review): if the description participates in the id hash, this
        # changes the value returned by calc_worth_reviewer_id — confirm
        # downstream stores tolerate a new id for this bot description.
        'description':
        'Assesses the worthiness of a sentence: CFS (worthy) NCS (unworthy). It was trained and evaluated on a group of different datasets (CBD+Poynter+Clef\'19T1) achieving 95% accuracy.',
        'author':
        bot_describer.esiLab_organization(),
        'dateCreated':
        '2020-05-08T15:18:00Z',
        'applicationCategory': ['NLP'],
        'applicationSubCategory': ['Check-worthiness'],
        'applicationSuite': ['Co-inform'],
        'softwareRequirements': [
            'python', 'pytorch', 'transformers', 'RoBERTaModel',
            'RoBERTaTokenizer'
        ],
        'softwareVersion':
        '0.1.0',
        'executionEnvironment': {
            **bot_describer.inspect_execution_env(), 'cuda':
            torch.cuda.is_available()
        },
        'isBasedOn': [],
        'launchConfiguration': {
            'model':
            model_meta,
            'model_config':
            bot_describer.path_as_media_object(
                os.path.join(in_dir, 'config.json')),
            'pytorch_model':
            bot_describer.path_as_media_object(
                os.path.join(in_dir, 'pytorch_model.bin'))
        }
    }
    result['identifier'] = calc_worth_reviewer_id(result)
    return result
Ejemplo n.º 8
0
def bot_info(cfg):
    """Returns a description for this ClaimReviewNormalizer

    :param cfg: config options (not read by this function)
    :returns: a `ClaimReviewNormalizer` item
    :rtype: dict
    """
    result = {
        '@context': ci_context,
        '@type': 'ClaimReviewNormalizer',
        'name': 'ESI ClaimReview Credibility Normalizer',
        'description':
        'Analyses the alternateName and numerical rating value for a ClaimReview and tries to convert that into a normalised credibility rating',
        'additionalType': content.super_types('ClaimReviewNormalizer'),
        'author': bot_describer.esiLab_organization(),
        'dateCreated': dateCreated,
        'softwareVersion': version,
        'url': 'http://coinform.eu/bot/ClaimReviewNormalizer/%s' % version,
        'applicationSuite': 'Co-inform',
        'isBasedOn': [],  # no dependencies
        'launchConfiguration': {}  # no configs?
    }
    # identifier is a content hash over the identifying fields only
    ident = hashu.hash_dict(
        dictu.select_keys(result, content.ident_keys(result)))
    return {**result, 'identifier': ident}
Ejemplo n.º 9
0
def bot_info(sub_bots, cfg):
    """Returns a description for this CredReviewer

    :param sub_bots: a list of content-level reviewer bot items this
        top-level reviewer delegates to
    :param cfg: config options (not read by this function)
    :returns: a `CredReviewer` item
    :rtype: dict
    """
    result = {
        '@context': ci_context,
        '@type': 'CredReviewer',
        'additionalType': content.super_types('CredReviewer'),
        'name': 'ESI Top-level Credibility Reviewer',
        'description': 'Reviews the credibility of various supported content items, mainly by delegating to the appropriate content-level reviewer',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-04-02T18:05:00Z',
        'applicationCategory': ['Disinformation Detection'],
        'softwareRequirements': ['python'],
        'softwareVersion': version,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': sub_bots,
        'launchConfiguration': {},
        'taskConfiguration': {}
    }
    # NOTE(review): this hashes content.itemref_keys(result) while sibling
    # bot_info implementations hash content.ident_keys(result) — confirm
    # whether the difference is intentional.
    return {
        **result,
        'identifier': hashu.hash_dict(dictu.select_keys(
            result,
            content.itemref_keys(result)
        ))}
Ejemplo n.º 10
0
def sim_reviewer(vec_space, index_format):
    """Build the bot item describing this semantic sentence similarity reviewer.

    :param vec_space: vector-space dict; must provide keys
        'semantic_encoder_info_fn' (callable) and 'dataset_info'
    :param index_format: label for the underlying index flavour, embedded
        in the reviewer name and software version
    :returns: a `SemSentSimReviewer` item with its computed identifier
    :rtype: dict
    """
    encoder_info = vec_space['semantic_encoder_info_fn']()
    reviewer = {
        '@context': 'http://coinform.eu',
        '@type': 'SemSentSimReviewer',
        'additionalType': ['SoftwareApplication', 'Bot'],
        'name': 'ESI Sentence Similarity Reviewer %s' % index_format,
        'description':
        'Claim neural index that uses a semantic similarity measure based on a semantic encoder. It achieved 83% accuracy on STS-B.',
        'author': bot_describer.esiLab_organization(),
        'dateCreated': '2020-03-19T15:09:00Z',
        'applicationCategory': ['NLP'],
        'applicationSubCategory': ['SemanticSimilarity'],
        'applicationSuite': ['Co-inform'],
        'softwareRequirements': ['python', 'numpy'],
        'softwareVersion': '0.1.0-%s' % index_format,
        'executionEnvironment': bot_describer.inspect_execution_env(),
        'isBasedOn': [encoder_info],
        'launchConfiguration': {'vecSpace': vec_space['dataset_info']}
    }
    reviewer['identifier'] = calc_sim_reviewer_id(reviewer)
    return reviewer
Ejemplo n.º 11
0
def load_tsv_vector_space(tsv_vecs_path, sep='\t'):
    """load the word embeddings file and create a vecspace dict
    that stores vectors with their correlated information and
    indices useful for searching the space.

    :param tsv_vecs_path: path to upload the stored embeddings
    :type tsv_vecs_path: str
    :param sep: separator of the embeddings file
    :type sep: str
    :return: dictionary that contains the embeddings `labels`, the numpy array
    of word `vectors`, the created `faiss_index`, the `source` path
    of the embeddings and the number of embeddings dimensions `dim`
    :rtype: dict
    """
    labels = []
    vectors = []
    start = time.time()
    logger.info('Loading vectors from %s' % tsv_vecs_path)
    ndims = None
    with open(tsv_vecs_path, 'r', encoding='utf-8') as vecs_f:
        # iterate the file lazily instead of materialising it with readlines()
        for line_idx, line in enumerate(vecs_f):
            elems = line.split(sep)
            labels.append(elems[0])
            vals = elems[1:]
            if ndims is None:
                # the first row fixes the expected dimensionality
                ndims = len(vals)
            # message expression is only evaluated when the assert fires,
            # instead of building it eagerly on every line
            assert ndims == len(vals), (
                'line %d, expecting %d dims, but %d' % (line_idx, ndims,
                                                        len(vals)))
            vectors.append(
                np.array(list(map(float, vals)), dtype=np.float32))
    vectors = np.vstack(vectors)

    labels_set = set(labels)
    if len(labels_set) != len(labels):
        # logging.Logger.warn is deprecated in favour of warning
        logger.warning("Repeated labels, %d vs %d" %
                       (len(labels), len(labels_set)))
    # sanity check: the stacked matrix must match the per-line dim count.
    # (the original compared ndims with itself after reassigning it, which
    # made the check a no-op)
    assert vectors.shape[1] == ndims, '%d != %d' % (vectors.shape[1], ndims)
    logger.info('Loaded %d vectors in %ds' % (len(labels),
                                              (time.time() - start)))
    nvectors = normalize(vectors)
    return {
        'labels': labels,
        'vectors': nvectors,
        'faiss_index': create_faiss_index(nvectors, ndims),
        'source': tsv_vecs_path,
        'dim': ndims,
        'dataset_info': {
            '@context': 'http://schema.org',
            '@type': 'Dataset',
            'name': 'Co-inform Sentence embeddings',
            'identifier': hashu.sha256_file(tsv_vecs_path),
            'description':
            'Dataset of %d sentence embeddings extracted from claim reviews and articles collected as part of the Co-inform project'
            % len(labels),
            'dateCreated':
            isodate.as_utc_timestamp(os.path.getctime(tsv_vecs_path)),
            'dateModified':
            isodate.as_utc_timestamp(os.path.getmtime(tsv_vecs_path)),
            'creator': bot_describer.esiLab_organization(),
            'encoding': {
                '@type': 'MediaObject',
                'contentSize': bot_describer.readable_file_size(tsv_vecs_path),
                'encodingFormat': 'text/tab-separated-values'
            }
        }
    }