コード例 #1
0
def _score_by_text_frequencies(search, connection, feature, texts,
                               target_units, source_units, features, stoplist,
                               distance_basis, max_distance, tag_helper):
    """Score units using inverse frequencies taken from the texts themselves.

    The inverse text frequencies of ``feature`` are fetched for ``texts[0]``
    (used as the source frequencies) and for ``texts[1]`` (used as the target
    frequencies).  Each result is wrapped with ``_lookup_wrapper`` and both
    getters are passed to ``_score`` along with the remaining arguments.

    Returns
    -------
    Whatever ``_score`` returns.
    """
    source_inverse_freqs = get_inverse_text_frequencies(
        connection, feature, texts[0].id)
    source_getter = _lookup_wrapper(source_inverse_freqs)

    target_inverse_freqs = get_inverse_text_frequencies(
        connection, feature, texts[1].id)
    target_getter = _lookup_wrapper(target_inverse_freqs)

    return _score(
        search, connection, target_units, source_units, features, stoplist,
        distance_basis, max_distance, source_getter, target_getter,
        tag_helper)
コード例 #2
0
def _get_inv_lemmata_freq_getter(conn, freq_basis, text_options, latin_units):
    """Select the inverse lemmata frequency getter for ``freq_basis``.

    When ``freq_basis`` is ``'texts'``, the getter wraps the inverse text
    frequencies of ``text_options.text`` (looked up by its ``id``).  For any
    other value, the getter is built by ``_inverse_averaged_freq_getter``
    from the corpus frequencies of that text's language and ``latin_units``.
    """
    if freq_basis == 'texts':
        text_inverse_freqs = get_inverse_text_frequencies(
            conn, 'lemmata', text_options.text.id)
        return _lookup_wrapper(text_inverse_freqs)

    corpus_freqs = get_corpus_frequencies(
        conn, 'lemmata', text_options.text.language)
    return _inverse_averaged_freq_getter(corpus_freqs, latin_units)
コード例 #3
0
def test_mini_text_frequencies(minipop, mini_latin_metadata,
                               mini_greek_metadata, v3checker):
    """Compare v5 inverse lemmata frequencies with the v3 stem frequencies.

    For every Latin and Greek metadata entry, each form index reported by
    ``get_inverse_text_frequencies`` must also be present in the v3
    frequencies, and the v3 value must be close to the reciprocal of the v5
    value.
    """
    all_text_metadata = list(
        itertools.chain(mini_latin_metadata, mini_greek_metadata))

    # Map each title to the id of the matching text found through ``minipop``.
    wanted_titles = [entry['title'] for entry in all_text_metadata]
    title2id = {}
    for text in minipop.find(Text.collection, title=wanted_titles):
        title2id[text.title] = text.id

    for metadata in all_text_metadata:
        v3freqs = _load_v3_mini_text_stem_freqs(
            minipop, Text.json_decode(metadata), v3checker)
        v5freqs = get_inverse_text_frequencies(
            minipop, 'lemmata', title2id[metadata['title']])
        for form_index, inverse_freq in v5freqs.items():
            assert form_index in v3freqs
            assert math.isclose(v3freqs[form_index], 1.0 / inverse_freq)
コード例 #4
0
ファイル: multitext.py プロジェクト: tesserae/tesserae-v5
def compute_inverse_frequencies(connection, feature_type, text_id):
    """Compute inverse text frequencies by specified feature type

    Parameters
    ----------
    connection : tesserae.db.mongodb.TessMongoConnection
    feature_type : str
        Feature category to be used in calculating frequencies
    text_id : bson.objectid.ObjectId
        ObjectId of the text whose feature frequencies are to be computed

    Returns
    -------
    1d np.array
        index by form index to obtain corresponding inverse text frequency;
        positions whose form index is absent from the lookup hold 0.0, and
        the array is empty when the lookup itself is empty
    """
    inv_freqs_dict = get_inverse_text_frequencies(connection, feature_type,
                                                  text_id)
    if not inv_freqs_dict:
        # max() raises ValueError on an empty mapping; with no form indices
        # there is nothing to place, so hand back a zero-length array.
        return np.zeros(0)
    # Size the array so the largest form index is a valid position.
    inverse_frequencies = np.zeros(max(inv_freqs_dict) + 1)
    # Keys and values of one unmodified dict iterate in matching order, so
    # each value lands at its own form index.
    inverse_frequencies[list(inv_freqs_dict)] = list(inv_freqs_dict.values())
    return inverse_frequencies