コード例 #1
0
def test_get_df_for_words_with_no_results(words_no_results):
    """
    Check that get_df_for_words reports every requested word, in order.

    The first element of the n-th result entry must equal the n-th
    requested word.

    Known to fail: according to the original author's note, Palmetto
    can not handle underscores.
    """
    client = Palmetto()
    frequencies = client.get_df_for_words(words_no_results)
    # Index into the result (rather than zipping) so that a result shorter
    # than the request still surfaces as an error.
    for position, expected_word in enumerate(words_no_results):
        assert frequencies[position][0] == expected_word
コード例 #2
0
def build_weighted_graph(synset_packs):
    """
    Build a weighted graph out of synset packs.

    Each pack is read as ``pack[0]`` (a fallback term) and ``pack[1]``
    (a mutable list of objects exposing ``name()``).

    Side effect: ``synset_packs`` is modified in place. Every entry of
    ``pack[1]`` is replaced by the part of its ``name()`` before the
    first ".", and an empty ``pack[1]`` receives ``pack[0]`` as its only
    entry.

    Return one list per pair of packs (first pack before second pack in
    input order). Each list holds ``((this_word, that_word), weight)``
    tuples sorted by weight, highest first, such as:
    [
        [((item_a, item_b), 1.534), ((item_a, item_c), 1.1234)],
        [((item_c, item_d), 1.34), ((item_c, item_f), 1.24)],
        ...
    ]
    """
    # Pass 1: normalise the packs to plain words and gather every word so
    # the document frequencies can be fetched in a single request.
    words = []
    for pack in synset_packs:
        candidates = pack[1]
        for position, synset in enumerate(candidates):
            lemma = synset.name().split(".")[0]
            candidates[position] = lemma
            words.append(lemma)
        if not candidates:
            # No synsets for this pack: fall back to the pack's own term.
            fallback = pack[0]
            candidates.append(fallback)
            words.append(fallback)

    df_by_word = dict(Palmetto().get_df_for_words(words))

    # Pass 2: score every word combination between each pair of packs.
    edges = []
    for left_index, left_pack in enumerate(synset_packs):
        for right_pack in synset_packs[left_index + 1:]:
            scored = [
                (
                    (this_word, that_word),
                    calculate_coherence(this_word, that_word, df_by_word),
                )
                for that_word in right_pack[1]
                for this_word in left_pack[1]
            ]
            # list.sort is stable, so ties keep their generation order.
            scored.sort(key=lambda item: item[1], reverse=True)
            edges.append(scored)
    return edges
コード例 #3
0
def build_weighted_graph(synset_packs):
    """
    Build a weighted graph out of synset packs.

    A pack is accessed as ``pack[0]`` (fallback term) and ``pack[1]``
    (mutable list of objects that provide ``name()``).

    Note that the packs are rewritten in place: the entries of
    ``pack[1]`` become the text of ``name()`` up to the first ".", and
    a pack whose list is empty gets ``pack[0]`` appended to it.

    Return a list with one element per pack pair ``(i, j)``, ``i < j``.
    Every element is a list of ``((word_i, word_j), weight)`` tuples in
    descending weight order, such as:
    [
        [((item_a, item_b), 1.534), ((item_a, item_c), 1.1234)],
        [((item_c, item_d), 1.34), ((item_c, item_f), 1.24)],
        ...
    ]
    """
    # Collect all terms (converting the packs to plain words on the way)
    # so that a single document-frequency lookup covers everything.
    words = []
    for pack in synset_packs:
        members = pack[1]
        if members:
            for slot, synset in enumerate(members):
                members[slot] = synset.name().split(".")[0]
                words.append(members[slot])
        else:
            # Nothing to convert: use the pack's own term instead.
            members.append(pack[0])
            words.append(pack[0])

    client = Palmetto()
    frequencies = dict(client.get_df_for_words(words))

    def weighted_pairs(first_members, second_members):
        # Score each cross combination; the outer loop runs over the
        # second pack so ties stay in the original generation order.
        pairs = []
        for that_word in second_members:
            for this_word in first_members:
                weight = calculate_coherence(this_word, that_word,
                                             frequencies)
                pairs.append(((this_word, that_word), weight))
        return sorted(pairs, key=lambda entry: entry[1], reverse=True)

    pack_count = len(synset_packs)
    return [
        weighted_pairs(synset_packs[i][1], synset_packs[j][1])
        for i in range(pack_count)
        for j in range(i + 1, pack_count)
    ]
コード例 #4
0
def test_get_df_for_words(words):
    """
    Check that get_df_for_words reports every requested word, in order.

    The first element of the n-th result entry must equal the n-th
    requested word.
    """
    client = Palmetto()
    frequencies = client.get_df_for_words(words)
    # Index into the result so a too-short result raises instead of
    # silently passing.
    for position, expected_word in enumerate(words):
        assert frequencies[position][0] == expected_word
コード例 #5
0
import sys
import palmettopy.exceptions
from palmettopy.palmetto import Palmetto

# Endpoint probe: request document frequencies for a fixed word list and
# report the outcome through the process exit status
# (0 = request completed, 1 = EndpointDown was raised).
words = [
    "cherry",
    "pie",
    "cr_eam",
    "apple",
    "orange",
    "banana",
    "pineapple",
    "plum",
    "pig",
    "cra_cker",
    "so_und",
    "kit",
]
palmetto = Palmetto()
try:
    # Only the request itself is guarded; any other exception propagates.
    result = palmetto.get_df_for_words(words)
except palmettopy.exceptions.EndpointDown:
    exit_status = 1
else:
    exit_status = 0
sys.exit(exit_status)
コード例 #6
0
ファイル: run.py プロジェクト: shaya7/palmetto-py
from palmettopy.palmetto import Palmetto
# Manual run: request document frequencies for a fixed word list.
# Several of the entries contain underscores.
words = [
    "cherry",
    "pie",
    "cr_eam",
    "apple",
    "orange",
    "banana",
    "pineapple",
    "plum",
    "pig",
    "cra_cker",
    "so_und",
    "kit",
]
# Alternative word list kept for manual experiments (it was also tried
# with the last one or the last two entries removed):
#   ['label', 'type', 'character', 'subject', 'discipline', 'topic',
#    'national', 'home_page', 'foundation', 'basis',
#    'foundation_garment', 'initiation']
palmetto = Palmetto()
# The returned document frequencies are not used; the call is made only
# to exercise the request.
palmetto.get_df_for_words(words)
# Other calls previously tried at this point:
#   palmetto.get_coherence_fast(words)
#   palmetto.get_coherence(words)