def test_get_df_for_words_with_no_results(words_no_results):
    """
    Check that ``get_df_for_words`` answers with one entry per queried word,
    in query order, where the first element of each entry is the word itself.

    NOTE: this test is known to fail. The cause was originally reported as
    unknown; the working explanation is that Palmetto cannot handle
    underscores.
    """
    client = Palmetto()
    frequencies = client.get_df_for_words(words_no_results)
    for position, expected_word in enumerate(words_no_results):
        # Each returned entry must start with the word that was asked for.
        assert frequencies[position][0] == expected_word
def build_weighted_graph(synset_packs):
    """
    Build a weighted graph out of synset packs.

    Each pack is indexed as ``pack[0]`` (the fallback word) and ``pack[1]``
    (a mutable list of synset objects exposing ``name()``).

    NOTE: ``synset_packs`` is modified in place. Every synset in ``pack[1]``
    is replaced by the part of its name before the first ``"."``, and an
    empty ``pack[1]`` receives ``pack[0]`` as its only entry.

    Returns a list with one entry per pair of packs (first pack before the
    second in input order). Each entry is a list of
    ``((this_word, that_word), weight)`` tuples sorted by descending weight:

        [
            [((item_a, item_b), 1.534), ((item_a, item_c), 1.1234)],
            [((item_c, item_d), 1.34), ((item_c, item_f), 1.24)],
            ...
        ]
    """
    # Collect every term so document frequencies are fetched in one request.
    vocabulary = []
    for pack in synset_packs:
        members = pack[1]
        for slot, synset in enumerate(members):
            lemma = synset.name().split(".")[0]
            members[slot] = lemma
            vocabulary.append(lemma)
        if not members:
            # No synsets for this pack: fall back to the pack's own word.
            members.append(pack[0])
            vocabulary.append(pack[0])

    palmetto = Palmetto()
    df_lookup = dict(palmetto.get_df_for_words(vocabulary))

    graph = []
    pack_count = len(synset_packs)
    for left in range(pack_count):
        left_words = synset_packs[left][1]
        for right in range(left + 1, pack_count):
            right_words = synset_packs[right][1]
            scored_pairs = [
                (
                    (this_word, that_word),
                    calculate_coherence(this_word, that_word, df_lookup),
                )
                for that_word in right_words
                for this_word in left_words
            ]
            # Strongest edge first; the sort is stable, so ties keep their order.
            scored_pairs.sort(key=lambda scored: scored[1], reverse=True)
            graph.append(scored_pairs)
    return graph
def build_weighted_graph(synset_packs):
    """
    Turn synset packs into a list of weighted edge groups.

    A pack is read as ``pack[0]`` (a word) and ``pack[1]`` (a list of synset
    objects with a ``name()`` method).

    NOTE: the input is mutated. Synsets inside ``pack[1]`` are overwritten
    with the text before the first ``"."`` of their name, and a pack whose
    list is empty gets ``pack[0]`` appended to it.

    The result holds one group per pair of packs ``(i, j)`` with ``i < j``.
    Every group lists ``((this_word, that_word), weight)`` tuples, highest
    weight first:

        [
            [((item_a, item_b), 1.534), ((item_a, item_c), 1.1234)],
            [((item_c, item_d), 1.34), ((item_c, item_f), 1.24)],
            ...
        ]
    """
    # Gather all terms first; their document frequencies are looked up together.
    terms = []
    for pack in synset_packs:
        candidates = pack[1]
        if candidates:
            for position, synset in enumerate(candidates):
                base_name = synset.name().split(".")[0]
                candidates[position] = base_name
                terms.append(base_name)
        else:
            # Nothing to choose from: use the pack's own word instead.
            fallback = pack[0]
            candidates.append(fallback)
            terms.append(fallback)

    palmetto = Palmetto()
    frequency_by_word = dict(palmetto.get_df_for_words(terms))

    def score(this_word, that_word):
        # Pair the two words with their coherence weight.
        weight = calculate_coherence(this_word, that_word, frequency_by_word)
        return ((this_word, that_word), weight)

    edges = []
    total = len(synset_packs)
    for first in range(total):
        for second in range(first + 1, total):
            group = []
            for that_word in synset_packs[second][1]:
                for this_word in synset_packs[first][1]:
                    group.append(score(this_word, that_word))
            # Stable descending sort by weight.
            group.sort(key=lambda item: item[1], reverse=True)
            edges.append(group)
    return edges
def test_get_df_for_words(words):
    """
    Check that ``get_df_for_words`` returns its entries in query order, each
    one starting with the word that was requested.
    """
    service = Palmetto()
    returned = service.get_df_for_words(words)
    for index, word in enumerate(words):
        assert returned[index][0] == word
import sys

import palmettopy.exceptions
from palmettopy.palmetto import Palmetto

# Probe script: exits with status 0 when the document-frequency request
# completes, and with status 1 when palmettopy reports EndpointDown.
# Any other exception propagates unchanged.
words = [
    "cherry",
    "pie",
    "cr_eam",
    "apple",
    "orange",
    "banana",
    "pineapple",
    "plum",
    "pig",
    "cra_cker",
    "so_und",
    "kit",
]

palmetto = Palmetto()

try:
    # Only the request itself is guarded; the response is not inspected.
    palmetto.get_df_for_words(words)
except palmettopy.exceptions.EndpointDown:
    exit_status = 1
else:
    exit_status = 0

sys.exit(exit_status)
from palmettopy.palmetto import Palmetto

# Manual script: request document frequencies for a fixed word list.
# The returned value is discarded.
words = [
    "cherry",
    "pie",
    "cr_eam",
    "apple",
    "orange",
    "banana",
    "pineapple",
    "plum",
    "pig",
    "cra_cker",
    "so_und",
    "kit",
]

# Alternative word lists, currently disabled (each drops the last entry of
# the one before it):
# words = ['label', 'type', 'character', 'subject', 'discipline', 'topic', 'national', 'home_page', 'foundation', 'basis', 'foundation_garment', 'initiation']
# words = ['label', 'type', 'character', 'subject', 'discipline', 'topic', 'national', 'home_page', 'foundation', 'basis', 'foundation_garment']
# words = ['label', 'type', 'character', 'subject', 'discipline', 'topic', 'national', 'home_page', 'foundation', 'basis']

palmetto = Palmetto()
palmetto.get_df_for_words(words)

# Coherence calls, currently disabled:
# print(palmetto.get_coherence_fast(words))
# print(palmetto.get_coherence(words))