Ejemplo n.º 1
0
def plot_scores(x_s, y_s, max_x=np.inf, color='g.', label=None, clear=True):
    """Plot ``y_s`` against ``x_s`` in ascending x order.

    Only points whose x value is strictly below ``max_x`` are drawn.

    Args:
        x_s: indexable sequence of x values.
        y_s: indexable sequence of y values, parallel to ``x_s``.
        max_x: exclusive upper bound on the x values that are plotted.
        color: format string handed to ``plot``.
        label: label handed to ``plot``.
        clear: when true, ``clf()`` is called before plotting.

    Returns:
        The first element of the list returned by ``plot``.
    """
    # Filter and sort the ORIGINAL positions, so that every y stays paired
    # with its own x even when max_x drops points from the middle of the
    # sequence.  Sorting range(len(filtered)) against the unfiltered x_s
    # mis-pairs the values as soon as any non-trailing point is dropped.
    kept = [i for i, x in enumerate(x_s) if x < max_x]
    # key= (rather than cmp=) is accepted by both Python 2 and Python 3.
    kept.sort(key=lambda i: x_s[i])
    if clear:
        clf()
    return plot([x_s[i] for i in kept], [y_s[i] for i in kept],
                color, label=label)[0]
Ejemplo n.º 2
0
def which_topics(words, voc_size=1000):
    """Map each word index to the topics in which it is significant.

    For every topic the word indices are ranked by descending weight; a word
    counts as significant for topic ``i`` when its rank is below
    ``get_sig_words(words)[i]``.

    Args:
        words: sequence of topics, each an indexable sequence of per-word
            weights.
        voc_size: number of word indices (``0 .. voc_size - 1``) to report.

    Returns:
        A dict with one entry per word index in ``range(voc_size)``; each
        value is the list of topic indices, in ascending order, for which
        that word is significant.  A word index that does not occur in a
        topic is simply not significant for it.
    """
    sig_words = get_sig_words(words)
    topics_by_word = dict((word, []) for word in range(voc_size))
    for i, topic in enumerate(words):
        # Word indices ordered from heaviest to lightest weight.  key= is
        # accepted by both Python 2 and Python 3, unlike cmp=.
        ranked = sorted(range(len(topic)), key=lambda w: topic[w],
                        reverse=True)
        limit = sig_words[i]
        # Walk only the leading, significant part of the ranking instead of
        # searching the whole ranking once per word (list.index is linear).
        for rank, word in enumerate(ranked):
            if not rank < limit:
                break
            if word < voc_size:
                topics_by_word[word].append(i)
    return topics_by_word
Ejemplo n.º 3
0
def plot_stats(stats, x, algs=None, flat=False):
    """Plot each algorithm's scores against ``stats[x]`` on a fresh figure.

    Args:
        stats: mapping that holds the x values under ``x``, one score
            sequence per algorithm name, and a ``'k'`` entry whose first
            element is converted to int.
        x: key of the x-axis values in ``stats``.
        algs: algorithm names to plot; defaults to
            ``['lda', 'ldaT', 'ldaC', 'projector', 'Baseline', 'LSI']``.
        flat: when true, every name except ``'Baseline'`` gets ``'-<k>'``
            appended before it is looked up in ``stats`` and used as label.
    """
    if algs is None:
        algs = ['lda', 'ldaT', 'ldaC', 'projector', 'Baseline', 'LSI']
    colors = 'brgmcyk'
    clf()
    k = int(stats['k'][0])

    # The x ordering is the same for every algorithm: compute it once.
    # key= is accepted by both Python 2 and Python 3, unlike cmp=.
    x_s = stats[x]
    order = sorted(range(len(x_s)), key=lambda i: x_s[i])
    sorted_xs = [x_s[i] for i in order]

    for position, alg in enumerate(algs):
        if flat and alg != 'Baseline':
            alg += '-' + str(k)
        scores = stats[alg]
        # Wrap around the palette so more than len(colors) algorithms do not
        # raise IndexError.
        color = colors[position % len(colors)]
        plot(sorted_xs, [scores[i] for i in order], color + '.', label=alg)

    # One legend call after all lines exist gives the same final figure as
    # rebuilding the legend after every line.
    if len(algs) > 0:
        legend(loc="best")
Ejemplo n.º 4
0
def plot_extract(flat_results, n_s=(1000, 1500, 2000, 2500), y='projector-20',
                 clear=True):
    """Plot one line per document count, ordered by the ``'l'`` values.

    Args:
        flat_results: results object handed to ``extract`` together with
            each ``n``.
        n_s: document counts to plot; one line is drawn per entry.
        y: key of the y values in the extracted results, or ``'ratio'`` to
            plot ``'projector-20'`` divided element-wise by ``'ldaT-20'``.
        clear: when true, ``clf()`` is called before plotting.

    Returns:
        List with the first element returned by ``plot`` for each ``n``.
    """
    if clear:
        clf()
    colors = {1000: 'b', 1500: 'g', 2000: 'r', 2500: 'k'}
    markers = {1000: '+', 1500: 'x', 2000: 'o', 2500: '.'}
    plots = []
    for n in n_s:
        results = extract(flat_results, n)
        l_values = results['l']
        # Positions of l_values in ascending order; key= is accepted by both
        # Python 2 and Python 3, unlike cmp=.
        order = sorted(range(len(l_values)), key=lambda i: l_values[i])

        def in_order(key):
            """Return results[key] rearranged to follow ``order``."""
            series = results[key]
            return [series[i] for i in order]

        if y == 'ratio':
            y_s = np.array(in_order('projector-20')) / \
                  np.array(in_order('ldaT-20'))
        else:
            y_s = in_order(y)
        # Document counts outside the style tables get a fallback style
        # instead of raising KeyError.
        style = colors.get(n, 'm') + '-' + markers.get(n, '*')
        plots.append(plot([l_values[i] for i in order], y_s, style,
                          label=str(n) + ' documents')[0])
    legend(loc='best')
    return plots
Ejemplo n.º 5
0
def plot_deep_stats(stats, k=20, index=0, clear=True):
    """Plot the output of get_deep_stats().

    Draws one line per algorithm: its scores from ``stats[index]`` against
    the ascending ``stats[index]['sig_words']`` values.

    Args:
        stats: indexable collection; ``stats[index]`` maps ``'sig_words'``
            and each algorithm name to parallel sequences.
        k: suffix appended (as ``'-<k>'``) to every algorithm name except
            ``'Baseline'`` when looking up its scores.
        index: which entry of ``stats`` to plot.
        clear: when true, ``clf()`` is called before plotting.
    """
    if clear:
        clf()
    algs = ['lda', 'ldaC', 'projector', 'Baseline', 'LSI', 'kmeans',
            'malletlda']
    # NOTE: the '-' entry makes the Baseline format string 'y--'.
    markers = ['.', 'x', 'd', 'o', '-', '*', '^']
    colors = 'bgrymkc'

    # The x ordering does not depend on the algorithm: compute it once.
    # key= is accepted by both Python 2 and Python 3, unlike cmp=.
    raw_sig_words = stats[index]['sig_words']
    order = sorted(range(len(raw_sig_words)), key=lambda i: raw_sig_words[i])
    sig_words = [raw_sig_words[i] for i in order]

    for alg, color, marker in zip(algs, colors, markers):
        if alg != 'Baseline':
            alg_name = alg + '-' + str(k)
        else:
            alg_name = alg
        scores = stats[index][alg_name]
        plot(sig_words, [scores[i] for i in order],
             color + marker + '-',
             label=alg)
    legend(loc='best')