def plot_scores(x_s, y_s, max_x=np.inf, color='g.', label=None, clear=True):
    """Plot ``y_s`` against the sorted ``x_s``, skipping points with x >= ``max_x``.

    Args:
        x_s: 1-D sequence of x values; each element is compared with ``max_x``.
        y_s: Sequence of y values indexed in parallel with ``x_s``.
        max_x: Exclusive upper bound; points whose x is not below it are
            left out of the plot.
        color: Format string passed to ``plot`` as its third argument.
        label: Label passed to ``plot``.
        clear: When true, ``clf()`` is called before plotting.

    Returns:
        The first element of the sequence returned by ``plot``.
    """
    from functools import cmp_to_key

    # Select points by their ORIGINAL position so each surviving x stays
    # paired with its own y. Sorting range(len(filtered_xs)) instead would
    # take the first few y values whenever a dropped point is not at the end.
    kept = [i for i, x in enumerate(x_s) if x < max_x]
    # ind_cmp(x_s) is a two-argument comparator over indices (defined
    # elsewhere; presumably it orders indices by their x value). Wrapping it
    # with cmp_to_key keeps that ordering where ``sorted(cmp=...)`` is gone.
    indices = sorted(kept, key=cmp_to_key(ind_cmp(x_s)))
    plot_xs = sorted(x_s[i] for i in kept)
    if clear:
        clf()
    return plot(plot_xs, [y_s[i] for i in indices], color, label=label)[0]
def which_topics(words, voc_size=1000):
    """Map every word id below ``voc_size`` to the topics that rank it highly.

    For each topic, the word ids ``0 .. len(topic) - 1`` are ordered with
    ``ind_cmp(topic)`` in reverse (presumably descending weight; ``ind_cmp``
    is defined elsewhere). A word belongs to topic ``i`` when its position in
    that ordering is below ``get_sig_words(words)[i]``.

    Args:
        words: Sequence of topics; each topic is a sequence indexed by word id.
        voc_size: Number of word ids (``0 .. voc_size - 1``) in the result.

    Returns:
        dict mapping each word id to the list of topic indices (ascending)
        it belongs to. Word ids a topic has no entry for are not assigned to
        that topic (the earlier ``list.index`` lookup raised ``ValueError``
        for them).
    """
    from functools import cmp_to_key

    sig_words = get_sig_words(words)
    topics_by_word = {word: [] for word in range(voc_size)}
    for topic_id, topic in enumerate(words):
        ranking = sorted(range(len(topic)), key=cmp_to_key(ind_cmp(topic)),
                         reverse=True)
        cutoff = sig_words[topic_id]
        # Walk the ranking once instead of calling ranking.index(word) for
        # every word, which rescanned the whole list each time.
        for position, word in enumerate(ranking):
            if not position < cutoff:
                break  # positions only grow, so no later word can qualify
            if word < voc_size:
                topics_by_word[word].append(topic_id)
    return topics_by_word
def plot_stats(stats, x, algs=None, flat=False):
    """Clear the figure and plot one score series per algorithm against ``stats[x]``.

    Args:
        stats: Mapping that holds the x values under key ``x``, one score
            sequence per algorithm name, and a ``'k'`` entry whose first
            element is converted to ``int``.
        x: Key of the x-axis values in ``stats``.
        algs: Algorithm names to plot; defaults to
            ``['lda', 'ldaT', 'ldaC', 'projector', 'Baseline', 'LSI']``.
        flat: When true, every algorithm except ``'Baseline'`` is looked up
            (and labelled) as ``'<alg>-<k>'``.
    """
    from functools import cmp_to_key

    if algs is None:
        algs = ['lda', 'ldaT', 'ldaC', 'projector', 'Baseline', 'LSI']
    colors = 'brgmcyk'
    clf()
    k = int(stats['k'][0])

    # The x values and their ordering are the same for every algorithm, so
    # compute them once. ind_cmp(x_s) is a two-argument comparator over
    # indices (defined elsewhere); cmp_to_key keeps its ordering where
    # ``sorted(cmp=...)`` is no longer accepted.
    x_s = stats[x]
    order = sorted(range(len(x_s)), key=cmp_to_key(ind_cmp(x_s)))
    sorted_xs = sorted(x_s)

    for position, alg in enumerate(algs):
        if flat and alg != 'Baseline':
            alg += '-' + str(k)
        scores = stats[alg]
        # Cycle through the palette so more than len(colors) algorithms do
        # not raise IndexError.
        color = colors[position % len(colors)]
        plot(sorted_xs, [scores[i] for i in order], color + '.', label=alg)
    legend(loc="best")
def plot_extract(flat_results, n_s=(1000, 1500, 2000, 2500), y='projector-20', clear=True):
    """Plot one curve per document count from ``extract(flat_results, n)``.

    Args:
        flat_results: Passed unchanged to ``extract`` (defined elsewhere).
        n_s: Iterable of document counts; one curve is drawn for each.
        y: Key of the series to plot, or ``'ratio'`` to plot
            ``'projector-20'`` divided element-wise by ``'ldaT-20'``.
        clear: When true, ``clf()`` is called before plotting.

    Returns:
        List with the first element of each ``plot`` call's result, in the
        order of ``n_s``.
    """
    from functools import cmp_to_key

    if clear:
        clf()
    colors = {1000: 'b', 1500: 'g', 2000: 'r', 2500: 'k'}
    markers = {1000: '+', 1500: 'x', 2000: 'o', 2500: '.'}
    # Styles for document counts outside the fixed tables above, chosen by
    # position so such values no longer raise KeyError.
    fallback_colors = 'cmybgrk'
    fallback_markers = '*^dsv<>'

    plots = []
    for position, n in enumerate(n_s):
        results = extract(flat_results, n)
        l_values = results['l']
        # ind_cmp(...) is a two-argument comparator over indices (defined
        # elsewhere); cmp_to_key keeps its ordering where
        # ``sorted(cmp=...)`` is no longer accepted.
        indices = sorted(range(len(l_values)), key=cmp_to_key(ind_cmp(l_values)))
        if y == 'ratio':
            # dtype=float forces true division even for integer scores on
            # interpreters where ``/`` on ints floors.
            numerator = np.array([results['projector-20'][i] for i in indices], dtype=float)
            denominator = np.array([results['ldaT-20'][i] for i in indices], dtype=float)
            y_s = numerator / denominator
        else:
            y_s = [results[y][i] for i in indices]
        color = colors.get(n, fallback_colors[position % len(fallback_colors)])
        marker = markers.get(n, fallback_markers[position % len(fallback_markers)])
        plots.append(plot(sorted(l_values), y_s, color + '-' + marker,
                          label=str(n) + ' documents')[0])
    legend(loc='best')
    return plots
def plot_deep_stats(stats, k=20, index=0, clear=True):
    """Plot the output of get_deep_stats().

    Draws one curve per algorithm from ``stats[index]``: the sorted
    ``'sig_words'`` values on the x axis against that algorithm's scores.

    Args:
        stats: Indexable collection; ``stats[index]`` must provide
            ``'sig_words'`` and one score sequence per algorithm key.
        k: Suffix used to build the score key ``'<alg>-<k>'`` for every
            algorithm except ``'Baseline'``.
        index: Which entry of ``stats`` to plot.
        clear: When true, ``clf()`` is called before plotting.
    """
    from functools import cmp_to_key

    if clear:
        clf()
    algs = ['lda', 'ldaC', 'projector', 'Baseline', 'LSI', 'kmeans', 'malletlda']
    # NOTE(review): the '-' entry (LSI) joins the trailing '-' below to form
    # '--', which matplotlib reads as a dashed line without a marker; kept
    # exactly as before.
    markers = ['.', 'x', 'd', 'o', '-', '*', '^']
    colors = 'bgrymkc'

    # The x axis and its ordering do not depend on the algorithm, so they
    # are computed once. ind_cmp(...) is a two-argument comparator over
    # indices (defined elsewhere); cmp_to_key keeps its ordering where
    # ``sorted(cmp=...)`` is no longer accepted.
    entry = stats[index]
    raw_sig_words = entry['sig_words']
    sig_words = sorted(raw_sig_words)
    indices = sorted(range(len(sig_words)), key=cmp_to_key(ind_cmp(raw_sig_words)))

    for alg, color, marker in zip(algs, colors, markers):
        alg_name = alg if alg == 'Baseline' else alg + '-' + str(k)
        scores = entry[alg_name]
        plot(sig_words, [scores[i] for i in indices], color + marker + '-', label=alg)
    legend(loc='best')