def graph_present():
    """Draw a seaborn bar plot of 'yea' counts per representative.

    Loads the records via ``load_rep_data()``, keeps the name / present /
    yea / nay / absent columns, orders the rows by descending 'yea' and
    plots 'yea' on the x axis against 'name' on the y axis.  The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    columns = ['name', 'present', 'yea', 'nay', 'absent']
    frame = pd.DataFrame(load_rep_data())
    # Highest 'yea' counts first so they are drawn first on the plot.
    ranked = frame[columns].sort_values(by='yea', ascending=False)
    sns.barplot(x="yea", y="name", data=ranked, label='small')
    plt.show()
def graph_present_by_party():
    """Plot per-party column sums as a horizontal bar chart.

    Loads the records via ``load_rep_data()``, selects the name / present /
    party / yea / nay columns, sums them per party and orders the parties by
    ascending 'present' total before plotting.  The figure is displayed
    with ``plt.show()``; nothing is returned.
    """
    columns = ['name', 'present', 'party', 'yea', 'nay']
    frame = pd.DataFrame(load_rep_data())
    totals = frame[columns].groupby(['party']).sum()
    totals = totals.sort_values(by='present', ascending=True)
    # NOTE(review): `ax` is not bound inside this function; presumably it is
    # a module-level matplotlib Axes -- confirm it exists before calling.
    totals.plot(kind='barh', ax=ax)
    plt.show()
def make_stat_array_votes():
    """Collect vote vectors and display metadata for every representative.

    Returns:
        dict with four parallel lists, one entry per record returned by
        ``load_rep_data()``:
            'mat'     -- each record's 'votes' value, unchanged
            'names'   -- "<name> (<party>)" display labels
            'sizes'   -- each record's 'present' value
            'parties' -- each record's 'party' value
    """
    data = load_rep_data()
    # The previous version also built np.array(votes) into a local that was
    # never read or returned; that dead computation has been removed.  'mat'
    # stays a plain list so existing callers see the same type as before.
    return {
        'mat': [rep['votes'] for rep in data],
        'names': [rep['name'] + ' (' + rep['party'] + ')' for rep in data],
        'sizes': [rep['present'] for rep in data],
        'parties': [rep['party'] for rep in data],
    }
def plot_solidarity_index():
    """Sum all representatives' vote vectors and display the totals.

    Loads the records via ``load_rep_data()``, adds the 'votes' entries
    element-wise across representatives, prints the shape of the summed
    array and shows it with ``plt.matshow``.  Nothing is returned.
    """
    data = load_rep_data()
    mat = [rep['votes'] for rep in data]
    line = np.sum(mat, axis=0)
    # matshow needs a 2-D array.  Summing 1-D vote lists over axis 0 yields
    # a 1-D result, so promote it to a single-row matrix; input that is
    # already 2-D passes through np.atleast_2d unchanged.
    plt.matshow(np.atleast_2d(line))
    # Parenthesised form prints the same text under Python 2 and also
    # parses under Python 3.
    print(line.shape)
    plt.show()
def show_votes(max_votes=100):
    """Display the leading votes of every representative as a heat map.

    Args:
        max_votes: number of leading entries of each record's 'votes' list
            to display (default 100, the previously hard-coded value).
            ``None`` shows every vote.

    Each row of the image is one record from ``load_rep_data()`` and is
    labelled with that record's 'name'.  The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    data = load_rep_data()
    mat = np.array([rep['votes'][:max_votes] for rep in data])
    labels = [rep['name'] for rep in data]
    # NOTE(review): `ax` is not bound inside this function; presumably it is
    # a module-level matplotlib Axes -- confirm it exists before calling.
    ax.matshow(mat, cmap='coolwarm', aspect='auto')
    # Pin one tick per row before assigning labels.  Without fixed tick
    # positions the labels are attached to whatever ticks the automatic
    # locator picks, so names end up next to the wrong rows.
    ax.set_yticks(np.arange(len(labels)))
    ax.set_yticklabels(labels)
    plt.show()
def text_to_vectors():
    """Vectorise each representative's quotes with TF-IDF and compare them.

    Returns:
        dict with
            'mat'     -- dense matrix holding the negated product of the
                         TF-IDF matrix with its own transpose (rows are
                         L2-normalised by the vectoriser, so entries are
                         negated row-to-row dot products)
            'names'   -- "<name> (<party>)" display labels
            'sizes'   -- each record's 'present' value
            'parties' -- each record's 'party' value
    """
    names, quotes, sizes, parties = [], [], [], []
    for rep in load_rep_data():
        names.append(rep['name'] + ' (' + rep['party'] + ')')
        quotes.append(rep['quotes'].lower())
        sizes.append(rep['present'])
        parties.append(rep['party'])

    # `stop_words` comes from outside this function.
    tfidf_options = dict(
        min_df=25,
        stop_words=stop_words,
        strip_accents='unicode',
        lowercase=True,
        ngram_range=(1, 2),
        norm='l2',
        smooth_idf=True,
        sublinear_tf=False,
        use_idf=True,
        analyzer='word',
    )
    vectorizer = TfidfVectorizer(**tfidf_options)

    # Parenthesised form prints the same text under Python 2 and also
    # parses under Python 3.
    print('vectorizing ...')
    X = vectorizer.fit_transform(quotes)
    gram = X * X.T
    D = -gram.todense()
    return {'mat': D, 'names': names, 'sizes': sizes, 'parties': parties}