def mds_author_term(fname1='corr_2d_mds_authors_by_terms.png',
                    fname2='corr_2d_mds_terms_by_authors.png'):
    """Save 2-D MDS label plots for authors and for terms.

    Loads the bibliography data, builds the author-by-term matrix
    (``tfreq=5``, ``afreq=10``) and produces two images:

    * ``fname1`` -- rows of the matrix (authors), embedded from their
      pairwise correlation distances.
    * ``fname2`` -- rows of the transposed matrix (terms), embedded the
      same way.

    Each label is drawn as text at its scaled MDS coordinates with the
    axes hidden.

    Args:
        fname1: Output path for the authors plot.
        fname2: Output path for the terms plot.
    """
    bib_data = get_bib_data()
    mat, authors, term_list, _ = get_author_by_term_mat(
        bib_data, tfreq=5, afreq=10)

    # One figure is reused for both images and released at the end, so
    # repeated calls do not accumulate open figures in pyplot.
    fig = plt.figure()

    def draw(matrix, labels, scale, xlim, ylim, fname):
        # Embed the rows of `matrix` in 2-D and write the labelled layout.
        distances = dist.squareform(dist.pdist(matrix, 'correlation'))
        coords, _ = mds(distances, dim=2)
        fig.clf()
        plt.xlim(*xlim)
        plt.ylim(*ylim)
        for label, x, y in zip(labels, coords[:, 0], coords[:, 1]):
            # Coordinates are multiplied by `scale` to spread the labels
            # across the fixed axis limits.
            plt.annotate(label, xy=(x * scale, y * scale))
        plt.axis('off')
        plt.savefig(fname)

    try:
        draw(mat, authors, 20, (-15, 20), (-15, 20), fname1)
        draw(mat.T, term_list, 500, (-80, 100), (-100, 100), fname2)
    finally:
        plt.close(fig)
def mds_year_term(fname1='corr_2d_mds_years_by_terms.png',
                  fname2='corr_2d_mds_terms_by_years.png'):
    """Save a 2-D MDS label plot of terms based on the year-by-term matrix.

    Loads the bibliography data, builds the year-by-term matrix
    (``freq=5``), transposes it so that rows are terms, embeds those rows
    in two dimensions from their pairwise correlation distances and
    writes the labelled layout, with axes hidden, to ``fname2``.

    Args:
        fname1: Accepted for interface compatibility only; no
            years-by-terms image is written by this function.
        fname2: Output path for the terms plot.
    """
    bib_data = get_bib_data()
    mat, _years, term_list, _years_cnt = get_year_by_term_mat(
        bib_data, freq=5)

    # NOTE(review): a years-by-terms embedding used to be computed here and
    # then discarded before anything was drawn. The wasted computation is
    # removed; `fname1` remains unused until that plot is actually defined.
    tdist = dist.squareform(dist.pdist(mat.T, 'correlation'))
    coords, _ = mds(tdist, dim=2)

    fig = plt.figure()
    try:
        plt.xlim(-80, 100)
        plt.ylim(-100, 100)
        for label, x, y in zip(term_list, coords[:, 0], coords[:, 1]):
            # Coordinates are scaled by 100 to spread the labels across
            # the fixed axis limits.
            plt.annotate(label, xy=(x * 100, y * 100))
        plt.axis('off')
        plt.savefig(fname2)
    finally:
        # Release the figure so repeated calls do not accumulate open
        # figures in pyplot.
        plt.close(fig)
def MDS_A_by_B(mat, labels, fname):
    """Save a 2-D MDS label plot for the rows of ``mat``.

    Computes pairwise correlation distances between the rows of ``mat``,
    embeds them in two dimensions with ``mds`` and draws each entry of
    ``labels`` as text at its embedded position (coordinates scaled by
    20, axis limits fixed at -15..20). The axes are left visible.

    Args:
        mat: Matrix whose rows are the items to embed; passed directly
            to ``dist.pdist``.
        labels: One label per row of ``mat``; paired with the
            coordinates via ``zip``, so any surplus on either side is
            silently ignored.
        fname: Output path handed to ``plt.savefig``.
    """
    adist = dist.squareform(dist.pdist(mat, 'correlation'))
    coords, _ = mds(adist, dim=2)

    fig = plt.figure()
    try:
        plt.xlim(-15, 20)
        plt.ylim(-15, 20)
        for label, x, y in zip(labels, coords[:, 0], coords[:, 1]):
            plt.annotate(label, xy=(x * 20, y * 20))
        plt.savefig(fname)
    finally:
        # Release the figure so repeated calls do not accumulate open
        # figures in pyplot.
        plt.close(fig)