def model_report(db, pnos, w2v_model, cluster_model, outdir, top_n=10):
    """
    Write the model summary artifacts for a set of patents into outdir.

    Three files are produced under outdir:
      * parsed_clusters.p      - the result of parse_clusters, via save_dict
      * patent_dists.p         - a dict mapping each name in pnos to the
                                 result of cluster_distances for its patent
                                 number, via save_dict
      * embedding_fig_tsne.png - target passed to embedding_fig as savefn

    pnos must be an iterable of (name, patent number) pairs.
    Nothing is returned. top_n is accepted but not read by this function.
    """
    def out_path(filename):
        # Paths are built with a literal '/' separator.
        return '/'.join([outdir, filename])

    cluster_parse = parse_clusters(cluster_model, w2v_model)

    distances_by_name = {}
    for name, pno in pnos:
        distances_by_name[name] = cluster_distances(
            db, pno, w2v_model, cluster_model
        )

    # Resolve every output path before anything is written.
    parse_path = out_path('parsed_clusters.p')
    distances_path = out_path('patent_dists.p')
    figure_path = out_path('embedding_fig_tsne.png')

    save_dict(parse_path, cluster_parse)
    save_dict(distances_path, distances_by_name)
    embedding_fig(w2v_model, cluster_model, savefn=figure_path, n=150)
def model_report(db, pnos, w2v_model, cluster_model, outdir, top_n=10):
    """
    Summarize what the models assign to each document and save the results.

    Computes the cluster parse (parse_clusters) and, for every
    (name, patent number) pair in pnos, the cluster distances
    (cluster_distances). Both are handed to save_dict, and embedding_fig is
    asked to save a figure; all outputs go under outdir.

    The function has no return value, and top_n is currently unused here.
    """
    parsed = parse_clusters(cluster_model, w2v_model)
    per_patent = dict(
        (label, cluster_distances(db, number, w2v_model, cluster_model))
        for label, number in pnos
    )

    # Output locations, joined to outdir with a literal '/'.
    parse_target, dist_target, figure_target = (
        '/'.join([outdir, leaf])
        for leaf in (
            'parsed_clusters.p',
            'patent_dists.p',
            'embedding_fig_tsne.png',
        )
    )

    save_dict(parse_target, parsed)
    save_dict(dist_target, per_patent)
    embedding_fig(w2v_model, cluster_model, savefn=figure_target, n=150)
def _plot_degree_hist(ax, counts, title):
    """Draw a date histogram on ax from counts, weighting each date by its count.

    counts is a mapping of date -> citation count; title is the axes title.
    """
    dates, cites = zip(*counts.items())
    # Each axis gets its own formatter instance; tick values are matplotlib
    # date numbers, rendered as year-month.
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(
        lambda numdate, _: num2date(numdate).strftime('%Y-%m')))
    # map() is a lazy iterator on Python 3, which hist() cannot treat as a
    # sequence of values; materialize it so the data matches the weights.
    ax.hist(list(map(date2num, dates)), weights=cites, bins=150)
    ax.set_xlabel('Date')
    ax.set_ylabel('Count')
    ax.set_title(title)


def main():
    """Compute citation counts over time, save them, and plot both histograms.

    Reads from the `patents` database of a default MongoClient, stores the
    in- and out-citation counters via save_dict ('inctr.p', 'outctr.p'), and
    writes a side-by-side in-degree / out-degree histogram figure to
    'cites_over_time.png'.
    """
    db = MongoClient().patents
    inctr, outctr = cites_over_time(db, limit=None)
    save_dict('inctr.p', dict(inctr))
    save_dict('outctr.p', dict(outctr))

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    f.set_size_inches(18.5, 10.5)
    _plot_degree_hist(ax1, inctr, 'In-degrees over Time.')
    _plot_degree_hist(ax2, outctr, 'Out-Degrees over Time.')
    plt.savefig('cites_over_time.png')