Example #1
0
def model_report(db, pnos, w2v_model, cluster_model, outdir, top_n=10):
    """
    Write the model summary artifacts for a set of patents under outdir.

    pnos is iterated as (name, pno) pairs. The result of parse_clusters is
    handed to save_dict as 'parsed_clusters.p', the per-name
    cluster_distances results are handed to save_dict as 'patent_dists.p',
    and embedding_fig is asked to save to 'embedding_fig_tsne.png'.

    Nothing is returned. top_n is accepted but is not used in this body.
    """
    cluster_summary = parse_clusters(cluster_model, w2v_model)

    # One cluster_distances call per (name, pno) pair, in iteration order.
    distances_by_name = {}
    for label, patent_number in pnos:
        distances_by_name[label] = cluster_distances(
            db, patent_number, w2v_model, cluster_model
        )

    def in_outdir(filename):
        # Output paths are built by plain '/' joining onto outdir.
        return '/'.join((outdir, filename))

    clusters_path = in_outdir('parsed_clusters.p')
    distances_path = in_outdir('patent_dists.p')
    figure_path = in_outdir('embedding_fig_tsne.png')

    save_dict(clusters_path, cluster_summary)
    save_dict(distances_path, distances_by_name)
    embedding_fig(w2v_model, cluster_model, savefn=figure_path, n=150)
Example #2
0
def model_report(db, pnos, w2v_model, cluster_model, outdir, top_n=10):
    """
    Persist a report of the cluster model into outdir.

    Steps, in order:
      1. parse_clusters(cluster_model, w2v_model) is computed.
      2. cluster_distances is evaluated for every (name, pno) pair in pnos
         and collected into a dict keyed by name.
      3. Both results are passed to save_dict ('parsed_clusters.p' and
         'patent_dists.p') and embedding_fig is called with
         'embedding_fig_tsne.png' as its savefn.

    Returns None. The top_n argument is not referenced by this body.
    """
    parsed = parse_clusters(cluster_model, w2v_model)
    per_patent = dict(
        (name, cluster_distances(db, pno, w2v_model, cluster_model))
        for name, pno in pnos
    )

    # Every output file sits directly under outdir, joined with '/'.
    targets = [
        '/'.join([outdir, basename])
        for basename in (
            'parsed_clusters.p',
            'patent_dists.p',
            'embedding_fig_tsne.png',
        )
    ]
    parsed_target, dists_target, tsne_target = targets

    save_dict(parsed_target, parsed)
    save_dict(dists_target, per_patent)
    embedding_fig(w2v_model, cluster_model, savefn=tsne_target, n=150)
def _plot_degree_hist(ax, dates, cites, title):
    """Draw a histogram of dates on ax, weighted by cites, with 150 bins."""
    # Tick values are matplotlib date numbers; render them as year-month.
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(
        lambda numdate, _: num2date(numdate).strftime('%Y-%m')))
    # Build a real list: on Python 3 map() yields a lazy iterator, which
    # hist() cannot treat as array data. A list works on Python 2 and 3.
    ax.hist([date2num(d) for d in dates], weights=cites, bins=150)
    ax.set_xlabel('Date')
    ax.set_ylabel('Count')
    ax.set_title(title)


def main():
    """
    Plot citation in-degree and out-degree counts over time.

    The two counters returned by cites_over_time(db, limit=None) are passed
    to save_dict as 'inctr.p' and 'outctr.p', then drawn as side-by-side
    histograms sharing a y axis and saved to 'cites_over_time.png'.
    The counter keys are passed to date2num, so they are presumably
    date-like values -- confirm against cites_over_time.
    """
    db = MongoClient().patents
    inctr, outctr = cites_over_time(db, limit=None)
    save_dict('inctr.p', dict(inctr))
    save_dict('outctr.p', dict(outctr))

    # Split each counter into parallel (dates, counts) tuples.
    in_dates, in_cites = zip(*inctr.items())
    out_dates, out_cites = zip(*outctr.items())

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    f.set_size_inches(18.5, 10.5)
    _plot_degree_hist(ax1, in_dates, in_cites, 'In-degrees over Time.')
    _plot_degree_hist(ax2, out_dates, out_cites, 'Out-Degrees over Time.')
    plt.savefig('cites_over_time.png')