def dooropen_vs_indegree_plots(lim=250000, savefn='test.pdf', show=False):
    db = MongoClient().patents
    trait_fields = ["_id", "2_gen_avg_dist_w2v", "2_gen_sum_dist_w2v", "citedby"]
    trait_nulls = [None, -2, -2, []]
    trait_pnos, avgs, sums, cbs = zip(*list(get_field_generators(
        db.traits, trait_fields, trait_nulls, limit=lim  
    )))
    in_degs = [len(x) for x in cbs]
    _, allstar_avgs, allstar_sums, allstar_cbs = zip(*list([
        [db.traits.find_one({'_id': pno}).get(field, nil) for field,nil in zip(trait_fields, trait_nulls)]
        for pno in _allstar_pnos
    ]))
    _, normal_avgs, normal_sums, normal_cbs = zip(*list((
        [db.traits.find_one({'_id': pno}).get(field, -2) for field,nil in zip(trait_fields, trait_nulls)]
        for pno in _normal_pnos
    )))
    allstar_indegs = [len(cb) for cb in allstar_cbs]
    normal_indegs = [len(cb) for cb in normal_cbs]
    allstar_slopes = [db.patns.find_one({'pno': pno}).get('wave_slope', None)
                      for pno in _allstar_pnos]
    normal_slopes = [db.patns.find_one({'pno': pno}).get('wave_slope', None)
                     for pno in _normal_pnos]
    patn_fields = ["pno", "wave_slope"]
    patn_nulls = [None, None]
    pno_2_slope = {pno: slope for pno,slope in get_field_generators(
        db.patns, patn_fields, patn_nulls, limit=lim
    )}
    slopes_collate = [pno_2_slope.get(pno, None) for pno in trait_pnos]
    assert(len(slopes_collate) == len(avgs) == len(sums))
    # plot total reach vis in-degree.
    f,axarr = plt.subplots(2,2)
    f.set_size_inches(18.5,10.5)
    axarr[0,0].scatter(in_degs,avgs, s=2)
    axarr[0,0].scatter(allstar_indegs, allstar_avgs, marker='x', color='red')
    axarr[0,0].scatter(normal_indegs, normal_avgs, marker='x', color='green')
    axarr[0,0].set_xlabel('In-Degree')
    axarr[0,0].set_ylabel('Average Reach')
    axarr[0,0].set_ylim([0,np.max(allstar_avgs+avgs+normal_avgs)])
    axarr[0,1].scatter(in_degs,sums, s=2)
    axarr[0,1].scatter(allstar_indegs, allstar_sums, marker='x', color='red')
    axarr[0,1].scatter(normal_indegs, normal_sums, marker='x', color='green')
    axarr[0,1].set_xlabel('In-Degree')
    axarr[0,1].set_ylabel('Total Reach')
    axarr[0,1].set_ylim([0,np.max(allstar_sums+sums+normal_sums)])
    axarr[1,0].scatter(slopes_collate, avgs, s=2)
    axarr[1,0].scatter(allstar_slopes, allstar_avgs, marker='x', color='red')
    axarr[1,0].scatter(normal_slopes, normal_avgs, marker='x', color='green')
    axarr[1,0].set_xlabel('wave slope')
    axarr[1,0].set_ylabel('average reach')
    axarr[1,0].set_ylim([0,np.max(allstar_avgs+avgs+normal_avgs)])
    axarr[1,1].scatter(slopes_collate, sums, s=2)
    axarr[1,1].scatter(allstar_slopes, allstar_sums, marker='x', color='red')
    axarr[1,1].scatter(normal_slopes, normal_sums, marker='x', color='green')
    axarr[1,1].set_xlabel('wave slope')
    axarr[1,1].set_ylabel('total reach')
    axarr[1,1].set_ylim([0,np.max(allstar_sums+sums+normal_sums)])
    if savefn is not None:
        plt.savefig(savefn, dpi=50)
    if show: 
        plt.show()
Beispiel #2
0
def full_pipeline(db, n_topics, out_dir, limit=100, name=''):
    print "Getting texts..."
    pnos, texts = zip(*list(
        get_field_generators(db.pat_text, ['_id', 'patText'], [None, ''])))
    print "Initializing Model..."
    lda_model = MyLda(n_topics, name)
    texts = [t for t in texts if t is not None]
    print "Fitting model..."
    lda_model.fit(pnos, texts)
    print "Saving model..."
    lda_model.save(out_dir, just_lda=False)
    print "exporting summary stats..."
    lda_model.export(out_dir)
Beispiel #3
0
def full_pipeline(db, n_topics, out_dir, limit=100, name = ''):
    print "Getting texts..."
    pnos,texts = zip(*list(get_field_generators(
        db.pat_text,
        ['_id', 'patText'], 
        [None, '']
    )))
    print "Initializing Model..."
    lda_model = MyLda(n_topics,name)
    texts = [t for t in texts if t is not None]
    print "Fitting model..."
    lda_model.fit(pnos, texts)
    print "Saving model..."
    lda_model.save(out_dir, just_lda = False)
    print "exporting summary stats..."
    lda_model.export(out_dir)
def dooropen_vs_indegree_plots(lim=250000, savefn='test.pdf', show=False):
    db = MongoClient().patents
    trait_fields = [
        "_id", "2_gen_avg_dist_w2v", "2_gen_sum_dist_w2v", "citedby"
    ]
    trait_nulls = [None, -2, -2, []]
    trait_pnos, avgs, sums, cbs = zip(*list(
        get_field_generators(db.traits, trait_fields, trait_nulls, limit=lim)))
    in_degs = [len(x) for x in cbs]
    _, allstar_avgs, allstar_sums, allstar_cbs = zip(*list([[
        db.traits.find_one({
            '_id': pno
        }).get(field, nil) for field, nil in zip(trait_fields, trait_nulls)
    ] for pno in _allstar_pnos]))
    _, normal_avgs, normal_sums, normal_cbs = zip(*list(([
        db.traits.find_one({
            '_id': pno
        }).get(field, -2) for field, nil in zip(trait_fields, trait_nulls)
    ] for pno in _normal_pnos)))
    allstar_indegs = [len(cb) for cb in allstar_cbs]
    normal_indegs = [len(cb) for cb in normal_cbs]
    allstar_slopes = [
        db.patns.find_one({
            'pno': pno
        }).get('wave_slope', None) for pno in _allstar_pnos
    ]
    normal_slopes = [
        db.patns.find_one({
            'pno': pno
        }).get('wave_slope', None) for pno in _normal_pnos
    ]
    patn_fields = ["pno", "wave_slope"]
    patn_nulls = [None, None]
    pno_2_slope = {
        pno: slope
        for pno, slope in get_field_generators(
            db.patns, patn_fields, patn_nulls, limit=lim)
    }
    slopes_collate = [pno_2_slope.get(pno, None) for pno in trait_pnos]
    assert (len(slopes_collate) == len(avgs) == len(sums))
    # plot total reach vis in-degree.
    f, axarr = plt.subplots(2, 2)
    f.set_size_inches(18.5, 10.5)
    axarr[0, 0].scatter(in_degs, avgs, s=2)
    axarr[0, 0].scatter(allstar_indegs, allstar_avgs, marker='x', color='red')
    axarr[0, 0].scatter(normal_indegs, normal_avgs, marker='x', color='green')
    axarr[0, 0].set_xlabel('In-Degree')
    axarr[0, 0].set_ylabel('Average Reach')
    axarr[0, 0].set_ylim([0, np.max(allstar_avgs + avgs + normal_avgs)])
    axarr[0, 1].scatter(in_degs, sums, s=2)
    axarr[0, 1].scatter(allstar_indegs, allstar_sums, marker='x', color='red')
    axarr[0, 1].scatter(normal_indegs, normal_sums, marker='x', color='green')
    axarr[0, 1].set_xlabel('In-Degree')
    axarr[0, 1].set_ylabel('Total Reach')
    axarr[0, 1].set_ylim([0, np.max(allstar_sums + sums + normal_sums)])
    axarr[1, 0].scatter(slopes_collate, avgs, s=2)
    axarr[1, 0].scatter(allstar_slopes, allstar_avgs, marker='x', color='red')
    axarr[1, 0].scatter(normal_slopes, normal_avgs, marker='x', color='green')
    axarr[1, 0].set_xlabel('wave slope')
    axarr[1, 0].set_ylabel('average reach')
    axarr[1, 0].set_ylim([0, np.max(allstar_avgs + avgs + normal_avgs)])
    axarr[1, 1].scatter(slopes_collate, sums, s=2)
    axarr[1, 1].scatter(allstar_slopes, allstar_sums, marker='x', color='red')
    axarr[1, 1].scatter(normal_slopes, normal_sums, marker='x', color='green')
    axarr[1, 1].set_xlabel('wave slope')
    axarr[1, 1].set_ylabel('total reach')
    axarr[1, 1].set_ylim([0, np.max(allstar_sums + sums + normal_sums)])
    if savefn is not None:
        plt.savefig(savefn, dpi=50)
    if show:
        plt.show()