def plot_places(min_prop=MIN_PROP):
    """Cross-tabulate package tags against the ten most common places and
    plot stacked and adjacent bar charts of the resulting proportions.

    :param min_prop: Tag columns whose total count falls below this
        proportion of the grand total are collapsed into an "other" column.
    """
    place_counts = collections.Counter([
        art['place']
        for art in mongo['article'].find({}, {'place': True})
        if art['place']
    ])
    top_places = [place[0] for place in place_counts.most_common(10)]

    data = crosstab(
        mongo['article'], 'place', top_places,
        'tags.label', pattern.tag_groups['pkg'].labels
    )
    frame = pd.DataFrame(
        data,
        index=top_places,
        columns=pattern.tag_groups['pkg'].labels,
    )

    # Collapse rare columns into a single "other" column.
    min_count = min_prop * frame.sum().sum()
    sum0 = frame.sum()
    # FIX: `.ix` was deprecated and then removed from pandas; boolean column
    # selection belongs to `.loc`. Copy so the "other" assignment below
    # writes to an independent frame, not a view of `frame`.
    plot_frame = frame.loc[:, sum0 >= min_count].copy()
    plot_frame['other'] = frame.loc[:, sum0 < min_count].sum(axis=1)

    # Normalize each row to proportions.
    plot_frame = plot_frame.div(
        1.0 * plot_frame.sum(axis=1),
        axis=0,
    )

    # Make plots
    plot_tags_by_place(
        plot_frame, stacked=True,
        outname=file_name(['pkg', 'stacked'], path='place')
    )
    plot_tags_by_place(
        plot_frame, stacked=False,
        outname=file_name(['pkg', 'adjacent'], path='place')
    )
def plot_places(min_prop=MIN_PROP):
    """Plot the distribution of analysis-package tags across the ten most
    frequently occurring places, as stacked and adjacent bar charts.

    :param min_prop: Columns whose total is below this proportion of the
        grand total are merged into an "other" column before plotting.
    """
    place_counts = collections.Counter([
        art['place']
        for art in mongo['article'].find({}, {'place': True})
        if art['place']
    ])
    top_places = [place[0] for place in place_counts.most_common(10)]
    data = crosstab(mongo['article'], 'place', top_places, 'tags.label',
                    pattern.tag_groups['pkg'].labels)
    frame = pd.DataFrame(
        data,
        index=top_places,
        columns=pattern.tag_groups['pkg'].labels,
    )
    min_count = min_prop * frame.sum().sum()
    sum0 = frame.sum()
    # FIX: `.ix` has been removed from pandas; use `.loc` for boolean column
    # selection, and copy so adding the "other" column does not trigger a
    # chained-assignment warning on a view.
    plot_frame = frame.loc[:, sum0 >= min_count].copy()
    plot_frame['other'] = frame.loc[:, sum0 < min_count].sum(axis=1)
    # Convert counts to row-wise proportions.
    plot_frame = plot_frame.div(1.0 * plot_frame.sum(axis=1), axis=0)
    # Make plots
    plot_tags_by_place(plot_frame, stacked=True,
                       outname=file_name(['pkg', 'stacked'], path='place'))
    plot_tags_by_place(plot_frame, stacked=False,
                       outname=file_name(['pkg', 'adjacent'], path='place'))
def test_defaults(self):
    """With only a name list, file_name joins parts with '-' under fig_dir."""
    expected = os.path.join(trendpath.fig_dir, 'foo-bar')
    assert_equal(utils.file_name(['foo', 'bar']), expected)
]) return dprimes, groups print('{0} articles included in both sets'.format( len(val.pmids) )) print('{0} articles included in both sets, excluding supplements'.format( len(val.pmids_no_supplement) )) # Validate boolean values validation = val.validate() dprimes, groups = to_hist(validation) val.validate_hist( dprimes, labels=groups, title='Supplements included', xlabel='D-Prime', outname=file_name(['dprime-supplements'], path='validate') ) print('Categorical Validation: Supplements Included') print(np.mean(sum(dprimes, []))) validation = val.validate(no_supplement=True) dprimes, groups = to_hist(validation) val.validate_hist( dprimes, labels=groups, title='Supplements excluded', xlabel='D-Prime', outname=file_name(['dprime-no-supplements'], path='validate') ) print('Categorical Validation: Supplements Excluded') print(np.mean(sum(dprimes, []))) # Validate continuous values
print('{0} articles included in both sets'.format(
    len(val.pmids)
))
print('{0} articles included in both sets, excluding supplements'.format(
    len(val.pmids_no_supplement)
))

# Validate categorical values
validation = val.validate()
dprimes, groups = to_hist(validation)
val.validate_hist(
    dprimes, bins=10, labels=groups,
    title='Supplements included',
    xlabel='D-Prime',
    outname=file_name(['dprime-supplements'], path='validate')
)
print('Categorical Validation: Supplements Included')
print(np.mean(sum(dprimes, [])))

validation = val.validate(no_supplement=True)
dprimes, groups = to_hist(validation)
# FIX: removed leftover `import pdb; pdb.set_trace()` debugging breakpoint
# that would halt the script here.
val.validate_hist(
    dprimes, bins=10, labels=groups,
    title='Supplements excluded',
    xlabel='D-Prime',
    outname=file_name(['dprime-no-supplements'], path='validate')
)
print('Categorical Validation: Supplements Excluded')
print(np.mean(sum(dprimes, [])))
summary, labels = summarize_ranks(
    mongo['article'],
    labels,
    min_prop=0.05,
)

# Rank-order analysis: overall, then restricted to each major package.
ranks = order.analyze_rank_order(summary, mongo['article'])
ranks_spm = order.analyze_rank_order(
    summary, mongo['article'], {'tags.label': 'spm'})
ranks_fsl = order.analyze_rank_order(
    summary, mongo['article'], {'tags.label': 'fsl'})
ranks_afni = order.analyze_rank_order(
    summary, mongo['article'], {'tags.label': 'afni'})

seqplot.rank_plot(ranks, outname=file_name(['seq'], path='seq'))
seqplot.multi_rank_plot(
    ranks,
    ['spm', 'fsl', 'afni'],
    [ranks_spm, ranks_fsl, ranks_afni],
    outname=file_name(['seq', 'pkg'], path='seq')
)
# NOTE: removed dead commented-out scaffolding for a per-year rank analysis
# (`years = range(2000, 2014)` / `ranks_year = []`) that was never completed.
x_interp, interpolate.splev(x_interp, fit), '-', ) ax = plt.gca() ax.set_xlim((dates[0] - 1, dates[-1] + 1)) if title: ax.set_title(title) if xlabel: ax.set_xlabel(xlabel) if ylabel: ax.set_ylabel(ylabel) if outname: plt.savefig(outname + '.pdf', bbox_inches='tight') plt.close() for group in pipeline.variance_groups: plot_pipelines( DATES, count_year[group], ylabel='Count', outname=file_name(['count-{0}'.format(group)], path='pipelines') ) plot_pipelines( DATES, count_year[group], totals=total_year, ylabel='Proportion', outname=file_name(['prop-{0}'.format(group)], path='pipelines') )
print('{0} articles included in both sets'.format(len(val.pmids)))
print('{0} articles included in both sets, excluding supplements'.format(
    len(val.pmids_no_supplement)))

# Validate categorical values
validation = val.validate()
dprimes, groups = to_hist(validation)
frame = to_frame(validation)
val.validate_hist(dprimes, bins=5, labels=groups,
                  title='Supplements included', xlabel='D-Prime',
                  outname=file_name(['dprime-supplements'], path='validate'))
# BUG FIX: `ext='.csv'` was previously passed to DataFrame.to_csv, which has
# no such keyword (TypeError at runtime); it is a file_name argument, as in
# the second to_csv call below.
frame.to_csv(file_name(['dprime-supplements'], path='validate', ext='.csv'))
print('Categorical Validation: Supplements Included')
print(np.mean(sum(dprimes, [])))

validation = val.validate(no_supplement=True)
dprimes, groups = to_hist(validation)
frame = to_frame(validation)
val.validate_hist(dprimes, bins=5, labels=groups,
                  title='Supplements excluded', xlabel='D-Prime',
                  outname=file_name(['dprime-no-supplements'], path='validate'))
frame.to_csv(file_name(['dprime-no-supplements'], path='validate', ext='.csv'))
print('{0} articles included in both sets'.format(
    len(val.pmids)
))
print('{0} articles included in both sets, excluding supplements'.format(
    len(val.pmids_no_supplement)
))

# Validate categorical values
validation = val.validate()
dprimes, groups = to_hist(validation)
frame = to_frame(validation)
val.validate_hist(
    dprimes, bins=5, labels=groups,
    title='Supplements included',
    xlabel='D-Prime',
    outname=file_name(['dprime-supplements'], path='validate')
)
# BUG FIX: `ext='.csv'` was passed to DataFrame.to_csv (no such keyword;
# raises TypeError) instead of to file_name, as the parallel call below does.
frame.to_csv(file_name(['dprime-supplements'], path='validate', ext='.csv'))
print('Categorical Validation: Supplements Included')
print(np.mean(sum(dprimes, [])))

validation = val.validate(no_supplement=True)
dprimes, groups = to_hist(validation)
frame = to_frame(validation)
val.validate_hist(
    dprimes, bins=5, labels=groups,
    title='Supplements excluded',
    xlabel='D-Prime',
    outname=file_name(['dprime-no-supplements'], path='validate')
)
frame.to_csv(file_name(['dprime-no-supplements'], path='validate', ext='.csv'))
print('Categorical Validation: Supplements Excluded')
def test_path(self):
    """A `path` argument nests the generated name under that subdirectory."""
    expected = os.path.join(trendpath.fig_dir, 'bob', 'foo-bar')
    assert_equal(utils.file_name(['foo', 'bar'], path='bob'), expected)
def test_delim(self):
    """A custom `dlm` joins the name parts with that delimiter."""
    expected = os.path.join(trendpath.fig_dir, 'foo_bar')
    assert_equal(utils.file_name(['foo', 'bar'], dlm='_'), expected)
def test_defaults(self):
    """By default, name parts are joined with '-' under fig_dir."""
    observed = utils.file_name(['foo', 'bar'])
    assert_equal(observed, os.path.join(trendpath.fig_dir, 'foo-bar'))
""" """ from neurotrends.config import mongo from neurotrends.model.utils import verified_mongo from neurotrends.analysis.groupby.naive import summarize_custom from neurotrends.analysis.groupby.naive import summarize_smooth_kernel, summarize_highpass_cutoff from neurotrends.analysis.plot.histplot import hist from neurotrends.analysis.plot.utils import file_name import numpy as np cursor = mongo['article'].find(verified_mongo, {'tags': 1, 'date': 1}) summary_smooth_kernel = summarize_custom(cursor, 'smooth_kernel', summarize_smooth_kernel) summary_highpass_cutoff = summarize_custom(cursor, 'highpass_cutoff', summarize_highpass_cutoff) hist(summary_smooth_kernel, bins=np.arange(0.5, 19.5, 1), xlabel='Smoothing Kernel', outname=file_name(['smooth-kernel'], path='hist')) hist(summary_highpass_cutoff, xlog=True, xlabel='High-pass Filter Cutoff', outname=file_name(['highpass-cutoff'], path='hist'))
colors = get_colors(plot_frame.columns, PALETTE_NAME)

# Normalize each row to proportions before plotting.
plot_frame = plot_frame.div(1.0 * plot_frame.sum(axis=1), axis=0)

plot_frame.plot(kind='barh', stacked=True, color=colors)

ax = plt.gca()
# Hack: Remove horizontal line at y=0 inserted by pandas
ax.lines.pop()
ax.invert_yaxis()
ax.set_xlabel('Proportion')

# Place the legend outside the axes, anchored to the top-right corner.
handles, labels = ax.get_legend_handles_labels()
lgd = ax.legend(handles, labels, loc='upper left', bbox_to_anchor=(1, 1))

plt.savefig(
    file_name(['pkg', 'stacked'], path='place') + '.pdf',
    bbox_inches='tight'
)
def test_delim(self):
    """Passing `dlm` changes the separator between the name parts."""
    observed = utils.file_name(['foo', 'bar'], dlm='_')
    assert_equal(observed, os.path.join(trendpath.fig_dir, 'foo_bar'))
def test_path(self):
    """Passing `path` places the file inside that subdirectory of fig_dir."""
    observed = utils.file_name(['foo', 'bar'], path='bob')
    assert_equal(observed, os.path.join(trendpath.fig_dir, 'bob', 'foo-bar'))
""" from neurotrends.config import mongo from neurotrends.model.utils import verified_mongo from neurotrends.analysis.groupby.naive import summarize_custom from neurotrends.analysis.groupby.naive import summarize_smooth_kernel, summarize_highpass_cutoff from neurotrends.analysis.plot.histplot import hist from neurotrends.analysis.plot.utils import file_name import numpy as np cursor = mongo["article"].find(verified_mongo, {"tags": 1, "date": 1}) summary_smooth_kernel = summarize_custom(cursor, "smooth_kernel", summarize_smooth_kernel) summary_highpass_cutoff = summarize_custom(cursor, "highpass_cutoff", summarize_highpass_cutoff) hist( summary_smooth_kernel, bins=np.arange(0.5, 19.5, 1), xlabel="Smoothing Kernel", outname=file_name(["smooth-kernel"], path="hist"), ) hist( summary_highpass_cutoff, xlog=True, xlabel="High-pass Filter Cutoff", outname=file_name(["highpass-cutoff"], path="hist"), )
if label not in ['highpass_cutoff', 'smooth_kernel'] ] labels = list(set(labels)) summary, labels = summarize_ranks( mongo['article'], labels, min_prop=0.05, ) ranks = order.analyze_rank_order(summary, mongo['article']) ranks_spm = order.analyze_rank_order(summary, mongo['article'], {'tags.label': 'spm'}) ranks_fsl = order.analyze_rank_order(summary, mongo['article'], {'tags.label': 'fsl'}) ranks_afni = order.analyze_rank_order(summary, mongo['article'], {'tags.label': 'afni'}) seqplot.rank_plot(ranks, outname=file_name(['seq'], path='seq')) seqplot.multi_rank_plot( ranks, [ 'spm', 'fsl', 'afni', ], [ ranks_spm, ranks_fsl, ranks_afni, ], outname=file_name(['seq', 'pkg'], path='seq') )
ax.set_xlim((dates[0] - 1, dates[-1] + 1)) if title: ax.set_title(title) if xlabel: ax.set_xlabel(xlabel) if ylabel: ax.set_ylabel(ylabel) if outname: plt.savefig(outname + '.pdf', bbox_inches='tight') plt.close() for group in pipeline.variance_groups: plot_pipelines(DATES, count_year[group], ylabel='Count', outname=file_name(['count-{0}'.format(group)], path='pipelines')) plot_pipelines(DATES, count_year[group], totals=total_year, ylabel='Proportion', outname=file_name(['prop-{0}'.format(group)], path='pipelines'))