Example #1
0
def plot_places(min_prop=MIN_PROP):
    """Plot 'pkg' tag proportions for the ten most common article places.

    Counts the truthy ``place`` values found in ``mongo['article']``, keeps
    the ten most common, and cross-tabulates them against the labels of
    ``pattern.tag_groups['pkg']``. Label columns whose total is below
    ``min_prop`` times the grand total are summed into one ``'other'``
    column, every row is divided by its row sum, and the resulting frame is
    handed to ``plot_tags_by_place`` twice (stacked and not stacked).

    :param min_prop: Fraction of the grand total a label column must reach
        to keep a column of its own.
    """
    place_counts = collections.Counter(
        art['place']
        for art in mongo['article'].find({}, {'place': True})
        if art['place']
    )

    top_places = [place for place, _ in place_counts.most_common(10)]
    pkg_labels = pattern.tag_groups['pkg'].labels

    data = crosstab(
        mongo['article'],
        'place',
        top_places,
        'tags.label',
        pkg_labels
    )

    frame = pd.DataFrame(
        data,
        index=top_places,
        columns=pkg_labels,
    )

    min_count = min_prop * frame.sum().sum()

    # Column totals decide which labels keep their own column
    sum0 = frame.sum()
    keep = sum0 >= min_count

    # ``.ix`` was removed from pandas; ``.loc`` accepts the same boolean
    # column mask. Copy so that adding 'other' never writes into a slice
    # of ``frame``.
    plot_frame = frame.loc[:, keep].copy()
    plot_frame['other'] = frame.loc[:, ~keep].sum(axis=1)

    # Convert counts to per-place proportions (rows sum to 1)
    plot_frame = plot_frame.div(
        1.0 * plot_frame.sum(axis=1),
        axis=0,
    )

    # Make plots
    plot_tags_by_place(
        plot_frame, stacked=True,
        outname=file_name(['pkg', 'stacked'], path='place')
    )
    plot_tags_by_place(
        plot_frame, stacked=False,
        outname=file_name(['pkg', 'adjacent'], path='place')
    )
Example #2
0
def plot_places(min_prop=MIN_PROP):
    """Plot 'pkg' tag proportions for the ten most common article places.

    Counts the truthy ``place`` values found in ``mongo['article']``, keeps
    the ten most common, and cross-tabulates them against the labels of
    ``pattern.tag_groups['pkg']``. Label columns whose total is below
    ``min_prop`` times the grand total are summed into one ``'other'``
    column, every row is divided by its row sum, and the resulting frame is
    handed to ``plot_tags_by_place`` twice (stacked and not stacked).

    :param min_prop: Fraction of the grand total a label column must reach
        to keep a column of its own.
    """
    place_counts = collections.Counter(
        art['place'] for art in mongo['article'].find({}, {'place': True})
        if art['place'])

    top_places = [place for place, _ in place_counts.most_common(10)]
    pkg_labels = pattern.tag_groups['pkg'].labels

    data = crosstab(mongo['article'], 'place', top_places, 'tags.label',
                    pkg_labels)

    frame = pd.DataFrame(
        data,
        index=top_places,
        columns=pkg_labels,
    )

    min_count = min_prop * frame.sum().sum()

    # Column totals decide which labels keep their own column
    sum0 = frame.sum()
    keep = sum0 >= min_count

    # ``.ix`` was removed from pandas; ``.loc`` accepts the same boolean
    # column mask. Copy so that adding 'other' never writes into a slice
    # of ``frame``.
    plot_frame = frame.loc[:, keep].copy()
    plot_frame['other'] = frame.loc[:, ~keep].sum(axis=1)

    # Convert counts to per-place proportions (rows sum to 1)
    plot_frame = plot_frame.div(
        1.0 * plot_frame.sum(axis=1),
        axis=0,
    )

    # Make plots
    plot_tags_by_place(plot_frame,
                       stacked=True,
                       outname=file_name(['pkg', 'stacked'], path='place'))
    plot_tags_by_place(plot_frame,
                       stacked=False,
                       outname=file_name(['pkg', 'adjacent'], path='place'))
Example #3
0
 def test_defaults(self):
     # With only name parts given, the result must equal
     # <fig_dir>/foo-bar.
     actual = utils.file_name(['foo', 'bar'])
     expected = os.path.join(trendpath.fig_dir, 'foo-bar')
     assert_equal(actual, expected)
        ])
    return dprimes, groups

# Print the sizes of val.pmids and val.pmids_no_supplement
print('{0} articles included in both sets'.format(len(val.pmids)))
print('{0} articles included in both sets, excluding supplements'.format(
    len(val.pmids_no_supplement)))

# Validate boolean values

# First pass: validate() called with its defaults
validation = val.validate()
dprimes, groups = to_hist(validation)
val.validate_hist(
    dprimes,
    labels=groups,
    title='Supplements included',
    xlabel='D-Prime',
    outname=file_name(['dprime-supplements'], path='validate'),
)
print('Categorical Validation: Supplements Included')
# sum(dprimes, []) concatenates the sub-lists of dprimes before averaging
print(np.mean(sum(dprimes, [])))

# Second pass: validate() called with no_supplement=True
validation = val.validate(no_supplement=True)
dprimes, groups = to_hist(validation)
val.validate_hist(
    dprimes,
    labels=groups,
    title='Supplements excluded',
    xlabel='D-Prime',
    outname=file_name(['dprime-no-supplements'], path='validate'),
)
print('Categorical Validation: Supplements Excluded')
print(np.mean(sum(dprimes, [])))

# Validate continuous values
Example #5
0
# Print the sizes of val.pmids and val.pmids_no_supplement
print('{0} articles included in both sets'.format(
    len(val.pmids)
))
print('{0} articles included in both sets, excluding supplements'.format(
    len(val.pmids_no_supplement)
))


# Validate categorical values

# First pass: validate() called with its defaults
validation = val.validate()
dprimes, groups = to_hist(validation)
val.validate_hist(
    dprimes, bins=10, labels=groups, title='Supplements included', xlabel='D-Prime',
    outname=file_name(['dprime-supplements'], path='validate')
)
print('Categorical Validation: Supplements Included')
# sum(dprimes, []) concatenates the sub-lists of dprimes before averaging
print(np.mean(sum(dprimes, [])))


# Second pass: validate() called with no_supplement=True.
# (A leftover pdb.set_trace() breakpoint was removed here; it stopped every
# run at an interactive debugger prompt.)
validation = val.validate(no_supplement=True)
dprimes, groups = to_hist(validation)
val.validate_hist(
    dprimes, bins=10, labels=groups, title='Supplements excluded', xlabel='D-Prime',
    outname=file_name(['dprime-no-supplements'], path='validate')
)
print('Categorical Validation: Supplements Excluded')
print(np.mean(sum(dprimes, [])))
Example #6
0
# Summarise ranks for the candidate labels; the call also returns the
# labels to use from here on.
summary, labels = summarize_ranks(mongo['article'], labels, min_prop=0.05)

# Rank order without a query, then once per 'tags.label' query
ranks = order.analyze_rank_order(summary, mongo['article'])

ranks_spm = order.analyze_rank_order(
    summary, mongo['article'], {'tags.label': 'spm'},
)
ranks_fsl = order.analyze_rank_order(
    summary, mongo['article'], {'tags.label': 'fsl'},
)
ranks_afni = order.analyze_rank_order(
    summary, mongo['article'], {'tags.label': 'afni'},
)

seqplot.rank_plot(ranks, outname=file_name(['seq'], path='seq'))

# Names and per-package ranks are passed as parallel lists
seqplot.multi_rank_plot(
    ranks,
    ['spm', 'fsl', 'afni'],
    [ranks_spm, ranks_fsl, ranks_afni],
    outname=file_name(['seq', 'pkg'], path='seq'),
)

#years = range(2000, 2014)
#ranks_year = []
#
Example #7
0
        x_interp, interpolate.splev(x_interp, fit), '-',
    )

    ax = plt.gca()

    ax.set_xlim((dates[0] - 1, dates[-1] + 1))

    if title:
        ax.set_title(title)
    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)

    if outname:
        plt.savefig(outname + '.pdf', bbox_inches='tight')

    plt.close()

# Two figures per variance group: one labelled 'Count' and one labelled
# 'Proportion' that additionally receives the yearly totals.
for group in pipeline.variance_groups:

    plot_pipelines(
        DATES,
        count_year[group],
        ylabel='Count',
        outname=file_name(['count-{0}'.format(group)], path='pipelines'),
    )

    # NOTE(review): presumably totals= is used to normalise the counts --
    # confirm in plot_pipelines.
    plot_pipelines(
        DATES,
        count_year[group],
        totals=total_year,
        ylabel='Proportion',
        outname=file_name(['prop-{0}'.format(group)], path='pipelines'),
    )
Example #8
0
# Print the sizes of val.pmids and val.pmids_no_supplement
print('{0} articles included in both sets'.format(len(val.pmids)))
print('{0} articles included in both sets, excluding supplements'.format(
    len(val.pmids_no_supplement)))

# Validate categorical values

# First pass: validate() called with its defaults
validation = val.validate()
dprimes, groups = to_hist(validation)
frame = to_frame(validation)
val.validate_hist(dprimes,
                  bins=5,
                  labels=groups,
                  title='Supplements included',
                  xlabel='D-Prime',
                  outname=file_name(['dprime-supplements'], path='validate'))
# ext belongs to file_name(), not to DataFrame.to_csv(): passing it to
# to_csv raises TypeError. This now matches the no-supplement export below.
frame.to_csv(file_name(['dprime-supplements'], path='validate', ext='.csv'))
print('Categorical Validation: Supplements Included')
# sum(dprimes, []) concatenates the sub-lists of dprimes before averaging
print(np.mean(sum(dprimes, [])))

# Second pass: validate() called with no_supplement=True
validation = val.validate(no_supplement=True)
dprimes, groups = to_hist(validation)
frame = to_frame(validation)
val.validate_hist(dprimes,
                  bins=5,
                  labels=groups,
                  title='Supplements excluded',
                  xlabel='D-Prime',
                  outname=file_name(['dprime-no-supplements'],
                                    path='validate'))
frame.to_csv(file_name(['dprime-no-supplements'], path='validate', ext='.csv'))
Example #9
0
# Print the sizes of val.pmids and val.pmids_no_supplement
print('{0} articles included in both sets'.format(
    len(val.pmids)
))
print('{0} articles included in both sets, excluding supplements'.format(
    len(val.pmids_no_supplement)
))


# Validate categorical values

# First pass: validate() called with its defaults
validation = val.validate()
dprimes, groups = to_hist(validation)
frame = to_frame(validation)
val.validate_hist(
    dprimes, bins=5, labels=groups, title='Supplements included', xlabel='D-Prime',
    outname=file_name(['dprime-supplements'], path='validate')
)
# ext belongs to file_name(), not to DataFrame.to_csv(): passing it to
# to_csv raises TypeError. This now matches the no-supplement export below.
frame.to_csv(file_name(['dprime-supplements'], path='validate', ext='.csv'))
print('Categorical Validation: Supplements Included')
# sum(dprimes, []) concatenates the sub-lists of dprimes before averaging
print(np.mean(sum(dprimes, [])))


# Second pass: validate() called with no_supplement=True
validation = val.validate(no_supplement=True)
dprimes, groups = to_hist(validation)
frame = to_frame(validation)
val.validate_hist(
    dprimes, bins=5, labels=groups, title='Supplements excluded', xlabel='D-Prime',
    outname=file_name(['dprime-no-supplements'], path='validate')
)
frame.to_csv(file_name(['dprime-no-supplements'], path='validate', ext='.csv'))
print('Categorical Validation: Supplements Excluded')
Example #10
0
 def test_path(self):
     # With path='bob', the result must equal <fig_dir>/bob/foo-bar.
     actual = utils.file_name(['foo', 'bar'], path='bob')
     expected = os.path.join(trendpath.fig_dir, 'bob', 'foo-bar')
     assert_equal(actual, expected)
Example #11
0
 def test_delim(self):
     # With dlm='_', the result must equal <fig_dir>/foo_bar.
     actual = utils.file_name(['foo', 'bar'], dlm='_')
     expected = os.path.join(trendpath.fig_dir, 'foo_bar')
     assert_equal(actual, expected)
Example #12
0
 def test_defaults(self):
     # With only name parts given, the result must equal
     # <fig_dir>/foo-bar.
     actual = utils.file_name(['foo', 'bar'])
     expected = os.path.join(trendpath.fig_dir, 'foo-bar')
     assert_equal(actual, expected)
Example #13
0
"""

"""

from neurotrends.config import mongo
from neurotrends.model.utils import verified_mongo
from neurotrends.analysis.groupby.naive import summarize_custom
from neurotrends.analysis.groupby.naive import summarize_smooth_kernel, summarize_highpass_cutoff
from neurotrends.analysis.plot.histplot import hist
from neurotrends.analysis.plot.utils import file_name

import numpy as np

# Query articles matching verified_mongo, projecting 'tags' and 'date'
cursor = mongo['article'].find(verified_mongo, {'tags': 1, 'date': 1})

# NOTE(review): the same cursor object is handed to both calls below. If it
# is a single-pass cursor and summarize_custom does not rewind it, the
# second summary would see no documents -- confirm.
summary_smooth_kernel = summarize_custom(
    cursor, 'smooth_kernel', summarize_smooth_kernel,
)
summary_highpass_cutoff = summarize_custom(
    cursor, 'highpass_cutoff', summarize_highpass_cutoff,
)

# Smoothing kernel histogram: bin edges at 0.5, 1.5, ..., 18.5
hist(
    summary_smooth_kernel,
    bins=np.arange(0.5, 19.5, 1),
    xlabel='Smoothing Kernel',
    outname=file_name(['smooth-kernel'], path='hist'),
)

# High-pass cutoff histogram, drawn with xlog=True
hist(
    summary_highpass_cutoff,
    xlog=True,
    xlabel='High-pass Filter Cutoff',
    outname=file_name(['highpass-cutoff'], path='hist'),
)
# One colour per column of plot_frame, taken from PALETTE_NAME
colors = get_colors(plot_frame.columns, PALETTE_NAME)

# Divide every row by its row sum so the bars show proportions
plot_frame = plot_frame.div(
    1.0 * plot_frame.sum(axis=1),
    axis=0,
)

plot_frame.plot(
    kind='barh', stacked=True,
    color=colors,
)

ax = plt.gca()

# Hack: Remove horizontal line at y=0 inserted by pandas.
# Artist.remove() is used instead of ax.lines.pop() because Axes.lines is a
# read-only sequence in current matplotlib; the guard avoids an IndexError
# when no line was added.
if ax.lines:
    ax.lines[-1].remove()

ax.invert_yaxis()
ax.set_xlabel('Proportion')

# Place the legend outside the axes, anchored at the upper-right corner
handles, labels = ax.get_legend_handles_labels()
lgd = ax.legend(
    handles, labels,
    loc='upper left', bbox_to_anchor=(1, 1)
)

plt.savefig(
    file_name(['pkg', 'stacked'], path='place') + '.pdf',
    bbox_inches='tight'
)
Example #15
0
 def test_delim(self):
     # With dlm='_', the result must equal <fig_dir>/foo_bar.
     actual = utils.file_name(['foo', 'bar'], dlm='_')
     expected = os.path.join(trendpath.fig_dir, 'foo_bar')
     assert_equal(actual, expected)
Example #16
0
 def test_path(self):
     # With path='bob', the result must equal <fig_dir>/bob/foo-bar.
     actual = utils.file_name(['foo', 'bar'], path='bob')
     expected = os.path.join(trendpath.fig_dir, 'bob', 'foo-bar')
     assert_equal(actual, expected)
Example #17
0
"""

from neurotrends.config import mongo
from neurotrends.model.utils import verified_mongo
from neurotrends.analysis.groupby.naive import summarize_custom
from neurotrends.analysis.groupby.naive import summarize_smooth_kernel, summarize_highpass_cutoff
from neurotrends.analysis.plot.histplot import hist
from neurotrends.analysis.plot.utils import file_name

import numpy as np

# Query articles matching verified_mongo, projecting "tags" and "date"
cursor = mongo["article"].find(verified_mongo, {"tags": 1, "date": 1})

# NOTE(review): the same cursor object is handed to both calls below. If it
# is a single-pass cursor and summarize_custom does not rewind it, the
# second summary would see no documents -- confirm.
summary_smooth_kernel = summarize_custom(
    cursor,
    "smooth_kernel",
    summarize_smooth_kernel,
)
summary_highpass_cutoff = summarize_custom(
    cursor,
    "highpass_cutoff",
    summarize_highpass_cutoff,
)

# Smoothing kernel histogram: bin edges at 0.5, 1.5, ..., 18.5
hist(summary_smooth_kernel,
     bins=np.arange(0.5, 19.5, 1),
     xlabel="Smoothing Kernel",
     outname=file_name(["smooth-kernel"], path="hist"))

# High-pass cutoff histogram, drawn with xlog=True
hist(summary_highpass_cutoff,
     xlog=True,
     xlabel="High-pass Filter Cutoff",
     outname=file_name(["highpass-cutoff"], path="hist"))
Example #18
0
    if label not in ['highpass_cutoff', 'smooth_kernel']
]

# Drop duplicate labels (resulting order is whatever set iteration yields)
labels = list(set(labels))

# Summarise ranks for the candidate labels; the call also returns the
# labels to use from here on.
summary, labels = summarize_ranks(mongo['article'], labels, min_prop=0.05)

# Rank order without a query, then once per 'tags.label' query
ranks = order.analyze_rank_order(summary, mongo['article'])

ranks_spm = order.analyze_rank_order(
    summary, mongo['article'], {'tags.label': 'spm'},
)
ranks_fsl = order.analyze_rank_order(
    summary, mongo['article'], {'tags.label': 'fsl'},
)
ranks_afni = order.analyze_rank_order(
    summary, mongo['article'], {'tags.label': 'afni'},
)

seqplot.rank_plot(ranks, outname=file_name(['seq'], path='seq'))

# Names and per-package ranks are passed as parallel lists
seqplot.multi_rank_plot(
    ranks,
    ['spm', 'fsl', 'afni'],
    [ranks_spm, ranks_fsl, ranks_afni],
    outname=file_name(['seq', 'pkg'], path='seq'),
)
Example #19
0
    ax.set_xlim((dates[0] - 1, dates[-1] + 1))

    if title:
        ax.set_title(title)
    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)

    if outname:
        plt.savefig(outname + '.pdf', bbox_inches='tight')

    plt.close()


# Two figures per variance group: one labelled 'Count' and one labelled
# 'Proportion' that additionally receives the yearly totals.
for group in pipeline.variance_groups:

    plot_pipelines(
        DATES,
        count_year[group],
        ylabel='Count',
        outname=file_name(['count-{0}'.format(group)], path='pipelines'),
    )

    # NOTE(review): presumably totals= is used to normalise the counts --
    # confirm in plot_pipelines.
    plot_pipelines(
        DATES,
        count_year[group],
        totals=total_year,
        ylabel='Proportion',
        outname=file_name(['prop-{0}'.format(group)], path='pipelines'),
    )