Beispiel #1
0
def run(timezones_amount=20):
    """Print and plot the most frequent time zones in ``bitlygov_ua.json``.

    Loads the file into a DataFrame, counts how often each value of the
    ``tz`` column occurs (records with no ``tz`` are counted under the
    label ``'Missing'``), prints the first ``timezones_amount`` counts and
    hands the full counts to ``plot_top``.

    Args:
        timezones_amount: Number of leading (most frequent) entries to print;
            also forwarded to ``plot_top``.
    """
    # DataFrame takes a list of dicts.
    frame = pd.DataFrame(load_jsoned_file('bitlygov_ua.json'))
    # Call form of print: same output on Python 2 for a single argument,
    # and valid syntax on Python 3.
    print(frame)
    # value_counts() drops NaN by default, so missing time zones have to be
    # labelled *before* counting; filling the counts afterwards is a no-op
    # and the 'Missing' bucket would never appear.
    tz_counts = frame['tz'].fillna('Missing').value_counts()
    pprint(tz_counts[:timezones_amount])
    print('ploting...')
    plot_top(tz_counts, timezones_amount)
Beispiel #2
0
def run(timezones_amount=10):
    """Plot Windows vs. non-Windows record counts for the busiest time zones.

    Loads ``bitlygov_ua.json`` into a DataFrame, labels every record that has
    an ``a`` value as ``'Windows'`` or ``'Non Windows'`` depending on whether
    that value contains the substring ``'Windows'``, counts records per
    (time zone, label) pair and draws a stacked horizontal bar chart of the
    ``timezones_amount`` time zones with the highest total count.

    Args:
        timezones_amount: How many of the busiest time zones to plot.
            Defaults to 10, the value that used to be hard-coded.

    Raises:
        ValueError: If ``timezones_amount`` is smaller than 1 (a ``-0``
            slice would otherwise select every row instead of none).
    """
    if timezones_amount < 1:
        raise ValueError('timezones_amount must be a positive integer')

    frame = pd.DataFrame(misc.load_jsoned_file('bitlygov_ua.json'))
    # Keep only the rows whose 'a' field is present, so the string test
    # below never sees NaN.
    cframe = frame[frame.a.notnull()]
    # np.where(condition, x, y) picks x where the condition holds and y
    # elsewhere, e.g. np.where([1, 0, 1, 1], "tak", "nie") gives
    # array(['tak', 'nie', 'tak', 'tak']).  Here it yields one label per row.
    oss = np.where(cframe['a'].str.contains('Windows'), 'Windows', 'Non Windows')
    # Group rows by time zone and by the Windows / Non Windows label.
    tz_by_os = cframe.groupby([cframe['tz'], oss])
    # size() counts the rows in each (tz, label) group; unstack() pivots the
    # label level into columns, giving one row per time zone.  Combinations
    # that never occur come out as NaN and are replaced with 0.
    agg_tzs = tz_by_os.size().unstack().fillna(0)
    # Row totals across both labels; argsort() returns the row positions
    # that would sort those totals in ascending order.
    indexer = agg_tzs.sum(axis=1).argsort()
    # Reorder the rows by ascending total and keep the tail, i.e. the
    # time zones with the largest totals.
    count_subset = agg_tzs.take(indexer).iloc[-timezones_amount:]
    count_subset.plot(kind='barh', stacked=True)