Example #1
0
def plot_pv_fraction_in_campaign(pv, access_method, counts, sample_rate, hours = 1):
    """Plot, per time bucket, the share of pageviews that fell inside a campaign.

    Args:
        pv: DataFrame with ``access_method`` and ``pageviews`` columns; its
            index entries must expose ``.day`` / ``.hour`` and support
            subtracting a ``timedelta``.
        access_method: value of ``pv['access_method']`` to keep.
        counts: forwarded to ``get_counts_df``.
        sample_rate: forwarded to ``get_counts_df``.
        hours: bucket width in hours.

    Returns:
        The per-bucket DataFrame, including the added
        ``pageviews_in_campaign`` and ``fraction_of_pageviews_in_campaign``
        columns.
    """
    def bucket_start(tm):
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        offset = (24 * tm.day + tm.hour) % hours
        return tm - timedelta(hours=offset)

    from_source = pv['access_method'] == access_method
    per_source = copy.copy(pv[from_source])
    per_source.index = pd.Series(per_source.index).apply(bucket_start)
    per_bucket = per_source.groupby(per_source.index).sum()

    campaign_counts = get_counts_df(counts, hours, sample_rate)
    per_bucket['pageviews_in_campaign'] = campaign_counts.sum(axis=1)
    per_bucket['fraction_of_pageviews_in_campaign'] = (
        per_bucket['pageviews_in_campaign'] / per_bucket['pageviews'])

    fraction_only = per_bucket[['fraction_of_pageviews_in_campaign']]
    plot_df(fraction_only, 'fraction per %d hours' % hours)
    return per_bucket
Example #2
0
def plot_central_tendency(d,
                          regs,
                          start='2000',
                          stop='2050',
                          hours=1,
                          index=None,
                          ylabel='dollars per donation',
                          title='',
                          method='mean',
                          amount_limit=10000.0):
    """Plot the mean or median capped donation amount per time bucket.

    Args:
        d: DataFrame with ``name`` and ``amount`` columns. It is sliced with
            ``d[start:stop]``; unless ``index`` is given, its index entries
            must expose ``.day`` / ``.hour`` and support subtracting a
            ``timedelta``.
        regs: mapping of column label -> regex applied with ``str.match``
            to ``d.name``.
        start: lower bound of the slice.
        stop: upper bound of the slice.
        hours: bucket width in hours (only used when ``index`` is None).
        index: optional column name whose values replace the index instead
            of time bucketing.
        ylabel: forwarded to ``plot_df``.
        title: forwarded to ``plot_df``.
        method: ``'mean'`` plots sum / count per bucket; any other value
            plots the median.
        amount_limit: amounts above this value are capped to it.

    Returns:
        Whatever ``plot_df`` returns.
    """
    d = d[start:stop]
    if index is None:
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        d.index = pd.Series(d.index).apply(
            lambda tm: tm - timedelta(hours=(24 * tm.day + tm.hour) % hours))
    else:
        d.index = d[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        matches = d.name.str.match(reg).apply(bool)
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        # `clip` caps the values without assigning into a derived Series
        # (which triggers chained-assignment warnings).
        amounts = d.loc[matches, 'amount'].clip(upper=amount_limit)
        per_bucket = amounts.groupby(amounts.index)
        if method == 'mean':
            d_plot[name] = per_bucket.sum() / per_bucket.count()
        else:
            d_plot[name] = per_bucket.median()
    return plot_df(d_plot, ylabel=ylabel, title=title, interactive=False)
def plot_by_time(d, regs, start='2000', stop='2050', hours=1, amount=False,
                 cum=False, normalize=False, ylabel='', interactive=False,
                 index=None, rotate=False):
    """Plot per-bucket totals of ``n`` (or ``amount``) for each banner group.

    Args:
        d: DataFrame with a ``name`` column plus ``n`` and/or ``amount``. It
            is sliced with ``d[start:stop]``; unless ``index`` is given, its
            index entries must expose ``.day`` / ``.hour`` and support
            subtracting a ``timedelta``.
        regs: mapping of column label -> regex applied with ``str.match``
            to ``d.name``.
        start: lower bound of the slice.
        stop: upper bound of the slice.
        hours: bucket width in hours (only used when ``index`` is None).
        amount: sum the ``amount`` column instead of ``n``.
        cum: plot the running total instead of per-bucket totals.
        normalize: divide each group's values by the group's overall sum.
        ylabel: forwarded to ``plot_df``.
        interactive: forwarded to ``plot_df``.
        index: optional column name whose values replace the index instead
            of time bucketing.
        rotate: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns.
    """
    d = d[start:stop]

    if index is None:
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        d.index = pd.Series(d.index).apply(
            lambda tm: tm - timedelta(hours=(24 * tm.day + tm.hour) % hours))
    else:
        d.index = d[index]

    column = 'amount' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        counts = d.loc[d.name.str.match(reg).apply(bool), column]

        if normalize:
            counts = counts / counts.sum()

        totals = counts.groupby(counts.index).sum()
        d_plot[name] = totals.cumsum() if cum else totals

    d_plot = d_plot.fillna(0)
    return plot_df(d_plot, ylabel, interactive=interactive, rotate=rotate)
Example #4
0
def plot_by_impressions_seen(d,
                             regs,
                             normalize=True,
                             max_impressions=5,
                             amount=False,
                             interactive=False,
                             ylabel=''):
    """Plot totals of ``n`` (or ``amount``) grouped by the index of ``d``.

    For every ``(label, regex)`` pair in ``regs`` (visited in sorted order)
    the rows whose ``name`` contains the regex are summed per index value.
    Entries up to label ``max_impressions`` are kept and the slice
    ``[max_impressions:]`` is summed into one extra entry labelled
    ``max_impressions + 1``.

    Args:
        d: DataFrame with a ``name`` column plus ``n`` and/or ``amount``.
        regs: mapping of column label -> regex used with ``str.contains``.
        normalize: divide each group's totals by their overall sum.
        max_impressions: last index label kept as its own entry.
        amount: sum the ``amount`` column instead of ``n``.
        interactive: forwarded to ``plot_df``.
        ylabel: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns.
    """
    value_column = 'amount' if amount else 'n'
    frame = pd.DataFrame()

    for label, pattern in sorted(regs.items()):
        selected = d[d.name.str.contains(pattern)][value_column]
        totals = selected.groupby(selected.index).sum()
        if normalize:
            totals = totals / totals.sum()

        overflow = totals[max_impressions:].sum()
        head = totals.loc[:max_impressions]
        head.loc[max_impressions + 1] = overflow
        frame[label] = head

    return plot_df(frame,
                   xlabel='impressions seen',
                   ylabel=ylabel,
                   interactive=interactive)
Example #5
0
def plot_rate_by_time(don,
                      imp,
                      regs,
                      hours=1,
                      start='2000',
                      stop='2050',
                      ylabel='donation rate',
                      interactive=False,
                      index=None):
    """Plot donations divided by impressions per time bucket.

    Args:
        don: DataFrame with ``name`` and ``n`` columns (donation counts).
        imp: DataFrame with ``name`` and ``n`` columns (impression counts).
            Both frames are sliced with ``[start:stop]``; unless ``index``
            is given, their index entries must expose ``.day`` / ``.hour``
            and support subtracting a ``timedelta``.
        regs: mapping of column label -> regex applied with ``str.match``
            to the ``name`` column of both frames.
        hours: bucket width in hours (only used when ``index`` is None).
        start: lower bound of the slice.
        stop: upper bound of the slice.
        ylabel: forwarded to ``plot_df``.
        interactive: forwarded to ``plot_df``.
        index: optional column name whose values replace the index of both
            frames instead of time bucketing.

    Returns:
        Whatever ``plot_df`` returns.
    """
    def bucket_start(tm):
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    don = don[start:stop]
    imp = imp[start:stop]

    if index is None:
        don.index = pd.Series(don.index).apply(bucket_start)
        imp.index = pd.Series(imp.index).apply(bucket_start)
    else:
        don.index = don[index]
        imp.index = imp[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        dons = don.loc[don.name.str.match(reg).apply(bool), 'n']
        dons = dons.groupby(dons.index).sum()
        imps = imp.loc[imp.name.str.match(reg).apply(bool), 'n']
        imps = imps.groupby(imps.index).sum()

        d_plot[name] = dons / imps
    return plot_df(d_plot, ylabel, interactive=interactive)
Example #6
0
def plot_central_tendency(
    d,
    regs,
    start="2000",
    stop="2050",
    hours=1,
    index=None,
    ylabel="dollars per donation",
    title="",
    method="mean",
    amount_limit=10000.0,
):
    """Plot the mean or median capped donation amount per time bucket.

    Args:
        d: DataFrame with ``name`` and ``amount`` columns. It is sliced with
            ``d[start:stop]``; unless ``index`` is given, its index entries
            must expose ``.day`` / ``.hour`` and support subtracting a
            ``timedelta``.
        regs: mapping of column label -> regex applied with ``str.match``
            to ``d.name``.
        start: lower bound of the slice.
        stop: upper bound of the slice.
        hours: bucket width in hours (only used when ``index`` is None).
        index: optional column name whose values replace the index instead
            of time bucketing.
        ylabel: forwarded to ``plot_df``.
        title: forwarded to ``plot_df``.
        method: ``"mean"`` plots sum / count per bucket; any other value
            plots the median.
        amount_limit: amounts above this value are capped to it.

    Returns:
        Whatever ``plot_df`` returns.
    """
    d = d[start:stop]
    if index is None:
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        d.index = pd.Series(d.index).apply(lambda tm: tm - timedelta(hours=(24 * tm.day + tm.hour) % hours))
    else:
        d.index = d[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        matches = d.name.str.match(reg).apply(bool)
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        # `clip` caps the values without assigning into a derived Series
        # (which triggers chained-assignment warnings).
        amounts = d.loc[matches, "amount"].clip(upper=amount_limit)
        per_bucket = amounts.groupby(amounts.index)
        if method == "mean":
            d_plot[name] = per_bucket.sum() / per_bucket.count()
        else:
            d_plot[name] = per_bucket.median()
    return plot_df(d_plot, ylabel=ylabel, title=title, interactive=False)
Example #7
0
def plot_by_impressions_seen(d,
                            regs,
                            normalize = True,
                            max_impressions = 5,
                            amount = False,
                            interactive = False, 
                            ylabel = ''
                             ):
    """Plot totals of ``n`` (or ``amount``) grouped by the index of ``d``.

    Each ``(label, regex)`` pair in ``regs`` is handled in sorted order:
    rows whose ``name`` contains the regex are summed per index value,
    entries up to label ``max_impressions`` are kept, and the slice
    ``[max_impressions:]`` is summed into an extra entry labelled
    ``max_impressions + 1``.

    Args:
        d: DataFrame with a ``name`` column plus ``n`` and/or ``amount``.
        regs: mapping of column label -> regex used with ``str.contains``.
        normalize: divide each group's totals by their overall sum.
        max_impressions: last index label kept as its own entry.
        amount: sum the ``amount`` column instead of ``n``.
        interactive: forwarded to ``plot_df``.
        ylabel: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns.
    """
    source_column = 'amount' if amount else 'n'
    result = pd.DataFrame()

    for series_name, regex in sorted(regs.items()):
        rows = d[d.name.str.contains(regex)]
        per_index = rows[source_column]
        per_index = per_index.groupby(per_index.index).sum()
        if normalize:
            per_index = per_index / per_index.sum()

        tail_total = per_index[max_impressions:].sum()
        kept = per_index.loc[:max_impressions]
        kept.loc[max_impressions + 1] = tail_total
        result[series_name] = kept

    return plot_df(result, xlabel='impressions seen', ylabel=ylabel, interactive=interactive)
Example #8
0
def plot_conversion_rate(d, regs, start='2000', stop='2050', hours=1,
                         index=None, ylabel='conversion_rate', title=''):
    """Plot donations / clicks per time bucket for each banner and payment method.

    Prints a message and returns None when ``d`` is empty or when the
    resulting frame has fewer than three rows.

    Args:
        d: DataFrame with ``name``, ``payment_method``, ``donation`` and
            ``n`` columns. It is sliced with ``d[start:stop]``; unless
            ``index`` is given, its index entries must expose ``.day`` /
            ``.hour`` and support subtracting a ``timedelta``.
        regs: mapping of label -> regex applied with ``str.match`` to
            ``d.name``.
        start: lower bound of the slice.
        stop: upper bound of the slice.
        hours: bucket width in hours (only used when ``index`` is None).
        index: optional column name whose values replace the index instead
            of time bucketing.
        ylabel: forwarded to ``plot_df``.
        title: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns, or None when there is nothing to plot.
    """
    if d.shape[0] == 0:
        print('No Conversion rate data for this device')
        return

    d = d[start:stop]
    if index is None:
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        d.index = pd.Series(d.index).apply(
            lambda tm: tm - timedelta(hours=(24 * tm.day + tm.hour) % hours))
    else:
        d.index = d[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        clicks = d.loc[d.name.str.match(reg).apply(bool)]
        if clicks.shape[0] == 0:
            continue
        # NOTE(review): `methods` is not a parameter here; it is presumably a
        # module-level iterable of payment method names -- confirm it exists.
        for method in methods:
            clicks_by_method = clicks[clicks['payment_method'] == method]
            if clicks_by_method.shape[0] == 0:
                continue
            donations = clicks_by_method[clicks_by_method['donation'] == 1]['n']
            if donations.shape[0] == 0:
                continue
            donations = donations.groupby(donations.index).sum()
            clicks_by_method = clicks_by_method.groupby(clicks_by_method.index)['n'].sum()
            d_plot[name + ' ' + method] = donations / clicks_by_method

    if d_plot.shape[0] < 3:
        print('No Conversion rate data for this device')
        return
    return plot_df(d_plot, ylabel=ylabel, title=title, interactive=False)
Example #9
0
def plot_donation_rate(don,
                        imp,
                        regs,
                        cum = False,
                        max_impressions=5,
                        amount = False, 
                        interactive = False,
                        ylabel = '',
):
    """Plot donations per impression, grouped by the index of the inputs.

    For each ``(label, regex)`` pair in ``regs`` (sorted order), donations
    and impressions whose ``name`` contains the regex are summed per index
    value, truncated after label ``max_impressions`` with the remainder
    folded into an entry labelled ``max_impressions + 1``, and divided.

    Args:
        don: DataFrame with a ``name`` column plus ``n`` and/or ``amount``.
        imp: DataFrame with ``name`` and ``n`` columns.
        regs: mapping of column label -> regex used with ``str.contains``.
        cum: use running totals of both series before dividing.
        max_impressions: last index label kept as its own entry.
        amount: use the ``amount`` column of ``don`` instead of ``n``.
        interactive: forwarded to ``plot_df``.
        ylabel: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns.
    """
    def fold_tail(per_index):
        # NOTE(review): the head uses a label slice while the tail uses a
        # plain `[]` slice; the two only partition the data for certain
        # index layouts (e.g. labels 1..N) -- confirm against the callers.
        tail_total = per_index[max_impressions:].sum()
        # Copy so the enlargement below never touches `per_index`.
        head = per_index.loc[:max_impressions].copy()
        head.loc[max_impressions + 1] = tail_total
        return head

    donation_column = 'amount' if amount else 'n'

    d_plot = pd.DataFrame()
    for name, reg in sorted(regs.items()):
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        donations = don.loc[don.name.str.contains(reg), donation_column]
        donations1 = fold_tail(donations.groupby(donations.index).sum())

        impressions = imp.loc[imp.name.str.contains(reg), 'n']
        impressions1 = fold_tail(impressions.groupby(impressions.index).sum())

        if cum:
            impressions1 = impressions1.cumsum()
            donations1 = donations1.cumsum()

        d_plot[name] = donations1 / impressions1

    return plot_df(d_plot, xlabel='impressions seen', ylabel=ylabel, interactive=interactive)
Example #10
0
def plot_conversion_rate(d,
                         regs,
                         start='2000',
                         stop='2050',
                         hours=1,
                         index=None,
                         ylabel='conversion_rate',
                         title='',
                         methods=['amazon', 'paypal', 'cc']):
    """Plot donations / clicks per time bucket for each banner and payment method.

    Args:
        d: DataFrame with ``name``, ``payment_method``, ``donation`` and
            ``n`` columns. It is sliced with ``d[start:stop]``; unless
            ``index`` is given, its index entries must expose ``.day`` /
            ``.hour`` and support subtracting a ``timedelta``.
        regs: mapping of label -> regex applied with ``str.match`` to
            ``d.name``.
        start: lower bound of the slice.
        stop: upper bound of the slice.
        hours: bucket width in hours (only used when ``index`` is None).
        index: optional column name whose values replace the index instead
            of time bucketing.
        ylabel: forwarded to ``plot_df``.
        title: forwarded to ``plot_df``.
        methods: payment method names to plot; the default list is only
            iterated, never mutated.

    Returns:
        Whatever ``plot_df`` returns.
    """
    d = d[start:stop]
    if index is None:
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        d.index = pd.Series(d.index).apply(
            lambda tm: tm - timedelta(hours=(24 * tm.day + tm.hour) % hours))
    else:
        d.index = d[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        clicks = d.loc[d.name.str.match(reg).apply(bool)]
        for method in methods:
            clicks_by_method = clicks[clicks['payment_method'] == method]
            donations = clicks_by_method[clicks_by_method['donation'] ==
                                         1]['n']
            donations = donations.groupby(donations.index).sum()
            clicks_by_method = clicks_by_method.groupby(
                clicks_by_method.index)['n'].sum()

            d_plot[name + ' ' + method] = donations / clicks_by_method
    return plot_df(d_plot, ylabel=ylabel, title=title, interactive=False)
Example #11
0
def plot_by_time(d,
                 regs,
                 start='2000',
                 stop='2050',
                 hours=1,
                 amount=False,
                 cum=False,
                 normalize=False,
                 ylabel='',
                 interactive=False,
                 index=None,
                 rotate=False):
    """Plot per-bucket totals of ``n`` (or ``dollars``) for each banner group.

    Groups that produce fewer than three buckets are skipped. When nothing
    plottable remains, a message is printed and nothing is drawn.

    Args:
        d: DataFrame with a ``name`` column plus ``n`` and/or ``dollars``.
            It is sliced with ``d[start:stop]``; unless ``index`` is given,
            its index entries must expose ``.day`` / ``.hour`` and support
            subtracting a ``timedelta``.
        regs: mapping of column label -> regex applied with ``str.match``
            to ``d.name``.
        start: lower bound of the slice.
        stop: upper bound of the slice.
        hours: bucket width in hours (only used when ``index`` is None).
        amount: sum the ``dollars`` column instead of ``n``.
        cum: plot the running total instead of per-bucket totals.
        normalize: divide each group's values by the group's overall sum.
        ylabel: forwarded to ``plot_df``.
        interactive: forwarded to ``plot_df``.
        index: optional column name whose values replace the index instead
            of time bucketing.
        rotate: forwarded to ``plot_df``.

    Returns:
        None. The result of ``plot_df`` is not returned.
    """
    d = d[start:stop]

    if index is None:
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        d.index = pd.Series(d.index).apply(
            lambda tm: tm - timedelta(hours=(24 * tm.day + tm.hour) % hours))
    else:
        d.index = d[index]

    column = 'dollars' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        counts = d.loc[d.name.str.match(reg).apply(bool), column]

        if normalize:
            counts = counts / counts.sum()

        totals = counts.groupby(counts.index).sum()

        # Check the series itself before inserting it. Measuring
        # d_plot[name] after insertion reports the frame's row count, and
        # deleting the column keeps its index, so one short first series
        # used to make every later series get dropped as well.
        if totals.shape[0] < 3:
            continue

        d_plot[name] = totals.cumsum() if cum else totals

    if d_plot.shape[0] < 3:
        print('There is no data for this campaign or this kind of banners')
        return

    d_plot = d_plot.fillna(0)

    plot_df(d_plot, ylabel, interactive=interactive, rotate=rotate)
Example #12
0
def plot_pv_fraction_in_campaign(pv,
                                 access_method,
                                 counts,
                                 sample_rate,
                                 hours=1):
    """Plot, per time bucket, the share of pageviews that fell inside a campaign.

    Args:
        pv: DataFrame with ``access_method`` and ``pageviews`` columns; its
            index entries must expose ``.day`` / ``.hour`` and support
            subtracting a ``timedelta``.
        access_method: value of ``pv['access_method']`` to keep.
        counts: forwarded to ``get_counts_df``.
        sample_rate: forwarded to ``get_counts_df``.
        hours: bucket width in hours.

    Returns:
        The per-bucket DataFrame with the added ``pageviews_in_campaign``
        and ``fraction_of_pageviews_in_campaign`` columns.
    """
    def to_bucket(tm):
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        shift = (24 * tm.day + tm.hour) % hours
        return tm - timedelta(hours=shift)

    wanted = pv['access_method'] == access_method
    filtered = copy.copy(pv[wanted])
    filtered.index = pd.Series(filtered.index).apply(to_bucket)
    summed = filtered.groupby(filtered.index).sum()

    in_campaign = get_counts_df(counts, hours, sample_rate).sum(axis=1)
    summed['pageviews_in_campaign'] = in_campaign
    fraction = summed['pageviews_in_campaign'] / summed['pageviews']
    summed['fraction_of_pageviews_in_campaign'] = fraction

    plot_df(summed[['fraction_of_pageviews_in_campaign']],
            'fraction per %d hours' % hours)
    return summed
Example #13
0
def plot_traffic(pv, imp, source, reg, start, stop, hours=1):
    """Plot pageviews next to banner impressions per time bucket for one source.

    Args:
        pv: DataFrame with ``access_method`` and ``pageviews`` columns.
        imp: DataFrame with ``name`` and ``n`` columns. The index entries of
            both frames must expose ``.day`` / ``.hour`` and support
            subtracting a ``timedelta``.
        source: value of ``pv['access_method']`` to keep; also used as the
            prefix of the plotted column labels.
        reg: regex applied with ``str.match`` to ``imp.name``.
        start: accepted but not used by this function.
        stop: accepted but not used by this function.
        hours: bucket width in hours.

    Returns:
        Whatever ``plot_df`` returns.
    """
    def bucket_start(tm):
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    pv_s = copy.copy(pv[pv['access_method'] == source])
    pv_s.index = pd.Series(pv_s.index).apply(bucket_start)
    pv_s = pv_s.groupby(pv_s.index).sum()
    pv_s.rename(columns={'pageviews': source + ' pageviews'}, inplace=True)

    # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
    imp_s = copy.copy(imp.loc[imp.name.str.match(reg).apply(bool), ['n']])
    imp_s.index = pd.Series(imp_s.index).apply(bucket_start)
    imp_s = imp_s.groupby(imp_s.index).sum()
    imp_s.rename(columns={'n': source + ' impressions'}, inplace=True)

    # Left join: only buckets that have pageviews are kept.
    d = pv_s.merge(imp_s, how='left', left_index=True, right_index=True)[
        [source + ' pageviews', source + ' impressions']]
    return plot_df(d, 'count per %d hours' % hours)
Example #14
0
def plot_by_time(d, regs, start='2000', stop='2050', hours=1, amount=False,
                 cum=False, normalize=False, ylabel='', interactive=False,
                 index=None, rotate=False):
    """Plot per-bucket totals of ``n`` (or ``dollars``) for each banner group.

    Groups that produce fewer than three buckets are skipped. When nothing
    plottable remains, a message is printed and nothing is drawn.

    Args:
        d: DataFrame with a ``name`` column plus ``n`` and/or ``dollars``.
            It is sliced with ``d[start:stop]``; unless ``index`` is given,
            its index entries must expose ``.day`` / ``.hour`` and support
            subtracting a ``timedelta``.
        regs: mapping of column label -> regex applied with ``str.match``
            to ``d.name``.
        start: lower bound of the slice.
        stop: upper bound of the slice.
        hours: bucket width in hours (only used when ``index`` is None).
        amount: sum the ``dollars`` column instead of ``n``.
        cum: plot the running total instead of per-bucket totals.
        normalize: divide each group's values by the group's overall sum.
        ylabel: forwarded to ``plot_df``.
        interactive: forwarded to ``plot_df``.
        index: optional column name whose values replace the index instead
            of time bucketing.
        rotate: forwarded to ``plot_df``.

    Returns:
        None. The result of ``plot_df`` is not returned.
    """
    d = d[start:stop]

    if index is None:
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        d.index = pd.Series(d.index).apply(
            lambda tm: tm - timedelta(hours=(24 * tm.day + tm.hour) % hours))
    else:
        d.index = d[index]

    column = 'dollars' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        counts = d.loc[d.name.str.match(reg).apply(bool), column]

        if normalize:
            counts = counts / counts.sum()

        totals = counts.groupby(counts.index).sum()

        # Check the series itself before inserting it. Measuring
        # d_plot[name] after insertion reports the frame's row count, and
        # deleting the column keeps its index, so one short first series
        # used to make every later series get dropped as well.
        if totals.shape[0] < 3:
            continue

        d_plot[name] = totals.cumsum() if cum else totals

    if d_plot.shape[0] < 3:
        print('There is no data for this campaign or this kind of banners')
        return

    d_plot = d_plot.fillna(0)

    plot_df(d_plot, ylabel, interactive=interactive, rotate=rotate)
Example #15
0
def plot_traffic(pv, imp, source, reg, start, stop, hours=1):
    """Plot pageviews next to banner impressions per time bucket for one source.

    Args:
        pv: DataFrame with ``access_method`` and ``pageviews`` columns.
        imp: DataFrame with ``name`` and ``n`` columns. The index entries of
            both frames must expose ``.day`` / ``.hour`` and support
            subtracting a ``timedelta``.
        source: value of ``pv['access_method']`` to keep; also used as the
            prefix of the plotted column labels.
        reg: regex applied with ``str.match`` to ``imp.name``.
        start: accepted but not used by this function.
        stop: accepted but not used by this function.
        hours: bucket width in hours.

    Returns:
        Whatever ``plot_df`` returns.
    """
    def bucket_start(tm):
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    pv_s = copy.copy(pv[pv['access_method'] == source])
    pv_s.index = pd.Series(pv_s.index).apply(bucket_start)
    pv_s = pv_s.groupby(pv_s.index).sum()
    pv_s.rename(columns={'pageviews': source + ' pageviews'}, inplace=True)

    # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
    imp_s = copy.copy(imp.loc[imp.name.str.match(reg).apply(bool), ['n']])
    imp_s.index = pd.Series(imp_s.index).apply(bucket_start)
    imp_s = imp_s.groupby(imp_s.index).sum()

    imp_s.rename(columns={'n': source + ' impressions'}, inplace=True)

    # Left join: only buckets that have pageviews are kept.
    d = pv_s.merge(imp_s, how='left', left_index=True, right_index=True)[[
        source + ' pageviews', source + ' impressions'
    ]]
    return plot_df(d, 'count per %d hours' % hours)
Example #16
0
def plot_rate_by_time(don,
                      imp,
                      regs,
                      hours=1,
                      start='2000',
                      stop='2050',
                      ylabel='donation rate',
                      interactive=False,
                      index=None):
    """Plot donations divided by impressions per time bucket.

    Each group's rate series is capped at its second-largest value. Groups
    with fewer than three buckets (or fewer than two non-NaN rates) are
    skipped. When nothing plottable remains, a message is printed and None
    is returned.

    Args:
        don: DataFrame with ``name`` and ``n`` columns (donation counts).
        imp: DataFrame with ``name`` and ``n`` columns (impression counts).
            Both frames are sliced with ``[start:stop]``; unless ``index``
            is given, their index entries must expose ``.day`` / ``.hour``
            and support subtracting a ``timedelta``.
        regs: mapping of column label -> regex applied with ``str.match``
            to the ``name`` column of both frames.
        hours: bucket width in hours (only used when ``index`` is None).
        start: lower bound of the slice.
        stop: upper bound of the slice.
        ylabel: forwarded to ``plot_df``.
        interactive: forwarded to ``plot_df``.
        index: optional column name whose values replace the index of both
            frames instead of time bucketing.

    Returns:
        Whatever ``plot_df`` returns, or None when there is nothing to plot.
    """
    def bucket_start(tm):
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    don = don[start:stop]
    imp = imp[start:stop]

    if index is None:
        don.index = pd.Series(don.index).apply(bucket_start)
        imp.index = pd.Series(imp.index).apply(bucket_start)
    else:
        don.index = don[index]
        imp.index = imp[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        dons = don.loc[don.name.str.match(reg).apply(bool), 'n']
        dons = dons.groupby(dons.index).sum()
        imps = imp.loc[imp.name.str.match(reg).apply(bool), 'n']
        imps = imps.groupby(imps.index).sum()

        d_rate = dons / imps

        if d_rate.shape[0] < 3:
            continue

        # nlargest drops NaN, so fewer than two values can come back even
        # for a series with three or more buckets.
        top_two = d_rate.nlargest(2)
        if top_two.shape[0] < 2:
            continue

        # Positional access: take the second-largest value regardless of
        # the index labels (`.ix[1]` was label-or-position ambiguous).
        largest = top_two.iloc[1]
        d_rate[d_rate > largest] = largest
        d_plot[name] = d_rate

    if d_plot.shape[0] < 3:
        print('There is no data for this campaign or this kind of banners')
        return

    return plot_df(d_plot, ylabel, interactive=interactive)
Example #17
0
def plot_conversion_rate(d, regs, start = '2000', stop = '2050', hours = 1, index = None, ylabel = 'conversion_rate',title= '', methods = ['amazon', 'paypal', 'cc']):
    """Plot donations / clicks per time bucket for each banner and payment method.

    Args:
        d: DataFrame with ``name``, ``payment_method``, ``donation`` and
            ``n`` columns. It is sliced with ``d[start:stop]``; unless
            ``index`` is given, its index entries must expose ``.day`` /
            ``.hour`` and support subtracting a ``timedelta``.
        regs: mapping of label -> regex applied with ``str.match`` to
            ``d.name``.
        start: lower bound of the slice.
        stop: upper bound of the slice.
        hours: bucket width in hours (only used when ``index`` is None).
        index: optional column name whose values replace the index instead
            of time bucketing.
        ylabel: forwarded to ``plot_df``.
        title: forwarded to ``plot_df``.
        methods: payment method names to plot; the default list is only
            iterated, never mutated.

    Returns:
        Whatever ``plot_df`` returns.
    """
    d = d[start:stop]
    if index is None:
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        d.index = pd.Series(d.index).apply(lambda tm: tm - timedelta(hours=(24 * tm.day + tm.hour) % hours))
    else:
        d.index = d[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        clicks = d.loc[d.name.str.match(reg).apply(bool)]
        for method in methods:
            clicks_by_method = clicks[clicks['payment_method'] == method]
            donations = clicks_by_method[clicks_by_method['donation'] == 1]['n']
            donations = donations.groupby(donations.index).sum()
            clicks_by_method = clicks_by_method.groupby(clicks_by_method.index)['n'].sum()

            d_plot[name + ' ' + method] = donations / clicks_by_method
    return plot_df(d_plot, ylabel=ylabel, title=title, interactive = False)
Example #18
0
def plot_donation_rate(
    don,
    imp,
    regs,
    cum=False,
    max_impressions=5,
    amount=False,
    interactive=False,
    ylabel='',
):
    """Plot donations per impression, grouped by the index of the inputs.

    For each ``(label, regex)`` pair in ``regs`` (sorted order), donations
    and impressions whose ``name`` contains the regex are summed per index
    value, truncated after label ``max_impressions`` with the remainder
    folded into an entry labelled ``max_impressions + 1``, and divided.

    Args:
        don: DataFrame with a ``name`` column plus ``n`` and/or ``amount``.
        imp: DataFrame with ``name`` and ``n`` columns.
        regs: mapping of column label -> regex used with ``str.contains``.
        cum: use running totals of both series before dividing.
        max_impressions: last index label kept as its own entry.
        amount: use the ``amount`` column of ``don`` instead of ``n``.
        interactive: forwarded to ``plot_df``.
        ylabel: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def fold_tail(per_index):
        # NOTE(review): the head uses a label slice while the tail uses a
        # plain `[]` slice; the two only partition the data for certain
        # index layouts (e.g. labels 1..N) -- confirm against the callers.
        tail_total = per_index[max_impressions:].sum()
        # Copy so the enlargement below never touches `per_index`.
        head = per_index.loc[:max_impressions].copy()
        head.loc[max_impressions + 1] = tail_total
        return head

    donation_column = 'amount' if amount else 'n'

    d_plot = pd.DataFrame()
    for name, reg in sorted(regs.items()):
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        donations = don.loc[don.name.str.contains(reg), donation_column]
        donations1 = fold_tail(donations.groupby(donations.index).sum())

        impressions = imp.loc[imp.name.str.contains(reg), 'n']
        impressions1 = fold_tail(impressions.groupby(impressions.index).sum())

        if cum:
            impressions1 = impressions1.cumsum()
            donations1 = donations1.cumsum()

        d_plot[name] = donations1 / impressions1

    return plot_df(d_plot,
                   xlabel='impressions seen',
                   ylabel=ylabel,
                   interactive=interactive)
Example #19
0
def plot_by_time(d,
                 regs,
                 start='2000',
                 stop='2050',
                 hours=1,
                 amount=False,
                 cum=False,
                 normalize=False,
                 ylabel='',
                 interactive=False,
                 index=None,
                 rotate=False):
    """Plot per-bucket totals of ``n`` (or ``amount``) for each banner group.

    Args:
        d: DataFrame with a ``name`` column plus ``n`` and/or ``amount``. It
            is sliced with ``d[start:stop]``; unless ``index`` is given, its
            index entries must expose ``.day`` / ``.hour`` and support
            subtracting a ``timedelta``.
        regs: mapping of column label -> regex applied with ``str.match``
            to ``d.name``.
        start: lower bound of the slice.
        stop: upper bound of the slice.
        hours: bucket width in hours (only used when ``index`` is None).
        amount: sum the ``amount`` column instead of ``n``.
        cum: plot the running total instead of per-bucket totals.
        normalize: divide each group's values by the group's overall sum.
        ylabel: forwarded to ``plot_df``.
        interactive: forwarded to ``plot_df``.
        index: optional column name whose values replace the index instead
            of time bucketing.
        rotate: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns.
    """
    d = d[start:stop]

    if index is None:
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        d.index = pd.Series(d.index).apply(
            lambda tm: tm - timedelta(hours=(24 * tm.day + tm.hour) % hours))
    else:
        d.index = d[index]

    column = 'amount' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        counts = d.loc[d.name.str.match(reg).apply(bool), column]

        if normalize:
            counts = counts / counts.sum()

        totals = counts.groupby(counts.index).sum()
        d_plot[name] = totals.cumsum() if cum else totals

    d_plot = d_plot.fillna(0)
    return plot_df(d_plot, ylabel, interactive=interactive, rotate=rotate)
Example #20
0
def plot_rate_by_time(don, imp, regs, hours=1, start='2000', stop='2050',
                      ylabel='donation rate', interactive=False, index=None):
    """Plot donations divided by impressions per time bucket.

    Args:
        don: DataFrame with ``name`` and ``n`` columns (donation counts).
        imp: DataFrame with ``name`` and ``n`` columns (impression counts).
            Both frames are sliced with ``[start:stop]``; unless ``index``
            is given, their index entries must expose ``.day`` / ``.hour``
            and support subtracting a ``timedelta``.
        regs: mapping of column label -> regex applied with ``str.match``
            to the ``name`` column of both frames.
        hours: bucket width in hours (only used when ``index`` is None).
        start: lower bound of the slice.
        stop: upper bound of the slice.
        ylabel: forwarded to ``plot_df``.
        interactive: forwarded to ``plot_df``.
        index: optional column name whose values replace the index of both
            frames instead of time bucketing.

    Returns:
        Whatever ``plot_df`` returns.
    """
    def bucket_start(tm):
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    don = don[start:stop]
    imp = imp[start:stop]

    if index is None:
        don.index = pd.Series(don.index).apply(bucket_start)
        imp.index = pd.Series(imp.index).apply(bucket_start)
    else:
        don.index = don[index]
        imp.index = imp[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        dons = don.loc[don.name.str.match(reg).apply(bool), 'n']
        dons = dons.groupby(dons.index).sum()
        imps = imp.loc[imp.name.str.match(reg).apply(bool), 'n']
        imps = imps.groupby(imps.index).sum()

        d_plot[name] = dons / imps
    return plot_df(d_plot, ylabel, interactive=interactive)
Example #21
0
def plot_rate_by_time(don, imp, regs, hours=1, start='2000', stop='2050',
                      ylabel='donation rate', interactive=False, index=None):
    """Plot donations divided by impressions per time bucket.

    Each group's rate series is capped at its second-largest value. Groups
    with fewer than three buckets (or fewer than two non-NaN rates) are
    skipped. When nothing plottable remains, a message is printed and None
    is returned.

    Args:
        don: DataFrame with ``name`` and ``n`` columns (donation counts).
        imp: DataFrame with ``name`` and ``n`` columns (impression counts).
            Both frames are sliced with ``[start:stop]``; unless ``index``
            is given, their index entries must expose ``.day`` / ``.hour``
            and support subtracting a ``timedelta``.
        regs: mapping of column label -> regex applied with ``str.match``
            to the ``name`` column of both frames.
        hours: bucket width in hours (only used when ``index`` is None).
        start: lower bound of the slice.
        stop: upper bound of the slice.
        ylabel: forwarded to ``plot_df``.
        interactive: forwarded to ``plot_df``.
        index: optional column name whose values replace the index of both
            frames instead of time bucketing.

    Returns:
        Whatever ``plot_df`` returns, or None when there is nothing to plot.
    """
    def bucket_start(tm):
        # Shift each timestamp back by (24 * day + hour) % hours hours.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    don = don[start:stop]
    imp = imp[start:stop]

    if index is None:
        don.index = pd.Series(don.index).apply(bucket_start)
        imp.index = pd.Series(imp.index).apply(bucket_start)
    else:
        don.index = don[index]
        imp.index = imp[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.ix` was removed in pandas 1.0; `.loc` accepts the boolean mask.
        dons = don.loc[don.name.str.match(reg).apply(bool), 'n']
        dons = dons.groupby(dons.index).sum()
        imps = imp.loc[imp.name.str.match(reg).apply(bool), 'n']
        imps = imps.groupby(imps.index).sum()

        d_rate = dons / imps

        if d_rate.shape[0] < 3:
            continue

        # nlargest drops NaN, so fewer than two values can come back even
        # for a series with three or more buckets.
        top_two = d_rate.nlargest(2)
        if top_two.shape[0] < 2:
            continue

        # Positional access: take the second-largest value regardless of
        # the index labels (`.ix[1]` was label-or-position ambiguous).
        largest = top_two.iloc[1]
        d_rate[d_rate > largest] = largest
        d_plot[name] = d_rate

    if d_plot.shape[0] < 3:
        print('There is no data for this campaign or this kind of banners')
        return

    return plot_df(d_plot, ylabel, interactive=interactive)