def plot_pv_fraction_in_campaign(pv, access_method, counts, sample_rate, hours=1):
    """Plot and return the share of pageviews that fall inside a campaign.

    Args:
        pv: frame with an 'access_method' column and a 'pageviews' column,
            indexed by timestamps (objects exposing ``day`` and ``hour``).
        access_method: value of 'access_method' whose rows are kept.
        counts: forwarded unchanged to ``get_counts_df``.
        sample_rate: forwarded unchanged to ``get_counts_df``.
        hours: width of the time bucket, in hours.

    Returns:
        The bucketed frame with the added columns 'pageviews_in_campaign'
        and 'fraction_of_pageviews_in_campaign'.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    # Keep only the requested access method, on a copy of the selection.
    source_rows = pv['access_method'] == access_method
    pv = copy.copy(pv[source_rows])

    pv.index = pd.Series(pv.index).apply(bucket_start)
    pv = pv.groupby(pv.index).sum()

    campaign_counts = get_counts_df(counts, hours, sample_rate)
    pv['pageviews_in_campaign'] = campaign_counts.sum(axis=1)
    pv['fraction_of_pageviews_in_campaign'] = (
        pv['pageviews_in_campaign'] / pv['pageviews']
    )

    fraction = pv[['fraction_of_pageviews_in_campaign']]
    plot_df(fraction, 'fraction per %d hours' % hours)
    return pv
def plot_central_tendency(d, regs, start='2000', stop='2050', hours=1,
                          index=None, ylabel='dollars per donation', title='',
                          method='mean', amount_limit=10000.0):
    """Plot the mean or median capped donation amount per group.

    Args:
        d: frame with 'name' and 'amount' columns; sliced with
            ``d[start:stop]`` before anything else.
        regs: mapping of series label -> regular expression matched against
            ``d.name`` with ``str.match``.
        start, stop: slice bounds applied to ``d``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        index: optional column of ``d`` to group by instead of the bucketed
            timestamps.
        ylabel, title: forwarded to ``plot_df``.
        method: 'mean' for the mean; any other value selects the median.
        amount_limit: amounts above this value are capped to it.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    d = d[start:stop]
    if index is None:
        d.index = pd.Series(d.index).apply(bucket_start)
    else:
        d.index = d[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        matches = d.name.str.match(reg).apply(bool)
        # `.loc` replaces `.ix`, which was removed in pandas 1.0. `clip` caps
        # the amounts without assigning into a selection of `d`.
        amounts = d.loc[matches, 'amount'].clip(upper=amount_limit)
        grouped = amounts.groupby(amounts.index)
        if method == 'mean':
            d_plot[name] = grouped.mean()
        else:
            d_plot[name] = grouped.median()
    return plot_df(d_plot, ylabel=ylabel, title=title, interactive=False)
def plot_by_time(d, regs, start='2000', stop='2050', hours=1, amount=False,
                 cum=False, normalize=False, ylabel='', interactive=False,
                 index=None, rotate=False):
    """Plot per-bucket totals of 'n' (or 'amount') for each pattern in ``regs``.

    Args:
        d: frame with a 'name' column plus 'n' and/or 'amount'; sliced with
            ``d[start:stop]`` before anything else.
        regs: mapping of series label -> regular expression matched against
            ``d.name`` with ``str.match``.
        start, stop: slice bounds applied to ``d``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        amount: sum the 'amount' column instead of 'n'.
        cum: plot the running total instead of the per-bucket total.
        normalize: divide the values by their overall sum before grouping.
        ylabel, interactive, rotate: forwarded to ``plot_df``.
        index: optional column of ``d`` to group by instead of the bucketed
            timestamps.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    d = d[start:stop]
    if index is None:
        d.index = pd.Series(d.index).apply(bucket_start)
    else:
        d.index = d[index]

    column = 'amount' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        counts = d.loc[d.name.str.match(reg).apply(bool), column]
        if normalize:
            counts = counts / counts.sum()
        totals = counts.groupby(counts.index).sum()
        d_plot[name] = totals.cumsum() if cum else totals

    # Buckets missing for a given series are shown as zero.
    d_plot = d_plot.fillna(0)
    return plot_df(d_plot, ylabel, interactive=interactive, rotate=rotate)
def plot_by_impressions_seen(d, regs, normalize=True, max_impressions=5,
                             amount=False, interactive=False, ylabel=''):
    """Plot totals of 'n' (or 'amount') per index label, with an overflow bucket.

    Labels up to and including ``max_impressions`` are kept as they are;
    everything above is summed into a single bucket labelled
    ``max_impressions + 1``.

    Args:
        d: frame with a 'name' column plus 'n' and/or 'amount'.
            NOTE(review): assumes ``d`` is indexed by the integer number of
            impressions seen - confirm against the caller.
        regs: mapping of series label -> pattern tested against ``d.name``
            with ``str.contains``; series are added in sorted label order.
        normalize: divide each series by its own total.
        max_impressions: largest label that keeps its own bucket.
        amount: sum the 'amount' column instead of 'n'.
        interactive, ylabel: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns.
    """
    column = 'amount' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in sorted(regs.items()):
        counts = d[d.name.str.contains(reg)][column]
        counts = counts.groupby(counts.index).sum()
        if normalize:
            counts = counts / counts.sum()

        # Copy so the overflow bucket is added to an independent series.
        head = counts.loc[:max_impressions].copy()
        # Select the overflow by label, not by position: a positional slice
        # picks the wrong rows when labels have gaps or do not start at 1.
        overflow = counts[counts.index > max_impressions].sum()
        head.loc[max_impressions + 1] = overflow
        d_plot[name] = head
    return plot_df(d_plot, xlabel='impressions seen', ylabel=ylabel,
                   interactive=interactive)
def plot_rate_by_time(don, imp, regs, hours=1, start='2000', stop='2050',
                      ylabel='donation rate', interactive=False, index=None):
    """Plot the per-bucket ratio of donation counts to impression counts.

    Args:
        don: frame with 'name' and 'n' columns (numerator).
        imp: frame with 'name' and 'n' columns (denominator).
        regs: mapping of series label -> regular expression matched against
            the 'name' column of both frames with ``str.match``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        start, stop: slice bounds applied to both frames.
        ylabel, interactive: forwarded to ``plot_df``.
        index: optional column, present in both frames, to group by instead
            of the bucketed timestamps.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    don = don[start:stop]
    imp = imp[start:stop]
    if index is None:
        don.index = pd.Series(don.index).apply(bucket_start)
        imp.index = pd.Series(imp.index).apply(bucket_start)
    else:
        don.index = don[index]
        imp.index = imp[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        dons = don.loc[don.name.str.match(reg).apply(bool), 'n']
        dons = dons.groupby(dons.index).sum()
        imps = imp.loc[imp.name.str.match(reg).apply(bool), 'n']
        imps = imps.groupby(imps.index).sum()
        d_plot[name] = dons / imps
    return plot_df(d_plot, ylabel, interactive=interactive)
def plot_central_tendency(
    d,
    regs,
    start="2000",
    stop="2050",
    hours=1,
    index=None,
    ylabel="dollars per donation",
    title="",
    method="mean",
    amount_limit=10000.0,
):
    """Plot the mean or median capped donation amount per group.

    Args:
        d: frame with 'name' and 'amount' columns; sliced with
            ``d[start:stop]`` before anything else.
        regs: mapping of series label -> regular expression matched against
            ``d.name`` with ``str.match``.
        start, stop: slice bounds applied to ``d``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        index: optional column of ``d`` to group by instead of the bucketed
            timestamps.
        ylabel, title: forwarded to ``plot_df``.
        method: "mean" for the mean; any other value selects the median.
        amount_limit: amounts above this value are capped to it.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    d = d[start:stop]
    if index is None:
        d.index = pd.Series(d.index).apply(bucket_start)
    else:
        d.index = d[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        matches = d.name.str.match(reg).apply(bool)
        # `.loc` replaces `.ix`, which was removed in pandas 1.0. `clip` caps
        # the amounts without assigning into a selection of `d`.
        amounts = d.loc[matches, "amount"].clip(upper=amount_limit)
        grouped = amounts.groupby(amounts.index)
        if method == "mean":
            d_plot[name] = grouped.mean()
        else:
            d_plot[name] = grouped.median()
    return plot_df(d_plot, ylabel=ylabel, title=title, interactive=False)
def plot_by_impressions_seen(d, regs, normalize=True, max_impressions=5,
                             amount=False, interactive=False, ylabel=''):
    """Plot totals of 'n' (or 'amount') per index label, with an overflow bucket.

    Labels up to and including ``max_impressions`` are kept as they are;
    everything above is summed into a single bucket labelled
    ``max_impressions + 1``.

    Args:
        d: frame with a 'name' column plus 'n' and/or 'amount'.
            NOTE(review): assumes ``d`` is indexed by the integer number of
            impressions seen - confirm against the caller.
        regs: mapping of series label -> pattern tested against ``d.name``
            with ``str.contains``; series are added in sorted label order.
        normalize: divide each series by its own total.
        max_impressions: largest label that keeps its own bucket.
        amount: sum the 'amount' column instead of 'n'.
        interactive, ylabel: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns.
    """
    column = 'amount' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in sorted(regs.items()):
        counts = d[d.name.str.contains(reg)][column]
        counts = counts.groupby(counts.index).sum()
        if normalize:
            counts = counts / counts.sum()

        # Copy so the overflow bucket is added to an independent series.
        head = counts.loc[:max_impressions].copy()
        # Select the overflow by label, not by position: a positional slice
        # picks the wrong rows when labels have gaps or do not start at 1.
        overflow = counts[counts.index > max_impressions].sum()
        head.loc[max_impressions + 1] = overflow
        d_plot[name] = head
    return plot_df(d_plot, xlabel='impressions seen', ylabel=ylabel,
                   interactive=interactive)
def plot_conversion_rate(d, regs, start='2000', stop='2050', hours=1,
                         index=None, ylabel='conversion_rate', title='',
                         methods=('amazon', 'paypal', 'cc')):
    """Plot donations / clicks per bucket for each pattern and payment method.

    Prints a message and returns None when ``d`` is empty or when the
    assembled frame has fewer than three rows.

    Args:
        d: frame with 'name', 'payment_method', 'donation' and 'n' columns;
            sliced with ``d[start:stop]``.
        regs: mapping of series label -> regular expression matched against
            ``d.name`` with ``str.match``.
        start, stop: slice bounds applied to ``d``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        index: optional column of ``d`` to group by instead of the bucketed
            timestamps.
        ylabel, title: forwarded to ``plot_df``.
        methods: payment methods to plot. The original body read ``methods``
            without declaring it; it is now an explicit keyword parameter.
            NOTE(review): the default is an assumption (a common set of
            payment methods) - confirm the intended set.

    Returns:
        Whatever ``plot_df`` returns, or None when there is nothing to plot.
    """
    if d.shape[0] == 0:
        print('No Conversion rate data for this device')
        return

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    d = d[start:stop]
    if index is None:
        d.index = pd.Series(d.index).apply(bucket_start)
    else:
        d.index = d[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        clicks = d.loc[d.name.str.match(reg).apply(bool)]
        if clicks.shape[0] == 0:
            continue
        for method in methods:
            clicks_by_method = clicks[clicks['payment_method'] == method]
            if clicks_by_method.shape[0] == 0:
                continue
            donations = clicks_by_method[clicks_by_method['donation'] == 1]['n']
            if donations.shape[0] == 0:
                continue
            donations = donations.groupby(donations.index).sum()
            click_totals = clicks_by_method.groupby(
                clicks_by_method.index)['n'].sum()
            d_plot[name + ' ' + method] = donations / click_totals

    if d_plot.shape[0] < 3:
        print('No Conversion rate data for this device')
        return
    return plot_df(d_plot, ylabel=ylabel, title=title, interactive=False)
def plot_donation_rate(don, imp, regs, cum=False, max_impressions=5,
                       amount=False, interactive=False, ylabel=''):
    """Plot donations divided by impressions per index label.

    Labels up to and including ``max_impressions`` are kept as they are;
    everything above is summed into a bucket labelled ``max_impressions + 1``.

    Args:
        don: frame with a 'name' column plus 'n' and/or 'amount' (numerator).
        imp: frame with 'name' and 'n' columns (denominator).
            NOTE(review): assumes both frames are indexed by the integer
            number of impressions seen - confirm against the caller.
        regs: mapping of series label -> pattern tested against the 'name'
            column with ``str.contains``; series are added in sorted order.
        cum: use running totals of both numerator and denominator.
        max_impressions: largest label that keeps its own bucket.
        amount: use the 'amount' column of ``don`` instead of 'n'.
        interactive, ylabel: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucketed_totals(frame, reg, column):
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        totals = frame.loc[frame.name.str.contains(reg), column]
        totals = totals.groupby(totals.index).sum()
        head = totals.loc[:max_impressions].copy()
        # Select the overflow by label so numerator and denominator are
        # bucketed identically even when one of them has missing labels.
        head.loc[max_impressions + 1] = totals[
            totals.index > max_impressions].sum()
        return head

    donation_column = 'amount' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in sorted(regs.items()):
        donations1 = bucketed_totals(don, reg, donation_column)
        impressions1 = bucketed_totals(imp, reg, 'n')
        if cum:
            impressions1 = impressions1.cumsum()
            donations1 = donations1.cumsum()
        d_plot[name] = donations1 / impressions1
    return plot_df(d_plot, xlabel='impressions seen', ylabel=ylabel,
                   interactive=interactive)
def plot_conversion_rate(d, regs, start='2000', stop='2050', hours=1,
                         index=None, ylabel='conversion_rate', title='',
                         methods=('amazon', 'paypal', 'cc')):
    """Plot donations / clicks per bucket for each pattern and payment method.

    Args:
        d: frame with 'name', 'payment_method', 'donation' and 'n' columns;
            sliced with ``d[start:stop]``.
        regs: mapping of series label -> regular expression matched against
            ``d.name`` with ``str.match``.
        start, stop: slice bounds applied to ``d``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        index: optional column of ``d`` to group by instead of the bucketed
            timestamps.
        ylabel, title: forwarded to ``plot_df``.
        methods: iterable of payment-method values to plot (immutable default
            so it cannot be shared and mutated across calls).

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    d = d[start:stop]
    if index is None:
        d.index = pd.Series(d.index).apply(bucket_start)
    else:
        d.index = d[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        clicks = d.loc[d.name.str.match(reg).apply(bool)]
        for method in methods:
            clicks_by_method = clicks[clicks['payment_method'] == method]
            # Rows flagged donation == 1 form the numerator.
            donations = clicks_by_method[clicks_by_method['donation'] == 1]['n']
            donations = donations.groupby(donations.index).sum()
            click_totals = clicks_by_method.groupby(
                clicks_by_method.index)['n'].sum()
            d_plot[name + ' ' + method] = donations / click_totals
    return plot_df(d_plot, ylabel=ylabel, title=title, interactive=False)
def plot_by_time(d, regs, start='2000', stop='2050', hours=1, amount=False,
                 cum=False, normalize=False, ylabel='', interactive=False,
                 index=None, rotate=False):
    """Plot per-bucket totals of 'n' (or 'dollars') for each pattern in ``regs``.

    Series with fewer than three buckets are skipped. If the assembled frame
    has fewer than three rows a message is printed and nothing is plotted.

    Args:
        d: frame with a 'name' column plus 'n' and/or 'dollars'; sliced with
            ``d[start:stop]`` before anything else.
        regs: mapping of series label -> regular expression matched against
            ``d.name`` with ``str.match``.
        start, stop: slice bounds applied to ``d``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        amount: sum the 'dollars' column instead of 'n'.
        cum: plot the running total instead of the per-bucket total.
        normalize: divide the values by their overall sum before grouping.
        ylabel, interactive, rotate: forwarded to ``plot_df``.
        index: optional column of ``d`` to group by instead of the bucketed
            timestamps.

    Returns:
        None.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    d = d[start:stop]
    if index is None:
        d.index = pd.Series(d.index).apply(bucket_start)
    else:
        d.index = d[index]

    column = 'dollars' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        counts = d.loc[d.name.str.match(reg).apply(bool), column]
        if normalize:
            counts = counts / counts.sum()
        totals = counts.groupby(counts.index).sum()
        # Check the series itself before inserting it. Once assigned, a
        # column is aligned to the frame, so its length is the frame's, and
        # deleting a short first column would leave its index behind and
        # truncate every later series to it.
        if totals.shape[0] < 3:
            continue
        d_plot[name] = totals.cumsum() if cum else totals

    if d_plot.shape[0] < 3:
        print('There is no data for this campaign or this kind of banners')
        return
    # Buckets missing for a given series are shown as zero.
    d_plot = d_plot.fillna(0)
    plot_df(d_plot, ylabel, interactive=interactive, rotate=rotate)
def plot_pv_fraction_in_campaign(pv, access_method, counts, sample_rate, hours=1):
    """Plot and return the share of pageviews that fall inside a campaign.

    Args:
        pv: frame with an 'access_method' column and a 'pageviews' column,
            indexed by timestamps (objects exposing ``day`` and ``hour``).
        access_method: value of 'access_method' whose rows are kept.
        counts: forwarded unchanged to ``get_counts_df``.
        sample_rate: forwarded unchanged to ``get_counts_df``.
        hours: width of the time bucket, in hours.

    Returns:
        The bucketed frame with the added columns 'pageviews_in_campaign'
        and 'fraction_of_pageviews_in_campaign'.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    # Keep only the requested access method, on a copy of the selection.
    source_rows = pv['access_method'] == access_method
    pv = copy.copy(pv[source_rows])

    pv.index = pd.Series(pv.index).apply(bucket_start)
    pv = pv.groupby(pv.index).sum()

    campaign_counts = get_counts_df(counts, hours, sample_rate)
    pv['pageviews_in_campaign'] = campaign_counts.sum(axis=1)
    pv['fraction_of_pageviews_in_campaign'] = (
        pv['pageviews_in_campaign'] / pv['pageviews']
    )

    fraction = pv[['fraction_of_pageviews_in_campaign']]
    plot_df(fraction, 'fraction per %d hours' % hours)
    return pv
def plot_traffic(pv, imp, source, reg, start, stop, hours=1):
    """Plot bucketed pageviews next to bucketed impressions for one source.

    Args:
        pv: frame with 'access_method' and 'pageviews' columns, indexed by
            timestamps (objects exposing ``day`` and ``hour``).
        imp: frame with 'name' and 'n' columns, indexed by timestamps.
        source: value of 'access_method' to keep; also used as the prefix of
            the two plotted column names.
        reg: regular expression matched against ``imp.name`` with
            ``str.match``.
        start, stop: accepted for interface compatibility; not used by this
            function.
        hours: width of the time bucket, in hours.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    pageviews_col = source + ' pageviews'
    impressions_col = source + ' impressions'

    pv_s = copy.copy(pv[pv['access_method'] == source])
    pv_s.index = pd.Series(pv_s.index).apply(bucket_start)
    pv_s = pv_s.groupby(pv_s.index).sum()
    pv_s.rename(columns={'pageviews': pageviews_col}, inplace=True)

    # `.loc` replaces `.ix`, which was removed in pandas 1.0.
    imp_s = copy.copy(imp.loc[imp.name.str.match(reg).apply(bool), ['n']])
    imp_s.index = pd.Series(imp_s.index).apply(bucket_start)
    imp_s = imp_s.groupby(imp_s.index).sum()
    imp_s.rename(columns={'n': impressions_col}, inplace=True)

    # Left join: every pageview bucket is kept, with or without impressions.
    merged = pv_s.merge(imp_s, how='left', left_index=True, right_index=True)
    d = merged[[pageviews_col, impressions_col]]
    return plot_df(d, 'count per %d hours' % hours)
def plot_by_time(d, regs, start='2000', stop='2050', hours=1, amount=False,
                 cum=False, normalize=False, ylabel='', interactive=False,
                 index=None, rotate=False):
    """Plot per-bucket totals of 'n' (or 'dollars') for each pattern in ``regs``.

    Series with fewer than three buckets are skipped. If the assembled frame
    has fewer than three rows a message is printed and nothing is plotted.

    Args:
        d: frame with a 'name' column plus 'n' and/or 'dollars'; sliced with
            ``d[start:stop]`` before anything else.
        regs: mapping of series label -> regular expression matched against
            ``d.name`` with ``str.match``.
        start, stop: slice bounds applied to ``d``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        amount: sum the 'dollars' column instead of 'n'.
        cum: plot the running total instead of the per-bucket total.
        normalize: divide the values by their overall sum before grouping.
        ylabel, interactive, rotate: forwarded to ``plot_df``.
        index: optional column of ``d`` to group by instead of the bucketed
            timestamps.

    Returns:
        None.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    d = d[start:stop]
    if index is None:
        d.index = pd.Series(d.index).apply(bucket_start)
    else:
        d.index = d[index]

    column = 'dollars' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        counts = d.loc[d.name.str.match(reg).apply(bool), column]
        if normalize:
            counts = counts / counts.sum()
        totals = counts.groupby(counts.index).sum()
        # Check the series itself before inserting it. Once assigned, a
        # column is aligned to the frame, so its length is the frame's, and
        # deleting a short first column would leave its index behind and
        # truncate every later series to it.
        if totals.shape[0] < 3:
            continue
        d_plot[name] = totals.cumsum() if cum else totals

    if d_plot.shape[0] < 3:
        print('There is no data for this campaign or this kind of banners')
        return
    # Buckets missing for a given series are shown as zero.
    d_plot = d_plot.fillna(0)
    plot_df(d_plot, ylabel, interactive=interactive, rotate=rotate)
def plot_traffic(pv, imp, source, reg, start, stop, hours=1):
    """Plot bucketed pageviews next to bucketed impressions for one source.

    Args:
        pv: frame with 'access_method' and 'pageviews' columns, indexed by
            timestamps (objects exposing ``day`` and ``hour``).
        imp: frame with 'name' and 'n' columns, indexed by timestamps.
        source: value of 'access_method' to keep; also used as the prefix of
            the two plotted column names.
        reg: regular expression matched against ``imp.name`` with
            ``str.match``.
        start, stop: accepted for interface compatibility; not used by this
            function.
        hours: width of the time bucket, in hours.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    pageviews_col = source + ' pageviews'
    impressions_col = source + ' impressions'

    pv_s = copy.copy(pv[pv['access_method'] == source])
    pv_s.index = pd.Series(pv_s.index).apply(bucket_start)
    pv_s = pv_s.groupby(pv_s.index).sum()
    pv_s.rename(columns={'pageviews': pageviews_col}, inplace=True)

    # `.loc` replaces `.ix`, which was removed in pandas 1.0.
    imp_s = copy.copy(imp.loc[imp.name.str.match(reg).apply(bool), ['n']])
    imp_s.index = pd.Series(imp_s.index).apply(bucket_start)
    imp_s = imp_s.groupby(imp_s.index).sum()
    imp_s.rename(columns={'n': impressions_col}, inplace=True)

    # Left join: every pageview bucket is kept, with or without impressions.
    merged = pv_s.merge(imp_s, how='left', left_index=True, right_index=True)
    d = merged[[pageviews_col, impressions_col]]
    return plot_df(d, 'count per %d hours' % hours)
def plot_rate_by_time(don, imp, regs, hours=1, start='2000', stop='2050',
                      ylabel='donation rate', interactive=False, index=None):
    """Plot the per-bucket ratio of donation counts to impression counts.

    Series with fewer than three buckets are skipped, and each series is
    capped at its second-largest value so a single spike does not dominate
    the plot. If the assembled frame has fewer than three rows a message is
    printed and None is returned.

    Args:
        don: frame with 'name' and 'n' columns (numerator).
        imp: frame with 'name' and 'n' columns (denominator).
        regs: mapping of series label -> regular expression matched against
            the 'name' column of both frames with ``str.match``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        start, stop: slice bounds applied to both frames.
        ylabel, interactive: forwarded to ``plot_df``.
        index: optional column, present in both frames, to group by instead
            of the bucketed timestamps.

    Returns:
        Whatever ``plot_df`` returns, or None when there is nothing to plot.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    don = don[start:stop]
    imp = imp[start:stop]
    if index is None:
        don.index = pd.Series(don.index).apply(bucket_start)
        imp.index = pd.Series(imp.index).apply(bucket_start)
    else:
        don.index = don[index]
        imp.index = imp[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        dons = don.loc[don.name.str.match(reg).apply(bool), 'n']
        dons = dons.groupby(dons.index).sum()
        imps = imp.loc[imp.name.str.match(reg).apply(bool), 'n']
        imps = imps.groupby(imps.index).sum()
        d_rate = dons / imps
        if d_rate.shape[0] < 3:
            continue
        # `nlargest` ignores NaN, so it can return fewer than two values;
        # take the second one by position and cap only when it exists.
        top_two = d_rate.nlargest(2)
        if top_two.shape[0] == 2:
            d_rate = d_rate.clip(upper=top_two.iloc[1])
        d_plot[name] = d_rate

    if d_plot.shape[0] < 3:
        print('There is no data for this campaign or this kind of banners')
        return
    return plot_df(d_plot, ylabel, interactive=interactive)
def plot_conversion_rate(d, regs, start='2000', stop='2050', hours=1,
                         index=None, ylabel='conversion_rate', title='',
                         methods=('amazon', 'paypal', 'cc')):
    """Plot donations / clicks per bucket for each pattern and payment method.

    Args:
        d: frame with 'name', 'payment_method', 'donation' and 'n' columns;
            sliced with ``d[start:stop]``.
        regs: mapping of series label -> regular expression matched against
            ``d.name`` with ``str.match``.
        start, stop: slice bounds applied to ``d``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        index: optional column of ``d`` to group by instead of the bucketed
            timestamps.
        ylabel, title: forwarded to ``plot_df``.
        methods: iterable of payment-method values to plot (immutable default
            so it cannot be shared and mutated across calls).

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    d = d[start:stop]
    if index is None:
        d.index = pd.Series(d.index).apply(bucket_start)
    else:
        d.index = d[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        clicks = d.loc[d.name.str.match(reg).apply(bool)]
        for method in methods:
            clicks_by_method = clicks[clicks['payment_method'] == method]
            # Rows flagged donation == 1 form the numerator.
            donations = clicks_by_method[clicks_by_method['donation'] == 1]['n']
            donations = donations.groupby(donations.index).sum()
            click_totals = clicks_by_method.groupby(
                clicks_by_method.index)['n'].sum()
            d_plot[name + ' ' + method] = donations / click_totals
    return plot_df(d_plot, ylabel=ylabel, title=title, interactive=False)
def plot_donation_rate(
    don,
    imp,
    regs,
    cum=False,
    max_impressions=5,
    amount=False,
    interactive=False,
    ylabel='',
):
    """Plot donations divided by impressions per index label.

    Labels up to and including ``max_impressions`` are kept as they are;
    everything above is summed into a bucket labelled ``max_impressions + 1``.

    Args:
        don: frame with a 'name' column plus 'n' and/or 'amount' (numerator).
        imp: frame with 'name' and 'n' columns (denominator).
            NOTE(review): assumes both frames are indexed by the integer
            number of impressions seen - confirm against the caller.
        regs: mapping of series label -> pattern tested against the 'name'
            column with ``str.contains``; series are added in sorted order.
        cum: use running totals of both numerator and denominator.
        max_impressions: largest label that keeps its own bucket.
        amount: use the 'amount' column of ``don`` instead of 'n'.
        interactive, ylabel: forwarded to ``plot_df``.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucketed_totals(frame, reg, column):
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        totals = frame.loc[frame.name.str.contains(reg), column]
        totals = totals.groupby(totals.index).sum()
        head = totals.loc[:max_impressions].copy()
        # Select the overflow by label so numerator and denominator are
        # bucketed identically even when one of them has missing labels.
        head.loc[max_impressions + 1] = totals[
            totals.index > max_impressions].sum()
        return head

    donation_column = 'amount' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in sorted(regs.items()):
        donations1 = bucketed_totals(don, reg, donation_column)
        impressions1 = bucketed_totals(imp, reg, 'n')
        if cum:
            impressions1 = impressions1.cumsum()
            donations1 = donations1.cumsum()
        d_plot[name] = donations1 / impressions1
    return plot_df(d_plot, xlabel='impressions seen', ylabel=ylabel,
                   interactive=interactive)
def plot_by_time(d, regs, start='2000', stop='2050', hours=1, amount=False,
                 cum=False, normalize=False, ylabel='', interactive=False,
                 index=None, rotate=False):
    """Plot per-bucket totals of 'n' (or 'amount') for each pattern in ``regs``.

    Args:
        d: frame with a 'name' column plus 'n' and/or 'amount'; sliced with
            ``d[start:stop]`` before anything else.
        regs: mapping of series label -> regular expression matched against
            ``d.name`` with ``str.match``.
        start, stop: slice bounds applied to ``d``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        amount: sum the 'amount' column instead of 'n'.
        cum: plot the running total instead of the per-bucket total.
        normalize: divide the values by their overall sum before grouping.
        ylabel, interactive, rotate: forwarded to ``plot_df``.
        index: optional column of ``d`` to group by instead of the bucketed
            timestamps.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    d = d[start:stop]
    if index is None:
        d.index = pd.Series(d.index).apply(bucket_start)
    else:
        d.index = d[index]

    column = 'amount' if amount else 'n'
    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        counts = d.loc[d.name.str.match(reg).apply(bool), column]
        if normalize:
            counts = counts / counts.sum()
        totals = counts.groupby(counts.index).sum()
        d_plot[name] = totals.cumsum() if cum else totals

    # Buckets missing for a given series are shown as zero.
    d_plot = d_plot.fillna(0)
    return plot_df(d_plot, ylabel, interactive=interactive, rotate=rotate)
def plot_rate_by_time(don, imp, regs, hours=1, start='2000', stop='2050',
                      ylabel='donation rate', interactive=False, index=None):
    """Plot the per-bucket ratio of donation counts to impression counts.

    Args:
        don: frame with 'name' and 'n' columns (numerator).
        imp: frame with 'name' and 'n' columns (denominator).
        regs: mapping of series label -> regular expression matched against
            the 'name' column of both frames with ``str.match``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        start, stop: slice bounds applied to both frames.
        ylabel, interactive: forwarded to ``plot_df``.
        index: optional column, present in both frames, to group by instead
            of the bucketed timestamps.

    Returns:
        Whatever ``plot_df`` returns.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    don = don[start:stop]
    imp = imp[start:stop]
    if index is None:
        don.index = pd.Series(don.index).apply(bucket_start)
        imp.index = pd.Series(imp.index).apply(bucket_start)
    else:
        don.index = don[index]
        imp.index = imp[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        dons = don.loc[don.name.str.match(reg).apply(bool), 'n']
        dons = dons.groupby(dons.index).sum()
        imps = imp.loc[imp.name.str.match(reg).apply(bool), 'n']
        imps = imps.groupby(imps.index).sum()
        d_plot[name] = dons / imps
    return plot_df(d_plot, ylabel, interactive=interactive)
def plot_rate_by_time(don, imp, regs, hours=1, start='2000', stop='2050',
                      ylabel='donation rate', interactive=False, index=None):
    """Plot the per-bucket ratio of donation counts to impression counts.

    Series with fewer than three buckets are skipped, and each series is
    capped at its second-largest value so a single spike does not dominate
    the plot. If the assembled frame has fewer than three rows a message is
    printed and None is returned.

    Args:
        don: frame with 'name' and 'n' columns (numerator).
        imp: frame with 'name' and 'n' columns (denominator).
        regs: mapping of series label -> regular expression matched against
            the 'name' column of both frames with ``str.match``.
        hours: width of the time bucket, in hours (used only when ``index``
            is None).
        start, stop: slice bounds applied to both frames.
        ylabel, interactive: forwarded to ``plot_df``.
        index: optional column, present in both frames, to group by instead
            of the bucketed timestamps.

    Returns:
        Whatever ``plot_df`` returns, or None when there is nothing to plot.
    """

    def bucket_start(tm):
        # Shift back by the offset inside the `hours`-wide bucket; only whole
        # hours are subtracted, minutes and seconds are left as they are.
        return tm - timedelta(hours=(24 * tm.day + tm.hour) % hours)

    don = don[start:stop]
    imp = imp[start:stop]
    if index is None:
        don.index = pd.Series(don.index).apply(bucket_start)
        imp.index = pd.Series(imp.index).apply(bucket_start)
    else:
        don.index = don[index]
        imp.index = imp[index]

    d_plot = pd.DataFrame()
    for name, reg in regs.items():
        # `.loc` replaces `.ix`, which was removed in pandas 1.0.
        dons = don.loc[don.name.str.match(reg).apply(bool), 'n']
        dons = dons.groupby(dons.index).sum()
        imps = imp.loc[imp.name.str.match(reg).apply(bool), 'n']
        imps = imps.groupby(imps.index).sum()
        d_rate = dons / imps
        if d_rate.shape[0] < 3:
            continue
        # `nlargest` ignores NaN, so it can return fewer than two values;
        # take the second one by position and cap only when it exists.
        top_two = d_rate.nlargest(2)
        if top_two.shape[0] == 2:
            d_rate = d_rate.clip(upper=top_two.iloc[1])
        d_plot[name] = d_rate

    if d_plot.shape[0] < 3:
        print('There is no data for this campaign or this kind of banners')
        return
    return plot_df(d_plot, ylabel, interactive=interactive)