def lc005(type_, country):
    '''
    report statistics matching (type_,country) by ad, location, and whole set,
    then chart ads colored by location, styled by type_

    Prints a line for all ads of this type and country styled by location;
    preceded by a report.

    The report has three parts, printed in this order:
      1. mean/stddev of diff_spotusd per ad id (ads for which describe()
         returns a falsy result are skipped),
      2. describe() of diff_spotusd per location label,
      3. describe() of diff_spotusd over every matching ad.

    type_   -- compared against the ``type_`` attribute of each pkl key
    country -- compared against the ``country`` attribute of each ad
    '''
    # One row per matching ad observation:
    #   (ad id, snapshot, diff_spotusd, location label truncated to 30 chars)
    kxyl = sorted([(ad.id, k.snap, ad.diff_spotusd,
                    k.location.split('/')[1][:30])
                   for k, ads in pkl.items() for ad in ads
                   if k.type_ == type_ and ad.diff_spotusd is not None
                   and ad.country == country])

    # Group the y values once instead of rescanning kxyl for every ad and
    # every location.  kxyl is sorted, so each group keeps the same value
    # order the per-key list comprehensions used to produce.
    by_ad = {}
    by_label = {}
    for k, x, y, l in kxyl:
        by_ad.setdefault(k, []).append(y)
        by_label.setdefault(l, []).append(y)

    print("\n%s:%s stats per ad" % (type_, country))
    for key in sorted(by_ad):
        answer = describe(by_ad[key], only=('mean', 'stddev'))
        if answer:
            print('ad:%s ' % key + describe_str(answer))

    print("\n%s:%s stats per location" % (type_, country))
    for label in sorted(by_label):
        # NOTE(review): unlike the per-ad loop above, the describe() result
        # is not checked before being handed to describe_str() -- confirm
        # describe_str() copes with whatever describe() returns here.
        print('label:%s ' % label + describe_str(describe(by_label[label])))

    # Blank separator line before the whole-set summary.  A bare ``print``
    # is a no-op expression when print is a function, so print an explicit
    # empty string (which emits a blank line either way).
    print('')
    print(describe_str(describe([y for k, x, y, l in kxyl])))

    # create a lines chart
    title = 'ads of type:%s in country:%s' % (type_, country)
    xlabel, ylabel = '15m samples', 'difference from spot usd price'
    lines_chart(kxyl, xlabel, ylabel, title)
def pdfapprox(samples):
    """Return a function that approximates the pdf of a set of samples
    using a Gaussian expansion computed from the mean, variance, skewness
    and Fisher's kurtosis.

    The four sample statistics are converted to the first four central
    moments (the first entry being the mean) and handed to pdf_moments(),
    whose return value is returned unchanged.
    """
    # Estimate mean, variance, skewness and kurtosis.  The second value is
    # the *variance*, not the standard deviation, so it is named accordingly.
    mu, var, sk, kur = stats.describe(samples)[2:]

    # Get central moments
    cnt = [None] * 4
    cnt[0] = mu
    # The second central moment is the variance itself; squaring it again
    # (as the previous ``sig*sig`` did) was inconsistent with the two lines
    # below, which already scale by powers of the variance.
    cnt[1] = var
    cnt[2] = sk * var**1.5            # skewness * sigma**3
    cnt[3] = (kur + 3.0) * var**2.0   # (Fisher kurtosis + 3) * sigma**4
    return pdf_moments(cnt)
def lc008(sortreversed):
    '''
    report and chart most severe stddev diff_spotusd by trader
    if True then most else least

    Each trader will have a line based on the standard deviation of their
    historic above-spotusd prices (for both bids and asks).

    A greater standard deviation means that their markup varied throughout
    their history.  A lesser standard deviation means they ran a consistent
    markup over time...  It says nothing about how much their markup was,
    just whether it varied significantly or not.

    Depending on how this recipe is called, it will answer for the traders
    with the greatest (most erratic) or least (most consistent) markup over
    time.

    sortreversed -- truthy: rank by descending stddev ('most' erratic);
                    falsy: rank by ascending stddev ('least' erratic)
    '''
    label = 'most' if sortreversed else 'least'

    # One row per priced ad observation, keyed by (ad type, trader).
    temp = sorted([((k.type_, ad.user), k.snap, ad.diff_spotusd)
                   for k, ads in pkl.items() for ad in ads
                   if ad.diff_spotusd is not None])

    # Group the prices per trader in a single pass; temp is sorted, so each
    # group keeps the value order a per-key rescan of temp would produce.
    grouped = {}
    for key, _snap, diff in temp:
        grouped.setdefault(key, []).append(diff)

    kstats = {}
    for key in sorted(grouped):
        kstats[key] = describe(grouped[key], only=('mean', 'stddev'))

    print('40 traders with %s erratic diff_spotusd:' % label)
    # Rank by stddev (tuple index 2) and keep the first 40; traders for
    # which describe() returned None are left out.
    kvals = sorted([(k, v['mean'], v['stddev'])
                    for k, v in kstats.items() if v is not None],
                   key=itemgetter(2), reverse=sortreversed)[:40]
    for kval in kvals:
        print('%s %-30s %15.2f mean %15.2f stddev' %
              (kval[0][0], kval[0][1][:30], kval[1], kval[2]))

    # Build the lookup once; previously the list of selected traders was
    # rebuilt for every single ad inside the comprehension.
    selected = {kval[0] for kval in kvals}
    kxyl = sorted([
        (ad.id, k.snap, ad.diff_spotusd,
         '%s %s %s' % (k.type_, ad.user, ad.country))
        for k, ads in pkl.items() for ad in ads
        if (k.type_, ad.user) in selected and ad.diff_spotusd is not None
    ])

    title = 'ads from 40 %s erratic diff_spotusd traders' % label
    xlabel, ylabel = '15m samples', 'difference from spot usd price'
    lines_chart(kxyl, xlabel, ylabel, title)
def lc007(sortreversed):
    '''
    report and chart most severe mean diff_spotusd by trader
    if True then highest else lowest

    Each trader will have similarly-styled-lines for each of their ads and
    the report will show the traders with the greatest or least mean based
    on their historic above-spotusd prices (for both bids and asks).

    A higher mean means that their markup was likely very pricey.  A lower
    mean means that their markup was closer to spot.  A negative mean
    indicates that they are advertising at prices which result in loss and
    may hint that the trader is practicing bait-and-switch.

    sortreversed -- truthy: rank by descending mean ('highest');
                    falsy: rank by ascending mean ('lowest')
    '''
    label = 'highest' if sortreversed else 'lowest'

    # One row per priced ad observation, keyed by (ad type, trader).
    temp = sorted([((k.type_, ad.user), k.snap, ad.diff_spotusd)
                   for k, ads in pkl.items() for ad in ads
                   if ad.diff_spotusd is not None])

    # Group the prices per trader in a single pass; temp is sorted, so each
    # group keeps the value order a per-key rescan of temp would produce.
    grouped = {}
    for key, _snap, diff in temp:
        grouped.setdefault(key, []).append(diff)

    kstats = {}
    for key in sorted(grouped):
        kstats[key] = describe(grouped[key], only=('mean', 'stddev'))

    print('40 traders with %s diff_spotusd:' % label)
    # Rank by mean (tuple index 1) and keep the first 40; traders for which
    # describe() returned None are left out.
    kvals = sorted([(k, v['mean'], v['stddev'])
                    for k, v in kstats.items() if v is not None],
                   key=itemgetter(1), reverse=sortreversed)[:40]
    for kval in kvals:
        print('%s %-30s %15.2f mean %15.2f stddev' %
              (kval[0][0], kval[0][1][:30], kval[1], kval[2]))

    # Build the lookup once; previously the list of selected traders was
    # rebuilt for every single ad inside the comprehension.
    selected = {kval[0] for kval in kvals}
    kxyl = sorted([
        (ad.id, k.snap, ad.diff_spotusd,
         '%s %s %s' % (k.type_, ad.user, ad.country))
        for k, ads in pkl.items() for ad in ads
        if (k.type_, ad.user) in selected and ad.diff_spotusd is not None
    ])

    title = 'ads from 40 %s diff_spotusd traders' % label
    xlabel, ylabel = '15m samples', 'difference from spot usd price'
    lines_chart(kxyl, xlabel, ylabel, title)
def lc009(type_, stat):
    '''
    for ads of (type_), chart (stat) of diff_spotusd by location
    type_ in ('a','b'), stat in ('min','max','mean','median','stddev')

    Each pkl key of the requested type contributes one point: the requested
    statistic of diff_spotusd over that key's priced ads.  Points are
    labelled by ad type and location, so each location is drawn as its own
    line.  Keys for which describe() returns a falsy result contribute
    nothing.
    '''
    # pkl keys are already unique, so no set() is needed before sorting.
    keys = sorted(k for k in pkl if k.type_ == type_)

    kstats = {}
    for key in keys:
        astat = describe(
            [ad.diff_spotusd for ad in pkl[key]
             if ad.diff_spotusd is not None],
            only=[stat])
        if astat:
            kstats[key] = astat[stat]

    # Row layout: (line id, snapshot, statistic value, legend label).
    kxyl = sorted([('%s %s' % (k.type_, k.loc), k.snap, value,
                    '%s %s' % (k.type_, k.location.split('/')[1][:30]))
                   for k, value in kstats.items()])

    title = '%s diff_spotusd of type:%s by location' % (stat, type_)
    xlabel, ylabel = '15m samples', 'difference from spot usd price'
    lines_chart(kxyl, xlabel, ylabel, title)
def lc006(type_):
    '''
    chart mean and stddev for ads of (type_) by location

    Prints two lines per location (mean and stddev) for ads of this type,
    '''
    ## below kept as an example of how NOT to access pkl
    #temp = sorted([(k.location.split('/')[1][:30], k.snap, ad.diff_spotusd)
    #    for k, ads in pkl.items() for ad in ads
    #    if k.type_==type_ and ad.diff_spotusd != None])
    #keys = sorted(set([x[0] for x in temp]))
    #xs = sorted(set([x[1] for x in temp]))
    #kxyl = []
    #for snap in xs:
    #    print(snap)
    #    for key in keys:
    #        ystats = describe([y for k,x,y in temp if x==snap and k==key],
    #                          only=['mean','stddev'])
    #        if not ystats: continue
    #        kxyl.append(('%s mean' % key, snap, ystats['mean'], '%s mean' % key))
    #        kxyl.append(('%s stddev' % key, snap, ystats['stddev'], '%s stddev' % key))
    #        print(' %-37s, %15.2f, %15.2f' % (key, ystats['mean'], ystats['stddev']))

    # Summarise every pkl entry of the requested type in one describe() call.
    summaries = []
    for key, ads in pkl.items():
        if key.type_ == type_:
            prices = [ad.diff_spotusd for ad in ads]
            summaries.append(
                (key, describe(prices, only=('mean', 'stddev'))))

    # Two chart rows per summarised entry: an 'm<loc>' row carrying the mean
    # and an 's<loc>' row carrying the stddev.  Entries whose summary
    # compares equal to None are dropped.
    mean_rows = []
    stddev_rows = []
    for key, summary in summaries:
        if summary != None:
            place = key.location.split('/')[1][:30]
            mean_rows.append(('m%d' % key.loc, key.snap, summary['mean'],
                              '%s mean' % place))
            stddev_rows.append(('s%d' % key.loc, key.snap, summary['stddev'],
                                '%s stddev' % place))

    kxyl = sorted(mean_rows)
    kxyl.extend(sorted(stddev_rows))

    title = ('stddev, mean of diff_spotusd for all ads of type:%s by location'
             % type_)
    xlabel, ylabel = '15m samples', 'stddev, mean of diff_spotusd'
    # NOTE(review): ``verbose`` is not a parameter of this function; it is
    # looked up in an outer scope that is not visible here -- confirm it is
    # defined at module level.
    lines_chart(sorted(kxyl), xlabel, ylabel, title,
                stylecnt=2, verbose=verbose)
# Central-tendency measures, each applied to l, lf, a and af in that order.
for _name in ('harmonicmean', 'mean', 'median', 'medianscore'):
    _fn = getattr(stats, _name)
    print(_name + ':', _fn(l), _fn(lf), _fn(a), _fn(af))
print('mode:', stats.mode(l), stats.mode(a))

print('\nMOMENTS')
print('moment:', stats.moment(l), stats.moment(lf),
      stats.moment(a), stats.moment(af))
# 'variation' deliberately keeps its own argument order: l, a, lf, af.
print('variation:', stats.variation(l), stats.variation(a),
      stats.variation(lf), stats.variation(af))
for _name in ('skew', 'kurtosis'):
    _fn = getattr(stats, _name)
    print(_name + ':', _fn(l), _fn(lf), _fn(a), _fn(af))
# These four are only called with a and af.
for _name in ('mean', 'var', 'stdev', 'sem'):
    _fn = getattr(stats, _name)
    print(_name + ':', _fn(a), _fn(af))
print('describe:')
for _sample in (l, lf, a, af):
    print(stats.describe(_sample))

print('\nFREQUENCY')
print('freqtable:')
print('itemfreq:')
for _sample in (l, a):
    print(stats.itemfreq(_sample))
print('scoreatpercentile:',
      stats.scoreatpercentile(l, 40), stats.scoreatpercentile(lf, 40),
      stats.scoreatpercentile(a, 40), stats.scoreatpercentile(af, 40))
print('percentileofscore:',
      stats.percentileofscore(l, 12), stats.percentileofscore(lf, 12),
      stats.percentileofscore(a, 12), stats.percentileofscore(af, 12))
print('histogram:', stats.histogram(l), stats.histogram(a))
print('cumfreq:')
for _sample in (l, lf, a):
    print(stats.cumfreq(_sample))
# Exercise the stats module on the samples l, lf, a and af and print each
# result.  Written with print() calls: the Python 2 print statement is a
# SyntaxError under Python 3, and the rest of this file already uses print().

# Central tendency.
print('mean:', stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af))
print('median:', stats.median(l), stats.median(lf),
      stats.median(a), stats.median(af))
print('medianscore:', stats.medianscore(l), stats.medianscore(lf),
      stats.medianscore(a), stats.medianscore(af))
print('mode:', stats.mode(l), stats.mode(a))

print('\nMOMENTS')
print('moment:', stats.moment(l), stats.moment(lf),
      stats.moment(a), stats.moment(af))
# Argument order here is l, a, lf, af (kept as in the original).
print('variation:', stats.variation(l), stats.variation(a),
      stats.variation(lf), stats.variation(af))
print('skew:', stats.skew(l), stats.skew(lf), stats.skew(a), stats.skew(af))
print('kurtosis:', stats.kurtosis(l), stats.kurtosis(lf),
      stats.kurtosis(a), stats.kurtosis(af))
# The t* variants are all called with the same (5, 17) second argument.
print('tmean:', stats.tmean(a, (5, 17)), stats.tmean(af, (5, 17)))
print('tvar:', stats.tvar(a, (5, 17)), stats.tvar(af, (5, 17)))
print('tstdev:', stats.tstdev(a, (5, 17)), stats.tstdev(af, (5, 17)))
print('tsem:', stats.tsem(a, (5, 17)), stats.tsem(af, (5, 17)))
print('describe:')
print(stats.describe(l))
print(stats.describe(lf))
print(stats.describe(a))
print(stats.describe(af))

print('\nFREQUENCY')
print('freqtable:')
print('itemfreq:')
print(stats.itemfreq(l))
print(stats.itemfreq(a))
print('scoreatpercentile:',
      stats.scoreatpercentile(l, 40), stats.scoreatpercentile(lf, 40),
      stats.scoreatpercentile(a, 40), stats.scoreatpercentile(af, 40))
print('percentileofscore:',
      stats.percentileofscore(l, 12), stats.percentileofscore(lf, 12),
      stats.percentileofscore(a, 12), stats.percentileofscore(af, 12))
print('histogram:', stats.histogram(l), stats.histogram(a))
print('cumfreq:')
print(stats.cumfreq(l))
print(stats.cumfreq(lf))
# Exercise the stats module on the samples l, lf, a and af and print each
# result.  Written with print() calls: the Python 2 print statement is a
# SyntaxError under Python 3, and the rest of this file already uses print().
print('mode:', stats.mode(l), stats.mode(a))

print('\nMOMENTS')
print('moment:', stats.moment(l), stats.moment(lf),
      stats.moment(a), stats.moment(af))
# Argument order here is l, a, lf, af (kept as in the original).
print('variation:', stats.variation(l), stats.variation(a),
      stats.variation(lf), stats.variation(af))
print('skew:', stats.skew(l), stats.skew(lf), stats.skew(a), stats.skew(af))
print('kurtosis:', stats.kurtosis(l), stats.kurtosis(lf),
      stats.kurtosis(a), stats.kurtosis(af))
# These four are only called with a and af.
print('mean:', stats.mean(a), stats.mean(af))
print('var:', stats.var(a), stats.var(af))
print('stdev:', stats.stdev(a), stats.stdev(af))
print('sem:', stats.sem(a), stats.sem(af))
print('describe:')
print(stats.describe(l))
print(stats.describe(lf))
print(stats.describe(a))
print(stats.describe(af))

print('\nFREQUENCY')
print('freqtable:')
print('itemfreq:')
print(stats.itemfreq(l))
print(stats.itemfreq(a))
print('scoreatpercentile:',
      stats.scoreatpercentile(l, 40), stats.scoreatpercentile(lf, 40),
      stats.scoreatpercentile(a, 40), stats.scoreatpercentile(af, 40))
print('percentileofscore:',
      stats.percentileofscore(l, 12), stats.percentileofscore(lf, 12),
      stats.percentileofscore(a, 12), stats.percentileofscore(af, 12))