def plot_cumulative_hist(data, dim, level):
    """Plot the cumulative histogram of ``data`` using unit-width bins.

    Bin edges are consecutive integers covering the full range of ``data``.
    The running total of the bin counts is plotted against the right edge of
    each bin through ``utils.plot_wrap``.

    Args:
        data: non-empty sequence of numbers to histogram.
        dim: name interpolated into the title, the x-axis label
            (``'<dim>_2013 - <dim>_2012'``) and the output file name.
        level: aggregation-level name interpolated into the title and the
            output file name.
    """
    # The sequence handed to np.histogram is a list of bin *edges*, so it has
    # to reach from floor(min) up to ceil(max).  An edge list that stops short
    # of the maximum makes np.histogram silently drop the largest values, and
    # int() truncation toward zero would cut off a negative fractional minimum.
    lowest = int(np.floor(min(data)))
    highest = int(np.ceil(max(data)))
    # Keep at least one bin when every value falls on the same integer.
    highest = max(highest, lowest + 1)
    edges = np.arange(lowest, highest + 1)

    counts, edges = np.histogram(data, bins=edges)
    utils.plot_wrap(
        (edges[1:], np.cumsum(counts)),
        title='%s cumulative diff by %s' % (dim, level),
        xlabel='%s_2013 - %s_2012' % (dim, dim),
        ylabel='cumulative sum of frequencies',
        filename='%s_diff_cumsum_hist_by_%s.png' % (dim, level),
    )
def plot_pct_vs_cum_valid(votes, level, year):
    """Plot running candidate percentages against cumulative valid votes.

    The keys of ``votes`` are ordered by ascending ``votes[code]["valid"]``
    and passed to ``cumsum_all``.  Its result is indexed with ``"gov"``,
    ``"cap"``, ``"null"`` and ``"valid"``; each of the first three series is
    paired element-wise with the ``"valid"`` series and converted with
    ``utils.rounded_pct``.

    Args:
        votes: mapping from a place code to a record that has a ``"valid"``
            entry (other entries are consumed by ``cumsum_all``).
        level: aggregation-level name used in the title, x-axis label and
            output file name.
        year: integer year appended to the output file name.
    """
    ordered_codes = sorted(votes, key=lambda code: votes[code]["valid"])
    running = cumsum_all(votes, ordered_codes)
    valid_running = running["valid"]

    def pct_of_valid(series):
        # Percentage of the running valid total, one value per place.
        return [
            utils.rounded_pct(part, whole)
            for part, whole in zip(running[series], valid_running)
        ]

    gov_pct = pct_of_valid("gov")
    cap_pct = pct_of_valid("cap")
    null_pct = pct_of_valid("null")

    # Each curve is an (x, y, style) triple; the style strings are forwarded
    # to utils.plot_wrap unchanged.
    curves = (
        valid_running, gov_pct, "r",
        valid_running, cap_pct, "b",
        valid_running, null_pct, "k",
    )
    utils.plot_wrap(
        curves,
        title="Candidate %% vs cumulative valid votes, by %s" % level,
        xlabel="Cumulative valid votes (by %s)" % level,
        ylabel="Candidate % of votes",
        filename="candidate_pct_vs_cum_valid_by_%s_%d.png" % (level, year),
    )
def plot_benford_2nd(places, level):
    """Print chi-square statistics and plot second-digit distributions.

    Second-digit frequencies are computed with ``snd_dig_freq`` for the
    ``'cap'`` and ``'gov'`` entries of ``places[0]`` and ``places[1]``.  For
    each of the four series the value returned by ``calc_chi`` against the
    counts expected from ``benford_2nd`` is printed, and then all four
    percentage curves plus ``benford_2nd`` itself are plotted via
    ``utils.plot_wrap``.

    Args:
        places: indexable pair; the labels below pair index 0 with 2012 and
            index 1 with 2013.
        level: aggregation-level name used in the printed lines, the plot
            title and the output file name.
    """
    second_digits = range(0, 10)

    cap2013 = snd_dig_freq(places[1], 'cap')
    cap2012 = snd_dig_freq(places[0], 'cap')
    gov2013 = snd_dig_freq(places[1], 'gov')
    gov2012 = snd_dig_freq(places[0], 'gov')

    print('Pearson X^2 statistics for second digit:')
    report = (
        ('cap 2013 at %s: %.2f', cap2013),
        ('cap 2012 at %s: %.2f', cap2012),
        ('gov 2013 at %s: %.2f', gov2013),
        ('gov 2012 at %s: %.2f', gov2012),
    )
    for line_format, freq in report:
        total = sum(freq.values())
        # benford_2nd entries are treated as percentages of the total count.
        expected = [total * pct / 100 for pct in benford_2nd]
        print(line_format % (level, calc_chi(freq, expected)))

    curves = (
        second_digits, snd_dig_pct(cap2013), 'bo-',
        second_digits, snd_dig_pct(cap2012), 'b.--',
        second_digits, snd_dig_pct(gov2013), 'ro-',
        second_digits, snd_dig_pct(gov2012), 'r.--',
        second_digits, benford_2nd, 'go-',
    )
    utils.plot_wrap(
        curves,
        title="Benford's Law for the 2nd digit, by %s" % level,
        xlabel='Second digit',
        ylabel='Percentage of vote counts with corresponding 2nd digit (%)',
        labels=['Capriles 2013', 'Capriles 2012', 'Maduro 2013',
                'Chavez 2012', 'Benford'],
        filename='benford_2nd_all_%s.png' % level,
    )
def plot_benford_1st(places, level):
    """Print chi-square statistics and plot first-digit distributions.

    First-digit frequencies are computed with ``fst_dig_freq`` for the
    ``'cap'`` and ``'gov'`` entries of ``places[0]`` and ``places[1]``.  For
    each of the four series the value returned by ``calc_chi`` against
    ``get_expected(benford_1st, <series total>)`` is printed, and then all
    four percentage curves plus the values of ``benford_1st`` are plotted via
    ``utils.plot_wrap``.

    Args:
        places: indexable pair; the labels below pair index 0 with 2012 and
            index 1 with 2013.
        level: aggregation-level name used in the printed lines, the plot
            title and the output file name.
    """
    first_digits = range(1, 10)

    cap2013 = fst_dig_freq(places[1], 'cap')
    cap2012 = fst_dig_freq(places[0], 'cap')
    gov2013 = fst_dig_freq(places[1], 'gov')
    gov2012 = fst_dig_freq(places[0], 'gov')

    print('Pearson X^2 statistics for first digit:')
    report = (
        ('cap 2013 at %s: %.2f', cap2013),
        ('cap 2012 at %s: %.2f', cap2012),
        ('gov 2013 at %s: %.2f', gov2013),
        ('gov 2012 at %s: %.2f', gov2012),
    )
    for line_format, freq in report:
        expected = get_expected(benford_1st, sum(freq.values()))
        print(line_format % (level, calc_chi(freq, expected)))

    # NOTE(review): the reference curve uses benford_1st.values() as-is, so it
    # presumably relies on that mapping yielding values in digit order 1..9 --
    # confirm against the definition of benford_1st.
    curves = (
        first_digits, fst_dig_pct(cap2013), 'bo-',
        first_digits, fst_dig_pct(cap2012), 'b.--',
        first_digits, fst_dig_pct(gov2013), 'ro-',
        first_digits, fst_dig_pct(gov2012), 'r.--',
        first_digits, benford_1st.values(), 'go-',
    )
    utils.plot_wrap(
        curves,
        title="Benford's Law for the 1st digit, by %s" % level,
        xlabel='First digit',
        ylabel='Percentage of vote counts with corresponding 1st digit (%)',
        labels=['Capriles 2013', 'Capriles 2012', 'Maduro 2013',
                'Chavez 2012', 'Benford'],
        filename='benford_1st_all_%s.png' % level,
    )