def ncr(n, r):
    """Return the binomial coefficient "n choose r", saturated at 0x7FFFFFFF.

    Returns 0 when the arguments are out of range (``n < r``, ``r < 0`` or
    ``n < 0``) and 1 for the trivial cases ``n == r`` and ``r == 0``.

    ``parent`` is read from the surrounding scope (not visible in this
    block). When it is ``None`` or its ``total`` has already reached the
    saturation value, a cheap lower bound is used to return the saturated
    value early.

    All thresholds are compared in natural-log space, so the cap is
    ``ln(vmax)``. The previous hard-coded cap of 9.33 equals
    ``log10(vmax)`` and caused any result above roughly e^9.33 (about
    11271) to be reported as saturated.
    """
    # Handle special cases
    if (n < r) or (r < 0) or (n < 0):
        return 0
    if (n == r) or (r == 0):
        return 1

    # If the number of choices in the parent node is larger than the
    # maximum sample count, we don't have to be that exact
    vmax = 0x7FFFFFFF
    # Natural log of the saturation value; math.log and entropy.lnncrr
    # (whose result is fed to math.exp below) both work with base e.
    lmax = math.log(vmax)
    ptotal = vmax if parent is None else parent.total
    if ptotal >= vmax:
        # Check the lower bound nCr >= (n/r)^r
        lmin_val = r * math.log(n / float(r))
        if lmin_val > lmax:
            return vmax

    # Small r: delegate to entropy.ncr
    if r < 100:
        return entropy.ncr(n, r)

    # Use the lnncrr function to compute ncr (faster for large r) and clamp
    # the result in log space before exponentiating
    res = entropy.lnncrr(n, r)
    if res > lmax:
        return vmax
    return int(round(math.exp(res)))
# Write out the figure that was built before this point.
fig.savefig("out/sketch_info.pdf", format='pdf', bbox_inches='tight')

# --- Figure: information and false positives per sample --------------------
print("Plotting information per sample...")
fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(1, 1, 1)

# Information divided by the sample count.
ax.plot(xs, info / xs, label="Information per sample", lw=0.75, color="k")

# False positives: mean as a dashed line, min/max as thin dotted lines.
ax.plot(xs, n_false_positives_mean, '--',
        label="False positives per sample", lw=0.75, color="#3465a4")
ax.plot(xs, n_false_positives_min, ':', lw=0.25, color="#3465a4")
ax.plot(xs, n_false_positives_max, ':', lw=0.25, color="#3465a4")

# Horizontal reference line at n_bits - n_ones.
ax.plot([0, n_samples], [n_bits - n_ones, n_bits - n_ones], '--',
        lw=0.5, color="#3465a4")
# The annotation text is passed positionally: Matplotlib renamed this
# parameter from `s` to `text` (3.3) and later dropped the old keyword, so
# the positional form works with both old and new releases.
ax.annotate("\\textit{Maximum false positives}",
            xy=(n_samples * 0.975, n_bits - n_ones),
            verticalalignment="bottom",
            horizontalalignment="right",
            fontsize=8.0)

# Horizontal reference line at lnncrr(n_bits, n_ones) divided by ln(2),
# i.e. converted to bits (assuming lnncrr returns a natural logarithm).
mInfo = entropy.lnncrr(n_bits, n_ones) / math.log(2.0)
ax.plot([0, n_samples], [mInfo, mInfo], '-', lw=0.5, color="k")
ax.annotate("\\textit{Maximum information}",
            xy=(n_samples * 0.975, mInfo),
            verticalalignment="bottom",
            horizontalalignment="right",
            fontsize=8.0)

ax.set_xlim(1, n_samples)
ax.set_xlabel("Sample count $N$")
ax.set_ylabel("Bits")
# Place the legend above the axes, in two columns.
ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1.05), ncol=2)
fig.savefig("out/sketch_info_per_sample.pdf", format='pdf',
            bbox_inches='tight')

# --- Figure: errors (continues beyond this section) ------------------------
print("Plotting errors...")
fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(1, 1, 1)
ax.plot(xs, n_false_positives_mean, label="False positives", color="#3465a4")