def plot_equalize():
    """Plot cumulative histograms of a bimodal sample before and after equalize().

    The sample holds the integers 110..120 repeated five times followed by
    130..140 repeated seven times.  The running totals of a 30-bin histogram
    are plotted for the raw sample (colour 'b') and for ``equalize(sample)``
    (colour 'r') on the same axes, after which ``show()`` is called.
    """
    # Build the flat sample by list repetition; wrapping range() in list()
    # keeps this working where range() does not return a list.
    X = array(list(range(110, 121)) * 5 + list(range(130, 141)) * 7)

    # Histogram the raw sample before handing it to equalize().
    Xc = cumsum(histogram(X, bins=30)[0])

    Y = equalize(X)
    Yc = cumsum(histogram(Y, bins=30)[0])

    _, ax = subplots()  # the figure handle is not needed here
    ax.plot(Xc, c='b')
    ax.plot(Yc, c='r')
    show()
def plot_equalize():
    """Plot cumulative histograms of a bimodal sample before and after equalize().

    The sample holds the integers 110..120 repeated five times followed by
    130..140 repeated seven times.  The running totals of a 30-bin histogram
    are plotted for the raw sample (colour 'b') and for ``equalize(sample)``
    (colour 'r') on the same axes, after which ``show()`` is called.
    """
    # Build the flat sample by list repetition; wrapping range() in list()
    # keeps this working where range() does not return a list.
    X = array(list(range(110, 121)) * 5 + list(range(130, 141)) * 7)

    # Histogram the raw sample before handing it to equalize().
    Xc = cumsum(histogram(X, bins=30)[0])

    Y = equalize(X)
    Yc = cumsum(histogram(Y, bins=30)[0])

    _, ax = subplots()  # the figure handle is not needed here
    ax.plot(Xc, c='b')
    ax.plot(Yc, c='r')
    show()
def test_equalize():
    """Check the output range and approximate CDF linearity of equalize(X, 255).

    Besides basic properties (minimum 0, maximum 255), does a linearity test
    on the cumulative 30-bin histogram (that may not be mathematically sound).
    """
    # Two clusters: 10..20 repeated 15 times, then 130..140 repeated 4 times.
    # list(range(...)) keeps this working where range() is not a list.
    X = array(list(range(10, 21)) * 15 + list(range(130, 141)) * 4)
    Y = equalize(X, 255)

    assert Y.min() == 0
    assert Y.max() == 255

    n = X.shape[0]
    C = cumsum(histogram(Y, bins=30)[0])
    L = linspace(0, n, 30)  # straight ramp from 0 to the sample count

    # Euclidean distance between the ramp and the cumulative counts, divided
    # by the sample count (the message calls it "rmse", but it is not a true
    # root-mean-square error).
    err = sqrt(sum((L - C)**2)) / n
    message = 'equalized cdf appears not to be linear ({0:.2f} rmse from linearity)'.format(err)
    assert err < 0.5, message
def test_equalize():
    """Check the output range and approximate CDF linearity of equalize(X, 255).

    Besides basic properties (minimum 0, maximum 255), does a linearity test
    on the cumulative 30-bin histogram (that may not be mathematically sound).
    """
    # Two clusters: 10..20 repeated 15 times, then 130..140 repeated 4 times.
    # list(range(...)) keeps this working where range() is not a list.
    X = array(list(range(10, 21)) * 15 + list(range(130, 141)) * 4)
    Y = equalize(X, 255)

    assert Y.min() == 0
    assert Y.max() == 255

    n = X.shape[0]
    C = cumsum(histogram(Y, bins=30)[0])
    L = linspace(0, n, 30)  # straight ramp from 0 to the sample count

    # Euclidean distance between the ramp and the cumulative counts, divided
    # by the sample count (the message calls it "rmse", but it is not a true
    # root-mean-square error).
    err = sqrt(sum((L - C)**2)) / n
    message = 'equalized cdf appears not to be linear ({0:.2f} rmse from linearity)'.format(err)
    assert err < 0.5, message
if 'shift' in fmt: print 'shifting by {0}'.format(fmt['shift']) column += fmt['shift'] if 'cut_gt' in fmt and 'cut_to' in fmt: print 'cutting > {0:f} to {1:f} for {2:d}'.format( fmt['cut_gt'], fmt['cut_to'], colnr) cut_cnt += (column > fmt['cut_gt']).sum() column[column > fmt['cut_gt']] = fmt['cut_to'] if 'cut_lt' in fmt and 'cut_to' in fmt: print 'cutting < {0:f} to {1:f} for {2:d}'.format( fmt['cut_lt'], fmt['cut_to'], colnr) cut_cnt += (column < fmt['cut_lt']).sum() column[column < fmt['cut_lt']] = fmt['cut_to'] if 'equalize' in fmt and fmt['equalize']: print 'equalizing histogram' column = equalize(column) ax_after.hist(column, facecolor='blue', bins=30) ax_after_cdf.hist(column, cumulative=True, facecolor='green', bins=30) ax_frac.pie([ column.shape[0] - columnnonnum.shape[0] - cut_cnt, columnnonnum.shape[0], cut_cnt ], labels=['normal', 'NaN', 'cut']) print 'frac', column.shape[0], [ column.shape[0] - columnnonnum.shape[0] - cut_cnt, columnnonnum.shape[0], cut_cnt ] show(block=False) while True: if 'cut_gt' in fmt: print 'column {0:d} already has a cutoff at {1:d}'.format(
else: fmt = copy(format['default']) if 'shift' in fmt: print 'shifting by {0}'.format(fmt['shift']) column += fmt['shift'] if 'cut_gt' in fmt and 'cut_to' in fmt: print 'cutting > {0:f} to {1:f} for {2:d}'.format(fmt['cut_gt'], fmt['cut_to'], colnr) cut_cnt += (column > fmt['cut_gt']).sum() column[column > fmt['cut_gt']] = fmt['cut_to'] if 'cut_lt' in fmt and 'cut_to' in fmt: print 'cutting < {0:f} to {1:f} for {2:d}'.format(fmt['cut_lt'], fmt['cut_to'], colnr) cut_cnt += (column < fmt['cut_lt']).sum() column[column < fmt['cut_lt']] = fmt['cut_to'] if 'equalize' in fmt and fmt['equalize']: print 'equalizing histogram' column = equalize(column) ax_after.hist(column, facecolor = 'blue', bins = 30) ax_after_cdf.hist(column, cumulative = True, facecolor = 'green', bins = 30) ax_frac.pie([column.shape[0] - columnnonnum.shape[0] - cut_cnt, columnnonnum.shape[0], cut_cnt], labels = ['normal', 'NaN', 'cut']) print 'frac', column.shape[0], [column.shape[0] - columnnonnum.shape[0] - cut_cnt, columnnonnum.shape[0], cut_cnt] show(block = False) while True: if 'cut_gt' in fmt: print 'column {0:d} already has a cutoff at {1:d}'.format(colnr, fmt['cut_gt']) close() break cut = raw_input('column {0:d} cutoff point? '.format(colnr)) if cut.strip() == '': print 'no cutoff' break try: