def main():
    """Build one pairwise distance matrix per input file and compare them.

    For every path in the module-level ``inputs`` sequence this:

    1. reads the CSV and keeps a reproducible random subset of its rows,
    2. computes a pairwise distance matrix over the columns from index 3
       onwards, using ``earthmover1d`` as the metric,
    3. writes the matrix to a CSV (no header, no index) whose path is
       derived from the input path via ``replaceEnforced``.

    Afterwards the Frobenius norm of the difference between each matrix and
    the matrix of the last input is printed.
    """
    sampleSize = 500
    distMatrices = []

    for filename in inputs:
        outfilename = replaceEnforced(
            filename, "../data/CCS/", "pairwise_earthmover_")

        # Re-seed for every file so each one gets the same row selection;
        # the matrices are subtracted element-wise below.
        np.random.seed(1)

        print("reading data: {}".format(filename))
        df = pd.read_csv(filename)

        # A coin flip for each of the first `sampleSize` rows.  The mask is
        # turned into integer positions (and clipped to the rows that exist)
        # because ``iloc`` rejects a boolean mask whose length differs from
        # the number of rows in the frame.
        keepMask = np.random.choice([0, 1], sampleSize).astype(bool)
        rowPositions = np.flatnonzero(keepMask)
        rowPositions = rowPositions[rowPositions < len(df)]
        df = df.iloc[rowPositions, :]

        print("computing pairwise distances")
        # NOTE(review): columns 0-2 are presumably metadata and the remaining
        # headers encode wavelengths -- confirm against the data files.
        # Materialised as a list: the metric is called once per row pair, and
        # a lazy ``map`` object would be exhausted after the first call.
        wavelengths = [extractWavelength(column) for column in df.columns[3:]]

        def metric(y1, y2):
            return earthmover1d(wavelengths, y1, y2)

        D = smp.pairwise_distances(df.iloc[:, 3:], metric=metric)
        distMatrices.append(D)

        print("saving to {}".format(outfilename))
        pd.DataFrame(D).to_csv(outfilename, index=None, header=None)

    # Compare every matrix except the last against the last one.
    for name, matrix in zip(inputs, distMatrices[:-1]):
        diff = matrix - distMatrices[-1]
        frobenius = np.linalg.norm(diff, ord="fro")
        print("diff between {} and {}: {}".format(name, inputs[-1], frobenius))
def demo():
    """Search for a case where discretizing increases the 1-D distance.

    Random pairs of distributions are drawn with ``uniform_randomEndpoints``
    on a fixed grid, their ``earthmover1d`` distance is computed before and
    after ``discretize`` with the chosen mapping, and both values are
    printed.  The first pair whose discretized distance exceeds the original
    one by more than a small tolerance is plotted as a counterexample and
    ends the search.
    """
    np.random.seed(1)

    # Must be an integer: np.linspace rejects a float sample count.
    n = 1000
    xmin = -2.0
    xmax = 3.0
    xs = np.linspace(xmin, xmax, n)

    # Alternative test distributions:
    #ys = np.cos(4 * np.pi * xs) ** 2
    #ys = np.random.lognormal(0, 1, len(xs)) * (xs > 0) * (xs < 1); ys /= np.sum(ys)
    ys = uniform(xs, 0.3, 0.4)
    #ys = uniform_randomEndpoints(xs)

    centers = np.array([-1, 0, 1, 2])

    # xs1/ys1 and xs2/ys2 are only consumed by the optional visualisations
    # that are commented out below.
    m1 = nearestMapping(xs, centers)
    xs1, ys1 = discretize(xs, ys, m1)
    #showDiscretization(xs, ys, xs1, ys1, m1)

    m2 = proportionalMapping(xs, centers)
    xs2, ys2 = discretize(xs, ys, m2)
    #showDiscretization(xs, ys, xs2, ys2, m2)

    # Tolerance below which an increase in distance is ignored.
    epsilon = 1e-6

    for i in range(4096):
        # Alternative mapping selections:
        #testName = "nearest" if i % 2 == 0 else "proportional"
        #testMapping = m1 if i % 2 == 0 else m2
        #testName = "nearest"
        #testMapping = m1
        testName = "proportional"
        testMapping = m2

        p1 = uniform_randomEndpoints(xs, numIntervals=6)
        p2 = uniform_randomEndpoints(xs, numIntervals=6)
        #if i >= 4:
        #    p1 *= np.random.lognormal(0, 1, len(xs))
        #    p1 /= np.sum(p1)
        #    if i >= 6:
        #        p2 *= np.random.lognormal(0, 1, len(xs))
        #        p2 /= np.sum(p2)

        distance_before = earthmover1d(xs, p1, p2)

        xs_new, p1_new = discretize(xs, p1, testMapping)
        xs_new2, p2_new = discretize(xs, p2, testMapping)
        # Both distributions must land on the same discretized support.
        assert np.array_equal(xs_new, xs_new2)

        distance_after = earthmover1d(xs_new, p1_new, p2_new)
        print("{} -> {}".format(distance_before, distance_after))
        title = "{}: original distance = {:.2f}, discretized distance = {:.2f}".format(testName, distance_before, distance_after)

        # Sanity check: a distribution is at distance zero from itself,
        # both before and after discretization.
        d11_before = earthmover1d(xs, p1, p1)
        d11_after = earthmover1d(xs_new, p1_new, p1_new)
        assert d11_before == 0
        assert d11_after == 0

        # NOTE(review): the original indentation was lost; the plotting and
        # the ``break`` are assumed to belong to the counterexample branch
        # (otherwise the loop would stop after its first iteration) -- confirm.
        if distance_after - distance_before > epsilon:
            title += "; counterexample"
            fig, axes = plt.subplots(3, 2, sharex='col')
            # ``row`` rather than ``i``: a Python 2 list comprehension leaks
            # its variable and would overwrite the loop index.
            a1 = [axes[row][0] for row in range(3)]
            a2 = [axes[row][1] for row in range(3)]
            showDiscretization(xs, p1, xs_new, p1_new, testMapping, axes=a1)
            showDiscretization(xs, p2, xs_new, p2_new, testMapping, axes=a2)
            fig.suptitle(title)
            break

    plt.show()