def test_canberra_32_2():
    """Compare float32 canberra ``assign_nearest`` (with ``X_indices``) to scipy.

    Ten random trials are run. In each one the first three rows of ``X``
    serve as centers and five randomly drawn rows are assigned. A label that
    differs from scipy's ``argmin`` is accepted only when the center it names
    is as close as the reference minimum, i.e. the disagreement is merely a
    tie-break between equally near centers.
    """
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]
        # randint excludes its upper bound, so high=10 draws from 0..9 --
        # the same range as the deprecated random_integers(low=0, high=9).
        X_indices = random.randint(low=0, high=10, size=5).astype(np.intp)

        assignments, _ = assign_nearest(X, Y, 'canberra', X_indices=X_indices)
        cdist = scipy.spatial.distance.cdist(X[X_indices], Y, metric='canberra')
        ref = cdist.argmin(axis=1)

        # Rows whose assignment disagrees with the reference argmin.
        different = np.where(assignments != ref)[0]

        # A disagreement is only legitimate when it is a tie: the distance to
        # the center that was picked must match that row's own minimum.
        # Each row is compared against itself (not against another row), and
        # an empty ``different`` passes trivially.
        # NOTE(review): rtol is meant to absorb rounding differences between
        # assign_nearest and scipy's computation -- confirm the tolerance.
        chosen = cdist[different, assignments[different]]
        best = cdist[different, ref[different]]
        np.testing.assert_allclose(
            chosen, best, rtol=1e-5,
            err_msg='assign_nearest picked a center that is not a nearest one')
def assign_(xtcf, cf, top, sel, dt=1, outfname=None):
    """Assign trajectory frames to centers by RMSD and save the result.

    Parameters
    ----------
    xtcf : str
        Trajectory file, read with ``md.load_xtc``.
    cf : str
        File holding the centers, read with ``md.load``.
    top : str
        PDB file, read with ``md.load_pdb``; it supplies the topology for
        both loads and for the atom selection.
    sel : str
        Selection expression handed to ``top.top.select``.
    dt : int, optional
        Passed as ``stride`` when reading the trajectory.
    outfname : str, optional
        Output name without extension; ``'.npz'`` is appended. When falsy,
        the base name of ``xtcf`` up to its first dot is used.

    Returns
    -------
    The first element of the pair returned by
    ``libdistance.assign_nearest(..., metric="rmsd")``; it is also stored
    under key ``a`` in the compressed ``.npz`` file.
    """
    if not outfname:
        # basename is the tail component that os.path.split would return.
        outfname = os.path.basename(xtcf).split(".")[0]

    reference = md.load_pdb(top)
    atom_ndx = reference.top.select(sel)

    centers = md.load(cf, top=reference, atom_indices=atom_ndx)
    frames = md.load_xtc(xtcf, top=reference, atom_indices=atom_ndx, stride=dt)

    # The second element of the result is not needed here.
    seq, _ = libdistance.assign_nearest(frames, centers, metric="rmsd")

    np.savez_compressed(outfname + '.npz', a=seq)
    return seq
def test_kcenters_8():
    """KCenters fitted on float32 and float64 copies of one dataset agree."""
    data = np.random.RandomState(1).randn(100, 2)
    X32 = data.astype(np.float32)
    X64 = data.astype(np.float64)

    model32 = KCenters(n_clusters=10, random_state=0).fit([X32])
    model64 = KCenters(n_clusters=10, random_state=0).fit([X64])

    # Fitted attributes must match across the two precisions.
    eq(model32.cluster_centers_, model64.cluster_centers_)
    eq(model32.distances_[0], model64.distances_[0])
    eq(model32.labels_[0], model64.labels_[0])
    assert not np.any(np.isnan(model32.distances_[0]))

    # predict() must agree across precisions and with the fitted labels.
    predicted32 = model32.predict([X32])[0]
    eq(predicted32, model64.predict([X64])[0])
    eq(model32.predict([X32])[0], model32.labels_[0])

    # The model's inertia must equal the one reported by assign_nearest.
    nearest = libdistance.assign_nearest(
        X32, model32.cluster_centers_, "euclidean")
    eq(float(model32.inertia_), nearest[1])
def test_assign_nearest_rmsd_1():
    """RMSD ``assign_nearest`` without ``X_indices`` matches ``md.rmsd``."""
    assignments, inertia = assign_nearest(X_rmsd, Y_rmsd, "rmsd")
    assert isinstance(assignments, np.ndarray)
    assert isinstance(inertia, float)

    # One md.rmsd call per frame of Y_rmsd; transposing the stacked results
    # gives the layout checked below.
    per_center = [md.rmsd(X_rmsd, Y_rmsd[k], precentered=True)
                  for k in range(len(Y_rmsd))]
    dists = np.array(per_center).T
    assert dists.shape == (10, 3)

    np.testing.assert_array_equal(assignments, dists.argmin(axis=1))

    # Inertia is compared with the sum of the distances at the assigned
    # centers.
    expected_inertia = dists[np.arange(10), assignments].sum()
    np.testing.assert_almost_equal(inertia, expected_inertia, decimal=6)
def test_assign_nearest_rmsd_1():
    """RMSD ``assign_nearest`` without ``X_indices`` matches ``cdist``."""
    assignments, inertia = assign_nearest(X_rmsd, Y_rmsd, "rmsd")
    assert isinstance(assignments, np.ndarray)
    assert isinstance(inertia, float)

    # Reference distance matrix from the 'rmsd' cdist.
    pairwise = cdist(X_rmsd, Y_rmsd, 'rmsd')
    assert pairwise.shape == (10, 3)

    expected_labels = pairwise.argmin(axis=1)
    np.testing.assert_array_equal(assignments, expected_labels)

    # Inertia is compared with the sum of the distances at the assigned
    # centers.
    expected_inertia = pairwise[np.arange(10), assignments].sum()
    np.testing.assert_almost_equal(inertia, expected_inertia, decimal=6)
def test_assign_nearest_rmsd_1():
    """Check RMSD ``assign_nearest`` (no ``X_indices``) against ``md.rmsd``."""
    labels_and_inertia = assign_nearest(X_rmsd, Y_rmsd, "rmsd")
    assignments, inertia = labels_and_inertia
    assert isinstance(assignments, np.ndarray)
    assert isinstance(inertia, float)

    # Build the reference matrix from one md.rmsd call per frame of Y_rmsd,
    # then transpose the stacked results.
    columns = []
    for center_idx in range(len(Y_rmsd)):
        columns.append(md.rmsd(X_rmsd, Y_rmsd[center_idx], precentered=True))
    reference = np.array(columns).T
    assert reference.shape == (10, 3)

    np.testing.assert_array_equal(assignments, reference.argmin(axis=1))

    # Sum the reference distances at the assigned centers.
    picked = reference[np.arange(10), assignments]
    np.testing.assert_almost_equal(inertia, picked.sum(), decimal=6)
def test_assign_nearest_rmsd_2():
    """RMSD ``assign_nearest`` restricted by ``X_indices`` matches ``cdist``."""
    assignments, inertia = assign_nearest(X_rmsd, Y_rmsd, "rmsd", X_indices)
    assert isinstance(assignments, np.ndarray)
    assert isinstance(inertia, float)

    # Full 'rmsd' distance matrix, narrowed to the selected rows and
    # converted to double.
    selected = cdist(X_rmsd, Y_rmsd, 'rmsd')[X_indices].astype(np.double)
    assert selected.shape == (5, 3)

    expected_labels = selected.argmin(axis=1)
    np.testing.assert_array_equal(assignments, expected_labels)

    # Inertia is compared with the sum of the distances at the assigned
    # centers.
    expected_inertia = selected[np.arange(5), assignments].sum()
    np.testing.assert_almost_equal(inertia, expected_inertia, decimal=5)
def test_canberra_32_2():
    """Compare float32 canberra ``assign_nearest`` (with ``X_indices``) to scipy.

    Runs ten random trials. The first three rows of ``X`` act as centers and
    five randomly drawn rows are assigned. Where the returned label differs
    from scipy's ``argmin``, the test only requires that the chosen center is
    as close as the reference minimum, so pure tie-breaks are tolerated.
    """
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]
        # randint's upper bound is exclusive: high=10 yields 0..9, the range
        # the deprecated random_integers(low=0, high=9) produced.
        # NOTE(review): the intp cast mirrors the sibling revision of this
        # test -- presumably assign_nearest expects intp indices; confirm.
        X_indices = random.randint(low=0, high=10, size=5).astype(np.intp)

        assignments, _ = assign_nearest(X, Y, 'canberra', X_indices=X_indices)
        cdist = scipy.spatial.distance.cdist(X[X_indices], Y, metric='canberra')
        ref = cdist.argmin(axis=1)

        # Rows whose assignment disagrees with the reference argmin.
        different = np.where(assignments != ref)[0]

        # For every disagreeing row, the distance to the picked center must
        # match that same row's minimum; otherwise the assignment is wrong
        # rather than a tie-break. Nothing is compared when ``different`` is
        # empty.
        # NOTE(review): rtol is meant to absorb rounding differences between
        # assign_nearest and scipy's computation -- confirm the tolerance.
        chosen = cdist[different, assignments[different]]
        best = cdist[different, ref[different]]
        np.testing.assert_allclose(
            chosen, best, rtol=1e-5,
            err_msg='assign_nearest picked a center that is not a nearest one')
def test_assign_nearest_float_double_2():
    """Yield checks of ``assign_nearest`` with ``X_indices`` against scipy.

    For every metric in ``VECTOR_METRICS`` and for the double and float
    fixture pairs, two zero-argument checks are yielded: one for the
    assignments and one for the inertia. The float32 canberra case is
    skipped because it is tested separately.
    """
    # test with X_indices
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            if metric == 'canberra' and X.dtype == np.float32:
                # this is tested separately
                continue

            assignments, inertia = assign_nearest(X, Y, metric, X_indices)
            assert isinstance(assignments, np.ndarray)
            assert isinstance(inertia, float)

            cdist = scipy.spatial.distance.cdist(X[X_indices], Y, metric=metric)
            # Looser tolerance for single precision inputs.
            decimal = 5 if X.dtype == np.float32 else 10

            # Bind this iteration's values as lambda defaults. A plain closure
            # would look the names up when the check finally runs, so a runner
            # that collects every check before running any would only ever
            # test the last iteration's values.
            yield lambda a=assignments, d=cdist: np.testing.assert_array_equal(
                a, d.argmin(axis=1))
            yield lambda a=assignments, i=inertia, d=cdist, dec=decimal: (
                np.testing.assert_almost_equal(
                    i, d[np.arange(5), a].sum(), decimal=dec))
def test_assign_nearest_float_double_2():
    """Yield checks of ``assign_nearest`` with ``X_indices`` against ``cdist``.

    For every metric in ``VECTOR_METRICS`` and for the double and float
    fixture pairs, two zero-argument checks are yielded: one for the
    assignments and one for the inertia. The float32 canberra case is
    skipped because it is tested separately.
    """
    # test with X_indices
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            if metric == 'canberra' and X.dtype == np.float32:
                # this is tested separately
                continue

            assignments, inertia = assign_nearest(X, Y, metric, X_indices)
            assert isinstance(assignments, np.ndarray)
            assert isinstance(inertia, float)

            cdist_1 = cdist(X[X_indices], Y, metric=metric)
            # Looser tolerance for single precision inputs.
            decimal = 5 if X.dtype == np.float32 else 10

            # Bind this iteration's values as lambda defaults. A plain closure
            # would look the names up when the check finally runs, so a runner
            # that collects every check before running any would only ever
            # test the last iteration's values.
            yield lambda a=assignments, d=cdist_1: np.testing.assert_array_equal(
                a, d.argmin(axis=1))
            yield lambda a=assignments, i=inertia, d=cdist_1, dec=decimal: (
                np.testing.assert_almost_equal(
                    i, d[np.arange(5), a].sum(), decimal=dec))
def test_canberra_32_1():
    """Compare float32 canberra ``assign_nearest`` (no ``X_indices``) to scipy.

    Runs ten random trials with the first three rows of ``X`` as centers.
    Where the returned label differs from scipy's ``argmin``, the test only
    requires that the chosen center is as close as the reference minimum.
    """
    # with canberra in float32, there is a rounding issue where many of
    # the distances come out exactly the same, but due to finite floating
    # point resolution, a different one gets picked than by argmin()
    # on the cdist
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]

        assignments, _ = assign_nearest(X, Y, 'canberra')
        cdist = scipy.spatial.distance.cdist(X, Y, metric='canberra')
        ref = cdist.argmin(axis=1)

        # Rows whose assignment disagrees with the reference argmin.
        different = np.where(assignments != ref)[0]

        # A disagreement is only acceptable when it is a tie: the distance
        # to the picked center must match that row's own minimum. Each row
        # is compared against itself, and an empty ``different`` passes
        # trivially.
        # NOTE(review): rtol is meant to absorb the float32 rounding described
        # above -- confirm the tolerance.
        chosen = cdist[different, assignments[different]]
        best = cdist[different, ref[different]]
        np.testing.assert_allclose(
            chosen, best, rtol=1e-5,
            err_msg='assign_nearest picked a center that is not a nearest one')
def test_assign_nearest_double_float_1():
    """Yield checks of ``assign_nearest`` without ``X_indices`` against scipy.

    For every metric in ``VECTOR_METRICS`` and for the double and float
    fixture pairs, two zero-argument checks are yielded: one for the
    assignments (carrying a ``description`` attribute) and one for the
    inertia. The float32 canberra case is skipped because it is tested
    separately.
    """
    # test without X_indices
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            if metric == 'canberra' and X.dtype == np.float32:
                # this is tested separately
                continue

            assignments, inertia = assign_nearest(X, Y, metric)
            assert isinstance(assignments, np.ndarray)
            assert isinstance(inertia, float)

            cdist = scipy.spatial.distance.cdist(X, Y, metric=metric)
            assert cdist.shape == (10, 3)
            # Looser tolerance for single precision inputs.
            decimal = 5 if X.dtype == np.float32 else 10

            # Defaults bind this iteration's values so each yielded check
            # keeps testing them even if it runs after the loop has moved on.
            def check_assignments(a=assignments, d=cdist):
                np.testing.assert_array_equal(a, d.argmin(axis=1))

            check_assignments.description = (
                'assign_nearest: %s %s' % (metric, X.dtype))
            # This check must be yielded as well; otherwise the assignments
            # are never compared with the reference.
            yield check_assignments

            yield lambda a=assignments, i=inertia, d=cdist, dec=decimal: (
                np.testing.assert_almost_equal(
                    i, d[np.arange(10), a].sum(), decimal=dec))
def test_canberra_32_1():
    """Compare float32 canberra ``assign_nearest`` (no ``X_indices``) to scipy.

    Ten random trials are run with the first three rows of ``X`` as centers.
    A label that differs from scipy's ``argmin`` is accepted only when the
    center it names is as close as the reference minimum, i.e. when the
    disagreement is a tie-break.
    """
    # with canberra in float32, there is a rounding issue where many of
    # the distances come out exactly the same, but due to finite floating
    # point resolution, a different one gets picked than by argmin()
    # on the cdist
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]

        assignments, _ = assign_nearest(X, Y, 'canberra')
        cdist = scipy.spatial.distance.cdist(X, Y, metric='canberra')
        ref = cdist.argmin(axis=1)

        # Rows whose assignment disagrees with the reference argmin.
        different = np.where(assignments != ref)[0]

        # For every disagreeing row, the distance to the picked center must
        # match that same row's minimum; otherwise the assignment is wrong
        # rather than a tie-break. Nothing is compared when ``different`` is
        # empty.
        # NOTE(review): rtol is meant to absorb the float32 rounding described
        # above -- confirm the tolerance.
        chosen = cdist[different, assignments[different]]
        best = cdist[different, ref[different]]
        np.testing.assert_allclose(
            chosen, best, rtol=1e-5,
            err_msg='assign_nearest picked a center that is not a nearest one')
def test_assign_nearest_double_float_1():
    """Yield checks of ``assign_nearest`` without ``X_indices`` against ``cdist``.

    For every metric in ``VECTOR_METRICS`` and for the double and float
    fixture pairs, two zero-argument checks are yielded: one for the
    assignments (carrying a ``description`` attribute) and one for the
    inertia. The float32 canberra case is skipped because it is tested
    separately.
    """
    # test without X_indices
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            if metric == 'canberra' and X.dtype == np.float32:
                # this is tested separately
                continue

            assignments, inertia = assign_nearest(X, Y, metric)
            assert isinstance(assignments, np.ndarray)
            assert isinstance(inertia, float)

            cdist_1 = cdist(X, Y, metric=metric)
            assert cdist_1.shape == (10, 3)
            # Looser tolerance for single precision inputs.
            decimal = 5 if X.dtype == np.float32 else 10

            # Defaults bind this iteration's values so each yielded check
            # keeps testing them even if it runs after the loop has moved on.
            def check_assignments(a=assignments, d=cdist_1):
                np.testing.assert_array_equal(a, d.argmin(axis=1))

            check_assignments.description = (
                'assign_nearest: %s %s' % (metric, X.dtype))
            # This check must be yielded as well; otherwise the assignments
            # are never compared with the reference.
            yield check_assignments

            yield lambda a=assignments, i=inertia, d=cdist_1, dec=decimal: (
                np.testing.assert_almost_equal(
                    i, d[np.arange(10), a].sum(), decimal=dec))