def test_l2():
    """Check ``l2`` against hand-computed expected distances.

    Both expected values go through a square root, so they are compared with
    ``math.isclose`` instead of ``==``.  An exact match would only hold if
    ``l2`` happened to round its intermediate results the same way as the
    expressions written in this test.
    """
    a = get_series('cdr3.test.ann')
    b = get_series('cdr3.test2.ann')
    assert math.isclose(l2(a, b), 23230 * np.sqrt(2))

    a = get_series('cdr3.test3.ann')
    b = get_series('cdr3.test4.ann')
    assert math.isclose(l2(a, b), math.sqrt(3**2 + 2**2))
def test_jensen_shannon():
    """Check ``jensen_shannon`` on sample pairs with known distances."""
    cases = (
        # two identical samples gives distance 0
        ('cdr3.test.ann', 'cdr3.test.ann', 0),
        # two samples which are identical once normalized also give 0
        #   a = <0, 2>, b = <0, 4>
        #   a' = a/|a| = <0, 1>, b' = <0, 1>
        ('cdr3.test3.ann', 'cdr3.test9.ann', 0),
        # a = <0, 1>, b = <1, 0>
        #   m = (a + b)/2 = <0.5, 0.5>
        #   D(a || m) = 0*log(0/0.5) + 1*log(1/0.5) = 0 + 1*1 = 1 (log base 2)
        #   D(b || m) = 1
        #   JSdist(a, b) = sqrt((D(a || m) + D(b || m)) / 2)
        #                = sqrt((1 + 1) / 2) = 1
        ('cdr3.test10.ann', 'cdr3.test11.ann', 1),
        # make sure that things are aligned by index
        ('cdr3.test10.ann', 'cdr3.test13.ann', 1),
        # check that a fill value of 0 is used for missing cdr3s
        ('cdr3.test10.ann', 'cdr3.test12.ann', 1),
    )
    for left_name, right_name, expected in cases:
        left = get_series(left_name)
        right = get_series(right_name)
        assert jensen_shannon(left, right) == expected
def test_jaccard_index():
    """Check ``jaccard_index`` on two sample pairs with known results."""
    cases = (
        ('cdr3.test3.ann', 'cdr3.test4.ann', 1 / 2),
        ('cdr3.test3.ann', 'cdr3.test5.ann', 1 / 3),
    )
    for left_name, right_name, expected in cases:
        left = get_series(left_name)
        right = get_series(right_name)
        result = jaccard_index(left, right)
        assert result == expected
def test_min():
    """Check the row-wise minimum of two samples after filling gaps with 0."""
    first = get_series('cdr3.test6.ann')
    second = get_series('cdr3.test7.ann')

    # put both samples side by side; entries missing from one sample become 0
    frame = pd.DataFrame({'a': first, 'b': second})
    frame = frame.fillna(0)
    row_min = frame.apply(min, axis=1)

    assert len(row_min) == 5
    expected_by_label = (('a', 1), ('b', 3), ('c', 0), ('d', 0), ('e', 7))
    for label, expected in expected_by_label:
        assert row_min[label] == expected
def test_add():
    """Check that reducing with ``.add`` matches a single direct ``.add``."""

    def add_filled(total, nxt):
        # same operation as the direct call: missing entries count as 0
        return total.add(nxt, fill_value=0)

    # reference result, using the .add() method directly
    first = get_series('cdr3.test.ann')
    second = get_series('cdr3.test2.ann')
    expected = first.add(second, fill_value=0)

    # the reduction form, which extends to any number of series
    first = get_series('cdr3.test.ann')
    second = get_series('cdr3.test2.ann')
    reduced = functools.reduce(add_filled, [first, second])
    assert reduced.equals(expected)
def test_get_pairwise_distances():
    """Check ``get_pairwise_distances`` for inputs of zero to three vectors."""
    # no vectors
    empty_result = get_pairwise_distances([], jaccard)
    assert empty_result == []

    # 1 vec
    sample = get_series('cdr3.test3.ann')
    single_result = get_pairwise_distances([sample], jaccard)
    assert single_result == []

    # 2 vecs
    sample = get_series('cdr3.test3.ann')
    pair_result = get_pairwise_distances([sample, sample], jaccard)
    assert pair_result == [0]

    # 3 vecs
    sample = get_series('cdr3.test3.ann')
    other = get_series('cdr3.test4.ann')
    triple_result = get_pairwise_distances([sample, sample, other], lp(1))
    assert triple_result == [0, 5, 5]
def test_lp():
    """Check the distance functions built by ``lp(p)`` for p = 1, 2 and 3.

    The p=2 and p=3 expectations involve roots, so they are compared with
    ``math.isclose`` instead of ``==``.  An exact match would only hold if
    ``lp`` happened to round its intermediate results the same way as the
    expressions written in this test.  The p=1 expectation is a small integer
    and stays an exact comparison.
    """
    # p=1
    a = get_series('cdr3.test3.ann')
    b = get_series('cdr3.test4.ann')
    assert lp(1)(a, b) == 3 + 2

    # p=2
    a = get_series('cdr3.test.ann')
    b = get_series('cdr3.test2.ann')
    assert math.isclose(lp(2)(a, b), 23230 * np.sqrt(2))

    a = get_series('cdr3.test3.ann')
    b = get_series('cdr3.test4.ann')
    assert math.isclose(lp(2)(a, b), math.sqrt(3**2 + 2**2))

    # p=3
    a = get_series('cdr3.test3.ann')
    b = get_series('cdr3.test4.ann')
    assert math.isclose(lp(3)(a, b), (3**3 + 2**3)**(1 / 3))
def test_linfty():
    """Check ``linfty`` on sample pairs with known maximum differences."""
    cases = (
        # two identical samples gives distance 0
        ('cdr3.test.ann', 'cdr3.test.ann', 0),
        # between two lines, the first one has the max diff
        ('cdr3.test3.ann', 'cdr3.test4.ann', 3),
        # a letter present in one sample and missing in the other has the
        # max diff
        ('cdr3.test6.ann', 'cdr3.test7.ann', 6),
        # the max diff isn't from the first line in the file
        ('cdr3.test7.ann', 'cdr3.test8.ann', 1),
    )
    for left_name, right_name, expected in cases:
        left = get_series(left_name)
        right = get_series(right_name)
        assert linfty(left, right) == expected
def test_make_series_compatible():
    """Run ``make_series_compatible`` on two samples and unpack the result.

    There are no assertions; the test passes as long as the call returns
    something that unpacks into exactly two values.
    """
    first = get_series('cdr3.test4.ann')
    second = get_series('cdr3.test5.ann')
    pair = [first, second]
    first, second = make_series_compatible(pair)
def test_init():
    """Load one sample through ``get_series``; passes if nothing is raised."""
    sample_name = 'cdr3.a.A_2000_2001_d_00_47407.ann'
    _ = get_series(sample_name)
def test_l2_distance():
    """Compute sqrt(sum((a - b)**2)) for two samples by hand.

    There are no assertions; the test passes as long as the computation
    runs without raising.
    """
    first = get_series('cdr3.test.ann')
    second = get_series('cdr3.test2.ann')

    # entries missing from one sample are treated as 0 before subtracting
    difference = first.subtract(second, fill_value=0)
    sum_of_squares = (difference**2).sum()
    # NOTE(review): the result is never checked -- confirm whether an
    # assertion against an expected value was intended here
    c = np.sqrt(sum_of_squares)
def test_weighted_jaccard_index():
    """Check ``weighted_jaccard_index`` on one sample pair with a known result."""
    first = get_series('cdr3.test6.ann')
    second = get_series('cdr3.test7.ann')
    expected = 11 / 24
    assert weighted_jaccard_index(first, second) == expected