Beispiel #1
0
class TestSimilarityMatrix(object):
    """Read-only checks of SimilarityMatrix against the data/similarities.h5 fixture."""

    matrix = None

    def setUp(self):
        # Opened fresh for every test; closed again in tearDown.
        self.matrix = SimilarityMatrix('data/similarities.h5')

    def tearDown(self):
        self.matrix.close()

    def test_find_1(self):
        # find() yields (label, score) tuples for the query fragment.
        result = list(self.matrix.find('2n6i_4FU_frag1', 0.98))

        expected = [('2n6i_4FU_frag2', 1.0), ('2n6i_4FU_frag6', 1.0)]
        # Scores are compared to 3 decimals, labels must match exactly.
        assert_array_almost_equal([r[1] for r in result], [r[1] for r in expected], 3)
        # Compare against `expected`; comparing `result` with itself can never fail.
        eq_([r[0] for r in result], [r[0] for r in expected])

    def test_iter_first2(self):
        # Iterating the matrix yields (label1, label2, score) tuples.
        myiter = iter(self.matrix)

        result = [next(myiter), next(myiter)]

        expected = [('2mlm_2W7_frag1', '2mlm_2W7_frag2', 0.5877), ('2mlm_2W7_frag2', '3wvm_STE_frag1', 0.4633)]
        assert_array_almost_equal([r[2] for r in result], [r[2] for r in expected], 3)
        # Label pairs are checked against `expected`, not against `result` itself.
        eq_([(r[0], r[1],) for r in result], [(r[0], r[1],) for r in expected])

    def test_iter_last(self):
        myiter = iter(self.matrix)

        # Exhaust the iterator, keeping only the final row.
        result = None
        for row in myiter:
            result = row

        expected = ('3wyl_3KB_frag20', '3wyl_3KB_frag21', 0.999496452277409)
        assert_almost_equal(result[2], expected[2], 5)
        eq_(result[:2], expected[:2])
def test_simmatrix_import_run_ignore_upper_triangle():
    """Import a tsv with ignore_upper_triangle=True and check the stored pairs.

    The input holds self-pairs and both orderings of two fragment pairs; the
    resulting matrix is expected to contain each of the two pairs once.
    """
    output_fn = tmpname()

    tsv = '''frag_id1	frag_id2	score
2mlm_2W7_frag1	2mlm_2W7_frag1	1.0000000000000000
2mlm_2W7_frag2	2mlm_2W7_frag2	1.0000000000000000
2mlm_2W7_frag1	2mlm_2W7_frag2	0.5877164873731594
2mlm_2W7_frag2	3wvm_STE_frag1	0.4633096818493935
2mlm_2W7_frag2	2mlm_2W7_frag1	0.5877164873731594
3wvm_STE_frag1	2mlm_2W7_frag2	0.4633096818493935
'''
    inputfile = StringIO(tsv)

    try:
        script.simmatrix_import_run(inputfile=inputfile,
                                    inputformat='tsv',
                                    simmatrixfn=output_fn,
                                    fragmentsdb='data/fragments.sqlite',
                                    nrrows=2,
                                    ignore_upper_triangle=True)

        simmatrix = SimilarityMatrix(output_fn)
        result = [r for r in simmatrix]
        simmatrix.close()
        expected = [('2mlm_2W7_frag1', '2mlm_2W7_frag2', 0.5877), ('2mlm_2W7_frag2', '3wvm_STE_frag1', 0.4633)]
        # Scores are compared to 3 decimals.
        assert_array_almost_equal([r[2] for r in result], [r[2] for r in expected], 3)
        # Labels are checked against `expected`; the previous self-comparison of
        # `result` always passed and hid a wrong label in `expected`.
        assert [(r[0], r[1],) for r in result] == [(r[0], r[1],) for r in expected]
    finally:
        # The output may not exist if the import failed early.
        if os.path.exists(output_fn):
            os.remove(output_fn)
def test_simmatrix_filter_keep(keep_fragments_db, output_simmatrix_fn):
    """Filter data/similarities.h5 with `keep_fragments_db` and expect 93 pairs in the output.

    The last argument of `simmatrix_filter` is passed as None here.
    """
    source_fn = 'data/similarities.h5'
    script.simmatrix_filter(source_fn, output_simmatrix_fn, keep_fragments_db, None)

    # Count the pairs, then close the file before asserting.
    filtered = SimilarityMatrix(output_simmatrix_fn)
    pair_count = len(filtered.pairs)
    filtered.close()

    assert pair_count == 93
Beispiel #4
0
def test_simmatrix_import_run_ignore_upper_triangle():
    """Import a tsv with ignore_upper_triangle=True and check the stored pairs.

    The input holds self-pairs and both orderings of two fragment pairs; the
    resulting matrix is expected to contain each of the two pairs once.
    """
    output_fn = tmpname()

    tsv = '''frag_id1	frag_id2	score
2mlm_2W7_frag1	2mlm_2W7_frag1	1.0000000000000000
2mlm_2W7_frag2	2mlm_2W7_frag2	1.0000000000000000
2mlm_2W7_frag1	2mlm_2W7_frag2	0.5877164873731594
2mlm_2W7_frag2	3wvm_STE_frag1	0.4633096818493935
2mlm_2W7_frag2	2mlm_2W7_frag1	0.5877164873731594
3wvm_STE_frag1	2mlm_2W7_frag2	0.4633096818493935
'''
    inputfile = StringIO(tsv)

    try:
        script.simmatrix_import_run(inputfile=inputfile,
                                    inputformat='tsv',
                                    simmatrixfn=output_fn,
                                    fragmentsdb='data/fragments.sqlite',
                                    nrrows=2,
                                    ignore_upper_triangle=True)

        simmatrix = SimilarityMatrix(output_fn)
        result = [r for r in simmatrix]
        simmatrix.close()
        expected = [('2mlm_2W7_frag1', '2mlm_2W7_frag2', 0.5877), ('2mlm_2W7_frag2', '3wvm_STE_frag1', 0.4633)]
        # Scores are compared to 3 decimals.
        assert_array_almost_equal([r[2] for r in result], [r[2] for r in expected], 3)
        # Labels are checked against `expected`; the previous self-comparison of
        # `result` always passed and hid a wrong label in `expected`.
        assert [(r[0], r[1],) for r in result] == [(r[0], r[1],) for r in expected]
    finally:
        # The output may not exist if the import failed early.
        if os.path.exists(output_fn):
            os.remove(output_fn)
def test_simmatrix_importfpneigh_run_ignore_upper_triangle():
    """Import an fpneigh-style listing with ignore_upper_triangle=True and check the stored pairs.

    The input lists each fragment with itself and with its neighbours in both
    directions; the resulting matrix is expected to hold each pair once.
    """
    output_fn = tmpname()

    tsv = '''Compounds similar to 2mlm_2W7_frag1:
2mlm_2W7_frag1   1.0000
2mlm_2W7_frag2   0.5877
Compounds similar to 2mlm_2W7_frag2:
2mlm_2W7_frag2   1.0000
2mlm_2W7_frag1   0.5877
3wvm_STE_frag1   0.4633
'''
    inputfile = StringIO(tsv)

    try:
        script.simmatrix_importfpneigh_run(inputfile=inputfile,
                                           simmatrixfn=output_fn,
                                           fragmentsdb='data/fragments.sqlite',
                                           nrrows=3,
                                           ignore_upper_triangle=True)

        simmatrix = SimilarityMatrix(output_fn)
        rows = [r for r in simmatrix]
        simmatrix.close()
        expected = [(u'2mlm_2W7_frag1', u'2mlm_2W7_frag2', 0.5877), (u'2mlm_2W7_frag2', u'3wvm_STE_frag1', 0.4633)]
        assert rows == expected
    finally:
        # Guard the cleanup: if the import failed before the file was created,
        # an unconditional remove would raise and mask the original error.
        if os.path.exists(output_fn):
            os.remove(output_fn)
Beispiel #6
0
def test_simmatrix_filter_keep(keep_fragments_db, output_simmatrix_fn):
    """Filter data/similarities.h5 with `keep_fragments_db` and expect 93 pairs in the output.

    The last argument of `simmatrix_filter` is passed as None here.
    """
    source_fn = 'data/similarities.h5'
    script.simmatrix_filter(source_fn, output_simmatrix_fn, keep_fragments_db, None)

    # Count the pairs, then close the file before asserting.
    filtered = SimilarityMatrix(output_simmatrix_fn)
    pair_count = len(filtered.pairs)
    filtered.close()

    assert pair_count == 93
Beispiel #7
0
def test_simmatrix_importfpneigh_run_ignore_upper_triangle():
    """Import an fpneigh-style listing with ignore_upper_triangle=True and check the stored pairs.

    The input lists each fragment with itself and with its neighbours in both
    directions; the resulting matrix is expected to hold each pair once.
    """
    output_fn = tmpname()

    tsv = '''Compounds similar to 2mlm_2W7_frag1:
2mlm_2W7_frag1   1.0000
2mlm_2W7_frag2   0.5877
Compounds similar to 2mlm_2W7_frag2:
2mlm_2W7_frag2   1.0000
2mlm_2W7_frag1   0.5877
3wvm_STE_frag1   0.4633
'''
    inputfile = StringIO(tsv)

    try:
        script.simmatrix_importfpneigh_run(inputfile=inputfile,
                                           simmatrixfn=output_fn,
                                           fragmentsdb='data/fragments.sqlite',
                                           nrrows=3,
                                           ignore_upper_triangle=True)

        simmatrix = SimilarityMatrix(output_fn)
        rows = [r for r in simmatrix]
        simmatrix.close()
        expected = [(u'2mlm_2W7_frag1', u'2mlm_2W7_frag2', 0.5877), (u'2mlm_2W7_frag2', u'3wvm_STE_frag1', 0.4633)]
        assert rows == expected
    finally:
        # Guard the cleanup: if the import failed before the file was created,
        # an unconditional remove would raise and mask the original error.
        if os.path.exists(output_fn):
            os.remove(output_fn)
Beispiel #8
0
class TestWebservice(object):
    """Checks of the `server` module functions, backed by data/similarities.h5."""

    def setUp(self):
        # Open the fixture matrix and build the WSGI application around it.
        self.matrix = SimilarityMatrix('data/similarities.h5')
        self.app = server.wsgi_app(self.matrix)

    def tearDown(self):
        self.matrix.close()

    def test_get_similar_fragments(self):
        query = '3j7u_NDP_frag24'
        threshold = 0.85
        single_hit = {
            'query_frag_id': '3j7u_NDP_frag24',
            'hit_frag_id': '3j7u_NDP_frag23',
            'score': 0.8991,
        }

        # The call is made inside a test request context of the wrapped app.
        with self.app.app.test_request_context():
            found = server.get_similar_fragments(query, threshold, 1000)
            eq_(found, [single_hit])

    def test_get_version(self):
        # The reported version must equal the imported __version__.
        eq_(server.get_version(), {'version': __version__})

    def test_wsgi_app(self):
        # wsgi_app() is expected to store the matrix in the app config.
        configured = self.app.app.config['matrix']
        eq_(configured, self.matrix)
def test_simmatrix_filter_skip(output_simmatrix_fn):
    """Filter data/similarities.h5 with a one-label skip list and expect 11857 pairs in the output.

    The third argument of `simmatrix_filter` is passed as None here.
    """
    skip_stream = StringIO('2mm3_CHO_frag1\n')
    source_fn = 'data/similarities.h5'

    script.simmatrix_filter(source_fn, output_simmatrix_fn, None, skip_stream)

    # Count the pairs, then close the file before asserting.
    filtered = SimilarityMatrix(output_simmatrix_fn)
    pair_count = len(filtered.pairs)
    filtered.close()

    assert pair_count == 11857
Beispiel #10
0
def test_simmatrix_filter_skip(output_simmatrix_fn):
    """Filter data/similarities.h5 with a one-label skip list and expect 11857 pairs in the output.

    The third argument of `simmatrix_filter` is passed as None here.
    """
    skip_stream = StringIO('2mm3_CHO_frag1\n')
    source_fn = 'data/similarities.h5'

    script.simmatrix_filter(source_fn, output_simmatrix_fn, None, skip_stream)

    # Count the pairs, then close the file before asserting.
    filtered = SimilarityMatrix(output_simmatrix_fn)
    pair_count = len(filtered.pairs)
    filtered.close()

    assert pair_count == 11857
Beispiel #11
0
def test_merge():
    """Write three small pair matrices, merge them with pairs.merge and check the combined rows."""
    infiles = [tmpname(), tmpname(), tmpname()]
    outfile = tmpname()
    first_fn, second_fn, third_fn = infiles

    try:
        # fill infiles
        source = SimilarityMatrix(first_fn, 'w', 1, 2**16-1, 2)
        source.update([('a', 'b', 0.2)], {'a': 1, 'b': 2, 'c': 3})
        source.close()

        # matrix with same labels -> copy pairs table by dump/append, ignores labels tables
        source = SimilarityMatrix(second_fn, 'w', 2, 2**16-1, 3)
        source.update([('a', 'c', 0.6)], {'a': 1, 'b': 2, 'c': 3})
        source.close()

        # matrix generated with different labels -> copy pairs table by iterate/update, adds missing labels
        source = SimilarityMatrix(third_fn, 'w', 2, 2**16-1, 3)
        source.update(
            [('b', 'e', 0.4), ('e', 'f', 0.8)],
            {'b': 1, 'e': 2, 'f': 3},
        )
        source.close()

        pairs.merge(infiles, outfile)

        # compare it
        merged = SimilarityMatrix(outfile)
        merged_rows = list(merged)
        merged.close()
        assert merged_rows == [
            ('a', 'b', 0.2),
            ('a', 'c', 0.6),
            ('b', 'e', 0.4),
            ('e', 'f', 0.8),
        ]
    finally:
        # Remove whichever of the temporary files were actually created,
        # inputs first, then the merged output.
        for leftover in infiles + [outfile]:
            if os.path.isfile(leftover):
                os.remove(leftover)
Beispiel #12
0
class SimilarityMatrixInMemory(object):
    """Context manager handing out a SimilarityMatrix opened on a temporary file name.

    The matrix is opened in mode 'a' with driver='H5FD_CORE' and
    driver_core_backing_store=0 (presumably an in-memory HDF5 file that is
    not written to disk -- confirm against the PyTables documentation).
    Entering the context returns the matrix; leaving it calls close().
    """

    def __init__(self):
        self.matrix_fn = tmpname()
        options = {'driver': 'H5FD_CORE', 'driver_core_backing_store': 0}
        self.matrix = SimilarityMatrix(self.matrix_fn, 'a', **options)

    def __enter__(self):
        return self.matrix

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Close the matrix and delete its file if one exists on disk."""
        self.matrix.close()
        leftover = self.matrix_fn
        if os.path.isfile(leftover):
            os.remove(leftover)
Beispiel #13
0
class SimilarityMatrixInMemory(object):
    """Context manager handing out a SimilarityMatrix opened on a temporary file name.

    The matrix is opened in mode 'a' with driver='H5FD_CORE' and
    driver_core_backing_store=0 (presumably an in-memory HDF5 file that is
    not written to disk -- confirm against the PyTables documentation).
    Entering the context returns the matrix; leaving it calls close().
    """

    def __init__(self):
        self.matrix_fn = tmpname()
        options = {'driver': 'H5FD_CORE', 'driver_core_backing_store': 0}
        self.matrix = SimilarityMatrix(self.matrix_fn, 'a', **options)

    def __enter__(self):
        return self.matrix

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Close the matrix and delete its file if one exists on disk."""
        self.matrix.close()
        leftover = self.matrix_fn
        if os.path.isfile(leftover):
            os.remove(leftover)
Beispiel #14
0
def test_merge():
    """Write three small pair matrices, merge them with pairs.merge and check the combined rows."""
    infiles = [tmpname(), tmpname(), tmpname()]
    outfile = tmpname()
    first_fn, second_fn, third_fn = infiles

    try:
        # fill infiles
        source = SimilarityMatrix(first_fn, 'w', 1, 2**16 - 1, 2)
        source.update([('a', 'b', 0.2)], {'a': 1, 'b': 2, 'c': 3})
        source.close()

        # matrix with same labels -> copy pairs table by dump/append, ignores labels tables
        source = SimilarityMatrix(second_fn, 'w', 2, 2**16 - 1, 3)
        source.update([('a', 'c', 0.6)], {'a': 1, 'b': 2, 'c': 3})
        source.close()

        # matrix generated with different labels -> copy pairs table by iterate/update, adds missing labels
        source = SimilarityMatrix(third_fn, 'w', 2, 2**16 - 1, 3)
        source.update(
            [('b', 'e', 0.4), ('e', 'f', 0.8)],
            {'b': 1, 'e': 2, 'f': 3},
        )
        source.close()

        pairs.merge(infiles, outfile)

        # compare it
        merged = SimilarityMatrix(outfile)
        merged_rows = list(merged)
        merged.close()
        assert merged_rows == [
            ('a', 'b', 0.2),
            ('a', 'c', 0.6),
            ('b', 'e', 0.4),
            ('e', 'f', 0.8),
        ]
    finally:
        # Remove whichever of the temporary files were actually created,
        # inputs first, then the merged output.
        for leftover in infiles + [outfile]:
            if os.path.isfile(leftover):
                os.remove(leftover)
Beispiel #15
0
class TestFrozenSimilarityMatrix(object):
    """Tests of FrozenSimilarityMatrix filled from a small pair matrix over labels a, b, c and d."""

    pair_matrix_fn = None
    pair_matrix = None
    matrix_fn = None
    matrix = None

    def setUp(self):
        # Source pair matrix holding four scored pairs.
        self.pair_matrix_fn = tmpname()
        self.pair_matrix = SimilarityMatrix(
            self.pair_matrix_fn, "a",
            driver="H5FD_CORE", driver_core_backing_store=0)
        label_map = {"a": 0, "b": 1, "c": 2, "d": 3}
        scored_pairs = [
            ("a", "b", 0.9),
            ("a", "c", 0.5),
            ("b", "c", 0.6),
            ("d", "c", 0.7),
        ]
        self.pair_matrix.update(scored_pairs, label_map)

        # Frozen matrix under test; each test fills it through from_pairs().
        self.matrix_fn = tmpname()
        self.matrix = FrozenSimilarityMatrix(
            self.matrix_fn, "a",
            driver="H5FD_CORE", driver_core_backing_store=0)

    def tearDown(self):
        # Close each matrix and drop its file if present, pair matrix first.
        opened = (
            (self.pair_matrix, self.pair_matrix_fn),
            (self.matrix, self.matrix_fn),
        )
        for handle, filename in opened:
            handle.close()
            if os.path.isfile(filename):
                os.remove(filename)

    def test_from_pairs_defaults(self):
        self.matrix.from_pairs(self.pair_matrix, 10)

        frame = self.matrix.to_pandas()
        axis = ["a", "b", "c", "d"]
        # Every pair is expected in both directions.
        wanted = pd.DataFrame(
            [
                [0.0, 0.9, 0.5, 0.0],
                [0.9, 0.0, 0.6, 0.0],
                [0.5, 0.6, 0.0, 0.7],
                [0.0, 0.0, 0.7, 0.0],
            ],
            index=axis,
            columns=axis,
        )
        pdt.assert_almost_equal(frame, wanted)

    def test_from_pairs_multiframe(self):
        self.matrix.from_pairs(self.pair_matrix, 1, None, False)

        frame = self.matrix.to_pandas()
        axis = ["a", "b", "c", "d"]
        # Same expectation as with the default arguments.
        wanted = pd.DataFrame(
            [
                [0.0, 0.9, 0.5, 0.0],
                [0.9, 0.0, 0.6, 0.0],
                [0.5, 0.6, 0.0, 0.7],
                [0.0, 0.0, 0.7, 0.0],
            ],
            index=axis,
            columns=axis,
        )
        pdt.assert_almost_equal(frame, wanted)

    def test_from_pairs_limited(self):
        self.matrix.from_pairs(self.pair_matrix, 1, 1, False)

        frame = self.matrix.to_pandas()
        axis = ["a", "b", "c", "d"]
        # Only the a/b score is expected to be present.
        wanted = pd.DataFrame(
            [
                [0.0, 0.9, 0.0, 0.0],
                [0.9, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0],
            ],
            index=axis,
            columns=axis,
        )
        pdt.assert_almost_equal(frame, wanted)

    def test_from_pairs_singlesided(self):
        self.matrix.from_pairs(self.pair_matrix, 10, None, True)

        frame = self.matrix.to_pandas()
        print(frame)
        axis = ["a", "b", "c", "d"]
        # Each pair is expected in one direction only (row c stays empty).
        wanted = pd.DataFrame(
            [
                [0.0, 0.9, 0.5, 0.0],
                [0.0, 0.0, 0.6, 0.0],
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.7, 0.0],
            ],
            index=axis,
            columns=axis,
        )
        pdt.assert_almost_equal(frame, wanted)

    def test_find_defaults(self):
        self.matrix.from_pairs(self.pair_matrix, 10)

        # Both hits at or above 0.55 are expected, highest score first.
        found = self.matrix.find("c", 0.55)
        eq_(found, [("d", 0.7), ("b", 0.6)])

    def test_find_limit(self):
        self.matrix.from_pairs(self.pair_matrix, 10)

        # With a third argument of 1 only the best hit is expected.
        found = self.matrix.find("c", 0.55, 1)
        eq_(found, [("d", 0.7)])

    def test_find_cutoffhigh_nohits(self):
        self.matrix.from_pairs(self.pair_matrix, 10)

        # No score of "c" reaches 0.9.
        found = self.matrix.find("c", 0.9)
        eq_(found, [])

    def test_find_badkey_keyerror(self):
        self.matrix.from_pairs(self.pair_matrix, 10)

        # "f" is not one of the labels given in setUp.
        with assert_raises(KeyError):
            self.matrix.find("f", 0.45)

    def test_find_singlesided(self):
        self.matrix.from_pairs(self.pair_matrix, 10, None, True)
        print(self.matrix.scores.read())
        # Row "c" is empty in the single sided matrix, so no hits are expected.
        found = self.matrix.find("c", 0.0)
        eq_(found, [])
Beispiel #16
0
def matrix():
    """Yield a SimilarityMatrix opened on data/similarities.h5 and close it afterwards.

    NOTE(review): this looks like a pytest yield-fixture whose decorator is
    outside this view -- confirm.
    """
    sim_matrix = SimilarityMatrix('data/similarities.h5')
    try:
        yield sim_matrix
    finally:
        # Runs on normal resumption and also when the generator is closed or
        # an exception is thrown in at the yield, so the file is always closed.
        sim_matrix.close()