コード例 #1
0
    def test_scale(self):
        """Check ``kmer.scale`` against a hand-computed expectation.

        Two k=8 profiles are scaled into two fresh output files. The expected
        result multiplies the profile with the smaller total count by the
        ratio of the two totals and leaves the other one unchanged (factor
        1.0); both output files are then checked with
        ``utils.test_profile_file``.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename_left = self.empty()
        filename_right = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as handle_left, \
                utils.open_profile(self.profile(counts_right, 8)) as handle_right, \
                utils.open_profile(filename_left, 'w') as out_left, \
                utils.open_profile(filename_right, 'w') as out_right:
            kmer.scale(handle_left, handle_right, out_left, out_right)

        # Totals are taken once, as floats, so the expected factor is a true
        # ratio even when `/` on integers would truncate (Python 2 without
        # `from __future__ import division`).
        total_left = float(sum(counts_left.values()))
        total_right = float(sum(counts_right.values()))

        if total_left < total_right:
            scale_left = total_right / total_left
            scale_right = 1.0
        else:
            scale_left = 1.0
            scale_right = total_left / total_right

        # Only values are rewritten, so iterating over the keys is safe.
        for s in counts_left:
            counts_left[s] *= scale_left
        for s in counts_right:
            counts_right[s] *= scale_right

        utils.test_profile_file(filename_left, counts_left, 8)
        utils.test_profile_file(filename_right, counts_right, 8)
コード例 #2
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_scale(self):
        """Check ``kmer.scale`` against a hand-computed expectation.

        Two k=8 profiles are scaled into two fresh output files. The expected
        result multiplies the profile with the smaller total count by the
        ratio of the two totals and leaves the other one unchanged (factor
        1.0); both output files are then checked with
        ``utils.test_profile_file``.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename_left = self.empty()
        filename_right = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as handle_left, \
                utils.open_profile(self.profile(counts_right, 8)) as handle_right, \
                utils.open_profile(filename_left, 'w') as out_left, \
                utils.open_profile(filename_right, 'w') as out_right:
            kmer.scale(handle_left, handle_right, out_left, out_right)

        # Totals are taken once, as floats, so the expected factor is a true
        # ratio even when `/` on integers would truncate (Python 2 without
        # `from __future__ import division`).
        total_left = float(sum(counts_left.values()))
        total_right = float(sum(counts_right.values()))

        if total_left < total_right:
            scale_left = total_right / total_left
            scale_right = 1.0
        else:
            scale_left = 1.0
            scale_right = total_left / total_right

        # Only values are rewritten, so iterating over the keys is safe.
        for s in counts_left:
            counts_left[s] *= scale_left
        for s in counts_right:
            counts_right[s] *= scale_right

        utils.test_profile_file(filename_left, counts_left, 8)
        utils.test_profile_file(filename_right, counts_right, 8)
コード例 #3
0
    def namedEntitySimilarityFeatureExtractor(self, originalDocuments,
                                              machineSummary, humanSummaries):
        """Score the machine summary's named entities against two reference sets.

        Named entities are extracted with ``self.nerTagger``, passed through
        ``utils.removeStopwords`` and turned into counts with ``utils.counts``.
        The machine summary's counts are compared with those of every original
        document and of every human summary using ``js.JS_Divergence``.

        Returns a two-element list: the mean divergence against the original
        documents, then the mean divergence against the human summaries. A
        mean greater than 1 is replaced by 1.

        Raises ZeroDivisionError when ``originalDocuments`` or
        ``humanSummaries`` is empty, because each mean divides by the number
        of references.
        """
        def entities(text):
            # Named entities of one text, with stopwords removed.
            return utils.removeStopwords(
                utils.getNamedEntities(self.nerTagger, text))

        peer = entities(machineSummary)
        documentEntities = [entities(doc) for doc in originalDocuments]
        summaryEntities = [entities(doc) for doc in humanSummaries]

        peerCount = utils.counts(peer)
        documentCounts = [utils.counts(found) for found in documentEntities]
        summaryCounts = [utils.counts(found) for found in summaryEntities]

        documentScores = [
            js.JS_Divergence(peerCount, reference)
            for reference in documentCounts
        ]
        summaryScores = [
            js.JS_Divergence(peerCount, reference)
            for reference in summaryCounts
        ]

        results = []
        for scores in (documentScores, summaryScores):
            mean = sum(scores) / float(len(scores))
            results.append(1 if mean > 1 else mean)
        return results
コード例 #4
0
ファイル: test_kdistlib.py プロジェクト: LUMC/kPAL
    def test_ProfileDistance_distance_k8(self):
        """Default ``ProfileDistance`` of the two k=8 test profiles is ~0.4626."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)

        first, second = [klib.Profile(utils.as_array(counts, 8))
                         for counts in (counts_left, counts_right)]

        metric = kdistlib.ProfileDistance()
        measured = metric.distance(first, second)
        np.testing.assert_almost_equal(measured, 0.4626209322)
コード例 #5
0
ファイル: test_klib.py プロジェクト: MatthewRalston/kPAL
    def test_profile_merge(self):
        """Merging one profile into another yields the summed counts."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)

        target, other = [klib.Profile(utils.as_array(counts, 8))
                         for counts in (counts_left, counts_right)]

        # merge() is expected to update `target` in place.
        target.merge(other)

        expected = counts_left + counts_right
        utils.test_profile(target, expected, 8)
コード例 #6
0
ファイル: test_klib.py プロジェクト: LUMC/kPAL
    def test_profile_merge(self):
        """Merging one profile into another yields the summed counts."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)

        target, other = [klib.Profile(utils.as_array(counts, 8))
                         for counts in (counts_left, counts_right)]

        # merge() is expected to update `target` in place.
        target.merge(other)

        expected = counts_left + counts_right
        utils.test_profile(target, expected, 8)
コード例 #7
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_distance_smooth(self):
        """``kmer.distance`` with smoothing prints the distance at 3 decimals."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        with utils.open_profile(self.profile(counts_left, 8, 'left')) as left_in, \
                utils.open_profile(self.profile(counts_right, 8, 'right')) as right_in:
            kmer.distance(left_in, right_in, report, do_smooth=True, precision=3)

        written = report.getvalue()
        assert written == 'left right 0.077\n'
コード例 #8
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_distance(self):
        """``kmer.distance`` with defaults prints both names and the distance."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        with utils.open_profile(self.profile(counts_left, 8, 'left')) as left_in, \
                utils.open_profile(self.profile(counts_right, 8, 'right')) as right_in:
            kmer.distance(left_in, right_in, report)

        written = report.getvalue()
        assert written == 'left right %.10f\n' % 0.4626209323
コード例 #9
0
 def test_count_multi(self):
     """Counting two FASTA files stores one named profile per file."""
     counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
     counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
     filename = self.empty()
     with open(self.fasta(utils.SEQUENCES_LEFT)) as fasta_left, \
             open(self.fasta(utils.SEQUENCES_RIGHT)) as fasta_right, \
             utils.open_profile(filename, 'w') as profile_out:
         kmer.count([fasta_left, fasta_right], profile_out, 8, names=['a', 'b'])
     # Profile 'a' comes from the first input, 'b' from the second.
     for name, expected in (('a', counts_left), ('b', counts_right)):
         utils.test_profile_file(filename, expected, 8, name=name)
コード例 #10
0
    def test_distance(self):
        """``kmer.distance`` with defaults prints both names and the distance."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        with utils.open_profile(self.profile(counts_left, 8, 'left')) as left_in, \
                utils.open_profile(self.profile(counts_right, 8, 'right')) as right_in:
            kmer.distance(left_in, right_in, report)

        written = report.getvalue()
        assert written == 'left right %.10f\n' % 0.4626209323
コード例 #11
0
    def test_merge(self):
        """``kmer.merge`` with defaults writes the sum of both profiles."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as left_in, \
                utils.open_profile(self.profile(counts_right, 8)) as right_in, \
                utils.open_profile(filename, 'w') as merged_out:
            kmer.merge(left_in, right_in, merged_out)

        expected = counts_left + counts_right
        utils.test_profile_file(filename, expected, 8)
コード例 #12
0
    def test_distance_smooth(self):
        """``kmer.distance`` with smoothing prints the distance at 3 decimals."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        with utils.open_profile(self.profile(counts_left, 8, 'left')) as left_in, \
                utils.open_profile(self.profile(counts_right, 8, 'right')) as right_in:
            kmer.distance(left_in, right_in, report, do_smooth=True, precision=3)

        written = report.getvalue()
        assert written == 'left right 0.077\n'
コード例 #13
0
ファイル: test_kdistlib.py プロジェクト: yimsea/kPAL
    def test_ProfileDistance_distance_k8(self):
        """Default ``ProfileDistance`` of the two k=8 test profiles is ~0.4626."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)

        first, second = [klib.Profile(utils.as_array(counts, 8))
                         for counts in (counts_left, counts_right)]

        metric = kdistlib.ProfileDistance()
        measured = metric.distance(first, second)
        np.testing.assert_almost_equal(measured, 0.4626209322)
コード例 #14
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_merge(self):
        """``kmer.merge`` with defaults writes the sum of both profiles."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as left_in, \
                utils.open_profile(self.profile(counts_right, 8)) as right_in, \
                utils.open_profile(filename, 'w') as merged_out:
            kmer.merge(left_in, right_in, merged_out)

        expected = counts_left + counts_right
        utils.test_profile_file(filename, expected, 8)
コード例 #15
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_distance_pairwise_name(self):
        """``kmer.distance`` accepts a pairwise function given by dotted name."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        with utils.open_profile(self.profile(counts_left, 8, 'left')) as left_in, \
                utils.open_profile(self.profile(counts_right, 8, 'right')) as right_in:
            kmer.distance(left_in, right_in, report, precision=3,
                          custom_pairwise='numpy.multiply')

        written = report.getvalue()
        assert written == 'left right 0.084\n'
コード例 #16
0
    def test_distance_smooth_expr(self):
        """Smoothed ``kmer.distance`` accepts a summary given as an expression."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        with utils.open_profile(self.profile(counts_left, 8, 'left')) as left_in, \
                utils.open_profile(self.profile(counts_right, 8, 'right')) as right_in:
            kmer.distance(left_in, right_in, report, do_smooth=True,
                          precision=3, custom_summary='np.max(values)')

        written = report.getvalue()
        assert written == 'left right 0.474\n'
コード例 #17
0
    def test_distance_pairwise_expr(self):
        """``kmer.distance`` accepts a pairwise function given as an expression."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        with utils.open_profile(self.profile(counts_left, 8, 'left')) as left_in, \
                utils.open_profile(self.profile(counts_right, 8, 'right')) as right_in:
            kmer.distance(left_in, right_in, report, precision=3,
                          custom_pairwise='abs(left - right) / (left + right + 1000)')

        written = report.getvalue()
        assert written == 'left right 0.001\n'
コード例 #18
0
    def test_distance_pairwise_name(self):
        """``kmer.distance`` accepts a pairwise function given by dotted name."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        with utils.open_profile(self.profile(counts_left, 8, 'left')) as left_in, \
                utils.open_profile(self.profile(counts_right, 8, 'right')) as right_in:
            kmer.distance(left_in, right_in, report, precision=3,
                          custom_pairwise='numpy.multiply')

        written = report.getvalue()
        assert written == 'left right 0.084\n'
コード例 #19
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_distance_smooth_expr(self):
        """Smoothed ``kmer.distance`` accepts a summary given as an expression."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        with utils.open_profile(self.profile(counts_left, 8, 'left')) as left_in, \
                utils.open_profile(self.profile(counts_right, 8, 'right')) as right_in:
            kmer.distance(left_in, right_in, report, do_smooth=True,
                          precision=3, custom_summary='np.max(values)')

        written = report.getvalue()
        assert written == 'left right 0.474\n'
コード例 #20
0
    def test_cat_prefixes(self):
        """``kmer.cat`` prepends the per-file prefix to each profile name."""
        counts_a = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_b = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        # Both inputs hold a profile named 'X'; only the prefixes tell them apart.
        with utils.open_profile(self.profile(counts_a, 8, name='X')) as first_in, \
                utils.open_profile(self.profile(counts_b, 8, name='X')) as second_in, \
                utils.open_profile(filename, 'w') as combined_out:
            kmer.cat([first_in, second_in], combined_out, prefixes=['a_', 'b_'])

        for name, expected in (('a_X', counts_a), ('b_X', counts_b)):
            utils.test_profile_file(filename, expected, 8, name=name)
コード例 #21
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_distance_pairwise_expr(self):
        """``kmer.distance`` accepts a pairwise function given as an expression."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        with utils.open_profile(self.profile(counts_left, 8, 'left')) as left_in, \
                utils.open_profile(self.profile(counts_right, 8, 'right')) as right_in:
            kmer.distance(left_in, right_in, report, precision=3,
                          custom_pairwise='abs(left - right) / (left + right + 1000)')

        written = report.getvalue()
        assert written == 'left right 0.001\n'
コード例 #22
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_cat_prefixes(self):
        """``kmer.cat`` prepends the per-file prefix to each profile name."""
        counts_a = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_b = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        # Both inputs hold a profile named 'X'; only the prefixes tell them apart.
        with utils.open_profile(self.profile(counts_a, 8, name='X')) as first_in, \
                utils.open_profile(self.profile(counts_b, 8, name='X')) as second_in, \
                utils.open_profile(filename, 'w') as combined_out:
            kmer.cat([first_in, second_in], combined_out, prefixes=['a_', 'b_'])

        for name, expected in (('a_X', counts_a), ('b_X', counts_b)):
            utils.test_profile_file(filename, expected, 8, name=name)
コード例 #23
0
ファイル: test_kdistlib.py プロジェクト: LUMC/kPAL
    def test_distance_matrix_two(self):
        """A two-profile distance matrix prints the size, both names and one distance."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)

        named_counts = ((counts_left, 'a'), (counts_right, 'b'))
        profiles = [klib.Profile(utils.as_array(counts, 8), name)
                    for counts, name in named_counts]

        metric = kdistlib.ProfileDistance()
        report = StringIO()
        kdistlib.distance_matrix(profiles, report, 2, metric)

        lines = report.getvalue().strip().split('\n')
        assert lines == ['2', 'a', 'b', '0.46']
コード例 #24
0
ファイル: test_kdistlib.py プロジェクト: yimsea/kPAL
    def test_ProfileDistance_distance_unmodified(self):
        """A balanced distance computation leaves both input profiles unchanged."""
        counts_a = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_b = utils.counts(utils.SEQUENCES_RIGHT, 8)

        first, second = [klib.Profile(utils.as_array(counts, 8))
                         for counts in (counts_a, counts_b)]

        # Only side effects matter here; the distance value itself is ignored.
        metric = kdistlib.ProfileDistance(do_balance=True)
        metric.distance(first, second)

        for profile, expected in ((first, counts_a), (second, counts_b)):
            utils.test_profile(profile, expected, 8)
コード例 #25
0
ファイル: test_kdistlib.py プロジェクト: LUMC/kPAL
    def test_ProfileDistance_distance_unmodified(self):
        """A balanced distance computation leaves both input profiles unchanged."""
        counts_a = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_b = utils.counts(utils.SEQUENCES_RIGHT, 8)

        first, second = [klib.Profile(utils.as_array(counts, 8))
                         for counts in (counts_a, counts_b)]

        # Only side effects matter here; the distance value itself is ignored.
        metric = kdistlib.ProfileDistance(do_balance=True)
        metric.distance(first, second)

        for profile, expected in ((first, counts_a), (second, counts_b)):
            utils.test_profile(profile, expected, 8)
コード例 #26
0
    def test_distance_matrix_smooth(self):
        """Smoothed distance matrix over three profiles in one file.

        Profiles 'a' and 'c' are built from the same counts; 'b' differs.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        profiles_path = self.multi_profile(
            8, [counts_left, counts_right, counts_left], ['a', 'b', 'c'])
        with utils.open_profile(profiles_path) as handle:
            kmer.distance_matrix(handle, report, do_smooth=True, precision=3)

        lines = report.getvalue().strip().split('\n')
        assert lines == ['3', 'a', 'b', 'c', '0.077', '0.000 0.077']
コード例 #27
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_distance_matrix_smooth(self):
        """Smoothed distance matrix over three profiles in one file.

        Profiles 'a' and 'c' are built from the same counts; 'b' differs.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        profiles_path = self.multi_profile(
            8, [counts_left, counts_right, counts_left], ['a', 'b', 'c'])
        with utils.open_profile(profiles_path) as handle:
            kmer.distance_matrix(handle, report, do_smooth=True, precision=3)

        lines = report.getvalue().strip().split('\n')
        assert lines == ['3', 'a', 'b', 'c', '0.077', '0.000 0.077']
コード例 #28
0
    def test_distance_matrix_pairwise_expr(self):
        """Distance matrix with a pairwise function given as an expression.

        Profiles 'a' and 'c' are built from the same counts; 'b' differs.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        profiles_path = self.multi_profile(
            8, [counts_left, counts_right, counts_left], ['a', 'b', 'c'])
        with utils.open_profile(profiles_path) as handle:
            kmer.distance_matrix(handle, report, precision=3,
                                 custom_pairwise='abs(left - right) / (left + right + 1000)')

        lines = report.getvalue().strip().split('\n')
        assert lines == ['3', 'a', 'b', 'c', '0.001', '0.000 0.001']
コード例 #29
0
 def test_count_multi_by_record(self):
     """Per-record counting of two FASTA files names profiles '<file>_<record>'."""
     counts_by_record_left = [utils.counts(record, 8) for record in utils.SEQUENCES_LEFT]
     counts_by_record_right = [utils.counts(record, 8) for record in utils.SEQUENCES_RIGHT]
     # Records are named after their position: '0', '1', ...
     names_left = [str(i) for i in range(len(counts_by_record_left))]
     names_right = [str(i) for i in range(len(counts_by_record_right))]
     filename = self.empty()
     with open(self.fasta(utils.SEQUENCES_LEFT, names=names_left)) as fasta_left, \
             open(self.fasta(utils.SEQUENCES_RIGHT, names=names_right)) as fasta_right, \
             utils.open_profile(filename, 'w') as profile_out:
         kmer.count([fasta_left, fasta_right], profile_out, 8, names=['a', 'b'], by_record=True)
     expectations = (('a_', names_left, counts_by_record_left),
                     ('b_', names_right, counts_by_record_right))
     for prefix, names, per_record in expectations:
         for name, counts in zip(names, per_record):
             utils.test_profile_file(filename, counts, 8, name=prefix + name)
コード例 #30
0
    def test_merge_custom_name(self):
        """``kmer.merge`` accepts a merger given by dotted name.

        With ``numpy.multiply`` the expected counts are the per-k-mer products,
        which are non-zero only for k-mers present in both inputs.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as left_in, \
                utils.open_profile(self.profile(counts_right, 8)) as right_in, \
                utils.open_profile(filename, 'w') as merged_out:
            kmer.merge(left_in, right_in, merged_out, custom_merger='numpy.multiply')

        shared = set(counts_left) & set(counts_right)
        counts_mult = Counter({s: counts_left[s] * counts_right[s] for s in shared})

        utils.test_profile_file(filename, counts_mult, 8)
コード例 #31
0
    def test_distance_matrix_pairwise_name(self):
        """Distance matrix with a pairwise function given by dotted name.

        Profiles 'a' and 'c' are built from the same counts; 'b' differs.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        profiles_path = self.multi_profile(
            8, [counts_left, counts_right, counts_left], ['a', 'b', 'c'])
        with utils.open_profile(profiles_path) as handle:
            kmer.distance_matrix(handle, report, precision=3,
                                 custom_pairwise='numpy.multiply')

        lines = report.getvalue().strip().split('\n')
        assert lines == ['3', 'a', 'b', 'c', '0.084', '1.206 0.084']
コード例 #32
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_distance_matrix_pairwise_name(self):
        """Distance matrix with a pairwise function given by dotted name.

        Profiles 'a' and 'c' are built from the same counts; 'b' differs.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        profiles_path = self.multi_profile(
            8, [counts_left, counts_right, counts_left], ['a', 'b', 'c'])
        with utils.open_profile(profiles_path) as handle:
            kmer.distance_matrix(handle, report, precision=3,
                                 custom_pairwise='numpy.multiply')

        lines = report.getvalue().strip().split('\n')
        assert lines == ['3', 'a', 'b', 'c', '0.084', '1.206 0.084']
コード例 #33
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_merge_custom_name(self):
        """``kmer.merge`` accepts a merger given by dotted name.

        With ``numpy.multiply`` the expected counts are the per-k-mer products,
        which are non-zero only for k-mers present in both inputs.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as left_in, \
                utils.open_profile(self.profile(counts_right, 8)) as right_in, \
                utils.open_profile(filename, 'w') as merged_out:
            kmer.merge(left_in, right_in, merged_out, custom_merger='numpy.multiply')

        shared = set(counts_left) & set(counts_right)
        counts_mult = Counter({s: counts_left[s] * counts_right[s] for s in shared})

        utils.test_profile_file(filename, counts_mult, 8)
コード例 #34
0
ファイル: test_kdistlib.py プロジェクト: yimsea/kPAL
    def test_distance_matrix_two(self):
        """A two-profile distance matrix prints the size, both names and one distance."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)

        named_counts = ((counts_left, 'a'), (counts_right, 'b'))
        profiles = [klib.Profile(utils.as_array(counts, 8), name)
                    for counts, name in named_counts]

        metric = kdistlib.ProfileDistance()
        report = StringIO()
        kdistlib.distance_matrix(profiles, report, 2, metric)

        lines = report.getvalue().strip().split('\n')
        assert lines == ['2', 'a', 'b', '0.46']
コード例 #35
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_distance_matrix_pairwise_expr(self):
        """Distance matrix with a pairwise function given as an expression.

        Profiles 'a' and 'c' are built from the same counts; 'b' differs.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        report = StringIO()

        profiles_path = self.multi_profile(
            8, [counts_left, counts_right, counts_left], ['a', 'b', 'c'])
        with utils.open_profile(profiles_path) as handle:
            kmer.distance_matrix(handle, report, precision=3,
                                 custom_pairwise='abs(left - right) / (left + right + 1000)')

        lines = report.getvalue().strip().split('\n')
        assert lines == ['3', 'a', 'b', 'c', '0.001', '0.000 0.001']
コード例 #36
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_merge_custom_expr(self):
        """``kmer.merge`` accepts a merger given as an expression.

        The expression keeps the summed count only where exactly one input has
        the k-mer, so the expectation is the sum minus every shared k-mer.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as left_in, \
                utils.open_profile(self.profile(counts_right, 8)) as right_in, \
                utils.open_profile(filename, 'w') as merged_out:
            kmer.merge(left_in, right_in, merged_out, custom_merger='(left + right) * np.logical_xor(left, right)')

        counts_xor = counts_left + counts_right
        shared = set(counts_left).intersection(counts_right)
        for kmer_in_both in shared:
            del counts_xor[kmer_in_both]

        utils.test_profile_file(filename, counts_xor, 8)
コード例 #37
0
    def test_merge_custom_expr(self):
        """``kmer.merge`` accepts a merger given as an expression.

        The expression keeps the summed count only where exactly one input has
        the k-mer, so the expectation is the sum minus every shared k-mer.
        """
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as left_in, \
                utils.open_profile(self.profile(counts_right, 8)) as right_in, \
                utils.open_profile(filename, 'w') as merged_out:
            kmer.merge(left_in, right_in, merged_out, custom_merger='(left + right) * np.logical_xor(left, right)')

        counts_xor = counts_left + counts_right
        shared = set(counts_left).intersection(counts_right)
        for kmer_in_both in shared:
            del counts_xor[kmer_in_both]

        utils.test_profile_file(filename, counts_xor, 8)
コード例 #38
0
 def test_count(self):
     """Counting one FASTA file writes a profile matching the reference counts."""
     expected = utils.counts(utils.SEQUENCES, 8)
     filename = self.empty()
     with open(self.fasta(utils.SEQUENCES)) as fasta_in, \
             utils.open_profile(filename, 'w') as profile_out:
         kmer.count([fasta_in], profile_out, 8)
     utils.test_profile_file(filename, expected, 8)
コード例 #39
0
ファイル: test_klib.py プロジェクト: MatthewRalston/kPAL
    def _test_profile_split(self, sequences, length):
        """Check ``Profile.split`` for k-mers of ``length`` counted in ``sequences``.

        Each k-mer is compared with its reverse complement. Its doubled count
        is expected at its own index in ``left`` when it sorts first, and at
        the reverse complement's index in ``right`` when it sorts last. A
        palindrome's undoubled count is expected in both halves.
        """
        counts = utils.counts(sequences, length)
        profile = klib.Profile(utils.as_array(counts, length))
        left, right = profile.split()

        assert len(left) == len(right)
        assert sum(left) + sum(right) == sum(counts.values()) * 2

        expected_left = {}
        expected_right = {}
        expected_palindrome = {}

        for seq, n in counts.items():
            revcomp = utils.reverse_complement(seq)
            if seq == revcomp:
                expected_palindrome[utils.count_index(seq)] = n
            elif seq < revcomp:
                expected_left[utils.count_index(seq)] = n * 2
            else:
                expected_right[utils.count_index(revcomp)] = n * 2

        def in_index_order(*by_index):
            # Counts from the given index->count maps, ordered by index.
            pairs = []
            for mapping in by_index:
                pairs = pairs + list(mapping.items())
            return [count for _, count in sorted(pairs)]

        nonzero_left = [c for c in left if c > 0]
        assert nonzero_left == in_index_order(expected_left, expected_palindrome)
        nonzero_right = [c for c in right if c > 0]
        assert nonzero_right == in_index_order(expected_right, expected_palindrome)
コード例 #40
0
    def test_main_info(self, capsys):
        """``kmer.main(['info', <file>])`` prints the expected profile summary.

        Output is captured with the pytest ``capsys`` fixture, see
        http://pytest.org/latest/capture.html
        """
        counts = utils.counts(utils.SEQUENCES, 8)
        filename = self.profile(counts, 8, 'a')

        kmer.main(['info', filename])

        out, _ = capsys.readouterr()

        # Full count vector: one zero for each of the 4**8 k-mers that does
        # not occur, followed by the observed counts.
        zero_counts = 4**8 - len(counts)
        all_counts = [0] * zero_counts + list(counts.values())

        expected = ''.join([
            'File format version: 1.0.0\n',
            'Produced by: kMer unit tests\n\n',
            'Profile: a\n',
            '- k-mer length: 8 (%d k-mers)\n' % (4**8),
            '- Zero counts: %i\n' % zero_counts,
            '- Non-zero counts: %i\n' % len(counts),
            '- Sum of counts: %i\n' % sum(counts.values()),
            '- Mean of counts: %.3f\n' % np.mean(all_counts),
            '- Median of counts: %.3f\n' % np.median(all_counts),
            '- Standard deviation of counts: %.3f\n' % np.std(all_counts),
        ])

        assert out == expected
コード例 #41
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_positive(self):
        """``kmer.positive`` keeps only the k-mers that occur in both profiles."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename_left = self.empty()
        filename_right = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as left_in, \
                utils.open_profile(self.profile(counts_right, 8)) as right_in, \
                utils.open_profile(filename_left, 'w') as left_out, \
                utils.open_profile(filename_right, 'w') as right_out:
            kmer.positive(left_in, right_in, left_out, right_out)

        # Each side keeps its own counts, restricted to k-mers the other has.
        expected_left = Counter(
            s for s in counts_left.elements() if s in counts_right)
        utils.test_profile_file(filename_left, expected_left, 8)

        expected_right = Counter(
            s for s in counts_right.elements() if s in counts_left)
        utils.test_profile_file(filename_right, expected_right, 8)
コード例 #42
0
ファイル: test_klib.py プロジェクト: MatthewRalston/kPAL
    def test_profile_from_file(self):
        """``Profile.from_file`` reproduces the counts stored in a k=4 profile file."""
        expected = utils.counts(utils.SEQUENCES, 4)
        profile_path = self.profile(expected, 4)
        with utils.open_profile(profile_path, 'r') as handle:
            loaded = klib.Profile.from_file(handle)

        utils.test_profile(loaded, expected, 4)
コード例 #43
0
 def test_convert(self):
     """Converting an old-format profile file preserves its counts."""
     expected = utils.counts(utils.SEQUENCES, 8)
     filename = self.empty()
     with open(self.profile_old_format(expected, 8)) as old_in, \
             utils.open_profile(filename, 'w') as profile_out:
         kmer.convert([old_in], profile_out)
     utils.test_profile_file(filename, expected, 8)
コード例 #44
0
    def test_positive(self):
        """``kmer.positive`` keeps only the k-mers that occur in both profiles."""
        counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
        counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
        filename_left = self.empty()
        filename_right = self.empty()

        with utils.open_profile(self.profile(counts_left, 8)) as left_in, \
                utils.open_profile(self.profile(counts_right, 8)) as right_in, \
                utils.open_profile(filename_left, 'w') as left_out, \
                utils.open_profile(filename_right, 'w') as right_out:
            kmer.positive(left_in, right_in, left_out, right_out)

        # Each side keeps its own counts, restricted to k-mers the other has.
        expected_left = Counter(
            s for s in counts_left.elements() if s in counts_right)
        utils.test_profile_file(filename_left, expected_left, 8)

        expected_right = Counter(
            s for s in counts_right.elements() if s in counts_left)
        utils.test_profile_file(filename_right, expected_right, 8)
コード例 #45
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
 def test_count(self):
     """Counting one FASTA file writes a profile matching the reference counts."""
     expected = utils.counts(utils.SEQUENCES, 8)
     filename = self.empty()
     with open(self.fasta(utils.SEQUENCES)) as fasta_in, \
             utils.open_profile(filename, 'w') as profile_out:
         kmer.count([fasta_in], profile_out, 8)
     utils.test_profile_file(filename, expected, 8)
コード例 #46
0
ファイル: test_klib.py プロジェクト: LUMC/kPAL
    def test_profile_reverse_complement(self):
        """``Profile.reverse_complement`` agrees with the string-based helper.

        Checked for every index in ``range(profile.length)``.
        """
        counts = utils.counts(utils.SEQUENCES, 8)
        profile = klib.Profile(utils.as_array(counts, 8))

        for index in range(profile.length):
            via_profile = profile.binary_to_dna(profile.reverse_complement(index))
            via_utils = utils.reverse_complement(profile.binary_to_dna(index))
            assert via_profile == via_utils
コード例 #47
0
ファイル: test_klib.py プロジェクト: LUMC/kPAL
    def _test_profile_split(self, sequences, length):
        """
        Helper: build a profile from `sequences`, call `Profile.split` and
        compare both halves with expectations derived from the raw counts.

        Checks performed:

        - both halves have the same length;
        - the halves together sum to twice the total of the counts;
        - the positive entries of each half, in order, equal the expected
          values sorted by index (doubled counts for non-palindromic
          k-mers, plain counts for k-mers equal to their own reverse
          complement).
        """
        counts = utils.counts(sequences, length)
        profile = klib.Profile(utils.as_array(counts, length))
        left, right = profile.split()

        assert len(left) == len(right)
        assert sum(left) + sum(right) == sum(counts.values()) * 2

        # Expected positive entries keyed by index, one mapping per category.
        expected_left = {}
        expected_right = {}
        palindromes = {}

        for word, count in counts.items():
            complement = utils.reverse_complement(word)
            if word == complement:
                # Own reverse complement: expected in both halves, undoubled.
                palindromes[utils.count_index(word)] = count
            elif word < complement:
                expected_left[utils.count_index(word)] = count * 2
            else:
                # Indexed by the reverse complement rather than the k-mer
                # itself.
                expected_right[utils.count_index(complement)] = count * 2

        def values_by_index(one_sided):
            # Merge with the palindromes and return the values ordered by
            # (index, value).
            pairs = list(one_sided.items()) + list(palindromes.items())
            return [value for _, value in sorted(pairs)]

        assert [c for c in left if c > 0] == values_by_index(expected_left)
        assert [c for c in right if c > 0] == values_by_index(expected_right)
コード例 #48
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
 def test_convert(self):
     """
     Pass an old-format profile of `utils.SEQUENCES` through
     `kmer.convert` and compare the written profile file against the
     counts it was created from.
     """
     expected = utils.counts(utils.SEQUENCES, 8)
     target = self.empty()
     with open(self.profile_old_format(expected, 8)) as old_profile, \
             utils.open_profile(target, 'w') as converted:
         kmer.convert([old_profile], converted)
     utils.test_profile_file(target, expected, 8)
コード例 #49
0
ファイル: test_klib.py プロジェクト: LUMC/kPAL
    def test_profile_reverse_complement_palindrome(self):
        """
        Same consistency check as for the regular reverse complement test,
        but on a profile built from the single sequence 'ACCTAGGT'.
        """
        counts = utils.counts(['ACCTAGGT'], 8)
        profile = klib.Profile(utils.as_array(counts, 8))

        for index in range(profile.length):
            # Reverse complement in index space, then decode ...
            decoded_complement = profile.binary_to_dna(
                profile.reverse_complement(index))
            # ... versus decode first, then reverse complement the string.
            complemented_string = utils.reverse_complement(
                profile.binary_to_dna(index))
            assert decoded_complement == complemented_string
コード例 #50
0
ファイル: test_kmer.py プロジェクト: Biomarino/kPAL
    def test_get_balance(self):
        """
        Check the exact text `kmer.get_balance` writes for the profile of
        `utils.SEQUENCES` when called with `precision=3`.
        """
        counts = utils.counts(utils.SEQUENCES, 8)
        report = StringIO()
        profile_path = self.profile(counts, 8)

        with utils.open_profile(profile_path) as profile_in:
            kmer.get_balance(profile_in, report, precision=3)

        written = report.getvalue()
        assert written == '1 0.669\n'
コード例 #51
0
ファイル: test_klib.py プロジェクト: MatthewRalston/kPAL
    def test_profile_print_counts(self, capsys):
        """
        Check that `Profile.print_counts` prints one "<k-mer> <count>" line
        for every string of length 4 over 'ACGT', in the order produced by
        `itertools.product`.
        """
        counts = utils.counts(utils.SEQUENCES, 4)
        profile = klib.Profile(utils.as_array(counts, 4))
        profile.print_counts()

        printed, _ = capsys.readouterr()

        expected_lines = []
        for letters in itertools.product('ACGT', repeat=4):
            word = ''.join(letters)
            expected_lines.append('%s %d\n' % (word, counts[word]))

        assert printed == ''.join(expected_lines)
コード例 #52
0
ファイル: test_klib.py プロジェクト: LUMC/kPAL
    def test_profile_balance(self):
        """
        Balance a profile of `utils.SEQUENCES` and compare it against the
        counts updated with their reverse complement entries.
        """
        counts = utils.counts(utils.SEQUENCES, 8)
        profile = klib.Profile(utils.as_array(counts, 8))
        profile.balance()

        # The mapping is built completely before `counts` is modified.
        # NOTE(review): if `utils.counts` returns a `collections.Counter`,
        # `update` adds these values to existing entries -- confirm.
        mirrored = {utils.reverse_complement(word): count
                    for word, count in counts.items()}
        counts.update(mirrored)
        utils.test_profile(profile, counts, 8)
コード例 #53
0
ファイル: test_klib.py プロジェクト: LUMC/kPAL
    def test_profile_balance_palindrome(self):
        """
        Balance a profile built from the single sequence 'AATT' and compare
        it against the counts updated with their reverse complement entries.
        """
        counts = utils.counts(['AATT'], 4)
        profile = klib.Profile(utils.as_array(counts, 4))
        profile.balance()

        # Collect the reverse complement entries first, then apply them.
        # NOTE(review): with a `collections.Counter`, `update` would add to
        # existing entries rather than replace them -- confirm the type
        # returned by `utils.counts`.
        complement_counts = {}
        for word, count in counts.items():
            complement_counts[utils.reverse_complement(word)] = count
        counts.update(complement_counts)
        utils.test_profile(profile, counts, 4)
コード例 #54
0
ファイル: test_klib.py プロジェクト: MatthewRalston/kPAL
    def test_profile_reverse_complement_palindrome(self):
        """
        On a profile built from the single sequence 'ACCTAGGT', check for
        every index that `Profile.reverse_complement` agrees with
        `utils.reverse_complement` applied to the decoded DNA string.
        """
        counts = utils.counts(['ACCTAGGT'], 8)
        profile = klib.Profile(utils.as_array(counts, 8))

        for position in range(profile.length):
            from_profile = profile.binary_to_dna(
                profile.reverse_complement(position))
            from_utils = utils.reverse_complement(
                profile.binary_to_dna(position))
            assert from_profile == from_utils
コード例 #55
0
ファイル: test_klib.py プロジェクト: MatthewRalston/kPAL
    def test_profile_reverse_complement(self):
        """
        On a profile of `utils.SEQUENCES`, check for every index that
        `Profile.reverse_complement` agrees with `utils.reverse_complement`
        applied to the decoded DNA string.
        """
        counts = utils.counts(utils.SEQUENCES, 8)
        profile = klib.Profile(utils.as_array(counts, 8))

        for position in range(profile.length):
            # Left-hand side: complement the index, then decode.
            complemented = profile.reverse_complement(position)
            from_profile = profile.binary_to_dna(complemented)
            # Right-hand side: decode, then complement the string.
            decoded = profile.binary_to_dna(position)
            from_utils = utils.reverse_complement(decoded)
            assert from_profile == from_utils
コード例 #56
0
    def test_get_balance(self):
        """
        Run `kmer.get_balance` with `precision=3` on the profile of
        `utils.SEQUENCES` and check the exact output text.
        """
        counts = utils.counts(utils.SEQUENCES, 8)
        sink = StringIO()

        with utils.open_profile(self.profile(counts, 8)) as source:
            kmer.get_balance(source, sink, precision=3)

        expected_text = '1 0.669\n'
        assert sink.getvalue() == expected_text
コード例 #57
0
ファイル: test_klib.py プロジェクト: LUMC/kPAL
    def test_profile_print_counts(self, capsys):
        """
        Capture what `Profile.print_counts` prints and compare it with one
        "<k-mer> <count>" line per string of length 4 over 'ACGT', in
        `itertools.product` order.
        """
        counts = utils.counts(utils.SEQUENCES, 4)
        profile = klib.Profile(utils.as_array(counts, 4))
        profile.print_counts()

        stdout_text, _ = capsys.readouterr()

        # Every possible word of length 4, in the order product() yields them.
        words = [''.join(letters)
                 for letters in itertools.product('ACGT', repeat=4)]
        expected_text = ''.join(
            '%s %d\n' % (word, counts[word]) for word in words)

        assert stdout_text == expected_text
コード例 #58
0
ファイル: test_klib.py プロジェクト: LUMC/kPAL
 def _test_from_fasta_by_record(self, sequences, k, prefix=None):
     """
     Helper: write `sequences` to a FASTA file with records named '0',
     '1', ..., read it back with `Profile.from_fasta_by_record` and check
     each resulting profile against the counts of the matching entry.

     When `prefix` is truthy, the expected profile name is
     `prefix + '_' + <record name>`; otherwise it is the record name.
     """
     counts_by_record = [utils.counts(entry, k) for entry in sequences]
     names = [str(number) for number in range(len(counts_by_record))]
     with open(self.fasta(sequences, names=names)) as fasta_handle:
         profiles = klib.Profile.from_fasta_by_record(fasta_handle, k, prefix=prefix)
         # Profiles are consumed while the FASTA file is still open.
         for name, counts, profile in zip(names, counts_by_record, profiles):
             if prefix:
                 expected_name = prefix + '_' + name
             else:
                 expected_name = name
             utils.test_profile(profile, counts, k, name=expected_name)