Python isubsample 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: skbio.stats

메소드/함수: isubsample

hotexamples.com에서의 예제들: 12

Python isubsample - 12개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 skbio.stats.isubsample의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

    def test_per_sample_sequences_complex(self):
        """Subsample at most two sequences per bin using ``buf_size=1``.

        Sequences are binned by the part of ``SequenceID`` before its last
        underscore.  Observed and expected pairs are both ordered by bin
        label before they are compared.
        """
        per_bin_cap = 2

        def sample_of(seq):
            # 'a_2' -> 'a': everything before the final underscore.
            pieces = seq['SequenceID'].rsplit('_', 1)
            return pieces[0]

        def by_bin(pair):
            return pair[0]

        def entry(bin_label, seq_id, sequence):
            return bin_label, {'SequenceID': seq_id, 'Sequence': sequence}

        # NOTE(review): which records are picked presumably depends on the
        # random state prepared by the fixture -- not visible from here.
        expected = [
            entry('a', 'a_2', 'AATTGGCC-a2'),
            entry('a', 'a_3', 'AATTGGCC-a3'),
            entry('b', 'b_2', 'AATTGGCC-b2'),
            entry('b', 'b_1', 'AATTGGCC-b1'),
            entry('c', 'c_1', 'AATTGGCC-c1'),
            entry('c', 'c_2', 'AATTGGCC-c2'),
        ]
        # Stable sort: records sharing a bin keep the order listed above.
        expected.sort(key=by_bin)

        sequence_stream = self.mock_sequence_iter(self.sequences)
        observed = sorted(
            isubsample(sequence_stream, per_bin_cap, bin_f=sample_of,
                       buf_size=1),
            key=by_bin)
        self.assertEqual(observed, expected)

예제 #2

파일 보기

파일: mod2_pcoa.py 프로젝트: jnpaulson/American-Gut

def subsample_dm(distmat, mapping_file, max, category, output):
    """Subsample the distmat to max samples per category value.

    Parameters
    ----------
    distmat
        Source handed to ``read(..., into=DistanceMatrix)``.
    mapping_file
        Tab-separated mapping file; it must contain a ``#SampleID`` column
        (used as the index) and the `category` column.
    max
        Per-category cap, forwarded to ``isubsample`` as its second
        positional argument.  The name shadows the builtin but is part of
        the public signature, so it is kept.
    category
        Mapping-file column whose value is used to bin each sample ID.
    output
        Destination passed to ``dm.to_file``.

    Notes
    -----
    Sample IDs are looked up with ``id_to_cat[x]``, so an ID of the distance
    matrix that is missing from the mapping file raises ``KeyError`` inside
    ``bin_f``.
    """
    # ``sep`` has to be given by keyword: pandas 2.0 made every ``read_csv``
    # argument after the path keyword-only, so a positional '\t' raises
    # TypeError there.
    mf = pd.read_csv(mapping_file, sep='\t', converters=defaultdict(str),
                     index_col='#SampleID')
    id_to_cat = dict(mf[category])

    def bin_f(x):
        return id_to_cat[x]

    dm = read(distmat, into=DistanceMatrix)
    # isubsample yields (bin, item) pairs; only the sample IDs are needed.
    kept_ids = [sample_id
                for _, sample_id in isubsample(dm.ids, max, bin_f=bin_f)]
    dm = dm.filter(kept_ids)
    dm.to_file(output)

예제 #3

파일 보기

파일: test_subsample.py 프로젝트: Kleptobismol/scikit-bio

 def test_per_sample_sequences_complex(self):
     """Subsample at most two sequences per bin using ``buf_size=1``.

     Sequences are binned by the part of ``SequenceID`` before its last
     underscore.  Observed and expected pairs are both sorted by bin label
     before they are compared.
     """
     maximum = 2

     # A named function instead of a lambda bound to a name (PEP 8).
     def bin_f(x):
         return x['SequenceID'].rsplit('_', 1)[0]

     # NOTE(review): which records are picked presumably depends on the
     # random state prepared by the fixture -- not visible from here.
     exp = sorted([('a', {'SequenceID': 'a_2', 'Sequence': 'AATTGGCC-a2'}),
                   ('a', {'SequenceID': 'a_3', 'Sequence': 'AATTGGCC-a3'}),
                   ('b', {'SequenceID': 'b_2', 'Sequence': 'AATTGGCC-b2'}),
                   ('b', {'SequenceID': 'b_1', 'Sequence': 'AATTGGCC-b1'}),
                   ('c', {'SequenceID': 'c_1', 'Sequence': 'AATTGGCC-c1'}),
                   ('c', {'SequenceID': 'c_2', 'Sequence': 'AATTGGCC-c2'})],
                  key=lambda x: x[0])
     obs = isubsample(self.mock_sequence_iter(self.sequences), maximum,
                      bin_f=bin_f, buf_size=1)
     # The sort is stable, so records within a bin keep their emitted order.
     self.assertEqual(sorted(obs, key=lambda x: x[0]), exp)

예제 #4

파일 보기

def subsample_dm(distmat, mapping_file, max, category, output):
    """Subsample the distmat to max samples per category value.

    Parameters
    ----------
    distmat
        Source handed to ``read(..., into=DistanceMatrix)``.
    mapping_file
        Tab-separated mapping file; it must contain a ``#SampleID`` column
        (set as the index after loading) and the `category` column.
    max
        Per-category cap, forwarded to ``isubsample`` as its second
        positional argument.  The name shadows the builtin but is part of
        the public signature, so it is kept.
    category
        Mapping-file column whose value is used to bin each sample ID.
    output
        Destination passed to ``dm.to_file``.

    Notes
    -----
    Sample IDs are looked up with ``id_to_cat.get(x)``, so every ID that is
    missing from the mapping file is binned under ``None``.
    """
    # ``sep`` has to be given by keyword: pandas 2.0 made every ``read_csv``
    # argument after the path keyword-only, so a positional '\t' raises
    # TypeError there.
    mf = pd.read_csv(mapping_file,
                     sep='\t',
                     converters=defaultdict(str),
                     dtype=str)
    mf.set_index('#SampleID', inplace=True)

    id_to_cat = dict(mf[category])

    def bin_f(x):
        return id_to_cat.get(x)

    dm = read(distmat, into=DistanceMatrix)
    # isubsample yields (bin, item) pairs; only the sample IDs are needed.
    kept_ids = [sample_id
                for _, sample_id in isubsample(dm.ids, max, bin_f=bin_f)]
    dm = dm.filter(kept_ids)
    dm.to_file(output)

예제 #5

파일 보기

    def test_isubsample_simple(self):
        """Subsample with a cap of ten sequences per bin.

        Sequences are binned by the part of ``SequenceID`` before its last
        underscore.  Observed and expected pairs are both ordered by bin
        label before they are compared.
        """
        per_bin_cap = 10

        def sample_of(seq):
            # 'a_5' -> 'a': everything before the final underscore.
            pieces = seq['SequenceID'].rsplit('_', 1)
            return pieces[0]

        def by_bin(pair):
            return pair[0]

        def entry(bin_label, seq_id, sequence):
            return bin_label, {'SequenceID': seq_id, 'Sequence': sequence}

        # Only the bin label is used as the sort key and the sort is stable,
        # so records inside a bin keep the order listed here; per the
        # original note, that is heap order by the random value attached to
        # each sequence.
        expected = [
            entry('a', 'a_5', 'AATTGGCC-a5'),
            entry('a', 'a_1', 'AATTGGCC-a1'),
            entry('a', 'a_4', 'AATTGGCC-a4'),
            entry('a', 'a_3', 'AATTGGCC-a3'),
            entry('a', 'a_2', 'AATTGGCC-a2'),
            entry('b', 'b_2', 'AATTGGCC-b2'),
            entry('b', 'b_1', 'AATTGGCC-b1'),
            entry('c', 'c_3', 'AATTGGCC-c3'),
            entry('c', 'c_2', 'AATTGGCC-c2'),
            entry('c', 'c_1', 'AATTGGCC-c1'),
        ]
        expected.sort(key=by_bin)

        sequence_stream = self.mock_sequence_iter(self.sequences)
        observed = sorted(
            isubsample(sequence_stream, per_bin_cap, bin_f=sample_of),
            key=by_bin)
        self.assertEqual(observed, expected)

예제 #6

파일 보기

파일: test_subsample.py 프로젝트: Kleptobismol/scikit-bio

    def test_per_sample_sequences_min_seqs(self):
        """Subsample with ``maximum=10`` and ``minimum=3`` per bin.

        Sequences are binned by the part of ``SequenceID`` before its last
        underscore.  The expected result contains no records for bin 'b'
        (presumably because that bin holds fewer than `minimum` sequences
        in the fixture -- confirm against ``self.sequences``).
        """
        maximum = 10
        minimum = 3

        # A named function instead of a lambda bound to a name (PEP 8).
        def bin_f(x):
            return x['SequenceID'].rsplit('_', 1)[0]

        # note, the result here is sorted by bin label only (the sort is
        # stable), so within a bin it stays in heap order by the random
        # values associated to each sequence
        exp = sorted([('a', {'SequenceID': 'a_5', 'Sequence': 'AATTGGCC-a5'}),
                      ('a', {'SequenceID': 'a_1', 'Sequence': 'AATTGGCC-a1'}),
                      ('a', {'SequenceID': 'a_4', 'Sequence': 'AATTGGCC-a4'}),
                      ('a', {'SequenceID': 'a_3', 'Sequence': 'AATTGGCC-a3'}),
                      ('a', {'SequenceID': 'a_2', 'Sequence': 'AATTGGCC-a2'}),
                      ('c', {'SequenceID': 'c_3', 'Sequence': 'AATTGGCC-c3'}),
                      ('c', {'SequenceID': 'c_2', 'Sequence': 'AATTGGCC-c2'}),
                      ('c', {'SequenceID': 'c_1', 'Sequence': 'AATTGGCC-c1'})],
                     key=lambda x: x[0])
        obs = isubsample(self.mock_sequence_iter(self.sequences), maximum,
                         minimum, bin_f=bin_f)
        self.assertEqual(sorted(obs, key=lambda x: x[0]), exp)

예제 #7

파일 보기

파일: test_subsample.py 프로젝트: anderspitman/scikit-bio

 def test_binf_is_none(self):
     """Without ``bin_f``, each item is expected back paired with ``True``."""
     values = [1, 2]
     expected = [(True, value) for value in values]
     observed = list(isubsample(values, 2))
     self.assertEqual(observed, expected)

예제 #8

파일 보기

파일: test_subsample.py 프로젝트: anderspitman/scikit-bio

 def test_max_lt_zero(self):
     """A negative ``maximum`` raises ValueError when the generator advances."""
     sampler = isubsample([1, 2, 3], maximum=-10)
     # The generator is created first; only advancing it is guarded.
     self.assertRaises(ValueError, next, sampler)

예제 #9

파일 보기

파일: test_subsample.py 프로젝트: anderspitman/scikit-bio

 def test_min_gt_max(self):
     """``minimum`` above ``maximum`` raises ValueError on the first ``next``."""
     sampler = isubsample([1, 2, 3], maximum=2, minimum=10)
     # The generator is created first; only advancing it is guarded.
     self.assertRaises(ValueError, next, sampler)

예제 #10

파일 보기

 def test_binf_is_none(self):
     """Without ``bin_f``, each item is expected back paired with ``True``."""
     source_items = [1, 2]
     wanted = [(True, item) for item in source_items]
     got = list(isubsample(source_items, 2))
     self.assertEqual(got, wanted)

예제 #11

파일 보기

 def test_max_lt_zero(self):
     """A negative ``maximum`` raises ValueError when the generator advances."""
     pending = isubsample([1, 2, 3], maximum=-10)
     # Creation happens outside the guard; only ``next`` is expected to raise.
     self.assertRaises(ValueError, next, pending)

예제 #12

파일 보기

 def test_min_gt_max(self):
     """``minimum`` above ``maximum`` raises ValueError on the first ``next``."""
     pending = isubsample([1, 2, 3], maximum=2, minimum=10)
     # Creation happens outside the guard; only ``next`` is expected to raise.
     self.assertRaises(ValueError, next, pending)