Example #1
0
    def test_genotype_temp_files_removed_in_server_side(self):
        # Serves a positions query through a GenoQuery configured with its own
        # temporary directory and, after the response has been saved and
        # parsed, checks that the directory exists but contains no entries.

        # Prepare
        tmp_dir = '/tmp/ukbrest2tmp/'
        shutil.rmtree(tmp_dir, ignore_errors=True)
        query_engine = GenoQuery(get_repository_path('example01'),
                                 tmpdir=tmp_dir)

        # Configure
        application = app.app
        application.config['TESTING'] = True
        application.config['genoquery'] = query_engine
        client = application.test_client()

        # Run
        url = '/ukbrest/api/v1.0/genotype/1/positions/100/276'
        reply = client.get(url)

        # Validate
        assert reply.status_code == 200, reply.status_code

        table = qctool(self._save_file(reply))

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        expected_columns = 6 + 300 * 3
        assert table.shape[1] == expected_columns
        assert table.shape[0] == 3

        # The temporary directory must survive, but be empty.
        assert os.path.isdir(tmp_dir)
        assert os.listdir(tmp_dir) == []
Example #2
0
    def test_genotype_rsids_using_file_http_auth_with_credentials(self):
        # Posts an rsids file to the chromosome 2 rsids endpoint while sending
        # HTTP basic-auth credentials that match the configured user/password
        # line, and checks the status code and the shape of the result.

        # Prepare
        self.setUp(user_pass_line='user: thepassword2')

        rsids_file = get_repository_path('example01/rsids01.txt')

        # Run
        # Open the upload in a context manager so the handle is always
        # closed, even if the request itself raises.
        with open(rsids_file, 'rb') as upload:
            response = self.app.post(
                '/ukbrest/api/v1.0/genotype/2/rsids',
                data={'file': (upload, rsids_file)},
                headers=self._get_http_basic_auth_header(
                    'user', 'thepassword2'),
            )

        # Validate
        assert response.status_code == 200, response.status_code

        bgen_file = self._save_file(response)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 5
Example #3
0
    def test_query_incl_range_upper_limit_only(self):
        # Queries chromosome 1 giving only stop=276 and checks the three
        # variants (rs1, rs2, rs3) read back from the returned file.

        # prepare
        query_engine = GenoQuery(get_repository_path('example01'))

        # run
        bgen_path = query_engine.get_incl_range(chr=1, stop=276)

        # validate
        assert bgen_path is not None
        assert isfile(bgen_path)

        table = qctool(bgen_path)

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        expected_columns = 6 + 300 * 3
        assert table.shape[1] == expected_columns
        assert table.shape[0] == 3

        # rsid expected on each row, in row order
        expected_rsids = ['rs1', 'rs2', 'rs3']
        assert len(table['rsid'].unique()) == len(expected_rsids)
        for row, rsid in enumerate(expected_rsids):
            assert table.loc[row, 'rsid'] == rsid

        # (allele1, allele2) expected on each row
        expected_alleles = [('G', 'A'), ('G', 'C'), ('C', 'A')]
        for row, (first, second) in enumerate(expected_alleles):
            assert table.loc[row, 'allele1'] == first
            assert table.loc[row, 'allele2'] == second

        # (row, column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.7491, 0.0133, 0.2376)),
            (1, '2', (0.8654, 0.1041, 0.0306)),
            (2, '300', (0.0828, 0.7752, 0.1421)),
        ]
        for row, prefix, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert table.loc[row, prefix + '.' + suffix] == value

        # position expected on each row
        expected_positions = [100, 181, 276]
        assert len(table['pos'].unique()) == len(expected_positions)
        for row, position in enumerate(expected_positions):
            assert table.loc[row, 'pos'] == position
Example #4
0
    def test_genotype_positions_lower_and_upper_limits(self):
        # Requests positions 100 to 276 of chromosome 1 through the REST
        # endpoint and checks the three variants in the downloaded file.

        # Prepare
        url = '/ukbrest/api/v1.0/genotype/1/positions/100/276'

        # Run
        reply = self.app.get(url)

        # Validate
        assert reply.status_code == 200, reply.status_code

        table = qctool(self._save_file(reply))

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        expected_columns = 6 + 300 * 3
        assert table.shape[1] == expected_columns
        assert table.shape[0] == 3

        # rsid expected on each row, in row order
        expected_rsids = ['rs1', 'rs2', 'rs3']
        assert len(table['rsid'].unique()) == len(expected_rsids)
        for row, rsid in enumerate(expected_rsids):
            assert table.loc[row, 'rsid'] == rsid

        # (allele1, allele2) expected on each row
        expected_alleles = [('G', 'A'), ('G', 'C'), ('C', 'A')]
        for row, (first, second) in enumerate(expected_alleles):
            assert table.loc[row, 'allele1'] == first
            assert table.loc[row, 'allele2'] == second

        # (row, column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.7491, 0.0133, 0.2376)),
            (1, '2', (0.8654, 0.1041, 0.0306)),
            (2, '300', (0.0828, 0.7752, 0.1421)),
        ]
        for row, prefix, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert table.loc[row, prefix + '.' + suffix] == value

        # position expected on each row
        expected_positions = [100, 181, 276]
        assert len(table['pos'].unique()) == len(expected_positions)
        for row, position in enumerate(expected_positions):
            assert table.loc[row, 'pos'] == position
Example #5
0
    def test_query_incl_rsids_multiple(self):
        # Queries chromosome 2 for two rsids given as a list and checks both
        # variants read back from the returned file.

        # prepare
        query_engine = GenoQuery(get_repository_path('example01'))
        requested_rsids = ['rs2000082', 'rs2000142']

        # run
        bgen_path = query_engine.get_incl_rsids(2, requested_rsids)

        # validate
        assert bgen_path is not None
        assert isfile(bgen_path)

        table = qctool(bgen_path)

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        expected_columns = 6 + 300 * 3
        assert table.shape[1] == expected_columns
        assert table.shape[0] == 2

        # rsid expected on each row, in row order
        expected_rsids = ['rs2000082', 'rs2000142']
        assert len(table['rsid'].unique()) == len(expected_rsids)
        for row, rsid in enumerate(expected_rsids):
            assert table.loc[row, 'rsid'] == rsid

        # (allele1, allele2) expected on each row
        expected_alleles = [('A', 'T'), ('T', 'G')]
        for row, (first, second) in enumerate(expected_alleles):
            assert table.loc[row, 'allele1'] == first
            assert table.loc[row, 'allele2'] == second

        # (row, column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.0016, 0.8613, 0.1371)),
            (0, '300', (0.0234, 0.0148, 0.9618)),
            (1, '1', (0.9619, 0.0015, 0.0366)),
            (1, '300', (0.0185, 0.1408, 0.8407)),
        ]
        for row, prefix, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert table.loc[row, prefix + '.' + suffix] == value

        # position expected on each row
        expected_positions = [6192, 10750]
        assert len(table['pos'].unique()) == len(expected_positions)
        for row, position in enumerate(expected_positions):
            assert table.loc[row, 'pos'] == position
Example #6
0
    def test_query_incl_range_temp_directory(self):
        # Runs a range query with a custom tmpdir and checks that, after the
        # query, that directory exists and holds exactly one entry.

        # prepare
        tmp_dir = '/tmp/ukbrest_different/'
        shutil.rmtree(tmp_dir, ignore_errors=True)
        query_engine = GenoQuery(get_repository_path('example01'),
                                 tmpdir=tmp_dir)

        # run
        bgen_path = query_engine.get_incl_range(chr=1, start=100, stop=276)

        # validate
        assert bgen_path is not None
        assert isfile(bgen_path)

        table = qctool(bgen_path)

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        expected_columns = 6 + 300 * 3
        assert table.shape[1] == expected_columns
        assert table.shape[0] == 3

        assert isdir(tmp_dir)
        tmp_entries = os.listdir(tmp_dir)
        assert len(tmp_entries) == 1
Example #7
0
    def test_genotype_positions_lower_and_upper_limits_http_auth_with_credentials(
            self):
        # Requests positions 100 to 276 of chromosome 1 while sending HTTP
        # basic-auth credentials that match the configured user/password
        # line, and checks the status code and the shape of the result.

        # Prepare
        self.setUp(user_pass_line='user: thepassword2')
        auth_headers = self._get_http_basic_auth_header('user', 'thepassword2')

        # Run
        reply = self.app.get(
            '/ukbrest/api/v1.0/genotype/1/positions/100/276',
            headers=auth_headers,
        )

        # Validate
        assert reply.status_code == 200, reply.status_code

        table = qctool(self._save_file(reply))

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        expected_columns = 6 + 300 * 3
        assert table.shape[1] == expected_columns
        assert table.shape[0] == 3
Example #8
0
    def test_query_incl_range_lower_and_upper_limits_at_end(self):
        # Queries chromosome 1 between positions 18058 and 18389 and checks
        # the five variants (rs246 to rs250) read back from the returned file.

        # prepare
        query_engine = GenoQuery(get_repository_path('example01'))

        # run
        bgen_path = query_engine.get_incl_range(
            chr=1, start=18058, stop=18389)

        # validate
        assert bgen_path is not None
        assert isfile(bgen_path)

        table = qctool(bgen_path)

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        expected_columns = 6 + 300 * 3
        assert table.shape[1] == expected_columns
        assert table.shape[0] == 5

        # rsid expected on each row, in row order
        expected_rsids = ['rs246', 'rs247', 'rs248', 'rs249', 'rs250']
        assert len(table['rsid'].unique()) == len(expected_rsids)
        for row, rsid in enumerate(expected_rsids):
            assert table.loc[row, 'rsid'] == rsid

        # (allele1, allele2) expected on each row
        expected_alleles = [
            ('C', 'A'), ('T', 'C'), ('G', 'C'), ('G', 'A'), ('T', 'C'),
        ]
        for row, (first, second) in enumerate(expected_alleles):
            assert table.loc[row, 'allele1'] == first
            assert table.loc[row, 'allele2'] == second

        # (row, column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.0537, 0.9160, 0.0302)),
            (1, '2', (0.0698, 0.9116, 0.0186)),
            (2, '300', (0.0826, 0.0316, 0.8858)),
            (3, '299', (0.7988, 0.1666, 0.0346)),
            (4, '150', (0.0773, 0.8683, 0.0544)),
        ]
        for row, prefix, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert table.loc[row, prefix + '.' + suffix] == value

        # position expected on each row
        expected_positions = [18058, 18139, 18211, 18294, 18389]
        assert len(table['pos'].unique()) == len(expected_positions)
        for row, position in enumerate(expected_positions):
            assert table.loc[row, 'pos'] == position
Example #9
0
    def test_query_incl_rsids_using_file(self):
        # Queries chromosome 2 passing the path of an rsids file (wrapped in
        # a list) and checks the five variants read back from the result.

        # prepare
        query_engine = GenoQuery(get_repository_path('example01'))
        # rsids are not ordered in the file, but they should be returned ordered
        rsids_path = get_repository_path('example01/rsids01.txt')

        # run
        bgen_path = query_engine.get_incl_rsids(2, [rsids_path])

        # validate
        assert bgen_path is not None
        assert isfile(bgen_path)

        table = qctool(bgen_path)

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        expected_columns = 6 + 300 * 3
        assert table.shape[1] == expected_columns
        assert table.shape[0] == 5

        # rsid expected on each row, in row order
        expected_rsids = [
            'rs2000000', 'rs2000020', 'rs2000079', 'rs2000138', 'rs2000149',
        ]
        assert len(table['rsid'].unique()) == len(expected_rsids)
        for row, rsid in enumerate(expected_rsids):
            assert table.loc[row, 'rsid'] == rsid

        # (allele1, allele2) expected on each row
        expected_alleles = [
            ('A', 'G'), ('G', 'C'), ('C', 'A'), ('A', 'G'), ('G', 'T'),
        ]
        for row, (first, second) in enumerate(expected_alleles):
            assert table.loc[row, 'allele1'] == first
            assert table.loc[row, 'allele2'] == second

        # (row, column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.9440, 0.0298, 0.0262)),
            (1, '2', (0.1534, 0.7249, 0.1218)),
            (2, '3', (0.9357, 0.0047, 0.0596)),
            (3, '1', (0.8246, 0.0686, 0.1068)),
            (4, '2', (0.0137, 0.0953, 0.8909)),
        ]
        for row, prefix, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert table.loc[row, prefix + '.' + suffix] == value

        # position expected on each row
        expected_positions = [100, 1623, 5925, 10447, 11226]
        assert len(table['pos'].unique()) == len(expected_positions)
        for row, position in enumerate(expected_positions):
            assert table.loc[row, 'pos'] == position
Example #10
0
    def test_query_incl_range_using_file(self):
        # Queries chromosome 2 using a positions file and checks the five
        # variants read back from the returned file.

        # prepare
        query_engine = GenoQuery(get_repository_path('example01'))
        # positions are not ordered in the file, but they should be returned ordered
        positions_path = get_repository_path('example01/positions01.txt')

        # run
        bgen_path = query_engine.get_incl_range_from_file(2, positions_path)

        # validate
        assert bgen_path is not None
        assert isfile(bgen_path)

        table = qctool(bgen_path)

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        expected_columns = 6 + 300 * 3
        assert table.shape[1] == expected_columns
        assert table.shape[0] == 5

        # rsid expected on each row, in row order
        expected_rsids = [
            'rs2000003', 'rs2000008', 'rs2000094', 'rs2000118', 'rs2000149',
        ]
        assert len(table['rsid'].unique()) == len(expected_rsids)
        for row, rsid in enumerate(expected_rsids):
            assert table.loc[row, 'rsid'] == rsid

        # (allele1, allele2) expected on each row
        expected_alleles = [
            ('C', 'G'), ('T', 'A'), ('C', 'G'), ('T', 'C'), ('G', 'T'),
        ]
        for row, (first, second) in enumerate(expected_alleles):
            assert table.loc[row, 'allele1'] == first
            assert table.loc[row, 'allele2'] == second

        # (row, column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.7889, 0.1538, 0.0573)),
            (1, '2', (0.8776, 0.0670, 0.0554)),
            (2, '3', (0.0553, 0.0939, 0.8509)),
            (3, '1', (0.1219, 0.8459, 0.0323)),
            (4, '2', (0.0137, 0.0953, 0.8909)),
        ]
        for row, prefix, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert table.loc[row, prefix + '.' + suffix] == value

        # position expected on each row
        expected_positions = [300, 661, 7181, 8949, 11226]
        assert len(table['pos'].unique()) == len(expected_positions)
        for row, position in enumerate(expected_positions):
            assert table.loc[row, 'pos'] == position
Example #11
0
    def test_genotype_positions_different_file_naming_chr2(self):
        # Copies example01 to example02, renames the chr1-3 BGEN files and
        # their .bgi companions from 'chrNimpv1' to 'ukb_chrN', reconfigures
        # the fixture with bgen_names='ukb_chr{:d}.bgen' and checks that a
        # positions-file query on chromosome 2 returns the expected variants.

        # Prepare
        data_dir = get_repository_path('example02')

        if os.path.isdir(data_dir):
            shutil.rmtree(data_dir)

        shutil.copytree(get_repository_path('example01'), data_dir)

        # Everything after the copy runs under try/finally so the scratch
        # directory is removed even when an assertion fails.
        try:
            for chromosome in (1, 2, 3):
                for extension in ('.bgen', '.bgen.bgi'):
                    old_name = 'chr{:d}impv1{}'.format(chromosome, extension)
                    new_name = 'ukb_chr{:d}{}'.format(chromosome, extension)
                    shutil.move(os.path.join(data_dir, old_name),
                                os.path.join(data_dir, new_name))

            self.setUp(data_dir='example02', bgen_names='ukb_chr{:d}.bgen')

            positions_file = get_repository_path('example02/positions01.txt')

            # Run
            # Open the upload in a context manager so the handle is always
            # closed, even if the request itself raises.
            with open(positions_file, 'rb') as upload:
                response = self.app.post(
                    '/ukbrest/api/v1.0/genotype/2/positions',
                    data={'file': (upload, positions_file)})

            # Validate
            assert response.status_code == 200, response.status_code

            bgen_file = self._save_file(response)

            results = qctool(bgen_file)

            assert results is not None
            assert hasattr(results, 'shape')
            assert hasattr(results, 'columns')
            # presumably 6 variant columns + 3 per sample for 300 samples;
            # confirm
            assert results.shape[1] == 6 + 300 * 3
            assert results.shape[0] == 5

            # rsid expected on each row, in row order
            expected_rsids = [
                'rs2000003', 'rs2000008', 'rs2000094', 'rs2000118',
                'rs2000149',
            ]
            assert len(results['rsid'].unique()) == len(expected_rsids)
            for row, rsid in enumerate(expected_rsids):
                assert results.loc[row, 'rsid'] == rsid

            # (allele1, allele2) expected on each row
            expected_alleles = [
                ('C', 'G'), ('T', 'A'), ('C', 'G'), ('T', 'C'), ('G', 'T'),
            ]
            for row, (allele1, allele2) in enumerate(expected_alleles):
                assert results.loc[row, 'allele1'] == allele1
                assert results.loc[row, 'allele2'] == allele2

            # (row, column prefix, values of the .aa/.ab/.bb columns)
            expected_probabilities = [
                (0, '1', (0.7889, 0.1538, 0.0573)),
                (1, '2', (0.8776, 0.0670, 0.0554)),
                (2, '3', (0.0553, 0.0939, 0.8509)),
                (3, '1', (0.1219, 0.8459, 0.0323)),
                (4, '2', (0.0137, 0.0953, 0.8909)),
            ]
            for row, prefix, values in expected_probabilities:
                for suffix, value in zip(('aa', 'ab', 'bb'), values):
                    assert results.loc[row, prefix + '.' + suffix] == value

            # position expected on each row
            expected_positions = [300, 661, 7181, 8949, 11226]
            assert len(results['pos'].unique()) == len(expected_positions)
            for row, position in enumerate(expected_positions):
                assert results.loc[row, 'pos'] == position
        finally:
            shutil.rmtree(data_dir)
Example #12
0
    def test_genotype_positions_different_file_naming_chr1(self):
        # Copies example01 to example02, renames the chr1-3 BGEN files and
        # their .bgi companions from 'chrNimpv1' to 'ukb_chrN', reconfigures
        # the fixture and checks that a positions range query on chromosome 1
        # returns the expected variants.

        # Prepare
        data_dir = get_repository_path('example02')

        if os.path.isdir(data_dir):
            shutil.rmtree(data_dir)

        shutil.copytree(get_repository_path('example01'), data_dir)

        # Everything after the copy runs under try/finally so the scratch
        # directory is removed even when an assertion fails.
        try:
            for chromosome in (1, 2, 3):
                for extension in ('.bgen', '.bgen.bgi'):
                    old_name = 'chr{:d}impv1{}'.format(chromosome, extension)
                    new_name = 'ukb_chr{:d}{}'.format(chromosome, extension)
                    shutil.move(os.path.join(data_dir, old_name),
                                os.path.join(data_dir, new_name))

            # NOTE(review): bgen_names is a literal file name here, not a
            # '{:d}' pattern; presumably it can only match chromosome 1 --
            # confirm this is intended.
            self.setUp(data_dir='example02', bgen_names='ukb_chr1.bgen')

            # Run
            response = self.app.get(
                '/ukbrest/api/v1.0/genotype/1/positions/100/276')

            # Validate
            assert response.status_code == 200, response.status_code

            bgen_file = self._save_file(response)

            results = qctool(bgen_file)

            assert results is not None
            assert hasattr(results, 'shape')
            assert hasattr(results, 'columns')
            # presumably 6 variant columns + 3 per sample for 300 samples;
            # confirm
            assert results.shape[1] == 6 + 300 * 3
            assert results.shape[0] == 3

            # rsid expected on each row, in row order
            expected_rsids = ['rs1', 'rs2', 'rs3']
            assert len(results['rsid'].unique()) == len(expected_rsids)
            for row, rsid in enumerate(expected_rsids):
                assert results.loc[row, 'rsid'] == rsid

            # (allele1, allele2) expected on each row
            expected_alleles = [('G', 'A'), ('G', 'C'), ('C', 'A')]
            for row, (allele1, allele2) in enumerate(expected_alleles):
                assert results.loc[row, 'allele1'] == allele1
                assert results.loc[row, 'allele2'] == allele2

            # (row, column prefix, values of the .aa/.ab/.bb columns)
            expected_probabilities = [
                (0, '1', (0.7491, 0.0133, 0.2376)),
                (1, '2', (0.8654, 0.1041, 0.0306)),
                (2, '300', (0.0828, 0.7752, 0.1421)),
            ]
            for row, prefix, values in expected_probabilities:
                for suffix, value in zip(('aa', 'ab', 'bb'), values):
                    assert results.loc[row, prefix + '.' + suffix] == value

            # position expected on each row
            expected_positions = [100, 181, 276]
            assert len(results['pos'].unique()) == len(expected_positions)
            for row, position in enumerate(expected_positions):
                assert results.loc[row, 'pos'] == position
        finally:
            shutil.rmtree(data_dir)
Example #13
0
    def test_genotype_rsids_one_rsid_not_found(self):
        # Posts the 'rsids01_bug1.txt' rsids file to the chromosome 2 rsids
        # endpoint and checks that the request still succeeds and that four
        # variants are read back from the downloaded file.

        # Prepare
        rsids_file = get_repository_path('example01/rsids01_bug1.txt')

        # Run
        # Open the upload in a context manager so the handle is always
        # closed, even if the request itself raises.
        with open(rsids_file, 'rb') as upload:
            response = self.app.post(
                '/ukbrest/api/v1.0/genotype/2/rsids',
                data={'file': (upload, rsids_file)})

        # Validate
        assert response.status_code == 200, response.status_code

        bgen_file = self._save_file(response)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 4

        # rsid expected on each row, in row order
        expected_rsids = ['rs2000000', 'rs2000020', 'rs2000138', 'rs2000149']
        assert len(results['rsid'].unique()) == len(expected_rsids)
        for row, rsid in enumerate(expected_rsids):
            assert results.loc[row, 'rsid'] == rsid

        # (allele1, allele2) expected on each row
        expected_alleles = [('A', 'G'), ('G', 'C'), ('A', 'G'), ('G', 'T')]
        for row, (allele1, allele2) in enumerate(expected_alleles):
            assert results.loc[row, 'allele1'] == allele1
            assert results.loc[row, 'allele2'] == allele2

        # (row, column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.9440, 0.0298, 0.0262)),
            (1, '2', (0.1534, 0.7249, 0.1218)),
            (2, '1', (0.8246, 0.0686, 0.1068)),
            (3, '2', (0.0137, 0.0953, 0.8909)),
        ]
        for row, prefix, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert results.loc[row, prefix + '.' + suffix] == value

        # position expected on each row
        expected_positions = [100, 1623, 10447, 11226]
        assert len(results['pos'].unique()) == len(expected_positions)
        for row, position in enumerate(expected_positions):
            assert results.loc[row, 'pos'] == position
Example #14
0
    def test_genotype_positions_using_file(self):
        # Posts a positions file to the chromosome 2 positions endpoint and
        # checks the five variants read back from the downloaded file.

        # Prepare
        positions_file = get_repository_path('example01/positions01.txt')

        # Run
        # Open the upload in a context manager so the handle is always
        # closed, even if the request itself raises.
        with open(positions_file, 'rb') as upload:
            response = self.app.post(
                '/ukbrest/api/v1.0/genotype/2/positions',
                data={'file': (upload, positions_file)})

        # Validate
        assert response.status_code == 200, response.status_code

        bgen_file = self._save_file(response)

        results = qctool(bgen_file)

        assert results is not None
        assert hasattr(results, 'shape')
        assert hasattr(results, 'columns')
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        assert results.shape[1] == 6 + 300 * 3
        assert results.shape[0] == 5

        # rsid expected on each row, in row order
        expected_rsids = [
            'rs2000003', 'rs2000008', 'rs2000094', 'rs2000118', 'rs2000149',
        ]
        assert len(results['rsid'].unique()) == len(expected_rsids)
        for row, rsid in enumerate(expected_rsids):
            assert results.loc[row, 'rsid'] == rsid

        # (allele1, allele2) expected on each row
        expected_alleles = [
            ('C', 'G'), ('T', 'A'), ('C', 'G'), ('T', 'C'), ('G', 'T'),
        ]
        for row, (allele1, allele2) in enumerate(expected_alleles):
            assert results.loc[row, 'allele1'] == allele1
            assert results.loc[row, 'allele2'] == allele2

        # (row, column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.7889, 0.1538, 0.0573)),
            (1, '2', (0.8776, 0.0670, 0.0554)),
            (2, '3', (0.0553, 0.0939, 0.8509)),
            (3, '1', (0.1219, 0.8459, 0.0323)),
            (4, '2', (0.0137, 0.0953, 0.8909)),
        ]
        for row, prefix, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert results.loc[row, prefix + '.' + suffix] == value

        # position expected on each row
        expected_positions = [300, 661, 7181, 8949, 11226]
        assert len(results['pos'].unique()) == len(expected_positions)
        for row, position in enumerate(expected_positions):
            assert results.loc[row, 'pos'] == position
Example #15
0
    def test_genotype_positions_lower_limit_only(self):
        # Requests chromosome 1 positions giving only the value 18058 in the
        # URL and checks the five variants (rs246 to rs250) in the downloaded
        # file.

        # Prepare
        url = '/ukbrest/api/v1.0/genotype/1/positions/18058'

        # Run
        reply = self.app.get(url)

        # Validate
        assert reply.status_code == 200, reply.status_code

        table = qctool(self._save_file(reply))

        assert table is not None
        for attribute in ('shape', 'columns'):
            assert hasattr(table, attribute)
        # presumably 6 variant columns + 3 per sample for 300 samples; confirm
        expected_columns = 6 + 300 * 3
        assert table.shape[1] == expected_columns
        assert table.shape[0] == 5

        # rsid expected on each row, in row order
        expected_rsids = ['rs246', 'rs247', 'rs248', 'rs249', 'rs250']
        assert len(table['rsid'].unique()) == len(expected_rsids)
        for row, rsid in enumerate(expected_rsids):
            assert table.loc[row, 'rsid'] == rsid

        # (allele1, allele2) expected on each row
        expected_alleles = [
            ('C', 'A'), ('T', 'C'), ('G', 'C'), ('G', 'A'), ('T', 'C'),
        ]
        for row, (first, second) in enumerate(expected_alleles):
            assert table.loc[row, 'allele1'] == first
            assert table.loc[row, 'allele2'] == second

        # (row, column prefix, values of the .aa/.ab/.bb columns)
        expected_probabilities = [
            (0, '1', (0.0537, 0.9160, 0.0302)),
            (1, '2', (0.0698, 0.9116, 0.0186)),
            (2, '300', (0.0826, 0.0316, 0.8858)),
            (3, '299', (0.7988, 0.1666, 0.0346)),
            (4, '150', (0.0773, 0.8683, 0.0544)),
        ]
        for row, prefix, values in expected_probabilities:
            for suffix, value in zip(('aa', 'ab', 'bb'), values):
                assert table.loc[row, prefix + '.' + suffix] == value

        # position expected on each row
        expected_positions = [18058, 18139, 18211, 18294, 18389]
        assert len(table['pos'].unique()) == len(expected_positions)
        for row, position in enumerate(expected_positions):
            assert table.loc[row, 'pos'] == position