def test_genotype_temp_files_removed_in_server_side(self):
    """After serving a genotype query, the server-side temp dir must be left empty."""
    # Prepare: start from a clean temporary directory.
    tmp_dir = '/tmp/ukbrest2tmp/'
    shutil.rmtree(tmp_dir, ignore_errors=True)
    geno_query = GenoQuery(get_repository_path('example01'), tmpdir=tmp_dir)

    # Configure the Flask app with this query object.
    app.app.config['TESTING'] = True
    app.app.config['genoquery'] = geno_query
    client = app.app.test_client()

    # Run
    response = client.get('/ukbrest/api/v1.0/genotype/1/positions/100/276')

    # Validate the response payload first.
    assert response.status_code == 200, response.status_code
    downloaded_bgen = self._save_file(response)
    qc_results = qctool(downloaded_bgen)
    assert qc_results is not None
    assert hasattr(qc_results, 'shape')
    assert hasattr(qc_results, 'columns')
    # 6 metadata columns + 3 probability columns per sample (300 samples), 3 variants.
    assert qc_results.shape[1] == 6 + 300 * 3
    assert qc_results.shape[0] == 3

    # The temp dir must still exist but hold no leftover files.
    assert os.path.isdir(tmp_dir)
    assert not os.listdir(tmp_dir)
def test_process_users_file_file_does_not_exist_test00(self):
    """process_users_file on a non-existent path must complete without raising."""
    missing_users_file = get_repository_path('no/existing/file/here.txt')
    hasher = PasswordHasher(missing_users_file, method='pbkdf2:sha256')
    # Success criterion is simply that this does not raise.
    hasher.process_users_file()
def test_process_users_file_one_password_hashed_rest_not_test01(self):
    """Plain-text passwords get hashed; the already-hashed 'adams' entry stays untouched."""
    # prepare: operate on a copy so the fixture file stays pristine
    orig_user_file = get_repository_path('wsgi/test01/users.txt')
    users_file = orig_user_file + '.bak'
    copyfile(orig_user_file, users_file)
    orig_users = self.load_data(orig_user_file)

    # run
    ph = PasswordHasher(users_file, method='pbkdf2:sha256')
    ph.process_users_file()

    # evaluate — cleanup moved to finally so the .bak copy never lingers on failure
    try:
        assert os.path.isfile(users_file)
        users = self.load_data(users_file)
        assert len(users) == 3
        for user, password in users.items():
            assert user in orig_users.keys(), user
            if user != 'adams':
                # plain-text entries must have been replaced by a hash
                assert password != orig_users[user], \
                    user + ' / ' + password + ' / ' + orig_users[user]
            else:
                # BUGFIX: compare against the ORIGINAL file's entry; the previous
                # 'password == users[user]' compared the dict value with itself
                # (a tautology), so the "already hashed entry unchanged" case was
                # never actually tested. Also added the missing ' / ' separator.
                assert password == orig_users[user], \
                    user + ' / ' + password + ' / ' + orig_users[user]
            # 93 chars is the length of a pbkdf2:sha256 hash string — TODO confirm
            assert len(password) == 93, len(password)
    finally:
        os.remove(users_file)
def test_process_users_file_already_hashed_test00(self):
    """Hashing an already-hashed users file must leave every hash unchanged (idempotence)."""
    # prepare: work on a throwaway copy so the fixture file is not modified
    orig_user_file = get_repository_path('wsgi/test00/users.txt')
    users_file = orig_user_file + '.bak'
    copyfile(orig_user_file, users_file)
    orig_users = self.load_data(orig_user_file)

    # first pass hashes the plain-text passwords
    ph = PasswordHasher(users_file, method='pbkdf2:sha256')
    ph.process_users_file()
    users = self.load_data(users_file)

    # run: second pass over the same (now fully hashed) file
    ph = PasswordHasher(users_file, method='pbkdf2:sha256')
    ph.process_users_file()

    # evaluate
    assert os.path.isfile(users_file)
    new_users = self.load_data(users_file)
    assert len(users) == 3
    for user, password in new_users.items():
        assert user in orig_users.keys(), user
        # each hash must be byte-identical to the first pass, i.e. not re-hashed
        assert password == users[user], password + ' / ' + users[user]
        # 93 chars is the length of a pbkdf2:sha256 hash string — TODO confirm
        assert len(password) == 93, len(password)
    os.remove(users_file)
def setUp(self, data_dir='example01', bgen_names='chr{:d}impv1.bgen',
          bgenix_path='bgenix', user_pass_line=None):
    """Configure the Flask app and test client for the genotype REST tests.

    Args:
        data_dir: repository subdirectory holding the bgen fixtures.
        bgen_names: file-name pattern of the per-chromosome bgen files.
        bgenix_path: path to the bgenix executable.
        user_pass_line: optional 'user: password' line; when given, HTTP
            basic auth is enabled with that credential.
    """
    super(TestRestApiGenotype, self).setUp()

    # Load data
    genoq = GenoQuery(get_repository_path(data_dir),
                      bgen_names=bgen_names,
                      bgenix_path=bgenix_path)

    # Configure. Flask config keys are case-sensitive and upper-case: the
    # previous 'testing' key was a silent no-op ('TESTING' matches the key
    # used elsewhere in this file).
    app.app.config['TESTING'] = True
    app.app.config['auth'] = None
    app.app.config['genoquery'] = genoq

    if user_pass_line is not None:
        # Write the credential line to a temp file PasswordHasher can read.
        # (mode='w' writes directly; replaces the old create-close-reopen dance.)
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
            f.write(user_pass_line)

        ph = PasswordHasher(f.name, method='pbkdf2:sha256')
        app.app.config['auth'] = ph.setup_http_basic_auth()

    self.app = app.app.test_client()
def test_get_iterator_repeated_variant_positions(self):
    """items() must yield every variant, including distinct variants that share
    the same position and rsid (here: two variants at position 418 with rsid rs5)."""
    # Prepare
    bgen_dosage = BGENDosage(
        get_repository_path('set06_repeated_positions/chr1impv1.bgen'))

    # Run
    all_items = list(bgen_dosage.items(n_rows_cached=5))
    assert len(all_items) == 11, len(all_items)

    # snp 1
    assert all_items[0].chr == 1
    assert all_items[0].position == 100
    assert all_items[0].allele0 == 'T'
    assert all_items[0].allele1 == 'A'
    assert all_items[0].rsid == 'rs1'
    assert all_items[0].dosages.shape == (20, )
    # dosage = expected allele count = P(het)*1 + P(hom)*2 from the fixture's probabilities
    assert truncate(all_items[0].dosages[0]) == truncate(
        np.dot([0.06817, 0.27690, 0.65493], [0, 1, 2])) == 1.5867
    assert truncate(all_items[0].dosages[19]) == truncate(
        np.dot([0.00219, 0.08983, 0.90798], [0, 1, 2])) == 1.9057

    # snp 5 — first of the two variants at position 418 / rsid rs5
    assert all_items[4].chr == 1
    assert all_items[4].position == 418
    assert all_items[4].allele0 == 'T'
    assert all_items[4].allele1 == 'A'
    assert all_items[4].rsid == 'rs5'
    assert all_items[4].dosages.shape == (20, )
    assert truncate(all_items[4].dosages[0]) == truncate(
        np.dot([0.09158, 0.16910, 0.73933], [0, 1, 2])) == 1.6477
    assert truncate(all_items[4].dosages[1]) == truncate(
        np.dot([0.09820, 0.09934, 0.80246], [0, 1, 2])) == 1.7042
    assert truncate(all_items[4].dosages[19]) == truncate(
        np.dot([0.02833, 0.93189, 0.03978], [0, 1, 2])) == 1.0114

    # snp 6 — second variant at position 418 / rsid rs5 (different alt allele)
    assert all_items[5].chr == 1
    assert all_items[5].position == 418
    assert all_items[5].allele0 == 'T'
    assert all_items[5].allele1 == 'C'
    assert all_items[5].rsid == 'rs5'
    assert all_items[5].dosages.shape == (20, )
    assert truncate(all_items[5].dosages[0]) == truncate(
        np.dot([0.00598, 0.02878, 0.96524], [0, 1, 2])) == 1.9592
    assert truncate(all_items[5].dosages[1]) == truncate(
        np.dot([0.01553, 0.14800, 0.83647], [0, 1, 2])) == 1.8209
    assert truncate(all_items[5].dosages[19]) == truncate(
        np.dot([0.08347, 0.02509, 0.89144], [0, 1, 2])) == 1.8079

    # snp last
    assert all_items[10].chr == 1
    assert all_items[10].position == 839
    assert all_items[10].allele0 == 'G'
    assert all_items[10].allele1 == 'A'
    assert all_items[10].rsid == 'rs10'
    assert all_items[10].dosages.shape == (20, )
    assert truncate(all_items[10].dosages[0]) == truncate(
        np.dot([0.03161, 0.82957, 0.13882], [0, 1, 2])) == 1.1072
    assert truncate(all_items[10].dosages[19]) == truncate(
        np.dot([0.96104, 0.03167, 0.00729], [0, 1, 2])) == 0.0462
def test_genotype_rsids_using_file_http_auth_with_credentials(self):
    """With valid HTTP basic-auth credentials, an rsids-file query succeeds (200)."""
    # Prepare: re-run setUp enabling basic auth with this credential line.
    self.setUp(user_pass_line='user: thepassword2')
    rsids_file = get_repository_path('example01/rsids01.txt')

    # Run — the uploaded file handle is now closed deterministically (the
    # original leaked the open() handle until garbage collection).
    with open(rsids_file, 'rb') as rsids_fd:
        response = self.app.post(
            '/ukbrest/api/v1.0/genotype/2/rsids',
            data={'file': (rsids_fd, rsids_file)},
            headers=self._get_http_basic_auth_header('user', 'thepassword2'),
        )

    # Validate
    assert response.status_code == 200, response.status_code
    bgen_file = self._save_file(response)
    results = qctool(bgen_file)
    assert results is not None
    assert hasattr(results, 'shape')
    assert hasattr(results, 'columns')
    # 6 metadata columns + 3 probability columns per sample (300 samples)
    assert results.shape[1] == 6 + 300 * 3
    assert results.shape[0] == 5
def test_init(self):
    """BGENDosage can be constructed from an existing bgen fixture file."""
    dosage = BGENDosage(get_repository_path('set00/chr1impv1.bgen'))
    assert dosage is not None
def test_postload_load_samples_data_no_eid_column(self):
    """When the sample files lack an identifier (eid) column, no tables are created."""
    # prepare
    directory = get_repository_path('postloader/samples_data03')

    # run
    pl = Postloader(POSTGRESQL_ENGINE)
    pl.load_samples_data(directory)

    # Validate
    db_engine = create_engine(POSTGRESQL_ENGINE)

    # samplesqc table must NOT exist
    table = pd.read_sql(
        """
        SELECT EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'public'
            AND tablename = '{}'
        )""".format('samplesqc'), db_engine)
    assert not table.iloc[0, 0]

    # relatedness table must NOT exist
    table = pd.read_sql(
        """
        SELECT EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'public'
            AND tablename = '{}'
        )""".format('relatedness'), db_engine)
    assert not table.iloc[0, 0]
def test_performance_get_iterator_with_cache(self):
    """Row caching in items() should give a clear (>=3x) speedup over no caching."""
    # measure time with no cache (one row fetched per access)
    bgen_dosage = BGENDosage(get_repository_path('set00/chr2impv1.bgen'))
    start_time = time()
    no_cache_results = list(bgen_dosage.items(n_rows_cached=1))
    no_cache_time = time() - start_time

    # measure time with cache (200 rows fetched per access)
    bgen_dosage = BGENDosage(get_repository_path('set00/chr2impv1.bgen'))
    start_time = time()
    cache_results = list(bgen_dosage.items(n_rows_cached=200))
    cache_time = time() - start_time

    # both runs must yield the same number of variants
    assert len(no_cache_results) == len(cache_results)
    # NOTE(review): wall-clock 3x threshold may be flaky on loaded machines
    assert cache_time * 3.0 <= no_cache_time, (cache_time, no_cache_time)
def test_postload_samples_data_check_constrains_exist(self):
    """load_samples_data must create single-column 'eid' primary keys on both tables."""
    # prepare
    directory = get_repository_path('postloader/samples_data04')

    # run: explicit identifier columns and per-file separators
    pl = Postloader(POSTGRESQL_ENGINE)
    pl.load_samples_data(directory,
                         identifier_columns={
                             'relatedness.txt': 'ID1',
                             'samplesqc.txt': 'ID',
                         },
                         separators={
                             'relatedness.txt': '\t',
                             'samplesqc.txt': ',',
                         })

    # Validate
    ## Check samplesqc table exists
    table = pd.read_sql(
        """
        SELECT EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'public'
            AND tablename = '{}'
        )""".format('samplesqc'), create_engine(POSTGRESQL_ENGINE))
    assert table.iloc[0, 0]

    # primary key: the identifier column must have been renamed to 'eid'
    constraint_sql = self._get_table_contrains('samplesqc', relationship_query='pk_%%')
    constraints_results = pd.read_sql(constraint_sql, create_engine(POSTGRESQL_ENGINE))
    assert constraints_results is not None
    assert not constraints_results.empty
    columns = constraints_results['column_name'].tolist()
    assert len(columns) == 1
    assert 'eid' in columns

    ## Check relatedness table exists
    table = pd.read_sql(
        """
        SELECT EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'public'
            AND tablename = '{}'
        )""".format('relatedness'), create_engine(POSTGRESQL_ENGINE))
    assert table.iloc[0, 0]

    # primary key
    constraint_sql = self._get_table_contrains('relatedness', relationship_query='pk_%%')
    constraints_results = pd.read_sql(constraint_sql, create_engine(POSTGRESQL_ENGINE))
    assert constraints_results is not None
    assert not constraints_results.empty
    columns = constraints_results['column_name'].tolist()
    assert len(columns) == 1
    assert 'eid' in columns
def test_basic_call(self):
    """The load_data script exits with code 0 given a fully configured environment."""
    # configure the environment variables the script reads
    os.environ[GENOTYPE_PATH_ENV] = get_repository_path('pheno2sql/example12/')
    os.environ[PHENOTYPE_PATH] = get_repository_path('pheno2sql/example12/')
    os.environ[DB_URI_ENV] = POSTGRESQL_ENGINE
    os.environ[GENOTYPE_BGEN_SAMPLE] = 'impv2.sample'
    os.environ[LOAD_DATA_VACUUM] = "yes"

    # helper setup hooks — presumably validate/normalize the env vars; TODO confirm
    _setup_genotype_path()
    _setup_phenotype_path()
    _setup_db_uri()

    # run the loader as a subprocess and check its exit status only
    options = [
        'python',
        self.load_data_path,
    ]
    return_code = call(options)
    assert return_code == 0
def test_postload_codings_table_many_tab_characters_and_na(self):
    """Codings files with extra tab characters and an 'N/A' meaning load correctly."""
    # prepare
    directory = get_repository_path('postloader/codings04_many_tabs')

    # run
    pl = Postloader(POSTGRESQL_ENGINE)
    pl.load_codings(directory)

    # validate
    ## Check codings table exists
    table = pd.read_sql(
        """
        SELECT EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'public'
            AND tablename = '{}'
        )""".format('codings'), create_engine(POSTGRESQL_ENGINE))
    assert table.iloc[0, 0]

    codings = pd.read_sql(
        "select * from codings order by data_coding, coding",
        create_engine(POSTGRESQL_ENGINE))
    assert codings is not None
    expected_columns = ['data_coding', 'coding', 'meaning']
    assert len(codings.columns) >= len(expected_columns)
    assert all(x in codings.columns for x in expected_columns)
    assert not codings.empty
    assert codings.shape[0] == 5

    # rows checked in (data_coding, coding) order
    cidx = 0
    assert codings.loc[cidx, 'data_coding'] == 7
    assert codings.loc[cidx, 'coding'] == '0'
    assert codings.loc[cidx, 'meaning'] == 'No'

    cidx += 1
    assert codings.loc[cidx, 'data_coding'] == 7
    assert codings.loc[cidx, 'coding'] == '1'
    assert codings.loc[cidx, 'meaning'] == 'Yes'

    cidx += 1
    assert codings.loc[cidx, 'data_coding'] == 9
    assert codings.loc[cidx, 'coding'] == '0'
    assert codings.loc[cidx, 'meaning'] == 'Female'

    cidx += 1
    assert codings.loc[cidx, 'data_coding'] == 9
    assert codings.loc[cidx, 'coding'] == '1'
    assert codings.loc[cidx, 'meaning'] == 'Male'

    # 'N/A' must survive loading as a literal string, not become SQL NULL
    cidx += 1
    assert codings.loc[cidx, 'data_coding'] == 9
    assert codings.loc[cidx, 'coding'] == '2'
    assert codings.loc[cidx, 'meaning'] == 'N/A'
def test_postload_load_samples_data_one_file(self):
    """A single samples file loads into a 'samplesqc' table with normalized column names."""
    # prepare
    directory = get_repository_path('postloader/samples_data01')

    # run
    pl = Postloader(POSTGRESQL_ENGINE)
    pl.load_samples_data(directory)

    # Validate
    db_engine = create_engine(POSTGRESQL_ENGINE)

    ## Check samples table exists
    table = pd.read_sql(
        """
        SELECT EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'public'
            AND tablename = '{}'
        )""".format('samplesqc'), db_engine)
    assert table.iloc[0, 0]

    samplesqc = pd.read_sql("select * from samplesqc order by eid asc",
                            create_engine(POSTGRESQL_ENGINE),
                            index_col='eid')
    assert samplesqc is not None
    # column names: presumably 'c' prefix + snake_cased original + '_0_0' suffix — TODO confirm
    expected_columns = [
        'ccolumn_name_0_0', 'canothercolumn_0_0', 'cthird_column_0_0',
        'cother_measure_col_umn_0_0'
    ]
    assert len(samplesqc.columns) == len(expected_columns)
    assert all(x in samplesqc.columns for x in expected_columns)
    assert not samplesqc.empty
    assert samplesqc.shape[0] == 4

    # spot-check every row, indexed by eid
    assert samplesqc.loc[10, 'ccolumn_name_0_0'] == 'UKBB'
    assert samplesqc.loc[10, 'canothercolumn_0_0'] == 'Batch'
    assert samplesqc.loc[10, 'cthird_column_0_0'] == 'SomeValue'
    assert samplesqc.loc[10, 'cother_measure_col_umn_0_0'] == 8.33992

    assert samplesqc.loc[20, 'ccolumn_name_0_0'] == 'Other'
    assert samplesqc.loc[20, 'canothercolumn_0_0'] == 'Some'
    assert samplesqc.loc[20, 'cthird_column_0_0'] == 'AnotherValue'
    assert samplesqc.loc[20, 'cother_measure_col_umn_0_0'] == -772.1234

    assert samplesqc.loc[30, 'ccolumn_name_0_0'] == 'Other12'
    assert samplesqc.loc[30, 'canothercolumn_0_0'] == 'Some12'
    assert samplesqc.loc[30, 'cthird_column_0_0'] == 'AnotherValue12'
    assert samplesqc.loc[30, 'cother_measure_col_umn_0_0'] == -0.000001234

    assert samplesqc.loc[2222240, 'ccolumn_name_0_0'] == 'Other13'
    assert samplesqc.loc[2222240, 'canothercolumn_0_0'] == 'Some13'
    assert samplesqc.loc[2222240, 'cthird_column_0_0'] == 'AnotherValue13'
    assert samplesqc.loc[2222240, 'cother_measure_col_umn_0_0'] == 0.051234
def test_verify_password_users_file_does_not_exist_test01(self):
    """verify_password must reject every credential when the users file is missing."""
    missing_file = get_repository_path('no/existing/file/here.txt')
    hasher = PasswordHasher(missing_file, method='pbkdf2:sha256')
    hasher.process_users_file()

    # No users could be loaded, so all attempts must be rejected.
    attempts = [
        ('milton', 'whatever'),
        ('john', 'mypassword'),
        ('adams', 'anotherpassword'),
        ('james', 'mypassword'),
    ]
    for username, candidate in attempts:
        assert not hasher.verify_password(username, candidate)
def test_verify_password_users_file_empty_test01(self):
    """verify_password rejects all credentials when pointed at a non-existent .bak file."""
    orig_user_file = get_repository_path('wsgi/test02/users.txt')
    # NOTE: the '.bak' copy is deliberately never created here.
    users_file = orig_user_file + '.bak'
    hasher = PasswordHasher(users_file, method='pbkdf2:sha256')
    hasher.process_users_file()

    # With no loadable users, every credential check must fail.
    for username, candidate in (
            ('milton', 'whatever'),
            ('john', 'mypassword'),
            ('adams', 'anotherpassword'),
            ('james', 'mypassword')):
        assert not hasher.verify_password(username, candidate)
def test_genotype_rsids_using_file_http_auth_no_credentials(self):
    """When auth is enabled and no credentials are sent, the API must answer 401."""
    # Prepare: enable HTTP basic auth.
    self.setUp(user_pass_line='user: thepassword2')
    rsids_file = get_repository_path('example01/rsids01.txt')

    # Run — close the uploaded file handle deterministically (the original
    # leaked the open() handle until garbage collection).
    with open(rsids_file, 'rb') as rsids_fd:
        response = self.app.post(
            '/ukbrest/api/v1.0/genotype/2/rsids',
            data={'file': (rsids_fd, rsids_file)})

    # Validate: unauthorized
    assert response.status_code == 401, response.status_code
def test_query_incl_range_upper_limit_only(self):
    """get_incl_range with only 'stop' returns all variants from the chromosome start."""
    # prepare
    genoq = GenoQuery(get_repository_path('example01'))

    # run
    bgen_file = genoq.get_incl_range(chr=1, stop=276)

    # validate
    assert bgen_file is not None
    assert isfile(bgen_file)

    results = qctool(bgen_file)
    assert results is not None
    assert hasattr(results, 'shape')
    assert hasattr(results, 'columns')
    # 6 metadata columns + 3 probability columns per sample (300 samples)
    assert results.shape[1] == 6 + 300 * 3
    assert results.shape[0] == 3

    rsid_values = results['rsid'].unique()
    assert len(rsid_values) == 3
    assert results.loc[0, 'rsid'] == 'rs1'
    assert results.loc[1, 'rsid'] == 'rs2'
    assert results.loc[2, 'rsid'] == 'rs3'

    assert results.loc[0, 'allele1'] == 'G'
    assert results.loc[0, 'allele2'] == 'A'

    assert results.loc[1, 'allele1'] == 'G'
    assert results.loc[1, 'allele2'] == 'C'

    assert results.loc[2, 'allele1'] == 'C'
    assert results.loc[2, 'allele2'] == 'A'

    # spot-check genotype probabilities (aa/ab/bb) for a few samples
    assert results.loc[0, '1.aa'] == 0.7491
    assert results.loc[0, '1.ab'] == 0.0133
    assert results.loc[0, '1.bb'] == 0.2376

    assert results.loc[1, '2.aa'] == 0.8654
    assert results.loc[1, '2.ab'] == 0.1041
    assert results.loc[1, '2.bb'] == 0.0306

    assert results.loc[2, '300.aa'] == 0.0828
    assert results.loc[2, '300.ab'] == 0.7752
    assert results.loc[2, '300.bb'] == 0.1421

    # all positions must be <= the requested stop (276)
    pos_values = results['pos'].unique()
    assert len(pos_values) == 3
    assert results.loc[0, 'pos'] == 100
    assert results.loc[1, 'pos'] == 181
    assert results.loc[2, 'pos'] == 276
def test_query_incl_rsids_multiple(self):
    """get_incl_rsids returns exactly the requested rsids (two here) for a chromosome."""
    # prepare
    genoq = GenoQuery(get_repository_path('example01'))

    # run
    bgen_file = genoq.get_incl_rsids(2, ['rs2000082', 'rs2000142'])

    # validate
    assert bgen_file is not None
    assert isfile(bgen_file)

    results = qctool(bgen_file)
    assert results is not None
    assert hasattr(results, 'shape')
    assert hasattr(results, 'columns')
    # 6 metadata columns + 3 probability columns per sample (300 samples)
    assert results.shape[1] == 6 + 300 * 3
    assert results.shape[0] == 2

    rsid_values = results['rsid'].unique()
    assert len(rsid_values) == 2
    assert results.loc[0, 'rsid'] == 'rs2000082'
    assert results.loc[1, 'rsid'] == 'rs2000142'

    assert results.loc[0, 'allele1'] == 'A'
    assert results.loc[0, 'allele2'] == 'T'

    assert results.loc[1, 'allele1'] == 'T'
    assert results.loc[1, 'allele2'] == 'G'

    # spot-check genotype probabilities (aa/ab/bb) for first and last samples
    assert results.loc[0, '1.aa'] == 0.0016
    assert results.loc[0, '1.ab'] == 0.8613
    assert results.loc[0, '1.bb'] == 0.1371

    assert results.loc[0, '300.aa'] == 0.0234
    assert results.loc[0, '300.ab'] == 0.0148
    assert results.loc[0, '300.bb'] == 0.9618

    assert results.loc[1, '1.aa'] == 0.9619
    assert results.loc[1, '1.ab'] == 0.0015
    assert results.loc[1, '1.bb'] == 0.0366

    assert results.loc[1, '300.aa'] == 0.0185
    assert results.loc[1, '300.ab'] == 0.1408
    assert results.loc[1, '300.bb'] == 0.8407

    pos_values = results['pos'].unique()
    assert len(pos_values) == 2
    assert results.loc[0, 'pos'] == 6192
    assert results.loc[1, 'pos'] == 10750
def test_genotype_positions_different_file_naming_chr1_wrong_bgen_name(self):
    """A bgen_names pattern matching no file must produce a 400 naming the pattern."""
    # Prepare: clone example01 into example02 and rename its bgen/bgi files
    # to the 'ukb_chrN' scheme.
    data_dir = get_repository_path('example02')
    if os.path.isdir(data_dir):
        shutil.rmtree(data_dir)
    shutil.copytree(get_repository_path('example01'), data_dir)

    for chromosome in (1, 2, 3):
        for extension in ('.bgen', '.bgen.bgi'):
            shutil.move(
                os.path.join(data_dir, 'chr{}impv1{}'.format(chromosome, extension)),
                os.path.join(data_dir, 'ukb_chr{}{}'.format(chromosome, extension)))

    # Configure the API against the renamed data with a non-matching pattern.
    self.setUp(data_dir='example02', bgen_names='wrong.bgen')

    # Run
    response = self.app.get('/ukbrest/api/v1.0/genotype/1/positions/100/276')

    # Validate: the error message must mention the offending file pattern.
    assert response.status_code == 400, response.status_code
    data = json.load(io.StringIO(response.data.decode('utf-8')))
    assert 'message' in data, data
    assert 'wrong.bgen' in data['message'], data['message']

    shutil.rmtree(data_dir)
def test_get_last_row(self):
    """get_row(-1) returns the last variant of the file (negative indexing supported)."""
    # Prepare
    bgen_dosage = BGENDosage(get_repository_path('set00/chr1impv1.bgen'))

    # Run
    dosage_row = bgen_dosage.get_row(-1)

    assert dosage_row is not None
    assert hasattr(dosage_row, 'chr')
    assert dosage_row.chr == 1
    assert hasattr(dosage_row, 'rsid')
    assert dosage_row.rsid == 'rs250'
    assert hasattr(dosage_row, 'position')
    assert dosage_row.position == 18389
    assert hasattr(dosage_row, 'allele0')
    assert dosage_row.allele0 == 'T'
    assert hasattr(dosage_row, 'allele1')
    assert dosage_row.allele1 == 'C'
    # assert hasattr(dosage_row, 'maf')
    # assert dosage_row.maf == 0.4722
    assert hasattr(dosage_row, 'dosages')
    assert dosage_row.dosages is not None
    assert hasattr(dosage_row.dosages, 'shape')
    assert len(dosage_row.dosages) == 300

    # dosage = expected allele count computed from the fixture's genotype probabilities
    # 1
    assert round(dosage_row.dosages[0], 4) == round(
        np.dot([0.04713, 0.94817, 0.00470], [0, 1, 2]), 4) == 0.9576, dosage_row.dosages[0]
    # NA (plink)
    assert round(dosage_row.dosages[2], 4) == round(
        np.dot([0.07355, 0.50369, 0.42276], [0, 1, 2]), 4) == 1.3492, dosage_row.dosages[2]
    # 2
    assert round(dosage_row.dosages[8], 4) == round(
        np.dot([0.01488, 0.07935, 0.90576], [0, 1, 2]), 4) == 1.8909, dosage_row.dosages[8]
    # 0
    assert round(dosage_row.dosages[12], 4) == round(
        np.dot([0.95152, 0.02008, 0.02840], [0, 1, 2]), 4) == 0.0769, dosage_row.dosages[12]
def test_get_first_row(self):
    """get_row(0) returns the first variant with all metadata and per-sample dosages."""
    # Prepare
    bgen_dosage = BGENDosage(get_repository_path('set00/chr1impv1.bgen'))

    # Run
    dosage_row = bgen_dosage.get_row(0)

    assert dosage_row is not None
    assert hasattr(dosage_row, 'chr')
    assert dosage_row.chr == 1
    assert hasattr(dosage_row, 'rsid')
    assert dosage_row.rsid == 'rs1'
    assert hasattr(dosage_row, 'position')
    assert dosage_row.position == 100
    assert hasattr(dosage_row, 'allele0')
    assert dosage_row.allele0 == 'G'
    assert hasattr(dosage_row, 'allele1')
    assert dosage_row.allele1 == 'A'
    # assert hasattr(dosage_row, 'maf')
    # assert dosage_row.maf == 0.4894
    assert hasattr(dosage_row, 'dosages')
    assert dosage_row.dosages is not None
    assert hasattr(dosage_row.dosages, 'shape')
    assert len(dosage_row.dosages) == 300

    # dosage = expected allele count computed from the fixture's genotype probabilities
    # NA
    assert round(dosage_row.dosages[0], 4) == round(
        np.dot([0.74909, 0.01333, 0.23758], [0, 1, 2]), 4) == 0.4885, dosage_row.dosages[0]
    # 1
    assert round(dosage_row.dosages[2], 4) == round(
        np.dot([0.05437, 0.91567, 0.02996], [0, 1, 2]), 4) == 0.9756, dosage_row.dosages[2]
    # 2
    assert round(dosage_row.dosages[3], 4) == round(
        np.dot([0.00650, 0.02577, 0.96773], [0, 1, 2]), 4) == 1.9612, dosage_row.dosages[3]
    # 0
    assert round(dosage_row.dosages[5], 4) == round(
        np.dot([0.95803, 0.03895, 0.00302], [0, 1, 2]), 4) == 0.0450, dosage_row.dosages[5]
def test_postload_codings_check_constrains_exist(self):
    """load_codings must create the composite primary key and the expected indexes."""
    # prepare
    directory = get_repository_path('postloader/codings03_tree')

    # run
    pl = Postloader(POSTGRESQL_ENGINE)
    pl.load_codings(directory)

    # Validate
    ## Check codings table exists
    table = pd.read_sql(
        """
        SELECT EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'public'
            AND tablename = '{}'
        )""".format('codings'), create_engine(POSTGRESQL_ENGINE))
    assert table.iloc[0, 0]

    # primary key: composite over (data_coding, coding, meaning)
    constraint_sql = self._get_table_contrains('codings', relationship_query='pk_%%')
    constraints_results = pd.read_sql(constraint_sql, create_engine(POSTGRESQL_ENGINE))
    assert constraints_results is not None
    assert not constraints_results.empty
    columns = constraints_results['column_name'].tolist()
    assert len(columns) == 3
    assert 'data_coding' in columns
    assert 'coding' in columns
    assert 'meaning' in columns

    # secondary indexes (ix_*) over the tree-structure columns as well
    constraint_sql = self._get_table_contrains('codings', relationship_query='ix_%%')
    constraints_results = pd.read_sql(constraint_sql, create_engine(POSTGRESQL_ENGINE))
    assert constraints_results is not None
    assert not constraints_results.empty
    columns = constraints_results['column_name'].tolist()
    assert len(columns) == 5
    assert 'data_coding' in columns
    assert 'coding' in columns
    assert 'node_id' in columns
    assert 'parent_id' in columns
    assert 'selectable' in columns
def test_get_iterator(self):
    """items() iterates every variant of the file in order, with correct dosages."""
    # Prepare
    bgen_dosage = BGENDosage(get_repository_path('set00/chr2impv1.bgen'))

    # Run
    all_items = list(bgen_dosage.items(n_rows_cached=10))
    assert len(all_items) == 150

    # snp 1
    assert all_items[0].chr == 2
    assert all_items[0].position == 100
    assert all_items[0].allele0 == 'A'
    assert all_items[0].allele1 == 'G'
    assert all_items[0].rsid == 'rs2000000'
    assert all_items[0].dosages.shape == (300, )
    # dosage = expected allele count from the fixture's genotype probabilities
    assert truncate(all_items[0].dosages[0]) == truncate(
        np.dot([0.94401, 0.02976, 0.02623], [0, 1, 2])) == 0.0822
    assert truncate(all_items[0].dosages[2]) == truncate(
        np.dot([0.00658, 0.92760, 0.06582], [0, 1, 2])) == 1.0592

    # snp middle
    assert all_items[99].chr == 2
    assert all_items[99].position == 7516
    assert all_items[99].allele0 == 'T'
    assert all_items[99].allele1 == 'A'
    assert all_items[99].rsid == 'rs2000099'
    assert all_items[99].dosages.shape == (300, )
    assert truncate(
        all_items[99].dosages[0]
    ) == 1.1071  # truncate(np.dot([0.03148, 0.82993, 0.13854], [0, 1, 2])) == 1.1070, truncate(all_items[99].dosages[0])
    assert truncate(all_items[99].dosages[5]) == truncate(
        np.dot([0.04327, 0.89103, 0.06570], [0, 1, 2])) == 1.0224

    # snp last
    assert all_items[149].chr == 2
    assert all_items[149].position == 11226
    assert all_items[149].allele0 == 'G'
    assert all_items[149].allele1 == 'T'
    assert all_items[149].rsid == 'rs2000149'
    assert all_items[149].dosages.shape == (300, )
    assert truncate(
        all_items[149].dosages[1]
    ) == 1.8772  # truncate(np.dot([0.01371, 0.09532, 0.89091], [0, 1, 2])) == 1.8771, truncate(all_items[149].dosages[1])
    assert truncate(
        all_items[149].dosages[2]
    ) == 1.7562  # truncate(np.dot([0.07391, 0.09597, 0.83011], [0, 1, 2])) == 1.7561, truncate(all_items[149].dosages[2])
def test_genotype_rsids_bgenix_not_in_path(self):
    """A missing bgenix executable must yield a 400 whose message names the bad path."""
    # Prepare: point the API at a non-existent bgenix binary.
    self.setUp(bgenix_path='/path/not/found/bgenix')
    rsids_file = get_repository_path('example01/rsids01.txt')

    # Run — close the uploaded file handle deterministically (the original
    # leaked the open() handle until garbage collection).
    with open(rsids_file, 'rb') as rsids_fd:
        response = self.app.post(
            '/ukbrest/api/v1.0/genotype/2/rsids',
            data={'file': (rsids_fd, rsids_file)})

    # Validate
    assert response.status_code == 400, response.status_code
    data = json.load(io.StringIO(response.data.decode('utf-8')))
    assert 'message' in data, data
    assert 'bgenix' in data['message'], data['message']
    assert '/path/not/found/bgenix' in data['message'], data['message']
def test_genotype_positions_using_file_wrong_format(self):
    """A malformed positions file must yield 400 with the bgenix output attached."""
    # Prepare
    positions_file = get_repository_path('example01/positions01_bug.txt')

    # Run — close the uploaded file handle deterministically (the original
    # leaked the open() handle until garbage collection).
    with open(positions_file, 'rb') as positions_fd:
        response = self.app.post(
            '/ukbrest/api/v1.0/genotype/2/positions',
            data={'file': (positions_fd, positions_file)})

    # Validate: error message mentions bgenix and the raw tool output is included
    assert response.status_code == 400, response.status_code
    data = json.load(io.StringIO(response.data.decode('utf-8')))
    assert 'message' in data, data
    assert 'bgenix' in data['message'], data['message']
    assert 'output' in data, data
    assert 'Welcome to bgenix' in data['output'], data['output']
    assert 'spec="02:8949/8949"' in data['output'], data['output']
def test_get_last_row_other_chromosome(self):
    """get_row(-1) also works on a different chromosome's file (chr2, 150th variant)."""
    # Prepare
    bgen_dosage = BGENDosage(get_repository_path('set00/chr2impv1.bgen'))

    # Run
    dosage_row = bgen_dosage.get_row(-1)

    assert dosage_row is not None
    assert hasattr(dosage_row, 'chr')
    assert dosage_row.chr == 2
    assert hasattr(dosage_row, 'rsid')
    assert dosage_row.rsid == 'rs2000149'
    assert hasattr(dosage_row, 'position')
    assert dosage_row.position == 11226
    assert hasattr(dosage_row, 'allele0')
    assert dosage_row.allele0 == 'G'
    assert hasattr(dosage_row, 'allele1')
    assert dosage_row.allele1 == 'T'
    # assert hasattr(dosage_row, 'maf')
    # assert dosage_row.maf == 0.4722
    assert hasattr(dosage_row, 'dosages')
    assert dosage_row.dosages is not None
    assert hasattr(dosage_row.dosages, 'shape')
    assert len(dosage_row.dosages) == 300

    # dosage = expected allele count from the fixture's genotype probabilities
    # 1
    assert round(dosage_row.dosages[0], 4) == round(
        np.dot([0.94620, 0.05350, 0.00030], [0, 1, 2]), 4) == 0.0541, dosage_row.dosages[0]
    assert round(dosage_row.dosages[1], 4) == round(
        np.dot([0.01373, 0.09532, 0.89094], [0, 1, 2]), 4) == 1.8772, dosage_row.dosages[1]
    assert round(dosage_row.dosages[299], 4) == round(
        np.dot([0.04675, 0.93974, 0.01351], [0, 1, 2]), 4) == 0.9668, dosage_row.dosages[299]
def test_postload_codings_negative_coding(self):
    """Codings with negative codes ('-1', '-3') must load as literal strings."""
    # prepare
    directory = get_repository_path('postloader/codings02_negative')

    # run
    pl = Postloader(POSTGRESQL_ENGINE)
    pl.load_codings(directory)

    # validate
    ## Check codings table exists
    table = pd.read_sql(
        """
        SELECT EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'public'
            AND tablename = '{}'
        )""".format('codings'), create_engine(POSTGRESQL_ENGINE))
    assert table.iloc[0, 0]

    codings = pd.read_sql(
        "select * from codings order by data_coding, coding",
        create_engine(POSTGRESQL_ENGINE))
    assert codings is not None
    expected_columns = ['data_coding', 'coding', 'meaning']
    assert len(codings.columns) >= len(expected_columns)
    assert all(x in codings.columns for x in expected_columns)
    assert not codings.empty
    assert codings.shape[0] == 2

    # rows checked in (data_coding, coding) order; note string ordering: '-1' < '-3'
    cidx = 0
    assert codings.loc[cidx, 'data_coding'] == 13
    assert codings.loc[cidx, 'coding'] == '-1'
    assert codings.loc[cidx, 'meaning'] == 'Date uncertain or unknown'

    cidx += 1
    assert codings.loc[cidx, 'data_coding'] == 13
    assert codings.loc[cidx, 'coding'] == '-3'
    assert codings.loc[cidx, 'meaning'] == 'Preferred not to answer'
def test_get_second_row(self):
    """get_row(1) returns the second variant (rs2) with its metadata and dosages."""
    # Prepare
    bgen_dosage = BGENDosage(get_repository_path('set00/chr1impv1.bgen'))

    # Run
    dosage_row = bgen_dosage.get_row(1)

    assert dosage_row is not None
    assert hasattr(dosage_row, 'chr')
    assert dosage_row.chr == 1
    assert hasattr(dosage_row, 'rsid')
    assert dosage_row.rsid == 'rs2'
    assert hasattr(dosage_row, 'position')
    assert dosage_row.position == 181
    assert hasattr(dosage_row, 'allele0')
    assert dosage_row.allele0 == 'G'
    assert hasattr(dosage_row, 'allele1')
    assert dosage_row.allele1 == 'C'
    # assert hasattr(dosage_row, 'maf')
    # assert dosage_row.maf == 0.4894
    assert hasattr(dosage_row, 'dosages')
    assert dosage_row.dosages is not None
    assert hasattr(dosage_row.dosages, 'shape')
    assert len(dosage_row.dosages) == 300

    # dosage = expected allele count from the fixture's genotype probabilities
    assert round(dosage_row.dosages[0], 4) == round(
        np.dot([0.75232, 0.11725, 0.13043], [0, 1, 2]), 4) == 0.3781, dosage_row.dosages[0]
    assert round(
        dosage_row.dosages[299], 4
    ) == 1.8471  # round(np.dot([0.00937, 0.13421, 0.85642], [0, 1, 2]), 4) == 1.8471, dosage_row.dosages[299]
def test_postload_codings_vacuum(self):
    """After load_codings, the codings table must have been VACUUMed and ANALYZEd."""
    # prepare
    directory = get_repository_path('postloader/codings03_tree')

    # run
    pl = Postloader(POSTGRESQL_ENGINE)
    pl.load_codings(directory)

    # Validate
    db_engine = create_engine(POSTGRESQL_ENGINE)

    ## Check codings table exists
    table = pd.read_sql(
        """
        SELECT EXISTS (
            SELECT 1
            FROM pg_tables
            WHERE schemaname = 'public'
            AND tablename = '{}'
        )""".format('codings'), db_engine)
    assert table.iloc[0, 0]

    vacuum_data = pd.DataFrame()
    query_count = 0
    # FIXME waits for vacuum to finish; polls pg_stat_user_tables (stats update
    # asynchronously) up to 150 times — busy loop with no sleep between polls
    while vacuum_data.empty and query_count < 150:
        vacuum_data = pd.read_sql(
            """
            select relname, last_vacuum, last_analyze
            from pg_stat_user_tables
            where schemaname = 'public'
            and last_vacuum is not null
            and last_analyze is not null
            """, db_engine)
        query_count += 1

    assert vacuum_data is not None
    assert not vacuum_data.empty
    tables = vacuum_data['relname'].tolist()
    assert 'codings' in tables