def test_roundtrip_plink_or_bolt_file(self):
  """Writes a phenotype TSV, loads and re-writes it, and checks the bytes match.

  Covers integer/float continuous columns, 0/1 and 1/2 binary columns, a
  two-valued continuous column, and nullable continuous/binary columns.
  """
  num_entries = 1000
  ids = np.arange(num_entries)
  continuous_int = np.random.choice(np.arange(40, 70), size=num_entries)
  continuous_float = np.random.choice(
      np.arange(30.5, 50.5).astype(float), size=num_entries)
  binary_zero_one = np.random.choice([0, 1], size=num_entries)
  binary_one_two = np.random.choice([1, 2], size=num_entries)
  continuous_two_values = np.random.choice([1, 3], size=num_entries)
  nulled_continuous = np.random.choice(np.arange(0.5, 20.5), size=num_entries)
  # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
  nulled_continuous[:10] = np.nan
  # Note: This input nullable binary must be of type 'Int64' otherwise it will
  # get converted to float values prior to the initial writing to disk.
  nulled_binary = pd.Series(
      np.random.choice([1., 2.], size=num_entries), dtype='Int64')
  nulled_binary[-10:] = pd.NA
  init_df = pd.DataFrame(
      {
          'FID': ids,
          'IID': ids,
          'ci': continuous_int,
          'cf': continuous_float,
          'bzo': binary_zero_one,
          'bot': binary_one_two,
          'ctv': continuous_two_values,
          'nc': nulled_continuous,
          'nb': nulled_binary
      },
      columns=['FID', 'IID', 'ci', 'cf', 'bzo', 'bot', 'ctv', 'nc', 'nb'])
  initial_filename = os.path.join(absltest.get_default_test_tmpdir(),
                                  'init.tsv')
  final_filename = os.path.join(absltest.get_default_test_tmpdir(),
                                'final.tsv')
  init_df.to_csv(initial_filename, sep='\t', na_rep='NA', index=False)
  deepnull_df, mapping = data.load_plink_or_bolt_file(initial_filename, 'NA')
  data.write_plink_or_bolt_file(deepnull_df, final_filename, mapping, 'NA')
  # Round-trip fidelity: the re-written file must be byte-identical.
  with open(initial_filename, 'rt') as f:
    initial_contents = f.read()
  with open(final_filename, 'rt') as g:
    final_contents = g.read()
  self.assertEqual(initial_contents, final_contents)
def test_data_prep_beam_params(self, tfds, input_format):
  """Checks flag parsing yields per-split params and output filenames."""
  if tfds:
    flags.FLAGS.tfds_dataset = 'savee'
  else:
    # All three splits read from the same test tfrecord glob.
    shared_glob = os.path.join(absltest.get_default_test_srcdir(), TESTDIR,
                               'test.tfrecord*')
    flags.FLAGS.train_input_glob = shared_glob
    flags.FLAGS.validation_input_glob = shared_glob
    flags.FLAGS.test_input_glob = shared_glob
  flags.FLAGS.skip_existing_error = False
  flags.FLAGS.output_filename = os.path.join(
      absltest.get_default_test_tmpdir(), f'data_prep_test_{tfds}')
  flags.FLAGS.embedding_modules = ['mod1', 'mod2']
  flags.FLAGS.embedding_names = ['emb1', 'emb2']
  flags.FLAGS.module_output_keys = ['k1', 'k2']

  (prep_params, input_filenames_list, output_filenames,
   run_data_prep) = data_prep_and_eval_beam_main._get_data_prep_params_from_flags()

  self.assertTrue(run_data_prep)
  self.assertLen(input_filenames_list, 3)
  self.assertLen(output_filenames, 3)
  # Output filenames carry the split name as a suffix, in train/val/test order.
  for i, split_name in enumerate(('train', 'validation', 'test')):
    self.assertTrue(
        output_filenames[i].endswith(
            f'{flags.FLAGS.output_filename}.{split_name}'),
        output_filenames[i])
  self.assertIsInstance(prep_params, dict)
def test_atomic_write_series_with_scalar_data(self, name):
  """A scalar-valued dict round-trips through atomic_write_series_to_csv."""
  scalar_mapping = dict(a=1, b=4.0)
  target_path = os.path.join(absltest.get_default_test_tmpdir(), name)
  utils_impl.atomic_write_series_to_csv(scalar_mapping, target_path)
  read_back = pd.read_csv(target_path, index_col=0)
  # The writer serializes the mapping as a single-column frame named '0'.
  expected = pd.DataFrame(pd.Series(scalar_mapping), columns=['0'])
  pd.testing.assert_frame_equal(expected, read_back)
def test_atomic_read(self, name):
  """A DataFrame written with atomic_write_to_csv reads back unchanged."""
  original = pd.DataFrame(dict(a=[1, 2], b=[4.0, 5.0]))
  target_path = os.path.join(absltest.get_default_test_tmpdir(), name)
  utils_impl.atomic_write_to_csv(original, target_path)
  round_tripped = utils_impl.atomic_read_from_csv(target_path)
  pd.testing.assert_frame_equal(original, round_tripped)
def test_atomic_write_raises_on_dict_input(self):
  """atomic_write_to_csv rejects non-DataFrame input with a ValueError."""
  target_path = os.path.join(absltest.get_default_test_tmpdir(), 'foo.csv')
  expected_message = 'dataframe must be an instance of `pandas.DataFrame`'
  with self.assertRaisesRegex(ValueError, expected_message):
    utils_impl.atomic_write_to_csv(dict(a=1), target_path)
def setUp(self):
  """Configures flags for a minimal one-epoch run against mock data."""
  super().setUp()
  tmp_dir = absltest.get_default_test_tmpdir()
  FLAGS.model_path = os.path.join(tmp_dir, "saved_models")
  FLAGS.mock_data = True
  FLAGS.test_savedmodel = True
  FLAGS.num_epochs = 1
def test_generate_tfrecords(self):
  """Generated TFRecord examples match the golden set feature-for-feature."""
  examples_out = os.path.join(absltest.get_default_test_tmpdir(),
                              'examples_output')
  train_test_val_split = [0.7, 0.2, 0.1]
  ngs_errors.generate_tfrecord_datasets(
      train_test_val_split,
      ref_path=test_utils.genomics_core_testdata(
          'ucsc.hg19.chr20.unittest.fasta.gz'),
      vcf_path=test_utils.genomics_core_testdata(
          'test_nist.b37_chr20_100kbp_at_10mb.vcf.gz'),
      bam_path=test_utils.genomics_core_testdata(
          'NA12878_S1.chr20.10_10p1mb.bam'),
      out_dir=examples_out,
      max_reads=100)
  actual_examples = self._read_examples(train_test_val_split, examples_out)
  golden_examples = self._read_examples(
      train_test_val_split,
      test_utils.genomics_core_testdata('golden.examples.ngs_errors'))
  self.assertEqual(len(actual_examples), len(golden_examples))
  matched_examples = []
  for expected in golden_examples:
    for actual in actual_examples:
      # Match on the expected example's features (actual may carry extras).
      if all(actual.features.feature[key] == expected.features.feature[key]
             for key in expected.features.feature.keys()):
        matched_examples.append(expected)
        # Bug fix: stop at the first match. Without this `break`, an expected
        # example that matches several actual examples is appended multiple
        # times, and the final list-equality assertion fails spuriously.
        break
  self.assertEqual(golden_examples, matched_examples)
def setUp(self):
  """Creates a temp data directory and fake GCS/Datastore clients."""
  super(MakeCloudMasksTest, self).setUp()
  self.tmp_dir = os.path.join(absltest.get_default_test_tmpdir(), 'data')
  # exist_ok=True replaces the race-prone os.path.exists check-then-create.
  os.makedirs(self.tmp_dir, exist_ok=True)
  self.bucket_name = 'test_bucket'
  self.gcs_client = fake_gcs.FakeClient(self.tmp_dir)
  self.ds_client = fake_datastore.FakeClient()
def setUp(self):
  """Builds fingerprint/molecular-weight fixtures and testdata paths."""
  super(SpectraPredictorTest, self).setUp()
  self.np_fingerprint_input = np.ones((2, 4096))
  self.np_mol_weight_input = np.array([18., 16.]).reshape((2, 1))
  self.test_data_directory = test_utils.test_dir("testdata/")
  self.temp_dir = tempfile.mkdtemp(dir=absltest.get_default_test_tmpdir())
  self.test_file_short = os.path.join(self.test_data_directory,
                                      "test_2_mend.sdf")
def test_full_flow(self):
  """Smoke-tests Keras training end to end in debug mode."""
  flag_settings = (
      ('model_type', 'efficientnetv2b0'),
      ('file_patterns', 'dummy'),
      ('shuffle_buffer_size', 4),
      ('samples_key', 'audio'),
      ('logdir', absltest.get_default_test_tmpdir()),
  )
  for flag_name, flag_value in flag_settings:
    setattr(flags.FLAGS, flag_name, flag_value)
  train_keras.train_and_report(debug=True, target_dim=10)
def test_tmpfile(name, contents=None):
  """Returns a path to a tempfile named `name` in the test_tmpdir.

  Args:
    name: str; the name of the file, should not contain any slashes.
    contents: bytes, or None. If not None, the file at the returned path is
      populated with `contents` before the path is returned.

  Returns:
    str path to a tmpfile with filename name in our test tmpfile directory.
  """
  path = os.path.join(absltest.get_default_test_tmpdir(), name)
  if contents is None:
    return path
  with gfile.FastGFile(path, 'wb') as fout:
    fout.write(contents)
  return path