Exemple #1
0
    def test_roundtrip_plink_or_bolt_file(self):
        """Checks that a load-then-write round trip reproduces the input file.

        Builds a table with `FID`/`IID` columns plus integer, float,
        two-valued and null-containing columns, writes it as a tab-separated
        file, passes it through `data.load_plink_or_bolt_file` and
        `data.write_plink_or_bolt_file`, and asserts that the text of the
        final file equals the text of the initial file.
        """
        num_entries = 1000
        ids = np.arange(num_entries)
        continuous_int = np.random.choice(np.arange(40, 70), size=num_entries)
        continuous_float = np.random.choice(np.arange(30.5,
                                                      50.5).astype(float),
                                            size=num_entries)
        binary_zero_one = np.random.choice([0, 1], size=num_entries)
        binary_one_two = np.random.choice([1, 2], size=num_entries)
        continuous_two_values = np.random.choice([1, 3], size=num_entries)

        nulled_continuous = np.random.choice(np.arange(0.5, 20.5),
                                             size=num_entries)
        # `np.nan` is the canonical spelling; the `np.NaN` alias was removed
        # in NumPy 2.0 and raises AttributeError there.
        nulled_continuous[:10] = np.nan
        # Note: This input nullable binary must be of type 'Int64' otherwise it will
        # get converted to float values prior to the initial writing to disk.
        nulled_binary = pd.Series(np.random.choice([1., 2.], size=num_entries),
                                  dtype='Int64')
        nulled_binary[-10:] = pd.NA

        init_df = pd.DataFrame(
            {
                'FID': ids,
                'IID': ids,
                'ci': continuous_int,
                'cf': continuous_float,
                'bzo': binary_zero_one,
                'bot': binary_one_two,
                'ctv': continuous_two_values,
                'nc': nulled_continuous,
                'nb': nulled_binary
            },
            columns=[
                'FID', 'IID', 'ci', 'cf', 'bzo', 'bot', 'ctv', 'nc', 'nb'
            ])

        initial_filename = os.path.join(absltest.get_default_test_tmpdir(),
                                        'init.tsv')
        final_filename = os.path.join(absltest.get_default_test_tmpdir(),
                                      'final.tsv')

        # Missing values are written as the literal 'NA', the same marker that
        # is passed to the loader and writer below.
        init_df.to_csv(initial_filename, sep='\t', na_rep='NA', index=False)

        deepnull_df, mapping = data.load_plink_or_bolt_file(
            initial_filename, 'NA')
        data.write_plink_or_bolt_file(deepnull_df, final_filename, mapping,
                                      'NA')

        # Compare the raw text so that any formatting drift fails the test.
        with open(initial_filename, 'rt') as f:
            initial_contents = f.read()
        with open(final_filename, 'rt') as g:
            final_contents = g.read()
        self.assertEqual(initial_contents, final_contents)
Exemple #2
0
  def test_data_prep_beam_params(self, tfds, input_format):
    """Checks the data-prep parameters that are derived from flags.

    Args:
      tfds: When truthy, a TFDS dataset name is set; otherwise the train,
        validation and test input globs are set.
      input_format: Not read by this test body.
    """
    if tfds:
      flags.FLAGS.tfds_dataset = 'savee'
    else:
      # All three splits use the same glob of test records.
      tfrecord_glob = os.path.join(
          absltest.get_default_test_srcdir(), TESTDIR, 'test.tfrecord*')
      flags.FLAGS.train_input_glob = tfrecord_glob
      flags.FLAGS.validation_input_glob = tfrecord_glob
      flags.FLAGS.test_input_glob = tfrecord_glob
    flags.FLAGS.skip_existing_error = False
    flags.FLAGS.output_filename = os.path.join(
        absltest.get_default_test_tmpdir(), f'data_prep_test_{tfds}')

    flags.FLAGS.embedding_modules = ['mod1', 'mod2']
    flags.FLAGS.embedding_names = ['emb1', 'emb2']
    flags.FLAGS.module_output_keys = ['k1', 'k2']

    params_from_flags = (
        data_prep_and_eval_beam_main._get_data_prep_params_from_flags())
    (prep_params, input_filenames_list, output_filenames,
     run_data_prep) = params_from_flags

    self.assertTrue(run_data_prep)
    self.assertLen(input_filenames_list, 3)
    self.assertLen(output_filenames, 3)

    # Output files are expected in train / validation / test order.
    expected_suffixes = (
        f'{flags.FLAGS.output_filename}.train',
        f'{flags.FLAGS.output_filename}.validation',
        f'{flags.FLAGS.output_filename}.test',
    )
    for position, suffix in enumerate(expected_suffixes):
      produced = output_filenames[position]
      self.assertTrue(produced.endswith(suffix), produced)
    self.assertIsInstance(prep_params, dict)
 def test_atomic_write_series_with_scalar_data(self, name):
     """Writes a mapping of scalars as a series and reads it back as CSV.

     Args:
       name: File name to create under the default test tmpdir.
     """
     scalars = {'a': 1, 'b': 4.0}
     csv_path = os.path.join(absltest.get_default_test_tmpdir(), name)
     utils_impl.atomic_write_series_to_csv(scalars, csv_path)

     loaded = pd.read_csv(csv_path, index_col=0)
     # The single value column is expected to come back labelled '0'.
     expected = pd.DataFrame(pd.Series(scalars), columns=['0'])
     pd.testing.assert_frame_equal(expected, loaded)
    def test_atomic_read(self, name):
        """Checks that a frame written atomically is read back unchanged.

        Args:
          name: File name to create under the default test tmpdir.
        """
        csv_path = os.path.join(absltest.get_default_test_tmpdir(), name)
        written = pd.DataFrame({'a': [1, 2], 'b': [4.0, 5.0]})
        utils_impl.atomic_write_to_csv(written, csv_path)

        restored = utils_impl.atomic_read_from_csv(csv_path)
        pd.testing.assert_frame_equal(written, restored)
 def test_atomic_write_raises_on_dict_input(self):
     """Checks that passing a dict instead of a DataFrame raises ValueError."""
     csv_path = os.path.join(absltest.get_default_test_tmpdir(), 'foo.csv')
     expected_message = 'dataframe must be an instance of `pandas.DataFrame`'
     not_a_dataframe = {'a': 1}
     with self.assertRaisesRegex(ValueError, expected_message):
         utils_impl.atomic_write_to_csv(not_a_dataframe, csv_path)
Exemple #6
0
 def setUp(self):
     """Sets the flags used by the tests in this case.

     `FLAGS.model_path` is pointed at a `saved_models` directory under the
     default test tmpdir; `num_epochs`, `test_savedmodel` and `mock_data`
     are set to fixed values.
     """
     super().setUp()
     saved_models_dir = os.path.join(
         absltest.get_default_test_tmpdir(), "saved_models")
     FLAGS.model_path = saved_models_dir
     FLAGS.num_epochs = 1
     FLAGS.test_savedmodel = True
     FLAGS.mock_data = True
    def test_generate_tfrecords(self):
        """Compares generated TFRecord examples against golden examples.

        Calls `ngs_errors.generate_tfrecord_datasets` on the chr20 test
        inputs with `out_dir` under the test tmpdir, reads the results back
        with `self._read_examples`, and checks them against the examples
        read from the `golden.examples.ngs_errors` test data.
        """
        examples_out = os.path.join(absltest.get_default_test_tmpdir(),
                                    'examples_output')
        train_test_val_split = [0.7, 0.2, 0.1]
        # The return value is not used by this test; only what is read back
        # from `examples_out` is checked.
        ngs_errors.generate_tfrecord_datasets(
            train_test_val_split,
            ref_path=test_utils.genomics_core_testdata(
                'ucsc.hg19.chr20.unittest.fasta.gz'),
            vcf_path=test_utils.genomics_core_testdata(
                'test_nist.b37_chr20_100kbp_at_10mb.vcf.gz'),
            bam_path=test_utils.genomics_core_testdata(
                'NA12878_S1.chr20.10_10p1mb.bam'),
            out_dir=examples_out,
            max_reads=100)

        actual_examples = self._read_examples(train_test_val_split,
                                              examples_out)
        golden_examples = self._read_examples(
            train_test_val_split,
            test_utils.genomics_core_testdata('golden.examples.ngs_errors'))
        self.assertEqual(len(actual_examples), len(golden_examples))

        # An expected example is appended once for every actual example whose
        # features agree on all of the expected example's keys, so the final
        # equality holds only when each golden example matches exactly one
        # actual example. The order of the actual examples is not compared.
        matched_examples = []
        for expected in golden_examples:
            for actual in actual_examples:
                if all(actual.features.feature[key] ==
                       expected.features.feature[key]
                       for key in expected.features.feature.keys()):
                    matched_examples.append(expected)
        self.assertEqual(golden_examples, matched_examples)
Exemple #8
0
 def setUp(self):
     """Creates the scratch directory and fake clients used by the tests.

     Sets `self.tmp_dir` to a `data` directory under the default test tmpdir
     (creating it if needed), `self.bucket_name`, a `fake_gcs.FakeClient`
     constructed with that directory, and a `fake_datastore.FakeClient`.
     """
     super(MakeCloudMasksTest, self).setUp()
     self.tmp_dir = os.path.join(absltest.get_default_test_tmpdir(), 'data')
     # `exist_ok=True` replaces the separate existence check, which could
     # race with another process creating the same directory.
     os.makedirs(self.tmp_dir, exist_ok=True)
     self.bucket_name = 'test_bucket'
     self.gcs_client = fake_gcs.FakeClient(self.tmp_dir)
     self.ds_client = fake_datastore.FakeClient()
 def setUp(self):
   """Prepares array inputs, the test-data path and a fresh temp directory.

   Sets a (2, 4096) array of ones as the fingerprint input, a (2, 1) array
   holding 18.0 and 16.0 as the molecular-weight input, the test data
   directory, a new temp directory under the default test tmpdir, and the
   path of `test_2_mend.sdf` inside the test data directory.
   """
   super(SpectraPredictorTest, self).setUp()
   self.np_fingerprint_input = np.ones((2, 4096))
   mol_weights = np.array([18., 16.])
   self.np_mol_weight_input = mol_weights.reshape((2, 1))
   data_dir = test_utils.test_dir("testdata/")
   self.test_data_directory = data_dir
   self.temp_dir = tempfile.mkdtemp(dir=absltest.get_default_test_tmpdir())
   self.test_file_short = os.path.join(data_dir, "test_2_mend.sdf")
Exemple #10
0
    def test_full_flow(self):
        """Runs `train_keras.train_and_report` in debug mode with test flags."""
        fixed_flag_values = (
            ('model_type', 'efficientnetv2b0'),
            ('file_patterns', 'dummy'),
            ('shuffle_buffer_size', 4),
            ('samples_key', 'audio'),
        )
        for flag_name, flag_value in fixed_flag_values:
            setattr(flags.FLAGS, flag_name, flag_value)
        # Log output goes to the default test tmpdir.
        flags.FLAGS.logdir = absltest.get_default_test_tmpdir()

        train_keras.train_and_report(debug=True, target_dim=10)
Exemple #11
0
def test_tmpfile(name, contents=None):
  """Returns a path to a tempfile named name in the test_tmpdir.

  Args:
    name: str; the name of the file, should not contain any slashes.
    contents: bytes, or None. If not None, tmpfile's contents will be set to
      contents before returning the path.

  Returns:
    str path to a tmpfile with filename name in our test tmpfile directory.
  """
  path = os.path.join(absltest.get_default_test_tmpdir(), name)
  if contents is not None:
    # `FastGFile` is a deprecated alias in TensorFlow; `GFile` is the
    # supported class and is opened the same way.
    with gfile.GFile(path, 'wb') as fout:
      fout.write(contents)
  return path