def test_data_ingestion(self):
        """Ingest the Jens memory test data and verify the resulting TFRecords."""
        source_dir = os.path.join(self._test_data_dir, 'jens_memory')
        scratch_dir = self.create_tempdir().full_path
        output_dir = os.path.join(scratch_dir, 'jens_memory')
        subject_count = 1  # Only 1 of 22 subjects loaded for test.
        trial_count = 5  # That one subject has been shortened to 5/40 trials.

        # The downloaded archive must already be cached locally.
        data = regression_data.RegressionDataJensMemory()
        self.assertTrue(data.is_data_local(source_dir, subject_count))

        # Ingested data is absent before ingestion and present afterwards.
        self.assertFalse(data.is_data_ingested(scratch_dir, subject_count))
        data.ingest_data(source_dir, output_dir, 128)
        self.assertTrue(data.is_data_ingested(output_dir, subject_count, trial_count))

        # Spot-check the feature shapes in one ingested trial file.
        probe_file = os.path.join(output_dir, 'subject_01', 'trial_01.tfrecords')
        shapes = brain_data.discover_feature_shapes(probe_file)
        self.assertIn('eeg', shapes)
        self.assertEqual(shapes['eeg'].shape, [69])
        self.assertIn('intensity', shapes)
        self.assertEqual(shapes['intensity'].shape, [1])

        self.assertEqual(brain_data.count_tfrecords(probe_file), (7442, False))
# Example #2
# 0
def write_summary(cache_dir, tf_dir, frame_rate, all_ingested_files=None):
    """Write a summary of the experiment into the directory's readme file.

    The README.txt file contains the source directory and the frame rate, as
    well as the feature names, and the length of each file.

    Args:
      cache_dir: Where the original data was cached locally.
      tf_dir: Output directory for the ingested TFRecord files; README.txt is
        written here.
      frame_rate: The ingested data file frame rate (Hz), used to convert
        record counts into seconds.
      all_ingested_files: Optional list of TFRecord files that were ingested.
        When given, the feature shapes (probed from the first file) and the
        record count of every file are appended to the summary.
    """
    readme_file = os.path.join(tf_dir, 'README.txt')
    with tf.io.gfile.GFile(readme_file, 'w') as fp:
        print('These files were ingested from:', cache_dir, file=fp)
        print('Using:', sys.argv, file=fp)
        # Bug fix: message previously read 'With a output frame rate'.
        print('With an output frame rate of %gHz' % frame_rate, file=fp)

        if all_ingested_files:
            # All ingested files share one schema, so probe the first file.
            features = brain_data.discover_feature_shapes(
                all_ingested_files[0])
            print('\nFeature shapes are:', file=fp)
            for k, v in features.items():
                print('\t%s: %s' % (k, v), file=fp)

            print('\nAll ingested files:', file=fp)
            for filename in all_ingested_files:
                count, error = brain_data.count_tfrecords(filename)
                # Flag files that could not be read all the way through.
                error_string = 'READ ERROR' if error else ''
                print(
                    '\t%s: %d records (%s seconds) %s' %
                    (filename, count, count / float(frame_rate), error_string),
                    file=fp)
    def get_feature_shapes_from_file(self):
        """Locate a .tfrecords file under the test data dir and return its feature shapes."""
        tfrecord_files = []
        for (dir_path, _, names) in tf.io.gfile.walk(self._test_data_dir):
            for name in names:
                if name.endswith('.tfrecords'):
                    tfrecord_files.append(dir_path + '/' + name)
        self.assertNotEmpty(tfrecord_files)

        return discover_feature_shapes(tfrecord_files[0])
# Example #4
# 0
    def test_sample_data_file(self):
        """Basic test to make sure we can create the data file and it has data."""
        expected_width = 4

        shapes = brain_data.discover_feature_shapes(self._train_filename)
        print('sample_data_file features are:', shapes)
        # Every feature in the sample file is expected_width wide.
        self.assertEqual(shapes['eeg'].shape, [expected_width])
        self.assertEqual(shapes['intensity1'].shape, [expected_width])
        self.assertEqual(shapes['intensity2'].shape, [expected_width])

        record_count, read_error = brain_data.count_tfrecords(self._train_filename)
        self.assertEqual(record_count, _NUM_TEST_POINTS)
        self.assertFalse(read_error)
 def test_discover_feature_shapes(self):
     """Make sure we get the right kinds of records in our test file."""
     tfrecord_paths = []
     for (dir_path, _, names) in tf.io.gfile.walk(self._test_data_dir):
         tfrecord_paths.extend(
             dir_path + '/' + name for name in names
             if name.endswith('.tfrecords'))
     self.assertLen(tfrecord_paths, 3)
     shapes = discover_feature_shapes(tfrecord_paths[0])
     print('test_read_tfrecords: Record dictionary is', shapes)
     # Expected channel count for each feature in the test file.
     expected_feature_widths = {
         'phonetic_features': 19,
         'mel_spectrogram': 64,
         'meg': 148,
         'phonemes': 38,
         'envelope': 1,
     }
     for name, width in expected_feature_widths.items():
         self.assertIn(name, shapes)
         self.assertIsInstance(shapes[name], tf.io.FixedLenFeature)
         self.assertEqual(shapes[name].shape[0], width)
    def test_data_ingestion(self):
        """Ingest the Telluride4 test data and verify the resulting TFRecords."""
        source_dir = os.path.join(self._test_data_dir, 'telluride4')
        scratch_dir = self.create_tempdir().full_path
        output_dir = os.path.join(scratch_dir, 'telluride4_tf')

        # The downloaded archive must already be cached locally.
        data = regression_data.RegressionDataTelluride4()
        self.assertTrue(data.is_data_local(source_dir))

        # Ingested data is absent before ingestion and present afterwards.
        self.assertFalse(data.is_data_ingested(scratch_dir))
        data.ingest_data(source_dir, output_dir, 128)
        self.assertTrue(data.is_data_ingested(output_dir))

        # Spot-check the feature shapes in the first trial file.
        probe_file = os.path.join(output_dir, 'trial_01.tfrecords')
        shapes = brain_data.discover_feature_shapes(probe_file)
        print('Telluride features:', shapes)
        self.assertIn('eeg', shapes)
        self.assertEqual(shapes['eeg'].shape, [63])
        self.assertIn('intensity', shapes)
        self.assertEqual(shapes['intensity'].shape, [1])

        self.assertEqual(brain_data.count_tfrecords(probe_file), (8297, False))