Example #1
0
  def ingest_data(self, cache_dir, tf_dir, desired_frame_rate):
    """Converts the Jens .mat recordings into per-subject TFRecord files.

    Every `*.mat` file found in cache_dir is handled as one subject. For each
    trial in a file the EEG rows and the audio row are split apart, resampled
    to desired_frame_rate, wrapped in a BrainExperiment, z-scored and written
    to a `subject_NN` folder below tf_dir. A summary of all written files is
    produced at the end via write_summary.

    Args:
      cache_dir: Folder holding the local copies of the .mat files.
      tf_dir: Root folder that receives the per-subject TFRecord folders.
      desired_frame_rate: Frame rate the EEG and audio are resampled to.
    """
    subject_files = sorted(
        tf.io.gfile.glob(os.path.join(cache_dir, '*.mat')))
    # The signals are handed over in memory below; '.' is simply passed
    # wherever the experiment API asks for a directory.
    sound_dir = '.'
    eeg_dir = '.'
    make_if_not_exists(tf_dir)

    print('Ingesting %d files of Jens data.' % len(subject_files),
          file=regression_data_print)
    all_ingested_files = []
    for subject_number, mat_file in enumerate(subject_files, start=1):
      print('Ingesting %s' % mat_file, file=regression_data_print)
      tf_dir_subject = os.path.join(
          tf_dir, 'subject_{:02d}'.format(subject_number))
      mat_object = loadmat(mat_file)['data']
      # Both frame rates should be 128Hz according to:
      #   https://zenodo.org/record/1158410/#.XvqtpZNKjVs
      # so the same 'fsample' field is read for the audio and the EEG.
      wav_fs = mat_object['fsample']
      eeg_fs = mat_object['fsample']

      trial_dict = {}
      for trial_number, trial in enumerate(mat_object['trial'], start=1):
        # The first 69 rows are taken as EEG and row 69 as the audio signal;
        # both are transposed before being resampled.
        eeg_signal = trial[:69, :].T
        audio_signal = trial[69:70, :].T
        eeg_resampler = preprocess.Preprocessor(
            'eeg', eeg_fs, desired_frame_rate)
        ds_eeg_signal = eeg_resampler.resample(eeg_signal)
        audio_resampler = preprocess.Preprocessor(
            'audio', wav_fs, desired_frame_rate)
        ds_audio_signal = audio_resampler.resample(audio_signal)

        trial_dict['trial_{:02d}'.format(trial_number)] = [
            {'intensity': ds_audio_signal},
            ingest.MemoryBrainDataFile({'eeg_data': ds_eeg_signal},
                                       sr=desired_frame_rate),
        ]
        logging.info('Audio and EEG data shapes: %s, %s',
                     str(audio_signal.shape),
                     str(eeg_signal.shape))

      exp = ingest.BrainExperiment(
          trial_dict, sound_dir, eeg_dir, frame_rate=desired_frame_rate)
      exp.load_all_data(sound_dir, eeg_dir)
      exp.z_score_all_data()
      for trial in exp.iterate_trials():
        trial.assemble_brain_data('eeg_data')
        for feature_name in trial.model_features:
          logging.info('Trial # %s, audio shapes %s', str(feature_name),
                       str(trial.model_features[feature_name].shape))

      make_if_not_exists(tf_dir_subject)
      all_ingested_files.extend(exp.write_all_data(tf_dir_subject))

    write_summary(cache_dir, tf_dir, desired_frame_rate, all_ingested_files)
Example #2
0
  def test_high_pass_by_frame(self):
    """Checks frame-by-frame high-pass filtering against one-shot filtering.

    The same signal is filtered once as a whole and once one frame at a time;
    both outputs must agree. A plot of the first 40 samples of each result is
    written to /tmp for visual inspection.
    """
    sample_rate = 100.0
    filt = preprocess.Preprocessor('test', sample_rate, sample_rate,
                                   highpass_cutoff=20,
                                   highpass_order=2)

    num_frames = 1000
    signal = np.random.rand(num_frames, 1)
    # Offset the second half so the input contains a discontinuity.
    signal[num_frames//2:] += 1.0

    full_result = filt.highpass_filter(signal, reset=True)

    # Start from an all-zero buffer of the same shape, then reset the filter
    # before feeding it the signal one frame at a time.
    stepwise_result = full_result * 0.0
    filt.highpass_filter_reset(signal)
    for frame in range(num_frames):
      one_frame = signal[frame:frame+1, :]
      stepwise_result[frame] = filt.highpass_filter(one_frame)

    with tf.io.gfile.GFile('/tmp/test_highpass_by_frame.png', mode='w') as fp:
      plt.clf()
      for curve in (full_result, stepwise_result):
        plt.plot(curve[0:40])
      plt.title('Stepwise Highpass Filter Test')
      plt.savefig(fp)

    np.testing.assert_allclose(full_result, stepwise_result, rtol=1e-07)
Example #3
0
  def test_processing(self):
    """Runs the full preprocessing pipeline on random 64-channel data.

    Channel 42 is set to a constant, so after high-pass filtering its output
    is expected to be close to zero. The output must contain exactly the 8
    channels named in the channel specification.
    """
    sample_rate = 100.0
    num_frames = 1000
    constant_channel = 42
    p = preprocess.Preprocessor('test', sample_rate, sample_rate,
                                channel_numbers='1,3,42,23,30-33',
                                highpass_cutoff=10)

    input_data = np.random.rand(num_frames, 64)
    input_data[:, constant_channel] = 1
    output_data = p.process(input_data)

    with tf.io.gfile.GFile('/tmp/test_processing.png', mode='w') as fp:
      plt.clf()
      plt.plot(output_data)
      plt.title('Full Processing Test')
      plt.savefig(fp)

    # Channel 42 is the last (-1) of the selected channels. The first 100
    # frames are skipped before checking that its output is near zero.
    settled = np.abs(output_data[100:, -1])
    np.testing.assert_array_less(settled, 0.01)
    self.assertEqual(output_data.shape[1], 8)
Example #4
0
 def test_channel_selector_parsing(self):
   """Checks that a channel spec string becomes a sorted, expanded list."""
   sample_rate = 100.0
   expected_channels = [1, 3, 23, 30, 31, 32, 33, 42]
   p = preprocess.Preprocessor('test', sample_rate, sample_rate,
                               channel_numbers='1,3,42,23,30-33')
   self.assertEqual(p._channel_numbers, expected_channels)
 def preprocess_list(self, name_params_list, frame_rate):
     """Builds one Preprocessor per entry of name_params_list.

     Args:
       name_params_list: Iterable of specifications, each passed as the first
         argument to preprocess.Preprocessor. May be empty or None.
       frame_rate: Value passed as both the second and third Preprocessor
         argument.

     Returns:
       A list of Preprocessor objects in input order, or an empty list when
       name_params_list is falsy.
     """
     if not name_params_list:
         return []
     return [preprocess.Preprocessor(spec, frame_rate, frame_rate)
             for spec in name_params_list]
Example #6
0
 def test_downsample_bad(self, fs_in, fs_out, batch_size, data_size):
   """Checks that batched resampling raises for an incompatible batch size."""
   data = np.arange(data_size).reshape([data_size, 1])
   p = preprocess.Preprocessor('test', fs_in, fs_out)
   results = np.empty((0, 1))
   with self.assertRaisesRegex(
       ValueError, 'New sample rate incompatable with batch size.'):
     # Feed the data in consecutive batches; the error is expected to be
     # raised somewhere inside this loop.
     for start in range(0, data_size, batch_size):
       resampled = p.resample(data[start:start + batch_size, :])
       results = np.concatenate([results, resampled], axis=0)
Example #7
0
 def test_downsample_good(self, fs_in, fs_out, batch_size, data_size):
   """Checks that batched downsampling keeps every ds_rate-th input sample."""
   data = np.arange(data_size).reshape([data_size, 1])
   step = int(round(fs_in / fs_out))
   p = preprocess.Preprocessor('test', fs_in, fs_out)
   # Resample batch by batch, in order, and stitch the pieces together after
   # an empty float seed array.
   pieces = [np.empty((0, 1))]
   for start in range(0, data_size, batch_size):
     pieces.append(p.resample(data[start:start + batch_size, :]))
   results = np.concatenate(pieces, axis=0)
   np.testing.assert_allclose(results, data[::step, :])
Example #8
0
  def test_channel_selection(self):
    """Checks that select_channels returns the requested columns in order."""
    sample_rate = 100.0
    num_frames = 1000
    expected_channels = [1, 3, 23, 30, 31, 32, 33, 42]
    p = preprocess.Preprocessor('test', sample_rate, sample_rate,
                                channel_numbers='1,3,42,23,30-33')

    # Every row holds 0..63, so each value equals its own column index.
    data = np.ones((num_frames, 64), dtype=np.int32).cumsum(axis=1) - 1

    new_data = p.select_channels(data)
    for row in (0, -1):
      self.assertEqual(list(new_data[row, :]), expected_channels)
Example #9
0
  def test_processing_add_context(self):
    """Checks context addition when data arrives one second at a time.

    Each second of data is handed to the preprocessor as it would be in a
    live setting; pre-context comes from the frames seen earlier. Post context
    cannot be produced for the final post_context frames. The stitched result
    is then compared with the context frames that TestBrainData generates.
    """
    fs_in = 100.0
    fs_out = 100.0
    num_secs = 10
    pre_context = 10
    post_context = 5
    num_features = 64
    frames_per_sec = int(fs_in)
    total_context = pre_context + 1 + post_context
    context_width = num_features * total_context
    all_data = np.random.rand(num_secs * frames_per_sec, num_features)
    # Only the context-adding step of the preprocessor is exercised here.
    p = preprocess.Preprocessor('test', fs_in, fs_out,
                                highpass_cutoff=0,
                                pre_context=pre_context,
                                post_context=post_context)
    c_out = np.empty((0, context_width))
    # Feed several batches so that the batch boundaries are covered too.
    for sec in range(num_secs):
      first = sec * frames_per_sec
      input_data = all_data[first:first + frames_per_sec, :]
      context_filled_data = p.add_context(input_data)
      self.assertEqual(context_filled_data.shape[1], context_width)
      print(input_data.shape)
      print(context_filled_data.shape)
      c_out = np.concatenate([c_out, context_filled_data], axis=0)
    np.testing.assert_array_equal(c_out[pre_context, :],
                                  all_data[:total_context, :].flatten())
    # Cross-check against TestBrainData, which relies on tf.signal.frame to
    # add the pre and post context automatically.
    test_brain_data = TestBrainData('input', 'output', fs_in,
                                    repeat_count=1,
                                    pre_context=pre_context,
                                    post_context=post_context)
    test_brain_data.preserve_test_data(all_data, all_data[:, :1], None)
    test_dataset = test_brain_data.create_dataset(mode='program_test')
    for features, _ in test_dataset.take(1):
      input_data_td = features
    reference = input_data_td['input_1'][:-post_context, :]
    np.testing.assert_array_equal(c_out, reference)
Example #10
0
 def test_resample(self):
   """Checks that resampling 1kHz data to 100Hz yields the expected length."""
   fs_in = 1000.0
   fs_out = 100.0
   sig_len = 2  # seconds
   tone_hz = 10

   # Column vectors of sample times at the input and the output rate.
   t_in = np.linspace(0, sig_len, int(sig_len*fs_in), False).reshape((-1, 1))
   t_out = np.linspace(0, sig_len, int(sig_len*fs_out), False).reshape((-1, 1))
   # A 10Hz sinusoid at each rate, duplicated with inverted sign so that the
   # test covers two channels.
   tone_in = np.sin(2*np.pi*tone_hz*t_in)
   tone_out = np.sin(2*np.pi*tone_hz*t_out)
   sig_in = np.hstack((tone_in, -tone_in))
   sig_out = np.hstack((tone_out, -tone_out))

   # Downsample the signal to 100Hz.
   p = preprocess.Preprocessor('test', fs_in, fs_out)
   sig_resamp = p.resample(sig_in)
   # Only the number of output frames is compared.
   self.assertEqual(sig_resamp.shape[0], sig_out.shape[0])
Example #11
0
 def test_reref(self):
   """Checks re-referencing of two channel groups against manual subtraction.

   Channels 0-6 are referenced to channel 11 and channels 7-13 to channel 4.
   """
   sample_rate = 100.0
   num_frames = 500
   num_channels = 14
   # Synthetic 14-channel EEG data.
   input_data = np.random.randn(num_frames, num_channels)

   p = preprocess.Preprocessor('test', sample_rate, sample_rate,
                               ref_channels=[[11], [4]],
                               channels_to_ref=[range(7), range(7, 14)])
   # Pass a copy so input_data stays untouched for computing expectations.
   output_data = p.reref_data(np.copy(input_data))

   expected_first = input_data[:, :7] - input_data[:, [11]]
   expected_second = input_data[:, 7:] - input_data[:, [4]]
   np.testing.assert_allclose(output_data[:, :7], expected_first)
   np.testing.assert_allclose(output_data[:, 7:], expected_second)
Example #12
0
  def test_parsing(self):
    """Checks that parameters embedded in the name string are parsed.

    A name of the form `feature(key=value;key=value)` is given to the
    Preprocessor, and its string representation must mention the feature name
    and every `key=value` pair.
    """
    fs_in = 100.0
    fs_out = 100.0
    feature_name = 'eeg'
    param_dict = {
        'channel_numbers': '2',
        'highpass_order': 6,
        'highpass_cutoff': 42,
    }
    joined_params = ';'.join(
        '{}={}'.format(key, value) for key, value in param_dict.items())
    name_string = '{}({})'.format(feature_name, joined_params)
    print('test_parsing Preprocessor(%s, %g)' % (name_string, fs_in))
    p = preprocess.Preprocessor(name_string, fs_in, fs_out)
    print('test_parsing:', p)
    self.assertIn(feature_name, str(p))
    for key, value in param_dict.items():
      if key == 'channel_numbers':
        # Compare against the string form of the channel specification.
        value = '%s' % param_dict['channel_numbers']
      self.assertIn('{}={}'.format(key, value), str(p))
Example #13
0
  def test_highpass_freq_resp(self):
    """Checks the high-pass filter's gain at, below and above the cutoff.

    The impulse response is measured, turned into a magnitude response in dB
    with a zero-padded FFT, plotted to /tmp, and then compared against the
    -3.02dB level around the cutoff frequency.
    """
    fs_in = 100.0
    fs_out = 100.0
    highpass_cutoff = 2
    three_db = -3.02
    p = preprocess.Preprocessor('test', fs_in, fs_out,
                                highpass_cutoff=highpass_cutoff,
                                highpass_order=2)

    num_frames = 512
    fft_size = 2*num_frames
    impulse = np.zeros((num_frames, 1))
    # Don't put impulse at zero because of filter initialization
    impulse[1, 0] = 1
    response = p.highpass_filter(impulse, reset=True)
    spectrum = np.fft.fft(response, axis=0, n=fft_size)
    freq_resp = 20*np.log10(np.abs(spectrum))
    freqs = np.fft.fftfreq(fft_size)*fs_in

    with tf.io.gfile.GFile('/tmp/test_highpass_freq_resp.png', mode='w') as fp:
      plt.clf()
      plot_bins = num_frames//2
      plt.semilogx(freqs[0:plot_bins], freq_resp[0:plot_bins])
      plt.ylim([-20, 0])
      plt.plot(highpass_cutoff, three_db, 'x')
      plt.xlabel('Frequency (Hz)')
      plt.ylabel('Response (dB)')
      plt.grid(True, which='both')
      plt.title('%gHz Highpass Filter Test' % highpass_cutoff)
      plt.savefig(fp)

    # Index of the FFT bin whose frequency is closest to the cutoff.
    three_db_point = np.argmin(np.abs(freqs - highpass_cutoff))
    # Check gain at cutoff frequency
    self.assertAlmostEqual(freq_resp[three_db_point], three_db, delta=.3)
    # Make sure gain is less than -3dB below the cutoff frequency
    np.testing.assert_array_less(freq_resp[0:three_db_point], three_db)
    # Make sure gain is better than -3dB above the cutoff frequency
    np.testing.assert_array_less(three_db,
                                 freq_resp[three_db_point+1:num_frames])