import numpy as np

# The helpers (get_word_features, get_power_spectrum_word_features, is_real_word)
# and the constants (N_ET_FEATURES, N_ELECTRODES, N_FREQUENCY_BANDS,
# MAX_OUTLIER_TOLERANCE) are assumed to be defined elsewhere in this module.


def get_binned_sentence_features(data, number_of_bins):
    # Discretize each eye-tracking feature into percentile-based bins
    sentence_features = []
    for sentence in data:
        sentence_features.append([
            get_word_features(word) for word in sentence.word
            if is_real_word(word)
        ])
    # Compute per-feature bin edges from the percentiles of all values in the data
    bins = []
    for n_feature in np.arange(N_ET_FEATURES):
        all_values_this_feature = []
        for sentence in sentence_features:
            all_values_this_feature.extend(
                [word[n_feature] for word in sentence])
        bin_edges = np.percentile(all_values_this_feature,
                                  q=np.linspace(100 / number_of_bins, 100,
                                                number_of_bins))
        # Nudge the top edge up so the maximum value falls inside the last bin
        bin_edges[number_of_bins - 1] += 1
        bins.append(bin_edges)
    # Replace each raw feature value with the index of its bin
    binned_sentence_features = []
    for sentence in sentence_features:
        sentence_2d_array = np.array(sentence)
        for n_feature in np.arange(N_ET_FEATURES):
            sentence_2d_array[:, n_feature] = np.digitize(
                sentence_2d_array[:, n_feature], bins[n_feature])
        binned_sentence_features.append(sentence_2d_array.astype(int))
    return binned_sentence_features
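# Usage sketch (hypothetical): the exact shape of `data` is an assumption; all
# this module requires is an iterable of sentence objects exposing a `.word`
# iterable of word objects.
def _example_binning(data, number_of_bins=10):
    binned = get_binned_sentence_features(data, number_of_bins)
    # Each entry is an int array of shape (n_real_words, N_ET_FEATURES) whose
    # values are bin indices in [0, number_of_bins - 1]
    print(binned[0].shape, binned[0].min(), binned[0].max())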
def get_sentence_features(sentence):
    sentence_features = [
        get_word_features(word) for word in sentence.word
        if is_real_word(word)
    ]
    # Non-fixated words (None features) become zero vectors
    return np.array([
        np.zeros(N_ELECTRODES) if sf is None else sf
        for sf in sentence_features
    ])
def get_normalized_sentence_features(sentence, normalization_values):
    # Get and normalize word features for each sentence
    # (simple variant: no missing-fixation handling or outlier clipping,
    # cf. the fuller variant further down)
    mu, sigma = normalization_values['mu'], normalization_values['sigma']
    sentence_features = [
        get_word_features(word) for word in sentence.word
        if is_real_word(word)
    ]
    sentence_features = np.array(sentence_features)
    # Center and rescale
    centered_sentence_features = sentence_features - mu
    return centered_sentence_features / sigma
def get_power_spectrum_sentence_features(sentence, eeg_config):
    sentence_features = [
        get_power_spectrum_word_features(word, eeg_config)
        for word in sentence.word if is_real_word(word)
    ]
    sentence_features_with_value = [
        sf for sf in sentence_features if sf.size > 0
    ]
    # Words without EEG data fall back to the mean over the words that have it;
    # if no word in the sentence has data, fall back to a zero vector
    if len(sentence_features_with_value) > 0:
        default_value = np.mean(sentence_features_with_value, axis=0)
    else:
        default_value = np.zeros(N_ELECTRODES * N_FREQUENCY_BANDS)
    return np.vstack(
        [default_value if sf.size == 0 else sf for sf in sentence_features])
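# Usage sketch (hypothetical): `eeg_config` is assumed to carry whatever band
# and electrode settings get_power_spectrum_word_features expects.
def _example_power_spectrum(sentence, eeg_config):
    features = get_power_spectrum_sentence_features(sentence, eeg_config)
    # One row per real word; rows for words without EEG data hold the
    # sentence mean (or zeros when no word in the sentence had data)
    print(features.shape)  # (n_real_words, N_ELECTRODES * N_FREQUENCY_BANDS)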
def get_normalized_sentence_features(sentence, normalization_values):
    # Get and normalize word features for each sentence
    # (fuller variant: fills in non-fixated words and clips outliers)
    mu, sigma = normalization_values['mu'], normalization_values['sigma']
    sentence_features = [
        get_word_features(word) for word in sentence.word
        if is_real_word(word)
    ]
    # Replace non-fixated words by the per-feature averages
    sentence_features = np.array(
        [mu if sf is None else sf for sf in sentence_features])
    # Center, remove outliers and rescale
    centered_sentence_features = sentence_features - mu
    # NOTE: outliers used to be replaced by mu, which looks wrong after
    # centering; they are now set to 0 (the centered mean). Re-evaluate if needed.
    # filtered_sentence_features = np.where(
    #     np.abs(centered_sentence_features) < MAX_OUTLIER_TOLERANCE * sigma,
    #     centered_sentence_features, mu)
    filtered_sentence_features = np.where(
        np.abs(centered_sentence_features) < MAX_OUTLIER_TOLERANCE * sigma,
        centered_sentence_features, 0)
    return filtered_sentence_features / (MAX_OUTLIER_TOLERANCE * sigma)
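# Minimal sketch of the clipping rule above on toy numbers. The tolerance of 3
# is purely illustrative, standing in for MAX_OUTLIER_TOLERANCE: centered values
# whose magnitude reaches 3 * sigma are zeroed, the rest are scaled into (-1, 1).
def _example_outlier_clipping():
    centered = np.array([-10.0, -1.0, 0.5, 10.0])
    sigma, tolerance = 1.0, 3
    filtered = np.where(np.abs(centered) < tolerance * sigma, centered, 0)
    print(filtered / (tolerance * sigma))  # [ 0.    -0.333  0.167  0.   ]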
def get_normalization_values(data):
    # Get average and std for each eye-tracking feature
    sentence_features = []
    for sentence in data:
        sentence_features.append([
            get_word_features(word) for word in sentence.word
            if is_real_word(word)
        ])
    mu, sigma = [], []
    for n_feature in np.arange(N_ET_FEATURES):
        all_values_this_feature = []
        for sentence in sentence_features:
            all_values_this_feature.extend(
                [word[n_feature] for word in sentence])
        mu.append(np.average(all_values_this_feature))
        # Floor sigma to avoid division by zero for constant features
        sigma.append(np.max([np.std(all_values_this_feature), 0.0001]))
    return np.array(mu), np.array(sigma)
def get_normalization_values(data):
    # Get average and std of the EEG signal for each electrode
    sentence_features = []
    for sentence in data:
        sentence_features.append([
            get_word_features(word) for word in sentence.word
            if is_real_word(word)
        ])
    mu, sigma = [], []
    for n_electrode in np.arange(N_ELECTRODES):
        all_voltages_electrode = []
        for sentence in sentence_features:
            all_voltages_electrode.extend(
                [word[n_electrode] for word in sentence if word is not None])
        mu.append(np.average(all_voltages_electrode))
        # Floor sigma to avoid division by zero for flat channels
        sigma.append(np.max([np.std(all_voltages_electrode), 0.0001]))
    return np.array(mu), np.array(sigma)
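# Usage sketch (hypothetical): ties normalization together. The statistics are
# computed once, typically over the training data, then applied per sentence.
def _example_normalization(train_data):
    mu, sigma = get_normalization_values(train_data)
    normalization_values = {'mu': mu, 'sigma': sigma}
    return [
        get_normalized_sentence_features(sentence, normalization_values)
        for sentence in train_data
    ]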
def count_words(word_array):
    return sum(1 for word in word_array if is_real_word(word))