Esempio n. 1
0
def get_binned_sentence_features(data, number_of_bins):
    """Quantile-bin each eye-tracking feature across the whole dataset.

    Parameters
    ----------
    data : iterable of sentence objects, each exposing a `.word` iterable.
    number_of_bins : int
        Number of quantile bins per feature.

    Returns
    -------
    list of 2-D int arrays, one per sentence, where every feature value is
    replaced by its (1-based) bin index from `np.digitize`.
    """
    sentence_features = [
        [get_word_features(word) for word in sentence.word
         if is_real_word(word)]
        for sentence in data
    ]

    feature_bins = []  # renamed from `bin`, which shadowed the builtin
    for n_feature in np.arange(N_ET_FEATURES):
        # Pool this feature's values over every word of every sentence.
        all_values_this_feature = []
        for sentence in sentence_features:
            all_values_this_feature.extend(
                word[n_feature] for word in sentence)
        # Upper bin edges at the k/number_of_bins quantiles, k = 1..bins.
        edges = np.percentile(all_values_this_feature,
                              q=np.linspace(100 / number_of_bins, 100,
                                            number_of_bins))
        # Nudge the top edge so the dataset maximum lands inside the last
        # bin instead of overflowing past it in np.digitize.
        edges[number_of_bins - 1] += 1
        feature_bins.append(edges)

    binned_sentence_features = []
    for sentence in sentence_features:
        sentence_2d_array = np.array(sentence)
        for n_feature in np.arange(N_ET_FEATURES):
            sentence_2d_array[:, n_feature] = np.digitize(
                sentence_2d_array[:, n_feature], feature_bins[n_feature])
        binned_sentence_features.append(sentence_2d_array.astype(int))

    return binned_sentence_features
Esempio n. 2
0
def get_sentence_features(sentence):
    """Collect per-word feature vectors for one sentence.

    Words rejected by `is_real_word` are skipped. Words whose features come
    back as None are replaced by an all-zero vector so the resulting array
    stays rectangular.
    """
    features = []
    for word in sentence.word:
        if not is_real_word(word):
            continue
        word_features = get_word_features(word)
        if word_features is None:
            word_features = np.zeros(N_ELECTRODES)
        features.append(word_features)
    return np.array(features)
Esempio n. 3
0
def get_normalized_sentence_features(sentence, normalization_values):
    """Return z-scored word features for one sentence.

    Parameters
    ----------
    sentence : object exposing a `.word` iterable.
    normalization_values : mapping with 'mu' and 'sigma' arrays
        (per-feature mean and standard deviation of the training data).

    Returns
    -------
    np.ndarray of centered, rescaled word features.
    """
    mu, sigma = normalization_values['mu'], normalization_values['sigma']
    sentence_features = np.array([
        get_word_features(word) for word in sentence.word if is_real_word(word)
    ])
    # Center and rescale (standard z-score). Uses the already-unpacked `mu`
    # instead of a second redundant dict lookup.
    return (sentence_features - mu) / sigma
Esempio n. 4
0
def get_power_spectrum_sentence_features(sentence, eeg_config):
    """Stack per-word power-spectrum features for one sentence.

    Words with an empty feature vector are imputed with the mean of the
    words that do have features; if no word has features, an all-zero
    vector is used instead.

    Returns
    -------
    np.ndarray with one row per real word of the sentence.
    """
    sentence_features = [
        get_power_spectrum_word_features(word, eeg_config)
        for word in sentence.word if is_real_word(word)
    ]
    features_with_value = [sf for sf in sentence_features if sf.size > 0]
    if features_with_value:
        # Average over words (axis=0 of the stacked list). The original
        # wrapped the list in an extra list and averaged over axis=1, which
        # only coincidentally yields the same values for 1-D features.
        default_value = np.mean(features_with_value, axis=0)
    else:
        default_value = np.zeros(N_ELECTRODES * N_FREQUENCY_BANDS)
    return np.vstack(
        [default_value if sf.size == 0 else sf for sf in sentence_features])
Esempio n. 5
0
def get_normalized_sentence_features(sentence, normalization_values):
    """Return normalized, outlier-filtered word features for one sentence.

    Non-fixated words (feature vector None) are imputed with the training
    mean before centering, so they come out as zeros after normalization.
    Values beyond MAX_OUTLIER_TOLERANCE standard deviations from the mean
    are zeroed out rather than clipped.

    Parameters
    ----------
    sentence : object exposing a `.word` iterable.
    normalization_values : mapping with 'mu' and 'sigma' arrays.

    Returns
    -------
    np.ndarray of features rescaled into roughly [-1, 1].
    """
    mu, sigma = normalization_values['mu'], normalization_values['sigma']
    sentence_features = [
        get_word_features(word) for word in sentence.word if is_real_word(word)
    ]
    # Replace non-fixated words by the per-feature averages.
    sentence_features = np.array(
        [mu if sf is None else sf for sf in sentence_features])
    # Center on the training mean (reuse the unpacked `mu`).
    centered_sentence_features = sentence_features - mu
    # Zero out outliers, then rescale by the tolerance band.
    filtered_sentence_features = np.where(
        np.abs(centered_sentence_features) < MAX_OUTLIER_TOLERANCE * sigma,
        centered_sentence_features, 0)
    return filtered_sentence_features / (MAX_OUTLIER_TOLERANCE * sigma)
Esempio n. 6
0
def get_normalization_values(data):
    """Compute the per-feature mean and standard deviation over `data`.

    Returns
    -------
    (mu, sigma) : pair of np.ndarray with one entry per eye-tracking
        feature; sigma is floored at 0.0001 so later normalization never
        divides by (near-)zero.
    """
    per_sentence = [
        [get_word_features(word) for word in sentence.word
         if is_real_word(word)]
        for sentence in data
    ]

    mu, sigma = [], []
    for n_feature in np.arange(N_ET_FEATURES):
        # Pool this feature's values over all words of all sentences.
        values = [word[n_feature]
                  for words in per_sentence
                  for word in words]
        mu.append(np.average(values))
        # Floor the std to keep downstream divisions safe.
        sigma.append(np.max([np.std(values), 0.0001]))

    return np.array(mu), np.array(sigma)
Esempio n. 7
0
def get_normalization_values(data):
    """Compute the per-electrode mean and standard deviation over `data`.

    Words whose feature vector is None are skipped when pooling voltages.

    Returns
    -------
    (mu, sigma) : pair of np.ndarray with one entry per electrode;
        sigma is floored at 0.0001 so later normalization never divides
        by (near-)zero.
    """
    # Removed a dead `count` counter and commented-out debug code.
    sentence_features = []
    for sentence in data:
        sentence_features.append([
            get_word_features(word) for word in sentence.word
            if is_real_word(word)
        ])

    mu, sigma = [], []

    for n_electrode in np.arange(N_ELECTRODES):
        all_voltages_electrode = []
        for sentence in sentence_features:
            all_voltages_electrode.extend(
                word[n_electrode] for word in sentence if word is not None)
        mu.append(np.average(all_voltages_electrode))
        # Floor the std to keep downstream divisions safe.
        sigma.append(np.max([np.std(all_voltages_electrode), 0.0001]))

    return np.array(mu), np.array(sigma)
Esempio n. 8
0
def count_words(word_array):
    """Return how many entries of `word_array` pass `is_real_word`."""
    # Iterate the items directly instead of indexing via range(len(...)).
    return sum(1 for word in word_array if is_real_word(word))