def initData():
    initialization = InitDataSet()
    doas = initialization.get_dataset_as_doas()
    encoding = Encoding('./../../data_to_be_saved/alphabet_3.txt')
    # choose either option 1 or option 2 below
    # option 1: a single threshold
    mark_bursts_regions_one_threshold(doas)

    # option 2: different thresholds
    # mark_bursts_regions(doas)

    # remove_bursted_trials_when_segment(doas)


    # doas_train, doas_test, ind_test = train_test_doa(doas, 0.2)
    # build my own train/test split instead
    doas_train, doas_test, ind_test = train_test_doa_check_trials(doas, 0.2)

    train_data = ExtractData(doas_train, [channel], ['light', 'medium', 'deep'], [segment], ['all'])
    test_data = ExtractData(doas_test, [channel], ['light', 'medium', 'deep'], [segment], ['all'])

    # FFT features only
    X_train, y_train = obtain_TESPAR_A_FFT_features(train_data, encoding)
    x_test, y_test = obtain_TESPAR_A_FFT_features(test_data, encoding)

    return X_train, y_train, x_test, y_test
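
A minimal sketch of how the returned splits might be consumed; the SVC choice and its parameters are illustrative assumptions, not part of the original project (channel and segment must be defined at module level before initData() is called):

from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

X_train, y_train, x_test, y_test = initData()
model = SVC(kernel='rbf', C=1.0)  # hypothetical model choice
model.fit(X_train, y_train)
print('test accuracy: ' + str(accuracy_score(y_test, model.predict(x_test))))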
Example n. 2
    def get_data(self):

        #     initialization = InitDataSet()
        #     doas = initialization.get_dataset_as_doas()

        # ExtractData parameters: dataset, channels, levels, segment, orientation
        # split_data = SplitData(self.doas, self.channels, self.level, self.segment, self.orientation)
        split_data = ExtractData(self.doas, self.channels, self.level,
                                 self.segment, self.orientation)
        self.X = []
        self.X_validate = []
        # flatten the per-array training and validation data into flat lists
        for i in range(len(split_data.result.arrays)):
            self.X.extend(split_data.result.arrays[i].array_data)
            self.X_validate.extend(split_data.result.arrays[i].array_validate)
        return self.X, self.X_validate
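
A short usage sketch, assuming the enclosing class (its name and constructor are not shown in this snippet) has been instantiated as loader:

# hypothetical instance; doas, channels, level, segment and orientation
# are supplied to the constructor, which this snippet omits
X, X_validate = loader.get_data()
print(str(len(X)) + ' training arrays, ' +
      str(len(X_validate)) + ' validation arrays')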
Example n. 3

for run in range(run_nr):
    print('************************RUN ' + str(run) +
          '************************')
    # first, split the input into train and test sets
    doas_train, doas_test, ind_test = train_test_doa(doas, 0.2)
    np.savetxt(write_file, np.array(ind_test), fmt="%s", newline=' ')
    write_file.write('\n')

    for ind_segment, segment in enumerate(segments):
        for channel in range(len(all_channels)):
            print("start running for channel " + str(all_channels[channel]) +
                  ' ' + segment + '\n')

            # ExtractData(doas, channels, levels, segment, orientation)
            train_data = ExtractData(doas_train, [all_channels[channel]],
                                     ['light', 'deep'], [segment], ['all'])
            test_data = ExtractData(doas_test, [all_channels[channel]],
                                    ['light', 'deep'], [segment], ['all'])

            X_train, y_train = obtain_features_labels(train_data, encoding)
            x_test, y_test = obtain_features_labels(test_data, encoding)

            model = DecisionTreeClassifier(random_state=99,
                                           criterion='gini',
                                           max_depth=2)
            model.fit(X_train, y_train)
            predictions = model.predict(x_test)

            report = classification_report(y_test,
                                           predictions,
                                           output_dict=True)
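
With output_dict=True the report is a plain dict, so per-class scores can be read out directly. A minimal sketch of the bookkeeping this truncated loop presumably continues with, assuming the class labels are the level names ('light', 'deep'):

            # report maps each class label (plus 'accuracy', 'macro avg',
            # 'weighted avg') to precision/recall/f1-score/support
            print('channel ' + str(all_channels[channel]) + ' ' + segment +
                  ' acc=' + str(report['accuracy']) +
                  ' f1(light)=' + str(report['light']['f1-score']) +
                  ' f1(deep)=' + str(report['deep']['f1-score']))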
Example n. 4
good_channels_spont = [1, 3, 5, 12, 14]

run_nr = 5

channel = 2
segment = 'stimulus'

print('test for overfitting RF 0.2')

for run in range(run_nr):
    # first, split the input into train and test sets
    doas_train, doas_test = train_test_doa_remake_balanced(doas)
    print()

    train_data = ExtractData(doas_train, [2], ['light', 'medium', 'deep'],
                             ['spontaneous', 'stimulus'], ['all'])
    test_data = ExtractData(doas_test, [2], ['light', 'medium', 'deep'],
                            ['spontaneous', 'stimulus'], ['all'])

    X_train, y_train = obtain_concatenate_segments_fft(train_data)
    x_test, y_test = obtain_concatenate_segments_fft(test_data)

    # classify with tuned parameters
    model = RandomForestClassifier(n_estimators=5000,
                                   max_depth=5,
                                   min_samples_split=5,
                                   min_samples_leaf=10)
    # model = RandomForestClassifier()
    model.fit(X_train, y_train)
    predictions_train = model.predict(X_train)
    predictions_test = model.predict(x_test)
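
Since this example is explicitly a test for overfitting, the natural follow-up is to compare train and test accuracy inside the loop; a short sketch of that comparison (not shown in the original), assuming scikit-learn's accuracy_score:

    # (assumes: from sklearn.metrics import accuracy_score)
    # a large gap between train and test accuracy indicates overfitting
    acc_train = accuracy_score(y_train, predictions_train)
    acc_test = accuracy_score(y_test, predictions_test)
    print('run ' + str(run) + ': train acc=' + str(acc_train) +
          ', test acc=' + str(acc_test))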
Example n. 5
# f1scores[segment_index][channel_index] accumulates scores across runs
f1scores = [[[] for i in range(channels_range - 1)]
            for j in range(len(segments))]

for run in range(run_nr):
    # first, split the input into train and test sets
    doas_train, doas_test = train_test_doa_remake_balanced(doas)
    # np.savetxt(write_file, np.array(ind_test), fmt="%s", newline=' ')
    # write_file.write('\n')

    for ind_segment, segment in enumerate(segments):
        for channel in range(len(all_channels)):
            print("start running for channel " + str(channel) + '\n')

            # ExtractData(doas, channels, levels, segment, orientation)
            train_data = ExtractData(doas_train, [all_channels[channel]],
                                     ['medium', 'deep'],
                                     ['spontaneous', 'stimulus'], ['all'])
            test_data = ExtractData(doas_test, [all_channels[channel]],
                                    ['medium', 'deep'],
                                    ['spontaneous', 'stimulus'], ['all'])

            X_train, y_train = obtain_concatenate_segments_fft(train_data)
            x_test, y_test = obtain_concatenate_segments_fft(test_data)

            model = RandomForestClassifier(n_estimators=5000,
                                           max_depth=5,
                                           min_samples_split=5,
                                           min_samples_leaf=10)
            model.fit(X_train, y_train)
            predictions = model.predict(x_test)
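
The truncated tail of this loop presumably records each run's score in the f1scores structure defined above; a minimal sketch of that step, assuming scikit-learn's f1_score:

            # (assumes: from sklearn.metrics import f1_score)
            # store this run's weighted F1 in the segment/channel cell
            f1scores[ind_segment][channel].append(
                f1_score(y_test, predictions, average='weighted'))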
Example n. 6
all_channels = [
    2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23,
    24, 25, 26, 27, 28, 29, 30, 31, 32
]

encoding = Encoding('./../../data_to_be_saved/alphabet_3.txt')

data_dir = os.path.join('../..', '..')

initialization = InitDataSet()
doas = initialization.get_dataset_as_doas()
mark_bursts_regions(doas)

for segment in segments:
    for channel in all_channels:
        print("start running for channel " + str(channel) + ' ' + segment)
        data = ExtractData(doas, [channel], ['light', 'medium', 'deep'],
                           [segment], ['all'])
        X, y = obtain_TESPAR_A_FFT_features(data, encoding)

        model = RandomForestClassifier(n_estimators=5000,
                                       max_depth=5,
                                       min_samples_split=5,
                                       min_samples_leaf=10)

        skf = StratifiedKFold(n_splits=10)

        skf.get_n_splits(X, y)

        results = cross_validate(model,
                                 X,
                                 y,
                                 scoring=['accuracy', 'f1_weighted'],
                                 cv=skf)
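
cross_validate returns a dict with one 'test_<metric>' entry per requested scorer; a short sketch of summarizing the 10 folds (the reporting step is not shown in the original):

        # (assumes numpy imported as np)
        # mean score across the stratified folds for each metric
        print('accuracy: ' + str(np.mean(results['test_accuracy'])))
        print('f1 weighted: ' + str(np.mean(results['test_f1_weighted'])))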