Example #1
def kfold(k=5):
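    # Run k-fold cross-validation for every window length and each selected participant; prints per-fold accuracy.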
    subjects = PARTICIPANT_LIST
    w_lengths = [100, 50, 25, 5]

    for _end in w_lengths:
        print(_end)
        for _subj in subjects:
            # if True:  # total
            # if _subj.find("pen") >= 0:
            # if _subj.find("umbr") >= 0:
            if _subj.find("pen") < 0 and _subj.find("umbr") < 0:  # fh
                print(f"subj: {_subj} len: {_end}")
                params = {"end": _end, "dir": _subj, "set": INPUT_SET}
                x, y = DataLoader().load(params)
                x = scale_input(x)

                kf = KFold(n_splits=k, shuffle=True, random_state=293)

                for train_index, test_index in kf.split(x):
                    x_train, x_test = x[train_index], x[test_index]
                    y_train, y_test = y[train_index], y[test_index]

                    y_train = to_categorical(y_train)
                    x_train = reshape_for_cnn(x_train)
                    x_test = reshape_for_cnn(x_test)

                    cnn = get_cnn_adv(x_train[0], len(INPUT_SET))
                    model, accuracy, cm = test_model(cnn, x_train, x_test, y_train, y_test)
                    print(accuracy)

                    backend.clear_session()

Example #2
def train_models_main():
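    # Train one CNN for each configured subject and window length, save each model and report the average accuracy.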
    subjects = [PARTICIPANT_LIST[0]]

    _set = BUZZ_SET

    w_lengths = [WINDOW_LENGTHS[0]]

    total_accuracy = 0.0

    count = 0

    for _subj in subjects:
        for _end in w_lengths:
            params = {"end": _end, "dir": _subj, "set": _set}
            print(f"CNN {_subj}, {_end}")

            x, y = DataLoader().load(params)
            x = scale_input(x)
            x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True)
            y_train = to_categorical(y_train)
            x_train = reshape_for_cnn(x_train)
            x_test = reshape_for_cnn(x_test)

            cnn = get_cnn_adv(x_train[0], len(_set))
            model, accuracy = test_model(cnn, x_train, x_test, y_train, y_test)
            model_name = _subj + "_" + str(_end)
            model.save(model_name)

            count += 1
            total_accuracy += accuracy

    print("Average accuracy is " + str(total_accuracy / count))

    return model_name
Example #3
def one_hot_encode(*labels, class_amount=10):
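    # One-hot encode each label array and return the encoded arrays as a tuple.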
    encoded_labels = []
    for label in labels:
        one_hot_encoded = np_utils.to_categorical(label, class_amount)
        encoded_labels.append(one_hot_encoded)

    return tuple(encoded_labels)
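
A minimal usage sketch for the helper above; y_train and y_test are hypothetical integer label arrays:

y_train_oh, y_test_oh = one_hot_encode(y_train, y_test, class_amount=10)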
Example #4
def encode_categories(y):
    # encode class values as integers
    encoder = LabelEncoder()
    encoded_Y = encoder.fit_transform(y)
    # convert integers to dummy variables (i.e. one hot encoded)
    dummy_y = to_categorical(encoded_Y).astype(int)
    # print(dummy_y)
    return dummy_y
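
A minimal usage sketch, assuming the toy labels below (hypothetical) and the LabelEncoder/to_categorical imports used by the snippet:

labels = ["cat", "dog", "cat", "bird"]
dummy = encode_categories(labels)
# dummy is a (4, 3) one-hot integer array; classes are sorted alphabetically (bird, cat, dog)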
Example #5
def test_model_new(model, x_train, x_test, y_train, y_test, epochs=KERAS_EPOCHS):
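    # Fit the model, compute test-set accuracy and plot a normalised confusion matrix.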
    model.fit(x_train, y_train, validation_data=(x_test, to_categorical(y_test)), batch_size=KERAS_BATCH_SIZE,
              epochs=epochs,
              verbose=1)
    y_pred = model.predict(x_test.astype('float32'))
    y_pred = [np.argmax(y) for y in y_pred]
    accuracy = metrics.accuracy_score(y_test, y_pred)
    plot_confusion_matrix(y_true=np.asarray(y_test).astype(int),
                          y_pred=np.asarray(y_pred).astype(int),
                          title=str(accuracy), normalize=True,
                          classes=[str(i + 1) for i in range(len(y_train[0]))])
    return model, accuracy
Example #6
File: test4.py  Project: xiaoduli/notekeras
def generator_data():
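    # Yield (image batch, one-hot label batch, placeholder) tuples from the dataset ds.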
    for data in ds:
        image = data['features'].numpy()
        label = data['label'].numpy()

        # image = image.reshape([32, 28, 28, 1])
        # label = label.reshape([32, 1])
        # print(label)

        label = np_utils.to_categorical(label, num_classes=3)
        print(image.shape, label.shape)

        yield image, label, [None]
Example #7
def key_test():
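    # Merge the key files, cut them into trials, train a CNN on the result and save the model.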
    merger = KeyfileMerger()
    merger.load_files()
    merger.merge()
    merger.analyse()
    x, y = merger.cut_trials()
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, shuffle=True, random_state=42)
    y_train = to_categorical(y_train)
    x_train = reshape_for_cnn(x_train)
    x_test = reshape_for_cnn(x_test)
    cnn = get_cnn(x_train[0], len(y_train[0]))
    model, accuracy, cm = test_model(cnn, x_train, x_test, y_train, y_test)
    model.save(KeyConstants.MODEL_PATH)
Example #8
File: train.py  Project: Slownite/Prequel
def to_integer_base(notes, sequence_size):
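    # Map notes to integers, build sliding windows of length sequence_size, normalise the inputs and one-hot encode the outputs.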
    vocab_size = len(set(notes))
    pitchnames = sorted(set(notes))
    notes_to_int = dict(
        (note, number) for number, note in enumerate(pitchnames))
    network_output = []
    network_input = []
    for i in range(0, len(notes) - sequence_size, 1):
        sequence_in = notes[i:i + sequence_size]
        sequence_out = notes[i + sequence_size]
        network_input.append([notes_to_int[char] for char in sequence_in])
        network_output.append(notes_to_int[sequence_out])

    patterns = len(network_input)
    network_input = np.reshape(network_input, (patterns, sequence_size, 1))
    network_input = network_input / float(vocab_size)
    network_output = np_utils.to_categorical(network_output)
    return network_input, network_output, vocab_size, pitchnames
Example #9
def generate_sample(size, n_patterns, parameter=None):
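    # Build n_patterns frame sequences; labels are integer-encoded per sample and one-hot encoded (4 classes) at the end.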
    X, y = list(), list()
    for i in range(n_patterns):
        # print("gen{}/{}".format(i,n_patterns))
        frames, labels = build_frames2(size)
        code = np.array(labels)
        label_encoder = LabelEncoder()
        vec = label_encoder.fit_transform(code)

        X.append(frames)
        y.append(vec)
    # resize as [samples, timesteps, width, height, channels]

    X = np.array(X).reshape(n_patterns, len(X[0]), size, size, 1)
    y = np.array(y).reshape(n_patterns, 4)
    labels = to_categorical(y, 4)

    return X, labels
Example #10
def train_data(features, labels):
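    # Grid-search over the number of dense layers, filter sizes and conv layers, logging every run to TensorBoard.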

    features = features / 255.0
    y_labels = np_utils.to_categorical(labels)

    dense_layers = [0, 1, 2, 3, 4, 5]
    sizes_layers = [32, 64, 128, 256]
    conv_layers = [1, 2, 3, 4]

    for dense in dense_layers:
        for size in sizes_layers:
            for conv in conv_layers:

                name_model = 'Training_Model_{}_Dense_{}_Size_{}_Conv_{}'.format(dense,size,conv,int(time.time()))

                tensorboard = TensorBoard(log_dir='logs\\{}'.format(name_model))

                model = Sequential()

                model.add(Conv2D(size, (3, 3), input_shape=features.shape[1:]))
                model.add(Activation("relu"))
                model.add(MaxPool2D(pool_size=(2, 2)))

                for layer in range(conv-1):
                    model.add(Conv2D(size, (3, 3)))
                    model.add(Activation("relu"))
                    model.add(MaxPool2D(pool_size=(2, 2)))

                model.add(Flatten())

                for layer in range(dense):
                    model.add(Dense(size))
                    model.add(Activation('relu'))

                model.add(Dense(7))
                model.add(Activation('softmax'))

                model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

                print(name_model)
                print(model.summary())
                model.fit(features, y_labels, batch_size=32, epochs=25, validation_split=0.1, callbacks=[tensorboard])

    # model = Sequential()
Example #11
def generate_DB_A(size, n_patterns, parameter=None):
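    # Build n_patterns DB_A frame sequences; labels are integer-encoded per sample and one-hot encoded into 5 classes.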
    X, y = list(), list()
    for i in range(n_patterns):
        print("gen{}/{}".format(i, n_patterns))
        frames, labels = build_frames_DB_A(size=size,
                                           shuff=parameter['shuff'][0])
        code = np.array(labels)
        label_encoder = LabelEncoder()
        vec = label_encoder.fit_transform(code)

        X.append(frames)
        y.append(vec)
    # resize as [samples, timesteps, width, height, channels]

    #XX = np.array(X)
    #XX.shape = (n_patterns, len(X[0]), size, size, 1)
    X = np.array(X).reshape(n_patterns, len(X[0]), size, size, 1)
    # y = np.array(y).reshape(n_patterns, 8)
    labels = to_categorical(y, 5)

    return X, labels
Example #12
def get_tflite():
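    # Train a CNN for each window length, save the Keras model and convert it to a .tflite file.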
    _set = INPUT_SET
    _subj = "kirillpen"

    w_lengths = [100, 50, 25, 10]

    for _end in w_lengths:
        params = {"end": _end, "dir": _subj, "set": _set}
        x, y = DataLoader().load(params)
        x = scale_input(x)
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True)
        y_train = to_categorical(y_train)
        x_train = reshape_for_cnn(x_train)
        x_test = reshape_for_cnn(x_test)
        cnn = get_cnn_adv(x_train[0], len(_set))
        model, accuracy, cm = test_model(cnn, x_train, x_test, y_train, y_test)
        model_name = f"{_subj}_{_end}"
        model.save(model_name)
        converter = TFLiteConverter.from_keras_model_file(model_name)
        tflite_model = converter.convert()
        with open(model_name + '.tflite', "wb") as f:
            f.write(tflite_model)
Example #13
    def train(self, number_of_epochs=100, test_size=0.2, callbacks=None):
        # Avoid a mutable default argument; start from a fresh callbacks list on each call.
        callbacks = [] if callbacks is None else list(callbacks)
        self.log_name = self.log_name + 'epochs=' + str(
            number_of_epochs) + 'test=' + str(test_size)
        log_dir = "logs\\new_results\\" + self.log_name + '_' + datetime.now(
        ).strftime("%Y%m%d-%H%M%S")
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
        csv_callback = tf.keras.callbacks.CSVLogger(filename=log_dir +
                                                    'logs.csv')
        callbacks.append(tensorboard_callback)
        callbacks.append(csv_callback)

        x = []
        y = []
        for track in self.data_set:
            for j in range(0, len(track) - self.sequence_length):
                input_vector = []
                for i in range(self.sequence_length):
                    input_vector.append(
                        track[i + j] /
                        self.unique_events_list.get_event_list_size())
                x.append(input_vector)
                y.append(track[j + self.sequence_length])

        self.prepare_model(self.unique_events_list.get_event_list_size())
        x = np.reshape(x, (len(x), self.sequence_length, 1))
        y = np_utils.to_categorical(y)
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=test_size, random_state=1)

        history = self.model.fit(x_train,
                                 y_train,
                                 epochs=number_of_epochs,
                                 batch_size=32,
                                 callbacks=callbacks,
                                 validation_data=(x_test, y_test))
        pyplot.plot(history.history['loss'])
        pyplot.plot(history.history['val_loss'])
        pyplot.show()
Example #14
def get_gcm():
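    # Accumulate confusion matrices over the selected subjects for one window length and plot the normalised average.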
    subjects = PARTICIPANT_LIST

    _set = INPUT_SET

    _end = 50

    cum_cm = np.zeros((len(INPUT_SET), len(INPUT_SET)))

    print(f'cm for length {_end}')

    count = 0

    for _subj in subjects:
        # if _subj.find("pen") >= 0:
        # if _subj.find("umbr") >= 0:
        if _subj.find("pen") < 0 and _subj.find("umbr") < 0:  # fh
            count += 1
            params = {"end": _end, "dir": _subj, "set": _set}
            x, y = DataLoader().load(params)
            x = scale_input(x)
            x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True)
            y_train = to_categorical(y_train)
            x_train = reshape_for_cnn(x_train)
            x_test = reshape_for_cnn(x_test)
            cnn = get_cnn_adv(x_train[0], len(_set))
            model, accuracy, cm = test_model(cnn, x_train, x_test, y_train, y_test)
            cum_cm += cm
            print(accuracy)

    ax = plt.axes()
    ax.set_ylabel("Target")
    mx = cum_cm / count  # normalize by the number of subjects
    disp = ConfusionMatrixDisplay(confusion_matrix=mx, display_labels=["suggestion", "top", "mid", "bottom", "rest"])
    disp.plot(include_values=True, ax=ax, cmap='Blues')
    plt.show()
Example #15
def windDir(location):
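    # Train a dense classifier on (day, month) to predict wind direction, forecast the next 90 days and append them to data/<location>.csv.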
    filename = location+"_preprocessed.csv"

    df = pd.read_csv(filename)
    data = pd.DataFrame(
        columns=['day', 'month', 'WindDir']
    )

    data['day'] = df['day']
    data['month'] = df['month']
    data['WindDir'] = df['WindDir']

    le_pred = LabelEncoder()
    y = le_pred.fit_transform(data.WindDir)
    y = np_utils.to_categorical(y)

    y = y.astype('int32')
    out_classes = y.shape[1]

    model = Sequential()

    model.add(Dense(units=16, input_dim=2, activation='relu'))
    model.add(Dense(units=32, activation='relu'))

    model.add(Dense(units=64, activation='relu'))
    model.add(Dense(units=64, activation='relu'))

    model.add(Dense(units=128, activation='relu'))
    model.add(Dense(units=128, activation='relu'))
    model.add(Dense(units=128, activation='relu'))

    model.add(Dense(units=64, activation='relu'))
    model.add(Dense(units=64, activation='relu'))

    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))

    model.add(Dense(units=out_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])

    model.fit(data.iloc[:, :-1], y, epochs=200, batch_size=512)

    last_date = df['date'].iloc[-1]
    last_date = datetime.datetime.strptime(last_date, "%Y-%m-%d").date()

    pred_date = []

    for i in range(1, 91):
        pred_date.append(last_date + datetime.timedelta(days=i))

    pred_input = [[] for i in range(90)]
    i = 0

    for j in pred_date:
        pred_input[i].append(j.day)
        pred_input[i].append(j.month)
        i += 1

    sample = pd.DataFrame(
        columns=['day', 'month'],
        data=pred_input
    )

    sample_p = model.predict(sample)
    sample_pred = [0 for i in range(len(sample_p))]

    for i in range(len(sample_p)):
        maxm = max(sample_p[i])

        for j in range(len(sample_p[i])):
            if sample_p[i][j] == maxm:
                index = j

        sample_pred[i] = index

    # Use a plain list so the numeric degree values assigned below are not truncated to the string-label dtype.
    directions = list(le_pred.inverse_transform(sample_pred))

    for i in range(len(directions)):
        if directions[i] == 'N':
            c = np.random.randint(0, 2)  # 0 or 1; randint's upper bound is exclusive
            if c == 0:
                directions[i] = np.random.randint(0, 22.5)
            elif c == 1:
                directions[i] = np.random.randint(337.5, 360)

        elif directions[i] == 'NE':
            directions[i] = np.random.randint(22.5, 67.5)

        elif directions[i] == 'E':
            directions[i] = np.random.randint(67.5, 112.5)

        elif directions[i] == 'SE':
            directions[i] = np.random.randint(112.5, 157.5)

        elif directions[i] == 'S':
            directions[i] = np.random.randint(157.5, 202.5)

        elif directions[i] == 'SW':
            directions[i] = np.random.randint(202.5, 247.5)

        elif directions[i] == 'W':
            directions[i] = np.random.randint(247.5, 292.5)

        elif directions[i] == 'NW':
            directions[i] = np.random.randint(292.5, 337.5)

    pred_data = pd.DataFrame(
        columns=['date', 'direction', 'speed']
    )

    pred_data['date'] = pred_date
    pred_data['direction'] = directions

    pred_filename = location+".csv"
    filepath = "data/"+pred_filename

    pred_data.to_csv(filepath, mode='a', header=False, index=False)
Example #16
def single_target(EXPERIMENT_PATH, DATA_PATH, TENSOR_DATA_PATH, window_sequences, list_num_neurons, learning_rate,
                  features_to_use, DROPOUT, EPOCHS, PATIENCE, BATCH_SIZE, test_set):
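    # For each crypto and (window, num_neurons) configuration, train an LSTM per test date and store the predicted vs. observed classes.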

    #################### FOLDER SETUP ####################
    MODELS_PATH = "models"
    RESULT_PATH = "result"
    # starting from the testing set
    for crypto_name in os.listdir(DATA_PATH):
        # create a folder for data in tensor format
        folder_creator(TENSOR_DATA_PATH + "/" + crypto_name, 0)
        # create a folder for results
        folder_creator(EXPERIMENT_PATH + "/" + MODELS_PATH + "/" + crypto_name, 0)
        folder_creator(EXPERIMENT_PATH + "/" + RESULT_PATH + "/" + crypto_name, 0)
        for window, num_neurons in product(window_sequences, list_num_neurons):
            print('Current configuration: ')
            print("Crypto: ",crypto_name,"\t","Window_sequence: ", window, "\t", "Neurons: ", num_neurons)
            predictions_file = {'symbol': [], 'date': [], 'observed_class': [], 'predicted_class': []}
            macro_avg_recall_file = {'symbol': [], 'macro_avg_recall': []}
            # New folders for this configuration
            configuration_name = "LSTM_" + str(num_neurons) + "_neurons_" + str(window) + "_days"
            # Create a folder to save
            # - best model checkpoint
            # - statistics (results)
            statistics = "stats"
            model_path = EXPERIMENT_PATH + "/" + MODELS_PATH + "/" + crypto_name + "/" + configuration_name + "/"
            results_path = EXPERIMENT_PATH + "/" + RESULT_PATH + "/" + crypto_name + "/" + configuration_name + "/" + statistics + "/"
            folder_creator(model_path, 0)
            folder_creator(results_path, 0)
            for date_to_predict in test_set:
                #format of dataset name: Crypto_DATE_TO_PREDICT.csv
                dataset_name=crypto_name+"_"+str(date_to_predict)+".csv"
                dataset, features_without_date = \
                    prepare_input_forecasting(os.path.join(DATA_PATH,crypto_name),dataset_name,features_to_use)
                #print(dataset.dtypes)
                dataset_tensor_format = fromtemporal_totensor(np.array(dataset), window,
                                                              TENSOR_DATA_PATH + "/" + crypto_name + "/",
                                                              crypto_name+"_"+date_to_predict)

                #train, validation,test = get_training_validation_testing_set(dataset_tensor_format, date_to_predict)
                train, test = get_training_validation_testing_set(dataset_tensor_format, date_to_predict)

                index_of_target_feature = features_without_date.index('trend')

                x_train = train[:, :-1, :index_of_target_feature]
                """print("X_TRAIN")
                print(x_train)
                print(x_train.shape)"""

                y_train = train[:, -1, index_of_target_feature]
                """print("Y_TRAIN")
                print(y_train)
                print(y_train.shape)"""

                x_test = test[:, :-1, :index_of_target_feature]
                """print("X_TEST")
                print(x_test)
                print(x_test.shape)"""

                y_test = test[:, -1, index_of_target_feature]
                """print("Y_TEST")
                print(y_test)
                print(y_test.shape)"""

                # change the data type, from object to float
                x_train = x_train.astype('float')
                x_test = x_test.astype('float')

                # one hot encode y
                y_train  = to_categorical(y_train)
                y_test = to_categorical(y_test)
                """print(y_train)
                print(y_test)"""

                # batch size must be a factor of the number of training elements
                if BATCH_SIZE is None:
                    BATCH_SIZE = x_train.shape[0]

                model, history = train_single_target_model(x_train, y_train,
                                             num_neurons=num_neurons,
                                             learning_rate=learning_rate,
                                             dropout=DROPOUT,
                                             epochs=EPOCHS,
                                             batch_size=BATCH_SIZE,
                                             patience=PATIENCE,
                                             num_categories=len(y_train[0]),
                                             date_to_predict=date_to_predict,
                                             model_path=model_path)
                # plot neural network's architecture
                plot_model(model, to_file=model_path + "neural_network.png", show_shapes=True,
                           show_layer_names=True, expand_nested=True, dpi=150)

                #plot loss
                """filename="model_train_val_loss_bs_"+str(BATCH_SIZE)+"_target_"+str(date_to_predict)
                plot_train_and_validation_loss(pd.Series(history.history['loss']),pd.Series(history.history['val_loss']),model_path,filename)

                #plot accuracy
                filename = "model_train_val_accuracy_bs_" + str(BATCH_SIZE) + "_target_" + str(date_to_predict)
                plot_train_and_validation_accuracy(pd.Series(history.history['accuracy']),
                                               pd.Series(history.history['val_accuracy']), model_path, filename)"""

                # Predict for each date in the validation set
                test_prediction = model.predict(x_test)
                # this is important!!
                K.clear_session()
                tf_core.random.set_seed(42)
                gc.collect()
                del model
                del dataset_tensor_format
                del dataset

                print("Num of entries for training: ", x_train.shape[0])
                # invert encoding: argmax of numpy takes the higher value in the array
                print("Predicting for: ", date_to_predict)
                print("Predicted: ", np.argmax(test_prediction))
                print("Actual: ", np.argmax(y_test))
                print("\n")

                # Save the predictions in the dictionary
                predictions_file['symbol'].append(crypto_name)
                predictions_file['date'].append(date_to_predict)
                predictions_file['observed_class'].append(np.argmax(y_test))
                predictions_file['predicted_class'].append(np.argmax(test_prediction))
            save_results(macro_avg_recall_file, crypto_name, predictions_file, results_path)
    return
Example #17
import matplotlib.pyplot as plt
from tensorflow.keras import Sequential
# from tensorflow_core.python.keras import Sequential
from tensorflow_core.python.keras.callbacks import EarlyStopping
from tensorflow_core.python.keras.datasets import mnist
from tensorflow_core.python.keras.layers import Conv2D, MaxPool2D, Flatten, Dropout, Dense
from tensorflow_core.python.keras.utils.np_utils import to_categorical
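
# Load MNIST, scale the pixels to [0, 1] as float16, one-hot encode the labels and build a small CNN.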

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
print(f'X_train: {X_train.shape}, Y_train: {Y_train.shape}')
print(f'X_test: {X_test.shape}, Y_test: {Y_test.shape}')

X_train = X_train.reshape(*X_train.shape, 1).astype('float16') / 255
X_test = X_test.reshape(*X_test.shape, 1).astype('float16') / 255
Y_train = to_categorical(Y_train, 10, dtype='float16')
Y_test = to_categorical(Y_test, 10, dtype='float16')

print(f'X_train: {X_train.shape}, Y_train: {Y_train.shape}')
print(f'X_test: {X_test.shape}, Y_test: {Y_test.shape}')

model = Sequential()

model.add(
    Conv2D(filters=32,
           kernel_size=(3, 3),
           activation='relu',
           input_shape=(28, 28, 1)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=2))
model.add(Dropout(rate=0.25))
model.add(Flatten())