Example #1
def evaluate_model(model: Model,
                   dataset_id,
                   method,
                   proto_num,
                   dataset_prefix,
                   batch_size=128,
                   test_data_subset=None,
                   cutoff=None,
                   normalize_timeseries=False,
                   checkpoint_prefix="loss"):
    X_train1, X_train2, y_train, X_test1, X_test2, y_test, is_timeseries = load_dataset_at(
        dataset_id,
        method,
        proto_num,
        normalize_timeseries=normalize_timeseries)

    y_test = to_categorical(y_test, nb_classes(dataset_id))

    optm = Adam(lr=1e-3)
    model.compile(optimizer=optm,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.load_weights(
        "./weights/%s_%s_%s_%s_weights.h5" %
        (dataset_prefix, method, str(proto_num), checkpoint_prefix))

    print("\nEvaluating : ")
    loss, accuracy = model.evaluate([X_test1, X_test2],
                                    y_test,
                                    batch_size=batch_size)
    print()
    print("Final Accuracy : ", accuracy)

    return accuracy
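
A minimal usage sketch for evaluate_model, assuming the mid-fusion builder from Example #3 below and an existing weight file under ./weights/; the dataset name, method and prototype count here are placeholders, not values from the source:

from utils.models import cnn_midfusion_model_v2

# builder arguments follow Example #3: nb_cnn, dim_num, proto_num,
# maximum sequence length, number of classes
model = cnn_midfusion_model_v2(4, 1, 8, 128, 2)
accuracy = evaluate_model(model,
                          "SomeDataset",  # placeholder dataset_id
                          "dtw",          # placeholder method name
                          8,              # proto_num
                          "SomeDataset")  # dataset_prefix used in the weight path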
Example #2
import os
import sys

import numpy as np

if __name__ == "__main__":
    if len(sys.argv) < 5:
        # require four command-line arguments: version, selection,
        # classwise, proto_number
        exit()
    version = sys.argv[1]
    selection = sys.argv[2]
    classwise = sys.argv[3]
    proto_number = int(sys.argv[4])

    print("Starting: {} {} {}".format(version, selection, classwise))

    # build dataset file paths
    full_train_file = os.path.join("data", version + "_TRAIN")
    full_test_file = os.path.join("data", version + "_TEST")
    # load data
    full_train = np.genfromtxt(full_train_file, delimiter=',')
    full_test = np.genfromtxt(full_test_file, delimiter=',')

    no_classes = nb_classes(version)
    # print(proto_number)

    train_max = np.max(full_train[:, 1:])
    train_min = np.min(full_train[:, 1:])

    # rescale features to [-1, 1] using the training extrema; column 0
    # holds the label, which is remapped by the repo's class-modifier helpers
    train_data = 2. * (full_train[:, 1:] - train_min) / (train_max - train_min) - 1.
    train_labels = (full_train[:, 0] + class_modifier_add(version)) * class_modifier_multi(version)

    train_number = np.shape(train_labels)[0]
    #print(np.shape(train_data))
    #print(np.shape(train_labels))

    # scale the test set with the *training* extrema to avoid leakage
    test_data = 2. * (full_test[:, 1:] - train_min) / (train_max - train_min) - 1.
    test_labels = (full_test[:, 0] + class_modifier_add(version)) * class_modifier_multi(version)
    #print(np.shape(test_data))
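
The scaling maps the training minimum to -1 and the training maximum to +1; a quick self-contained check:

import numpy as np

x = np.array([0., 5., 10.])
x_min, x_max = x.min(), x.max()
print(2. * (x - x_min) / (x_max - x_min) - 1.)  # [-1.  0.  1.]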
Example #3
#from utils.models import cnn_raw_model, cnn_dtwfeatures_model, cnn_earlyfusion_model, cnn_midfusion_model, cnn_latefusion_model
from utils.models import cnn_midfusion_model_v2, lstm_model

import sys
import math
import numpy as np
import os

if __name__ == "__main__":
    dataset = sys.argv[1]
    method = sys.argv[2]
    proto_num = int(sys.argv[3])
    os.environ["CUDA_VISIBLE_DEVICES"] = sys.argv[4]

    # max_seq_len, nb_classes and nb_dims are dataset-metadata helpers
    # imported elsewhere in the repo (their module is not shown in the snippet)
    max_seq_length = max_seq_len(dataset)
    nb_class = nb_classes(dataset)
    dim_num = nb_dims(dataset)
    # pooling depth: halve the sequence until its length is roughly 2^3 = 8
    nb_cnn = int(round(math.log(max_seq_length, 2)) - 3)

    #model = cnn_raw_model(nb_cnn, proto_num, max_seq_length, nb_class)
    #model = cnn_dtwfeatures_model(nb_cnn, proto_num, max_seq_length, nb_class)
    #model = cnn_earlyfusion_model(nb_cnn, proto_num, max_seq_length, nb_class)
    model = cnn_midfusion_model_v2(nb_cnn, dim_num, proto_num, max_seq_length,
                                   nb_class)
    #model = cnn_latefusion_model(nb_cnn, proto_num, max_seq_length, nb_class)

    print("Number of Pooling Layers: %s" % str(nb_cnn))

    train_model(model,
                dataset,
                method,
                proto_num,
                # dataset_prefix value assumed; remaining arguments keep
                # train_model's defaults
                dataset_prefix=dataset)
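
To make the pooling-depth formula concrete: a sequence of length 128 gives round(log2 128) - 3 = 4 pooling layers, i.e. the feature map is halved four times, down to length 8:

import math

max_seq_length = 128
nb_cnn = int(round(math.log(max_seq_length, 2)) - 3)
print(nb_cnn)                         # 4
print(max_seq_length // 2 ** nb_cnn)  # 8, the length after nb_cnn halvings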
Example #4
def train_model(model: Model,
                dataset_id,
                method,
                proto_num,
                fold,
                dataset_prefix,
                nb_iterations=100000,
                batch_size=128,
                val_subset=None,
                cutoff=None,
                normalize_timeseries=False,
                opt='Adam',
                learning_rate=1e-3,
                early_stop=False,
                balance_classes=True,
                run_ver=''):
    X_train1, X_train2, y_train, X_test1, X_test2, y_test, is_timeseries = load_dataset_at(
        dataset_id,
        method,
        proto_num,
        fold,
        normalize_timeseries=normalize_timeseries)

    # convert the iteration (weight-update) budget into a number of epochs
    nb_epochs = math.ceil(nb_iterations * (batch_size / X_train1.shape[0]))

    if balance_classes:
        classes = np.arange(0, nb_classes(dataset_id))  #np.unique(y_train)
        le = LabelEncoder()
        y_ind = le.fit_transform(y_train.ravel())
        recip_freq = len(y_train) / (len(le.classes_) *
                                     np.bincount(y_ind).astype(np.float64))
        class_weight = recip_freq[le.transform(classes)]

        print("Class weights : ", class_weight)

    y_train = to_categorical(y_train, nb_classes(dataset_id))
    y_test = to_categorical(y_test, nb_classes(dataset_id))

    # not used: reduce_lr is created below but never added to the callback list
    factor = 1. / np.cbrt(2)
    reduce_lr = ReduceLROnPlateau(monitor='loss',
                                  patience=math.ceil(nb_epochs / 20),
                                  mode='auto',
                                  factor=factor,
                                  cooldown=0,
                                  min_lr=learning_rate / 10.,
                                  verbose=2)

    model_checkpoint1 = ModelCheckpoint(
        "./weights/fold%s_%s_%s_%s_%sloss_weights.h5" %
        (fold, dataset_prefix, method, str(proto_num), run_ver),
        verbose=2,
        monitor='loss',
        save_best_only=True,
        save_weights_only=True)
    model_checkpoint2 = ModelCheckpoint(
        "./weights/fold%s_%s_%s_%s_%sval_acc_weights.h5" %
        (fold, dataset_prefix, method, str(proto_num), run_ver),
        verbose=2,
        monitor='val_acc',
        save_best_only=True,
        save_weights_only=True)

    tensorboard = TensorBoard(
        log_dir='./logs/fold%s_%s%s_%s_%s' %
        (fold, run_ver, dataset_prefix, method, str(proto_num)),
        batch_size=batch_size)
    csv_logger = CSVLogger(
        './logs/fold%s_%s%s_%s_%s.csv' %
        (fold, run_ver, dataset_prefix, method, str(proto_num)))
    if early_stop:
        early_stopping = EarlyStopping(monitor='loss',
                                       patience=500,
                                       mode='auto',
                                       verbose=2,
                                       restore_best_weights=True)
        callback_list = [
            model_checkpoint1, model_checkpoint2, early_stopping, tensorboard,
            csv_logger
        ]
    else:
        callback_list = [
            model_checkpoint1, model_checkpoint2, tensorboard, csv_logger
        ]

    if opt == 'SGD':
        optm = SGD(lr=learning_rate, momentum=0.9, decay=5e-4)
    elif opt == 'Nadam':
        optm = Nadam(lr=learning_rate)
    elif opt == 'Adam_decay':
        optm = Adam(lr=learning_rate, decay=9. / nb_iterations)
    else:
        optm = Adam(lr=learning_rate)

    model.compile(optimizer=optm,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    if balance_classes:
        model.fit([X_train1, X_train2],
                  y_train,
                  batch_size=batch_size,
                  epochs=nb_epochs,
                  callbacks=callback_list,
                  class_weight=class_weight,
                  verbose=2,
                  validation_data=([X_test1, X_test2], y_test))
    else:
        model.fit([X_train1, X_train2],
                  y_train,
                  batch_size=batch_size,
                  epochs=nb_epochs,
                  callbacks=callback_list,
                  verbose=2,
                  validation_data=([X_test1, X_test2], y_test))
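
The balancing branch weights each class by its reciprocal frequency, normalized so the frequency-weighted mean weight is 1 (rarer classes get larger weights). A standalone illustration of the same computation on toy labels, not data from the source:

import numpy as np
from sklearn.preprocessing import LabelEncoder

y_train = np.array([0, 0, 0, 1, 1, 2])  # class 0 is the most frequent
classes = np.arange(0, 3)

le = LabelEncoder()
y_ind = le.fit_transform(y_train.ravel())
recip_freq = len(y_train) / (len(le.classes_) *
                             np.bincount(y_ind).astype(np.float64))
class_weight = recip_freq[le.transform(classes)]
print(class_weight)  # [0.66666667 1.  2.]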
Example #5
def load_dataset_at(index,
                    method,
                    proto_num,
                    normalize_timeseries=False,
                    verbose=True) -> tuple:
    dim = nb_dims(index)

    train_data1 = "data/all-raw-train-data-%s-%s-%s.txt" % (index, method,
                                                            str(proto_num))
    test_data1 = "data/all-raw-test-data-%s-%s-%s.txt" % (index, method,
                                                          str(proto_num))

    train_data2 = "data/all-dtw_features-train-data-%s-%s-%s.txt" % (
        index, method, str(proto_num))
    test_data2 = "data/all-dtw_features-test-data-%s-%s-%s.txt" % (
        index, method, str(proto_num))

    train_labels = "data/all-train-label-%s-%s-%s.txt" % (index, method,
                                                          str(proto_num))
    test_labels = "data/all-test-label-%s-%s-%s.txt" % (index, method,
                                                        str(proto_num))

    is_timeseries = True  # assume all input data is time series

    if os.path.exists(train_data1):
        df = pd.read_csv(train_data1,
                         delimiter=' ',
                         header=None,
                         encoding='latin-1')
    else:
        raise FileNotFoundError('File %s not found!' % (train_data1))
    X_train1 = df.values
    X_train1 = np.reshape(
        X_train1,
        (np.shape(X_train1)[0], dim, int(np.shape(X_train1)[1] / (dim))))

    if normalize_timeseries:
        X_train1_min = np.min(X_train1)
        X_train1_max = np.max(X_train1)
        X_train1 = 2. * (X_train1 - X_train1_min) / (X_train1_max -
                                                     X_train1_min) - 1.

    if os.path.exists(train_data2):
        df = pd.read_csv(train_data2,
                         delimiter=' ',
                         header=None,
                         encoding='latin-1')
    else:
        raise FileNotFoundError('File %s not found!' % (train_data2))

    X_train2 = df.values
    X_train2 = np.reshape(X_train2, (np.shape(X_train2)[0], proto_num,
                                     int(np.shape(X_train2)[1] / (proto_num))))

    if normalize_timeseries:
        X_train2_min = np.min(X_train2)
        X_train2_max = np.max(X_train2)
        X_train2 = 2. * (X_train2 - X_train2_min) / (X_train2_max -
                                                     X_train2_min) - 1.

    if os.path.exists(train_labels):
        df = pd.read_csv(train_labels,
                         delimiter=' ',
                         header=None,
                         encoding='latin-1')
    else:
        raise FileNotFoundError('File %s not found!' % (train_labels))

    y_train = df[[1]].values

    no_classes = nb_classes(index)  #len(np.unique(y_train))

    if os.path.exists(test_data1):
        df = pd.read_csv(test_data1,
                         delimiter=' ',
                         header=None,
                         encoding='latin-1')
    else:
        raise FileNotFoundError('File %s not found!' % (test_data1))
    X_test1 = df.values
    X_test1 = np.reshape(
        X_test1,
        (np.shape(X_test1)[0], dim, int(np.shape(X_test1)[1] / (dim))))

    if normalize_timeseries:
        # scale the test set with the *training* min/max to avoid leakage
        X_test1 = 2. * (X_test1 - X_train1_min) / (X_train1_max -
                                                   X_train1_min) - 1.

    if os.path.exists(test_data2):
        df = pd.read_csv(test_data2,
                         delimiter=' ',
                         header=None,
                         encoding='latin-1')
    else:
        raise FileNotFoundError('File %s not found!' % (test_data2))

    X_test2 = df.values
    X_test2 = np.reshape(X_test2, (np.shape(X_test2)[0], proto_num,
                                   int(np.shape(X_test2)[1] / (proto_num))))

    if normalize_timeseries:
        X_test2 = 2. * (X_test2 - X_train2_min) / (X_train2_max -
                                                   X_train2_min) - 1.

    if os.path.exists(test_labels):
        df = pd.read_csv(test_labels,
                         delimiter=' ',
                         header=None,
                         encoding='latin-1')
    else:
        raise FileNotFoundError('File %s not found!' % (test_labels))

    y_test = df[[1]].values

    if verbose:
        print("Finished loading test dataset..")
        print()
        print("Number of train samples : ", X_train1.shape[0],
              "Number of test samples : ", X_test1.shape[0])
        print("Number of classes : ", no_classes)
        print("Sequence length : ", X_train1.shape[-1])

    return X_train1, X_train2, y_train, X_test1, X_test2, y_test, is_timeseries
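
Each row of the data files stores one sample as a flattened channels × length block, which the reshapes above unflatten into (samples, channels, length). A minimal sketch on synthetic values (shapes assumed from the code, not from the repo's actual files):

import numpy as np

dim, T = 2, 4                                      # 2 channels, 4 time steps
flat = np.arange(2 * dim * T).reshape(2, dim * T)  # 2 samples, flattened rows
X = np.reshape(flat, (flat.shape[0], dim, flat.shape[1] // dim))
print(X.shape)  # (2, 2, 4)
print(X[0])     # [[0 1 2 3]
                #  [4 5 6 7]]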
Example #6
def train_model(model: Model,
                dataset_id,
                method,
                proto_num,
                dataset_prefix,
                nb_iterations=100000,
                batch_size=128,
                val_subset=None,
                cutoff=None,
                normalize_timeseries=False,
                learning_rate=1e-3,
                early_stop=False,
                balance_classes=True,
                run_ver=''):
    X_train, y_train, X_test, y_test, is_timeseries = load_dataset_at(
        dataset_id,
        method,
        proto_num,
        normalize_timeseries=normalize_timeseries)

    # convert the iteration (weight-update) budget into a number of epochs
    nb_epochs = math.ceil(nb_iterations * (batch_size / X_train.shape[0]))

    if balance_classes:
        classes = np.arange(0, nb_classes(dataset_id))  #np.unique(y_train)
        le = LabelEncoder()
        y_ind = le.fit_transform(y_train.ravel())
        recip_freq = len(y_train) / (len(le.classes_) *
                                     np.bincount(y_ind).astype(np.float64))
        class_weight = recip_freq[le.transform(classes)]

        print("Class weights : ", class_weight)

    y_train = to_categorical(y_train, nb_classes(dataset_id))
    y_test = to_categorical(y_test, nb_classes(dataset_id))

    if is_timeseries:
        factor = 1. / np.cbrt(2)
    else:
        factor = 1. / np.sqrt(2)

    # note: reduce_lr is created here but never added to callback_list below
    reduce_lr = ReduceLROnPlateau(monitor='loss',
                                  patience=math.ceil(nb_epochs / 20),
                                  mode='auto',
                                  factor=factor,
                                  cooldown=0,
                                  min_lr=learning_rate / 10.,
                                  verbose=2)

    if early_stop:
        early_stopping = EarlyStopping(monitor='loss',
                                       patience=500,
                                       mode='auto',
                                       verbose=2,
                                       restore_best_weights=True)
        callback_list = [early_stopping]
    else:
        callback_list = []

    optm = Adam(lr=learning_rate)
    #optm = SGD(lr=learning_rate, momentum=0.9, decay=5e-4)

    model.compile(optimizer=optm,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    if val_subset is not None:
        X_test = X_test[:val_subset]
        y_test = y_test[:val_subset]

    if balance_classes:
        model.fit(X_train,
                  y_train,
                  batch_size=batch_size,
                  epochs=nb_epochs,
                  callbacks=callback_list,
                  class_weight=class_weight,
                  verbose=2,
                  validation_data=(X_test, y_test))
    else:
        model.fit(X_train,
                  y_train,
                  batch_size=batch_size,
                  epochs=nb_epochs,
                  callbacks=callback_list,
                  verbose=2,
                  validation_data=(X_test, y_test))
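
nb_epochs converts a weight-update budget into epochs: with N training samples there are about N / batch_size updates per epoch, so epochs = iterations * batch_size / N. For example, 100000 iterations at batch size 128 on 1000 samples run for 12800 epochs:

import math

nb_iterations, batch_size, n_train = 100000, 128, 1000
print(math.ceil(nb_iterations * (batch_size / n_train)))  # 12800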