def fit_model(X_train, y_train):
    """Build, compile and train the binary-classification model, then save it.

    Relies on module-level names defined elsewhere in this file:
    ``Model``/``admiss_data``/``main_output`` (the graph wiring),
    ``optimizers``, ``my_callbacks``, ``cw`` (positive-class weight),
    ``n_epochs``, ``n_batch_size`` and the validation arrays
    ``X_val``/``y_val``.

    Parameters
    ----------
    X_train : array-like
        Training inputs (wrapped in a single-element list for ``fit``).
    y_train : array-like
        Binary training targets.

    Returns
    -------
    The trained Keras model (also saved to ``base_nn.h5``).
    """
    # Model wiring comes from module-level tensors (admiss_data -> main_output).
    model = Model(inputs=[admiss_data], outputs=main_output)
    print(model.summary())

    # Adam with explicit (default) hyper-parameters; `lr` kept for the
    # installed Keras API (newer releases renamed it `learning_rate`).
    adam = optimizers.Adam(lr=0.0001,
                           beta_1=0.9,
                           beta_2=0.999,
                           epsilon=1e-08,
                           decay=0.0)

    model.compile(optimizer=adam,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Up-weight the positive class; `cw` is a module-level constant.
    class_weight = {
        0: 1.,
        1: cw,  # 1: 20.
    }
    histories = my_callbacks.Histories()

    # BUG FIX: validation_data must be an (inputs, targets) tuple.  The
    # original passed `([[X_val], y_val])`, i.e. a 2-element *list* with the
    # inputs doubly wrapped — newer Keras versions reject a list here, and
    # the tuple form below mirrors the `[X_train]` training input.
    model.fit([X_train],
              y_train,
              epochs=n_epochs,
              batch_size=n_batch_size,
              validation_data=([X_val], y_val),
              class_weight=class_weight,
              callbacks=[histories])
    model.save('base_nn.h5')
    return model
# Exemple #2
# 0
	# --- training-on-synthetic fragment: the enclosing `def` header is not in
	# view (lost in the paste), so free names (one_d, X_train, y_train, X_val,
	# y_val, X_test, y_test, date, folder, Model_, my_callbacks, ...) come
	# from that missing scope — TODO confirm against the original file.
	batch_size = 512
	epochs = 500

	num_classes = 9

	# Model_ is a project factory; 100 is presumably the input length — TODO confirm
	m = Model_(batch_size, 100, num_classes)

	# one_d selects the 1-D CNN variant; otherwise the 2-D architecture is used
	if one_d == True:
		model = m.cnn()
	else:
		model = m.cnn2()

	model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

## callbacks
	history = my_callbacks.Histories()
	rocauc = my_callbacks.ROC_AUC(X_train, y_train, X_test, y_test)
	inception = my_callbacks.Inception(X_test, num_classes)

	# Checkpoint the best-val-loss weights; stop after 10 epochs without
	# improvement greater than min_delta.
	checkpoint = ModelCheckpoint('TSTR_'+ date +'/train/'+ folder +'/weights.best.trainonsynthetic.hdf5', monitor='val_loss', verbose=1, save_best_only=True, mode='min')
	earlyStopping = EarlyStopping(monitor='val_loss',min_delta = 0.00000001  , patience=10, verbose=1, mode='min') #0.00000001   patience 0

	model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data = (X_val, y_val),
		callbacks = [history,
					checkpoint,
					earlyStopping,
					rocauc,
					inception
					])

	# Persist the final (last-epoch) model; best weights live in the checkpoint.
	model.save('TSTR_'+ date +'/train/'+ folder +'/trainonsynthetic_model.h5')
def main(result_dict={}, N_CLASSES=2):
    """TRTS (Train on Real, Test on Synthetic) pipeline for the Catalina data.

    Trains a CNN on the real dataset, evaluates it on both the real and the
    generated (synthetic) test splits, saves histories/metrics/plots under
    ``TRTS_<date>/...`` and records a summary in ``result_dict[str(N_CLASSES)]``.

    NOTE(review): the mutable default ``result_dict={}`` is shared across
    calls that omit the argument — callers that rely on it should pass their
    own dict.  Free names (``date``, ``in_TSTR_FOLDER``, ``Model_``,
    ``my_callbacks``, ``load_model``, ``ModelCheckpoint``, ``EarlyStopping``,
    ``plt``, ``np``, ``pickle``, ``shuffle``, ``to_categorical``) come from
    module scope.
    """

    folder = 'catalina_amp_irregular_' + str(N_CLASSES) + 'classes'
    dataset_real = 'catalina_north' + str(N_CLASSES) + 'classes'#'catalina_random_sample_augmented_90k_' + str(N_CLASSES) + 'classes'
    result_dict[str(N_CLASSES)] = {'training': {}, 'testing': {}}

    def read_data(file):
        # Load a (train, val, test) pickle of regular (magnitude-only) curves
        # and return them reshaped to (N, length, 1, 1) with one-hot labels.

        with open(file, 'rb') as f: data = pickle.load(f)

        X_train = np.asarray(data[0]['generated_magnitude'])
        #print(X_train.shape)
        X_train =  X_train.reshape(X_train.shape[0], X_train.shape[1], 1, 1)
        #print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)

        X_val = np.asarray(data[1]['generated_magnitude'])
        X_val =  X_val.reshape(X_val.shape[0], X_val.shape[1], 1, 1)
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)

        X_test = np.asarray(data[2]['generated_magnitude'])
        X_test =  X_test.reshape(X_test.shape[0], X_test.shape[1], 1, 1)
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)

        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_original_irr(file):
        # Load (train, val, test) splits of *real* irregular curves: stacks
        # magnitude and observation time into a 2-channel (N, length, 1, 2)
        # tensor; labels are remapped to 0..K-1 and one-hot encoded.

        with open(file, 'rb') as f: data = pickle.load(f)

        print(data[0].keys())

        mgt = np.asarray(data[0]['original_magnitude_random'])
        t = np.asarray(data[0]['time_random'])
        X_train = np.stack((mgt, t), axis=-1)
        #print(X_train.shape)
        #print(X_train.T.shape)
        X_train =  X_train.reshape(X_train.shape[0], X_train.shape[1], 1, X_train.shape[2])
        #print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        #print(np.unique(y_train))
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)

        mgt = np.asarray(data[1]['original_magnitude_random'])
        t = np.asarray(data[1]['time_random'])
        X_val = np.stack((mgt, t), axis=-1)
        X_val =  X_val.reshape(X_val.shape[0], X_val.shape[1], 1, X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)

        mgt = np.asarray(data[2]['original_magnitude_random'])
        t = np.asarray(data[2]['time_random'])
        X_test = np.stack((mgt, t), axis=-1)
        X_test =  X_test.reshape(X_test.shape[0], X_test.shape[1], 1, X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)

        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_generated_irr(file):
        # Same layout as read_data_original_irr but reads the *generated*
        # (synthetic) keys 'generated_magnitude'/'time'.

        with open(file, 'rb') as f: data = pickle.load(f)

        print(data[0].keys())

        mgt = np.asarray(data[0]['generated_magnitude'])
        t = np.asarray(data[0]['time'])
        X_train = np.stack((mgt, t), axis=-1)
        #print(X_train.shape)
        #print(X_train.T.shape)
        X_train =  X_train.reshape(X_train.shape[0], X_train.shape[1], 1, X_train.shape[2])
        #print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        #print(np.unique(y_train))
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
    #   for i in y_train:
    #       if i != None:
    #           print(i)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)

        mgt = np.asarray(data[1]['generated_magnitude'])
        t = np.asarray(data[1]['time'])
        X_val = np.stack((mgt, t), axis=-1)
        X_val =  X_val.reshape(X_val.shape[0], X_val.shape[1], 1, X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)

        mgt = np.asarray(data[2]['generated_magnitude'])
        t = np.asarray(data[2]['time'])
        X_test = np.stack((mgt, t), axis=-1)
        X_test =  X_test.reshape(X_test.shape[0], X_test.shape[1], 1, X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)

        return X_train, y_train, X_val, y_val, X_test, y_test


    def change_classes(targets):
        # Map arbitrary class labels to contiguous integer ids 0..K-1,
        # ordered by the sorted unique label values.
        #print(targets)
        target_keys = np.unique(targets)
        #print(target_keys)
        target_keys_idxs = np.argsort(np.unique(targets))
        targets = target_keys_idxs[np.searchsorted(target_keys, targets, sorter=target_keys_idxs)]

        return targets


    def open_data(file):
        # Load a single (non-split) pickle of curves; returns shuffled
        # (N, length, 1, 1) inputs and one-hot labels.

        with open(file, 'rb') as f: data = pickle.load(f)

        print(len(data['generated_magnitude']))

        X = np.asarray(data['generated_magnitude'])
        X =  X.reshape(X.shape[0], X.shape[1], 1, 1)
        y = np.asarray(data['class'])
        X, y = shuffle(X, y, random_state=42)
        y = change_classes(y)
        y = to_categorical(y)


        return X, y


    def evaluation(X_test, y_test, n_classes):
        # Compute an Inception-Score-style metric over the (closure) `model`'s
        # predicted probabilities, split into n=10 chunks.
        # NOTE(review): `predict_proba` is deprecated/removed in recent
        # tf.keras — may need `model.predict` here; confirm Keras version.
        y_pred_prob = model.predict_proba(X_test)

        n = 10
        probs = np.array_split(y_pred_prob, n)

        score = []
        mean = []
        std = []

        # Y[s] = mean predicted class distribution of chunk s (the marginal).
        Y = []
        for prob in probs:
            ys = np.zeros(n_classes)#[0, 0
            for class_i in range(n_classes):
                for j in prob:
                    ys[class_i] = ys[class_i] + j[class_i]

            ys[:] = [x/len(prob) for x in ys]


            Y.append(np.asarray(ys))

        # IS = exp(mean KL(p(y|x) || p(y))) per chunk; ep avoids log(0).
        ep = 1e-12
        tmp = []
        for s in range(n):
            kl = (probs[s] * np.log((probs[s] + ep)/Y[s])).sum(axis=1)
            E = np.mean(kl)
            IS = np.exp(E)
            #pdb.set_trace()
            tmp.append(IS)

        score.append(tmp)
        mean.append(np.mean(tmp))
        std.append(np.std(tmp))

        print('Inception Score:\nMean score : ', mean[-1])
        print('Std : ', std[-1])

        return score, mean, std

    def check_dir(directory):
        # Create the directory (and parents) if it does not exist yet.
        if not os.path.exists(directory):
            os.makedirs(directory)


    # NOTE(review): the second 'TRTS_.../train/' call below is a duplicate
    # (harmless, check_dir is idempotent).
    check_dir('TRTS_'+ date)
    check_dir('TRTS_'+ date +'/train/')
    check_dir('TRTS_'+ date +'/train/')
    check_dir('TRTS_'+ date +'/train/'+ folder)
    check_dir('TRTS_'+ date +'/test/')
    check_dir('TRTS_'+ date +'/test/'+ folder)


    # If a trained model already exists, only reload and print saved metrics.
    if os.path.isfile('TRTS_'+ date +'/train/'+ folder +'/train_model.h5'):

        print('\nTrain metrics:')

        mean = np.load('TRTS_'+ date +'/train/'+ folder +'/train_is_mean.npy')

        std = np.load('TRTS_'+ date +'/train/'+ folder +'/train_is_std.npy')

        print('Training metrics:')
        print('Inception Score:\nMean score : ', mean[-1])
        print('Std : ', std[-1])

        acc = np.load('TRTS_'+ date +'/train/'+ folder +'/train_history_acc.npy')
        val_acc = np.load('TRTS_'+ date +'/train/'+ folder +'/train_history_val_acc.npy')
        loss = np.load('TRTS_'+ date +'/train/'+ folder +'/train_history_loss.npy')
        val_loss = np.load('TRTS_'+ date +'/train/'+ folder +'/train_history_val_loss.npy')

        print('ACC : ', np.mean(acc))
        print('VAL_ACC : ', np.mean(val_acc))
        print('LOSS : ', np.mean(loss))
        print('VAL_LOSS : ', np.mean(val_loss))

        print('\nTest metrics:')

        score = np.load('TRTS_'+ date +'/train/'+ folder +'/test_score.npy')
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

    else:

        # Fresh training run: irregular 2-channel data, 2-D CNN variant.
        irr = True
        one_d = False

    ## Train on real
        #dataset_real = 'catalina_random_sample_augmented_' + str(N_CLASSES) + 'classes'
        #dataset_real = 'catalina_north' + str(N_CLASSES) + 'classes'
        if irr == True:
            X_train, y_train, X_val, y_val, X_test, y_test  = read_data_original_irr('TSTR_data/'+ in_TSTR_FOLDER + dataset_real +'.pkl')#datasets_original/REAL/'+ dataset_real +'.pkl')
        else:
            X_train, y_train, X_val, y_val, X_test, y_test  = read_data('TSTR_data/'+in_TSTR_FOLDER+ dataset_real +'.pkl')#datasets_original/REAL/'+ dataset_real +'.pkl')

        print('')
        print ('Training new model')
        print('')

        batch_size = 512
        epochs = 200

        # Model_ is a project factory; 100 is presumably the input length — TODO confirm.
        m = Model_(batch_size, 100, N_CLASSES)

        if one_d == True:
            model = m.cnn()
        else:
            model = m.cnn2()

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    ## callbacks
        history = my_callbacks.Histories()
        inception = my_callbacks.Inception(X_test, N_CLASSES)

        # NOTE(review): 'val_acc' is the Keras 1.x/TF1 metric key; TF2 renamed
        # it 'val_accuracy' — confirm against the installed Keras version.
        checkpoint = ModelCheckpoint('TRTS_'+ date +'/train/'+ folder +'/weights.best.train.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        earlyStopping = EarlyStopping(monitor='val_acc',min_delta = 0.00000001  , patience=10, verbose=1, mode='max') #0.00000001   patience 0

        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data = (X_val, y_val),
            callbacks = [history,
                        checkpoint,
                        earlyStopping,
                        inception
                        ])

        # Reload the best-val_acc weights rather than the last epoch.
        model = load_model('TRTS_'+ date +'/train/'+ folder +'/weights.best.train.hdf5')

        #Create dictionary, then save into two different documments.
    ## Loss
        history_dictionary_loss = history.loss
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_history_loss.npy', history_dictionary_loss)
    ## Val Loss
        history_dictionary_val_loss = history.val_loss
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_history_val_loss.npy', history_dictionary_val_loss)
    ## Acc
        history_dictionary_acc = history.acc
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_history_acc.npy', history_dictionary_acc)
    ## Val Acc
        history_dictionary_val_acc = history.val_acc
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_history_val_acc.npy', history_dictionary_val_acc)
    ## IS
        scores_dict = inception.score
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_is.npy', scores_dict)
        mean_scores_dict = inception.mean
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_is_mean.npy', mean_scores_dict)
        std_scores_dict = inception.std
        np.save('TRTS_'+ date +'/train/'+ folder +'/train_is_std.npy', std_scores_dict)


    ### plot loss and validation_loss v/s epochs
        plt.figure(1)
        plt.yscale("log")
        plt.plot(history.loss)
        plt.plot(history.val_loss)
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper right')
        plt.savefig('TRTS_'+ date +'/train/'+ folder +'/train_loss.png')
    ### plot acc and validation acc v/s epochs
        plt.figure(2)
        plt.yscale("log")
        plt.plot(history.acc)
        plt.plot(history.val_acc)
        plt.title('model acc')
        plt.ylabel('Acc')
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper right')
        plt.savefig('TRTS_'+ date +'/train/'+ folder +'/train_acc.png')



        print('Training metrics:')
        print('Inception Score:\nMean score : ', mean_scores_dict[-1])
        print('Std : ', std_scores_dict[-1])

        print('ACC : ', history_dictionary_acc[-1])
        print('VAL_ACC : ', history_dictionary_val_acc[-1])
        print('LOSS : ', history_dictionary_loss[-1])
        print('VAL_LOSS : ', history_dictionary_val_loss[-1])

        # Test on real, then thest on synthetic

        # Test on real

        print('\nTest metrics:')
        print('\nTest on real:')

        dataset_syn = 'catalina_amp_irregular_' + str(N_CLASSES) + 'classes_generated'

        if irr == True:
            X_train2, y_train2, X_val2, y_val2, X_test2, y_test2  = read_data_generated_irr('TSTR_data/generated/'+ folder +'/' + dataset_syn + '.pkl')
        else:
            X_train2, y_train2, X_val2, y_val2, X_test2, y_test2  = read_data('TSTR_data/generated/'+ folder + '/' + dataset_syn + '.pkl')

        sc, me, st = evaluation(X_test, y_test, N_CLASSES)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onreal_is.npy', sc)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onreal_is_mean.npy', me)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onreal_is_std.npy', st)

        score_real = model.evaluate(X_test, y_test, verbose=1)
        print('Test loss:', score_real[0])
        print('Test accuracy:', score_real[1])

        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onreal_score.npy', score_real)

        # Test on synthetic

        print('\nTest on synthetic:')

        sc, me, st = evaluation(X_test2, y_test2, N_CLASSES)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onsyn_is.npy', sc)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onsyn_is_mean.npy', me)
        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onsyn_is_std.npy', st)

        score_syn = model.evaluate(X_test2, y_test2, verbose=1)
        print('Test loss:', score_syn[0])
        print('Test accuracy:', score_syn[1])

        np.save('TRTS_'+ date +'/test/'+ folder +'/test_onsyn_score.npy', score_syn)

        result_dict[str(N_CLASSES)]['training'] = {
            'IS mean': mean_scores_dict[-1],
            'IS std': std_scores_dict[-1], 
            'acc': history_dictionary_acc[-1],
            'val_acc': history_dictionary_val_acc[-1],
            'loss': history_dictionary_loss[-1], 
            'val_loss': history_dictionary_val_loss[-1]
        }

        # NOTE(review): key 'test_onsyn_score' holds the *accuracy* (index 1);
        # a name like 'test_onsyn_acc' would match the others — confirm consumers.
        result_dict[str(N_CLASSES)]['testing'] = {
            'test_onreal_loss': score_real[0],
            'test_onreal_acc': score_real[1], 
            'test_onsyn_loss': score_syn[0],
            'test_onsyn_score': score_syn[1]
        }
def main(result_dict={}, catalina_n_classes=1):
    """TSTR-style pipeline: train a CNN on real Catalina data and report
    train/val/test metrics, returning the test confusion matrix.

    NOTE(review): this redefines a `main` declared earlier in the file — the
    later definition wins at import time.  The mutable default
    ``result_dict={}`` is shared across calls that omit the argument.
    Free names (``BASE_REAL_NAME``, ``ORIGNAL_MAG_KEY``, ``ORIGINAL_TIME_KEY``,
    ``DROP_OUT_RATE``, ``BN_CONDITION``, ``EARLY_STOP_ON``,
    ``EARLY_STOP_ON_COD``, ``PATIENCE``, ``date``, ``Model_``,
    ``my_callbacks``, ``sklearn``, ``keras``, ...) come from module scope.
    """
    real_data_folder = os.path.join('datasets_original', 'REAL',
                                    '%iclasses_100_100' % catalina_n_classes)
    dataset_real_pkl = '%s%iclasses.pkl' % (BASE_REAL_NAME, catalina_n_classes)
    # syn_data_name = os.path.join('%s%s%.2f' % (BASE_GEN_DATA_FOLDER_NAME, gan_version, catalina_n_classes))

    # NOTE(review): despite the name, this key is the raw int, not a str —
    # callers indexing result_dict must use the int as well.
    catalina_n_classes_str = catalina_n_classes  #str(catalina_n_classes)
    result_dict[catalina_n_classes_str] = {'training': {}, 'testing': {}}
    #result_dict = {'training': {}, 'testing': {}}
    print("\nREAL Training set to load %s\n" % dataset_real_pkl)

    # print("SYN Training set to load %s" % syn_data_name)

    def read_data_original_irr(file):
        # Load (train, val, test) splits of real irregular curves: magnitude
        # and time stacked into a 2-channel (N, length, 1, 2) tensor, labels
        # remapped to 0..K-1 and one-hot encoded.

        with open(file, 'rb') as f:
            data = pickle.load(f)

        print(data[0].keys())

        mgt = np.asarray(data[0][ORIGNAL_MAG_KEY])
        t = np.asarray(data[0][ORIGINAL_TIME_KEY])
        X_train = np.stack((mgt, t), axis=-1)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1,
                                  X_train.shape[2])

        y_train = np.asarray(data[0]['class'])

        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)

        mgt = np.asarray(data[1][ORIGNAL_MAG_KEY])
        t = np.asarray(data[1][ORIGINAL_TIME_KEY])
        X_val = np.stack((mgt, t), axis=-1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1,
                              X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)

        mgt = np.asarray(data[2][ORIGNAL_MAG_KEY])
        t = np.asarray(data[2][ORIGINAL_TIME_KEY])
        X_test = np.stack((mgt, t), axis=-1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1,
                                X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)

        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_generated_irr(file):
        # Same layout as read_data_original_irr but reads the generated
        # (synthetic) keys 'generated_magnitude'/'time'.

        with open(file, 'rb') as f:
            data = pickle.load(f)

        print(data[0].keys())

        mgt = np.asarray(data[0]['generated_magnitude'])
        t = np.asarray(data[0]['time'])
        X_train = np.stack((mgt, t), axis=-1)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1,
                                  X_train.shape[2])
        # print(X_train.shape)
        y_train = np.asarray(data[0]['class'])
        print(np.unique(y_train))
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        #  for i in y_train:
        #     if i != None:
        #        print(i)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)

        mgt = np.asarray(data[1]['generated_magnitude'])
        t = np.asarray(data[1]['time'])
        X_val = np.stack((mgt, t), axis=-1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1,
                              X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)

        mgt = np.asarray(data[2]['generated_magnitude'])
        t = np.asarray(data[2]['time'])
        X_test = np.stack((mgt, t), axis=-1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1,
                                X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)

        return X_train, y_train, X_val, y_val, X_test, y_test

    def change_classes(targets):
        # Map arbitrary class labels to contiguous integer ids 0..K-1,
        # ordered by the sorted unique label values.
        # print(targets)
        target_keys = np.unique(targets)
        # print(target_keys)
        target_keys_idxs = np.argsort(np.unique(targets))
        targets = target_keys_idxs[np.searchsorted(target_keys,
                                                   targets,
                                                   sorter=target_keys_idxs)]

        return targets

    def open_data(file):
        # Load a single (non-split) pickle of curves; returns shuffled
        # (N, length, 1, 1) inputs and one-hot labels.

        with open(file, 'rb') as f:
            data = pickle.load(f)

        print(len(data['generated_magnitude']))

        X = np.asarray(data['generated_magnitude'])
        X = X.reshape(X.shape[0], X.shape[1], 1, 1)
        y = np.asarray(data['class'])
        X, y = shuffle(X, y, random_state=42)
        y = change_classes(y)
        y = to_categorical(y)

        return X, y

    def check_dir(directory):
        # Create the directory (and parents) if it does not exist yet.
        if not os.path.exists(directory):
            os.makedirs(directory)

    # NOTE(review): the second 'TSTR_.../train/' call is a duplicate
    # (harmless, check_dir is idempotent).
    check_dir('TSTR_' + date)
    check_dir('TSTR_' + date + '/train/')
    check_dir('TSTR_' + date + '/train/')
    check_dir('TSTR_' + date + '/train/' + real_data_folder)
    check_dir('TSTR_' + date + '/test/')
    check_dir('TSTR_' + date + '/test/' + real_data_folder)

    # if else
    # irr = True
    # dataset_syn_pkl = syn_data_name + '_generated.pkl'
    # one_d = False

    ## Train on real

    # X_train_syn, y_train_syn, X_val_syn, y_val_syn, X_test_syn, y_test_syn = read_data_generated_irr(
    #     os.path.join('TSTR_data', 'generated', syn_data_name, dataset_syn_pkl))
    X_train_real, y_train_real, X_val_real, y_val_real, X_test_real, y_test_real = read_data_original_irr(
        os.path.join('TSTR_data', real_data_folder, dataset_real_pkl))

    print('')
    print('Training new model')
    print('')

    # Large epoch cap; EarlyStopping below is the effective stop criterion.
    batch_size = 512
    epochs = 10000

    num_classes = catalina_n_classes

    m = Model_(batch_size, 100, num_classes, drop_rate=DROP_OUT_RATE)

    # BN_CONDITION toggles the batch-normalised CNN variant.
    if BN_CONDITION == 'batch_norm_':
        model = m.cnn2_batch()
    else:
        model = m.cnn2()

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    ## callbacks
    history = my_callbacks.Histories()

    # Checkpoint the best weights per the configured monitor/mode constants.
    checkpoint = ModelCheckpoint('TSTR_' + date + '/train/' +
                                 real_data_folder +
                                 '/weights.best.trainonreal.hdf5',
                                 monitor=EARLY_STOP_ON,
                                 verbose=1,
                                 save_best_only=True,
                                 mode=EARLY_STOP_ON_COD)
    earlyStopping = EarlyStopping(monitor=EARLY_STOP_ON,
                                  min_delta=0.00000001,
                                  patience=PATIENCE,
                                  verbose=1,
                                  mode=EARLY_STOP_ON_COD)

    model.fit(
        X_train_real,
        y_train_real,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val_real, y_val_real),
        callbacks=[
            history,
            checkpoint,
            earlyStopping  # ,
            # rocauc,
            # inception
        ])

    # Reload the checkpointed best weights rather than the last epoch.
    model = load_model('TSTR_' + date + '/train/' + real_data_folder +
                       '/weights.best.trainonreal.hdf5')

    print('Training metrics:')

    score_train = model.evaluate(X_train_real, y_train_real, verbose=1)
    score_val = model.evaluate(X_val_real, y_val_real, verbose=1)

    print('ACC : ', score_train[1])
    print('VAL_ACC : ', score_val[1])
    print('LOSS : ', score_train[0])
    print('VAL_LOSS : ', score_val[0])

    # fine tunning
    # K.set_value(model.optimizer.lr, 0.00005)
    #
    # checkpoint = ModelCheckpoint('TSTR_' + date + '/train/' + syn_data_name + '/weights.best.trainfinetune.hdf5',
    #                              monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    # earlyStopping = EarlyStopping(monitor='val_acc', min_delta=0.00000001, patience=PATIENCE_FINE, verbose=1, mode='max')
    # model.fit(X_train_real, y_train_real, epochs=epochs, batch_size=batch_size, validation_data=(X_val_real, y_val_real),
    #           callbacks=[history,
    #                      checkpoint,
    #                      earlyStopping  # ,
    #                      # rocauc,
    #                      # inception
    #                      ])
    #
    # model = load_model('TSTR_' + date + '/train/' + syn_data_name + '/weights.best.trainfinetune.hdf5')

    ## Test on real

    # score_val = model.evaluate(X_val_real, y_val_real, verbose=1)
    #
    # print('fine tune VAL_ACC : ', score_val[1])
    # print('fine tune VAL_LOSS : ', score_val[0])

    print('\nTest metrics:')
    print('\nTest on real:')

    score_test = model.evaluate(X_test_real, y_test_real, verbose=1)
    print('Test loss:', score_test[0])
    print('Test accuracy:', score_test[1])

    result_dict[catalina_n_classes_str]['testing'] = {
        'test loss on real': score_test[0],
        'Test accuracy on real': score_test[1]  # , 'auc roc on real': roc
    }

    ## Test on syn

    # NOTE(review): the synthetic evaluation is commented out; this prints
    # the header only.
    print('\nTest on synthetic:')

    # score = model.evaluate(X_test_syn, y_test_syn, verbose=1)
    # print('Test loss:', score[0])
    # print('Test accuracy:', score[1])

    result_dict[catalina_n_classes_str]['training'] = {
        'VAL_ACC': score_val[1],
        'TRAIN_ACC': score_train[1],
        'TRAIN_LOSS': score_train[0],
        'VAL_LOSS': score_val[0]
    }

    # result_dict[catalina_n_classes_str]['testing']['test loss on syn'] = score[0]
    # result_dict[catalina_n_classes_str]['testing']['Test accuracy on syn'] = score[1]

    # Confusion matrix on the real test split; its trace/total should agree
    # with model.evaluate's accuracy, which the print below cross-checks.
    y_predict_prob_test = model.predict(X_test_real)
    y_predict_classes_test = y_predict_prob_test.argmax(axis=-1)
    confusion_matrix = sklearn.metrics.confusion_matrix(
        y_test_real.argmax(axis=-1), y_predict_classes_test)
    print(
        'Accuracy Test conf %.4f, accuracy eval %.4f' %
        (np.trace(confusion_matrix) / np.sum(confusion_matrix), score_test[1]))

    # Free the TF graph/session before returning to keep repeated calls cheap.
    keras.backend.clear_session()
    del model
    return confusion_matrix
# Exemple #5
# 0
def main(result_dict=None, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE=1.0, v=''):
    """Run one TRTS (Train on Real, Test on Synthetic) experiment.

    Trains a CNN classifier on a real light-curve dataset selected by the
    class-imbalance percentage, saves training curves and plots, then
    evaluates the best checkpoint on the real test split and on the matching
    GAN-generated test split.  All metrics are written into ``result_dict``
    under the key ``str(PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)``.

    Args:
        result_dict: Output dict the metrics are stored into.  Defaults to a
            fresh dict per call (the previous mutable default ``{}`` was
            shared across calls and leaked state between experiments).
        PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE: Fraction of samples
            kept to induce class imbalance; used only to build dataset and
            result folder names.
        v: Optional version tag inserted into the dataset/folder names.
    """
    # Fresh dict per call -- avoids the shared-mutable-default pitfall.
    if result_dict is None:
        result_dict = {}

    folder = '%s%s%.2f' % (BASE_REAL_NAME, v, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    # The real data may come from the original datasets or from an augmented
    # variant; module-level AUGMENTED_OR_NOT_EXTRA_STR selects which.
    if AUGMENTED_OR_NOT_EXTRA_STR == '':
        in_TSTR_FOLDER = 'datasets_original/REAL/'
        dataset_real = '%s%s%s%.2f' % (
            BASE_REAL_NAME, AUGMENTED_OR_NOT_EXTRA_STR, '', PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    else:
        in_TSTR_FOLDER = 'augmented/'
        dataset_real = '%s%s%s%.2f' % (
            BASE_REAL_NAME, AUGMENTED_OR_NOT_EXTRA_STR, v, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)

    PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY = str(PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY] = {'training': {}, 'testing': {}}

    def change_classes(targets):
        """Map arbitrary (possibly non-contiguous) labels to 0..K-1 indices."""
        target_keys = np.unique(targets)
        target_keys_idxs = np.argsort(np.unique(targets))
        targets = target_keys_idxs[np.searchsorted(target_keys, targets, sorter=target_keys_idxs)]

        return targets

    def read_data(file):
        """Load a regularly-sampled [train, val, test] pickle.

        Each sample becomes shape (length, 1, 1); labels are re-indexed to
        0..K-1, one-hot encoded, and every split is shuffled with a fixed
        seed for reproducibility.
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)

        X_train = np.asarray(data[0]['generated_magnitude'])
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, 1)
        y_train = np.asarray(data[0]['class'])
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = to_categorical(change_classes(y_train))

        X_val = np.asarray(data[1]['generated_magnitude'])
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1, 1)
        y_val = to_categorical(change_classes(np.asarray(data[1]['class'])))
        X_val, y_val = shuffle(X_val, y_val, random_state=42)

        X_test = np.asarray(data[2]['generated_magnitude'])
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, 1)
        y_test = to_categorical(change_classes(np.asarray(data[2]['class'])))
        X_test, y_test = shuffle(X_test, y_test, random_state=42)

        return X_train, y_train, X_val, y_val, X_test, y_test

    def _read_irr(file, magnitude_key):
        """Load an irregularly-sampled [train, val, test] pickle.

        Magnitude and observation time are stacked as two channels, giving
        samples of shape (length, 1, 2).  ``magnitude_key`` selects the real
        ('original_magnitude') or synthetic ('generated_magnitude') series.
        The two previous reader functions were identical except for this key.
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)

        print(data[0].keys())

        def load_split(split):
            mgt = np.asarray(split[magnitude_key])
            t = np.asarray(split['time'])
            X = np.stack((mgt, t), axis=-1)
            X = X.reshape(X.shape[0], X.shape[1], 1, X.shape[2])
            y = np.asarray(split['class'])
            # The fixed-seed shuffle and the elementwise label transforms
            # commute, so a single ordering is equivalent to the originals.
            X, y = shuffle(X, y, random_state=42)
            y = to_categorical(change_classes(y))
            return X, y

        X_train, y_train = load_split(data[0])
        X_val, y_val = load_split(data[1])
        X_test, y_test = load_split(data[2])
        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_original_irr(file):
        """Irregular REAL data: magnitudes under 'original_magnitude'."""
        return _read_irr(file, 'original_magnitude')

    def read_data_generated_irr(file):
        """Irregular SYNTHETIC data: magnitudes under 'generated_magnitude'."""
        return _read_irr(file, 'generated_magnitude')

    def evaluation(X_test, y_test, n_classes):
        """Compute an Inception-Score-style metric over test predictions.

        Currently unused (its call sites are disabled); kept for optional
        inception-score reporting.  NOTE(review): relies on
        ``model.predict_proba``, which newer Keras versions removed --
        switch to ``model.predict`` before re-enabling.
        """
        y_pred_prob = model.predict_proba(X_test)

        n = 10
        probs = np.array_split(y_pred_prob, n)

        score = []
        mean = []
        std = []

        # Marginal class distribution p(y) per chunk.
        Y = []
        for prob in probs:
            ys = np.zeros(n_classes)
            for class_i in range(n_classes):
                for j in prob:
                    ys[class_i] = ys[class_i] + j[class_i]

            ys[:] = [x / len(prob) for x in ys]

            Y.append(np.asarray(ys))

        # Inception score per chunk: exp(E[KL(p(y|x) || p(y))]).
        ep = 1e-12
        tmp = []
        for s in range(n):
            kl = (probs[s] * np.log((probs[s] + ep) / Y[s])).sum(axis=1)
            E = np.mean(kl)
            IS = np.exp(E)
            tmp.append(IS)

        score.append(tmp)
        mean.append(np.mean(tmp))
        std.append(np.std(tmp))

        print('Inception Score:\nMean score : ', mean[-1])
        print('Std : ', std[-1])

        return score, mean, std

    def check_dir(directory):
        """Create ``directory`` if it does not already exist."""
        if not os.path.exists(directory):
            os.makedirs(directory)

    check_dir('TRTS_' + date)
    check_dir('TRTS_' + date + '/train/')
    check_dir('TRTS_' + date + '/train/' + folder)
    check_dir('TRTS_' + date + '/test/')
    check_dir('TRTS_' + date + '/test/' + folder)

    # Datasets here are irregularly sampled (magnitude + time channels).
    irr = True

    ## Train on real
    if irr:
        X_train, y_train, X_val, y_val, X_test, y_test = read_data_original_irr(
            'TSTR_data/' + in_TSTR_FOLDER + dataset_real + '.pkl')
    else:
        X_train, y_train, X_val, y_val, X_test, y_test = read_data(
            'TSTR_data/' + in_TSTR_FOLDER + dataset_real + '.pkl')

    print('')
    print('Training new model')
    print('')

    batch_size = 512
    epochs = 200
    num_classes = 3

    m = Model_(batch_size, 100, num_classes, drop_rate=DROP_OUT_RATE)
    if BN_CONDITION == 'batch_norm_':
        model = m.cnn2_batch()
    else:
        model = m.cnn2()

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    ## callbacks
    history = my_callbacks.Histories()
    checkpoint = ModelCheckpoint('TRTS_' + date + '/train/' + folder + '/weights.best.train.hdf5', monitor='val_acc',
                                 verbose=1, save_best_only=True, mode='max')
    earlyStopping = EarlyStopping(monitor='val_acc', min_delta=0.00000001, patience=PATIENCE, verbose=1, mode='max')

    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val),
              callbacks=[history, checkpoint, earlyStopping])

    # Reload the best (highest val_acc) checkpoint, then drop the file.
    model = load_model('TRTS_' + date + '/train/' + folder + '/weights.best.train.hdf5')
    os.remove('TRTS_' + date + '/train/' + folder + '/weights.best.train.hdf5')

    # Persist the training curves for later inspection.
    history_dictionary_loss = history.loss
    np.save('TRTS_' + date + '/train/' + folder + '/train_history_loss.npy', history_dictionary_loss)
    history_dictionary_val_loss = history.val_loss
    np.save('TRTS_' + date + '/train/' + folder + '/train_history_val_loss.npy', history_dictionary_val_loss)
    history_dictionary_acc = history.acc
    np.save('TRTS_' + date + '/train/' + folder + '/train_history_acc.npy', history_dictionary_acc)
    history_dictionary_val_acc = history.val_acc
    np.save('TRTS_' + date + '/train/' + folder + '/train_history_val_acc.npy', history_dictionary_val_acc)

    ### plot loss and validation_loss v/s epochs
    # Use a fresh figure and close it after saving: the previous
    # plt.figure(1)/plt.figure(2) reused the same figure objects, so repeated
    # calls to main() drew new curves on top of the old ones in the PNGs.
    fig = plt.figure()
    plt.yscale("log")
    plt.plot(history.loss)
    plt.plot(history.val_loss)
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper right')
    plt.savefig('TRTS_' + date + '/train/' + folder + '/train_loss.png')
    plt.close(fig)
    ### plot acc and validation acc v/s epochs
    fig = plt.figure()
    plt.yscale("log")
    plt.plot(history.acc)
    plt.plot(history.val_acc)
    plt.title('model acc')
    plt.ylabel('Acc')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper right')
    plt.savefig('TRTS_' + date + '/train/' + folder + '/train_acc.png')
    plt.close(fig)

    print('Training metrics:')

    score_train = model.evaluate(X_train, y_train, verbose=1)
    score_val = model.evaluate(X_val, y_val, verbose=1)

    print('ACC : ', score_train[1])
    print('VAL_ACC : ', score_val[1])
    print('LOSS : ', score_train[0])
    print('VAL_LOSS : ', score_val[0])

    ## Test on synthetic

    print('\nTest metrics:')
    print('\nTest on real:')

    dataset_syn = folder + '_generated'

    score = model.evaluate(X_test, y_test, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    np.save('TRTS_' + date + '/test/' + folder + '/test_onreal_score.npy', score)

    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing'] = {
        'test loss on real': score[0], 'Test accuracy on real': score[1]  # , 'auc roc on real': roc
    }

    print('\nTest on synthetic:')
    if irr:
        X_train2, y_train2, X_val2, y_val2, X_test2, y_test2 = read_data_generated_irr(
            'TSTR_data/generated/' + folder + '/' + dataset_syn + '.pkl')
    else:
        X_train2, y_train2, X_val2, y_val2, X_test2, y_test2 = read_data(
            'TSTR_data/generated/' + folder + '/' + dataset_syn + '.pkl')

    score = model.evaluate(X_test2, y_test2, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['training'] = {
        'VAL_ACC': score_val[1], 'TRAIN_ACC': score_train[1],
        'TRAIN_LOSS': score_train[0], 'VAL_LOSS': score_val[0]
    }

    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing']['test loss on syn'] = score[0]
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing']['Test accuracy on syn'] = score[1]

    # Free the TF graph/session before the next experiment in the sweep.
    keras.backend.clear_session()
    del model
Exemple #6
0
def main(result_dict=None,
         PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE=1.0,
         v=''):
    """TSTR with fine-tuning: Train on Synthetic, fine-tune and Test on Real.

    Phase 1 trains a CNN on GAN-generated light curves while validating on
    the real validation split; phase 2 fine-tunes the best phase-1
    checkpoint on the real training split.  The final model is evaluated on
    both the real and the synthetic test splits, and the metrics are written
    into ``result_dict[str(PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)]``.

    Args:
        result_dict: Output dict for the metrics.  Defaults to a fresh dict
            per call (the previous mutable default ``{}`` was shared across
            calls).
        PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE: Fraction of samples
            kept to induce class imbalance; used only in file/folder names.
        v: Optional version tag inserted into the synthetic dataset name.
    """
    # Fresh dict per call -- avoids the shared-mutable-default pitfall.
    if result_dict is None:
        result_dict = {}

    real_data_folder = os.path.join('datasets_original', 'REAL')
    dataset_real_pkl = '%s%.2f.pkl' % (
        BASE_REAL_NAME, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    syn_data_name = os.path.join(
        '%s%s%.2f' %
        (BASE_REAL_NAME, v, PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE))

    PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY = str(
        PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE)
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY] = {
        'training': {},
        'testing': {}
    }
    print("REAL Training set to load %s" % dataset_real_pkl)
    print("SYN Training set to load %s" % syn_data_name)

    def change_classes(targets):
        """Map arbitrary (possibly non-contiguous) labels to 0..K-1 indices."""
        target_keys = np.unique(targets)
        target_keys_idxs = np.argsort(np.unique(targets))
        targets = target_keys_idxs[np.searchsorted(target_keys,
                                                   targets,
                                                   sorter=target_keys_idxs)]

        return targets

    def read_data_original_irr(file):
        """Load the irregular REAL [train, val, test] pickle.

        Magnitude ('original_magnitude') and observation time are stacked as
        two channels, giving samples of shape (length, 1, 2); labels are
        re-indexed and one-hot encoded, and every split is shuffled with a
        fixed seed for reproducibility.
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)

        print(data[0].keys())

        mgt = np.asarray(data[0]['original_magnitude'])
        t = np.asarray(data[0]['time'])
        X_train = np.stack((mgt, t), axis=-1)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1,
                                  X_train.shape[2])

        y_train = np.asarray(data[0]['class'])

        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)

        mgt = np.asarray(data[1]['original_magnitude'])
        t = np.asarray(data[1]['time'])
        X_val = np.stack((mgt, t), axis=-1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1,
                              X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)

        mgt = np.asarray(data[2]['original_magnitude'])
        t = np.asarray(data[2]['time'])
        X_test = np.stack((mgt, t), axis=-1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1,
                                X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)

        return X_train, y_train, X_val, y_val, X_test, y_test

    def read_data_generated_irr(file):
        """Load the irregular SYNTHETIC [train, val, test] pickle.

        Same layout as ``read_data_original_irr`` but magnitudes come from
        the 'generated_magnitude' key.
        """
        with open(file, 'rb') as f:
            data = pickle.load(f)

        print(data[0].keys())

        mgt = np.asarray(data[0]['generated_magnitude'])
        t = np.asarray(data[0]['time'])
        X_train = np.stack((mgt, t), axis=-1)
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1,
                                  X_train.shape[2])
        y_train = np.asarray(data[0]['class'])
        # Debug: label set present in the synthetic train split.
        print(np.unique(y_train))
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        y_train = change_classes(y_train)
        y_train = to_categorical(y_train)

        mgt = np.asarray(data[1]['generated_magnitude'])
        t = np.asarray(data[1]['time'])
        X_val = np.stack((mgt, t), axis=-1)
        X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1,
                              X_val.shape[2])
        y_val = np.asarray(data[1]['class'])
        y_val = change_classes(y_val)
        y_val = to_categorical(y_val)
        X_val, y_val = shuffle(X_val, y_val, random_state=42)

        mgt = np.asarray(data[2]['generated_magnitude'])
        t = np.asarray(data[2]['time'])
        X_test = np.stack((mgt, t), axis=-1)
        X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1,
                                X_test.shape[2])
        y_test = np.asarray(data[2]['class'])
        y_test = change_classes(y_test)
        y_test = to_categorical(y_test)
        X_test, y_test = shuffle(X_test, y_test, random_state=42)

        return X_train, y_train, X_val, y_val, X_test, y_test

    def check_dir(directory):
        """Create ``directory`` if it does not already exist."""
        if not os.path.exists(directory):
            os.makedirs(directory)

    check_dir('TSTR_' + date)
    check_dir('TSTR_' + date + '/train/')
    check_dir('TSTR_' + date + '/train/' + syn_data_name)
    check_dir('TSTR_' + date + '/test/')
    check_dir('TSTR_' + date + '/test/' + syn_data_name)

    dataset_syn_pkl = syn_data_name + '_generated.pkl'

    ## Train on synthetic

    X_train_syn, y_train_syn, X_val_syn, y_val_syn, X_test_syn, y_test_syn = read_data_generated_irr(
        os.path.join('TSTR_data', 'generated', syn_data_name, dataset_syn_pkl))
    X_train_real, y_train_real, X_val_real, y_val_real, X_test_real, y_test_real = read_data_original_irr(
        os.path.join('TSTR_data', real_data_folder, dataset_real_pkl))

    print('')
    print('Training new model')
    print('')

    batch_size = 512
    epochs = 10000  # effectively unbounded; EarlyStopping ends training
    num_classes = 3

    m = Model_(batch_size, 100, num_classes, drop_rate=DROP_OUT_RATE)

    if BN_CONDITION == 'batch_norm_':
        model = m.cnn2_batch()
    else:
        model = m.cnn2()

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    ## callbacks
    history = my_callbacks.Histories()

    checkpoint = ModelCheckpoint('TSTR_' + date + '/train/' + syn_data_name +
                                 '/weights.best.trainonsynthetic.hdf5',
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    earlyStopping = EarlyStopping(monitor='val_acc',
                                  min_delta=0.00000001,
                                  patience=PATIENCE,
                                  verbose=1,
                                  mode='max')

    # Phase 1: train on synthetic data, validating on REAL data.
    model.fit(
        X_train_syn,
        y_train_syn,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val_real, y_val_real),
        callbacks=[history, checkpoint, earlyStopping])

    # Reload the best (highest val_acc) phase-1 checkpoint.
    model = load_model('TSTR_' + date + '/train/' + syn_data_name +
                       '/weights.best.trainonsynthetic.hdf5')

    print('Training metrics:')

    score_train = model.evaluate(X_train_syn, y_train_syn, verbose=1)
    score_val = model.evaluate(X_val_real, y_val_real, verbose=1)

    print('ACC : ', score_train[1])
    print('VAL_ACC : ', score_val[1])
    print('LOSS : ', score_train[0])
    print('VAL_LOSS : ', score_val[0])

    # Phase 2: fine-tune the model on the real training split.
    #K.set_value(model.optimizer.lr, 0.00005)

    checkpoint = ModelCheckpoint('TSTR_' + date + '/train/' + syn_data_name +
                                 '/weights.best.trainfinetune.hdf5',
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    earlyStopping = EarlyStopping(monitor='val_acc',
                                  min_delta=0.00000001,
                                  patience=PATIENCE_FINE,
                                  verbose=1,
                                  mode='max')
    model.fit(
        X_train_real,
        y_train_real,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val_real, y_val_real),
        callbacks=[history, checkpoint, earlyStopping])

    model = load_model('TSTR_' + date + '/train/' + syn_data_name +
                       '/weights.best.trainfinetune.hdf5')

    ## Test on real

    # NOTE: score_val is refreshed after fine-tuning, but score_train
    # (stored below) still refers to the phase-1 synthetic-train evaluation.
    score_val = model.evaluate(X_val_real, y_val_real, verbose=1)

    print('fine tune VAL_ACC : ', score_val[1])
    print('fine tune VAL_LOSS : ', score_val[0])

    print('\nTest metrics:')
    print('\nTest on real:')

    score = model.evaluate(X_test_real, y_test_real, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY][
        'testing'] = {
            'test loss on real': score[0],
            'Test accuracy on real': score[1]  # , 'auc roc on real': roc
        }

    ## Test on syn

    print('\nTest on synthetic:')

    score = model.evaluate(X_test_syn, y_test_syn, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY][
        'training'] = {
            'VAL_ACC': score_val[1],
            'TRAIN_ACC': score_train[1],
            'TRAIN_LOSS': score_train[0],
            'VAL_LOSS': score_val[0]
        }

    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing'][
        'test loss on syn'] = score[0]
    result_dict[PERCENTAGE_OF_SAMPLES_TO_KEEP_FOR_DISBALANCE_KEY]['testing'][
        'Test accuracy on syn'] = score[1]

    # Free the TF graph/session before the next experiment in the sweep.
    keras.backend.clear_session()
    del model
Exemple #7
0
def main(result_dict=None,
         percentage_of_samples_to_keep_for_imbalance=1.0,
         v=''):
    """Run one TSTR (Train on Synthetic, Test on Real) experiment.

    Trains a CNN classifier on synthetic light curves, evaluates it on
    real data (the TSTR score), fine-tunes it on real data with a
    reduced learning rate, re-evaluates, and records all metrics under
    ``result_dict[str(percentage_of_samples_to_keep_for_imbalance)]``.

    Args:
        result_dict: dict updated in place with 'training'/'testing'
            metric sub-dicts; a fresh dict is created when omitted.
        percentage_of_samples_to_keep_for_imbalance: fraction encoded in
            the dataset file names to select the (im)balanced data files.
        v: optional version tag inserted into the synthetic-data name.
    """
    # Bug fix: the original signature used the mutable default
    # `result_dict={}`, which is shared across calls and silently
    # accumulates results between invocations. Callers that pass their
    # own dict are unaffected by this change.
    if result_dict is None:
        result_dict = {}

    real_data_folder = os.path.join('datasets_original', 'REAL')
    dataset_real_pkl = '%s%.2f.pkl' % (
        BASE_REAL_NAME, percentage_of_samples_to_keep_for_imbalance)
    syn_data_name = os.path.join(
        '%s%s%.2f' %
        (BASE_REAL_NAME, v, percentage_of_samples_to_keep_for_imbalance))

    # All metrics for this run are keyed by the imbalance percentage.
    percentage_of_samples_to_keep_for_imbalance_key = str(
        percentage_of_samples_to_keep_for_imbalance)
    result_dict[percentage_of_samples_to_keep_for_imbalance_key] = {
        'training': {},
        'testing': {}
    }
    print("\nREAL Training set to load %s" % dataset_real_pkl)
    print("SYN Training set to load %s" % syn_data_name)

    dataset_syn_pkl = syn_data_name + '_generated.pkl'

    # Load synthetic and real train/val/test splits.
    x_train_syn, y_train_syn, x_val_syn, y_val_syn, x_test_syn, y_test_syn = read_data_irregular_sampling(
        os.path.join('TSTR_data', 'generated', syn_data_name, dataset_syn_pkl),
        magnitude_key='generated_magnitude',
        time_key='time')
    # NOTE(review): the REAL pickle is also read with
    # magnitude_key='generated_magnitude' -- confirm this is the intended
    # key for real data and not a copy-paste slip.
    x_train_real, y_train_real, x_val_real, y_val_real, x_test_real, y_test_real = read_data_irregular_sampling(
        os.path.join('TSTR_data', real_data_folder, dataset_real_pkl),
        magnitude_key='generated_magnitude',
        time_key='time')

    ## Train on synthetic
    print('\nTraining new model\n')
    batch_size = 512
    epochs = 10000  # effectively unbounded; EarlyStopping ends training
    num_classes = 3
    # Choose model variant (with/without batch norm) per module config.
    m = Model_(batch_size, 100, num_classes, drop_rate=DROP_OUT_RATE)
    if BN_CONDITION == 'batch_norm_':
        model = m.cnn2_batch()
    else:
        model = m.cnn2()

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    ## callbacks
    history = my_callbacks.Histories()
    weight_folder = os.path.join('TSTR_' + date, 'train', RESULTS_NAME,
                                 syn_data_name)
    check_dir(weight_folder)
    # Checkpoint keeps only the best weights as measured on the REAL
    # validation set; EarlyStopping monitors the same quantity.
    checkpoint = ModelCheckpoint(os.path.join(
        weight_folder, 'weights.best.trainonsynthetic.hdf5'),
                                 monitor=EARLY_STOP_ON,
                                 verbose=1,
                                 save_best_only=True,
                                 mode=EARLY_STOP_ON_COD)
    earlyStopping = EarlyStopping(monitor=EARLY_STOP_ON,
                                  min_delta=0.00000001,
                                  patience=PATIENCE,
                                  verbose=1,
                                  mode=EARLY_STOP_ON_COD)

    # Train on synthetic data while validating on real data.
    model.fit(x_train_syn,
              y_train_syn,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(x_val_real, y_val_real),
              callbacks=[history, checkpoint, earlyStopping])
    # Restore the best checkpoint before evaluating.
    model = load_model(
        os.path.join(weight_folder, 'weights.best.trainonsynthetic.hdf5'))

    print('Syn Training metrics:')
    score_train = model.evaluate(x_train_syn, y_train_syn, verbose=1)
    score_val = model.evaluate(x_val_real, y_val_real, verbose=1)
    score_tstr = model.evaluate(x_test_real, y_test_real, verbose=1)
    print('ACC : ', score_train[1])
    print('VAL_ACC : ', score_val[1])
    print('LOSS : ', score_train[0])
    print('VAL_LOSS : ', score_val[0])
    print('TSTR loss: %f ;-; accuracy: %f' % (score_tstr[0], score_tstr[1]))
    result_dict[percentage_of_samples_to_keep_for_imbalance_key]['testing'] = {
        'tstr loss': score_tstr[0],
        'tstr accuracy': score_tstr[1]
    }

    # Fine-tuning on real data: scale the current learning rate by
    # LR_VAL_MULT and repeat checkpoint/early-stop with PATIENCE_FINE.
    K.set_value(model.optimizer.lr, K.eval(model.optimizer.lr) * LR_VAL_MULT)
    checkpoint = ModelCheckpoint(os.path.join(
        weight_folder, 'weights.best.trainfinetune.hdf5'),
                                 monitor=EARLY_STOP_ON,
                                 verbose=1,
                                 save_best_only=True,
                                 mode=EARLY_STOP_ON_COD)
    earlyStopping = EarlyStopping(monitor=EARLY_STOP_ON,
                                  min_delta=0.00000001,
                                  patience=PATIENCE_FINE,
                                  verbose=1,
                                  mode=EARLY_STOP_ON_COD)

    model.fit(x_train_real,
              y_train_real,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(x_val_real, y_val_real),
              callbacks=[history, checkpoint, earlyStopping])
    model = load_model(
        os.path.join(weight_folder, 'weights.best.trainfinetune.hdf5'))

    ## Test on real
    score_val = model.evaluate(x_val_real, y_val_real, verbose=1)
    print('fine tune VAL_ACC : ', score_val[1])
    print('fine tune VAL_LOSS : ', score_val[0])

    print('\nTest metrics:')
    print('\nTest on real:')
    score = model.evaluate(x_test_real, y_test_real, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    result_dict[percentage_of_samples_to_keep_for_imbalance_key]['testing'][
        'test loss on real'] = score[0]
    result_dict[percentage_of_samples_to_keep_for_imbalance_key]['testing'][
        'Test accuracy on real'] = score[1]
    # NOTE(review): TRAIN_ACC/TRAIN_LOSS here come from the pre-fine-tune
    # evaluation on synthetic training data (score_train above), while
    # VAL_* are post-fine-tune -- confirm that mix is intentional.
    result_dict[percentage_of_samples_to_keep_for_imbalance_key][
        'training'] = {
            'VAL_ACC': score_val[1],
            'TRAIN_ACC': score_train[1],
            'TRAIN_LOSS': score_train[0],
            'VAL_LOSS': score_val[0]
        }

    # Free graph/session memory so repeated calls don't leak.
    keras.backend.clear_session()
    del model