def compare():
    logger.log('Compare all features, 6 features and 5 features')

    # validation MSE with all 7 input features (from the Q1 history)
    mse_all = hist1['Q1']['a']['val_mse']
    # best single-feature removal (6 features left): column names in
    # `df` are comma-joined indices of the removed features
    best_6 = val_mse[[c for c in df.columns
                      if len(c.split(",")) == 1]].idxmin()
    mse_6 = hist2['Q2'][best_6]['val_mse']
    # best two-feature removal (5 features left)
    best_5 = val_mse[[c for c in df.columns
                      if len(c.split(",")) == 2]].idxmin()
    mse_5 = hist2['Q2'][best_5]['val_mse']

    f, ax = plt.subplots(figsize=(10, 5))
    plt.plot(mse_all, label='all features')
    plt.plot(mse_6, label='6 features')
    plt.plot(mse_5, label='5 features')
    plt.xlabel('epoch', fontsize=18)
    plt.ylabel('mean squared error', fontsize=18)
    plt.legend(loc='upper right', fontsize=18)
    plt.title('MSE for Different Numbers of Input Features',
              fontsize=20,
              pad=20)
    plt.xlim(1000, 5000)
    plt.ylim(0.0058, 0.010)
    plt.xticks(fontsize=12, wrap=True)
    plt.yticks(fontsize=12, wrap=True)

    plt.tight_layout()
    plt.savefig('result/BQ2_compare.png')
    logger.log('Saved result to "result/BQ2_compare.png"')

    final = {
        'mse_converge': {
            "all_features": loss_converge_epoch(mse_all),
            "6_features": loss_converge_epoch(mse_6),
            "5_features": loss_converge_epoch(mse_5)
        },
        'final_mse': {
            "all_features": training_result(mse_all, mode='loss'),
            "6_features": training_result(mse_6, mode='loss'),
            "5_features": training_result(mse_5, mode='loss')
        }
    }
    final_df = pd.DataFrame(final)
    final_df.to_csv('result/BQ2_result.csv')
    logger.log('Saved result to "result/BQ2_result.csv"')

    # update hyperparameter
    # pick the variant with the lowest final validation MSE
    result = min(final['final_mse'], key=final['final_mse'].get)
    if result == 'all_features':
        removed = []
    elif result == '6_features':
        removed = list(map(int, best_6.split(",")))
    else:  # '5_features'
        removed = list(map(int, best_5.split(",")))

    hyperparameters['input_shape'] = (len(columns) - len(removed), )
    hyperparameters['removed'] = removed
    write_dict_to_json(hyperparameters, args.params)
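
# Note: `loss_converge_epoch` and `training_result` are project helpers that
# are not shown on this page. Below is a minimal sketch of plausible
# implementations, inferred only from how they are called above (the epoch at
# which a loss curve settles; the final value of a metric curve). The
# tolerance and window are illustrative assumptions, not the project's values.
import numpy as np

def loss_converge_epoch(losses, tol=1e-4, window=10):
    # First epoch after which the loss improves by less than `tol`
    # over a `window` of epochs (hypothetical convergence criterion).
    losses = np.asarray(losses)
    for i in range(len(losses) - window):
        if losses[i] - losses[i + window] < tol:
            return i + 1
    return len(losses)

def training_result(curve, mode='acc'):
    # Final value of a training curve; `mode` mirrors the call sites above.
    return curve[-1]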
Example #2
logger.log('Training...')
X_train, y_train = dataset.get_train()
X_test, y_test = dataset.get_test()

with tqdm(total=epochs) as pbar:
    update = tf.keras.callbacks.LambdaCallback(
        on_epoch_end=lambda epoch, logs: pbar.update(1)
    )
    
    hist = model.fit(
        x=X_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_test, y_test),
        verbose=0,
        callbacks=[update]
    )
    
histories['Q5'] = {
    'accuracy': hist.history['accuracy'],
    'val_accuracy': hist.history['val_accuracy'],
    'loss': hist.history['loss'],
    'val_loss': hist.history['val_loss']
}

# output to json
logger.log('Saving result to "result/AQ5.json"')
write_dict_to_json(filter_dict(histories, ['seed', 'Q5']), 'result/AQ5.json')

logger.end('Stopped q5_train.py')
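
# `filter_dict` and `write_dict_to_json` are small utilities from this
# project that are not reproduced here. A minimal sketch, assuming that
# filter_dict keeps only the listed keys and write_dict_to_json serializes
# a plain dict to a JSON file:
import json

def filter_dict(d, keys):
    # Keep only the requested keys, e.g. ['seed', 'Q5'] above.
    return {k: d[k] for k in keys if k in d}

def write_dict_to_json(d, path):
    # Dump a dict to disk as JSON.
    with open(path, 'w') as f:
        json.dump(d, f, indent=4)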
Example #3
logger.log('Training...')
X_train, y_train = dataset.get_train()
X_test, y_test = dataset.get_test()

with tqdm(total=epochs) as pbar:
    update = tf.keras.callbacks.LambdaCallback(
        on_epoch_end=lambda epoch, logs: pbar.update(1)
    )
    
    hist = model.fit(
        x=X_train,
        y=y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_test, y_test),
        verbose=0,
        callbacks=[update]
    )
    
histories['Q5'] = {
    'accuracy': hist.history['accuracy'],
    'val_accuracy': hist.history['val_accuracy'],
    'loss': hist.history['loss'],
    'val_loss': hist.history['val_loss']
}

# output to json
logger.log('Saving result to "result/additional.json"')
write_dict_to_json(filter_dict(histories, ['seed', 'Q5']), 'result/additional.json')

logger.end('Stopped additional_train.py')
Example #4
num_classes = 3
epochs = 1000
batch_size = 32
num_neurons = 10
alpha = 0.01
beta = 1e-6

hyperparameters = {
    "input_shape": input_shape,
    "num_classes": num_classes,
    "batch_size": batch_size,
    "num_neurons": num_neurons,
    "alpha": alpha,
    "beta": beta
}
write_dict_to_json(hyperparameters, args.params)

# create and compile model
logger.log('Creating model...')
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=input_shape),
    tf.keras.layers.Dense(num_neurons,
                          activation='relu',
                          kernel_regularizer=tf.keras.regularizers.l2(l=beta)),
    tf.keras.layers.Dense(num_classes,
                          activation='softmax',
                          kernel_regularizer=tf.keras.regularizers.l2(l=beta))
])

model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)
Example #5
def cross_validation():
    input_shape = hyperparameters['input_shape']
    num_classes = hyperparameters['num_classes']
    epochs = 500
    batch_sizes = [4, 8, 16, 32, 64]
    num_neurons = hyperparameters['num_neurons']
    alpha = hyperparameters['alpha']

    histories['Q2'] = {
        'cv': {
            'accuracy': {batch: [] for batch in batch_sizes},
            'time': {batch: [] for batch in batch_sizes},
        },
        'optimal': dict()
    }

    logger.log('Starting cross validation...')
    X, y = dataset.get_train()
    X_test, y_test = dataset.get_test()

    for fold, (train_index, valid_index) in enumerate(dataset.get_kfold(), start=1):
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]
        
        for batch in batch_sizes:
            logger.log('Fold %s Batch Size %s' % (fold, batch))
            with tqdm(total=epochs, desc='Fold %s Batch Size %s' % (fold, batch)) as pbar:
                update = tf.keras.callbacks.LambdaCallback(
                    on_epoch_end=lambda epoch, logs: pbar.update(1)
                )
            
                model = tf.keras.Sequential([
                    tf.keras.layers.InputLayer(input_shape=input_shape),
                    tf.keras.layers.Dense(num_neurons, activation='relu'),
                    tf.keras.layers.Dense(num_classes, activation='softmax')
                ])

                model.compile(
                    optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                    metrics=['accuracy']
                )

                start = time.time()
                history = model.fit(
                    x=X_train,
                    y=y_train,
                    batch_size=batch,
                    epochs=epochs,
                    validation_data=(X_valid, y_valid),
                    verbose=0,
                    callbacks=[update]
                )
                end = time.time()
            
                histories['Q2']['cv']['accuracy'][batch].append(history.history['val_accuracy'])
                histories['Q2']['cv']['time'][batch].append((end - start) / epochs)
    

    batch_epoch = pd.DataFrame(histories['Q2']['cv']['accuracy'])
    time_per_epoch = pd.DataFrame(histories['Q2']['cv']['time'])
    epoch_to_converge = batch_epoch.applymap(acc_converge_epoch)
    cv_accuracy = batch_epoch.applymap(training_result).mean()
    total_time_to_converge = (epoch_to_converge * time_per_epoch).mean()

    def standardize(arr):
        return (arr - arr.mean()) / arr.std()

    def deciding_factor(total_time, acc):
        time_score = np.exp(-standardize(total_time))
        acc_score = np.exp(standardize(acc))
        print(acc_score * time_score)
        return int((acc_score * time_score).idxmax())

    hyperparameters['batch_size'] = deciding_factor(total_time_to_converge, cv_accuracy)
    logger.log('Optimal batch size: %s' % hyperparameters['batch_size'])
    write_dict_to_json(hyperparameters, args.params)

    logger.log('Done cross validation')
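
# The `deciding_factor` heuristic above trades accuracy off against
# wall-clock cost: both series are standardized, mapped through exp() so
# that below-average times and above-average accuracies score above 1, and
# the batch size with the largest product of the two scores wins. A toy
# illustration with made-up numbers (not results produced by this code):
import numpy as np
import pandas as pd

total_time = pd.Series({4: 120.0, 8: 70.0, 16: 45.0, 32: 30.0, 64: 25.0})
accuracy = pd.Series({4: 0.86, 8: 0.85, 16: 0.84, 32: 0.82, 64: 0.78})

time_score = np.exp(-(total_time - total_time.mean()) / total_time.std())
acc_score = np.exp((accuracy - accuracy.mean()) / accuracy.std())
print((acc_score * time_score).idxmax())  # 16 for these made-up numbers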
Example #6
dataset = PreprocessDataset(df=df,
                            fold=5)
X_train, y_train = dataset.get_train()
X_test, y_test = dataset.get_test()

# Defining hyperparameters
try:
    hyperparameters = read_json_to_dict(args.params)
except Exception:  # fall back to defaults if the params file is missing or invalid
    hyperparameters = {
        "input_shape": (7, ),
        "batch_size": 8,
        "num_neurons": 10,
        "alpha": 1e-3,
        "beta": 1e-3
    }
    write_dict_to_json(hyperparameters, args.params)

epochs = 5000
batch_size = hyperparameters['batch_size']
num_neurons = hyperparameters['num_neurons']
alpha = hyperparameters['alpha']
beta = hyperparameters['beta']

# Histories of results
histories = {'seed': SEED, 'columns': df.columns[1:8].tolist(), 'Q2': dict()}
write_dict_to_json(filter_dict(histories, ['seed', 'columns', 'Q2']),
                   'result/BQ2.json')


# RFE functions
def remove_features(f_arr, remove):
Example #7
def cross_validation():
    input_shape = hyperparameters['input_shape']
    num_classes = hyperparameters['num_classes']
    epochs = 500
    batch_size = hyperparameters['batch_size']
    num_neurons = hyperparameters['num_neurons']
    alpha = hyperparameters['alpha']
    beta = [0, 1e-3, 1e-6, 1e-9, 1e-12]

    histories['Q4'] = {
        'cv': {
            'accuracy': {b: [] for b in beta},
            'time': {b: [] for b in beta},
        },
        'optimal': dict()
    }

    logger.log('Starting cross validation...')
    X, y = dataset.get_train()
    X_test, y_test = dataset.get_test()

    for fold, (train_index, valid_index) in enumerate(dataset.get_kfold(), start=1):
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]
        
        for b in beta:
            logger.log('Fold %s Decay %s' % (fold, b))
            with tqdm(total=epochs, desc='Fold %s Decay %s' % (fold, b)) as pbar:
                update = tf.keras.callbacks.LambdaCallback(
                    on_epoch_end=lambda epoch, logs: pbar.update(1)
                )
            
                model = tf.keras.Sequential([
                    tf.keras.layers.InputLayer(input_shape=input_shape),
                    tf.keras.layers.Dense(num_neurons, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l=b)),
                    tf.keras.layers.Dense(num_classes, activation='softmax', kernel_regularizer=tf.keras.regularizers.l2(l=b))
                ])

                model.compile(
                    optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                    metrics=['accuracy']
                )

                start = time.time()
                history = model.fit(
                    x=X_train,
                    y=y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(X_valid, y_valid),
                    verbose=0,
                    callbacks=[update]
                )
                end = time.time()
            
                histories['Q4']['cv']['accuracy'][b].append(history.history['val_accuracy'])
                histories['Q4']['cv']['time'][b].append((end - start) / epochs)
    
    beta_epoch = pd.DataFrame(histories['Q4']['cv']['accuracy'])
    cv_accuracy = beta_epoch.applymap(training_result).mean()
    hyperparameters['beta'] = float(cv_accuracy.idxmax())
    write_dict_to_json(hyperparameters, args.params)
    logger.log('Done cross validation')
Example #8
logger.log('Loading dataset from "' + args.data + '"...')
df = pd.read_csv(args.data)

# Defining hyperparameters
try:
    hyperparameters = read_json_to_dict(args.params)
except Exception:  # fall back to defaults if the params file is missing or invalid
    hyperparameters = {
        "input_shape": (7, ),
        "features_left": df.columns[1:8].tolist(),
        "batch_size": 8,
        "num_neurons": 10,
        "alpha": 1e-3,
        "beta": 1e-3
    }
    write_dict_to_json(hyperparameters, args.params)

epochs = 10000
num_neurons = 50
dropout = 0.2
batch_size = hyperparameters['batch_size']
alpha = hyperparameters['alpha']
beta = hyperparameters['beta']

# Histories of results
histories = {'seed': SEED, 'Q3': dict()}

features_left = hyperparameters['features_left']
input_shape = hyperparameters['input_shape']

dataset = PreprocessDataset(df=df,
                            fold=5)
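
# `PreprocessDataset` is a project class that is not shown on this page.
# Judging from its use in these examples (a df argument, a fold count, and
# get_train / get_test / get_kfold methods), a compatible stand-in could
# look like the sketch below; the column layout, split ratio and random
# seed here are assumptions:
import numpy as np
from sklearn.model_selection import KFold, train_test_split

class PreprocessDataset:
    def __init__(self, df, fold=5, test_size=0.3, seed=0):
        X = df.iloc[:, 1:-1].to_numpy(dtype=np.float32)  # assumed layout
        y = df.iloc[:, -1].to_numpy()
        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(X, y, test_size=test_size, random_state=seed)
        self._kfold = KFold(n_splits=fold, shuffle=True, random_state=seed)

    def get_train(self):
        return self.X_train, self.y_train

    def get_test(self):
        return self.X_test, self.y_test

    def get_kfold(self):
        # yields (train_index, valid_index) pairs over the training split
        return self._kfold.split(self.X_train)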
Example #9
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=[
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=1,
                                                       name='top1_accuracy',
                                                       dtype=None),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5,
                                                       name='top5_accuracy',
                                                       dtype=None)
    ])
model.trainable = False
# batch_size must not be passed to evaluate() when the input is a
# tf.data.Dataset (the dataset is assumed to be batched already)
results = model.evaluate(test_ds,
                         verbose=0,
                         return_dict=True)
print(results)

# Save results
os.makedirs(f'./log/{model.name}', exist_ok=True)
log_path = f'./log/{model.name}/' + args.model_weights.split(
    '/')[-1] + '_test.log'

write_dict_to_json(results, log_path)
logger.log('Saved result to "' + log_path + '"')

logger.end('Done model_test.py')
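
# `logger` throughout these examples is a small project utility exposing
# plain `log` and `end` methods, not the stdlib `logging` module. A minimal
# hypothetical stand-in:
import datetime

class SimpleLogger:
    def log(self, msg):
        now = datetime.datetime.now().isoformat(timespec='seconds')
        print('[%s] %s' % (now, msg))

    def end(self, msg):
        self.log(msg)

logger = SimpleLogger()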