def main():
    """Main method"""
    data_dir = '../data/01_raw'
    interim_dir = '../data/02_interim'
    processed_dir = '../data/03_processed'
    results_dir = '../results'
    data_config = {
        'data_dir': data_dir,
        'interim_dir': interim_dir,
        'processed_dir': processed_dir,
        'processed_dim': [512, 512],
        'data_labels': ['depth', 'target'],
        'delimiter': '\t',
        'used_data': 0.6,  # Fraction of the original data measurements to use
        'random_seed': 1234,
        'batch_size': 4,
        'test_size': 0.2
    }
    model_config = {
        'results_dir': results_dir,
        'output_size': 2,
        'arch': 'vgg16',
        'loss': 'BCELoss',
        'max_iter': 100,
        'learning_rate': 0.01,
        'pretrained': True
    }

    misc.gen_dirs([data_dir, interim_dir, processed_dir, results_dir])

    data_processor = DataProcessor(data_config)
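Every example here creates its directory layout up front through misc.gen_dirs. The misc module itself is not part of this listing, so the following is only a minimal sketch of such a helper, assuming it does nothing more than create any missing directories:

import os


def gen_dirs(dirs):
    """Create every directory in the list, skipping those that already exist."""
    for path in dirs:
        os.makedirs(path, exist_ok=True)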
Example #2
def main():
    """Main method"""
    misc.to_local_dir(__file__)
    misc.gen_dirs([data_dir, processed_dir, plot_dir, model_dir, results_dir])
    # data = np.load(f"{processed_dir}/processed_data.npy")
    np.set_printoptions(suppress=True)
    data = processing(store=True, plot=False)

    # Train/test split
    # train_data, test_data = train_test_split(data, test_size=test_size, random_state=random_seed)
    train_data = np.empty(
        (np.shape(data)[0] - len(test_configs), np.shape(data)[1],
         np.shape(data)[2]))
    test_data = np.empty(
        (len(test_configs), np.shape(data)[1], np.shape(data)[2]))
    test_idx = 0
    train_idx = 0
    for config in test_configs:
        for scenario in data:
            if (scenario[0, 0] == config[0] and scenario[0, 1] == config[1]
                    and scenario[0, 2] == config[2]):
                test_data[test_idx] = scenario
                test_idx += 1

    for scenario in data:
        test_found = False
        for config in test_configs:
            if (scenario[0, 0] == config[0] and scenario[0, 1] == config[1]
                    and scenario[0, 2] == config[2]):
                test_found = True
        if not test_found:
            train_data[train_idx] = scenario
            train_idx += 1

    # Flatten the training data by one dimension but keep the shape of the
    # testing data so that different FRFs can be tested separately
    train_data = np.reshape(train_data, (-1, input_size + output_size))

    # Scale data
    x_scaler = MinMaxScaler()
    train_data[:, :input_size] = x_scaler.fit_transform(
        train_data[:, :input_size])
    for test_idx, __ in enumerate(test_data):
        test_data[test_idx, :, :input_size] = x_scaler.transform(
            test_data[test_idx, :, :input_size])

    hyperopts = training(train_data)
    total_errors = np.empty((len(hyperopts), output_size))
    total_variances = np.empty((len(hyperopts), output_size))
    for hyper_idx, hyperopt in enumerate(hyperopts):
        errors, variances = test_frf(hyperopt, test_data, x_scaler)
        total_errors[hyper_idx] = errors
        total_variances[hyper_idx] = variances
        dump(
            hyperopt, '{}/hyperopt_{}.joblib'.format(
                model_dir, hyperopt[0].best_estimator_.__class__.__name__))
    write_results(hyperopts, total_errors, total_variances)
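The loop above persists each fitted hyperparameter search with joblib. A minimal sketch of loading such a dump back for inference, assuming hyperopt is a sequence of fitted search objects (as the best_estimator_ access suggests) and using a hypothetical new_samples array scaled with the same x_scaler:

from joblib import load

# Hypothetical file name; the actual name is derived from the class of the
# persisted estimator in the loop above (e.g. RandomForestRegressor).
hyperopt = load('{}/hyperopt_RandomForestRegressor.joblib'.format(model_dir))
best_model = hyperopt[0].best_estimator_
# New inputs must pass through the same MinMaxScaler fitted on the training data.
predictions = best_model.predict(x_scaler.transform(new_samples[:, :input_size]))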
Example #3
def main():
    """Main method"""
    misc.to_local_dir(__file__)
    misc.gen_dirs([data_dir, processed_dir, plot_dir, results_dir])
    # Definition of data and learning properties
    np.set_printoptions(suppress=True)
    # data = np.load(f"{processed_dir}/processed_osc.npy")

    n_neighbors = range(10, 21)
    if OSC:
        train_data, test_data = train_test_osc()

        best_score = 0
        best_nbrs = 0
        for local_nbrs in n_neighbors:
            __, __, __, score = interpolate_osc(train_data, test_data,
                                                local_nbrs)
            if score > best_score:
                best_score = score
                best_nbrs = local_nbrs
        freq_errors, gamma_errors, mass_errors, __ = interpolate_osc(
            train_data, test_data, best_nbrs)
        # print("Frequency error: {} +/- {}".format(np.mean(freq_errors), np.std(freq_errors)))
        # print("Gamma error: {} +/- {}".format(np.mean(gamma_errors), np.std(gamma_errors)))
        # print("Mass error: {} +/- {}".format(np.mean(mass_errors), np.std(mass_errors)))
        print(r"{:.2f} \pm {:.2f} & {:.2f} \pm {:.2f} & {:.2f} \pm {:.2f}".
              format(np.mean(freq_errors), np.std(freq_errors),
                     np.mean(gamma_errors), np.std(gamma_errors),
                     np.mean(mass_errors), np.std(mass_errors)))

    else:
        train_data, test_data = train_test_frf()
        best_score = 0
        best_nbrs = 0
        for local_nbrs in n_neighbors:
            __, __, score = interpolate_frf(train_data, test_data, local_nbrs)
            if score > best_score:
                best_score = score
                best_nbrs = local_nbrs
        errors, variances, __ = interpolate_frf(train_data, test_data,
                                                best_nbrs)
        print(
            r"{:.2f} \pm {:.2f} & {:.2f} \pm {:.2f} & {:.2f} \pm {:.2f} & {:.2f} \pm {:.2f}"
            .format(errors[0], variances[0], errors[1], variances[1],
                    errors[2], variances[2], errors[3], variances[3]))
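Both branches above follow the same pattern: score every candidate neighbour count and rerun the interpolation with the best one. A generic sketch of that pattern, using scikit-learn's KNeighborsRegressor and cross-validation in place of the project-specific interpolate_osc/interpolate_frf helpers (an illustration only, not the interpolation used here):

import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsRegressor


def select_best_k(x_train, y_train, candidates=range(10, 21)):
    """Return the neighbour count with the best mean cross-validation score."""
    best_score, best_k = -np.inf, None
    for k in candidates:
        score = cross_val_score(KNeighborsRegressor(n_neighbors=k),
                                x_train, y_train, cv=5).mean()
        if score > best_score:
            best_score, best_k = score, k
    return best_k, best_score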
Example #4
def main():
    """Main method"""
    misc.to_local_dir(__file__)
    misc.gen_dirs([data_dir, plot_dir, model_dir, results_dir])
    generate_data()
Example #5
def main():
    """Main method"""
    misc.to_local_dir(__file__)
    misc.gen_dirs([data_dir, processed_dir, plot_dir, model_dir, results_dir])
    # Definition of data and learning properties
    np.set_printoptions(suppress=True)
    # data = np.load(f"{processed_dir}/processed_osc.npy")
    data = read_osc(store=True)

    # Train/test split
    # train_data, test_data = train_test_split(data, test_size=test_size, random_state=random_seed)
    train_data = np.empty(
        (np.shape(data)[0] - len(test_configs), np.shape(data)[1]))
    test_data = np.empty((len(test_configs), np.shape(data)[1]))
    test_idx = 0
    train_idx = 0

    for config in test_configs:
        for row in data:
            if (row[0] == config[0] and row[1] == config[1]
                    and row[2] == config[2]):
                test_data[test_idx] = row
                test_idx += 1

    for row in data:
        test_found = False
        for config in test_configs:
            if (row[0] == config[0] and row[1] == config[1]
                    and row[2] == config[2]):
                test_found = True
        if not test_found:
            train_data[train_idx] = row
            train_idx += 1

    # import os
    # import math
    # from sklearn.metrics import mean_squared_error
    # regre = ['ElasticNet', 'RandomForestRegressor', 'XGBRegressor']
    # pred_dir = '{}/dmg_hsc75linear/osc_fitting_hypsrch300_predictions_2'.format(results_dir)

    # for reg in regre:
    #     files = [
    #         filename for filename in os.listdir(pred_dir)
    #         if filename.startswith(reg)
    #     ]
    #     freq_errors = []
    #     gamma_errors = []
    #     mass_errors = []
    #     for config_idx, config in enumerate(test_configs):
    #         local_test = test_data[config_idx]
    #         local_pred_file = files[0]
    #         for idx in range(1, len(files)):
    #             file_config = os.path.splitext(files[idx])[0].split('_')[2:]
    #             if np.all([float(file_config[jdx]) == config[jdx] for jdx in range(len(config))]):
    #                 local_pred_file = files[idx]
    #         local_pred = np.load('{}/{}'.format(pred_dir, local_pred_file))
    #         local_test = local_test[input_size:]
    #         freq_error = math.sqrt(mean_squared_error(local_test[::3], local_pred[::3]))
    #         gamma_error = math.sqrt(mean_squared_error(local_test[1::3], local_pred[1::3]))
    #         mass_error = math.sqrt(mean_squared_error(local_test[2::3], local_pred[2::3]))
    #         freq_errors.append(freq_error)
    #         gamma_errors.append(gamma_error)
    #         mass_errors.append(mass_error)

    #     print(reg)
    #     # print("Frequency error: {} +/- {}".format(np.mean(freq_errors), np.std(freq_errors)))
    #     # print("Gamma error: {} +/- {}".format(np.mean(gamma_errors), np.std(gamma_errors)))
    #     # print("Mass error: {} +/- {}".format(np.mean(mass_errors), np.std(mass_errors)))
    #     print(r"{:.2f} \pm {:.2f} & {:.2f} \pm {:.2f} & {:.2f} \pm {:.2f}".format(
    #         np.mean(freq_errors), np.std(freq_errors),
    #         np.mean(gamma_errors), np.std(gamma_errors),
    #         np.mean(mass_errors), np.std(mass_errors)
    #     ))

    # quit()

    # Scale data
    x_scaler = MinMaxScaler()
    train_data[:, :input_size] = x_scaler.fit_transform(
        train_data[:, :input_size])
    test_data[:, :input_size] = x_scaler.transform(test_data[:, :input_size])

    hyperopts = training(train_data)

    total_errors = np.empty((len(hyperopts), output_size))
    total_variances = np.empty((len(hyperopts), output_size))
    for hyper_idx, hyperopt in enumerate(hyperopts):
        errors, variances = test_osc(hyperopt, test_data, test_configs)
        total_errors[hyper_idx] = errors
        total_variances[hyper_idx] = variances
        dump(
            hyperopt, '{}/hyperopt_{}.joblib'.format(
                model_dir, hyperopt[0].best_estimator_.__class__.__name__))
    write_results(hyperopts, total_errors, total_variances)
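Examples #2 and #5 both separate the held-out configurations with nested loops over test_configs. An equivalent, vectorised sketch for the 2D layout of Example #5, assuming the first three columns of data hold the configuration values; unlike the loops above, it keeps rows in their original order rather than in test_configs order:

import numpy as np


def split_by_configs(data, test_configs):
    """Split off the rows whose first three columns match any test configuration."""
    configs = np.asarray(test_configs)
    test_mask = (data[:, None, :3] == configs[None, :, :]).all(axis=2).any(axis=1)
    return data[~test_mask], data[test_mask]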