Example #1
import pickle

from sklearn.model_selection import StratifiedKFold, train_test_split

import load  # project-local data-loading module


def train_model():
    # load_model, redesign_model, new_model_params and train_one_net
    # are project-local helpers

    IMAGE_MEAN, base_model = load_model()

    X, y = load.load_data(IMAGE_MEAN)
    skf = StratifiedKFold(n_splits=5, shuffle=True)
    for fold_number, (train_index, test_index) in enumerate(skf.split(X, y)):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_te, y_te = X[test_index], y[test_index]
        train_index, val_index = train_test_split(train_index)
        X_tr, y_tr = X[train_index], y[train_index]
        X_val, y_val = X[val_index], y[val_index]
        index = {"train": train_index, "val": val_index, "test": test_index}
        with open("index_fold{number}.pkl".format(number=fold_number), "wb") as f:
            pickle.dump(index, f)

        cnn_model = redesign_model(base_model)
        train_fn, val_fn, pred_fn = new_model_params(cnn_model)
        train_one_net(cnn_model, train_fn, val_fn, pred_fn, X_tr, y_tr, X_val, y_val, X_te, y_te, fold_number)

    # X_tr, y_tr, X_val, y_val, X_te, y_te = load.load_dataset_transfer(IMAGE_MEAN)
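
Since each fold's split is persisted, a quick sanity check can reload it and verify the three partitions are disjoint (a sketch; assumes the index_fold*.pkl files written by train_model above):

import pickle

with open("index_fold0.pkl", "rb") as f:
    index = pickle.load(f)
# no sample index should appear in more than one partition
assert not set(index["train"]) & set(index["val"])
assert not set(index["train"]) & set(index["test"])
assert not set(index["val"]) & set(index["test"])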
Example #2
from data_preprocessing import PreprocessedData

import rospkg
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np

rospack = rospkg.RosPack()

map_path = rospack.get_path('krp_localization') + '/maps/test'
train_data_prefix = "test4"
train_data_path = rospack.get_path('krp_localization') + '/data/'

print('Loading train data path: {}'.format(train_data_path))
raw_rssi, poses = load_data(file_name=train_data_prefix,
                            file_path=train_data_path)  # odometry is not needed

print('Number of RSSI measurements    : {}'.format(len(raw_rssi)))
print('Number of AMCL poses           : {}'.format(len(poses)))

traindata = PreprocessedData(raw_rssi,
                             flag_negative_db=True,
                             flag_min_distance=False,
                             flag_fuse_measurements=True,
                             flag_min_points_per_AP=False,
                             flag_mode_filter=False,
                             flag_discard_non_pose=True,
                             poses=poses,
                             filter_fuse_measurements=1)

# squared length of each consecutive step, expanded as (a - b)^2 = a^2 + b^2 - 2ab
# (assumes traindata.data['X'] is an (N, 2) array of xy positions)
distance = np.sum(traindata.data['X'][1:]**2 + traindata.data['X'][:-1]**2 -
                  2 * traindata.data['X'][1:] * traindata.data['X'][:-1], axis=1)
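
For reference, the same consecutive-pose statistic written directly as a step norm (a sketch; assumes traindata.data['X'] is an (N, 2) array of xy positions):

import numpy as np

steps = traindata.data['X'][1:] - traindata.data['X'][:-1]  # consecutive displacements
step_sq = np.sum(steps**2, axis=1)                          # equals the expanded form above
path_length = np.sum(np.sqrt(step_sq))                      # total distance travelled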
Example #3

        test_hdf5_file = args.data_dir + \
            f'/{args.imsize}x{args.imsize}/kle512_lhs1000_val.hdf5'
        ntrain_total, ntest_total = 10000, 1000
    elif args.data == 'channelized':
        train_hdf5_file = args.data_dir + \
            f'/{args.imsize}x{args.imsize}/channel_ng64_n4096_train.hdf5'
        test_hdf5_file = args.data_dir + \
            f'/{args.imsize}x{args.imsize}/channel_ng64_n512_test.hdf5'
        ntrain_total, ntest_total = 4096, 512
    assert args.ntrain <= ntrain_total, f"Only {ntrain_total} samples "\
        f"available in the {args.data} dataset, but {args.ntrain} training samples were requested."
    assert args.ntest <= ntest_total, f"Only {ntest_total} samples "\
        f"available in the {args.data} dataset, but {args.ntest} test samples were requested."
    train_loader, _ = load_data(train_hdf5_file,
                                args.ntrain,
                                args.batch_size,
                                only_input=False,
                                return_stats=False)
    test_loader, test_stats = load_data(test_hdf5_file,
                                        args.ntest,
                                        args.test_batch_size,
                                        only_input=False,
                                        return_stats=True)
    y_test_variation = test_stats['y_variation']
    print(f'Test output variation per channel: {y_test_variation}')

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    scheduler = OneCycleScheduler(lr_max=args.lr,
                                  div_factor=args.lr_div,
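
The load_data used here is project-specific. Below is a minimal sketch of an HDF5 loader with the same call signature, assuming hypothetical 'input'/'output' dataset keys and (N, C, H, W) output tensors:

import h5py
import torch
from torch.utils.data import DataLoader, TensorDataset

def load_data(hdf5_file, ndata, batch_size, only_input=False, return_stats=False):
    # read the first ndata samples from the HDF5 file
    with h5py.File(hdf5_file, 'r') as f:
        x = torch.from_numpy(f['input'][:ndata]).float()
        y = None if only_input else torch.from_numpy(f['output'][:ndata]).float()
    dataset = TensorDataset(x) if only_input else TensorDataset(x, y)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    stats = None
    if return_stats and y is not None:
        # per-channel variation of the outputs, for (N, C, H, W) tensors
        y_flat = y.transpose(0, 1).reshape(y.size(1), -1)
        stats = {'y_variation': y_flat.var(dim=1)}
    return loader, stats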
Example #4
import pandas as pd

from utils.load import load_data
from utils.preprocess import preprocess_data
from features.importance import use_most_important_features
from parameters.optimize import optimize_hyperparameters
from performance.learning_curves import plot_learning_curves, plot_ROC_curve
from prediction.predict import predict_survival

# set pandas print width
pd.options.display.width = 200

# load data
print("Loading Data...")
DIR = "./data"
train_df, test_df = load_data(DIR)

# preprocess, massage, scale, merge and clean data
print("Preprocessing Data...")
train_df, test_df = preprocess_data(train_df, test_df)

# use only most important features
print("Extracting Most Important Features...")
train_df, test_df = use_most_important_features(train_df, test_df)

# optimize hyperparameters
print("Optimizing Hyperparameters...")
optimize_hyperparameters(train_df)

# plot learning curves
print("Plot Learning Curves...")
Example #5
    ci_and_tc = (torch.cat(convo_ids, dim=0),
                 torch.cat(turn_counts, dim=0)) if args.cascade else (0, 0)

    utils = {'kb_labels': kb_labels, 'ci_and_tc': ci_and_tc}
    metrics, res_name = quantify(args, grouped_preds, grouped_labels, utils)
    exp_logger.end_eval(metrics, kind=args.filename)
    return (metrics, res_name) if split == 'dev' else metrics


if __name__ == "__main__":
    args = solicit_params()
    args = setup_gpus(args)
    set_seed(args)

    ckpt_dir, cache_results = check_directories(args)
    raw_data = load_data(args, cache_results[1])
    tokenizer, ontology = load_tokenizer(args)
    features, mappings = process_data(args, tokenizer, ontology, raw_data,
                                      *cache_results)
    exp_logger = ExperienceLogger(args, ckpt_dir)

    if args.task == 'ast':
        datasets = {
            split: ActionDataset(args, feats)
            for split, feats in features.items()
        }
        model = ActionStateTracking(args, mappings, ckpt_dir)
    elif args.task == 'cds':
        datasets = {
            split: CascadeDataset(args, feats)
            for split, feats in features.items()
Example #6
import gc

import mne
import numpy as np
import pandas as pd


def create_mne_raw_object(save=False, proj=True):
    # load_data and sync_matrix are project-local helpers
    eeg1, eeg2, eeg3, eeg4, eeg5, eeg6, eeg7, pulse, x, y, z = load_data("../data/raw/X_train.h5")
    data = np.zeros([12, np.concatenate(eeg1).shape[0]])

    data[0, :] = np.concatenate(eeg1)
    del eeg1
    data[1, :] = np.concatenate(eeg2)
    del eeg2
    data[2, :] = np.concatenate(eeg3)
    del eeg3
    gc.collect()

    data[3, :] = np.concatenate(eeg4)
    del eeg4
    data[4, :] = np.concatenate(eeg5)
    del eeg5
    data[5, :] = np.concatenate(eeg6)
    del eeg6
    gc.collect()

    data[6, :] = np.concatenate(eeg7)
    del eeg7
    data[7, :] = np.concatenate(sync_matrix(pulse))
    del pulse
    gc.collect()

    data[8, :] = np.concatenate(sync_matrix(x))
    del x
    data[9, :] = np.concatenate(sync_matrix(y))
    del y
    data[10, :] = np.concatenate(sync_matrix(z))
    del z
    gc.collect()

    # channel names and types
    ch_names = ["Fpz", "O1", "F7", "F8", "Fp2", "O2", "Fp1", "pulse", "x", "y", "z", "sleep_state"]
    ch_types = [*['eeg'] * 7, 'ecg', *['misc'] * 3, "stim"]

    # rescale to the SI units MNE expects (EEG in volts; the pulse channel needs an extra factor)
    data *= 1e-7
    data[7] *= 1e-2

    # create and populate MNE info structure
    info = mne.create_info(ch_names, sfreq=250.0, ch_types=ch_types)
    # create raw object
    raw = mne.io.RawArray(data, info, verbose=False)
    raw.set_montage("standard_1020")

    del data
    gc.collect()

    # sleep-stage labels, shifted by +1 so that no event id is 0 (0 is reserved in MNE)
    Y = pd.read_csv("../data/raw/y_train.csv")
    Y = np.array(Y.sleep_stage) + 1

    # one fixed-length 6 s event per epoch, carrying the sleep stage as event id
    new_events = mne.make_fixed_length_events(raw, start=0.001, stop=148128.5, duration=6.)
    new_events[:, 2] = Y
    raw.add_events(new_events, stim_channel='sleep_state')

    if proj:
        projs = mne.compute_proj_raw(raw, n_grad=0, n_mag=0, n_eeg=2, n_jobs=4)
        raw.add_proj(projs)
    if save:
        raw.save("../data/mne/X_train_raw.fif")

    return raw
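
A short usage sketch for the returned Raw object: the fixed-length events on the stim channel can be recovered and cut into 6 s epochs with standard MNE calls (event ids are the sleep stages shifted by +1, as above):

raw = create_mne_raw_object(save=False, proj=False)
events = mne.find_events(raw, stim_channel="sleep_state", verbose=False)
epochs = mne.Epochs(raw, events, tmin=0.0, tmax=6.0, baseline=None,
                    preload=True, verbose=False)
print(epochs)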