Example #1
def crossvalidate_run(train_dataset, test_set, labels, splits=10):
    """Run 'splits'-fold cross validation, writing results to CV_log.txt."""
    testaccuracy, trainaccuracy, trainaccuracy_last, times = [], [], [], []  # lists collecting per-fold results
    basepath = config.logspath[:-1] + "_cv" + str(splits) + '/'
    fold = 0
    kf = KFold(n_splits=splits, shuffle=True)  # initialize k-fold cross validation

    if config.write_file or config.plot:
        if not os.path.isdir(basepath):
            os.makedirs(basepath)  # make sure the directory exists before writing
    if config.write_file:
        cvlog = open(basepath + 'CV_log.txt', 'w+')

    # the test set stays the same for every split; plot its class distribution once
    if config.plot:
        plotlib.plot_histogram(test_set.labels, 'Test data', basepath, 'test_histogram', labels)

    # cross validation loop
    for train_idx, val_idx in kf.split(train_dataset.data, train_dataset.labels):
        # construct data set with train/val data for this "fold"
        train_set = datahandler.make_dataset(train_dataset.data[train_idx], train_dataset.labels[train_idx])
        val_set = datahandler.make_dataset(train_dataset.data[val_idx], train_dataset.labels[val_idx])
        # set own path for each run
        config.logspath = basepath + 'fold_' + str(fold) + '/'
        config.disp_train = int(train_set.num_examples / (config.display_steps * config.batch_size))  # yields config.display_steps displays per epoch, regardless of batch size
        # run optimization and evaluation procedure
        testacc, trainacc, trainacc_last, duration, valacc = \
            CNN_framework.run(config,
                              dataset=datahandler.make_datasets(train_set, val_set, test_set),
                              classlabels=labels)
        # plot histograms
        if config.plot:
            plotlib.plot_histogram(train_set.labels, 'Training data', config.logspath, 'train_histogram',
                                   labels)
            plotlib.plot_histogram(val_set.labels, 'Validation data', config.logspath,
                                   'validation_histogram',
                                   labels)
        testaccuracy.append(testacc)
        trainaccuracy.append(trainacc)
        trainaccuracy_last.append(trainacc_last)
        times.append(duration)
        print("test accuracy in fold %s: %s " % (fold, testacc))
        print("train accuracy in fold %s: %s" % (fold, trainacc))
        print("last train acc. in fold %s: %s" % (fold, trainacc_last))
        fold += 1
    mean_testacc = np.mean(testaccuracy)
    if config.write_file:
        cvlog.write("test accuracies: %s\n" % testaccuracy)
        cvlog.write("train accuracies: %s\n" % trainaccuracy)
        cvlog.write("last train acc: %s\n" % trainaccuracy_last)
        cvlog.write("average test accuracy: %s" % mean_testacc)
        cvlog.write("average training time: %s" % np.mean(times))
        cvlog.close()
    return mean_testacc
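
For reference, a minimal self-contained sketch of the k-fold pattern used above, needing only scikit-learn and NumPy; the dummy data and the placeholder accuracy stand in for the project's datahandler and CNN_framework.run calls:

import numpy as np
from sklearn.model_selection import KFold

data = np.random.rand(100, 8)            # dummy data: 100 samples, 8 features
labels = np.random.randint(0, 2, 100)    # dummy binary labels

kf = KFold(n_splits=10, shuffle=True)    # same splitter the function uses
accuracies = []
for fold, (train_idx, val_idx) in enumerate(kf.split(data)):
    train_x, train_y = data[train_idx], labels[train_idx]
    val_x, val_y = data[val_idx], labels[val_idx]
    # ... train on (train_x, train_y), evaluate on (val_x, val_y) ...
    acc = np.random.uniform(0.8, 1.0)    # placeholder for the fold's test accuracy
    accuracies.append(acc)
    print("test accuracy in fold %s: %s" % (fold, acc))
print("average test accuracy: %s" % np.mean(accuracies))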
Example #2
def datasetsize_inspectionrun(alltrain, test_set, labels, splits=10):
    """Cross-validate on truncated training sets of increasing size."""
    orig_path = config.logspath
    # scale up so that the remaining _train_ split per fold holds the listed
    # number of samples (powers of two, plus 10000)
    sizes = np.ceil(np.asarray([256, 512, 1024, 2048, 4096, 8192, 10000]) * splits / (splits - 1))
    sizes = [int(i) for i in sizes]     # make them integers
    for size in sizes:
        config.logspath = orig_path + 'tr_size' + str(size) + '/'
        traindata_trunc = alltrain.data[:size]
        trainlabels_trunc = alltrain.labels[:size]
        result = crossvalidate_run(datahandler.make_dataset(traindata_trunc, trainlabels_trunc),
                                   test_set, labels, splits)
    return result  # mean test accuracy of the last (largest) training set size
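
A worked check of the size scaling above (an illustration, not project code): with splits-fold cross validation only (splits - 1) / splits of the truncated set lands in the training split per fold, so each target size n is scaled up to ceil(n * splits / (splits - 1)):

import numpy as np

splits = 10
targets = np.asarray([256, 512, 1024, 2048, 4096, 8192, 10000])
truncated = np.ceil(targets * splits / (splits - 1)).astype(int)
print(truncated)                           # [  285   569  1138  2276  4552  9103 11112]
# per-fold training sizes recover the targets:
print(truncated * (splits - 1) // splits)  # [  256   512  1024  2048  4096  8192 10000]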
def evaluateCNN(config):

    # write logs to '<eval_modelpath>_<test set directory>/'
    config.logspath = config.eval_modelpath[:-1] + '_' + config.test_set.split('/')[-2] + '/'

    if not os.path.isdir(config.logspath) and (config.plot or config.write_file):
        os.makedirs(config.logspath)  # make sure directory exists for writing the file

    if 'etrend' in config.eval_modelpath:  # matches both 'detrend' and 'Detrend'
        config.preprocessing = 'detrend'
    elif 'FFT' in config.eval_modelpath:
        config.preprocessing = 'fourier_channels'
    elif 'STFT' in config.eval_modelpath:
        config.preprocessing = 'stft'

    # get data info
    seq_length, n_channels, sensors = datahandler.read_infotxt(config.test_set)

    testlabels = datahandler.read_file('_labels', config.test_set, file_format='csv')
    testdata = datahandler.read_file('_data', config.test_set, file_format='csv')

    if testlabels.ndim == 1:
        testlabels, _ = datahandler.make_one_hot(testlabels)

    # reduce channels if specified
    if config.sel_sensors != sensors:
        testdata, sensors, n_channels = datahandler.select_channels(testdata, sensors, seq_length, config.sel_sensors)
    testdata, _, _ = datahandler.do_preprocessing(testdata, config.preprocessing, n_channels, seq_length)

    # transform data with scaler
    if config.preprocessing == 'fourier_samples':  # '==', not 'is': identity comparison of strings is unreliable
        toscale = 'samples'
    else:
        toscale = 'channels'
    scaler = datahandler.load_scaler(config.eval_modelpath)
    testdata = datahandler.do_scaling(testdata, scaler, n_channels, seq_length, toscale=toscale)

    eval_data = datahandler.make_datasets(None, None, datahandler.make_dataset(testdata, testlabels))

    subDirectories = next(os.walk(config.eval_modelpath))[1]

    baselogspath = config.logspath
    baseevalmodelpath = config.eval_modelpath

    for subDirectory in subDirectories:
        run_for_loop_subdirectory(config=config, subDirectory=subDirectory, baseevalmodelpath=baseevalmodelpath,
                                  baselogspath=baselogspath, eval_data=eval_data, testlabels=testlabels)
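
The loop at the end relies on next(os.walk(path)) yielding the triple (dirpath, dirnames, filenames) for path itself, so [1] is the list of immediate subdirectories (one per trained model run). A standalone sketch with a hypothetical directory layout:

import os

model_path = 'logs/FFT_model/'  # hypothetical model directory
if os.path.isdir(model_path):
    # next(os.walk(...)) returns (dirpath, dirnames, filenames) for model_path itself
    sub_directories = next(os.walk(model_path))[1]
    for sub_directory in sub_directories:
        print(os.path.join(model_path, sub_directory))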
Example #3
def evaluateCNN(config):

    # write logs to '<eval_modelpath>_<test set directory>/'
    config.logspath = config.eval_modelpath[:-1] + '_' + config.test_set.split(
        '/')[-2] + '/'

    if not os.path.isdir(config.logspath) and (config.plot
                                               or config.write_file):
        os.makedirs(
            config.logspath)  # make sure directory exists for writing the file

    # TODO: read preprocessing etc. from the trained model's log file
    if 'etrend' in config.eval_modelpath:  # matches both 'detrend' and 'Detrend'
        config.preprocessing = 'detrend'
    elif 'FFT' in config.eval_modelpath:
        config.preprocessing = 'fourier_channels'
    elif 'STFT' in config.eval_modelpath:
        config.preprocessing = 'stft'

    # get data info
    seq_length, n_channels, sensors = datahandler.read_infotxt(config.test_set)
    testlabels = datahandler.read_file('labels',
                                       config.test_set,
                                       file_format='csv')
    testdata = datahandler.read_file('dataset',
                                     config.test_set,
                                     file_format='csv')
    try:
        idx = datahandler.read_file('obsID',
                                    config.test_set,
                                    file_format='csv')
        idx = idx.reshape(-1, 1)
    except Exception:  # no observation IDs stored; fall back to running indices
        idx = np.arange(testdata.shape[0]).reshape(-1, 1)

    if testlabels.ndim == 1:
        testlabels, _ = datahandler.make_one_hot(testlabels)

    # reduce channels if specified
    if config.sel_sensors != sensors:
        testdata, sensors, n_channels = datahandler.select_channels(
            testdata, sensors, seq_length, config.sel_sensors)
    testdata, _, _ = datahandler.do_preprocessing(testdata,
                                                  config.preprocessing,
                                                  n_channels, seq_length)

    # transform data with scaler
    if config.preprocessing == 'fourier_samples':  # '==', not 'is': identity comparison of strings is unreliable
        toscale = 'samples'
    else:
        toscale = 'channels'
    scaler = datahandler.load_scaler(config.eval_modelpath)
    testdata = datahandler.do_scaling(testdata,
                                      scaler,
                                      n_channels,
                                      seq_length,
                                      toscale=toscale)

    eval_data = datahandler.make_datasets(
        None, None, datahandler.make_dataset(testdata, testlabels, index=idx))

    subDirectories = next(os.walk(config.eval_modelpath))[1]

    baselogspath = config.logspath
    baseevalmodelpath = config.eval_modelpath

    for subDirectory in subDirectories:
        run_for_loop_subdirectory(config=config,
                                  subDirectory=subDirectory,
                                  baseevalmodelpath=baseevalmodelpath,
                                  baselogspath=baselogspath,
                                  eval_data=eval_data,
                                  testlabels=testlabels)
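
datahandler.make_one_hot is project code; a minimal NumPy equivalent of what the testlabels.ndim == 1 branch expects (the project's actual helper may differ in signature and return values):

import numpy as np

def make_one_hot(labels):
    classes = np.unique(labels)  # sorted distinct label values
    one_hot = (labels[:, None] == classes).astype(float)
    return one_hot, classes

labels = np.array([0, 2, 1, 2])
one_hot, classes = make_one_hot(labels)
print(one_hot)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]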