def crossvalidate_run(train_dataset, test_set, labels, splits=10):
    """Run 'splits'-fold cross validation and return the mean test accuracy.

    The training data is split with a shuffled ``KFold``. For every fold a
    train and a validation set are built, ``config.logspath`` is pointed at a
    fold-specific directory and ``CNN_framework.run`` is executed. Per-fold
    accuracies are printed; if ``config.write_file`` is set they are also
    summarised in ``CV_log.txt`` inside the cross-validation base directory.

    Args:
        train_dataset: object whose ``data`` and ``labels`` attributes can be
            indexed with the index arrays produced by ``KFold.split``.
        test_set: test data set handed unchanged to every fold; its ``labels``
            attribute is read for the test histogram.
        labels: class labels forwarded to the plotting helper and to
            ``CNN_framework.run``.
        splits: number of folds.

    Returns:
        ``np.mean`` of the per-fold test accuracies.

    Side effects:
        ``config.logspath`` and ``config.disp_train`` are overwritten for each
        fold and are not restored afterwards.
    """
    # base directory: current logspath (without its last character) + "_cv<splits>/"
    basepath = config.logspath[:-1] + "_cv" + str(splits) + '/'
    kf = KFold(n_splits=splits, shuffle=True)  # initialize k-fold cross validation

    # results of the individual folds
    testaccuracy, trainaccuracy, trainaccuracy_last, times = [], [], [], []

    if config.write_file or config.plot:
        if not os.path.isdir(basepath):
            os.makedirs(basepath)  # make sure directory exists for log file / plots

    # Open the log up front so an unwritable path fails before any training,
    # but only when it will actually be written, and always close it again.
    cvlog = None
    if config.write_file:
        cvlog = open(os.path.join(basepath, 'CV_log.txt'), 'w')
    try:
        if config.plot:
            # test set stays the same, but train/val is different for every split
            plotlib.plot_histogram(test_set.labels, 'Test data', basepath,
                                   'test_histogram', labels)

        # cross validation loop
        fold_indices = kf.split(train_dataset.data, train_dataset.labels)
        for fold, (train_idx, val_idx) in enumerate(fold_indices):
            # construct data sets with the train/val data of this fold
            train_set = datahandler.make_dataset(train_dataset.data[train_idx],
                                                 train_dataset.labels[train_idx])
            val_set = datahandler.make_dataset(train_dataset.data[val_idx],
                                               train_dataset.labels[val_idx])

            # every fold gets its own log directory
            config.logspath = basepath + 'fold_' + str(fold) + '/'
            # number of training examples divided by (display_steps * batch_size)
            config.disp_train = int(
                train_set.num_examples / (config.display_steps * config.batch_size))

            # run optimization and evaluation procedure (validation accuracy is unused)
            testacc, trainacc, trainacc_last, duration, _valacc = CNN_framework.run(
                config,
                dataset=datahandler.make_datasets(train_set, val_set, test_set),
                classlabels=labels)

            # plot histograms of this fold's train/validation labels
            if config.plot:
                plotlib.plot_histogram(train_set.labels, 'Training data',
                                       config.logspath, 'train_histogram', labels)
                plotlib.plot_histogram(val_set.labels, 'Validation data',
                                       config.logspath, 'validation_histogram', labels)

            testaccuracy.append(testacc)
            trainaccuracy.append(trainacc)
            trainaccuracy_last.append(trainacc_last)
            times.append(duration)
            print("test accuracy in fold %s: %s " % (fold, testacc))
            print("train accuracy in fold %s: %s" % (fold, trainacc))
            print("last train acc. in fold %s: %s" % (fold, trainacc_last))

        mean_testacc = np.mean(testaccuracy)

        if cvlog is not None:
            cvlog.write("test accuracies: %s\n" % testaccuracy)
            cvlog.write("train accuracies: %s\n" % trainaccuracy)
            cvlog.write("last train acc: %s\n" % trainaccuracy_last)
            # each summary value goes on its own line
            cvlog.write("average test accuracy: %s\n" % mean_testacc)
            cvlog.write("average training time: %s\n" % np.mean(times))
    finally:
        if cvlog is not None:
            cvlog.close()

    return mean_testacc
def datasetsize_inspectionrun(alltrain, test_set, labels, splits=10):
    """Repeat the cross validation on increasingly large prefixes of the training data.

    For each target size the first ``n`` rows of ``alltrain.data`` and
    ``alltrain.labels`` are wrapped with ``datahandler.make_dataset`` and
    passed to ``crossvalidate_run``. ``config.logspath`` is set to
    ``<original logspath>tr_size<n>/`` before every run.

    Args:
        alltrain: object with sliceable ``data`` and ``labels`` attributes.
        test_set: test data set forwarded to ``crossvalidate_run``.
        labels: class labels forwarded to ``crossvalidate_run``.
        splits: number of folds forwarded to ``crossvalidate_run``.

    Returns:
        The value returned by ``crossvalidate_run`` for the last size.
    """
    base_logspath = config.logspath

    # Target sizes of the per-fold training part. Each is enlarged by
    # splits / (splits - 1) so that the part remaining for training holds
    # roughly the target number of samples.
    target_train_sizes = np.asarray([256, 512, 1024, 2048, 4096, 8192, 10000])
    scaled_sizes = np.ceil(target_train_sizes * splits / (splits - 1))
    sample_counts = [int(value) for value in scaled_sizes]  # plain integers

    for n_samples in sample_counts:
        config.logspath = base_logspath + 'tr_size' + str(n_samples) + '/'
        data_prefix = alltrain.data[:n_samples]
        labels_prefix = alltrain.labels[:n_samples]
        subset = datahandler.make_dataset(data_prefix, labels_prefix)
        result = crossvalidate_run(subset, test_set, labels, splits)

    return result
def evaluateCNN(config):
    """Evaluate the trained models below ``config.eval_modelpath`` on ``config.test_set``.

    The test data and labels are read as csv, optionally reduced to the
    selected sensors, preprocessed and scaled with the scaler stored next to
    the model. Every immediate sub-directory of ``config.eval_modelpath`` is
    then handed to ``run_for_loop_subdirectory`` together with the prepared
    test data.

    NOTE(review): this file contains a second, later definition of
    ``evaluateCNN`` that rebinds the name, so this version is shadowed.

    Args:
        config: configuration object. Reads ``eval_modelpath``, ``test_set``,
            ``plot``, ``write_file``, ``sel_sensors`` and ``preprocessing``;
            overwrites ``logspath`` and possibly ``preprocessing``.

    Returns:
        None.
    """
    # logspath becomes <eval_modelpath without last char>_<second-to-last part of test_set>/
    config.logspath = (config.eval_modelpath[:-1] + '_'
                       + config.test_set.split('/')[-2] + '/')
    if not os.path.isdir(config.logspath) and (config.plot or config.write_file):
        os.makedirs(config.logspath)  # make sure directory exists for writing the file

    # derive the preprocessing from markers in the model path, if any is present
    if 'etrend' in config.eval_modelpath:
        config.preprocessing = 'detrend'
    elif 'FFT' in config.eval_modelpath:
        config.preprocessing = 'fourier_channels'
    elif 'STFT' in config.eval_modelpath:
        config.preprocessing = 'stft'

    # get data info
    seq_length, n_channels, sensors = datahandler.read_infotxt(config.test_set)
    testlabels = datahandler.read_file('_labels', config.test_set, file_format='csv')
    testdata = datahandler.read_file('_data', config.test_set, file_format='csv')
    if testlabels.ndim == 1:
        # one-dimensional labels are converted with make_one_hot
        testlabels, _ = datahandler.make_one_hot(testlabels)

    # reduce channels if specified
    if config.sel_sensors != sensors:
        testdata, sensors, n_channels = datahandler.select_channels(
            testdata, sensors, seq_length, config.sel_sensors)
    testdata, _, _ = datahandler.do_preprocessing(
        testdata, config.preprocessing, n_channels, seq_length)

    # transform data with scaler; compare strings by value, not identity
    toscale = 'samples' if config.preprocessing == 'fourier_samples' else 'channels'
    scaler = datahandler.load_scaler(config.eval_modelpath)
    testdata = datahandler.do_scaling(testdata, scaler, n_channels, seq_length,
                                      toscale=toscale)

    eval_data = datahandler.make_datasets(
        None, None, datahandler.make_dataset(testdata, testlabels))

    # immediate sub-directories of the model path
    subDirectories = next(os.walk(config.eval_modelpath))[1]
    # remember the base paths before handing config to the per-directory runs
    baselogspath = config.logspath
    baseevalmodelpath = config.eval_modelpath
    for subDirectory in subDirectories:
        run_for_loop_subdirectory(config=config,
                                  subDirectory=subDirectory,
                                  baseevalmodelpath=baseevalmodelpath,
                                  baselogspath=baselogspath,
                                  eval_data=eval_data,
                                  testlabels=testlabels)
def evaluateCNN(config):
    """Evaluate the trained models below ``config.eval_modelpath`` on ``config.test_set``.

    The test data, labels and (if available) observation ids are read as csv,
    optionally reduced to the selected sensors, preprocessed and scaled with
    the scaler stored next to the model. Every immediate sub-directory of
    ``config.eval_modelpath`` is then handed to ``run_for_loop_subdirectory``
    together with the prepared test data.

    Args:
        config: configuration object. Reads ``eval_modelpath``, ``test_set``,
            ``plot``, ``write_file``, ``sel_sensors`` and ``preprocessing``;
            overwrites ``logspath`` and possibly ``preprocessing``.

    Returns:
        None.
    """
    # logspath becomes <eval_modelpath without last char>_<second-to-last part of test_set>/
    config.logspath = (config.eval_modelpath[:-1] + '_'
                       + config.test_set.split('/')[-2] + '/')
    if not os.path.isdir(config.logspath) and (config.plot or config.write_file):
        os.makedirs(config.logspath)  # make sure directory exists for writing the file

    # TODO: read preprocessing etc. from the log file of the trained system
    # Until then it is derived from markers in the model path, if any is present.
    if 'etrend' in config.eval_modelpath:
        config.preprocessing = 'detrend'
    elif 'FFT' in config.eval_modelpath:
        config.preprocessing = 'fourier_channels'
    elif 'STFT' in config.eval_modelpath:
        config.preprocessing = 'stft'

    # get data info
    seq_length, n_channels, sensors = datahandler.read_infotxt(config.test_set)

    # NOTE: only the csv files are read here (an npy variant was disabled earlier)
    testlabels = datahandler.read_file('labels', config.test_set, file_format='csv')
    testdata = datahandler.read_file('dataset', config.test_set, file_format='csv')

    # Observation ids are optional: fall back to a running index when they
    # cannot be read. Only ordinary errors are caught, so KeyboardInterrupt
    # and SystemExit still propagate.
    try:
        idx = datahandler.read_file('obsID', config.test_set, file_format='csv')
        idx = idx.reshape(-1, 1)
    except Exception:
        idx = np.arange(testdata.shape[0]).reshape(-1, 1)

    if testlabels.ndim == 1:
        # one-dimensional labels are converted with make_one_hot
        testlabels, _ = datahandler.make_one_hot(testlabels)

    # reduce channels if specified
    if config.sel_sensors != sensors:
        testdata, sensors, n_channels = datahandler.select_channels(
            testdata, sensors, seq_length, config.sel_sensors)
    testdata, _, _ = datahandler.do_preprocessing(
        testdata, config.preprocessing, n_channels, seq_length)

    # transform data with scaler; compare strings by value, not identity
    toscale = 'samples' if config.preprocessing == 'fourier_samples' else 'channels'
    scaler = datahandler.load_scaler(config.eval_modelpath)
    testdata = datahandler.do_scaling(testdata, scaler, n_channels, seq_length,
                                      toscale=toscale)

    eval_data = datahandler.make_datasets(
        None, None, datahandler.make_dataset(testdata, testlabels, index=idx))

    # immediate sub-directories of the model path
    subDirectories = next(os.walk(config.eval_modelpath))[1]
    # remember the base paths before handing config to the per-directory runs
    baselogspath = config.logspath
    baseevalmodelpath = config.eval_modelpath
    for subDirectory in subDirectories:
        run_for_loop_subdirectory(config=config,
                                  subDirectory=subDirectory,
                                  baseevalmodelpath=baseevalmodelpath,
                                  baselogspath=baselogspath,
                                  eval_data=eval_data,
                                  testlabels=testlabels)