def runForBaseClusterResults(normMode, randomSeed=5, clusterModels=['KMeans', 'GMM_diag'],
                             dataToUseArr=["hog", "skeleton", "sn"], numOfSignsArr=[11], clustCntVec=None):
    data_dir = funcH.getVariableByComputerName('data_dir')
    results_dir = funcH.getVariableByComputerName('results_dir')
    for dataToUse in dataToUseArr:
        for numOfSigns in numOfSignsArr:
            if clustCntVec is None:
                clustCntVec = [64, 128, 256]
            if dataToUse == 'skeleton':
                dimArray = [32, 64, 96]
            else:  # dataToUse == 'sn' or dataToUse == 'hog'
                dimArray = [256]
            for dims in dimArray:
                funcHP.runClusteringOnFeatSet(data_dir=data_dir, results_dir=results_dir,
                                              dataToUse=dataToUse, normMode=normMode,
                                              numOfSigns=numOfSigns, pcaCount=dims,
                                              expectedFileType='Data', clustCntVec=clustCntVec,
                                              clusterModels=clusterModels, randomSeed=randomSeed)
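# A hedged usage sketch: run KMeans clustering over the HOG features of the 11-sign
# set with a single cluster count (paths resolve via funcH.getVariableByComputerName):
# runForBaseClusterResults(normMode='', randomSeed=5, clusterModels=['KMeans'],
#                          dataToUseArr=['hog'], numOfSignsArr=[11], clustCntVec=[64])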
def main(argv):
    base_dir = funcH.getVariableByComputerName('base_dir')
    data_dir = funcH.getVariableByComputerName('data_dir')
    results_dir = funcH.getVariableByComputerName('results_dir')
    print(argv)
    modelParams, trainParams, rnnParams = parseArgs(argv)
    seed(trainParams["randomSeed"])
    tf.set_random_seed(seed=trainParams["randomSeed"])

    numOfSigns = modelParams["numOfSigns"]
    feat_set, labels_all, detailed_labels_all = funcPRH.loadData(modelParams, numOfSigns, data_dir)
    data_dim = feat_set.shape[1]

    exp_name, subEpochs, trainParams, rnnParams = getInitParams(trainParams, modelParams, rnnParams)
    csv_name, model_name, outdir = funcPRH.createExperimentDirectories(results_dir, exp_name)
    model, modelTest, ES = funcM.getModels(data_dim=data_dim, modelParams=modelParams, rnnParams=rnnParams)
    checkpointer = ModelCheckpoint(filepath=model_name, verbose=0, save_best_only=False, period=1)
    csv_logger = CSVLogger(csv_name, append=True, separator=';')

    # %%
    trainFromScratch = False
    predictionLabelsDir = results_dir + os.sep + 'results' + os.sep + exp_name
    model, epochFr, epochTo = initEpochIDsModelParams(trainFromScratch, trainParams, model,
                                                      model_name, predictionLabelsDir)
    if epochFr == epochTo:
        print("+*-+*-+*-+*-epochs completed+*-+*-+*-+*-")
        exit(12)

    modelParams["callbacks"] = [csv_logger, ES, checkpointer]
    modelParams["model_name"] = model_name
    trainParams["subEpochs"] = subEpochs
    trainParams["epochFr"] = epochFr
    trainParams["epochTo"] = epochTo
    trainParams["corr_indis_a"] = np.mod(epochFr, 2) if epochFr != 0 else \
        np.mod(int(trainParams["corr_swapMode"]) + int(trainParams["corr_randMode"]), 2)
    if trainParams["applyCorr"] >= 1:
        trainParams["corrFramesAll"] = funcD.getCorrespondentFrames(base_dir=base_dir, data_dir=data_dir,
                                                                    featType=modelParams["dataToUse"],
                                                                    normMode=modelParams["normMode"],
                                                                    pcaCount=modelParams["pcaCount"],
                                                                    numOfSigns=numOfSigns,
                                                                    expectedFileType='Data')
    print('started training')
    directoryParams = {
        "outdir": outdir,
        "data_dir": data_dir,
        "predictionLabelsDir": predictionLabelsDir,
        "nmi_and_acc_file_name": outdir + os.sep + exp_name + '_nmi_acc.txt',
    }
    if modelParams["trainMode"] == "rsa" or modelParams["trainMode"] == "corsa":
        funcTL.trainRNN(trainParams, modelParams, rnnParams, detailed_labels_all,
                        model, modelTest, feat_set, labels_all, directoryParams)
    else:
        funcTL.trainFramewise(trainParams, modelParams, model, modelTest, feat_set, labels_all, directoryParams)
def get_create_folders(params_dict):
    data_path_base = params_dict["data_path_base"]
    data_ident = 'data_' + params_dict["data_ident"]
    base_dir = funcH.getVariableByComputerName('base_dir')  # xx/DataPath or xx/DataFolder
    results_dir = os.path.join(base_dir, 'sup', 'results_mi' + str(params_dict["model_id"]))
    models_dir = os.path.join(base_dir, 'sup', 'models_mi' + str(params_dict["model_id"]))
    data_params_folder = os.path.join(base_dir, 'sup', 'data_mi', data_ident)
    data_path_base = os.path.join(base_dir, data_path_base, "imgs")
    result_fold = os.path.join(base_dir, 'sup', 'preds_' + params_dict["modelName"],
                               'pred_' + params_dict["exp_ident"])
    path_dict = {
        "results": results_dir,  # folder="~/DataFolder/sup/results_mi1"
        "models": models_dir,
        "data_base": data_path_base,  # original path of data to load
        "data_params_folder": data_params_folder,  # data params folder
        "result_fold": result_fold,  # to save the predictions and labels
    }
    funcH.createDirIfNotExist(results_dir)
    funcH.createDirIfNotExist(models_dir)
    funcH.createDirIfNotExist(data_params_folder)
    funcH.createDirIfNotExist(result_fold)
    return path_dict
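# Example (a sketch; the params_dict values are illustrative, only the keys read above are required):
# path_dict = get_create_folders({"data_path_base": "someDataset", "data_ident": "di1",
#                                 "model_id": 1, "modelName": "resnet18", "exp_ident": "exp01"})
# print(path_dict["result_fold"])  # <base_dir>/sup/preds_resnet18/pred_exp01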
def decode_initial_label_param(initialLabelParam):
    if initialLabelParam is None:
        initialLabelVec = None
        expNameEnd = ""
    else:
        initialLabelVecStrings = initialLabelParam.split("_")
        if initialLabelVecStrings[0] == 'fn':
            fileName_end = initialLabelVecStrings[1]  # 'baseResults-hgsk256-11-KMeans-256.npz'
            results_dir = funcH.getVariableByComputerName('results_dir').replace("bdResults", "dcResults")
            labelFileFullName = os.path.join(results_dir, fileName_end)
            if fileName_end.startswith("baseResults"):
                # np.savez(predictionFileNameFull, labels_all, predClusters)
                npzDict = np.load(labelFileFullName, allow_pickle=True)
                initialLabelVec = npzDict["arr_1"]
            elif fileName_end.startswith("cosae"):
                initialLabelVec = np.load(labelFileFullName)
            else:
                print('Not a valid input')
                os._exit(29)
            expNameEnd = fileName_end
        else:
            print('Not implemented yet')
            os._exit(30)
        if initialLabelVecStrings[0] == "baseResults":
            print('Not implemented yet')
            os._exit(30)
    return initialLabelVec, expNameEnd
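# Example calls (sketches; the .npz file name is illustrative and must exist under the dcResults folder):
# initialLabelVec, expNameEnd = decode_initial_label_param(None)  # -> (None, "")
# initialLabelVec, expNameEnd = decode_initial_label_param("fn_baseResults-hgsk256-11-KMeans-256.npz")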
def load_usps(data_path=os.path.join(getVariableByComputerName("n2d_experiments"), 'usps', 'data')):
    createDirIfNotExist(data_path)
    file_name_tr = os.path.join(data_path, 'usps_train.jf')
    file_name_te = os.path.join(data_path, 'usps_test.jf')
    link_adr_path = 'https://raw.githubusercontent.com/cvjena/ITAL/master/data/usps_<trte>.jf'
    if not os.path.exists(file_name_tr):
        download_file(link_adr_path.replace("<trte>", "train"), save2path=data_path, savefilename='usps_train.jf')
        # os.system('wget http://www-i6.informatik.rwth-aachen.de/~keysers/usps_train.jf.gz -P %s' % data_path)
        download_file(link_adr_path.replace("<trte>", "test"), save2path=data_path, savefilename='usps_test.jf')
        # os.system('wget http://www-i6.informatik.rwth-aachen.de/~keysers/usps_test.jf.gz -P %s' % data_path)
    with open(file_name_tr) as f:
        data = f.readlines()
    data = data[1:-1]
    data = [list(map(float, line.split())) for line in data]
    data = np.array(data)
    data_train, labels_train = data[:, 1:], data[:, 0]
    with open(file_name_te) as f:
        data = f.readlines()
    data = data[1:-1]
    data = [list(map(float, line.split())) for line in data]
    data = np.array(data)
    data_test, labels_test = data[:, 1:], data[:, 0]
    x = np.concatenate((data_train, data_test)).astype('float64')
    y = np.concatenate((labels_train, labels_test))
    print('USPS samples', x.shape)
    return x, y
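# Usage sketch: downloads the .jf files on first call, then returns the stacked train+test split
# (the full USPS set is 9298 samples of 256 grayscale pixel features):
# x, y = load_usps()
# print(x.shape, np.unique(y))  # expected (9298, 256) and digit labels 0..9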
def load_pendigits(data_path=os.path.join(getVariableByComputerName("n2d_experiments"), 'pendigits', 'data')):
    createDirIfNotExist(data_path)
    file_name_tr = os.path.join(data_path, 'pendigits.tra')
    file_name_te = os.path.join(data_path, 'pendigits.tes')
    link_adr_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits<file_ending>'
    if not os.path.exists(file_name_tr):
        os.makedirs(data_path, exist_ok=True)
        download_file(link_adr_path.replace("<file_ending>", ".tra"), save2path=data_path, savefilename='pendigits.tra')
        # os.system('wget https://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.tra -P %s' % data_path)
        download_file(link_adr_path.replace("<file_ending>", ".tes"), save2path=data_path, savefilename='pendigits.tes')
        # os.system('wget https://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.tes -P %s' % data_path)
        download_file(link_adr_path.replace("<file_ending>", ".names"), save2path=data_path, savefilename='pendigits.names')
        # os.system('wget https://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.names -P %s' % data_path)
    # load training data
    with open(file_name_tr) as file:
        data = file.readlines()
    data = [list(map(float, line.split(','))) for line in data]
    data = np.array(data).astype(np.float32)
    data_train, labels_train = data[:, :-1], data[:, -1]
    # load testing data
    with open(file_name_te) as file:
        data = file.readlines()
    data = [list(map(float, line.split(','))) for line in data]
    data = np.array(data).astype(np.float32)
    data_test, labels_test = data[:, :-1], data[:, -1]
    x = np.concatenate((data_train, data_test)).astype('float32')
    y = np.concatenate((labels_train, labels_test))
    x /= 100.
    y = y.astype('int')
    return x, y
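# Usage sketch: pendigits stacks train+test (10992 samples, 16 pen-coordinate features)
# and rescales the 0..100 coordinates to [0, 1] via the division above:
# x, y = load_pendigits()
# print(x.shape, x.max(), np.unique(y))  # expected (10992, 16), <= 1.0, labels 0..9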
def createCombinedDatasets(numOfSigns=11):
    data_dir = funcH.getVariableByComputerName('data_dir')
    fName_sn = funcD.getFileName('sn', '', -1, numOfSigns, 'Data')
    fName_hg = funcD.getFileName('hog', '', -1, numOfSigns, 'Data')
    fName_sk = funcD.getFileName('skeleton', '', -1, numOfSigns, 'Data')

    X_sn = funcD.loadFileIfExist(directoryOfFile=data_dir, fileName=fName_sn)
    X_hg = funcD.loadFileIfExist(directoryOfFile=data_dir, fileName=fName_hg)
    X_sk = funcD.loadFileIfExist(directoryOfFile=data_dir, fileName=fName_sk)
    print('X_sn.shape = ', X_sn.shape)
    print('X_hg.shape = ', X_hg.shape)
    print('X_sk.shape = ', X_sk.shape)

    X_hgsnsk = np.concatenate([X_hg.T, X_sn.T, X_sk.T]).T
    X_hgsn = np.concatenate([X_hg.T, X_sn.T]).T
    X_hgsk = np.concatenate([X_hg.T, X_sk.T]).T
    X_snsk = np.concatenate([X_sn.T, X_sk.T]).T
    print('X_hgsnsk.shape = ', X_hgsnsk.shape)
    print('X_hgsn.shape = ', X_hgsn.shape)
    print('X_hgsk.shape = ', X_hgsk.shape)
    print('X_snsk.shape = ', X_snsk.shape)

    fName_hgsnsk = os.path.join(data_dir, fName_hg.replace("hog", "hgsnsk"))
    fName_hgsn = os.path.join(data_dir, fName_hg.replace("hog", "hgsn"))
    fName_hgsk = os.path.join(data_dir, fName_hg.replace("hog", "hgsk"))
    fName_snsk = os.path.join(data_dir, fName_hg.replace("hog", "snsk"))

    # the fName_* variables above are already full paths, so check them directly
    if os.path.isfile(fName_hgsnsk):
        _ = np.load(fName_hgsnsk)
    else:
        np.save(fName_hgsnsk, X_hgsnsk)
    if os.path.isfile(fName_hgsn):
        _ = np.load(fName_hgsn)
    else:
        np.save(fName_hgsn, X_hgsn)
    if os.path.isfile(fName_hgsk):
        _ = np.load(fName_hgsk)
    else:
        np.save(fName_hgsk, X_hgsk)
    if os.path.isfile(fName_snsk):
        _ = np.load(fName_snsk)
    else:
        np.save(fName_snsk, X_snsk)

# resultDict = funcHP.runClusteringOnFeatSet(data_dir=funcH.getVariableByComputerName('data_dir'),
#                                            results_dir=funcH.getVariableByComputerName('results_dir'),
#                                            dataToUse='skeleton', numOfSigns=11, pcaCount=32,
#                                            expectedFileType='Data', clusterModels=['KMeans', 'GMM_diag'],
#                                            randomSeed=5)
# runForBaseClusterResults(normMode='', clusterModels=['KMeans', 'GMM_diag'])
# runForBaseClusterResults_OPTICS(randomSeed=5, clustCntVec=[32, 64])
# run4All_createData(sign_countArr=[12])
# createCombinedDatasets(numOfSigns=41)
# checkCreateData2Use(41, "snsk", recreate=False, recreate_hog=False)
def runForBaseClusterResults_OPTICS(randomSeed=5, clustCntVec=[32, 64, 128, 256, 512],
                                    dataToUseArr=["hog", "skeleton", "sn"], numOfSignsVec=[11, 41]):
    data_dir = funcH.getVariableByComputerName('data_dir')
    results_dir = funcH.getVariableByComputerName('results_dir')
    for dataToUse in dataToUseArr:
        for numOfSigns in numOfSignsVec:
            if dataToUse == 'skeleton':
                dimArray = [96]  # 32, 64,
            else:  # dataToUse == 'sn' or dataToUse == 'hog'
                dimArray = [256]  # 512
            for dims in dimArray:
                funcHP.runOPTICSClusteringOnFeatSet(data_dir=data_dir, results_dir=results_dir,
                                                    dataToUse=dataToUse, numOfSigns=numOfSigns,
                                                    pcaCount=dims, expectedFileType='Data',
                                                    clustCntVec=clustCntVec, randomSeed=randomSeed)
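# A hedged example run, mirroring the commented calls elsewhere in this file:
# OPTICS clustering on skeleton features of the 11-sign set only:
# runForBaseClusterResults_OPTICS(randomSeed=5, clustCntVec=[32, 64],
#                                 dataToUseArr=['skeleton'], numOfSignsVec=[11])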
def load_har(data_path=os.path.join(getVariableByComputerName("n2d_experiments"), 'har', 'data')):
    # load this dataset this way ??
    # https://pypi.org/project/kcc2020-tutorial-HAR-dataset/
    # entire_dataset = load_har_all()
    createDirIfNotExist(data_path)
    fold_train = os.path.join(data_path, 'train')
    fold_test = os.path.join(data_path, 'test')
    createDirIfNotExist(fold_train)
    createDirIfNotExist(fold_test)
    fname_train_x = os.path.join(fold_train, 'X_train.txt')
    fname_train_y = os.path.join(fold_train, 'y_train.txt')
    fname_test_x = os.path.join(fold_test, 'X_test.txt')
    fname_test_y = os.path.join(fold_test, 'y_test.txt')
    # https://github.com/mollybostic/cleaning-data-assignment/tree/master/UCI%20HAR%20Dataset
    # for windows = https://sourceforge.net/projects/gnuwin32/files/wget/1.11.4-1/wget-1.11.4-1-setup.exe/download
    # https://stackoverflow.com/questions/29113456/wget-not-recognized-as-internal-or-external-command
    link_adr_path = 'https://raw.githubusercontent.com/mollybostic/cleaning-data-assignment/master/UCI%20HAR%20Dataset/<trte>/<Xy>_<trte>.txt'
    if not os.path.isfile(fname_train_x):
        print('downloading X_train.txt(66.0MB)')
        download_file(link_adr_path.replace("<trte>", "train").replace("<Xy>", "X"),
                      save2path=fold_train, savefilename='X_train.txt')
        print('downloading y_train.txt(14.7kB)')
        download_file(link_adr_path.replace("<trte>", "train").replace("<Xy>", "y"),
                      save2path=fold_train, savefilename='y_train.txt')
        print('downloading X_test.txt(26.5MB)')
        download_file(link_adr_path.replace("<trte>", "test").replace("<Xy>", "X"),
                      save2path=fold_test, savefilename='X_test.txt')
        print('downloading y_test.txt(5.9kB)')
        download_file(link_adr_path.replace("<trte>", "test").replace("<Xy>", "y"),
                      save2path=fold_test, savefilename='y_test.txt')
    x_train = pd.read_csv(fname_train_x, sep=r'\s+', header=None)
    y_train = pd.read_csv(fname_train_y, header=None)
    x_test = pd.read_csv(fname_test_x, sep=r'\s+', header=None)
    y_test = pd.read_csv(fname_test_y, header=None)
    x = np.concatenate((x_train, x_test))
    y = np.concatenate((y_train, y_test))
    # labels start at 1 so..
    y = y - 1
    y = y.reshape((y.size,))
    y_names = {0: 'Walking', 1: 'Upstairs', 2: 'Downstairs',
               3: 'Sitting', 4: 'Standing', 5: 'Laying'}
    # os.error("not implemented")  # no-op: this only constructed an OSError without raising it
    return x, y, y_names
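# Usage sketch (the loader above is flagged as not fully verified):
# x, y, y_names = load_har()
# print(x.shape, [y_names[i] for i in np.unique(y)])
# # the full UCI HAR set gives 10299 samples x 561 features and the six activity names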
def get_create_folders(params_dict):
    data_path_base = params_dict["data_path_base"]
    data_ident = 'data_' + params_dict["data_ident"]
    base_dir = funcH.getVariableByComputerName('base_dir')  # xx/DataPath or xx/DataFolder
    results_dir = os.path.join(base_dir, 'sup', 'results_' + params_dict["modelName"])
    models_dir = os.path.join(base_dir, 'sup', 'models_' + params_dict["modelName"])
    data_path_fill = os.path.join(base_dir, 'sup', 'data', data_ident)
    exp_ident_str = 'rs' + str(params_dict["randomSeed"]).zfill(2)
    data_path_train = os.path.join(data_path_fill, data_path_base + '_' + exp_ident_str + '_tr')
    data_path_valid = os.path.join(data_path_fill, data_path_base + '_' + exp_ident_str + '_va')
    data_path_test = os.path.join(data_path_fill, data_path_base + '_' + exp_ident_str + '_te')
    data_path_base = os.path.join(base_dir, data_path_base, "imgs")
    result_fold = os.path.join(base_dir, 'sup', 'preds_' + params_dict["modelName"],
                               'pred_' + params_dict["exp_ident"])
    path_dict = {
        "results": results_dir,  # folder="~/DataFolder/sup/results"
        "models": models_dir,
        "data_base": data_path_base,  # original path of data to load
        "train": data_path_train,  # train data to create
        "valid": data_path_valid,  # valid data to create
        "test": data_path_test,  # test data to create
        "result_fold": result_fold,  # to save the predictions and labels
    }
    funcH.createDirIfNotExist(results_dir)
    funcH.createDirIfNotExist(models_dir)
    funcH.createDirIfNotExist(data_path_train)
    funcH.createDirIfNotExist(data_path_valid)
    funcH.createDirIfNotExist(data_path_test)
    funcH.createDirIfNotExist(result_fold)
    return path_dict
def n_run_autoencode(x, args):
    global debug_string_out
    # input_dict :
    #   fit_verbose
    input_dict = argparse.ArgumentParser(description='func_autoencode',
                                         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    input_dict.add_argument('--experiments_folder_base', default=funcH.getVariableByComputerName("n2d_experiments"))
    input_dict.add_argument('--n_clusters', default=10, type=int)
    input_dict.add_argument('--dataset', default='mnist')
    input_dict.add_argument('--batch_size', default=256, type=int)
    input_dict.add_argument('--pretrain_epochs', default=100, type=int)
    input_dict.add_argument('--fit_verbose', default=True, type=bool)
    args2 = funcH._parse_args(input_dict, args, print_args=True)

    shape = [x.shape[-1], 500, 500, 2000, args2.n_clusters]
    ae = _autoencoder(shape)
    hidden = ae.get_layer(name='encoder_%d' % (len(shape) - 2)).output
    encoder = Model(inputs=ae.input, outputs=hidden)

    print("checking if ", args.experiment_names_and_folders["file_name_ae_weights_full"], " exist.")
    weights_file = args.experiment_names_and_folders["file_name_ae_weights_full"]
    load_file_skip_learning = os.path.isfile(weights_file)

    t = funcH.Timer()
    # Pretrain autoencoders before clustering
    if load_file_skip_learning:
        debug_string_out = funcH.print_and_add("Load weights from(" + weights_file + ")", debug_string_out)
        ae.load_weights(weights_file)
    else:
        optimizer = 'adam'
        ae.compile(loss='mse', optimizer=optimizer)
        ae.fit(x, x, batch_size=args2.batch_size, epochs=args2.pretrain_epochs, verbose=1)
        t.end()
        ae.save_weights(weights_file)
        debug_string_out = funcH.print_and_add("Time to train the ae: " + t.get_elapsed_time(), debug_string_out)

    with open(args.experiment_names_and_folders["file_name_ae_params_text_full"], 'w') as f:
        f.write("\n".join([str(k) + ":" + str(args2.__dict__[k]) for k in args2.__dict__]))
    hl = encoder.predict(x)
    return hl
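# A minimal usage sketch; 'args' must already carry the experiment_names_and_folders
# dict built by get_args (defined below), and 'x' the flattened feature matrix:
# args = get_args(["--dataset", "mnist", "--pretrain_epochs", "10"])
# hl = n_run_autoencode(x, args)  # hl: bottleneck activations, shape (n_samples, n_clusters)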
def script_hgsk():
    global debug_string_out
    pretrain_epochs = [10]
    ml = "UMAP"
    ds = "hgsk_256_41"
    for cluster in ['KM', 'GMM']:
        for ae_epoc in pretrain_epochs:
            for clust_cnt in [512, 1024]:
                # umap_dim = 20, n_clusters_ae = 20, umap_neighbors = 40
                for umap_neighbors in [20, 30, 40]:
                    try:
                        debug_string_out.clear()
                        main(["--dataset", ds, "--gpu", "0",
                              "--pretrain_epochs", str(ae_epoc),
                              "--n_clusters", str(clust_cnt),
                              "--cluster", cluster,
                              "--umap_dim", str(clust_cnt),
                              "--umap_neighbors", str(umap_neighbors),
                              "--manifold_learner", ml,
                              "--umap_min_dist", "0.00"])
                    except Exception as e:
                        debug_string_out = funcH.print_and_add(ds + '_' + ml + " - problem", debug_string_out)
                        debug_string_out = funcH.print_and_add(str(e), debug_string_out)
                        exp_date_str = str(datetime.now().strftime("%Y%m%d_%H%M")).replace('-', '')  # %S
                        with open(os.path.join(funcH.getVariableByComputerName("n2d_experiments"),
                                               ds + '_' + ml + '_error_' + exp_date_str + '.txt'), 'w') as f:
                            f.write("\n".join(debug_string_out))
def script():
    global debug_string_out
    pretrain_epochs = [10, 50]
    manifold_learners_all = ["UMAP"]
    dataset_names_all = ["cifar10", "mnist", "pendigits", "fashion"]  # , "usps", "har"
    cluster_func = "HDBSCAN"
    for ds in dataset_names_all:
        for ml in manifold_learners_all:
            for ae_epoc in pretrain_epochs:
                for clust_cnt in [20]:
                    # umap_dim = 20, n_clusters_ae = 20, umap_neighbors = 40
                    try:
                        debug_string_out = []
                        main(["--dataset", ds, "--gpu", "0",
                              "--pretrain_epochs", str(ae_epoc),
                              "--n_clusters", str(clust_cnt),
                              "--cluster", str(cluster_func),
                              "--umap_dim", str(clust_cnt),
                              "--umap_neighbors", str(2 * clust_cnt),
                              "--manifold_learner", ml,
                              "--umap_min_dist", "0.00"])
                    except Exception as e:
                        debug_string_out = funcH.print_and_add(ds + '_' + ml + " - problem \n" + str(e), debug_string_out)
                        exp_date_str = str(datetime.now().strftime("%Y%m%d_%H%M")).replace('-', '')  # %S
                        with open(os.path.join(funcH.getVariableByComputerName("n2d_experiments"),
                                               ds + '_' + ml + '_error_' + exp_date_str + '.txt'), 'w') as f:
                            f.write("\n".join(debug_string_out))
def get_args(argv):
    global debug_string_out
    parser = argparse.ArgumentParser(description='(Not Too) Deep',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--dataset', default='mnist')
    parser.add_argument('--ae_weights', default=None)
    parser.add_argument('--experiments_folder_base', default=funcH.getVariableByComputerName("n2d_experiments"))
    parser.add_argument('--mode', default='client')
    parser.add_argument('--port', default=52162)
    parser.add_argument('--gpu', default=0)
    parser.add_argument('--n_clusters', default=10, type=int)
    parser.add_argument('--batch_size', default=256, type=int)
    parser.add_argument('--pretrain_epochs', default=1000, type=int)
    parser.add_argument('--umap_dim', default=2, type=int)
    parser.add_argument('--umap_neighbors', default=10, type=int)
    parser.add_argument('--umap_min_dist', default="0.00", type=str)
    parser.add_argument('--umap_metric', default='euclidean', type=str)
    parser.add_argument('--cluster', default='GMM', type=str)
    parser.add_argument('--manifold_learner', default='UMAP', type=str)
    parser.add_argument('--visualize', default=False, type=bool)
    parser.add_argument('--rerun_last_plots', default=False, type=bool)
    args = funcH._parse_args(parser, argv, print_args=True)
    debug_string_out = funcH.print_and_add('-' * 80)

    experiment_names_and_folders = {
        "exp_date_str": str(datetime.now().strftime("%Y%m%d_")).replace('-', ''),  # %M%S
        "exp_base_str": "_".join([args.dataset,
                                  "c" + str(args.cluster) + str(args.n_clusters),
                                  "e" + str(args.pretrain_epochs)]),
        "folder_umap_data": os.path.join(args.experiments_folder_base, "exported_manifolds"),
        "folder_ae_weights": os.path.join(args.experiments_folder_base, "weights"),
    }
    experiment_names_and_folders["exp_extended"] = (
        experiment_names_and_folders["exp_base_str"] + "_"
        + "_".join([args.manifold_learner + "ud" + str(args.umap_dim), "un" + str(args.umap_neighbors)]))
    experiment_names_and_folders["folder_experiment"] = os.path.join(
        args.experiments_folder_base, args.dataset,
        experiment_names_and_folders["exp_date_str"] + experiment_names_and_folders["exp_extended"])
    experiment_names_and_folders["file_name_ae_weights_base"] = "aew_" + "_".join(
        [args.dataset, "c" + str(args.n_clusters), "e" + str(args.pretrain_epochs)])
    experiment_names_and_folders["file_name_ae_weights_full"] = os.path.join(
        experiment_names_and_folders["folder_ae_weights"],
        experiment_names_and_folders["file_name_ae_weights_base"] + '.npy')
    experiment_names_and_folders["file_name_umap_data_base"] = "ulp" + experiment_names_and_folders["exp_extended"]
    experiment_names_and_folders["file_name_umap_data_full"] = os.path.join(
        experiment_names_and_folders["folder_umap_data"],
        experiment_names_and_folders["file_name_umap_data_base"] + '.npy')
    experiment_names_and_folders["file_name_arguments_full"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'args_' + experiment_names_and_folders["exp_extended"] + "_"
        + experiment_names_and_folders["exp_date_str"] + '.txt')
    experiment_names_and_folders["file_name_ae_params_text_full"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'args_autoencode_' + experiment_names_and_folders["exp_extended"] + "_"
        + experiment_names_and_folders["exp_date_str"] + '.txt')
    experiment_names_and_folders["file_name_plot_fig_full"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'plot_' + experiment_names_and_folders["exp_extended"] + "_"
        + experiment_names_and_folders["exp_date_str"] + '_<plot_id>.png')
    experiment_names_and_folders["file_name_plot_csv_full"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'csv_' + experiment_names_and_folders["exp_extended"] + "_"
        + experiment_names_and_folders["exp_date_str"] + '.csv')
    experiment_names_and_folders["file_name_clusters_after_manifold_full"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'clusters_after_manifold-' + experiment_names_and_folders["exp_extended"] + "_"
        + experiment_names_and_folders["exp_date_str"] + '.txt')
    experiment_names_and_folders["file_name_clusters_before_manifold_full"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'clusters_before_manifold-' + experiment_names_and_folders["exp_extended"] + "_"
        + experiment_names_and_folders["exp_date_str"] + '.txt')
    experiment_names_and_folders["file_name_debug_string_out_full"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'debug_string_out-' + experiment_names_and_folders["exp_extended"] + "_"
        + experiment_names_and_folders["exp_date_str"] + '.txt')
    experiment_names_and_folders["file_name_result_csv_file_full"] = os.path.join(
        args.experiments_folder_base, 'results.csv')
    experiment_names_and_folders["file_name_data_before_manifold"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'data_' + experiment_names_and_folders["exp_extended"] + '_before.npz')
    experiment_names_and_folders["file_name_data_after_manifold"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'data_' + experiment_names_and_folders["exp_extended"] + '_after.npz')
    experiment_names_and_folders["file_name_cluster_obj"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'cluster_obj_' + experiment_names_and_folders["exp_extended"] + '_<bef_aft>.dictionary')
    experiment_names_and_folders["file_name_silhouette_results"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'silhouette_results_' + experiment_names_and_folders["exp_extended"] + '_<bef_aft>.npy')
    experiment_names_and_folders["file_name_results"] = os.path.join(
        experiment_names_and_folders["folder_experiment"],
        'results_' + experiment_names_and_folders["exp_extended"] + '.dictionary')
    args.experiment_names_and_folders = experiment_names_and_folders

    # 3 folders: folder_{experiment, umap_data, ae_weights}
    funcH.createDirIfNotExist(experiment_names_and_folders["folder_experiment"])
    funcH.createDirIfNotExist(experiment_names_and_folders["folder_umap_data"])
    funcH.createDirIfNotExist(experiment_names_and_folders["folder_ae_weights"])
    with open(experiment_names_and_folders["file_name_arguments_full"], 'w') as f:
        f.write("\n".join(argv))
    return args
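# Example (a sketch): build the argument namespace and inspect the derived experiment folder:
# args = get_args(["--dataset", "mnist", "--n_clusters", "10", "--pretrain_epochs", "10"])
# print(args.experiment_names_and_folders["folder_experiment"])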
# NOTE: fragment — the two enclosing conditionals were not included in this excerpt;
# they are marked below and must be restored from the original script.
# if <run_mnist_autoencoder_demo>:      # condition elided in source
#     if <train_from_scratch>:          # condition elided in source
        ae = modelLoader.modelLoad_MNIST()
        # compile it using adam optimizer
        ae.compile(optimizer="adam", loss="mse")
        # Train it by providing training images
        ae.fit(train_images, train_images, epochs=2)
        modelLoader.saveModel(ae, "model_tex")
    else:
        ae = modelLoader.loadModel("model_tex")
    prediction = ae.predict(train_images[0:199, :, :, :], verbose=1, batch_size=100)
    x = prediction[0].reshape(28, 28)
    plt.imshow(x)
    plt.show()
else:
    exp_name = 'cnnAE'
    results_dir = funcH.getVariableByComputerName('results_dir')
    outdir = os.path.join(results_dir, 'results', exp_name)
    csv_name = os.path.join(results_dir, 'epochs') + os.sep + exp_name + '.csv'
    model_name = os.path.join(results_dir, 'models') + os.sep + exp_name + '.h5'
    funcH.createDirIfNotExist(os.path.join(results_dir, 'epochs'))
    funcH.createDirIfNotExist(os.path.join(results_dir, 'models'))
    funcH.createDirIfNotExist(outdir)
    checkpointer = ModelCheckpoint(filepath=model_name, verbose=0, save_best_only=False, period=1)
    csv_logger = CSVLogger(csv_name, append=True, separator=';')
    # ES = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=50, verbose=0, mode='auto')
    # callbacks = [csv_logger, ES, checkpointer]
    feat_set, labels_all, detailedLabels_all = dataLoader.loadData_nnVidImages('/home/dg/DataPath/bdData')
def loadData_hog(base_dir=funcH.getVariableByComputerName('base_dir'),
                 data_dir=funcH.getVariableByComputerName('data_dir'),
                 loadHogIfExist=True, numOfSigns=11):
    videosFolderName = 'neuralNetHandVideos_' + str(numOfSigns)
    base_dir_train_feat = os.path.join(base_dir, videosFolderName)
    hogFeatsFileName = getFileName(dataToUse='hog', normMode='', pcaCount=-1,
                                   numOfSigns=numOfSigns, expectedFileType='Data')
    hogFeatsFileNameFull = os.path.join(data_dir, hogFeatsFileName)
    labelsFileNameFull = os.path.join(data_dir, getFileName(dataToUse='hog', normMode='', pcaCount=-1,
                                                            numOfSigns=numOfSigns, expectedFileType='Labels'))
    detailedLabelsFileNameFull = os.path.join(data_dir, getFileName(dataToUse='hog', normMode='', pcaCount=-1,
                                                                    numOfSigns=numOfSigns,
                                                                    expectedFileType='DetailedLabels'))
    if loadHogIfExist and os.path.isfile(hogFeatsFileNameFull) and os.path.isfile(labelsFileNameFull) \
            and os.path.isfile(detailedLabelsFileNameFull):
        print('loading exported feat_set from(', hogFeatsFileNameFull, ')')
        feat_set = np.load(hogFeatsFileNameFull)
        labels_all = np.load(labelsFileNameFull)
        detailedLabels_all = np.load(detailedLabelsFileNameFull)
        print('loaded exported feat_set(', feat_set.shape, ') from(', hogFeatsFileName, ')')
    else:
        detailedLabels_all = np.array([0, 0, 0, 0])
        labels_all = np.array([0, 0, 0, 0])
        feat_set = np.array([0, 0, 0, 0])
        foldernames = np.sort(os.listdir(base_dir_train_feat))
        signID = 0
        frameCount = 0
        for f in foldernames:
            sign_folder = os.path.join(base_dir_train_feat, str(f).format(':02d'))
            if not os.path.isdir(sign_folder):
                continue
            signID = signID + 1
            videoID = 0
            videos = np.sort(os.listdir(sign_folder))
            print(f)
            print('going to create hog from sign folder(', sign_folder, ')')
            for v in videos:
                video_folder = os.path.join(sign_folder, v)
                if not os.path.isdir(video_folder):
                    continue
                videoID = videoID + 1
                print('going to create hog from video folder(', video_folder, ')')
                frames = os.listdir(video_folder)
                feat_set_video = np.array([0, 0, 0, 0])
                olderFileName_v01 = video_folder + os.sep + 'hog_set.npz'
                olderFileName_v02 = video_folder + os.sep + 'hog_set_41.npz'
                hogFeats_curVideo_FileNameFull = video_folder + os.sep + hogFeatsFileName.replace('.npy', '.npz')
                if os.path.isfile(olderFileName_v01):
                    os.rename(olderFileName_v01, hogFeats_curVideo_FileNameFull)
                elif os.path.isfile(olderFileName_v02):
                    os.rename(olderFileName_v02, hogFeats_curVideo_FileNameFull)
                feats_labels_loaded = False
                if os.path.isfile(hogFeats_curVideo_FileNameFull):
                    npzfile = np.load(hogFeats_curVideo_FileNameFull)
                    feat_set_video = npzfile['feat_set_video']
                    labels = npzfile['labels']
                    feats_labels_loaded = True
                if feats_labels_loaded:
                    frameList = video_folder + os.sep + '*.png'
                    pngCount = len(glob.glob(frameList))
                    feats_labels_loaded = pngCount == len(labels)
                if not feats_labels_loaded:
                    for frame in sorted(frames):
                        if frame.endswith('.png'):
                            frame_name = os.path.join(video_folder, frame)
                            img = data.load(frame_name)
                            feat_current = hog(img, pixels_per_cell=(32, 32), cells_per_block=(4, 4))
                            if np.all(feat_set_video == 0):
                                feat_set_video = feat_current
                            else:
                                feat_set_video = np.vstack((feat_set_video, feat_current))
                        elif frame.endswith('_labels.txt'):
                            labels = np.loadtxt(os.path.join(video_folder, frame))
                    np.savez(hogFeats_curVideo_FileNameFull, feat_set_video=feat_set_video, labels=labels)
                fr = frameCount
                to = frameCount + len(labels)
                frCnt = to - fr
                frameIDs = np.asarray(range(fr, to)).reshape(frCnt, -1)
                detailedLabels_video = np.hstack((signID * np.ones([frCnt, 1]),
                                                  videoID * np.ones([frCnt, 1]),
                                                  frameIDs,
                                                  np.asarray(labels).reshape(frCnt, -1)))
                if np.all(feat_set == 0):
                    feat_set = feat_set_video
                else:
                    feat_set = np.vstack((feat_set, feat_set_video))
                if np.all(labels_all == 0):
                    labels_all = labels
                else:
                    labels_all = np.hstack((labels_all, labels))
                if np.all(detailedLabels_all == 0):
                    detailedLabels_all = detailedLabels_video
                else:
                    detailedLabels_all = np.vstack((detailedLabels_all, detailedLabels_video))
                frameCount = len(labels_all)
        print('saving exported feat_set(', feat_set.shape, ') into(', hogFeatsFileNameFull, ')')
        np.save(hogFeatsFileNameFull, feat_set)
        np.save(labelsFileNameFull, labels_all)
        np.save(detailedLabelsFileNameFull, detailedLabels_all)
    return feat_set, labels_all, detailedLabels_all
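# Usage sketch: loads (or extracts and caches) HOG features for the 11-sign videos;
# detailedLabels_all columns are [signID, videoID, frameID, label]:
# feat_set, labels_all, detailedLabels_all = loadData_hog(numOfSigns=11)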
import helperFuncs as funcH
import dataLoaderFuncs as funcD
import time
# the two imports below are assumptions: this TF1-era fragment uses tf and K without showing their imports
import tensorflow as tf
from keras import backend as K

## extra imports to set GPU options ###################################
# TensorFlow wizardry
config = tf.ConfigProto()
# Don't pre-allocate memory; allocate as-needed
config.gpu_options.allow_growth = True
# Only allow a total of half the GPU memory to be allocated
config.gpu_options.per_process_gpu_memory_fraction = 0.5
# Create a session with the above options specified.
K.tensorflow_backend.set_session(tf.Session(config=config))

base_dir = funcH.getVariableByComputerName('base_dir')
data_dir = funcH.getVariableByComputerName('data_dir')
results_dir = funcH.getVariableByComputerName('results_dir')

# (fragment: 'argv' is expected to be supplied by the enclosing entry point)
posterior_dim = int(argv[1])  # K, the number of clusters
weight_of_regularizer = float(argv[2])  # sparsity parameter (a trade-off between reconstruction vs clustering)
trainMode = str(argv[3])  # trainMode = {'sae', 'cosae', 'corsa'}
corr_randMode = bool(int(argv[4]))
dataToUse = argv[5]  # dataToUse = {'hog', 'resnet18', 'sn256'}
if trainMode == "sae":
    applyCorr = 0.0
elif trainMode == "cosae":
    applyCorr = 2.0
elif trainMode == "corsa":
    applyCorr = 0.0
def main(argv):
    np.set_printoptions(formatter={"float_kind": lambda x: "%g" % x})
    params_dict = parseArgs(argv)
    numOfSigns = params_dict["numOfSigns"]  # 11 or 41
    clusterModel = params_dict["clusterModel"]  # 'KMeans', 'GMM_diag', 'Spectral'
    params_dict["hostName"] = socket.gethostname()
    initialLabelVec, expNameEnd = decode_initial_label_param(params_dict["initialLabel"])
    clusterLabelUpdateInterval = params_dict["clusterLabelUpdateInterval"]
    print('you are running this train function on = <', params_dict["hostName"], '>')

    input_initial_resize, input_size, batch_size, num_workers = initSomeVals(params_dict)
    train_data_transform, valid_data_transform = getTransformFuncs(input_size, input_initial_resize)

    base_dir = funcH.getVariableByComputerName('base_dir')  # dataPath and dataFolder
    data_dir = funcH.getVariableByComputerName('data_dir')  # bdData
    results_dir = funcH.getVariableByComputerName('results_dir').replace("bdResults", "dcResults")
    labelsDir = funcH.getVariableByComputerName('results_dir').replace("bdResults", "dcLabels")
    modelsDir = os.path.join(base_dir, 'dcModels')
    nnVidsDir = os.path.join(base_dir, 'neuralNetHandVideos_' + str(numOfSigns))

    expName = params_dict["modelName"] + '_' + \
              params_dict["clusterModel"] + \
              '_pd' + str(params_dict["posterior_dim"]) + \
              '_clui' + str(params_dict["clusterLabelUpdateInterval"]) + \
              '_' + str(numOfSigns) + \
              expNameEnd
    labelSaveFolder = os.path.join(labelsDir, expName)
    resultMatFile = os.path.join(results_dir, 'rMF_' + expName)

    funcH.createDirIfNotExist(results_dir)
    funcH.createDirIfNotExist(labelsDir)
    funcH.createDirIfNotExist(modelsDir)
    funcH.createDirIfNotExist(labelSaveFolder)

    epochFr, epochTo = setEpochBounds(labelSaveFolder, params_dict["epochs"], params_dict["appendEpochBinary"])

    train_dataset = HandShapeDataset(root_dir=nnVidsDir, istrain=True, transform=train_data_transform,
                                     datasetname='nnv')
    val_dataset = HandShapeDataset(root_dir=nnVidsDir, istrain=False, transform=valid_data_transform,
                                   datasetname='nnv')
    num_classes = np.unique(train_dataset.labels).size
    print('trainCnt = ', len(train_dataset))
    print('valCnt = ', len(val_dataset))

    model, optimizer, updatedModelFile = getModel(params_dict, modelsDir, expName)
    num_ftrs = model.fc.in_features
    print('num_classes = ', num_classes, ', num_ftrs = ', num_ftrs, flush=True)

    epochStartTime = time.time()
    dsLoad_train_train = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                                     num_workers=num_workers)
    dsLoad_train_featExtract = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=False,
                                                           num_workers=num_workers)

    model.eval()
    # evaluate the model to extract
    #   trAccInit : to save as initial training accuracy
    #   featTrInit : features to cluster, also saved as result features in -saveFeatsExtracted-
    #   labelsTrInit / predictionsTrInit : initial labels and predictions
    trAccInit, _, featTrInit, labelsTrInit, predictionsTrInit = runValidDs(model, dsLoad_train_featExtract,
                                                                           return_feats=True, layerSize=num_ftrs)
    saveFeatsExtracted(data_dir, epochFr, params_dict["modelName"], expName,
                       featTrInit, labelsTrInit, predictionsTrInit)

    labelSaveFileName = labelSaveFolder + os.sep + 'labels_{:03d}.npz'.format(epochFr)
    predClusters, resultRow = iterate_1(featTrInit, labelsTrInit, predictionsTrInit, params_dict["posterior_dim"],
                                        labelSaveFileName, epochFr - 1, epochTo, trAccInit, epochStartTime,
                                        clusterModel=clusterModel, initialLabelVec=initialLabelVec)
    train_dataset = updateTrainLabels(train_dataset, clusterLabelUpdateInterval, epochFr,
                                      predClusters=predClusters, initialLabelVec=initialLabelVec)

    resultMat = []
    resultMat = resultMat + resultRow.tolist()
    if not os.path.isfile(resultMatFile):
        np.savetxt(resultMatFile, np.array(resultRow).reshape(1, -1), fmt='%4.3f', delimiter='*',
                   newline=os.linesep,
                   header='ep * tr_acc_epoch * nmi_lab * nmi_lab_nz * acc_lab * acc_lab_nz * '
                          'nmi_pred * nmi_pred_nz * acc_pred * acc_pred_nz',
                   footer='', comments='', encoding=None)
    else:
        f = open(resultMatFile, 'a')
        np.savetxt(f, np.array(resultRow).reshape(1, -1), fmt='%4.3f', delimiter='*', newline=os.linesep,
                   header='', footer='', comments='', encoding=None)
        f.close()

    for ep in range(epochFr, epochTo):
        model.train()  # Set model to training mode
        epochStartTime = time.time()
        _, _ = runTrainDs(model, optimizer, dsLoad_train_train)

        model.eval()
        tr_acc_epoch, _, features_avgPool, labels_avgPool, predictionsTr = \
            runValidDs(model, dsLoad_train_featExtract, return_feats=True, layerSize=num_ftrs)

        labelSaveFileName = labelSaveFolder + os.sep + 'labels_{:03d}.npz'.format(ep + 1)
        predClusters, resultRow = iterate_1(features_avgPool, labelsTrInit, predictionsTr,
                                            params_dict["posterior_dim"], labelSaveFileName, ep, epochTo,
                                            tr_acc_epoch, epochStartTime,
                                            clusterModel=clusterModel, initialLabelVec=initialLabelVec)
        resultMat = resultMat + resultRow.tolist()
        train_dataset = updateTrainLabels(train_dataset, clusterLabelUpdateInterval, ep + 1,
                                          predClusters=predClusters)
        saveFeatsExtracted(data_dir, ep, params_dict["modelName"], expName,
                           features_avgPool, labelsTrInit, predictionsTr)
        saveToResultMatFile(resultMatFile, resultRow)
        torch.save(model, f=updatedModelFile)
def loadPCAData(dataToUse, normMode, numOfSigns, data_dim, skipLoadOfOriginalData, data_dir,
                base_dir=funcH.getVariableByComputerName('base_dir')):
    # normMode = str(modelParams["normMode"])
    pcaFeatsFileName = getFileName(dataToUse=dataToUse, normMode=normMode, pcaCount=-1,
                                   numOfSigns=numOfSigns, expectedFileType='PCA')
    hogFeatsFileName = getFileName(dataToUse=dataToUse, normMode=normMode, pcaCount=-1,
                                   numOfSigns=numOfSigns, expectedFileType='Data')
    if not skipLoadOfOriginalData:
        feat_set, _, _ = loadData_hog(base_dir=base_dir, data_dir=data_dir,
                                      loadHogIfExist=True, numOfSigns=numOfSigns)
    else:
        # load the data
        feat_set = loadFileIfExist(data_dir, hogFeatsFileName)
    feat_set_pca = loadFileIfExist(data_dir, pcaFeatsFileName)
    if feat_set_pca.size == 0:
        feat_set_pca = applyPCA2Data(feat_set, data_dir, data_dim, dataToUse=dataToUse,
                                     numOfSigns=numOfSigns, loadIfExist=True)
    return feat_set_pca
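# Usage sketch (argument values are illustrative):
# feat_set_pca = loadPCAData(dataToUse='hog', normMode='', numOfSigns=11, data_dim=256,
#                            skipLoadOfOriginalData=True,
#                            data_dir=funcH.getVariableByComputerName('data_dir'))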
def loadSkeletonDataFromVideosFolders(base_dir=funcH.getVariableByComputerName('base_dir'),
                                      data_dir=funcH.getVariableByComputerName('data_dir'),
                                      loadIfExist=True, numOfSigns=11):
    videosFolderName = 'neuralNetHandVideos_' + str(numOfSigns)
    base_dir_train_feat = os.path.join(base_dir, videosFolderName)
    featsFileNameFull, labelsFileNameFull, detailedLabelsFileNameFull, _ = \
        getFileNameGroup(dataToUse='skeleton', normMode='', pcaCount=-1, numOfSigns=numOfSigns,
                         expectedFileType='Data', data_dir=data_dir)
    # 'skeletonFeats_41.npy' or 'skeletonFeats_11.npy'
    if loadIfExist and os.path.isfile(featsFileNameFull) and os.path.isfile(labelsFileNameFull) \
            and os.path.isfile(detailedLabelsFileNameFull):
        print('loading exported feat_set from(', featsFileNameFull, ')')
        feat_set = np.load(featsFileNameFull)
        labels_all = np.load(labelsFileNameFull)
        detailedLabels_all = np.load(detailedLabelsFileNameFull)
        print('loaded exported feat_set(', feat_set.shape, ') from(', featsFileNameFull, ')')
    else:
        detailedLabels_all = np.array([0, 0, 0, 0])
        labels_all = np.array([0, 0, 0, 0])
        feat_set = np.array([0, 0, 0, 0])
        foldernames = np.sort(os.listdir(base_dir_train_feat))
        signID = 0
        frameCount = 0
        for f in foldernames:
            sign_folder = os.path.join(base_dir_train_feat, str(f).format(':02d'))
            if not os.path.isdir(sign_folder):
                continue
            signID = signID + 1
            videoID = 0
            videos = np.sort(os.listdir(sign_folder))
            print(f)
            print('going to create skeleton feats from sign folder(', sign_folder, ')')
            for v in videos:
                video_folder = os.path.join(sign_folder, v)
                if not os.path.isdir(video_folder):
                    continue
                videoID = videoID + 1
                print('going to create skeleton feats from video folder(', video_folder, ')')
                frames = os.listdir(video_folder)
                feat_set_video = np.array([0, 0, 0, 0])
                skelFeat_file = [os.path.join(video_folder, fn) for fn in os.listdir(video_folder)
                                 if fn.endswith('skel.txt')]
                featsMat = pd.read_csv(skelFeat_file[0], header=None)
                frameCntSkel = featsMat.shape[0]
                labels_file = [os.path.join(video_folder, fn) for fn in os.listdir(video_folder)
                               if fn.endswith('labels.txt')]
                labels = np.loadtxt(labels_file[0])  # labels_file entries are already full paths
                frameCntLabel = len(labels)
                frameList = video_folder + os.sep + '*.png'
                pngCount = len(glob.glob(frameList))
                # 'and' rather than bitwise '&': '&' binds tighter than '==' and broke this check
                assert pngCount == frameCntLabel and pngCount == frameCntSkel, \
                    "these three values must be same frameCntSkel(" + str(frameCntSkel) + ")" + \
                    "frameCntLabel(" + str(frameCntLabel) + ")" + \
                    "pngCount(" + str(pngCount) + ")"
                fr = frameCount
                to = frameCount + frameCntLabel
                frCnt = to - fr
                frameIDs = np.asarray(range(fr, to)).reshape(frCnt, -1)
                detailedLabels_video = np.hstack((signID * np.ones([frCnt, 1]),
                                                  videoID * np.ones([frCnt, 1]),
                                                  frameIDs,
                                                  np.asarray(labels).reshape(frCnt, -1)))
                if np.all(feat_set == 0):
                    feat_set = featsMat
                else:
                    feat_set = np.vstack((feat_set, featsMat))
                if np.all(labels_all == 0):
                    labels_all = labels
                else:
                    labels_all = np.hstack((labels_all, labels))
                if np.all(detailedLabels_all == 0):
                    detailedLabels_all = detailedLabels_video
                else:
                    detailedLabels_all = np.vstack((detailedLabels_all, detailedLabels_video))
                frameCount = len(labels_all)
        print('saving exported feat_set(', feat_set.shape, ') into(', featsFileNameFull, ')')
        np.save(featsFileNameFull, feat_set)
        print('saving labels(', labels_all.shape, ') into(', labelsFileNameFull, ')')
        np.save(labelsFileNameFull, labels_all)
        print('saving detailedLabels(', detailedLabels_all.shape, ') into(', detailedLabelsFileNameFull, ')')
        np.save(detailedLabelsFileNameFull, detailedLabels_all)
    return feat_set, labels_all, detailedLabels_all
def checkCreateData2Use(sign_count, dataToUse, recreate=False, recreate_hog=False):
    base_dir = funcH.getVariableByComputerName('base_dir')
    data_dir = funcH.getVariableByComputerName('data_dir')
    nnfolderBase = os.path.join(base_dir, 'neuralNetHandVideos_' + str(sign_count))
    print('dataToUse:', dataToUse)  # dataToUse: sn
    print('signCnt:', sign_count)  # signCnt: 41
    print('nnfolderBase:', nnfolderBase)  # nnfolderBase: /home/dg/DataPath/neuralNetHandVideos_41
    print('exist(nnfolderBase):', os.path.isdir(nnfolderBase))  # exist(nnfolderBase): False

    if dataToUse == 'sn':
        featureStr = 'surfImArr_all'
        labelStr = 'labelVecs_all'
        possible_fname_init = ['surfImArr', 'snFeats']
        dimArray = [256, 512, 1024]
        convertMat2NPY = True
    elif dataToUse == 'skeleton':
        featureStr = 'skel_all'
        labelStr = 'labelVecs_all'
        possible_fname_init = ['skeleton', 'skelFeats']
        dimArray = [32, 64, 96]
        convertMat2NPY = True
    elif dataToUse == 'hog':
        dimArray = [256, 512, 1024]
        convertMat2NPY = False
    elif dataToUse in ('hgsnsk', 'hgsn', 'hgsk', 'snsk'):
        dimArray = [256, 512, 1024]
        convertMat2NPY = False
    else:
        os._exit(5)  # unknown dataToUse

    if convertMat2NPY:
        _ = funcHP.convert_Mat2NPY(dataToUse, data_dir, sign_count, featureStr, labelStr,
                                   possible_fname_init, recreate=recreate)
        # /home/dg/DataPath/bdData/snFeats_41.mat is loaded:)
        # ['__globals__', '__header__', '__version__', 'knownKHSlist', 'labelVecs_all', 'surfImArr_all']
        # saving labels((104472,)) at: /home/dg/DataPath/bdData/labels_41.npy
        # saving snFeats((104472, 1600)) at: /home/dg/DataPath/bdData/snFeats_41.npy
    elif dataToUse == 'hog':
        _ = funcD.loadData_hog(loadHogIfExist=not recreate_hog, numOfSigns=sign_count)

    for normMode in ['']:
        _ = funcHP.createPCAOfData(data_dir, dataToUse, sign_count, recreate=recreate, normMode=normMode)
        # loaded sn_feats((104472, 1600)) from: /home/dg/DataPath/bdData/snPCA_41.npy
        # Max of featsPCA = 0.003667559914686907, Min of featsPCA = -0.0028185132292039457

    for normMode in ['']:
        funcHP.createPCADimsOfData(data_dir, dataToUse, sign_count, dimArray, recreate=recreate, normMode=normMode)
        # features.shape: (104472, 256) -> saving pca sn features at: /home/dg/DataPath/bdData/sn256Feats_41.npy
        # features.shape: (104472, 512) -> saving pca sn features at: /home/dg/DataPath/bdData/sn512Feats_41.npy
        # features.shape: (104472, 1024) -> saving pca sn features at: /home/dg/DataPath/bdData/sn1024Feats_41.npy

    for normMode in ['']:
        for dims in dimArray:
            funcD.getCorrespondentFrames(base_dir=base_dir, data_dir=data_dir, featType=dataToUse,
                                         numOfSigns=sign_count, pcaCount=dims, expectedFileType='Data',
                                         normMode=normMode)
    funcD.getCorrespondentFrames(base_dir=base_dir, data_dir=data_dir, featType=dataToUse,
                                 numOfSigns=sign_count, pcaCount=-1, expectedFileType='Data', normMode='')
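# Example pipeline calls (sketches mirroring the commented examples earlier in this file):
# checkCreateData2Use(41, 'sn', recreate=False, recreate_hog=False)   # .mat -> .npy, PCA dims, correspondences
# checkCreateData2Use(11, 'hog', recreate=False, recreate_hog=False)  # extract/cache HOG, then PCA dims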