Code Example #1
def runForBaseClusterResults(normMode,
                             randomSeed=5,
                             clusterModels=['KMeans', 'GMM_diag'],
                             dataToUseArr=["hog", "skeleton", "sn"],
                             numOfSignsArr=[11],
                             clustCntVec=None):
    data_dir = funcH.getVariableByComputerName('data_dir')
    results_dir = funcH.getVariableByComputerName('results_dir')

    for dataToUse in dataToUseArr:
        for numOfSigns in numOfSignsArr:
            if clustCntVec is None:
                clustCntVec = [64, 128, 256]
            if dataToUse == 'skeleton':
                dimArray = [32, 64, 96]
            else:  # dataToUse == 'sn' or dataToUse == 'hog':
                dimArray = [256]
            for dims in dimArray:
                funcHP.runClusteringOnFeatSet(data_dir=data_dir,
                                              results_dir=results_dir,
                                              dataToUse=dataToUse,
                                              normMode=normMode,
                                              numOfSigns=numOfSigns,
                                              pcaCount=dims,
                                              expectedFileType='Data',
                                              clustCntVec=clustCntVec,
                                              clusterModels=clusterModels,
                                              randomSeed=randomSeed)
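
# A minimal usage sketch, assuming this module and helperFuncs are importable;
# it mirrors the commented invocation of runForBaseClusterResults shown after
# createCombinedDatasets further below.
if __name__ == "__main__":
    # sweep KMeans and diagonal-covariance GMM over the HOG and skeleton features
    runForBaseClusterResults(normMode='',
                             randomSeed=5,
                             clusterModels=['KMeans', 'GMM_diag'],
                             dataToUseArr=["hog", "skeleton"],
                             numOfSignsArr=[11],
                             clustCntVec=[64, 128, 256])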
Code Example #2
def main(argv):
    base_dir = funcH.getVariableByComputerName('base_dir')
    data_dir = funcH.getVariableByComputerName('data_dir')
    results_dir = funcH.getVariableByComputerName('results_dir')
    print(argv)

    modelParams, trainParams, rnnParams = parseArgs(argv)

    seed(trainParams["randomSeed"])
    tf.set_random_seed(seed=trainParams["randomSeed"])

    numOfSigns = modelParams["numOfSigns"]
    feat_set, labels_all, detailed_labels_all = funcPRH.loadData(modelParams, numOfSigns, data_dir)
    data_dim = feat_set.shape[1]

    exp_name, subEpochs, trainParams, rnnParams = getInitParams(trainParams, modelParams, rnnParams)
    csv_name, model_name, outdir = funcPRH.createExperimentDirectories(results_dir, exp_name)
    model, modelTest, ES = funcM.getModels(data_dim=data_dim, modelParams=modelParams, rnnParams=rnnParams)

    checkpointer = ModelCheckpoint(filepath=model_name, verbose=0, save_best_only=False, period=1)
    csv_logger = CSVLogger(csv_name, append=True, separator=';')

    #%%
    trainFromScratch = False
    predictionLabelsDir = results_dir + os.sep + 'results' + os.sep + exp_name
    model, epochFr, epochTo = initEpochIDsModelParams(trainFromScratch, trainParams, model, model_name, predictionLabelsDir)

    if epochFr == epochTo:
        print("+*-+*-+*-+*-epochs completed+*-+*-+*-+*-")
        exit(12)

    modelParams["callbacks"] = [csv_logger, ES, checkpointer]
    modelParams["model_name"] = model_name
    trainParams["subEpochs"] = subEpochs
    trainParams["epochFr"] = epochFr
    trainParams["epochTo"] = epochTo
    trainParams["corr_indis_a"] = np.mod(epochFr, 2) if epochFr != 0 else np.mod(int(trainParams["corr_swapMode"]) + int(trainParams["corr_randMode"]), 2)
    if trainParams["applyCorr"] >= 1:
        trainParams["corrFramesAll"] = funcD.getCorrespondentFrames(base_dir=base_dir, data_dir=data_dir, featType=modelParams["dataToUse"],
                                                                    normMode=modelParams["normMode"], pcaCount=modelParams["pcaCount"], numOfSigns=numOfSigns,
                                                                    expectedFileType='Data')

    print('started training')

    directoryParams = {
        "outdir": outdir,
        "data_dir" : data_dir,
        "predictionLabelsDir": predictionLabelsDir,
        "nmi_and_acc_file_name": outdir + os.sep + exp_name + '_nmi_acc.txt'
    }

    if modelParams["trainMode"] == "rsa" or modelParams["trainMode"] == "corsa":
        funcTL.trainRNN(trainParams, modelParams, rnnParams, detailed_labels_all, model, modelTest, feat_set, labels_all, directoryParams)
    else:
        funcTL.trainFramewise(trainParams, modelParams, model, modelTest, feat_set, labels_all, directoryParams)
Code Example #3
def get_create_folders(params_dict):
    data_path_base = params_dict["data_path_base"]

    data_ident = 'data_' + params_dict["data_ident"]
    base_dir = funcH.getVariableByComputerName('base_dir')  # xx/DataPath or xx/DataFolder
    results_dir = os.path.join(base_dir, 'sup', 'results_mi' + str(params_dict["model_id"]))
    models_dir = os.path.join(base_dir, 'sup', 'models_mi' + str(params_dict["model_id"]))
    data_params_folder = os.path.join(base_dir, 'sup', 'data_mi', data_ident)

    data_path_base = os.path.join(base_dir, data_path_base, "imgs")
    result_fold = os.path.join(base_dir, 'sup', 'preds_' + params_dict["modelName"], 'pred_' + params_dict["exp_ident"])

    path_dict = {
        "results": results_dir,  # folder="~/DataFolder/sup/results_mi1"
        "models": models_dir,
        "data_base": data_path_base,  # original path of data to load
        "data_params_folder": data_params_folder,  # data params folder
        "result_fold": result_fold,  # to save the predictions and labels
    }

    funcH.createDirIfNotExist(results_dir)
    funcH.createDirIfNotExist(models_dir)
    funcH.createDirIfNotExist(data_params_folder)
    funcH.createDirIfNotExist(result_fold)

    return path_dict
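
# Illustrative call, a sketch only: the key names are the ones read by
# get_create_folders above, but every value below is a made-up placeholder.
example_params = {
    "data_path_base": "hospisign",  # joined with base_dir and "imgs" to reach the images
    "data_ident": "hgsk_256_41",    # becomes the sup/data_mi/data_<ident> folder
    "model_id": 1,                  # suffix of the results_mi<id> and models_mi<id> folders
    "modelName": "resnet18",        # used for the preds_<modelName> folder
    "exp_ident": "rs05",            # used for the pred_<exp_ident> folder
}
path_dict_example = get_create_folders(example_params)
print(path_dict_example["results"], path_dict_example["result_fold"])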
Code Example #4
def decode_initial_label_param(initialLabelParam):

    if initialLabelParam is None:
        initialLabelVec = None
        expNameEnd = ""
    else:
        initialLabelVecStrings = initialLabelParam.split("_")
        if initialLabelVecStrings[0] == 'fn':
            fileName_end = initialLabelVecStrings[1]  # 'baseResults-hgsk256-11-KMeans-256.npz'
            results_dir = funcH.getVariableByComputerName('results_dir').replace("bdResults", "dcResults")
            labelFileFullName = os.path.join(results_dir, fileName_end)
            if fileName_end.startswith("baseResults"):
                #  np.savez(predictionFileNameFull, labels_all, predClusters)
                npzDict = np.load(labelFileFullName, allow_pickle=True)
                initialLabelVec = npzDict["arr_1"]
            elif fileName_end.startswith("cosae"):
                initialLabelVec = np.load(labelFileFullName)
            else:
                print('Not a valid input')
                os._exit(29)
            expNameEnd = fileName_end
        elif initialLabelVecStrings[0] == "baseResults":
            print('Not implemented yet')
            os._exit(30)
        else:
            print('Not implemented yet')
            os._exit(30)

    return initialLabelVec, expNameEnd
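
# Usage sketch: the 'fn_'-prefixed argument reuses the file name quoted in the
# inline comment above; that npz file must already exist under the dcResults folder.
init_labels, exp_name_end = decode_initial_label_param("fn_baseResults-hgsk256-11-KMeans-256.npz")
no_labels, empty_end = decode_initial_label_param(None)  # returns (None, "")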
Code Example #5
def load_usps(data_path = os.path.join(getVariableByComputerName("n2d_experiments"), 'usps', 'data')):
    createDirIfNotExist(data_path)

    file_name_tr = os.path.join(data_path, 'usps_train.jf')
    file_name_te = os.path.join(data_path, 'usps_test.jf')
    link_adr_path = 'https://raw.githubusercontent.com/cvjena/ITAL/master/data/usps_<trte>.jf'
    if not os.path.exists(file_name_tr):
        download_file(link_adr_path.replace("<trte>", "train"), save2path=data_path, savefilename='usps_train.jf')
        #os.system('wget http://www-i6.informatik.rwth-aachen.de/~keysers/usps_train.jf.gz -P %s' % data_path)
        download_file(link_adr_path.replace("<trte>", "test"), save2path=data_path, savefilename='usps_test.jf')
        #os.system('wget http://www-i6.informatik.rwth-aachen.de/~keysers/usps_test.jf.gz -P %s' % data_path)

    with open(file_name_tr) as f:
        data = f.readlines()
    data = data[1:-1]
    data = [list(map(float, line.split())) for line in data]
    data = np.array(data)
    data_train, labels_train = data[:, 1:], data[:, 0]

    with open(file_name_te) as f:
        data = f.readlines()
    data = data[1:-1]
    data = [list(map(float, line.split())) for line in data]
    data = np.array(data)
    data_test, labels_test = data[:, 1:], data[:, 0]

    x = np.concatenate((data_train, data_test)).astype('float64')
    y = np.concatenate((labels_train, labels_test))
    print('USPS samples', x.shape)
    return x, y
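
# Minimal usage sketch: download (or reuse) the two USPS files and report the shapes;
# the standard split yields 9298 samples with 256 features each.
x_usps, y_usps = load_usps()
print(x_usps.shape, y_usps.shape)
print(np.unique(y_usps))  # the ten digit classes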
Code Example #6
def load_pendigits(data_path = os.path.join(getVariableByComputerName("n2d_experiments"), 'pendigits', 'data')):
    createDirIfNotExist(data_path)
    file_name_tr = os.path.join(data_path, 'pendigits.tra')
    file_name_te = os.path.join(data_path, 'pendigits.tes')
    link_adr_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits<file_ending>'
    if not os.path.exists(file_name_tr):
        os.makedirs(data_path,  exist_ok=True)
        download_file(link_adr_path.replace("<file_ending>", ".tra"), save2path=data_path, savefilename='pendigits.tra')
        #os.system('wget https://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.tra -P %s' % data_path)
        download_file(link_adr_path.replace("<file_ending>", ".tes"), save2path=data_path, savefilename='pendigits.tes')
        #os.system('wget https://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.tes -P %s' % data_path)
        download_file(link_adr_path.replace("<file_ending>", ".names"), save2path=data_path, savefilename='pendigits.names')
        #os.system('wget https://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.names -P %s' % data_path)

    # load training data
    with open(file_name_tr) as file:
        data = file.readlines()
    data = [list(map(float, line.split(','))) for line in data]
    data = np.array(data).astype(np.float32)
    data_train, labels_train = data[:, :-1], data[:, -1]

    # load testing data
    with open(file_name_te) as file:
        data = file.readlines()
    data = [list(map(float, line.split(','))) for line in data]
    data = np.array(data).astype(np.float32)
    data_test, labels_test = data[:, :-1], data[:, -1]

    x = np.concatenate((data_train, data_test)).astype('float32')
    y = np.concatenate((labels_train, labels_test))
    x /= 100.
    y = y.astype('int')
    return x, y
Code Example #7
def createCombinedDatasets(numOfSigns=11):
    data_dir = funcH.getVariableByComputerName('data_dir')
    fName_sn = funcD.getFileName('sn', '', -1, numOfSigns, 'Data')
    fName_hg = funcD.getFileName('hog', '', -1, numOfSigns, 'Data')
    fName_sk = funcD.getFileName('skeleton', '', -1, numOfSigns, 'Data')

    X_sn = funcD.loadFileIfExist(directoryOfFile=data_dir, fileName=fName_sn)
    X_hg = funcD.loadFileIfExist(directoryOfFile=data_dir, fileName=fName_hg)
    X_sk = funcD.loadFileIfExist(directoryOfFile=data_dir, fileName=fName_sk)

    print('X_sn.shape = ', X_sn.shape)
    print('X_hg.shape = ', X_hg.shape)
    print('X_sk.shape = ', X_sk.shape)

    X_hgsnsk = np.concatenate([X_hg.T, X_sn.T, X_sk.T]).T
    X_hgsn = np.concatenate([X_hg.T, X_sn.T]).T
    X_hgsk = np.concatenate([X_hg.T, X_sk.T]).T
    X_snsk = np.concatenate([X_sn.T, X_sk.T]).T

    print('X_hgsnsk.shape = ', X_hgsnsk.shape)
    print('X_hgsn.shape = ', X_hgsn.shape)
    print('X_hgsk.shape = ', X_hgsk.shape)
    print('X_snsk.shape = ', X_snsk.shape)

    fName_hgsnsk = os.path.join(data_dir, fName_hg.replace("hog", "hgsnsk"))
    fName_hgsn = os.path.join(data_dir, fName_hg.replace("hog", "hgsn"))
    fName_hgsk = os.path.join(data_dir, fName_hg.replace("hog", "hgsk"))
    fName_snsk = os.path.join(data_dir, fName_hg.replace("hog", "snsk"))

    # fName_* above already include data_dir; save each combined feature set only
    # if it is not on disk yet (loading an existing file just verifies it is readable)
    if os.path.isfile(fName_hgsnsk):
        _ = np.load(fName_hgsnsk)
    else:
        np.save(fName_hgsnsk, X_hgsnsk)

    if os.path.isfile(fName_hgsn):
        _ = np.load(fName_hgsn)
    else:
        np.save(fName_hgsn, X_hgsn)

    if os.path.isfile(fName_hgsk):
        _ = np.load(fName_hgsk)
    else:
        np.save(fName_hgsk, X_hgsk)

    if os.path.isfile(fName_snsk):
        _ = np.load(fName_snsk)
    else:
        np.save(fName_snsk, X_snsk)


#  resultDict = funcHP.runClusteringOnFeatSet(data_dir=funcH.getVariableByComputerName('data_dir'),
#                                           results_dir=funcH.getVariableByComputerName('results_dir'),
#                                           dataToUse='skeleton', numOfSigns=11, pcaCount=32,
#                                           expectedFileType='Data', clusterModels=['KMeans', 'GMM_diag'], randomSeed=5)
#  runForBaseClusterResults(normMode='', clusterModels = ['KMeans', 'GMM_diag'])
#  runForBaseClusterResults_OPTICS(randomSeed = 5, clustCntVec = [32, 64])
#  run4All_createData(sign_countArr=[12])
#  createCombinedDatasets(numOfSigns = 41)
#  checkCreateData2Use(41, "snsk", recreate=False, recreate_hog=False)
Code Example #8
def runForBaseClusterResults_OPTICS(randomSeed=5,
                                    clustCntVec=[32, 64, 128, 256, 512],
                                    dataToUseArr=["hog", "skeleton", "sn"],
                                    numOfSignsVec=[11, 41]):
    data_dir = funcH.getVariableByComputerName('data_dir')
    results_dir = funcH.getVariableByComputerName('results_dir')
    for dataToUse in dataToUseArr:
        for numOfSigns in numOfSignsVec:
            if dataToUse == 'skeleton':
                dimArray = [96]  # 32, 64,
            else:  # dataToUse == 'sn' or dataToUse == 'hog':
                dimArray = [256]  # 512
            for dims in dimArray:
                funcHP.runOPTICSClusteringOnFeatSet(data_dir=data_dir,
                                                    results_dir=results_dir,
                                                    dataToUse=dataToUse,
                                                    numOfSigns=numOfSigns,
                                                    pcaCount=dims,
                                                    expectedFileType='Data',
                                                    clustCntVec=clustCntVec,
                                                    randomSeed=randomSeed)
Code Example #9
def load_har(data_path = os.path.join(getVariableByComputerName("n2d_experiments"), 'har', 'data')):
    # load this dataset this way ??
    # https://pypi.org/project/kcc2020-tutorial-HAR-dataset/
    # entire_dataset = load_har_all()
    createDirIfNotExist(data_path)
    fold_train = os.path.join(data_path, 'train')
    fold_test = os.path.join(data_path, 'test')
    createDirIfNotExist(fold_train)
    createDirIfNotExist(fold_test)
    fname_train_x = os.path.join(fold_train, 'X_train.txt')
    fname_train_y = os.path.join(fold_train, 'y_train.txt')
    fname_test_x = os.path.join(fold_test, 'X_test.txt')
    fname_test_y = os.path.join(fold_test, 'y_test.txt')

    # https://github.com/mollybostic/cleaning-data-assignment/tree/master/UCI%20HAR%20Dataset
    # for windows = https://sourceforge.net/projects/gnuwin32/files/wget/1.11.4-1/wget-1.11.4-1-setup.exe/download
    # https://stackoverflow.com/questions/29113456/wget-not-recognized-as-internal-or-external-command

    link_adr_path = 'https://raw.githubusercontent.com/mollybostic/cleaning-data-assignment/master/UCI%20HAR%20Dataset/<trte>/<Xy>_<trte>.txt'
    if not os.path.isfile(fname_train_x):
        print('downloading X_train.txt(66.0MB)')
        download_file(link_adr_path.replace("<trte>", "train").replace("<Xy>", "X"), save2path=fold_train, savefilename='X_train.txt')
        #os.system("wget --no-verbose 'https://raw.githubusercontent.com/mollybostic/cleaning-data-assignment/master/UCI HAR Dataset/train/X_train.txt' -P %s" % fold_train)
        print('downloading y_train.txt(14.7kB)')
        download_file(link_adr_path.replace("<trte>", "train").replace("<Xy>", "y"), save2path=fold_train, savefilename='y_train.txt')
        #os.system("wget --no-verbose 'https://raw.githubusercontent.com/mollybostic/cleaning-data-assignment/master/UCI HAR Dataset/train/y_train.txt' -P %s" % fold_train)
        print('downloading X_test.txt(26.5MB)')
        download_file(link_adr_path.replace("<trte>", "test").replace("<Xy>", "X"), save2path=fold_test, savefilename='X_test.txt')
        #os.system("wget --no-verbose 'https://raw.githubusercontent.com/mollybostic/cleaning-data-assignment/master/UCI HAR Dataset/test/X_test.txt' -P %s" % fold_test)
        print('downloading y_test.txt(5.9kB)')
        download_file(link_adr_path.replace("<trte>", "test").replace("<Xy>", "y"), save2path=fold_test, savefilename='y_test.txt')
        #os.system("wget --no-verbose 'https://raw.githubusercontent.com/mollybostic/cleaning-data-assignment/master/UCI HAR Dataset/test/y_test.txt' -P %s" % fold_test)

    x_train = pd.read_csv(fname_train_x, sep=r'\s+', header=None)
    y_train = pd.read_csv(fname_train_y, header=None)
    x_test = pd.read_csv(fname_test_x, sep=r'\s+', header=None)
    y_test = pd.read_csv(fname_test_y, header=None)
    x = np.concatenate((x_train, x_test))
    y = np.concatenate((y_train, y_test))
    # labels start at 1, so shift them to start at 0
    y = y - 1
    y = y.reshape((y.size,))
    y_names = {0: 'Walking', 1: 'Upstairs', 2: 'Downstairs', 3: 'Sitting', 4: 'Standing', 5: 'Laying', }
    return x, y, y_names
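
# Usage sketch, assuming the downloads above complete: load UCI HAR and inspect
# the class balance via the returned label-name mapping.
x_har, y_har, y_har_names = load_har()
for lab, cnt in zip(*np.unique(y_har, return_counts=True)):
    print(y_har_names[int(lab)], cnt)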
Code Example #10
def get_create_folders(params_dict):
    data_path_base = params_dict["data_path_base"]

    data_ident = 'data_' + params_dict["data_ident"]
    base_dir = funcH.getVariableByComputerName(
        'base_dir')  # xx/DataPath or xx/DataFolder
    results_dir = os.path.join(base_dir, 'sup',
                               'results_' + params_dict["modelName"])
    models_dir = os.path.join(base_dir, 'sup',
                              'models_' + params_dict["modelName"])
    data_path_fill = os.path.join(base_dir, 'sup', 'data', data_ident)

    exp_ident_str = 'rs' + str(params_dict["randomSeed"]).zfill(2)
    data_path_train = os.path.join(
        data_path_fill, data_path_base + '_' + exp_ident_str + '_tr')
    data_path_valid = os.path.join(
        data_path_fill, data_path_base + '_' + exp_ident_str + '_va')
    data_path_test = os.path.join(data_path_fill,
                                  data_path_base + '_' + exp_ident_str + '_te')
    data_path_base = os.path.join(base_dir, data_path_base, "imgs")
    result_fold = os.path.join(base_dir, 'sup',
                               'preds_' + params_dict["modelName"],
                               'pred_' + params_dict["exp_ident"])

    path_dict = {
        "results": results_dir,  # folder="~/DataFolder/sup/results"
        "models": models_dir,
        "data_base": data_path_base,  # original path of data to load
        "train": data_path_train,  # train data to create
        "valid": data_path_valid,  # valid data to create
        "test": data_path_test,  # test data to create
        "result_fold": result_fold,  # to save the predictions and labels
    }

    funcH.createDirIfNotExist(results_dir)
    funcH.createDirIfNotExist(models_dir)
    funcH.createDirIfNotExist(data_path_train)
    funcH.createDirIfNotExist(data_path_valid)
    funcH.createDirIfNotExist(data_path_test)
    funcH.createDirIfNotExist(result_fold)

    return path_dict
Code Example #11
def n_run_autoencode(x, args):
    global debug_string_out
    # input_dict :
    # fit_verbose
    input_dict = argparse.ArgumentParser(description='func_autoencode', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    input_dict.add_argument('--experiments_folder_base', default=funcH.getVariableByComputerName("n2d_experiments"))
    input_dict.add_argument('--n_clusters', default=10, type=int)
    input_dict.add_argument('--dataset', default='mnist')
    input_dict.add_argument('--batch_size', default=256, type=int)
    input_dict.add_argument('--pretrain_epochs', default=100, type=int)
    input_dict.add_argument('--fit_verbose', default=True, type=bool)
    args2 = funcH._parse_args(input_dict, args, print_args=True)

    shape = [x.shape[-1], 500, 500, 2000, args2.n_clusters]
    ae = _autoencoder(shape)
    hidden = ae.get_layer(name='encoder_%d' % (len(shape) - 2)).output
    encoder = Model(inputs=ae.input, outputs=hidden)

    print("checking if ", args.experiment_names_and_folders["file_name_ae_weights_full"], " exists.")
    weights_file = args.experiment_names_and_folders["file_name_ae_weights_full"]
    load_file_skip_learning = os.path.isfile(weights_file)
    t = funcH.Timer()

    # Pretrain autoencoders before clustering
    if load_file_skip_learning:
        debug_string_out = funcH.print_and_add("Load weights from(" + weights_file + ")", debug_string_out)
        ae.load_weights(weights_file)
    else:
        optimizer = 'adam'
        ae.compile(loss='mse', optimizer=optimizer)
        ae.fit(x, x, batch_size=args2.batch_size, epochs=args2.pretrain_epochs, verbose=1)
        t.end()
        ae.save_weights(weights_file)
        debug_string_out = funcH.print_and_add("Time to train the ae: " + t.get_elapsed_time(), debug_string_out)

    with open(args.experiment_names_and_folders["file_name_ae_params_text_full"], 'w') as f:
        f.write("\n".join([str(k)+":"+str(args2.__dict__[k]) for k in args2.__dict__]))

    hl = encoder.predict(x)
    return hl
Code Example #12
def script_hgsk():
    global debug_string_out
    pretrain_epochs = [10]
    ml = "UMAP"
    ds = "hgsk_256_41"
    for cluster in ['KM', 'GMM']:
        for ae_epoc in pretrain_epochs:
            for clust_cnt in [512, 1024]: #  umap_dim = 20, n_clusters_ae = 20, umap_neighbors = 40
                for umap_neighbors in [20, 30, 40]:
                    try:
                        debug_string_out.clear()
                        main(["--dataset", ds, "--gpu", "0",
                              "--pretrain_epochs", str(ae_epoc),
                              "--n_clusters", str(clust_cnt), "--cluster", cluster,
                              "--umap_dim", str(clust_cnt), "--umap_neighbors", str(umap_neighbors),
                              "--manifold_learner", ml, "--umap_min_dist", "0.00"])
                    except Exception as e:
                        debug_string_out = funcH.print_and_add(ds + '_' + ml + " - problem", debug_string_out)
                        debug_string_out = funcH.print_and_add(str(e), debug_string_out)
                        exp_date_str = str(datetime.now().strftime("%Y%m%d_%H%M")).replace('-', '')  # %S
                        with open(os.path.join(funcH.getVariableByComputerName("n2d_experiments"), ds + '_' + ml + '_error_' + exp_date_str + '.txt'), 'w') as f:
                            f.write("\n".join(debug_string_out))
Code Example #13
def script():
    global debug_string_out
    pretrain_epochs = [10, 50]
    manifold_learners_all = ["UMAP"]
    dataset_names_all = ["cifar10", "mnist", "pendigits", "fashion"]  # , "usps", "har"
    cluster_func = "HDBSCAN"
    for ds in dataset_names_all:
        for ml in manifold_learners_all:
            for ae_epoc in pretrain_epochs:
                for clust_cnt in [20]: #  umap_dim = 20, n_clusters_ae = 20, umap_neighbors = 40
                    try:
                        debug_string_out = []
                        main(["--dataset", ds, "--gpu", "0",
                              "--pretrain_epochs", str(ae_epoc),
                              "--n_clusters", str(clust_cnt), '--cluster', str(cluster_func),
                              "--umap_dim", str(clust_cnt), "--umap_neighbors", str(2*clust_cnt),
                              "--manifold_learner", ml, "--umap_min_dist", "0.00"])
                    except Exception as e:
                        debug_string_out = funcH.print_and_add(ds + '_' + ml + " - problem \n" + str(e), debug_string_out)
                        exp_date_str = str(datetime.now().strftime("%Y%m%d_%H%M")).replace('-', '')  # %S
                        with open(os.path.join(funcH.getVariableByComputerName("n2d_experiments"), ds + '_' + ml + '_error_' + exp_date_str + '.txt'), 'w') as f:
                            f.write("\n".join(debug_string_out))
Code Example #14
def get_args(argv):
    global debug_string_out
    parser = argparse.ArgumentParser(
        description='(Not Too) Deep',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--dataset', default='mnist', )
    parser.add_argument('--ae_weights', default=None)
    parser.add_argument('--experiments_folder_base', default=funcH.getVariableByComputerName("n2d_experiments"))
    parser.add_argument("--mode", default='client')
    parser.add_argument("--port", default=52162)
    parser.add_argument('--gpu', default=0, )
    parser.add_argument('--n_clusters', default=10, type=int)
    parser.add_argument('--batch_size', default=256, type=int)
    parser.add_argument('--pretrain_epochs', default=1000, type=int)
    parser.add_argument('--umap_dim', default=2, type=int)
    parser.add_argument('--umap_neighbors', default=10, type=int)
    parser.add_argument('--umap_min_dist', default="0.00", type=str)
    parser.add_argument('--umap_metric', default='euclidean', type=str)
    parser.add_argument('--cluster', default='GMM', type=str)
    parser.add_argument('--manifold_learner', default='UMAP', type=str)
    parser.add_argument('--visualize', default=False, type=bool)
    parser.add_argument('--rerun_last_plots', default=False, type=bool)
    args = funcH._parse_args(parser, argv, print_args=True)
    debug_string_out = funcH.print_and_add('-' * 80)

    experiment_names_and_folders = {
        "exp_date_str": str(datetime.now().strftime("%Y%m%d_")).replace('-', ''),  # %M%S,
        "exp_base_str": "_".join([args.dataset, "c" + str(args.cluster)+ str(args.n_clusters), "e" + str(args.pretrain_epochs)]),
        "folder_umap_data": os.path.join(args.experiments_folder_base, "exported_manifolds"),
        "folder_ae_weights": os.path.join(args.experiments_folder_base, "weights"),
    }
    experiment_names_and_folders["exp_extended"] = experiment_names_and_folders["exp_base_str"] + "_" + "_".join([args.manifold_learner + "ud" + str(args.umap_dim), "un" + str(args.umap_neighbors)])
    experiment_names_and_folders["folder_experiment"] = os.path.join(args.experiments_folder_base, args.dataset,
                                           experiment_names_and_folders["exp_date_str"] + experiment_names_and_folders["exp_extended"])
    experiment_names_and_folders["file_name_ae_weights_base"] = "aew_" + "_".join([args.dataset, "c" + str(args.n_clusters), "e" + str(args.pretrain_epochs)])
    experiment_names_and_folders["file_name_ae_weights_full"] = os.path.join(experiment_names_and_folders["folder_ae_weights"], experiment_names_and_folders["file_name_ae_weights_base"] + '.npy')
    experiment_names_and_folders["file_name_umap_data_base"] = "ulp" + experiment_names_and_folders["exp_extended"]
    experiment_names_and_folders["file_name_umap_data_full"] = os.path.join(experiment_names_and_folders["folder_umap_data"], experiment_names_and_folders["file_name_umap_data_base"] + '.npy')
    experiment_names_and_folders["file_name_arguments_full"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'args_' + experiment_names_and_folders["exp_extended"] + "_" + experiment_names_and_folders["exp_date_str"] + '.txt')
    experiment_names_and_folders["file_name_ae_params_text_full"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'args_autoencode_' + experiment_names_and_folders["exp_extended"] + "_" + experiment_names_and_folders["exp_date_str"] + '.txt')
    experiment_names_and_folders["file_name_plot_fig_full"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'plot_' + experiment_names_and_folders["exp_extended"] + "_" + experiment_names_and_folders["exp_date_str"] + '_<plot_id>.png')
    experiment_names_and_folders["file_name_plot_csv_full"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'csv_' + experiment_names_and_folders["exp_extended"] + "_" + experiment_names_and_folders["exp_date_str"] + '.csv')
    experiment_names_and_folders["file_name_clusters_after_manifold_full"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'clusters_after_manifold-' + experiment_names_and_folders["exp_extended"] + "_" + experiment_names_and_folders["exp_date_str"] + '.txt')
    experiment_names_and_folders["file_name_clusters_before_manifold_full"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'clusters_before_manifold-' + experiment_names_and_folders["exp_extended"] + "_" + experiment_names_and_folders["exp_date_str"] + '.txt')
    experiment_names_and_folders["file_name_debug_string_out_full"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'debug_string_out-' + experiment_names_and_folders["exp_extended"] + "_" + experiment_names_and_folders["exp_date_str"] + '.txt')
    experiment_names_and_folders["file_name_result_csv_file_full"] = os.path.join(args.experiments_folder_base, 'results.csv')
    experiment_names_and_folders["file_name_data_before_manifold"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'data_' + experiment_names_and_folders["exp_extended"] + '_before.npz')
    experiment_names_and_folders["file_name_data_after_manifold"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'data_' + experiment_names_and_folders["exp_extended"] + '_after.npz')
    experiment_names_and_folders["file_name_cluster_obj"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'cluster_obj_' + experiment_names_and_folders["exp_extended"] + '_<bef_aft>.dictionary')
    experiment_names_and_folders["file_name_silhouette_results"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'silhouette_results_' + experiment_names_and_folders["exp_extended"] + '_<bef_aft>.npy')
    experiment_names_and_folders["file_name_results"] = os.path.join(experiment_names_and_folders["folder_experiment"], 'results_' + experiment_names_and_folders["exp_extended"] + '.dictionary')

    args.experiment_names_and_folders = experiment_names_and_folders

    # create the folders folder_{experiment, umap_data, ae_weights}
    funcH.createDirIfNotExist(experiment_names_and_folders["folder_experiment"])
    funcH.createDirIfNotExist(experiment_names_and_folders["folder_umap_data"])
    funcH.createDirIfNotExist(experiment_names_and_folders["folder_ae_weights"])

    with open(experiment_names_and_folders["file_name_arguments_full"], 'w') as f:
        f.write("\n".join(argv))
    return args
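
# Example invocation with an explicit argv list, matching how the script() drivers
# above pass arguments to main(); the values are illustrative, and calling get_args
# creates the experiment folders and writes the args_*.txt file as a side effect.
args_example = get_args(["--dataset", "mnist", "--gpu", "0",
                         "--pretrain_epochs", "10",
                         "--n_clusters", "10", "--cluster", "GMM",
                         "--umap_dim", "10", "--umap_neighbors", "20",
                         "--manifold_learner", "UMAP", "--umap_min_dist", "0.00"])
print(args_example.experiment_names_and_folders["folder_experiment"])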
Code Example #15
# NOTE: this snippet starts mid-script; the two flags below are hypothetical
# placeholders for the original (not shown) conditions, added only so the
# fragment parses as written.
if run_mnist_demo:  # hypothetical flag: quick MNIST autoencoder sanity check
    if train_from_scratch:  # hypothetical flag: train and save a model instead of loading one
        ae = modelLoader.modelLoad_MNIST()
        # compile it using the adam optimizer
        ae.compile(optimizer="adam", loss="mse")
        # train it by providing the training images as both input and target
        ae.fit(train_images, train_images, epochs=2)
        modelLoader.saveModel(ae, "model_tex")
    else:
        ae = modelLoader.loadModel("model_tex")

    prediction = ae.predict(train_images[0:199,:,:,:], verbose=1, batch_size=100)
    x = prediction[0].reshape(28, 28)
    plt.imshow(x)
    plt.show()
else:
    exp_name = 'cnnAE'
    results_dir = funcH.getVariableByComputerName('results_dir')
    outdir = os.path.join(results_dir, 'results', exp_name)

    csv_name = os.path.join(results_dir, 'epochs') + os.sep + exp_name + '.csv'
    model_name = os.path.join(results_dir, 'models') + os.sep + exp_name + '.h5'

    funcH.createDirIfNotExist(os.path.join(results_dir, 'epochs'))
    funcH.createDirIfNotExist(os.path.join(results_dir, 'models'))
    funcH.createDirIfNotExist(outdir)

    checkpointer = ModelCheckpoint(filepath=model_name, verbose=0, save_best_only=False, period=1)
    csv_logger = CSVLogger(csv_name, append=True, separator=';')
    #ES = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=50, verbose=0, mode='auto')
    #callbacks = [csv_logger, ES, checkpointer]

    feat_set, labels_all, detailedLabels_all = dataLoader.loadData_nnVidImages('/home/dg/DataPath/bdData')
Code Example #16
def loadData_hog(base_dir = funcH.getVariableByComputerName('base_dir'), data_dir = funcH.getVariableByComputerName('data_dir'),
                 loadHogIfExist=True, numOfSigns=11):

    videosFolderName = 'neuralNetHandVideos_' + str(numOfSigns)
    base_dir_train_feat = os.path.join(base_dir, videosFolderName)

    hogFeatsFileName = getFileName(dataToUse='hog', normMode='', pcaCount=-1, numOfSigns=numOfSigns, expectedFileType='Data')
    hogFeatsFileNameFull = os.path.join(data_dir, hogFeatsFileName)
    labelsFileNameFull = os.path.join(data_dir, getFileName(dataToUse='hog', normMode='', pcaCount=-1, numOfSigns=numOfSigns, expectedFileType='Labels'))
    detailedLabelsFileNameFull = os.path.join(data_dir, getFileName(dataToUse='hog', normMode='', pcaCount=-1, numOfSigns=numOfSigns, expectedFileType='DetailedLabels'))

    if loadHogIfExist and os.path.isfile(hogFeatsFileNameFull) and os.path.isfile(labelsFileNameFull) and os.path.isfile(detailedLabelsFileNameFull):
        print('loading exported feat_set from(', hogFeatsFileNameFull, ')')
        feat_set = np.load(hogFeatsFileNameFull)
        labels_all = np.load(labelsFileNameFull)
        detailedLabels_all = np.load(detailedLabelsFileNameFull)
        print('loaded exported feat_set(', feat_set.shape, ') from(', hogFeatsFileName, ')')
    else:
        detailedLabels_all = np.array([0, 0, 0, 0])
        labels_all = np.array([0, 0, 0, 0])
        feat_set = np.array([0, 0, 0, 0])
        foldernames = np.sort(os.listdir(base_dir_train_feat))
        signID = 0
        frameCount = 0
        for f in foldernames:
            sign_folder = os.path.join(base_dir_train_feat, str(f))
            if not os.path.isdir(sign_folder):
                continue
            signID = signID + 1
            videoID = 0
            videos = np.sort(os.listdir(sign_folder))
            print(f)
            print('going to create hog from sign folder(', sign_folder, ')')
            for v in videos:
                video_folder = os.path.join(sign_folder, v)
                if not os.path.isdir(video_folder):
                    continue
                videoID = videoID + 1
                print('going to create hog from video folder(', video_folder, ')')
                frames = os.listdir(video_folder)
                feat_set_video = np.array([0, 0, 0, 0])

                olderFileName_v01 = video_folder + os.sep + 'hog_set.npz'
                olderFileName_v02  = video_folder + os.sep + 'hog_set_41.npz'
                hogFeats_curVideo_FileNameFull = video_folder + os.sep + hogFeatsFileName.replace('.npy', '.npz')
                if os.path.isfile(olderFileName_v01):
                    os.rename(olderFileName_v01, hogFeats_curVideo_FileNameFull)
                elif os.path.isfile(olderFileName_v02):
                    os.rename(olderFileName_v02, hogFeats_curVideo_FileNameFull)

                feats_labels_loaded = False
                if os.path.isfile(hogFeats_curVideo_FileNameFull):
                    npzfile = np.load(hogFeats_curVideo_FileNameFull)
                    feat_set_video = npzfile['feat_set_video']
                    labels = npzfile['labels']
                    feats_labels_loaded = True

                if feats_labels_loaded:
                    frameList = video_folder + os.sep + '*.png'
                    pngCount = len(glob.glob(frameList))
                    feats_labels_loaded = pngCount==len(labels)

                if not feats_labels_loaded:
                    for frame in sorted(frames):
                        if frame.endswith('.png'):
                            frame_name = os.path.join(video_folder, frame)
                            img = data.load(frame_name)
                            feat_current = hog(img, pixels_per_cell=(32, 32), cells_per_block=(4, 4))
                            if np.all(feat_set_video == 0):
                                feat_set_video = feat_current
                            else:
                                feat_set_video = np.vstack((feat_set_video, feat_current))
                        elif frame.endswith('_labels.txt'):
                            labels = np.loadtxt(os.path.join(video_folder, frame))
                    np.savez(hogFeats_curVideo_FileNameFull, feat_set_video=feat_set_video, labels=labels)

                fr = frameCount
                to = frameCount + len(labels)
                frCnt = to - fr
                frameIDs = np.asarray(range(fr, to)).reshape(frCnt, -1)
                detailedLabels_video = np.hstack((signID * np.ones([frCnt, 1]), videoID * np.ones([frCnt, 1]), frameIDs, np.asarray(labels).reshape(frCnt, -1)))

                if np.all(feat_set == 0):
                    feat_set = feat_set_video
                else:
                    feat_set = np.vstack((feat_set, feat_set_video))

                if np.all(labels_all == 0):
                    labels_all = labels
                else:
                    labels_all = np.hstack((labels_all, labels))

                if np.all(detailedLabels_all == 0):
                    detailedLabels_all = detailedLabels_video
                else:
                    detailedLabels_all = np.vstack((detailedLabels_all, detailedLabels_video))
                frameCount = len(labels_all)
        print('saving exported feat_set(', feat_set.shape, ') into(', hogFeatsFileNameFull, ')')

        np.save(hogFeatsFileNameFull, feat_set)
        np.save(labelsFileNameFull, labels_all)
        np.save(detailedLabelsFileNameFull, detailedLabels_all)

    return feat_set, labels_all, detailedLabels_all
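
# Usage sketch, matching the call made from checkCreateData2Use further below:
# reuse the cached hogFeats/labels .npy files when present, otherwise rebuild them
# from the png frames (slow).
feat_set, labels_all, detailedLabels_all = loadData_hog(loadHogIfExist=True, numOfSigns=11)
print(feat_set.shape, labels_all.shape, detailedLabels_all.shape)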
Code Example #17
import sys
import time

import tensorflow as tf
from keras import backend as K

import helperFuncs as funcH
import dataLoaderFuncs as funcD

argv = sys.argv  # assumed: the positional command-line arguments consumed below
## extra imports to set GPU options
################################### # TensorFlow wizardry 
config = tf.ConfigProto()
# Don't pre-allocate memory; allocate as-needed 
config.gpu_options.allow_growth = True  
# Only allow a total of half the GPU memory to be allocated 
config.gpu_options.per_process_gpu_memory_fraction = 0.5
# Create a session with the above options specified. 
K.tensorflow_backend.set_session(tf.Session(config=config))

base_dir = funcH.getVariableByComputerName('base_dir')
data_dir = funcH.getVariableByComputerName('data_dir')
results_dir = funcH.getVariableByComputerName('results_dir')

posterior_dim = int(argv[1])  # K, the number of clusters
weight_of_regularizer = float(argv[2])  # sparsity parameter (a trade-off between reconstruction and clustering)
trainMode = str(argv[3])  # trainMode = {'sae', 'cosae', 'corsa'}
corr_randMode = bool(int(argv[4]))
dataToUse = argv[5]  # dataToUse = {'hog', 'resnet18', 'sn256'}

if trainMode == "sae":
    applyCorr = 0.0
elif trainMode == "cosae":
    applyCorr = 2.0
elif trainMode == "corsa":
    applyCorr = 0.0
Code Example #18
def main(argv):
    np.set_printoptions(formatter={"float_kind": lambda x: "%g" % x})

    params_dict = parseArgs(argv)
    numOfSigns = params_dict["numOfSigns"]  # 11 or 41
    clusterModel = params_dict["clusterModel"]  # 'KMeans', 'GMM_diag', 'Spectral'
    params_dict["hostName"] = socket.gethostname()
    initialLabelVec, expNameEnd = decode_initial_label_param(params_dict["initialLabel"])
    clusterLabelUpdateInterval = params_dict["clusterLabelUpdateInterval"]

    print('you are running this train function on = <', params_dict["hostName"], '>')

    input_initial_resize, input_size, batch_size, num_workers = initSomeVals(params_dict)
    train_data_transform, valid_data_transform = getTransformFuncs(input_size, input_initial_resize)

    base_dir = funcH.getVariableByComputerName('base_dir')  # dataPath and dataFolder
    data_dir = funcH.getVariableByComputerName('data_dir')  # bdData
    results_dir = funcH.getVariableByComputerName('results_dir').replace("bdResults", "dcResults")
    labelsDir = funcH.getVariableByComputerName('results_dir').replace("bdResults", "dcLabels")
    modelsDir = os.path.join(base_dir, 'dcModels')
    nnVidsDir = os.path.join(base_dir, 'neuralNetHandVideos_' + str(numOfSigns))

    expName = params_dict["modelName"] + '_' + \
              params_dict["clusterModel"] + \
              '_pd' + str(params_dict["posterior_dim"]) + \
              '_clui' + str(params_dict["clusterLabelUpdateInterval"]) + \
              '_' + str(numOfSigns) + \
              expNameEnd
    labelSaveFolder = os.path.join(labelsDir, expName)
    resultMatFile = os.path.join(results_dir, 'rMF_' + expName)

    funcH.createDirIfNotExist(results_dir)
    funcH.createDirIfNotExist(labelsDir)
    funcH.createDirIfNotExist(modelsDir)
    funcH.createDirIfNotExist(labelSaveFolder)

    epochFr, epochTo = setEpochBounds(labelSaveFolder, params_dict["epochs"], params_dict["appendEpochBinary"])

    train_dataset = HandShapeDataset(root_dir=nnVidsDir, istrain=True, transform=train_data_transform, datasetname='nnv')
    val_dataset = HandShapeDataset(root_dir=nnVidsDir, istrain=False, transform=valid_data_transform, datasetname='nnv')

    num_classes = np.unique(train_dataset.labels).size

    print('trainCnt = ', len(train_dataset))
    print('valCnt = ', len(val_dataset))

    model, optimizer, updatedModelFile = getModel(params_dict, modelsDir, expName)

    num_ftrs = model.fc.in_features
    print('num_classes = ', num_classes, ', num_ftrs = ', num_ftrs, flush=True)

    epochStartTime = time.time()

    dsLoad_train_train = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    dsLoad_train_featExtract = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    model.eval()

    #  evaluate the model to extract
    #  trAccInit : to save as initial training accuracy
    #  featTrInit : features to cluster, also saved as result features in -saveFeatsExtracted-
    #  labelsTrInit :
    #  predictionsTrInit :
    trAccInit, _, featTrInit, labelsTrInit, predictionsTrInit = runValidDs(model, dsLoad_train_featExtract, return_feats=True, layerSize=num_ftrs)

    saveFeatsExtracted(data_dir, epochFr, params_dict["modelName"], expName, featTrInit, labelsTrInit, predictionsTrInit)

    labelSaveFileName = labelSaveFolder + os.sep + 'labels_{:03d}.npz'.format(epochFr)
    predClusters, resultRow = iterate_1(featTrInit, labelsTrInit, predictionsTrInit, params_dict["posterior_dim"],
                                        labelSaveFileName, epochFr-1, epochTo, trAccInit,
                                        epochStartTime, clusterModel=clusterModel, initialLabelVec=initialLabelVec)

    train_dataset = updateTrainLabels(train_dataset, clusterLabelUpdateInterval, epochFr, predClusters=predClusters, initialLabelVec=initialLabelVec)

    resultMat = []
    resultMat = resultMat + resultRow.tolist()
    if not os.path.isfile(resultMatFile):
        np.savetxt(resultMatFile, np.array(resultRow).reshape(1, -1), fmt='%4.3f', delimiter='*', newline=os.linesep,
               header='ep * tr_acc_epoch * nmi_lab * nmi_lab_nz * acc_lab * acc_lab_nz * nmi_pred * nmi_pred_nz * acc_pred * acc_pred_nz',
               footer='', comments='', encoding=None)
    else:
        f = open(resultMatFile, 'a')
        np.savetxt(f, np.array(resultRow).reshape(1, -1), fmt='%4.3f', delimiter='*', newline=os.linesep, header='', footer='', comments='', encoding=None)
        f.close()


    for ep in range(epochFr, epochTo):
        model.train()  # Set model to training mode
        epochStartTime = time.time()
        _, _ = runTrainDs(model, optimizer, dsLoad_train_train)

        model.eval()
        tr_acc_epoch, _, features_avgPool, labels_avgPool, predictionsTr = \
            runValidDs(model, dsLoad_train_featExtract, return_feats=True, layerSize=num_ftrs)

        labelSaveFileName = labelSaveFolder + os.sep + 'labels_{:03d}.npz'.format(ep+1)
        predClusters, resultRow = iterate_1(features_avgPool, labelsTrInit, predictionsTr,
                                            params_dict["posterior_dim"], labelSaveFileName, ep, epochTo, tr_acc_epoch,
                                            epochStartTime, clusterModel=clusterModel, initialLabelVec=initialLabelVec)
        resultMat = resultMat + resultRow.tolist()

        train_dataset = updateTrainLabels(train_dataset, clusterLabelUpdateInterval, ep+1, predClusters=predClusters)

        saveFeatsExtracted(data_dir, ep, params_dict["modelName"], expName, features_avgPool, labelsTrInit, predictionsTr)
        saveToResultMatFile(resultMatFile, resultRow)
        torch.save(model, f=updatedModelFile)
Code Example #19
def loadPCAData(dataToUse, normMode, numOfSigns, data_dim, skipLoadOfOriginalData, data_dir, base_dir = funcH.getVariableByComputerName('base_dir')):
    # normMode = str(modelParams["normMode"])
    pcaFeatsFileName = getFileName(dataToUse=dataToUse, normMode=normMode, pcaCount=-1, numOfSigns=numOfSigns, expectedFileType='PCA')
    hogFeatsFileName = getFileName(dataToUse=dataToUse, normMode=normMode, pcaCount=-1, numOfSigns=numOfSigns, expectedFileType='Data')
    if not skipLoadOfOriginalData:
        feat_set, _, _ = loadData_hog(base_dir=base_dir, data_dir=data_dir, loadHogIfExist=True, numOfSigns=numOfSigns)
    else:  # load the previously exported feature file
        feat_set = loadFileIfExist(data_dir, hogFeatsFileName)

    feat_set_pca = loadFileIfExist(data_dir, pcaFeatsFileName)
    if feat_set_pca.size == 0:
        feat_set_pca = applyPCA2Data(feat_set, data_dir, data_dim, dataToUse=dataToUse, numOfSigns=numOfSigns, loadIfExist=True)
    return feat_set_pca
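
# Usage sketch: with skipLoadOfOriginalData=True the exported feature file is reused
# and its PCA projection is returned, computed via applyPCA2Data if the PCA file is
# not on disk yet.
data_dir_example = funcH.getVariableByComputerName('data_dir')
feat_set_pca = loadPCAData(dataToUse='hog', normMode='', numOfSigns=11, data_dim=256,
                           skipLoadOfOriginalData=True, data_dir=data_dir_example)
print(feat_set_pca.shape)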
Code Example #20
def loadSkeletonDataFromVideosFolders(base_dir=funcH.getVariableByComputerName('base_dir'),
                                      data_dir=funcH.getVariableByComputerName('data_dir'),
                                      loadIfExist=True, numOfSigns=11):

    videosFolderName = 'neuralNetHandVideos_' + str(numOfSigns)
    base_dir_train_feat = os.path.join(base_dir, videosFolderName)

    featsFileNameFull, labelsFileNameFull, detailedLabelsFileNameFull, _ = getFileNameGroup(dataToUse='skeleton', normMode='', pcaCount=-1, numOfSigns=numOfSigns, expectedFileType='Data', data_dir=data_dir)
    # 'skeletonFeats_41.npy' or 'skeletonFeats_11.npy'

    if loadIfExist and os.path.isfile(featsFileNameFull) and os.path.isfile(labelsFileNameFull) and os.path.isfile(detailedLabelsFileNameFull):
        print('loading exported feat_set from(', featsFileNameFull, ')')
        feat_set = np.load(featsFileNameFull)
        labels_all = np.load(labelsFileNameFull)
        detailedLabels_all = np.load(detailedLabelsFileNameFull)
        print('loaded exported feat_set(', feat_set.shape, ') from(', featsFileNameFull, ')')
    else:
        detailedLabels_all = np.array([0, 0, 0, 0])
        labels_all = np.array([0, 0, 0, 0])
        feat_set = np.array([0, 0, 0, 0])
        foldernames = np.sort(os.listdir(base_dir_train_feat))
        signID = 0
        frameCount = 0
        for f in foldernames:
            sign_folder = os.path.join(base_dir_train_feat, str(f))
            if not os.path.isdir(sign_folder):
                continue
            signID = signID + 1
            videoID = 0
            videos = np.sort(os.listdir(sign_folder))
            print(f)
            print('going to load skeleton data from sign folder(', sign_folder, ')')
            for v in videos:
                video_folder = os.path.join(sign_folder, v)
                if not os.path.isdir(video_folder):
                    continue
                videoID = videoID + 1
                print('going to load skeleton data from video folder(', video_folder, ')')
                frames = os.listdir(video_folder)
                feat_set_video = np.array([0, 0, 0, 0])

                skelFeat_file = [os.path.join(video_folder, f) for f in os.listdir(video_folder) if f.endswith('skel.txt')]
                featsMat = pd.read_csv(skelFeat_file[0], header=None)
                frameCntSkel = featsMat.shape[0]

                labels_file = [os.path.join(video_folder, f) for f in os.listdir(video_folder) if f.endswith('labels.txt')]
                labels = np.loadtxt(os.path.join(video_folder, labels_file[0]))
                frameCntLabel = len(labels)

                frameList = video_folder + os.sep + '*.png'
                pngCount = len(glob.glob(frameList))

                # all three frame counts (skeleton rows, label rows, png frames) must match
                assert pngCount == frameCntLabel and pngCount == frameCntSkel, \
                    "these three values must be the same: frameCntSkel(" + str(frameCntSkel) + ") " + \
                    "frameCntLabel(" + str(frameCntLabel) + ") " + \
                    "pngCount(" + str(pngCount) + ")"


                fr = frameCount
                to = frameCount + frameCntLabel
                frCnt = to - fr
                frameIDs = np.asarray(range(fr, to)).reshape(frCnt, -1)
                detailedLabels_video = np.hstack((signID * np.ones([frCnt, 1]), videoID * np.ones([frCnt, 1]), frameIDs, np.asarray(labels).reshape(frCnt, -1)))

                if np.all(feat_set == 0):
                    feat_set = featsMat
                else:
                    feat_set = np.vstack((feat_set, featsMat))

                if np.all(labels_all == 0):
                    labels_all = labels
                else:
                    labels_all = np.hstack((labels_all, labels))

                if np.all(detailedLabels_all == 0):
                    detailedLabels_all = detailedLabels_video
                else:
                    detailedLabels_all = np.vstack((detailedLabels_all, detailedLabels_video))
                frameCount = len(labels_all)

        print('saving exported feat_set(', feat_set.shape, ') into(', featsFileNameFull, ')')
        np.save(featsFileNameFull, feat_set)
        print('saving labels(', labels_all.shape, ') into(', labelsFileNameFull, ')')
        np.save(labelsFileNameFull, labels_all)
        print('saving detailedLabels(', detailedLabels_all.shape, ') into(', detailedLabelsFileNameFull, ')')
        np.save(detailedLabelsFileNameFull, detailedLabels_all)

    return feat_set, labels_all, detailedLabels_all
Code Example #21
def checkCreateData2Use(sign_count,
                        dataToUse,
                        recreate=False,
                        recreate_hog=False):
    base_dir = funcH.getVariableByComputerName('base_dir')
    data_dir = funcH.getVariableByComputerName('data_dir')
    nnfolderBase = os.path.join(base_dir,
                                'neuralNetHandVideos_' + str(sign_count))

    print('dataToUse:', dataToUse)  # dataToUse: sn
    print('signCnt:', sign_count)  # signCnt: 41
    print('nnfolderBase:', nnfolderBase)  # nnfolderBase: /home/dg/DataPath/neuralNetHandVideos_41
    print('exist(nnfolderBase):', os.path.isdir(nnfolderBase))  # exist(nnfolderBase): False
    if dataToUse == 'sn':
        featureStr = 'surfImArr_all'
        labelStr = 'labelVecs_all'
        possible_fname_init = ['surfImArr', 'snFeats']
        dimArray = [256, 512, 1024]
        convertMat2NPY = True
    elif dataToUse == 'skeleton':
        featureStr = 'skel_all'
        labelStr = 'labelVecs_all'
        possible_fname_init = ['skeleton', 'skelFeats']
        dimArray = [32, 64, 96]
        convertMat2NPY = True
    elif dataToUse == 'hog':
        dimArray = [256, 512, 1024]
        convertMat2NPY = False
    elif dataToUse == 'hgsnsk' or dataToUse == 'hgsn' or dataToUse == 'hgsk' or dataToUse == 'snsk':
        dimArray = [256, 512, 1024]
        convertMat2NPY = False
    else:
        os._exit(5)

    if convertMat2NPY:
        _ = funcHP.convert_Mat2NPY(dataToUse,
                                   data_dir,
                                   sign_count,
                                   featureStr,
                                   labelStr,
                                   possible_fname_init,
                                   recreate=recreate)
    elif dataToUse == 'hog':
        _ = funcD.loadData_hog(loadHogIfExist=not recreate_hog,
                               numOfSigns=sign_count)
    # /home/dg/DataPath/bdData/snFeats_41.mat is loaded:)
    # ['__globals__', '__header__', '__version__', 'knownKHSlist', 'labelVecs_all', 'surfImArr_all']
    # saving labels((104472,)) at: /home/dg/DataPath/bdData/labels_41.npy
    # saving snFeats((104472, 1600)) at: /home/dg/DataPath/bdData/snFeats_41.npy

    for normMode in ['']:
        _ = funcHP.createPCAOfData(data_dir,
                                   dataToUse,
                                   sign_count,
                                   recreate=recreate,
                                   normMode=normMode)
    # loaded sn_feats((104472, 1600)) from: /home/dg/DataPath/bdData/snPCA_41.npy
    # Max of featsPCA = 0.003667559914686907, Min of featsPCA = -0.0028185132292039457

    for normMode in ['']:
        funcHP.createPCADimsOfData(data_dir,
                                   dataToUse,
                                   sign_count,
                                   dimArray,
                                   recreate=recreate,
                                   normMode=normMode)
    # loaded  sn Feats( (104472, 1600) ) from :  /home/dg/DataPath/bdData/snPCA_41.npy
    # Max of featsPCA =  0.003667559914686907 , Min of featsPCA =  -0.0028185132292039457
    # features.shape: (104472, 256)
    # saving pca sn features at :  /home/dg/DataPath/bdData/sn256Feats_41.npy
    # features.shape: (104472, 512)
    # saving pca sn features at :  /home/dg/DataPath/bdData/sn512Feats_41.npy
    # features.shape: (104472, 1024)
    # saving pca sn features at :  /home/dg/DataPath/bdData/sn1024Feats_41.npy

    for normMode in ['']:
        for dims in dimArray:
            funcD.getCorrespondentFrames(base_dir=base_dir,
                                         data_dir=data_dir,
                                         featType=dataToUse,
                                         numOfSigns=sign_count,
                                         pcaCount=dims,
                                         expectedFileType='Data',
                                         normMode=normMode)
    funcD.getCorrespondentFrames(base_dir=base_dir,
                                 data_dir=data_dir,
                                 featType=dataToUse,
                                 numOfSigns=sign_count,
                                 pcaCount=-1,
                                 expectedFileType='Data',
                                 normMode='')