def prepare_data_holdout_given_split(path_samples):
    """Load a dataset that is already split into train and test folders.

    Samples are fetched from the ``train`` and ``test`` sub-directories of
    *path_samples* and handed to ``create_fvector_train_test``.

    Args:
        path_samples: Directory that contains the ``train`` and ``test``
            sub-directories.

    Returns:
        The tuple ``(X_train, X_test, Y_train, Y_test, class_names,
        fvector_labels, samples_train, samples_test)``: the six values
        produced by ``create_fvector_train_test`` followed by the fetched
        train and test samples.
    """
    # ------ Locate and fetch both halves of the pre-made split
    train_dir = os.path.join(path_samples, 'train')
    test_dir = os.path.join(path_samples, 'test')
    # NOTE: a variant reading the test samples from a 'validation'
    # sub-directory instead of 'test' was previously left commented out here.
    samples_train = fetch_samples(train_dir)
    samples_test = fetch_samples(test_dir)

    # ------ Build the feature vectors from the already split dataset
    feature_data = create_fvector_train_test(samples_train, samples_test)
    X_train, X_test, Y_train, Y_test, class_names, fvector_labels = feature_data

    return (
        X_train, X_test, Y_train, Y_test,
        class_names, fvector_labels,
        samples_train, samples_test,
    )
def prepare_data_gridCrossvalidation(path_samples):
    """Load a train/test dataset plus predefined folds for the training set.

    Samples are fetched from the ``train`` and ``test`` sub-directories of
    *path_samples*. The ``'fold'`` entry of every training sample is
    collected, in sample order, and wrapped in a ``PredefinedSplit``.

    Args:
        path_samples: Directory that contains the ``train`` and ``test``
            sub-directories.

    Returns:
        The tuple ``(X_train, X_test, Y_train, Y_test, class_names,
        fvector_labels, folds, samples_train, samples_test)``, where
        ``folds`` is the ``PredefinedSplit`` built from the training
        samples' ``'fold'`` values.
    """
    # ------ Fetch samples
    samples_train = fetch_samples(os.path.join(path_samples, 'train'))
    samples_test = fetch_samples(os.path.join(path_samples, 'test'))

    # One fold value per training sample, in the order the samples were fetched
    fold_per_sample = [entry['fold'] for entry in samples_train]

    # ------ Create feature vector
    (X_train, X_test, Y_train, Y_test,
     class_names, fvector_labels) = create_fvector_train_test(
        samples_train, samples_test)

    # presumably sklearn.model_selection.PredefinedSplit -- confirm against
    # the file's imports
    folds = PredefinedSplit(fold_per_sample)

    return (
        X_train, X_test, Y_train, Y_test,
        class_names, fvector_labels,
        folds, samples_train, samples_test,
    )
def prepare_data_holdout_random_split(path_samples):
    """Load an unsplit dataset and divide it into train and test parts.

    All samples under *path_samples* are fetched, converted with
    ``create_fvector`` and then divided by ``split_dataset``.

    Args:
        path_samples: Location passed unchanged to ``fetch_samples``.

    Returns:
        The tuple ``(X_train, X_test, Y_train, Y_test, class_names,
        fvector_labels, samples)``.
    """
    # ------ Fetch samples
    samples = fetch_samples(path_samples)

    # ------ Create feature vector (the returned filenames are not needed here)
    X, Y, class_names, fvector_labels, _filenames = create_fvector(samples)

    # ------ Split dataset; how the split is drawn is decided by split_dataset
    # (presumably at random, going by this function's name -- confirm)
    split_parts = split_dataset(X, Y)
    X_train, X_test, Y_train, Y_test = split_parts

    return (
        X_train, X_test, Y_train, Y_test,
        class_names, fvector_labels, samples,
    )
def prepare_data_crossvalidation_given_split(path_samples):
    """Load a dataset together with the predefined folds stored in its samples.

    All samples under *path_samples* are fetched, the ``'fold'`` entry of
    every sample is collected in sample order and wrapped in a
    ``PredefinedSplit``, and the samples are converted with
    ``create_fvector``.

    Args:
        path_samples: Location passed unchanged to ``fetch_samples``.

    Returns:
        The tuple ``(X, Y, class_names, fvector_labels, folds, samples)``.
    """
    # ------ Fetch samples
    samples = fetch_samples(path_samples)

    # One fold value per sample, in the order the samples were fetched
    test_fold = [sample['fold'] for sample in samples]

    # ------ Create feature vector
    # The other callers of create_fvector in this file unpack five values
    # (the fifth being filenames); unpacking exactly four here would raise
    # ValueError in that case. The starred target accepts the four values
    # this function needs and discards anything that follows.
    X, Y, class_names, fvector_labels, *_unused = create_fvector(samples)

    # presumably sklearn.model_selection.PredefinedSplit -- confirm against
    # the file's imports
    folds = PredefinedSplit(test_fold)

    return X, Y, class_names, fvector_labels, folds, samples
def prepare_data_crossvalidation(path_samples):
    """Load a dataset for cross-validation and group its files by parent sample.

    All samples under *path_samples* are fetched and converted with
    ``create_fvector``. Every sample's ``'filename'`` is then grouped under
    a "parent" name, derived by taking the text before the first ``"."`` and
    dropping its last ``"_"``-separated component (``"foo_bar_3.c"`` is
    grouped under ``"foo_bar"``). A filename without any ``"_"`` before the
    first ``"."`` is grouped under the empty string.

    Side effects:
        * ``sample['id'] -> sample['class']`` is stored in the module-level
          ``ids_and_authors_dict`` for every sample.
        * Every ``sample['id']`` is appended to the module-level ``ids``.
          Neither global is cleared first, so repeated calls accumulate.
        * Debugging information is printed to stdout.

    Args:
        path_samples: Location passed unchanged to ``fetch_samples``.

    Returns:
        The tuple ``(X, Y, class_names, fvector_labels, samples,
        parentsamples_and_functionsamples, filenames)``, where
        ``parentsamples_and_functionsamples`` maps each parent name to the
        list of filenames grouped under it, in sample order.
    """
    global ids
    global ids_and_authors_dict

    # ------ Fetch samples
    samples = fetch_samples(path_samples)

    # ------ Create feature vector
    X, Y, class_names, fvector_labels, filenames = create_fvector(samples)

    # ------ Debug output
    print("X")
    print(X)
    print("X len")
    print(len(X))
    print("X len elem")
    # Guarded so that an empty dataset does not abort with IndexError just
    # because of a debugging print.
    if len(X) > 0:
        print(len(X[0]))
    print("Y")
    print(Y)

    print("SAMPLES 0")
    if len(samples) > 0:
        print(samples[0])
    print("SAMPLES TYPE")
    print(type(samples))

    # ------ Record ids and their classes in the module-level containers
    for sample in samples:
        ids_and_authors_dict[sample['id']] = sample['class']
        ids.append(sample['id'])

    # ------ Group each filename under its parent sample name
    parentsamples_and_functionsamples = {}
    for sample in samples:
        function_sample = sample['filename']
        # Text before the first "." with the last "_"-separated part removed
        stem = function_sample.split(".")[0]
        parent_parts = stem.split("_")[:-1]
        print(parent_parts)
        parent_name = "_".join(parent_parts)
        print(parent_name)
        parentsamples_and_functionsamples.setdefault(parent_name, []).append(
            function_sample)

    print(parentsamples_and_functionsamples)
    print(filenames)

    return X, Y, class_names, fvector_labels, samples, parentsamples_and_functionsamples, filenames