Example #1
import numpy


def unpack_data():
    source = ['mercedes', '5577', '2020', 'Aug']
    training_data = import_all_training(source, shuffle=True)
    print("Quantity of training data:", len(training_data))

    classifier_outputs = len(
        numpy.unique(numpy.array(training_data, dtype=object)[:, 0]))

    X = []
    y = []
    for sample in training_data:
        # Normalise each sample's pixel values into the range 0.01-1.0
        # and flatten the image into a one-dimensional input vector.
        inputs = sample[1]
        inputs = (inputs / 255 * 0.99) + 0.01
        inputs = inputs.flatten()
        #inputs = (numpy.asfarray(sample[1:]) / 255 * 0.99) + 0.01
        X.append(inputs)

        # Note: this one-hot `targets` vector is built but never used;
        # y stores the integer class label taken from the start of each sample.
        targets = numpy.zeros(classifier_outputs) + 0.01
        targets[int(sample[0])] = 0.99
        y.append(int(sample[0]))
    X = numpy.array(X)
    y = numpy.array(y)
    return X, y
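
All of these examples lean on import_all_training, which is not part of this listing. Judging from the indexing above (sample[0] is an integer class label, sample[1] is a 2-D pixel array in the 0-255 range), a minimal stand-in for exercising unpack_data might look like the sketch below; this loader is invented for illustration, not the project's real importer.

import numpy

def fake_import_all_training(source, shuffle=True, img_size=28):
    # Hypothetical loader returning (label, image) pairs that match the
    # sample[0] / sample[1] access pattern used by unpack_data above.
    rng = numpy.random.default_rng(0)
    return [(int(rng.integers(0, 3)),
             rng.integers(0, 256, size=(img_size, img_size)))
            for _ in range(10)]

With this stand-in swapped in for import_all_training, unpack_data returns X with shape (10, img_size * img_size) and y with shape (10,).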
Example #2
import csv

import numpy


def run_test_harness():
    # load dataset
    source = ["mcdonald", "5577", "2018", "Jul"]
    train_test_split = 1000
    img_sz = 128
    trainingdata = import_all_training(source, shuffle=True, img_size=img_sz)
    if train_test_split > len(trainingdata):
        train_test_split = len(trainingdata) - 2
    classifier_outputs = len(
        numpy.unique(numpy.array(trainingdata, dtype=object)[:, 0]))
    trainX, trainY, testX, testY = load_evaluation_data(
        trainingdata, train_test_split, classifier_outputs)

    #### Test Model
    scores, histories, times = kfold_evaluate_model(trainX,
                                                    trainY,
                                                    img_sz,
                                                    classifier_outputs,
                                                    n_folds=5)
    #scores, histories = evaluate_model(trainX, trainY, testX, testY, img_sz, num_outputs)
    # learning curves
    summarize_accuracy(histories)
    # summarize estimated performance
    #summarize_performance(scores)
    with open("CNN_time_log.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(times)

    with open("CNN_acc_log.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(scores)
    return
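
load_evaluation_data is called in several of these examples but never defined in this listing. Based purely on its call signature (the training list, a split index, and the class count) and the one-hot targets built in Example #1, one plausible reading, offered only as an assumption, is:

import numpy

def load_evaluation_data_sketch(trainingdata, split, num_outputs):
    # Assumed behaviour: the first `split` samples become the training set,
    # the remainder the test set, with labels one-hot encoded as in Example #1.
    X, Y = [], []
    for label, image in trainingdata:
        X.append((image / 255 * 0.99 + 0.01).flatten())
        target = numpy.zeros(num_outputs) + 0.01
        target[int(label)] = 0.99
        Y.append(target)
    X, Y = numpy.array(X), numpy.array(Y)
    return X[:split], Y[:split], X[split:], Y[split:]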
Example #3
def run_prediction_model(source):
    # Import the training data for the CNN to learn from
    num_outputs = 7
    train_test_split = 500
    img_sz = 32
    trainingdata = import_all_training(source, shuffle=True, img_size=img_sz)
    if train_test_split > len(trainingdata):
        train_test_split = len(trainingdata) - 2
    trainX, trainY, testX, testY = load_evaluation_data(
        trainingdata, train_test_split, num_outputs)

    # ~~~ Make Classification Predictions ~~~
    unlabelled_data = import_prediction(source)
    predictX = load_unlabelled_data(unlabelled_data)
    predictions = prediction_model(trainX, trainY, predictX, img_sz,
                                   num_outputs)
    labels = simplify_predictions(predictions, unlabelled_data)
    organise_data(labels, source)

    return
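
A typical invocation, assuming the same [site, id, year, month] source list used throughout these examples:

source = ["mcdonald", "5577", "2018", "Jul"]
run_prediction_model(source)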
Example #4
import csv
import time


def run_hierarch(num_clusters, samples, testmode):
    if testmode:
        data_source = ["mcdonald", "5577", "2018", "Jul"]
        data = import_all_training(data_source)
    else:
        data = import_all(samples)

    time_start = time.time()
    hierarch_obj = Hierarchical(num_clusters)
    hierarch_obj.load_evaluation_data(data)
    labels = hierarch_obj.cluster()
    timetest = time.time() - time_start
    if testmode:
        with open("hierarchical_labels.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(labels)
    else:
        #hierarch_obj.move_to_new_dir(labels)
        pass
    return timetest
Example #5
import csv
import time


def run_kmeans(outputs, samples, testmode):
    if testmode:
        data_source = ["mcdonald", "5577", "2018", "Jul"]
        data = import_all_training(data_source)
    else:
        data = import_all(samples)
    time_start = time.time()
    kmeans_ob = Kmeans(outputs)
    kmeans_ob.load_evaluation_data(data)
    labels = kmeans_ob.cluster()
    timetest = time.time() - time_start
    #kmeans_ob.elbow_method(10)

    if testmode:
        with open("k-means_labels.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(labels)
    else:
        kmeans_ob.move_to_new_dir(labels)
    return timetest
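
Both clustering harnesses return their elapsed time in seconds, so a quick head-to-head in test mode could be run as below; the cluster count of 7 is a placeholder, and samples goes unused when testmode is True.

kmeans_time = run_kmeans(outputs=7, samples=None, testmode=True)
hierarch_time = run_hierarch(num_clusters=7, samples=None, testmode=True)
print("k-means: {:.2f}s, hierarchical: {:.2f}s".format(kmeans_time, hierarch_time))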
Example #6
def run_CNN(num_of_outputs,
            train_test_split,
            dropout_rate=0.5,
            num_of_filters=[64, 128],
            num_of_dense_units=128,
            act_func='relu'):
    DR = dropout_rate
    NF = num_of_filters
    ND = num_of_dense_units
    AF = act_func
    NO = num_of_outputs
    img_sze = 64

    # Import all training data and complete a sanity check on the split value.
    training_dataset = import_all_training("True", img_size=img_sze)
    if train_test_split > len(training_dataset):
        raise ValueError("Split must be smaller than dataset size!")

    newCNN = CNN(DR, NF, ND, AF, NO)
    newCNN.load_evaluation_data(training_dataset, train_test_split)
    score = newCNN.CNN_2D_evaluate(img_sze)
    accuracy = score[1] * 100.0
    test_loss = score[0]
    return accuracy, test_loss
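
Since run_CNN exposes its hyperparameters and returns an (accuracy, test_loss) pair, a small sweep over dropout rates might look like this; the output count and split value are placeholders:

for dr in (0.3, 0.5, 0.7):
    acc, loss = run_CNN(num_of_outputs=7, train_test_split=500, dropout_rate=dr)
    print("dropout={}: accuracy={:.1f}%, loss={:.3f}".format(dr, acc, loss))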
Example #7
import numpy

data_source = ["mcdonald", "5577", "2018", "Jul"]
training_data = import_all_training(data_source, shuffle=True)
print("Quantity of training data:", len(training_data))
train_test_split = 350

classifier_outputs = len(
    numpy.unique(numpy.array(training_data, dtype=object)[:, 0]))

# -------------- MLP TEST AREA  ---------------------------

# mlp_outputs = classifier_outputs
# hidden_nodes = [20,40,60,80,100,120,140]

# acc_list = []
# for i in range(3*len(hidden_nodes)-1):
#     hid_node = hidden_nodes[round(i/3)]
#     f1score = run_MLP(training_data, mlp_outputs, hidden_nodes=hid_node)
Example #8
import time

import pandas as pd
from sklearn.decomposition import PCA

def _PCA_3D(x_data, y_data):

    time_start = time.time()
    pca = PCA(n_components=3)
    pca_result = pca.fit_transform(x_data)  # fit on the function argument, not the module-level x_subset
    print('PCA done! Time elapsed: {} seconds'.format(time.time() -
                                                      time_start))

    pca_df = pd.DataFrame(pca_result, columns=['pca1', 'pca2', 'pca3'])
    scatter_3D(pca_df.values, y_data)  # Visualizing the PCA output

    return


data_source = ["mcdonald", "5577", "2018", "Jul"]
training_dataset = import_all_training(data_source)
x_subset, y_subset = convert_data_format(training_dataset)

#_tSNE(x_subset, y_subset)
_PCA(x_subset, y_subset)
#_PCA_3D(x_subset, y_subset)
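
_PCA is invoked above but its body is not part of this listing. A hypothetical 2-D counterpart to _PCA_3D, plotted directly with matplotlib rather than the project's scatter_3D helper, could be sketched as:

import time

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

def _PCA_2D_sketch(x_data, y_data):
    # Hypothetical 2-D analogue of _PCA_3D above.
    time_start = time.time()
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(x_data)
    print('PCA done! Time elapsed: {} seconds'.format(time.time() - time_start))

    plt.scatter(pca_result[:, 0], pca_result[:, 1], c=y_data, s=5)
    plt.xlabel('pca1')
    plt.ylabel('pca2')
    plt.show()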