Example #1
import data     # project-local: CSV loading/split helpers
import fitting  # project-local: classifier fitting
from visualize import plot_diabetes  # project-local plotting helper


def main():
    """Simple interactive driver to exercise the module's methods."""
    data_training, data_validation = data.extract_data("diabetes.csv")

    # Prompt until a supported classifier is entered ('q' quits).
    classifier = ""
    while classifier not in ("svc", "knn"):
        classifier = input(
            "ENTER CLASSIFIER\nSVC or KNN?\n(Enter 'q' to exit)\n>").lower()
        if classifier == "q":
            exit(0)
        if classifier not in ("svc", "knn"):
            print("\n\nInput is not a valid classifier, try again..")

    feature_string = ("ENTER {} FEATURE\npregnant\nglucose\npressure\ntriceps"
                      "\ninsulin\nmass\npedigree\nage\n(Enter 'q' to exit)\n>")

    # Prompt for the first feature; the data columns double as the menu.
    feature_x = ""
    while feature_x not in list(data_training):
        feature_x = input("\n" + feature_string.format("FIRST")).lower()
        if feature_x == "q":
            exit(0)
        if feature_x not in list(data_training):
            print("\n\nInput is not a valid feature, try again..")

    # Second feature: the already-chosen one is removed from the menu.
    feature_y = ""
    while feature_y not in list(data_training):
        feature_y = input("\n" + feature_string.replace(
            feature_x + "\n", "").format("SECOND")).lower()
        if feature_y == "q":
            exit(0)
        if feature_y not in list(data_training):
            print("\n\nInput is not a valid feature, try again..")

    plot_diabetes(
        data_training, data_validation,
        fitting.fit(data_training, data_validation, classifier, feature_x,
                    feature_y), feature_x, feature_y).show()
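
If the module is run directly, the standard entry-point guard (assumed here, not shown in the listing) would call main():

if __name__ == "__main__":
    main()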
Example #3
import data  # project-local data-loading module


def extract_data(size=256):
    """Thin wrapper: load the raw data, then preprocess it."""
    print("Extracting data..")
    X, y = data.extract_data(size=size)  # forward the requested size to the loader

    print("Preprocessing data..")
    X, y, nb_samples, num_categories = data.preprocess_data(
        X, y, save=True, subtract_mean=True)

    return X, y, nb_samples, num_categories
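
A hedged call-site sketch for this wrapper; `size` is forwarded to the loader, and `save=True` inside preprocess_data presumably writes the data.hdf5 cache that Example #9 loads:

X, y, nb_samples, num_categories = extract_data(size=128)
print("{} samples across {} categories".format(nb_samples, num_categories))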
Example #4
    # Method of a project-local dataset class; `data` is the project's data module.
    def __init__(self, labels=('earn', 'acq', 'crude', 'corn')):
        print('Prepare data')
        self._train_set_tot, self._test_set_tot = data.extract_data()
        # Truncate each training item to its first 10 elements.
        self._train_set_tot = [_item[:10] for _item in self._train_set_tot]
        self._labels = labels
        self._nb_labels = len(labels)
        # Per-label sample counts, in the same order as `labels`.
        self._nb_train = (152, 114, 76, 38)
        self._nb_test = (40, 25, 15, 10)

        self.train_set = []
        self.test_set = []
        self.train_labels = []
        self.test_labels = []
        self._sample_data()
Example #6
import time
import pickle
import argparse

from sklearn import svm
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import classification_report

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dir',
        type=str,
        default='.',
        help='directory to read vehicle/non-vehicle image files from')
    FLAGS, unparsed = parser.parse_known_args()

    # extract_data is defined elsewhere in this file; it returns the
    # train/test splits together with the fitted feature scaler.
    X_train, y_train, X_test, y_test, scaler = extract_data(FLAGS.dir)

    svc = svm.SVC()

    t = time.time()
    svc.fit(X_train, y_train)
    t2 = time.time()
    print('Training took {:.1f} seconds and produced an accuracy of {}'.format(
        t2 - t, round(svc.score(X_test, y_test), 3)))

    with open('clf.pkl', 'wb') as fid:
        pickle.dump(svc, fid)
    with open('scaler.pkl', 'wb') as fid:
        pickle.dump(scaler, fid)
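
For later inference, the pickled objects can be loaded back. A minimal sketch; the file names match those written above, and the prediction-time feature pipeline must mirror training:

import pickle

# Reload the classifier and scaler saved by the training script.
with open('clf.pkl', 'rb') as fid:
    svc = pickle.load(fid)
with open('scaler.pkl', 'rb') as fid:
    scaler = pickle.load(fid)

# `features` stands in for vectors produced by the same pipeline as training.
# scaled = scaler.transform(features)
# prediction = svc.predict(scaled)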
Example #7
import os
import visualize
import fitting
import data

from flask import Flask, render_template, request, url_for
import matplotlib

matplotlib.use('Agg')  # headless backend; render plots without a display

classifiers = ['knn', 'svc']
features = [
    "pregnant", "glucose", "pressure", "triceps", "insulin", "mass",
    "pedigree", "age"
]
data_training, data_validation = data.extract_data("diabetes.csv")

app = Flask(__name__)
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0


@app.route("/")
def root():
    return render_template('frontpage.html')


@app.route("/plot")
def plot(error=False):
    # The snippet is truncated here; the remaining arguments are completed
    # to match the plot_diabetes / fitting.fit call pattern in Example #1.
    plt = visualize.plot_diabetes(
        data_training, data_validation,
        fitting.fit(data_training, data_validation, classifiers[0],
                    features[0], features[1]),
        features[0], features[1])
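
The rest of the route body is cut off in the listing; a standard Flask entry point (assumed, not part of the original snippet) would serve the app locally:

if __name__ == "__main__":
    app.run(debug=True)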
Example #8
from os import listdir

import numpy as np
import tflearn

import data  # project-local: data.extract_data loads (features, labels) from feature files


def classify():
    path = "./trainingFeatures/bal_train/"
    filenames = [path + f for f in listdir(path)]
    features, labels = data.extract_data(filenames)
    # Clips have varying frame counts, so keep an object array.
    features = np.array(features, dtype=object)
    # 3985 matches the number of clips in this training split.
    sumTab = np.zeros(3985)     # per-clip feature sums (for the scatter plot below)
    colors = np.zeros(3985)     # 1 if the clip carries label 0, else 0
    indiceTab = np.zeros(3985)  # clip indices (x-axis of the scatter plot)
    finalFeatures = []
    finalLabels = []
    for i, f in enumerate(features):
        if 0 in labels[i]:
            colors[i] = 1
        sumTab[i] = np.sum(f)
        indiceTab[i] = i

    # Keep only clips with at least 10 frames; flatten the first 10
    # frame vectors (10 x 128 = 1280 values) into one feature vector.
    for i in range(len(features)):
        if len(features[i]) >= 10:
            temp = []
            for j in range(10):
                temp.extend(features[i][j])
            finalFeatures.append(np.array(temp))
            finalLabels.append(labels[i])

    finalFeatures = np.array(finalFeatures)
    finalLabels = np.array(finalLabels, dtype=object)  # label lists vary in length
    # plt.scatter(indiceTab, sumTab, c=colors)
    # plt.show()

    X = finalFeatures
    # One-hot targets: class 0 if the clip carries label 0, else class 1.
    outputs = np.zeros((len(finalLabels), 2))
    for i, label_set in enumerate(finalLabels):
        if 0 in label_set:
            outputs[i][0] = 1
        else:
            outputs[i][1] = 1
    print(outputs)
    Y = outputs

    # Small fully connected network over the 1280-dim flattened clips.
    net = tflearn.input_data(shape=[None, 1280])
    net = tflearn.fully_connected(net, 64)
    net = tflearn.fully_connected(net, 64)
    net = tflearn.fully_connected(net, 2, activation='softmax')
    net = tflearn.regression(net)
    model = tflearn.DNN(net, tensorboard_verbose=3)
    model.fit(X,
              Y,
              n_epoch=10,
              validation_set=0.3,
              show_metric=True,
              batch_size=16)

    # Evaluate on the held-out set with the same preprocessing.
    path = "./trainingFeatures/eval/"
    filenames = [path + f for f in listdir(path)]
    features, labels = data.extract_data(filenames)
    finalFeatures = []
    finalLabels = []
    for i in range(len(features)):
        if len(features[i]) >= 10:
            temp = []
            for j in range(10):
                temp.extend(features[i][j])
            finalFeatures.append(np.array(temp))
            finalLabels.append(labels[i])
    finalFeatures = np.array(finalFeatures)

    # Fresh one-hot targets sized for the eval set.
    outputs = np.zeros((len(finalLabels), 2))
    for i, label_set in enumerate(finalLabels):
        if 0 in label_set:
            outputs[i][0] = 1
        else:
            outputs[i][1] = 1
    predict = np.array(model.predict(finalFeatures))
    # Binarize: mark the higher-probability class as 1, the other as 0.
    for i in range(len(predict)):
        if predict[i][0] > predict[i][1]:
            predict[i][0] = 1
            predict[i][1] = 0
        else:
            predict[i][0] = 0
            predict[i][1] = 1

    # Accuracy: fraction of samples whose predicted class matches the target.
    totalTrue = 0
    total = 0
    for i in range(len(predict)):
        if outputs[i][0] == predict[i][0]:
            totalTrue += 1
        total += 1
    print(totalTrue / total)
Example #9
import h5py
import numpy as np

# `data`, `m` (the model-building module), and `get_top_n_error` are
# project-local helpers.


def run(epochs=500,
        training_percentage=0.4,
        validation_percentage=0.1,
        extract=True,
        cont=True,
        size=256,
        top_k=5):
    '''Fetch the data, put it in the needed format, and train the model;
       save weights whenever validation loss improves, tracking the best loss.'''
    if extract:
        print("Extracting data..")
        X, y = data.extract_data(size=size)

        print("Preprocessing data..")
        X, y, nb_samples, num_categories = data.preprocess_data(
            X, y, save=True, subtract_mean=True)

    else:
        print("Loading data..")
        h5f = h5py.File('data.hdf5', 'r')
        # `.value` was removed in h5py 3.x; `[()]` reads a scalar dataset.
        nb_samples = h5f['nb_samples'][()]
        num_categories = h5f['n_categories'][()]
        h5f.close()

    print("Number of categories: {}".format(num_categories))
    print("Number of samples {}".format(nb_samples))

    data_ids = np.arange(start=0, stop=nb_samples)
    val_ids = data.produce_validation_indices(
        data_ids, nb_samples * validation_percentage)
    train_ids = data.produce_train_indices(dataset_indx=data_ids,
                                           number_of_samples=nb_samples *
                                           training_percentage,
                                           val_indx=val_ids)
    # X_train, y_train, X_test, y_test = data.split_data(X, y, split_ratio=split)
    X_train, y_train, X_val, y_val = data.load_dataset_bit_from_hdf5(
        train_ids, val_ids, only_train=False)
    X_val = X_val / 255  # scale pixel values to [0, 1]

    print("Building and Compiling model..")
    model = m.get_model(n_outputs=num_categories, input_size=size)

    if cont:
        # model.load_weights_until_layer("pre_trained_weights/latest_model_weights.hdf5", 26)
        model.load_weights("pre_trained_weights/latest_model_weights.hdf5")
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=["accuracy"])

    print("Training..")

    best_performance = np.inf
    for _ in range(epochs):
        train_ids = data.produce_train_indices(dataset_indx=data_ids,
                                               number_of_samples=15000,
                                               val_indx=val_ids)

        X_train, y_train = data.load_dataset_bit_from_hdf5(train_ids,
                                                           val_ids,
                                                           only_train=True)

        X_train = X_train / 255
        X_train = data.augment_data(X_train)

        # Fit for one epoch on the freshly sampled training subset.
        # (Keras 1.x-era API: `nb_epoch` is `epochs` in Keras 2+.)
        metadata = model.fit(X_train,
                             y_train,
                             validation_data=(X_val, y_val),
                             batch_size=64,
                             nb_epoch=1,
                             verbose=1,
                             shuffle=True,
                             class_weight=None,
                             sample_weight=None)
        current_loss = metadata.history['loss'][-1]
        current_val_loss = metadata.history['val_loss'][-1]
        # `predict_proba` is Keras 1.x API; newer Keras uses `predict`.
        preds = model.predict_proba(X_val, batch_size=64)
        print("Loss: {}".format(current_loss))
        print("Val_loss: {}".format(current_val_loss))

        # Report the top-k error on the validation set (k defaults to 5).
        top_k_error = get_top_n_error(preds, y_val, top_k)
        print("Top {} error: {}".format(top_k, top_k_error))
        if current_val_loss < best_performance:
            model.save_weights("pre_trained_weights/model_weights.hdf5",
                               overwrite=True)
            best_performance = current_val_loss
            print("Saving weights..")
        model.save_weights("pre_trained_weights/latest_model_weights.hdf5",
                           overwrite=True)
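
get_top_n_error is project-local and not shown in the listing. A plausible minimal sketch, assuming one-hot labels and per-class probability predictions:

def get_top_n_error(preds, y_true, n):
    # Error rate: fraction of samples whose true class is not among
    # the n highest-probability predictions.
    top_n = np.argsort(preds, axis=1)[:, -n:]
    true_idx = np.argmax(y_true, axis=1)
    hits = sum(int(t in top_n[i]) for i, t in enumerate(true_idx))
    return 1.0 - hits / len(true_idx)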
Example #10
import argparse

from sklearn.metrics import accuracy_score, classification_report

# `extract_data`, `clf`, and `X_scaler` are assumed to be defined earlier in
# this file (cf. the classifier and scaler pickled in Example #6).

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--car', type=str, default='image0004.png',
                        help='car image')
    parser.add_argument('--non', type=str, default='image0001.png',
                        help='non-car image')
    FLAGS, unparsed = parser.parse_known_args()

#    features_car = extract_features([FLAGS.car])
#    features_noncar = extract_features([FLAGS.non])

#    features_car_scaled = X_scaler.transform(features_car)
#    features_noncar_scaled = X_scaler.transform(features_noncar)

#    prediction = clf.predict(features_car_scaled)
#    if prediction == 1:
#        print('Correct prediction of Car')
#    else: print('Incorrect prediction of Car')
#    prediction = clf.predict(features_noncar_scaled)
#    if prediction == 0:
#        print('Correct prediction of Non-Car')
#    else: print('Incorrect prediction of Non-Car')

    X_train, y_train, X_test, y_test = extract_data()
    scaled_X_test = X_scaler.transform(X_test)
    predictions = clf.predict(scaled_X_test)
    # accuracy_score returns a fraction in [0, 1]; scale it for the % display.
    print('Accuracy on Test Set: {:.2f}%'.format(
        100 * accuracy_score(y_test, predictions)))

    print("\nDetailed classification report:")
    print(classification_report(y_test, predictions))