def loadDNNonly(modeldir=os.getcwd()+os.sep+"DNN"+os.sep):
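    # Rebuild the classifier against an existing model_dir so the weights
    # saved by a previous training run are restored instead of retrained.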
    tf.logging.set_verbosity(tf.logging.ERROR)
    X, Ytrain = getXYDNN("refined_turning_data.csv")
    classifier = skflow.DNNClassifier(
        feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(X),
        hidden_units=[128, 128], n_classes=3, model_dir=modeldir)
    return classifier
Example #2
def train_and_eval(train_steps, log_dir, training_set, validation_set, testing_set):
    sparse_columns = [
        layers.sparse_column_with_keys(attribute, training_set[attribute].unique()) for attribute in FEATURE_ATTRIBUTES
    ]
    embedding_columns = [
        layers.embedding_column(column, dimension=8) for column in sparse_columns
    ]
    m = learn.DNNClassifier(
        hidden_units=[10, 50],
        feature_columns=embedding_columns,
        model_dir=log_dir,
        config=learn.RunConfig(save_checkpoints_secs=1),
    )
    validation_metrics = {
        "accuracy": learn.MetricSpec(metric_fn=metrics.streaming_accuracy, prediction_key="classes"),
        "precision": learn.MetricSpec(metric_fn=metrics.streaming_precision, prediction_key="classes"),
        "recall": learn.MetricSpec(metric_fn=metrics.streaming_recall, prediction_key="classes"),
    }
    monitors = [
        learn.monitors.ValidationMonitor(
            input_fn=lambda: input_fn(validation_set),
            every_n_steps=1000,
            metrics=validation_metrics,
            early_stopping_rounds=1,
        ),
    ]
    m.fit(
        input_fn=lambda: input_fn(training_set),
        steps=train_steps,
        monitors=monitors,
    )
    results = m.evaluate(input_fn=lambda: input_fn(testing_set), steps=1)
    for key in sorted(results):
        print("%s: %s" % (key, results[key]))
Example #3
def main(unused_argv):
    # Load dataset.
    iris = learn.datasets.load_dataset('iris')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    # Note that we save and reload the iris data in HDF5 format here purely
    # as a demonstration.
    h5f = h5py.File('/tmp/test_hdf5.h5', 'w')
    h5f.create_dataset('X_train', data=x_train)
    h5f.create_dataset('X_test', data=x_test)
    h5f.create_dataset('y_train', data=y_train)
    h5f.create_dataset('y_test', data=y_test)
    h5f.close()

    h5f = h5py.File('/tmp/test_hdf5.h5', 'r')
    x_train = np.array(h5f['X_train'])
    x_test = np.array(h5f['X_test'])
    y_train = np.array(h5f['y_train'])
    y_test = np.array(h5f['y_test'])
    h5f.close()

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    feature_columns = learn.infer_real_valued_columns_from_input(x_train)
    classifier = learn.DNNClassifier(feature_columns=feature_columns,
                                     hidden_units=[10, 20, 10],
                                     n_classes=3)

    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    score = metrics.accuracy_score(y_test, classifier.predict(x_test))
    print('Accuracy: {0:f}'.format(score))
Example #4
def part3():
    global boston, x_data, y_data
    import sys
    import numpy as np
    from tensorflow.examples.tutorials.mnist import input_data
    DATA_DIR = 'c:\\tmp\\data'
    data = input_data.read_data_sets(DATA_DIR, one_hot=False)
    x_data, y_data = data.train.images, data.train.labels.astype(np.int32)
    x_test, y_test = data.test.images, data.test.labels.astype(np.int32)

    NUM_STEPS = 2000
    MINIBATCH_SIZE = 128

    feature_columns = learn.infer_real_valued_columns_from_input(x_data)

    dnn = learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[200],
        n_classes=10,
        optimizer=tf.train.ProximalAdagradOptimizer(learning_rate=0.2))

    dnn.fit(x=x_data, y=y_data, steps=NUM_STEPS, batch_size=MINIBATCH_SIZE)

    test_acc = dnn.evaluate(x=x_test, y=y_test, steps=1)["accuracy"]
    print(f"test accuracy {test_acc}")

    from sklearn.metrics import confusion_matrix

    y_pred = dnn.predict(x=x_test, as_iterable=False)
    class_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    cnf_matrix = confusion_matrix(y_test, y_pred)
    print(cnf_matrix)
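# A small follow-up sketch (not in the original): row-normalizing a confusion
# matrix turns raw counts into per-class recall, which is easier to compare
# across the ten digit classes.
def normalize_confusion_matrix(cm):
    # Afterwards entry (i, j) is the fraction of true class i predicted as
    # class j, and each row sums to 1.
    return cm.astype(np.float64) / cm.sum(axis=1, keepdims=True)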
Example #5
def analyze_model(test_inters, testtype, model):
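    # Retrains the requested model ("LSTM" or the plain DNN) on every
    # intersection except test_inters, evaluates it on synthetic data from
    # createAnalysisTestData, and saves the predicted class probabilities.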
    path_to_load = c.PATH_TO_RESULTS + "ByIntersection" + os.sep
    load_folder = path_to_load + testtype + os.sep
    save_folder = load_folder + "TestOn" + ",".join(
        [str(i) for i in test_inters]) + os.sep
    Ypred = None
    if "LSTM" in model:
        Xtrain, Ytrain = du.getFeaturesLSTM(
            load_folder, testtype,
            list({1, 2, 3, 4, 5, 6, 7, 8, 9} - set(test_inters)))
        #Xtest, Ytest = du.getFeaturesLSTM(load_folder, testtype, test_inters)
        means, stddevs = du.normalize_get_params(Xtrain)
        Xtrain = du.normalize(Xtrain, means, stddevs)
        numFeatures = Xtrain.shape[2]
        Xtest, Ytest = createAnalysisTestData(numFeatures,
                                              traj_len=Xtrain.shape[1])
        #train the LSTM again
        Ypred, timeFit, timePred, all_tests_x, all_tests_y = LSTM.run_LSTM(
            (Xtrain, Ytrain), (Xtest, Ytest),
            model=model,
            save_path="ignore.out")
    else:
        Xtrain, Ytrain = du.getFeaturesnonLSTM(
            load_folder, testtype,
            list({1, 2, 3, 4, 5, 6, 7, 8, 9} - set(test_inters)))
        #Xtest, Ytest = du.getFeaturesnonLSTM(load_folder, testtype, test_inters)
        means, stddevs = du.normalize_get_params(Xtrain)
        Xtrain = du.normalize(Xtrain, means, stddevs)
        numFeatures = Xtrain.shape[1]
        Xtest, _ = createAnalysisTestData(numFeatures, traj_len=1)
        classifier = skflow.DNNClassifier(
            feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(Xtrain),
            hidden_units=[128, 128],
            n_classes=3)  #, model_dir=save_folder)
        #try:
        #    Ypred = classifier.predict_proba(Xtest)
        #except:
        print("Could not load saved model, re-training :(.")
        Ytrain = [int(i - 1) for i in Ytrain]
        start = time.clock()
        max_epochs = 10
        if max_epochs:
            start2 = time.clock()
            for epoch in range(max_epochs):
                classifier.fit(Xtrain, Ytrain, steps=1000)
                end2 = time.clock()
                print("Epoch", epoch, "Done. Took:", end2 - start2)
                start2 = end2
        else:
            classifier.fit(Xtrain, Ytrain)  #, logdir=log_path)
        Ypred = classifier.predict_proba(Xtest)
        end = time.clock()
        timeFit = end - start
    print("Done fitting, time spent:", timeFit)

    np.savetxt(save_folder + "analysis_Ypred_" + model, np.array(Ypred))
    print(model, "analysis predictions saved, test", testtype, save_folder,
          "analysis_Ypred_", model)
    return Ypred
Example #6
def validateDNN(test_folder, Xtrain, Ytrain, Xtest, Ytest):
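    # copy=False makes StandardScaler standardize Xtrain and Xtest in place,
    # mutating the caller's arrays.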
    scaler = preprocessing.StandardScaler(copy=False).fit(Xtrain)
    scaler.transform(Xtrain)
    scaler.transform(Xtest)
    classifier = skflow.DNNClassifier(
        feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(Xtrain),
        hidden_units=[128, 128], n_classes=3)

    # Labels arrive as 1..3; shift them to 0..2 for the 3-class estimator.
    Ytrain = [i - 1 for i in Ytrain]
    Ytest = [i - 1 for i in Ytest]
    num_batches = 10
    batch_size = int(len(Ytrain) / num_batches)
    XValid = Xtest[0:batch_size]
    YValid = Ytest[0:batch_size]
    '''for i in range(num_batches): # was testing out, works faster, not necessarily better
    #    classifier.partial_fit(Xtrain[i*batch_size:(i+1)*batch_size], 
    #                                  Ytrain[i*batch_size:(i+1)*batch_size])
    #    validation_score = metrics.accuracy_score(YValid, classifier.predict(XValid))
    #    print("Validation accuracy: %f" % validation_score)
        classifier.partial_fit(Xtrain, Ytrain, batch_size = batch_size)
        #classifier.fit(Xtrain, Ytrain)
        predictions = classifier.predict(Xtest)
        score = metrics.accuracy_score(Ytest, predictions)
        print("Validation accuracy: %f" % score)
    '''
    start = time.clock()
    classifier.fit(Xtrain, Ytrain)
    end = time.clock()
    print(end - start)
    predictions = classifier.predict(Xtest)
    probs = classifier.predict_proba(Xtest)
    np.savetxt(test_folder + "DNN_validation_p_dist", probs)
    score = metrics.accuracy_score(Ytest, predictions)
    print("Accuracy: %f" % score)
Example #7
def main(unused_argv):
    # Load dataset.
    iris = learn.datasets.load_dataset('iris')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    feature_columns = learn.infer_real_valued_columns_from_input(x_train)
    classifier = learn.DNNClassifier(feature_columns=feature_columns,
                                     hidden_units=[10, 20, 10],
                                     n_classes=3)

    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    predictions = list(classifier.predict(x_test, as_iterable=True))
    score = metrics.accuracy_score(y_test, predictions)

    y = list(classifier.predict_proba(x_test, as_iterable=True))

    # Pair each example's predicted probability of class 1 with its true label.
    result = [[prob[1], label] for prob, label in zip(y, y_test)]
    print(result)
    print(score)
def dnnclassifier():
    # n and k are expected to come from an enclosing cross-validation loop
    # (fold index n, repetition k); this snippet does not define them.
    tf.logging.set_verbosity(tf.logging.INFO)
    traindata = pd.read_csv("./classifier.trainset.5fold" + str(n) + ".csv")
    y_train = traindata['txoutcome']
    X_train = traindata[list(range(2, len(traindata.columns)))]
    testdata = pd.read_csv("./classifier.testset.5fold" + str(n) + ".csv")
    y_test = testdata['txoutcome']
    X_test = testdata[list(range(2, len(traindata.columns)))]
    feature_columns = learn.infer_real_valued_columns_from_input(X_train)
    dnn_classifier = learn.DNNClassifier(
        hidden_units=[20, 40, 20],
        n_classes=5,
        feature_columns=feature_columns)
    dnn_classifier.fit(X_train, y_train, steps=100000)
    # predict may return a generator; materialize it so len() and indexing work.
    dnn_prediction = list(dnn_classifier.predict(X_test))
    print('DNN Prediction Score: {0}'.format(
        accuracy_score(y_test, dnn_prediction)))
    print(len(dnn_prediction))
    print(len(y_test))
    print(dnn_prediction[4])
    print(y_test[4])
    # Save the predicted values for the next step, C-index calculation in R.
    fout = open(
        "./dnn_classifier.txoutcome.5fold" + str(5 * k + n) + ".txt", "w")
    for j in range(len(dnn_prediction)):
        fout.write(str(y_test[j]) + '\t' + str(dnn_prediction[j]) + '\n')
    fout.close()
Example #9
def main(unused_argv):
    qhdatas, qh_target = loadDatas()
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        qhdatas, qh_target, test_size=0.2, random_state=42)

    # It's useful to scale to ensure Stochastic Gradient Descent
    # will do the right thing.
    scaler = StandardScaler()

    # DNN classifier.
    classifier = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[20, 10],
        n_classes=3)

    pipeline = Pipeline([('scaler', scaler), ('DNNclassifier', classifier)])
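    # The "DNNclassifier__steps" fit parameter below is routed by sklearn's
    # "<step name>__<parameter>" convention to the classifier step's fit(),
    # i.e. classifier.fit(..., steps=20).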

    pipeline.fit(x_train, y_train, DNNclassifier__steps=20)

    score = accuracy_score(y_test, list(pipeline.predict(x_test)))
    print('Accuracy: {0:f}'.format(score))

    # The true labels for the new samples are [0, 0, 1, 2, 0, 2, 1, 1, 2, 0].
    original_labels = np.array([0, 0, 1, 2, 0, 2, 1, 1, 2, 0])
    new_samples = np.array(
        [[14, 5, 30], [24, 5, 30], [4, 15, 45], [4, 15, 60], [23, 5, 30],
         [14, 5, 60], [24, 5, 45], [4, 15, 45], [4, 15, 60], [23, 5, 30]],
        dtype=int)
    output = list(pipeline.predict(new_samples))

    # The model predicts [0, 0, 2, 2, 0, 2, 1, 2, 2, 0], so accuracy on the new samples is 0.6.
    score = accuracy_score(original_labels, list(output))
    print('New samples accuracy: {0:f}'.format(score))
def trainDNN(Xtrain, Ytrain, model="DNN"):
    tf.logging.set_verbosity(tf.logging.ERROR)
    modeldir = os.getcwd() + os.sep + model + os.sep
    check_make_paths([modeldir])
    classifier = skflow.DNNClassifier(
        feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(Xtrain),
        hidden_units=[128, 128], n_classes=3, model_dir=modeldir)
    #print(tf.contrib.learn.infer_real_valued_columns_from_input(Xtrain))
    #return
    #classifier.evaluate(input_fn=input_fn_eval)
    #classifier.predict(x=x) # returns predicted labels (i.e. label's class index).
    Ytrain = [int(i) for i in Ytrain]
    start = time.clock()
    #classifier.fit(input_fn=lambda: input_fn(Xtrain, Ytrain))
    max_epochs = 10
    start2 = time.clock()
    for epoch in range(max_epochs):
        classifier.fit(input_fn=lambda: input_fn(Xtrain, Ytrain), steps=1000)
        loss = testDNN(Xtrain, classifier=classifier, Y=Ytrain)
        end2 = time.clock()
        print("Epoch", epoch, "Done. Took:", end2 - start2, "loss of:", loss)
        start2 = end2
    end = time.clock()
    timeFit = end - start
    print("Done fitting, time spent:", timeFit)
    print("Done saving the model")
    testDNN(Xtrain, classifier=classifier, Y=Ytrain)
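# trainDNN and testDNN rely on an input_fn helper that is not shown here. A
# minimal sketch for dense numeric features: infer_real_valued_columns_from_input
# names its single feature column '' (empty string) for plain arrays, so the
# feature dict key must match (an assumption about the surrounding code):
def input_fn(X, Y):
    features = {'': tf.constant(X, dtype=tf.float32)}
    labels = tf.constant(Y)
    return features, labels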
Example #11
def main(unused_argv):
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                      y_train,
                                                      test_size=0.2,
                                                      random_state=42)
    val_monitor = learn.monitors.ValidationMonitor(x_val,
                                                   y_val,
                                                   early_stopping_rounds=200)

    model_dir = '/tmp/iris_model'
    clean_folder(model_dir)

    # classifier with early stopping on training data
    classifier1 = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir=model_dir)
    classifier1.fit(x=x_train, y=y_train, steps=2000)
    predictions1 = list(classifier1.predict(x_test, as_iterable=True))
    score1 = metrics.accuracy_score(y_test, predictions1)

    model_dir = '/tmp/iris_model_val'
    clean_folder(model_dir)

    # classifier with early stopping on validation data, save frequently for
    # monitor to pick up new checkpoints.
    classifier2 = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir=model_dir,
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    classifier2.fit(x=x_train, y=y_train, steps=2000, monitors=[val_monitor])
    predictions2 = list(classifier2.predict(x_test, as_iterable=True))
    score2 = metrics.accuracy_score(y_test, predictions2)

    # In many applications, the score is improved by using early stopping
    print('score1: ', score1)
    print('score2: ', score2)
    print('score2 > score1: ', score2 > score1)
def getBeliefDNN(X, classifier=None):
    if not classifier:
        modeldir = os.getcwd() + os.sep + "DNN" + os.sep
        classifier = skflow.DNNClassifier(
            feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(X),
            hidden_units=[128, 128], n_classes=3, model_dir=modeldir)
    # predict_proba may return a generator; materialize it before returning.
    probs = classifier.predict_proba(X)
    return list(probs)
def trainDNNTF(A, Cl, A_test, Cl_test, Root):
    import tensorflow as tf
    import tensorflow.contrib.learn as skflow
    from sklearn import preprocessing

    model_directory = Root + "/DNN-TF_"
    print("\n  Training model saved in: ", model_directory, "\n")

    #**********************************************
    ''' Initialize Estimator and training data '''
    #**********************************************
    print(' Initializing TensorFlow...')
    tf.reset_default_graph()

    totA = np.vstack((A, A_test))
    totCl = np.append(Cl, Cl_test)
    numTotClasses = np.unique(totCl).size

    le = preprocessing.LabelEncoder()
    totCl2 = le.fit_transform(totCl)
    Cl2 = le.transform(Cl)
    Cl2_test = le.transform(Cl_test)

    feature_columns = skflow.infer_real_valued_columns_from_input(
        totA.astype(np.float32))
    ''' tf.estimator version '''
    #clf = tf.estimator.DNNClassifier(feature_columns=[totA], hidden_units=[20],
    #                           optimizer="Adagrad", n_classes=numTotClasses,
    #                           activation_fn="tanh", model_dir=model_directory)
    ''' tf.contrib.learn version '''
    # activation_fn expects a callable such as tf.nn.tanh, not a string.
    clf = skflow.DNNClassifier(feature_columns=feature_columns,
                               hidden_units=[20],
                               optimizer="Adagrad",
                               n_classes=numTotClasses,
                               activation_fn=tf.nn.tanh,
                               model_dir=model_directory)

    #**********************************************
    ''' Train '''
    #**********************************************
    ''' tf.estimator version '''
    #clf.train(input_fn=lambda: input_fn(A, Cl2), steps=2000)
    ''' tf.contrib.learn version '''
    clf.fit(input_fn=lambda: input_fn(A, Cl2), steps=100)

    eval_results = clf.evaluate(input_fn=lambda: input_fn(A_test, Cl2_test),
                                steps=1)
    print('\n  ================================')
    print('  \033[1mDNN-TF\033[0m - Accuracy')
    print('  ================================')
    print("\n  Accuracy: {:.2f}%".format(100 * eval_results["accuracy"]))
    print("  Loss: {:.2f}".format(eval_results["loss"]))
    print("  Global step: {}\n".format(eval_results["global_step"]))
    print('  ================================\n')

    return clf, le
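# A sketch of consuming the returned pair (not part of the original code):
# the classifier predicts integer class indices, which the fitted
# LabelEncoder maps back to the original label values.
#   clf, le = trainDNNTF(A, Cl, A_test, Cl_test, Root)
#   pred_idx = list(clf.predict(A_test.astype(np.float32)))
#   pred_labels = le.inverse_transform(pred_idx)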
Example #14
def predict_batch(features):
    print("predicting dataset")
    test_input = np.float32(np.reshape(f_nodes[0][0:context], (1, feature_size)))
    feature_columns = learn.infer_real_valued_columns_from_input(test_input)
    classifier = learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[1024, 1024, 1024], n_classes=2,
        optimizer=tf.train.AdamOptimizer(1E-4),
        model_dir=MODEL_DIR)
    return classifier.predict(x=features, as_iterable=True)
Example #15
def main(unused_in):
    # Load dataset.
    dataset = learn.datasets.load_dataset('CERN')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        dataset.data, dataset.target, test_size=.2, random_state=42)
    # Build 3-layer DNN with 10, 20, 10 hidden units respectively.
    feature_columns = learn.infer_real_valued_columns_from_input(x_train)
    classifier = learn.DNNClassifier(feature_columns=feature_columns,
                                     hidden_units=[10, 20, 10], n_classes=3)
    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    score = metrics.accuracy_score(y_test, classifier.predict(x_test))
    print("accuracy: {0:f}".format(score))
def DNNgetAccuracy(X, Y_for_score):
    modeldir = os.getcwd() + os.sep + "DNN" + os.sep
    check_make_paths([modeldir])
    classifier = skflow.DNNClassifier(
        feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(X),
        hidden_units=[128, 128], n_classes=3, model_dir=modeldir)
    probs_list = list(classifier.predict_proba(X))
    print(probs_list[0])
    numWrong, n = countWrongLinear(probs_list, Y_for_score)
    accuracy = 1 - float(numWrong) / n
    print(numWrong, "wrong /", n, "== accuracy of:", accuracy)
    return accuracy
Example #17
def run5():
    # IRIS_TRAINING = "iris_training.csv"
    # IRIS_TEST = "iris_test.csv"

    iris = learn.datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

    feature_columns = [tf.contrib.layers.real_valued_column('', dimension=4)]
    clf = learn.DNNClassifier(hidden_units=[10, 20, 10], n_classes=3, feature_columns=feature_columns)

    clf.fit(X_train, y_train, steps=200)
    score = accuracy_score(y_test, clf.predict(X_test))
    print('Accuracy: {0:f}'.format(score))
Example #18
def main(unused_argv):
    # Load dataset.
    iris = learn.datasets.load_dataset('iris')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    # feature_columns is a required argument, so infer it from the data.
    feature_columns = learn.infer_real_valued_columns_from_input(x_train)
    classifier = learn.DNNClassifier(feature_columns=feature_columns,
                                     hidden_units=[10, 20, 10], n_classes=3)

    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    score = metrics.accuracy_score(y_test, classifier.predict(x_test))
    print('Accuracy: {0:f}'.format(score))
Example #19
    def __init__(self, X, Y, tune_parameters=False):
        super(TensorFlowNeuralNetwork, self).__init__(X, Y, tune_parameters=False)
        self.X = X  # .todense()  # TensorFlow/skflow doesn't support sparse matrices
        # Convert string labels into numerical labels.
        self.Y = pd.factorize(Y)[0]
        output_layer = len(np.unique(Y))
        if tune_parameters:
            self.param_dist_random = {'learning_rate': np.random.random(100),
                                      'optimizer': ['Adam'],
                                      'hidden_units': [sp_randint(50, 500), sp_randint(50, 500)]}

        feature_columns = [tf.contrib.layers.real_valued_column("", dimension=self.X.shape[1])]
        self.clf = skflow.DNNClassifier(hidden_units=self.hidden_units, feature_columns=feature_columns,
                                        n_classes=output_layer, optimizer='Adam', model_dir="log/dnn/")
def build_estimator(model_dir, model_type):
    """build an estimator"""

    # base sparse feature process
    gender = layers.sparse_column_with_keys(column_name='gender', keys=['female', 'male'])
    education = layers.sparse_column_with_hash_bucket(column_name='education', hash_bucket_size=1000)
    relationship = layers.sparse_column_with_hash_bucket(column_name='relationship', hash_bucket_size=100)
    workclass = layers.sparse_column_with_hash_bucket(column_name='workclass', hash_bucket_size=100)
    occupation = layers.sparse_column_with_hash_bucket(column_name='occupation', hash_bucket_size=1000)
    native_country = layers.sparse_column_with_hash_bucket(column_name='native_country', hash_bucket_size=1000)

    # base continuous feature
    age = layers.real_valued_column(column_name='age')
    education_num = layers.real_valued_column(column_name='education_num')
    capital_gain = layers.real_valued_column(column_name='capital_gain')
    capital_loss = layers.real_valued_column(column_name='capital_loss')
    hours_per_week = layers.real_valued_column(column_name='hours_per_week')

    # Transformation: bucketization converts the continuous variable into
    # categorical buckets, which can improve accuracy.
    age_bucket = layers.bucketized_column(source_column=age,
                                          boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

    # Wide columns and deep columns: the features fed to the deep model and
    # the features fed to the wide model. The wide model only uses the
    # categorical columns (plus their crosses).
    wide_columns = [gender, native_country, education, relationship, workclass, occupation, age_bucket,
                    layers.crossed_column(columns=[education, occupation], hash_bucket_size=int(1e4)),
                    layers.crossed_column(columns=[age_bucket, education, occupation], hash_bucket_size=int(1e6)),
                    layers.crossed_column(columns=[native_country, occupation], hash_bucket_size=int(1e4))]

    deep_columns = [layers.embedding_column(workclass, dimension=8),
                    layers.embedding_column(education, dimension=8),
                    layers.embedding_column(gender, dimension=8),
                    layers.embedding_column(relationship, dimension=8),
                    layers.embedding_column(native_country, dimension=8),
                    layers.embedding_column(occupation, dimension=8),
                    age, education_num, capital_gain, capital_loss, hours_per_week]

    if model_type == "wide":
        m=learn.LinearClassifier(feature_columns=wide_columns, model_dir=model_dir)
    elif model_type == "deep":
        m=learn.DNNClassifier(feature_columns=deep_columns, model_dir=model_dir, hidden_units=[100, 50])
    else:
        m=learn.DNNLinearCombinedClassifier(model_dir=model_dir,
                                            linear_feature_columns=wide_columns,
                                            dnn_feature_columns=deep_columns,
                                            dnn_hidden_units=[256, 128, 64],
                                            dnn_activation_fn=tf.nn.relu)
    return m
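# A sketch of driving build_estimator (the paths, input functions, and step
# count here are assumptions, not part of the original):
#   m = build_estimator('/tmp/census_model', 'wide_n_deep')
#   m.fit(input_fn=train_input_fn, steps=200)
#   results = m.evaluate(input_fn=eval_input_fn, steps=1)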
Example #21
def main(unused_argv):
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                      y_train,
                                                      test_size=0.2,
                                                      random_state=42)
    val_monitor = learn.monitors.ValidationMonitor(x_val,
                                                   y_val,
                                                   early_stopping_rounds=200)

    # classifier with early stopping on training data
    classifier1 = learn.DNNClassifier(hidden_units=[10, 20, 10],
                                      n_classes=3,
                                      model_dir='/tmp/iris_model/')
    classifier1.fit(x=x_train, y=y_train, steps=2000)
    score1 = metrics.accuracy_score(y_test, classifier1.predict(x_test))

    # classifier with early stopping on validation data, save frequently for
    # monitor to pick up new checkpoints.
    classifier2 = learn.DNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir='/tmp/iris_model_val/',
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    classifier2.fit(x=x_train, y=y_train, steps=2000, monitors=[val_monitor])
    score2 = metrics.accuracy_score(y_test, classifier2.predict(x_test))

    # In many applications, the score is improved by using early stopping
    print('score1: ', score1)
    print('score2: ', score2)
    print('score2 > score1: ', score2 > score1)
Example #22
def contrib_learn_classifier_test():
    """Test tf.contrib.learn.DNN_classifier."""
    language_column = layers.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=20)

    feature_columns = [
        layers.embedding_column(language_column, dimension=3),
        layers.real_valued_column("age", dtype=tf.int64)
    ]

    classifier = learn.DNNClassifier(
        n_classes=3,
        feature_columns=feature_columns,
        hidden_units=[100, 100],
        config=learn.RunConfig(tf_random_seed=1,
                               model_dir="../model_saver/estimators/"
                               "DNN_classifier_01"),
        # optimizer=optimizer_exp_decay
    )
    classifier.fit(input_fn=_input_fn, steps=10000)
    print("variables_names:\n", str(classifier.get_variable_names()))
    # scores = classifier.evaluate(input_fn=_input_fn,
    #                              steps=100)
    # print("scores:\n", str(scores))

    scores = classifier.evaluate(
        input_fn=_input_fn,
        steps=100,
        metrics={
            'my_accuracy':
            MetricSpec(metric_fn=metrics.streaming_accuracy,
                       prediction_key="classes"),
            'my_precision':
            MetricSpec(metric_fn=metrics.streaming_precision,
                       prediction_key="classes"),
            'my_recall':
            MetricSpec(metric_fn=metrics.streaming_recall,
                       prediction_key="classes"),
            'my_metric':
            MetricSpec(metric_fn=my_metric_op, prediction_key="classes")
        })
    print("scores:\n", str(scores))

    predictions = classifier.predict(input_fn=_input_fn,
                                     outputs=["classes", "probabilities"])
    print("predictions")
    for prediction in predictions:
        print(prediction)
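# my_metric_op above is user code that this example does not define. A minimal
# sketch with the (predictions, labels) signature MetricSpec expects; the
# metric itself is purely illustrative:
def my_metric_op(predictions, labels):
    # Streaming fraction of examples predicted as class 0.
    return tf.contrib.metrics.streaming_mean(
        tf.to_float(tf.equal(predictions, 0)))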
def main(unused_argv):
  # Load dataset.
  iris = learn.datasets.load_dataset('iris')
  x_train, x_test, y_train, y_test = cross_validation.train_test_split(
      iris.data, iris.target, test_size=0.2, random_state=42)

  # Build 3 layer DNN with 10, 20, 10 units respectively.
  feature_columns = learn.infer_real_valued_columns_from_input(x_train)
  classifier = learn.DNNClassifier(
      feature_columns=feature_columns, hidden_units=[10, 20, 10], n_classes=3)

  # Fit and predict.
  classifier.fit(x_train, y_train, steps=300)
  predictions = list(classifier.predict(x_test, as_iterable=True))
  score = metrics.accuracy_score(y_test, predictions)
  print('Accuracy: {0:f}'.format(score))
Example #24
def main(argv):
    iris = learn.datasets.load_iris()
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        iris.data, iris.target, test_size=.2, random_state=42)

    # Iris has four features, so the single real-valued column needs dimension=4.
    feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)]
    classifier = learn.DNNClassifier(feature_columns=feature_columns,
                                     hidden_units=[10, 20, 10],
                                     n_classes=3)

    classifier.fit(x_train, y_train, steps=200)

    # predict_classes returns a generator; materialize it for accuracy_score.
    x_predict = list(classifier.predict_classes(x_test))
    score = metrics.accuracy_score(y_test, x_predict)
    print('Accuracy: {0:f}'.format(score))
Example #25
def main():
    logging.getLogger().setLevel(logging.INFO)

    iris = learn.datasets.load_iris()

    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

    feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)]

    # Build a neural network with 3 hidden layers: 10, 20, 10 units respectively
    classifier = learn.DNNClassifier(hidden_units=[10, 20, 10], n_classes=3, feature_columns=feature_columns)

    classifier.fit(X_train, y_train, steps=2000)

    score = metrics.accuracy_score(y_test, classifier.predict(X_test))

    print("Accuracy: {0:f}".format(score))
Example #26
def dnn_model(output_dir):
    real, sparse = get_features()
    all_cols = {}
    all_cols.update(real)

    # Create embeddings of the sparse columns.
    embed = {
        colname: create_embed(col)
        for colname, col in sparse.items()
    }
    all_cols.update(embed)

    estimator = tflearn.DNNClassifier(model_dir=output_dir,
                                      feature_columns=all_cols.values(),
                                      hidden_units=[64, 16, 4])
    estimator.params["head"]._thresholds = [0.7]  # FIXME: hack
    return estimator
def testDNN(X, model="DNN", classifier=None, Y=None):
    modeldir = os.getcwd() + os.sep + model + os.sep
    if classifier == None:
      classifier = skflow.DNNClassifier(
        feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X),
        hidden_units=[128, 128], n_classes=3, model_dir=modeldir)
    print("classifier created")
    if Y != None:
        ev = classifier.evaluate(input_fn=lambda: input_fn(X, Y), steps=1)
        loss_score = ev["loss"]
        print("Loss: {0:f}".format(loss_score))
        return loss_score
    probs = (classifier.predict_proba(X))#input_fn=lambda: input_fn(X)))
    print("PROBS:", probs)
    for i in probs:
        print(i)
    return probs
Example #28
def main(unused_argv):
    iris = load_iris()
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    # It's useful to scale to ensure Stochastic Gradient Descent
    # will do the right thing.
    scaler = StandardScaler()

    # DNN classifier (feature_columns is required, so infer it from the data)
    classifier = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10], n_classes=3)

    pipeline = Pipeline([('scaler', scaler), ('DNNclassifier', classifier)])

    pipeline.fit(x_train, y_train, DNNclassifier__steps=200)

    score = accuracy_score(y_test, pipeline.predict(x_test))
    print('Accuracy: {0:f}'.format(score))
Example #29
def clasificacion():
    data = creacionDataset()
    print("dataset built")
    # Use scikit-learn datasets in the future.
    print(len(data[0]), "images,", len(data[1]), "labels")
    image_train = np.array(data[0])
    label_train = np.array(data[1])
    image_train = image_train.reshape(image_train.shape[0], image_train.shape[1] * image_train.shape[2])
    label_train = label_train.reshape(label_train.shape[0])
    image_train, label_train = shuffle(image_train, label_train, random_state=42)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(image_train, label_train, test_size=.3, random_state=42)
    # Build 3 layer DNN with 10, 20, 10 units respectively.
    # One real-valued column spanning all flattened pixel features.
    feature_columns = [tf.contrib.layers.real_valued_column("", dimension=x_train.shape[1])]
    classifier = learn.DNNClassifier(feature_columns=feature_columns, hidden_units=[10, 20, 10], n_classes=3)
    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    x_predict = list(classifier.predict_classes(x_test))
    score = metrics.accuracy_score(y_test, x_predict)
    print('Accuracy: {0:f}'.format(score))
Example #30
def get_model(filename=CLASSIFIER_FILE):
    '''Get a CNN classifier object from file, or create one if none exists.'''
    if filename is None:
        # Load dataset
        print(Helper.unserialize("../Datasets/raw_new_80.data"))
        train_data, train_targets, test_data, expected = get_featureset('raw')

        raw_train_data = np.zeros((train_data.shape[0], 20, 20))
        for i, item in enumerate(train_data):
            raw_train_data[i] = item.reshape((20, 20))
            # Display.show_image(raw_train_data[i])

        raw_test_data = np.zeros((test_data.shape[0], 20, 20))
        for i, item in enumerate(test_data):
            raw_test_data[i] = item.reshape((20, 20))
            # Display.show_image(raw_test_data[i])


        # Build Classifier
        # classifier = skflow.TensorFlowEstimator(model_fn=multilayer_conv_model, n_classes=2,
        #                                         steps=500, learning_rate=0.05, batch_size=128)
        # NOTE: DNNClassifier also requires hidden_units and feature_columns,
        # which this snippet omits.
        classifier = skflow.DNNClassifier(feature_engineering_fn=conv_model, n_classes=2)
        classifier.fit(raw_train_data, train_targets)

        # Assess built classifier
        predictions = classifier.predict(raw_test_data)
        accuracy = metrics.accuracy_score(expected, predictions)
        confusion_matrix = metrics.confusion_matrix(expected, predictions)
        print("Confusion matrix:\n%s" % confusion_matrix)
        print('Accuracy: %f' % accuracy)

        return classifier
    else:
        serialized_classifier = Helper.unserialize(filename)
        return serialized_classifier