Example #1
def main():
    training_data = pd.read_csv('../data/20180105_label.csv',
                                skipinitialspace=True,
                                engine='python',
                                dtype=np.float64,
                                iterator=True,
                                )

    test_data = pd.read_csv('../data/20180107_label.csv',
                            skipinitialspace=True,
                            engine='python',
                            dtype=np.float64,
                            iterator=True,
                            )
    deep_columns = create_columns(CONTINUOUS_COLUMNS)

    model = DNNClassifier(feature_columns=deep_columns,
                          model_dir='./model',
                          hidden_units=[10, 10],
                          n_classes=2,
                          input_layer_min_slice_size=10000)

    tf.logging.set_verbosity(tf.logging.INFO)
    training_data_chunk = training_data.get_chunk(1000000000)
    model.fit(input_fn=lambda: input_fn(training_data_chunk),
              steps=100)

    tf.logging.info("end fit model")

    test_data_chunk = test_data.get_chunk(10000)

    accuracy = model.evaluate(input_fn=lambda: input_fn(test_data_chunk),
                              steps=100)['accuracy']
    print(accuracy * 100)
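# Note: create_columns, CONTINUOUS_COLUMNS and input_fn are not shown in this example.
# A minimal sketch of what they might look like (hypothetical column names, not the
# original project's definitions):
import numpy as np
import tensorflow as tf

CONTINUOUS_COLUMNS = ['feature_a', 'feature_b']   # assumed feature column names
LABEL_COLUMN = 'label'                            # assumed label column name


def create_columns(continuous_columns):
    # One real-valued feature column per continuous input column.
    return [tf.contrib.layers.real_valued_column(col) for col in continuous_columns]


def input_fn(df):
    # Dict of dense tensors keyed by column name, plus an integer label tensor.
    features = {col: tf.constant(df[col].values) for col in CONTINUOUS_COLUMNS}
    labels = tf.constant(df[LABEL_COLUMN].values.astype(np.int64))
    return features, labels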
Example #2
def main(_):
    mnist = input_data.read_data_sets("/tmp/data")
    X_train = mnist.train.images
    X_test = mnist.test.images
    Y_train = mnist.train.labels.astype("int")
    Y_test = mnist.test.labels.astype("int")

    config = RunConfig(tf_random_seed=42, save_checkpoints_secs=10)
    feature_cols = tf.contrib.learn.infer_real_valued_columns_from_input(
        X_train)
    validation_monitor = monitors.ValidationMonitor(x=X_test,
                                                    y=Y_test,
                                                    every_n_steps=100)
    dnn_clf = DNNClassifier(
        hidden_units=[300, 100],
        n_classes=10,
        feature_columns=feature_cols,
        config=config,
        model_dir="/home/mtb/Projects/machine_learning/tensorflow/mnist")

    dnn_clf.fit(X_train,
                Y_train,
                batch_size=50,
                steps=4000,
                monitors=[validation_monitor])
    accuracy_score = dnn_clf.evaluate(x=X_test, y=Y_test)["accuracy"]

    print('accuracy_score: {0}'.format(accuracy_score))
Example #3
 def __init__(self,
              n_classes,
              type="w2v",
              hidden_units=[10, 20, 10],
              num_features=100,
              context=10,
              method=1):
     #if type=="w2v":
     #self.model = w2v_helpers.get_word2vec(num_features, context, method)
     self.type = type
     self.classifier = DNNClassifier(hidden_units=hidden_units,
                                     n_classes=n_classes)
Example #4
    def test_dnn_classifier(self):
        """ test converting DNNClassifier model
        """
        algorithm_name = "dnn_classifier"
        model_output = os.path.join(self.data.base_path,
                                    "{}".format(algorithm_name))
        classifier = DNNClassifier(hidden_units=[4 * 3, 2 * 3],
                                   feature_columns=self._iris_dnn_features(),
                                   n_classes=3,
                                   optimizer=tf.train.AdamOptimizer,
                                   config=self.estimator_conf)
        self._generate_tf_model(classifier, model_output)
        self.assertTrue(os.path.exists(model_output))

        pmml_output = os.path.join(self.data.base_path,
                                   "{}.pmml".format(algorithm_name))
        ppmml.to_pmml(model_input=model_output,
                      pmml_output=pmml_output,
                      model_type='tensorflow')
        self.assertTrue(os.path.exists(pmml_output))

        # validate pmml file
        data_output = os.path.join(self.data.base_path,
                                   "{}.csv".format(algorithm_name))
        ppmml.predict(pmml_output, self.data.test_data_input, data_output)
        self.assertTrue(os.path.exists(data_output))
Example #5
def main():
    iternumber = 2
    with open('MLPtrained_dead.pickle','rb') as f:
        deadcheck = pickle.load(f)
        
    if os.path.isfile('run_times_{}.npy'.format(iternumber - 1)):
        print('run times exists. loading data')
        run_times = list(np.load('run_times_{}.npy'.format(iternumber - 1)))
    else:
        run_times = []
        
    feature_columns = [tf.contrib.layers.real_valued_column("x", dimension=9600)]
    estimator = SKCompat(DNNClassifier(feature_columns=feature_columns,
                                       hidden_units=[256, 64],
                                       model_dir='./model/'))

    for i in range(4,0,-1):
        print(i)
        time.sleep(1)
                    
#    saver = tf.train.import_meta_graph('canabalt nn 200 50.meta')
    while True:    
        run_data, run_time = play(estimator, deadcheck)
        run_times.append(run_time)
        np.save('run_times_{}.npy'.format(iternumber-1), run_times)
Example #6
def DNNClassifierTrainTask(self, datasource, train_path, test_path, **kwargs):
    steps = kwargs.pop("steps", 2000)
    if datasource == 'system':  # data from system
        training_set = load_system_dataset(train_path)
        if test_path:
            test_set = load_system_dataset(test_path)
        feature_columns = [real_valued_column("", dimension=4)]
        classifier = DNNClassifier(feature_columns=feature_columns,
                                   **kwargs
                                   # hidden_units=[10, 20, 10],
                                   # n_classes=3
                                   )
        if test_path:
            classifier.fit(x=training_set.data,
                           y=training_set.target,
                           steps=steps)
            accuracy_score = classifier.evaluate(x=test_set.data,
                                                 y=test_set.target)["accuracy"]
            return accuracy_score
Example #7
class DNN(BaseEstimator, ClassifierMixin):
    def __init__(self,
                 n_classes,
                 type="w2v",
                 hidden_units=[10, 20, 10],
                 num_features=100,
                 context=10,
                 method=1):
        #if type=="w2v":
        #self.model = w2v_helpers.get_word2vec(num_features, context, method)
        self.type = type
        self.classifier = DNNClassifier(hidden_units=hidden_units,
                                        n_classes=n_classes)

    def pre_transformX(self, df, colnames, df_test=None, n_gram=None):
        data = None
        if self.type == "w2v":
            data = features_helpers.create_sentences(df, colnames)
            data = features_helpers.transform_to_w2v_sentences(
                data, self.model)
            return data.as_matrix()
        else:
            x_train, x_test = features_helpers.transform_to_bow(
                df, df_test, colnames, n_gram)
            return x_train, x_test

    def pre_transformY(self, df, list_dict):
        y = list(map(lambda w: list_dict.index(w), list(df)))
        return np.array(y)

    def fit(self, X, y=None):
        self.classifier.fit(x=X, y=y, steps=200)

    def predict(self, X, y=None):
        return self.classifier.predict(X)

    def evaluate(self, X, Y):
        return self.classifier.evaluate(x=X, y=Y)["accuracy"]

    def score(self, X, y, sample_weight=None):
        return super(DNN, self).score(X, y, sample_weight)
Example #8
def get_classifier():
    # (kernel_size * kernel_size, 3)
    feature_columns = [layers.real_valued_column("", dimension=3)]
    return DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[256, 128],
        n_classes=5,
        model_dir="saved_model",
        # optimizer=AdadeltaOptimizer(learning_rate=0.1)
        # optimizer=AdamOptimizer()
        # dropout=0.5
    )


def main():
    # If the training and test sets aren't stored locally, download them.
    if not os.path.exists(IRIS_TRAINING):
        raw = urlopen(IRIS_TRAINING_URL).read()
        with open(IRIS_TRAINING, "wb") as f:
            f.write(raw)

    if not os.path.exists(IRIS_TEST):
        raw = urlopen(IRIS_TEST_URL).read()
        with open(IRIS_TEST, "wb") as f:
            f.write(raw)

    # Load datasets.
    training_set = load_csv_with_header(filename=IRIS_TRAINING,
                                        target_dtype=np.int,
                                        features_dtype=np.float32)
    test_set = load_csv_with_header(filename=IRIS_TEST,
                                    target_dtype=np.int,
                                    features_dtype=np.float32)

    # Specify that all features have real-value data
    feature_columns = [real_valued_column("", dimension=4)]

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    classifier = DNNClassifier(feature_columns=feature_columns,
                               hidden_units=[10, 20, 10],
                               n_classes=3,
                               model_dir="/tmp/iris_model")

    # Define the training inputs
    def get_train_inputs():
        x = tf.constant(training_set.data)
        y = tf.constant(training_set.target)

        return x, y

    # Fit model.
    classifier.fit(input_fn=get_train_inputs, steps=2000)

    # Define the test inputs
    def get_test_inputs():
        x = tf.constant(test_set.data)
        y = tf.constant(test_set.target)

        return x, y

    # Evaluate accuracy.
    accuracy_score = classifier.evaluate(input_fn=get_test_inputs,
                                         steps=1)["accuracy"]

    print("\nTest Accuracy: {0:f}\n".format(accuracy_score))

    # Classify two new flower samples.
    def new_samples():
        return np.array([[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]],
                        dtype=np.float32)

    predictions = list(classifier.predict(input_fn=new_samples))

    print("New Samples, Class Predictions:    {}\n".format(predictions))


def dnn_main():
    x_train, x_test, y_train, y_test = load_SpamBase(
        "../data/spambase/spambase.data")
    feature_columns = infer_real_valued_columns_from_input(x_train)
    print(feature_columns)
    # hidden_units=[30, 10] means two hidden layers, with 30 and 10 units respectively.
    classifier = DNNClassifier(feature_columns=feature_columns,
                               hidden_units=[30, 10],
                               n_classes=2)
    # steps=500 means 500 training batches are run; batch_size=10 means each batch holds 10 training examples.
    # One epoch is one pass over the entire dataset; a single epoch may update the parameters many times.
    # epoch_num is the specified number of epochs.
    # One step (or iteration) is one parameter update, each using batch_size examples from the dataset.
    # Note: even with the same dataset and the same number of epochs, the parameter updates
    # are not necessarily identical -- they depend on batch_size.
    # The total number of steps (iterations) is (total samples / batch_size + 1) * epoch_num.
    # Each epoch shuffles the input data, reordering it and splitting it into new batches.
    # (A small worked example of this arithmetic follows this snippet.)

    classifier.fit(x_train, y_train, steps=500, batch_size=10)
    y_predict = list(classifier.predict(x_test, as_iterable=True))
    #y_predict = classifier.predict(x_test)
    #print y_predict
    score = metrics.accuracy_score(y_test, y_predict)
    print('Accuracy: {0:f}'.format(score))
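# A small worked example of the step/epoch arithmetic described in the comments above,
# using hypothetical numbers rather than the actual Spambase split sizes:
num_examples = 4000                           # assumed number of training examples
batch_size = 10                               # as in the fit call above
steps_per_epoch = num_examples // batch_size  # 400 parameter updates per epoch
total_steps = 500                             # steps=500 as in the fit call above
print(total_steps / float(steps_per_epoch))   # ~1.25 epochs of data seen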
Example #11
def train_model(item_type):
    model_dir = "models/" + item_type.lower().replace(" ", "_")
    if os.path.exists(model_dir):
        return
    print("==> Training model for '%s'" % item_type)
    csv_filename = filename = "data/" + item_type.lower().replace(" ", "_") + ".csv"
    df_all = pd.read_csv(csv_filename, skipinitialspace=True, encoding='utf-8')
    df_all.fillna(0.0, inplace=True)

    # Convert the price to a bucket representing a range
    df_all['price_chaos'] = (df_all['price_chaos'].apply(util.price_bucket)).astype(int)

    # Hash the item type to a number
    df_all['itemType'] = (df_all['itemType'].apply(lambda x: util.type_hash[x])).astype(float)

    LABEL_COLUMN = util.LABEL_COLUMN

    # Split the data 80/20 training/test
    percent_test = 20
    n = (len(df_all) * percent_test) // 100
    df_train = df_all.head(len(df_all) - n)
    df_test = df_all.tail(n)

    train_x = df_train.ix[:, df_train.columns != LABEL_COLUMN].as_matrix().astype(float)
    train_y = df_train.as_matrix([LABEL_COLUMN])
    test_x = df_test.ix[:, df_test.columns != LABEL_COLUMN].as_matrix().astype(float)
    test_y = df_test.as_matrix([LABEL_COLUMN])

    deep_columns = tf.contrib.learn.infer_real_valued_columns_from_input(train_x)
    hidden_units = util.get_hidden_units(len(df_train.columns)-1)
    model = DNNClassifier(model_dir=model_dir, feature_columns=deep_columns, hidden_units=hidden_units,
                          n_classes=len(util.bins), enable_centered_bias=True)

    steps = len(df_train) // 75
    sessions = (steps // 500) + 2
    for i in range(sessions):
        model.fit(train_x, train_y, steps=500, batch_size=5000)
        results = model.evaluate(test_x, test_y, steps=1, batch_size=df_test.size)

    # Print some predictions from the test data
    predictions = df_test.sample(10)
    v = model.predict_proba(predictions.ix[:, df_test.columns != LABEL_COLUMN].as_matrix().astype(float), batch_size=10)

    price_map = []

    for i in v:
        # take the top 5 most likely price ranges
        top_largest = i.argsort()[-5:][::-1]
        prices = {}
        for p in top_largest:
            prices[util.get_bin_label(p)] = float(round(100*i[p], 1))
        price_map.append(prices)

    for r in price_map:
        print(r)
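# util.price_bucket and util.type_hash are project helpers that are not shown in this
# example. A rough, purely illustrative idea of the bucketing helper (the real bin
# edges live in util.bins):
#
# def price_bucket(price, bins=util.bins):
#     for i, upper in enumerate(bins):
#         if price <= upper:
#             return i
#     return len(bins) - 1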
Example #12
def main():
    training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename='./iris_data/iris_training.csv',
        target_dtype=np.int,
        features_dtype=np.float32)
    test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename='./iris_data/iris_test.csv',
        target_dtype=np.int,
        features_dtype=np.float32)

    feature_columns = [tf.feature_column.numeric_column("x", shape=[4])]

    clf = DNNClassifier(hidden_units=[10, 20, 10],
                        feature_columns=feature_columns,
                        model_dir='./iris_model',
                        n_classes=3)

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": np.array(training_set.data)},
        y=np.array(training_set.target),
        num_epochs=None,
        shuffle=True)

    clf.fit(input_fn=train_input_fn, steps=2000)

    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": np.array(test_set.data)},
        y=np.array(test_set.target),
        num_epochs=1,
        shuffle=False)

    accuracy_score = clf.evaluate(input_fn=test_input_fn)["accuracy"]
    print("\nTest Accuracy: {0:f}\n".format(accuracy_score))

    new_samples = np.array([[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]],
                           dtype=np.float32)
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": new_samples},
                                                          num_epochs=1,
                                                          shuffle=False)
    predictions = list(clf.predict(input_fn=predict_input_fn))
    print(predictions)

    print("New Samples, Class Predictions:    {}\n".format(predictions))
Example #13
def audit_serving_input_fn():
	return _serving_input_fn(audit_cont_columns, audit_cat_columns)

def build_audit(classifier, max_steps, name, with_proba = True):
	classifier.fit(input_fn = audit_input_fn, max_steps = max_steps)

	adjusted = DataFrame(classifier.predict(input_fn = audit_input_fn, as_iterable = False), columns = ["_target"])
	if(with_proba):
		adjusted_proba = DataFrame(classifier.predict_proba(input_fn = audit_input_fn, as_iterable = False), columns = ["probability(0)", "probability(1)"])
		adjusted = pandas.concat((adjusted, adjusted_proba), axis = 1)
	store_csv(adjusted, name + ".csv")

	store_savedmodel(classifier, audit_serving_input_fn, name)

build_audit(DNNClassifier(hidden_units = [2 * 49],
                          feature_columns = _dnn_feature_columns(audit_feature_columns),
                          optimizer = tf.train.AdamOptimizer(learning_rate = 0.00001),
                          config = estimator_conf),
            2000, "DNNClassificationAudit")
build_audit(LinearClassifier(feature_columns = audit_feature_columns,
                             optimizer = tf.train.AdamOptimizer(learning_rate = 0.00025),
                             config = estimator_conf),
            5000, "LinearClassificationAudit")

#
# Multi-class classification
#

iris_df = load_csv("Iris.csv")
iris_df["Species"] = iris_df["Species"].replace("setosa", "0").replace("versicolor", "1").replace("virginica", "2").astype(int)

iris_cont_columns = ["Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"]

iris_feature_columns = [real_valued_column(column, dtype = tf.float64) for column in iris_cont_columns]

def iris_input_fn():
	return _input_fn(iris_df, iris_cont_columns, [], "Species")
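# _input_fn is defined elsewhere in the original script. A rough sketch of the usual
# tf.contrib.learn input_fn pattern it likely follows (dense tensors for continuous
# columns, SparseTensors for categorical columns, plus the label) -- an illustration,
# not the project's actual helper:
def _input_fn(df, cont_columns, cat_columns, label_column):
    features = {col: tf.constant(df[col].values) for col in cont_columns}
    for col in cat_columns:
        features[col] = tf.SparseTensor(
            indices=[[i, 0] for i in range(df[col].size)],
            values=df[col].values,
            dense_shape=[df[col].size, 1])
    label = tf.constant(df[label_column].values)
    return features, label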
Example #14
print('============================================================')
for classifier, acc, cv_acc in results:
    print(
        'Classifier = {}: Accuracy = {} || Mean Cross Val Accuracy scores = {}'
        .format(classifier, acc, cv_acc))

for name, bp in bestparams:
    print('============================================================')
    print('{}-classifier GridSearch Best Params'.format(name))
    print('============================================================')
    display(bp)
print()
print()

feature_columns = [
    tf.contrib.layers.real_valued_column("", dimension=len(X[0]))
]
dl_clf = DNNClassifier(hidden_units=[10, 20, 10],
                       n_classes=2,
                       feature_columns=feature_columns,
                       model_dir="/tmp/ilpd")
dl_clf.fit(X_train, y_train, steps=4000)
predictions = list(dl_clf.predict(X_test, as_iterable=True))
acc = accuracy_score(y_test, predictions)
print('============================================================')
print('Classifier = {}: Accuracy = {} '.format(DNNClassifier, acc))
print('============================================================')
print('{}-classifier GridSearch Best Params'.format(DNNClassifier))
display(dl_clf.params)
print('============================================================')
Example #15
# Need to set Kernel to Python 3
import pandas
from tensorflow.contrib.learn import DNNClassifier
from tensorflow.contrib.learn import SKCompat
#from tensorflow.contrib.RunConfig import RunConfig
from tensorflow.contrib.learn import infer_real_valued_columns_from_input

#config = RunConfig(tf_random_seed = 42)

# Extracting features from the training data
feature_columns = infer_real_valued_columns_from_input(X_train)

# Create the DNN with two hidden layers (300 neurons and 100 neurons)
dnn_clf = DNNClassifier(hidden_units=[300, 100],
                        n_classes=10,
                        feature_columns=feature_columns)
#config = config)

#Wrapper
dnn_clf = SKCompat(dnn_clf)

# Train DNN with mini-batch descent
dnn_clf.fit(X_train, y_train, batch_size=64, steps=5000)

# In[1]:

# VizWiz dataset
import os
import json
from pprint import pprint
Example #16
mnist.keys()

#%%
X=mnist['data']
y=mnist['target']
y=y.astype(np.int32)
y
#%%
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X)

#%%
feature_columns

#%%
from tensorflow.contrib.learn import DNNClassifier
model = DNNClassifier(hidden_units=[300, 100],
                      feature_columns=feature_columns,
                      n_classes=10)
model.fit(x=X, y=y, batch_size=50, steps=40000)

#%%
# If you run this code on the MNIST dataset (after scaling it, e.g. with sklearn's
# StandardScaler), you can actually get a model that reaches over 98.1% accuracy on the
# test set! That is better than the best model we trained in Chapter 3.
# (A scaling sketch follows this cell.)
from sklearn.metrics import accuracy_score
y_pred=list(model.predict(X))
print(accuracy_score(y,y_pred))
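#%%
# A minimal sketch of the scaling step mentioned above (illustrative only: the
# train/test split and hyperparameters are assumptions; X, y, np, tf and DNNClassifier
# are reused from the cells above):
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float64))
X_test_scaled = scaler.transform(X_test.astype(np.float64))

scaled_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train_scaled)
scaled_model = DNNClassifier(hidden_units=[300, 100],
                             feature_columns=scaled_columns,
                             n_classes=10)
scaled_model.fit(x=X_train_scaled, y=y_train, batch_size=50, steps=40000)
print(scaled_model.evaluate(x=X_test_scaled, y=y_test))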

#%%
# The TF.Learn library also provides some convenience functions for evaluating models
model.evaluate(X,y)

#%%

results, dataframes, best_parameters = parameter_tuning(models, X_train, X_test, y_train, y_test)
print()
print('============================================================')
for classifier, acc, cv_acc in results:
    print('{}: Accuracy with Best Parameters = {}% || Mean Cross Validation Accuracy = {}%'.format(classifier, round(acc*100,4), round(cv_acc*100,4)))
print()

for name, bp in best_parameters:
    print('============================================================')
    print('{} classifier GridSearch Best Parameters'.format(name))
    display(bp)
print()
print()

# Deep Learning using Tensor flow
feature_columns = [tf.contrib.layers.real_valued_column("", dimension=len(X[0]))]
deep_learning = DNNClassifier(hidden_units=[10, 20, 10],
                              feature_columns=feature_columns,
                              model_dir="/tmp/iris")
deep_learning.fit(X_train, y_train, steps=1500)
predictions = list(deep_learning.predict(X_test, as_iterable=True))
acc = accuracy_score(y_test, predictions)
print('============================================================')
print('Deep Learning classifier Accuracy = ', round(acc*100,4),'%')
print('------------------------------------------------------------')
print('Deep Learning classifier Best Parameters')
display(deep_learning.params)
print('***************** Execution Completed **********************')
print('------------------------------------------------------------')
Example #18
def build_estimator(model_dir, model_type):
    """Build an estimator."""
    # Sparse base columns.
    clickTime = tf.contrib.layers.sparse_column_with_integerized_feature(
        "clickTime", bucket_size=24)
    # creativeID = tf.contrib.layers.sparse_column_with_integerized_feature(
    #     "creativeID", bucket_size=7000)
    positionID = tf.contrib.layers.sparse_column_with_integerized_feature(
        "positionID", bucket_size=7646)
    connectionType = tf.contrib.layers.sparse_column_with_integerized_feature(
        "connectionType", bucket_size=5)
    telecomsOperator = tf.contrib.layers.sparse_column_with_integerized_feature(
        "telecomsOperator", bucket_size=4)
    age = tf.contrib.layers.sparse_column_with_integerized_feature(
        "age", bucket_size=81)
    gender = tf.contrib.layers.sparse_column_with_integerized_feature(
        "gender", bucket_size=3)
    education = tf.contrib.layers.sparse_column_with_integerized_feature(
        "education", bucket_size=8)
    marriageStatus = tf.contrib.layers.sparse_column_with_integerized_feature(
        "marriageStatus", bucket_size=4)
    haveBaby = tf.contrib.layers.sparse_column_with_integerized_feature(
        "haveBaby", bucket_size=7)
    hometown = tf.contrib.layers.sparse_column_with_integerized_feature(
        "hometown", bucket_size=365)
    residence = tf.contrib.layers.sparse_column_with_integerized_feature(
        "residence", bucket_size=400)
    adID = tf.contrib.layers.sparse_column_with_integerized_feature(
        "adID", bucket_size=3616)
    camgaignID = tf.contrib.layers.sparse_column_with_integerized_feature(
        "camgaignID", bucket_size=720)
    advertiserID = tf.contrib.layers.sparse_column_with_integerized_feature(
        "advertiserID", bucket_size=91)
    appPlatform = tf.contrib.layers.sparse_column_with_integerized_feature(
        "appPlatform", bucket_size=3)
    appCategory = tf.contrib.layers.sparse_column_with_integerized_feature(
        "appCategory", bucket_size=504)
    wide_columns = [
        clickTime,
        positionID,
        connectionType,
        telecomsOperator,
        age,
        gender,
        education,
        marriageStatus,
        haveBaby,
        hometown,
        residence,
        adID,
        camgaignID,
        advertiserID,
        appPlatform,
        appCategory,
        # tf.contrib.layers.crossed_column([education, occupation],
        #                                  hash_bucket_size=int(1e4)),
        # tf.contrib.layers.crossed_column(
        #     [age_buckets, education, occupation],
        #     hash_bucket_size=int(1e6)),
        tf.contrib.layers.crossed_column(
            [clickTime, connectionType, telecomsOperator],
            hash_bucket_size=int(1e4))
    ]
    deep_columns = [
        tf.contrib.layers.embedding_column(clickTime, dimension=8),
        tf.contrib.layers.embedding_column(positionID, dimension=8),
        tf.contrib.layers.embedding_column(connectionType, dimension=8),
        tf.contrib.layers.embedding_column(telecomsOperator, dimension=8),
        tf.contrib.layers.embedding_column(age, dimension=8),
        tf.contrib.layers.embedding_column(gender, dimension=8),
        tf.contrib.layers.embedding_column(education, dimension=8),
        tf.contrib.layers.embedding_column(marriageStatus, dimension=8),
        tf.contrib.layers.embedding_column(haveBaby, dimension=8),
        tf.contrib.layers.embedding_column(hometown, dimension=8),
        tf.contrib.layers.embedding_column(residence, dimension=8),
        tf.contrib.layers.embedding_column(adID, dimension=8),
        tf.contrib.layers.embedding_column(camgaignID, dimension=8),
        tf.contrib.layers.embedding_column(advertiserID, dimension=8),
        tf.contrib.layers.embedding_column(appCategory, dimension=8),
        tf.contrib.layers.embedding_column(appPlatform, dimension=8)
    ]
    if model_type == "wide":
        m = LinearClassifier(model_dir=model_dir, feature_columns=wide_columns)
    elif model_type == "deep":
        m = DNNClassifier(model_dir=model_dir,
                          feature_columns=deep_columns,
                          hidden_units=[100, 50])
    else:
        m = DNNLinearCombinedClassifier(model_dir=model_dir,
                                        linear_feature_columns=wide_columns,
                                        dnn_feature_columns=deep_columns,
                                        dnn_hidden_units=[100, 50],
                                        fix_global_step_increment_bug=True)
    return m
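# The original example only builds the estimator; the training driver is not shown.
# A hypothetical sketch of how it might be used (ctr_input_fn, train_df and test_df are
# placeholder names; the frames must contain the integer-encoded columns defined above):
def ctr_input_fn(df, label_column="label"):
    # Integerized sparse columns take integer-valued SparseTensors keyed by column name.
    features = {
        col: tf.SparseTensor(indices=[[i, 0] for i in range(df[col].size)],
                             values=df[col].values,
                             dense_shape=[df[col].size, 1])
        for col in df.columns if col != label_column
    }
    return features, tf.constant(df[label_column].values)

# m = build_estimator("./ctr_model", "wide_n_deep")
# m.fit(input_fn=lambda: ctr_input_fn(train_df), steps=2000)
# print(m.evaluate(input_fn=lambda: ctr_input_fn(test_df), steps=1))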
Example #19
    adjusted = DataFrame(classifier.predict(input_fn=audit_input_fn,
                                            as_iterable=False),
                         columns=["_target"])
    if (with_proba):
        adjusted_proba = DataFrame(
            classifier.predict_proba(input_fn=audit_input_fn,
                                     as_iterable=False),
            columns=["probability(0)", "probability(1)"])
        adjusted = pandas.concat((adjusted, adjusted_proba), axis=1)
    store_csv(adjusted, name + ".csv")

    store_savedmodel(classifier, audit_serving_input_fn, name)


build_audit(
    DNNClassifier(hidden_units=[71, 11],
                  feature_columns=_dnn_feature_columns(audit_feature_columns)),
    "DNNClassificationAudit")
build_audit(LinearClassifier(feature_columns=audit_feature_columns),
            "LinearClassificationAudit")

#
# Multi-class classification
#

iris_df = load_csv("Iris.csv")
iris_df["Species"] = iris_df["Species"].replace("setosa", "0").replace(
    "versicolor", "1").replace("virginica", "2").astype(int)

iris_cont_columns = [
    "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"
]
Example #20
    l.remove(l[0])
    l = np.array(l)
    labels = l[:, :1]
    data = l[:, 1:]
    return to_int(data), formalize(to_int(labels), 10)


def load_test_data():
    l = []
    with open("test.csv") as f:
        lines = csv.reader(f)
        for line in lines:
            l.append(line)
    l.remove(l[0])
    return to_int(l)


train_images, train_labels = load_train_data()
test_images = load_test_data()
print(train_images[0])

feature_columns = infer_real_valued_columns_from_input(train_images)
clf = DNNClassifier([100], feature_columns, n_classes=10)
print(train_images.shape)
print(train_labels.shape)
clf.fit(train_images, train_labels)
print("done training")

pred = clf.predict(test_images[0])
print(pred)
Example #21
                        type=str)
    parse = parser.parse_args()
    TRAIN_DATASET = parse.train
    TEST_DATASET = parse.test
    OUTPUT_PATH = parse.output
    np.random.seed(19260817)

    train_set = pandas.read_csv(TRAIN_DATASET)
    test_set = pandas.read_csv(TEST_DATASET)
    encoder = LabelEncoder().fit(train_set["species"])
    train = train_set.drop(["species", "id"], axis=1).values
    label = encoder.transform(train_set["species"])
    test = test_set.drop(["id"], axis=1).values
    scaler = StandardScaler().fit(train)
    train = scaler.transform(train)
    # Reuse the scaler fitted on the training data for the test data.
    test = scaler.transform(test)

    feature_columns = [real_valued_column("", dimension=192)]
    classifier = DNNClassifier(feature_columns=feature_columns,
                               n_classes=99,
                               hidden_units=[1024, 512, 256],
                               optimizer=tf.train.AdamOptimizer)
    classifier.fit(x=train, y=label, steps=1000)
    output = classifier.predict(test)
    output_prob = classifier.predict_proba(test)
    test_id = test_set.pop("id")
    result = pandas.DataFrame(output_prob,
                              index=test_id,
                              columns=encoder.classes_)
    result.to_csv(OUTPUT_PATH)