Example #1
def load_dbpedia(size='small', test_with_fake_data=False):
  """Get DBpedia datasets from CSV files."""
  if not test_with_fake_data:
    data_dir = os.path.join(os.getenv('TF_EXP_BASE_DIR', ''), 'dbpedia_data')
    maybe_download_dbpedia(data_dir)

    train_path = os.path.join(data_dir, 'dbpedia_csv', 'train.csv')
    test_path = os.path.join(data_dir, 'dbpedia_csv', 'test.csv')

    if size == 'small':
      # Reduce the size of original data by a factor of 1000.
      base.shrink_csv(train_path, 1000)
      base.shrink_csv(test_path, 1000)
      train_path = train_path.replace('train.csv', 'train_small.csv')
      test_path = test_path.replace('test.csv', 'test_small.csv')
  else:
    module_path = os.path.dirname(__file__)
    train_path = os.path.join(module_path, 'data', 'text_train.csv')
    test_path = os.path.join(module_path, 'data', 'text_test.csv')

  train = base.load_csv_without_header(
      train_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)
  test = base.load_csv_without_header(
      test_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)

  return base.Datasets(train=train, validation=None, test=test)
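The `base.Datasets` value returned above is a namedtuple whose `train` and `test` fields each expose `data` and `target` arrays. A minimal usage sketch, assuming this module is importable and either the DBpedia CSVs can be downloaded or the bundled fake test data is used:

# Usage sketch (assumption: load_dbpedia is importable from this module).
dbpedia = load_dbpedia(size='small', test_with_fake_data=True)
x_train, y_train = dbpedia.train.data, dbpedia.train.target
x_test, y_test = dbpedia.test.data, dbpedia.test.target
print('train rows: %d, test rows: %d' % (len(x_train), len(x_test)))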
Example #2
def load_dbpedia(size='small', test_with_fake_data=False):
    """Get DBpedia datasets from CSV files."""
    if not test_with_fake_data:
        data_dir = os.path.join(os.getenv('TF_EXP_BASE_DIR', ''),
                                'dbpedia_data')
        maybe_download_dbpedia(data_dir)

        train_path = os.path.join(data_dir, 'dbpedia_csv', 'train.csv')
        test_path = os.path.join(data_dir, 'dbpedia_csv', 'test.csv')

        if size == 'small':
            # Reduce the size of original data by a factor of 1000.
            base.shrink_csv(train_path, 1000)
            base.shrink_csv(test_path, 1000)
            train_path = train_path.replace('train.csv', 'train_small.csv')
            test_path = test_path.replace('test.csv', 'test_small.csv')
    else:
        module_path = os.path.dirname(__file__)
        train_path = os.path.join(module_path, 'data', 'text_train.csv')
        test_path = os.path.join(module_path, 'data', 'text_test.csv')

    train = base.load_csv_without_header(train_path,
                                         target_dtype=np.int32,
                                         features_dtype=np.str,
                                         target_column=0)
    test = base.load_csv_without_header(test_path,
                                        target_dtype=np.int32,
                                        features_dtype=np.str,
                                        target_column=0)

    return base.Datasets(train=train, validation=None, test=test)
Example #3
def load_data():
    module_path = os.path.dirname(__file__)
    #train_path = os.path.join(module_path, 'nrf_data', 'nrf-traindata.csv')
    #test_path = os.path.join(module_path, 'nrf_data', 'nrf-testdata.csv')

    #train_path = os.path.join(module_path, 'nrf_data', 'train_10000.csv')
    #test_path = os.path.join(module_path, 'nrf_data', 'eval_10000.csv')

    #train_path = os.path.join(module_path, 'nrf_data', 'train_10000_only_one_objective.csv')
    #test_path = os.path.join(module_path, 'nrf_data', 'eval_10000_only_one_objective.csv')

    train_path = os.path.join(module_path, 'nrf_data',
                              'train_10000_processed.csv')
    test_path = os.path.join(module_path, 'nrf_data',
                             'eval_10000_processed.csv')

    train = base.load_csv_without_header(train_path,
                                         target_dtype=np.int32,
                                         features_dtype=np.str,
                                         target_column=0)
    test = base.load_csv_without_header(test_path,
                                        target_dtype=np.int32,
                                        features_dtype=np.str,
                                        target_column=0)

    return base.Datasets(train=train, validation=None, test=test)
Example #4
def load_origin_data():
    module_path = os.path.dirname(__file__)
    train_path = os.path.join(module_path, 'nrf_data', 'nrf-traindata.csv')
    test_path = os.path.join(module_path, 'nrf_data', 'nrf-testdata.csv')

    train = base.load_csv_without_header(train_path,
                                         target_dtype=np.int32,
                                         features_dtype=np.str,
                                         target_column=0)
    test = base.load_csv_without_header(test_path,
                                        target_dtype=np.int32,
                                        features_dtype=np.str,
                                        target_column=0)

    return base.Datasets(train=train, validation=None, test=test)
Example #5
def train(data_file, model_path, num_class):
    ds = load_csv_without_header(data_file, np.int32, np.float32, 19)

    for dropi in range(7):
        group = groups[dropi]
        num_feature = len(group)
        dsplit = DataSplit(fetch(ds.data, group), ds.target, 0.75)
        dtrain = dsplit.getTrain()
        dtest = dsplit.getTest()

        x, label, train_step, accuracy, prediction = build_graph(
            num_feature, num_class)
        signature = tf.saved_model.signature_def_utils.build_signature_def(
            inputs={'input': tf.saved_model.utils.build_tensor_info(x)},
            outputs={
                'output': tf.saved_model.utils.build_tensor_info(prediction)
            },
            method_name=tf.saved_model.PREDICT_METHOD_NAME)
        shutil.rmtree(model_path, ignore_errors=True)

        best_test = 0

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Build Signature to save to model

            # Start training loop
            for i in range(5000):
                batch = dtrain.next_batch(50)
                if batch is None:
                    break
                if i % 100 == 0:
                    train_accuracy = accuracy.eval(feed_dict={
                        x: batch[0],
                        label: batch[1]
                    })
                    # print('step %d, training accuracy %g' % (i, train_accuracy))
                    test_accuracy = accuracy.eval(feed_dict={
                        x: dtest.data,
                        label: dtest.label
                    })
                    # print('step %d, test accuracy %g' % (i, test_accuracy))
                    if best_test < test_accuracy:
                        best_test = test_accuracy
                        # print("Current best, saving model")
                        shutil.rmtree(model_path, ignore_errors=True)
                        builder = tf.saved_model.builder.SavedModelBuilder(
                            model_path)
                        builder.add_meta_graph_and_variables(
                            sess, [tf.saved_model.SERVING],
                            signature_def_map={
                                tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                                signature
                            })
                        builder.save()
                        ## Print the value just saved
                train_step.run(feed_dict={x: batch[0], label: batch[1]})

        print("Final Result: %d, %g" % (dropi, best_test))
Example #6
def load_full_data():
    module_path = os.path.dirname(__file__)
    #train_path = os.path.join(module_path, 'nrf_data', 'nrf-traindata.csv')
    #test_path = os.path.join(module_path, 'nrf_data', 'nrf-testdata.csv')
    train_path = os.path.join(module_path, 'nrf_data', 'train_10000.csv')
    test_path = os.path.join(module_path, 'nrf_data', 'eval_10000.csv')

    train = base.load_csv_without_header(train_path,
                                         target_dtype=np.int32,
                                         features_dtype=np.str,
                                         target_column=0)
    test = base.load_csv_without_header(test_path,
                                        target_dtype=np.int32,
                                        features_dtype=np.str,
                                        target_column=0)

    data = np.concatenate((train.data, test.data), axis=0)
    target = np.concatenate((train.target, test.target), axis=0)

    return Dataset(data=np.array(data),
                   target=np.array(target).astype(np.int32))
Example #7
def loadDbpedia(size='small'):
    """Get DBpedia datasets from CSV files."""
    data_dir = '../data/dbpedia_data'

    train_path = os.path.join(data_dir, 'dbpedia_csv', 'train.csv')
    test_path = os.path.join(data_dir, 'dbpedia_csv', 'test.csv')

    if size == 'small':
        # Reduce the size of original data by a factor of 1000.
        train_path = train_path.replace('train.csv', 'train_small.csv')
        test_path = test_path.replace('test.csv', 'test_small.csv')

    train = base.load_csv_without_header(train_path,
                                         target_dtype=np.int32,
                                         features_dtype=np.str,
                                         target_column=0)
    test = base.load_csv_without_header(test_path,
                                        target_dtype=np.int32,
                                        features_dtype=np.str,
                                        target_column=0)

    return base.Datasets(train=train, validation=None, test=test)
Example #8
def main(args):
    # Load datasets
    abalone_train, abalone_test, abalone_predict = maybe_download(
        FLAGS.train_data, FLAGS.test_data, FLAGS.predict_data)

    # Training examples
    training_set = load_csv_without_header(filename=abalone_train,
                                           target_dtype=np.int,
                                           features_dtype=np.float64)

    # Test examples
    test_set = tf.contrib.learn.datasets.base.load_csv_without_header(
        filename=abalone_test, target_dtype=np.int, features_dtype=np.float64)

    # Set of 7 examples for which to predict abalone ages
    prediction_set = tf.contrib.learn.datasets.base.load_csv_without_header(
        filename=abalone_predict,
        target_dtype=np.int,
        features_dtype=np.float64)

    # Set model params
    model_params = {"learning_rate": LEARNING_RATE}

    # Instantiate Estimator
    nn = tf.contrib.learn.Estimator(model_fn=model_fn, params=model_params)

    def get_train_inputs():
        x = tf.constant(training_set.data)
        y = tf.constant(training_set.target)
        return x, y

    # Fit
    nn.fit(input_fn=get_train_inputs, steps=5000)

    # Score accuracy
    def get_test_inputs():
        x = tf.constant(test_set.data)
        y = tf.constant(test_set.target)
        return x, y

    ev = nn.evaluate(input_fn=get_test_inputs, steps=1)
    print("Loss: %s" % ev["loss"])
    print("Root Mean Squared Error: %s" % ev["rmse"])

    # Print out predictions
    predictions = nn.predict(x=prediction_set.data, as_iterable=True)
    for i, p in enumerate(predictions):
        print("Prediction %s: %s" % (i + 1, p["ages"]))
Example #9
def train(data_file, model_path, num_class):
    ds = load_csv_without_header(data_file, np.int32, np.float32, 19)
    dsplit = DataSplit(ds.data, ds.target, 0.75)
    dtrain = dsplit.getTrain()
    dtest = dsplit.getTest()

    x, label, train_step, accuracy, prediction = build_graph(num_class)
    builder = tf.saved_model.builder.SavedModelBuilder(model_path)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(50000):
            batch = dtrain.next_batch(50)
            if batch is None:
                break
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0],
                    label: batch[1]
                })
                print('step %d, training accuracy %g' % (i, train_accuracy))
                test_accuracy = accuracy.eval(feed_dict={
                    x: dtest.data,
                    label: dtest.label
                })
                print('step %d, test accuracy %g' % (i, test_accuracy))
            train_step.run(feed_dict={x: batch[0], label: batch[1]})

        # Build Signature to save to model
        signature = tf.saved_model.signature_def_utils.build_signature_def(
            inputs={'input': tf.saved_model.utils.build_tensor_info(x)},
            outputs={
                'output': tf.saved_model.utils.build_tensor_info(prediction)
            },
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                signature
            })
        builder.save()
Example #10
import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets import base
INPUTNODE = 100
OUTPUTNODE = 2
LAYER1 = 10
filenametrain = "/Users/hhy/Desktop/vectortrain.csv"
train = base.load_csv_without_header(filename=filenametrain,
                                     target_dtype=np.int,
                                     features_dtype=np.int)
filenametest = "/Users/hhy/Desktop/vectortest.csv"
test = base.load_csv_without_header(filename=filenametest,
                                    target_dtype=np.int,
                                    features_dtype=np.int)


def add_layer(inputs, in_size, out_size, activation_function=None):
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs


x_train = train.data.reshape(24780, 100)
#y_train=train.target.reshape(24780,1)
y_train = np.array(train.target).reshape(24780, 2)
#y_train=tf.one_hot(y_train1,2,1,0)
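The snippet is cut off above; a sketch (an assumption, not the original continuation) of how add_layer is typically wired together with the INPUTNODE, LAYER1 and OUTPUTNODE constants defined at the top:

# Sketch (assumption): two dense layers built with add_layer, matching the
# constants above (100 inputs -> 10 hidden units -> 2 outputs).
xs = tf.placeholder(tf.float32, [None, INPUTNODE])
ys = tf.placeholder(tf.float32, [None, OUTPUTNODE])
hidden = add_layer(xs, INPUTNODE, LAYER1, activation_function=tf.nn.relu)
prediction = add_layer(hidden, LAYER1, OUTPUTNODE, activation_function=tf.nn.softmax)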
Example #11
from tensorflow.contrib.learn.python.learn.datasets import base

import tensorflow as tf
import numpy as np

# print TensorFlow version
print('TF Version: ', tf.__version__)

# data file which we will train on
TRAIN = "candles_train.txt"

# data file which we will test to determine accuracy
TEST = "candles_test.txt"

# training set
train_set = base.load_csv_without_header(filename=TRAIN, features_dtype=np.double, target_dtype=np.double)

# test set
test_set = base.load_csv_without_header(filename=TEST, features_dtype=np.double, target_dtype=np.double)

# print train data set
# print(train_set.data)

# print test data set
# print(test_set.data)

# add feature columns so TensorFlow will know what we need to train on
feature_name = "stock_data_features"
feature_columns = [tf.feature_column.numeric_column(feature_name, shape=[1])]

# our classifier will do the training as well as keep track of the state if we need to use it again
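The snippet ends before the classifier is constructed. A minimal continuation sketch, assuming a tf.estimator.LinearClassifier persisted to a model directory (the classifier type, model_dir, and step counts here are assumptions, not the original code):

# Sketch (assumption): a linear classifier that stores its state under
# model_dir so the trained model can be reused later.
classifier = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
    n_classes=2,
    model_dir='/tmp/candles_model')

# numpy_input_fn feeds the arrays loaded above; the feature array is assumed
# to match the shape declared for feature_name.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={feature_name: np.array(train_set.data)},
    y=np.array(train_set.target).astype(np.int32),
    num_epochs=None, shuffle=True)
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={feature_name: np.array(test_set.data)},
    y=np.array(test_set.target).astype(np.int32),
    num_epochs=1, shuffle=False)

classifier.train(input_fn=train_input_fn, steps=1000)
print(classifier.evaluate(input_fn=test_input_fn))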