def train_test_split(data, train_rate=0.7, randomly=True):
    """Split a data set into a train/validate pair of tfnn.Data objects.

    :param data: source set exposing .copy(), .data, .n_samples and
        .n_xfeatures (presumably a tfnn data container -- confirm)
    :param train_rate: fraction of samples assigned to the training set
    :param randomly: shuffle the copy before splitting when True
    :return: [train_data, validate_data]
    """
    working = data.copy()
    if randomly:
        working = shuffle(working)
    split_at = int(data.n_samples * train_rate)
    n_x = data.n_xfeatures  # columns [0, n_x) are features, the rest targets
    train_rows = working.data[:split_at, :]
    test_rows = working.data[split_at:, :]
    t_data = tfnn.Data(train_rows[:, :n_x], train_rows[:, n_x:], name='train')
    v_data = tfnn.Data(test_rows[:, :n_x], test_rows[:, n_x:], name='validate')
    return [t_data, v_data]
# Example 2
def train(data_path):
    """Train a regression network on pickled road data.

    Reads the pickle once, trains on the first 10000 rows, reports the
    validation cost every 1000 steps, then saves the model and plots a
    prediction-vs-truth comparison.

    :param data_path: path to a pickled pandas DataFrame; column 'a' is
        the target, columns from index 1 onward are the features.
    """
    # Read the pickle ONCE (the original read it twice: once for the
    # training slice and once just to print the sample size).
    full_data = pd.read_pickle(data_path)
    load_data = full_data.iloc[:10000, :]
    xs = load_data.iloc[:, 1:]
    print(xs.head(2))
    print('sample size:', full_data.shape[0])
    ys = load_data.a
    data = tfnn.Data(xs, ys, name='road_data')

    network = tfnn.RegNetwork(xs.shape[1], 1, do_dropout=False)
    # min-max normalization fitted on the training distribution
    n_data = network.normalizer.minmax_fit(data)
    t_data, v_data = n_data.train_test_split(0.7)
    network.add_hidden_layer(100, activator=tfnn.nn.relu, dropout_layer=True)
    network.add_output_layer(activator=None, dropout_layer=False)
    global_step = tfnn.Variable(0, trainable=False)
    # lr = tfnn.train.exponential_decay(0.001, global_step, 2000, 0.9)
    optimizer = tfnn.train.AdamOptimizer(0.001)
    network.set_optimizer(optimizer, global_step)
    evaluator = tfnn.Evaluator(network)
    summarizer = tfnn.Summarizer(network, save_path='/tmp/log')

    for i in range(10000):
        b_xs, b_ys = t_data.next_batch(100, loop=True)
        # 0.5 is presumably the dropout keep probability -- confirm
        network.run_step(b_xs, b_ys, 0.5)
        if i % 1000 == 0:
            print(evaluator.compute_cost(v_data.xs, v_data.ys))
            summarizer.record_train(b_xs, b_ys, i, 0.5)
            summarizer.record_validate(v_data.xs, v_data.ys, i)
    network.save()
    evaluator.regression_plot_linear_comparison(v_data.xs,
                                                v_data.ys,
                                                continue_plot=True)
    network.sess.close()
    summarizer.web_visualize()
# Example 3
def train_test_split(data, train_rate=0.7, randomly=True):
    """Split a data set into a train/validate pair of tfnn.Data objects.

    :param data: source set exposing pandas .xs / .ys frames and .n_samples
    :param train_rate: fraction of samples assigned to the training set
    :param randomly: shuffle rows (via a permuted reindex) before splitting
    :return: [train_data, validate_data]
    """
    _n_train_samples = int(data.n_samples * train_rate)
    if randomly:
        xs_ys = pd.concat([data.xs, data.ys], axis=1, join='outer')
        df = xs_ys.reindex(np.random.permutation(xs_ys.index))
        shuffled_xs = df.iloc[:, :data.xs.shape[1]]
        shuffled_ys = df.iloc[:, data.xs.shape[1]:]
        t_xs = shuffled_xs.iloc[:_n_train_samples, :]
        t_ys = shuffled_ys.iloc[:_n_train_samples, :]
        v_xs = shuffled_xs.iloc[_n_train_samples:, :]
        v_ys = shuffled_ys.iloc[_n_train_samples:, :]
    else:
        # BUG FIX: the train split must take the FIRST _n_train_samples
        # rows; the original sliced [_n_train_samples:] for both train
        # and validate, making them identical tails of the data.
        t_xs = data.xs.iloc[:_n_train_samples, :]
        t_ys = data.ys.iloc[:_n_train_samples, :]
        v_xs = data.xs.iloc[_n_train_samples:, :]
        v_ys = data.ys.iloc[_n_train_samples:, :]
    t_data = tfnn.Data(t_xs, t_ys, name='train')
    v_data = tfnn.Data(v_xs, v_ys, name='validate')
    return [t_data, v_data]
# Example 4
 def fit(self, feed_xs, feed_ys, n_iter=5000, *args):
     """
     Fit data to network, automatically training the network.
     :param feed_xs: training inputs, wrapped into tfnn.Data for batching
     :param feed_ys: training targets
     :param n_iter: number of training steps; when n_iter=-1, the training
         steps = n_samples*2
     :param args: pass keep_prob when use dropout, pass l2_lambda when use
         l2 regularization.
     :return: Nothing
     """
     train_data = tfnn.Data(feed_xs, feed_ys)
     # BUG FIX: the docstring promised n_iter=-1 -> n_samples*2 steps, but
     # the original never implemented it (range(-1) ran zero iterations).
     if n_iter == -1:
         n_iter = train_data.n_samples * 2
     for _ in range(n_iter):
         b_xs, b_ys = train_data.next_batch(100, loop=True)
         self.run_step(feed_xs=b_xs, feed_ys=b_ys, *args)
# Example 5
    def fit(self, feed_xs, feed_ys, steps=None, *args, **kwargs):
        """Train on (feed_xs, feed_ys) with an in-place console progress line.

        Every 200 steps the current completion percentage, an ETA estimate
        and the batch cost are printed, overwriting the previous line.
        When *steps* is None, one step per sample is run.
        """
        def _print_log(log):
            # '\r' rewinds to the line start so each log overwrites the last
            print('\r{}'.format(log), end='')

        def _get_progress(t_cost, step, steps):
            # Extrapolate remaining wall time from the time spent so far.
            remaining = int(t_cost * (steps - step) / step)
            if remaining <= 60:
                eta = str(remaining) + 's'
            else:
                mins = remaining // 60
                secs = remaining % 60
                if mins <= 60:
                    eta = '{}m-{}s'.format(mins, secs)
                else:
                    eta = '{}h-{}m-{}s'.format(mins // 60, mins % 60, secs)
            pct = '{}%'.format(round(step / steps * 100, 2))
            return [eta, pct]

        train_data = tfnn.Data(feed_xs, feed_ys)
        if steps is None:
            steps = train_data.n_samples
        time_start = time.time()
        for step in range(1, steps + 1):
            b_xs, b_ys = train_data.next_batch(50)
            self.run_step(feed_xs=b_xs, feed_ys=b_ys, *args, **kwargs)
            if step % 200 != 0:
                continue
            eta, pct = _get_progress(time.time() - time_start, step, steps)
            # Evaluate the cost without dropout / regularization effects.
            feed_dict = self._get_feed_dict(b_xs, b_ys,
                                            keep_prob=1., l2_value=0.)
            cost = self.sess.run(self.loss, feed_dict=feed_dict)
            _print_log('{} | ETA: {} | Cost: {}'.format(
                pct, eta, round(cost, 5)))
        print('\r')
# Example 6
 def fit(self, feed_xs, feed_ys, steps=2000, *args, **kwargs):
     """Run *steps* training iterations on mini-batches of 100 samples."""
     batches = tfnn.Data(feed_xs, feed_ys)
     done = 0
     while done < steps:
         xs_batch, ys_batch = batches.next_batch(100)
         self.run_step(feed_xs=xs_batch, feed_ys=ys_batch, *args, **kwargs)
         done += 1
from sklearn.datasets import load_boston
import tfnn

# load data -- fetch the dataset ONCE (the original called load_boston()
# twice, loading the whole dataset from disk a second time for the target)
boston = load_boston()
xs = boston.data
ys = boston.target

# set data into tfnn.Data format
data = tfnn.Data(xs, ys)
data.shuffle(inplace=True)

# define network properties
# NOTE(review): ys from load_boston() is 1-D; data.ys.shape[1] presumably
# works because tfnn.Data reshapes targets to 2-D -- confirm.
network = tfnn.RegNetwork(input_size=data.xs.shape[1],
                          output_size=data.ys.shape[1])

# normalize features
norm_data = network.normalizer.minmax(data)

# train test split
t_data, v_data = norm_data.train_test_split(0.7)

# set hidden layer
h1 = tfnn.HiddenLayer(n_neurons=10, activator='relu')
h2 = tfnn.HiddenLayer(n_neurons=10, activator='relu')

# set output layer
out = tfnn.OutputLayer(activator=None)

# build network layers
network.build_layers([h1, h2, out])
import tfnn
import pandas as pd

# Bank marketing data: semicolon-separated CSV, last column is the label.
bank_data = pd.read_csv('bank-full.csv', sep=';')

# Wrap features (all but last column) and target (last column), then
# integer-encode the categorical target and features in place.
data = tfnn.Data(bank_data.iloc[:, :-1], bank_data.iloc[:, -1])
data.encode_cat_y(inplace=True)
data.encode_cat_x(inplace=True)

n_features = data.xs.shape[1]
n_classes = data.ys.shape[1]
network = tfnn.ClfNetwork(n_features, n_classes)

# Scale features to [-1, 1] and split off a held-out test set.
data = network.normalizer.minmax_fit(data, -1, 1)
train_data, test_data = data.train_test_split()

network.add_hidden_layer(50, activator=tfnn.nn.relu)
network.add_output_layer(activator=None)
network.set_optimizer(tfnn.train.GradientDescentOptimizer(0.0001))
evaluator = tfnn.Evaluator(network)

for i in range(1000):
    b_xs, b_ys = train_data.next_batch(100, loop=True)
    network.run_step(b_xs, b_ys)
    if not i % 50:
        # report held-out accuracy every 50 steps
        print(evaluator.compute_accuracy(test_data.xs, test_data.ys))
# print(test_data.ys.iloc[:,0].value_counts())
print(network.predict(test_data.xs.iloc[20:30, :]))
print(test_data.ys.iloc[20:30, :])