def train_test_split(data, train_rate=0.7, randomly=True):
    data_copy = data.copy()
    _n_train_samples = int(data.n_samples * train_rate)
    if randomly:
        # `shuffle` is assumed to return a row-shuffled copy of the Data
        # object; it is not defined in this snippet
        data_copy = shuffle(data_copy)
    train_data_data = data_copy.data[:_n_train_samples, :]
    test_data_data = data_copy.data[_n_train_samples:, :]
    train_data_data_xs = train_data_data[:, :data.n_xfeatures]
    train_data_data_ys = train_data_data[:, data.n_xfeatures:]
    test_data_data_xs = test_data_data[:, :data.n_xfeatures]
    test_data_data_ys = test_data_data[:, data.n_xfeatures:]
    t_data = tfnn.Data(train_data_data_xs, train_data_data_ys, name='train')
    v_data = tfnn.Data(test_data_data_xs, test_data_data_ys, name='validate')
    return [t_data, v_data]
import pandas as pd

import tfnn


def train(data_path):
    all_data = pd.read_pickle(data_path)  # read once; reused for sample size
    load_data = all_data.iloc[:10000, :]
    xs = load_data.iloc[:, 1:]
    print(xs.head(2))
    print('sample size:', all_data.shape[0])
    ys = load_data.a
    data = tfnn.Data(xs, ys, name='road_data')

    network = tfnn.RegNetwork(xs.shape[1], 1, do_dropout=False)
    n_data = network.normalizer.minmax_fit(data)
    t_data, v_data = n_data.train_test_split(0.7)
    network.add_hidden_layer(100, activator=tfnn.nn.relu, dropout_layer=True)
    network.add_output_layer(activator=None, dropout_layer=False)

    global_step = tfnn.Variable(0, trainable=False)
    # lr = tfnn.train.exponential_decay(0.001, global_step, 2000, 0.9)
    optimizer = tfnn.train.AdamOptimizer(0.001)
    network.set_optimizer(optimizer, global_step)
    evaluator = tfnn.Evaluator(network)
    summarizer = tfnn.Summarizer(network, save_path='/tmp/log')

    for i in range(10000):
        b_xs, b_ys = t_data.next_batch(100, loop=True)
        network.run_step(b_xs, b_ys, 0.5)
        if i % 1000 == 0:
            print(evaluator.compute_cost(v_data.xs, v_data.ys))
            summarizer.record_train(b_xs, b_ys, i, 0.5)
            summarizer.record_validate(v_data.xs, v_data.ys, i)
    network.save()

    evaluator.regression_plot_linear_comparison(v_data.xs, v_data.ys,
                                                continue_plot=True)
    network.sess.close()
    summarizer.web_visualize()
import numpy as np
import pandas as pd

import tfnn


def train_test_split(data, train_rate=0.7, randomly=True):
    _n_train_samples = int(data.n_samples * train_rate)
    if randomly:
        xs_ys = pd.concat([data.xs, data.ys], axis=1, join='outer')
        df = xs_ys.reindex(np.random.permutation(xs_ys.index))
        shuffled_xs = df.iloc[:, :data.xs.shape[1]]
        shuffled_ys = df.iloc[:, data.xs.shape[1]:]
        t_xs = shuffled_xs.iloc[:_n_train_samples, :]
        t_ys = shuffled_ys.iloc[:_n_train_samples, :]
        v_xs = shuffled_xs.iloc[_n_train_samples:, :]
        v_ys = shuffled_ys.iloc[_n_train_samples:, :]
    else:
        # non-random split: leading rows train, remaining rows validate
        t_xs = data.xs.iloc[:_n_train_samples, :]
        t_ys = data.ys.iloc[:_n_train_samples, :]
        v_xs = data.xs.iloc[_n_train_samples:, :]
        v_ys = data.ys.iloc[_n_train_samples:, :]
    t_data = tfnn.Data(t_xs, t_ys, name='train')
    v_data = tfnn.Data(v_xs, v_ys, name='validate')
    return [t_data, v_data]
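# Hedged usage sketch for train_test_split above. The toy DataFrames are
# hypothetical; only names that appear elsewhere in this file (tfnn.Data,
# n_samples, .xs/.ys) are assumed to exist.
import numpy as np
import pandas as pd

import tfnn

toy_xs = pd.DataFrame(np.random.randn(10, 3), columns=['f1', 'f2', 'f3'])
toy_ys = pd.DataFrame(np.random.randn(10, 1), columns=['target'])
toy_data = tfnn.Data(toy_xs, toy_ys)
t_data, v_data = train_test_split(toy_data, train_rate=0.7, randomly=True)
# with train_rate=0.7, t_data holds 7 shuffled rows and v_data the other 3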
def fit(self, feed_xs, feed_ys, n_iter=5000, *args):
    """
    Fit data to network, automatically training the network.
    :param feed_xs: input features to train on
    :param feed_ys: target values to train on
    :param n_iter: number of training steps; when n_iter=-1, the training
        steps = n_samples * 2
    :param args: pass keep_prob when using dropout, pass l2_lambda when
        using l2 regularization.
    :return: Nothing
    """
    train_data = tfnn.Data(feed_xs, feed_ys)
    if n_iter == -1:
        # as documented above: default to twice the number of samples
        n_iter = train_data.n_samples * 2
    for _ in range(n_iter):
        b_xs, b_ys = train_data.next_batch(100, loop=True)
        # pass the batch positionally so *args (e.g. keep_prob) cannot
        # collide with the feed_xs/feed_ys parameters of run_step
        self.run_step(b_xs, b_ys, *args)
import time

import tfnn


def fit(self, feed_xs, feed_ys, steps=None, *args, **kwargs):
    def _print_log(log):
        print('\r{}'.format(log), end='')

    def _get_progress(t_cost, step, steps):
        _time_remaining_second = int(t_cost * (steps - step) / step)
        if _time_remaining_second > 60:
            _time_remaining_min = _time_remaining_second // 60
            _time_remaining_second %= 60
            if _time_remaining_min > 60:
                _time_remaining_hour = _time_remaining_min // 60
                _time_remaining_min %= 60
                _time_remaining = (str(_time_remaining_hour) + 'h-'
                                   + str(_time_remaining_min) + 'm-'
                                   + str(_time_remaining_second) + 's')
            else:
                _time_remaining = (str(_time_remaining_min) + 'm-'
                                   + str(_time_remaining_second) + 's')
        else:
            _time_remaining = str(_time_remaining_second) + 's'
        _percentage = str(round(step / steps * 100, 2)) + '%'
        return [_time_remaining, _percentage]

    train_data = tfnn.Data(feed_xs, feed_ys)
    if steps is None:
        steps = train_data.n_samples
    time_start = time.time()
    for step in range(1, steps + 1):
        b_xs, b_ys = train_data.next_batch(50)
        self.run_step(b_xs, b_ys, *args, **kwargs)
        if step % 200 == 0:
            time_cost = time.time() - time_start
            time_remaining, percentage = _get_progress(time_cost, step, steps)
            feed_dict = self._get_feed_dict(b_xs, b_ys, keep_prob=1., l2_value=0.)
            cost = self.sess.run(self.loss, feed_dict=feed_dict)
            _log = (percentage + ' | ETA: ' + str(time_remaining)
                    + ' | Cost: ' + str(round(cost, 5)))
            _print_log(_log)
    print('\r')
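# Worked check (hypothetical numbers) of the ETA math in _get_progress above:
# after 50 s spent over 1,000 of 5,000 steps, the remaining time is
#     int(50 * (5000 - 1000) / 1000) = 200 s  ->  '3m-20s'
# and the progress string is round(1000 / 5000 * 100, 2) = 20.0 -> '20.0%'.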
def fit(self, feed_xs, feed_ys, steps=2000, *args, **kwargs):
    train_data = tfnn.Data(feed_xs, feed_ys)
    for _ in range(steps):
        b_xs, b_ys = train_data.next_batch(100)
        self.run_step(b_xs, b_ys, *args, **kwargs)
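# Minimal usage sketch for the fit() variants above. Assumes `network` and
# `t_data` were built as in the examples below; the trailing 0.5 (keep_prob)
# is forwarded to run_step and is only needed when dropout is enabled.
network.fit(t_data.xs, t_data.ys, 2000, 0.5)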
from sklearn.datasets import load_boston

import tfnn

# load data
boston = load_boston()
xs = boston.data
ys = boston.target

# set data into tfnn.Data format
data = tfnn.Data(xs, ys)
data.shuffle(inplace=True)

# define network properties
network = tfnn.RegNetwork(input_size=data.xs.shape[1],
                          output_size=data.ys.shape[1])

# normalize features
norm_data = network.normalizer.minmax(data)

# train test split
t_data, v_data = norm_data.train_test_split(0.7)

# set hidden layers
h1 = tfnn.HiddenLayer(n_neurons=10, activator='relu')
h2 = tfnn.HiddenLayer(n_neurons=10, activator='relu')

# set output layer
out = tfnn.OutputLayer(activator=None)

# build network layers
network.build_layers([h1, h2, out])
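# Hedged continuation of the Boston example: a sketch of training and
# evaluation that reuses only calls shown elsewhere in this file
# (set_optimizer, Evaluator, next_batch, run_step, compute_cost); it is not
# presented as the library's documented API.
network.set_optimizer(tfnn.train.AdamOptimizer(0.001))
evaluator = tfnn.Evaluator(network)
for i in range(1000):
    b_xs, b_ys = t_data.next_batch(100, loop=True)
    network.run_step(b_xs, b_ys)
    if i % 100 == 0:
        # track validation cost as training progresses
        print(evaluator.compute_cost(v_data.xs, v_data.ys))
network.sess.close()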
import tfnn
import pandas as pd

bank_data = pd.read_csv('bank-full.csv', sep=';')
data = tfnn.Data(bank_data.iloc[:, :-1], bank_data.iloc[:, -1])
data.encode_cat_y(inplace=True)
data.encode_cat_x(inplace=True)

network = tfnn.ClfNetwork(data.xs.shape[1], data.ys.shape[1])
data = network.normalizer.minmax_fit(data, -1, 1)
train_data, test_data = data.train_test_split()
network.add_hidden_layer(50, activator=tfnn.nn.relu)
network.add_output_layer(activator=None)
network.set_optimizer(tfnn.train.GradientDescentOptimizer(0.0001))
evaluator = tfnn.Evaluator(network)

for i in range(1000):
    b_xs, b_ys = train_data.next_batch(100, loop=True)
    network.run_step(b_xs, b_ys)
    if i % 50 == 0:
        print(evaluator.compute_accuracy(test_data.xs, test_data.ys))

# print(test_data.ys.iloc[:, 0].value_counts())
print(network.predict(test_data.xs.iloc[20:30, :]))
print(test_data.ys.iloc[20:30, :])