def train_net(filename, overwrite, should_graph):
    """Train the network on the data in `filename` and checkpoint the best model.

    Relies on module-level configuration globals (validation_set_size,
    batch_size, learning_rate, dropout, use_GPU, show_weights, max_epochs,
    decay_rate, loss_threshold, loss_saved_iterations,
    loss_starting_iteration, accuracy_saved_iterations,
    accuracy_starting_iteration, progress_bar_size, num_kernels) and on
    module-level helpers (model_exists, load_data, model, display_weights,
    DynamicConsoleTable, update_output).

    filename     -- path handed to load_data() to obtain (data, labels)
    overwrite    -- if False and a saved model already exists, do nothing
    should_graph -- not referenced inside this function body
    """
    # If overwrite is false and a saved model already exists, then simply
    # return
    if not overwrite and model_exists():
        print("Model already exists (did you mean to include --overwrite?)")
        return

    # Otherwise, we train and save the model

    # Load the data
    data, labels = load_data(filename)

    # Create batches: hold out the LAST validation_set_size examples for
    # validation; the rest are used for training.
    assert validation_set_size <= len(
        data), 'validation_set_size must be smaller than len(data)'
    training_data = data[:len(data) - validation_set_size]
    training_labels = labels[:len(labels) - validation_set_size]
    validation_data = data[len(data) - validation_set_size:]
    validation_labels = labels[len(labels) - validation_set_size:]
    #print('Training data: ' + str(len(training_data)))
    #print('Validation data: ' + str(len(validation_data)))
    assert float(
        len(training_data)
    ) / batch_size % 1 == 0, 'batch_size must evenly divide len(training_data)'
    #assert float(validation_set_size) / batch_size % 1 == 0, 'batch_size must evenly divide validation_set_size'
    # Python 2 integer division; exact because of the assert above.
    num_training_batches = len(training_data) / batch_size
    #num_validation_batches = validation_set_size / batch_size
    # Only a single validation batch (the first batch_size held-out
    # examples) is ever evaluated.
    num_validation_batches = 1

    # Slice the data into contiguous, equally sized batches.
    training_data_batches = []
    training_label_batches = []
    validation_data_batches = []
    validation_label_batches = []
    for i in range(num_training_batches):
        training_data_batches.append(
            training_data[i * batch_size:(i + 1) * batch_size])
        training_label_batches.append(
            training_labels[i * batch_size:(i + 1) * batch_size])
    for i in range(num_validation_batches):
        validation_data_batches.append(
            validation_data[i * batch_size:(i + 1) * batch_size])
        validation_label_batches.append(
            validation_labels[i * batch_size:(i + 1) * batch_size])

    # Build model and get variable handles
    train_op, x, y, out, loss, accuracy, weights, biases = model(
        learning_rate, dropout)

    # Initialize environment
    initialize = tf.global_variables_initializer()

    # Session config
    config = tf.ConfigProto(device_count={'GPU': 1 if use_GPU == True else 0})

    # Run model
    done = False
    epoch = 0
    iteration = 0
    sustained_loss = 0.0  # exponential moving average of the batch loss
    loss_values = []  # bounded history of raw batch losses (for graphing)
    validation_accuracy_values = []  # bounded history of validation accuracy
    max_accuracy_values = []  # bounded history of the running best accuracy
    max_accuracy = 0.0
    max_accuracy_weights = None  # weights snapshot at the best accuracy
    max_accuracy_biases = None  # biases snapshot at the best accuracy
    with tf.Session(config=config) as session:
        session.run(initialize)
        print '=========='
        print 'GPU ' + ('enabled' if use_GPU else 'disabled')
        print

        # Show weight initialization
        if show_weights:
            weights_val = session.run(weights)
            display_weights(weights_val)

        # Live progress table printed to the console.
        layout = [
            dict(name='Ep.', width=4, align='center'),
            dict(name='Batch',
                 width=2 * len(str(num_training_batches)) + 1,
                 suffix='/' + str(num_training_batches)),
            dict(name='Loss', width=8),
            dict(name='Val Acc', width=6, suffix='%'),
            dict(name='Max Acc', width=6, suffix='%'),
            dict(name='Time', width=progress_bar_size + 2, align='center'),
        ]
        table = DynamicConsoleTable(layout)
        table.print_header()

        while not done:
            epoch += 1

            # Trains on the data, in batches
            for i in range(num_training_batches):
                iteration += 1

                data_batch = training_data_batches[i]
                label_batch = training_label_batches[i]
                _, loss_val = session.run([train_op, loss],
                                          feed_dict={
                                              x: data_batch,
                                              y: label_batch
                                          })
                # Smoothed loss drives the termination condition below.
                sustained_loss = decay_rate * sustained_loss + (
                    1.0 - decay_rate) * loss_val

                # Keep at most loss_saved_iterations raw losses.
                if len(loss_values) == loss_saved_iterations:
                    loss_values.pop(0)
                if iteration >= loss_starting_iteration:
                    loss_values.append(loss_val)

                # NOTE(review): these two assignments are immediately
                # overwritten inside the validation loop below and look dead.
                data_batch = validation_data_batches[iteration %
                                                     num_validation_batches]
                label_batch = validation_label_batches[iteration %
                                                       num_validation_batches]

                # Evaluate validation accuracy, averaged over the
                # (currently single) validation batch(es).
                validation_accuracy = 0.0
                for j in range(num_validation_batches):
                    data_batch = validation_data_batches[j]
                    label_batch = validation_label_batches[j]
                    accuracy_val, out_val = session.run([accuracy, out],
                                                        feed_dict={
                                                            x: data_batch,
                                                            y: label_batch
                                                        })
                    # On the final epoch, dump (label, argmax-prediction)
                    # pairs for inspection. NOTE(review): the inner loop
                    # variable j shadows the outer batch index j.
                    if epoch >= max_epochs:
                        for j in range(len(label_batch)):
                            print label_batch[j], np.argmax(out_val[j])
                    validation_accuracy += accuracy_val
                validation_accuracy /= num_validation_batches

                # Bounded history of validation accuracies.
                if len(validation_accuracy_values
                       ) == accuracy_saved_iterations:
                    validation_accuracy_values.pop(0)
                if iteration >= accuracy_starting_iteration:
                    validation_accuracy_values.append(validation_accuracy)

                # Checkpoint whenever validation accuracy improves.
                if validation_accuracy > max_accuracy:
                    weights_val, biases_val = session.run([weights, biases])
                    max_accuracy = validation_accuracy
                    max_accuracy_weights = weights_val
                    max_accuracy_biases = biases_val
                    # Save weights
                    tf.add_to_collection("vars", out)
                    tf.add_to_collection("vars", x)
                    saver = tf.train.Saver()
                    saver.save(session, 'model')
                if len(max_accuracy_values) == accuracy_saved_iterations:
                    max_accuracy_values.pop(0)
                if iteration >= accuracy_starting_iteration:
                    max_accuracy_values.append(max_accuracy)

                # Progress bar within the epoch; on the last batch of the
                # epoch it is replaced by a wall-clock timestamp.
                progress = int(
                    math.ceil(progress_bar_size * float(
                        (iteration - 1) % num_training_batches) /
                              max(1, num_training_batches - 1)))
                progress_string = '[' + '#' * progress + ' ' * (
                    progress_bar_size - progress) + ']'
                if iteration % num_training_batches == 0:
                    progress_string = time.strftime("%I:%M:%S %p",
                                                    time.localtime())
                table.update(epoch,
                             (iteration - 1) % num_training_batches + 1,
                             sustained_loss, validation_accuracy * 100,
                             max_accuracy * 100, progress_string)

                # Termination condition
                if sustained_loss < loss_threshold:
                    done = True
                    break

                # NOTE(review): weights_val is unbound here when
                # show_weights is False and no accuracy improvement has
                # happened yet -- potential NameError. TODO confirm and
                # initialize weights_val before the loop.
                update_output(iteration, weights_val, loss_values,
                              validation_accuracy_values,
                              max_accuracy_values)
            table.finalize()

            # Termination condition
            if epoch >= max_epochs or sustained_loss < loss_threshold:
                done = True

        # Final (forced) display update after training stops.
        update_output(iteration, weights_val, loss_values,
                      validation_accuracy_values, max_accuracy_values,
                      override=True)

        # Plot the fully connected 'out' weights of the best checkpoint,
        # summed across kernels. NOTE(review): n = 100 is presumably the
        # number of FC weights per kernel -- confirm against model().
        n = 100
        plt.figure('FC weights')
        #plt.plot(weights_val['out'][:,1])
        plt.plot(
            np.sum([
                max_accuracy_weights['out'][k * n:(k + 1) * n, 1]
                for k in range(num_kernels)
            ], axis=0))
        #plt.plot(np.sum([weights_val['out'][0:1*n,1], weights_val['out'][1*n:2*n,1], weights_val['out'][2*n:3*n,1], weights_val['out'][3*n:4*n,1], weights_val['out'][4*n:5*n,1], weights_val['out'][5*n:6*n,1]], axis=0))
        plt.show()
        plt.pause(0)
def train(self,
          training_data,
          training_labels,
          validation_data=None,
          validation_labels=None,
          skip_evaluation=False,
          loss_fn=None,
          optimizer_fn=None,
          accuracy_fn=None,
          max_epochs=float('inf'),
          batch_size=1,
          validation_set_size=None,
          validation_interval=1,
          loss_threshold=0.0,
          sustained_loss_decay_rate=0.9,
          row_output_interval=None):
    """Train this object's model (self.out fed by self.x) on the given data.

    training_data / training_labels     -- full training set; shuffled once,
                                           then sliced into batch_size chunks
    validation_data / validation_labels -- optional held-out set; when absent
                                           (or skip_evaluation) no accuracy
                                           is computed
    skip_evaluation            -- force-skip validation accuracy evaluation
    loss_fn                    -- required; called as loss_fn(self.out, y)
    optimizer_fn               -- required; called as optimizer_fn(loss)
    accuracy_fn                -- called as accuracy_fn(self.out, y); required
                                  unless evaluation is skipped
    max_epochs                 -- stop after this many epochs
    batch_size                 -- training batch size; a trailing remainder of
                                  the training set is silently dropped
    validation_set_size        -- size of the random validation sample drawn
                                  each evaluation (capped at the set size)
    validation_interval        -- evaluate every this-many iterations
    loss_threshold             -- stop once the smoothed loss drops below this
    sustained_loss_decay_rate  -- EMA decay for the smoothed loss
    row_output_interval        -- finalize a console-table row every
                                  this-many iterations (default: once per
                                  epoch)

    Also reads self.use_gpu, self.use_sound, self.sounds,
    self.progress_bar_size, self.loss_display_* and self.accuracy_display_*
    settings, and calls self.update_output for live graphing.
    """
    assert loss_fn, 'Must specify a loss_fn (a function that takes (out, y) as input)'
    assert optimizer_fn, 'Must specify a optimizer_fn (a function that takes loss as input)'
    # Clamp the requested sample size to the validation set size; default to
    # using the whole validation set.
    if validation_data is not None:
        validation_set_size = (validation_set_size and \
                               min(validation_set_size, len(validation_data))) or \
                              len(validation_data)
    assert len(training_data) == len(training_labels), \
        'Number of training data and training labels do not match'
    # Evaluation requires a complete, consistent validation set; otherwise
    # fall back to skipping it.
    if not skip_evaluation and (validation_data is not None
                                or validation_labels is not None):
        assert validation_data is not None and validation_labels is not None and \
            len(validation_data) == len(validation_labels), \
            'Number of validation data and validation labels do not match'
    else:
        skip_evaluation = True
    if not skip_evaluation:
        assert accuracy_fn, \
            'Must specify an accuracy_fn (a function that takes (out, y) as input),' + \
            ' in order to evaluate the validation set'
    if len(training_data) % batch_size != 0:
        print 'WARNING: batch_size does not evenly divide len(training_data).' + \
            'Some training data will be unused'

    validation_data = np.array(validation_data)
    validation_labels = np.array(validation_labels)

    # Python 2 integer division: a trailing partial batch is dropped.
    num_training_batches = len(training_data) / batch_size

    # Shuffle once up front, then slice into contiguous batches.
    training_data_indices = np.random.permutation(len(training_data))
    training_data_permuted = np.array(training_data)[training_data_indices]
    training_labels_permuted = np.array(
        training_labels)[training_data_indices]
    training_data_batches = []
    training_label_batches = []
    for i in range(num_training_batches):
        training_data_batches.append(
            training_data_permuted[i * batch_size:(i + 1) * batch_size])
        training_label_batches.append(
            training_labels_permuted[i * batch_size:(i + 1) * batch_size])

    # Default: one console row per epoch.
    row_output_interval = row_output_interval or num_training_batches

    # Wire the user-supplied loss/optimizer/accuracy onto the model graph.
    y = tf.placeholder(tf.int64, [None])
    loss = loss_fn(self.out, y)
    optimizer = optimizer_fn(loss)
    accuracy = accuracy_fn(self.out, y) if accuracy_fn else None
    self.skip_evaluation = skip_evaluation

    # Live progress table; accuracy columns only when evaluating.
    layout = [
        dict(name='Ep.', width=3, align='center'),
        dict(name='Batch', width=2*len(str(num_training_batches))+1,
             suffix='/'+str(num_training_batches), align='center'),
        dict(name='Loss', width=8, align='center')] + \
        ([dict(name='Val Acc', width=7, suffix='%', align='center'),
          dict(name='Max Acc', width=7, suffix='%', align='center')] \
         if not self.skip_evaluation else []) + \
        [dict(name='Progress/Timestamp', width=self.progress_bar_size+2,
              align='center'),
         dict(name='Elapsed (s)', width=7, align='center')]

    # Initialize environment
    initialize = tf.global_variables_initializer()

    # Session config
    config = tf.ConfigProto(
        device_count={'GPU': 1 if self.use_gpu == True else 0})

    # Run model
    done = False
    epoch = 0
    iteration = 0
    sustained_loss = 0.0  # exponential moving average of the batch loss
    loss_values = []  # bounded history of raw batch losses (for graphing)
    sustained_loss_values = []  # bounded history of the smoothed loss
    last_validation_accuracy = 0.0  # cached between validation intervals
    validation_accuracy_values = []  # bounded accuracy history
    max_accuracy_values = []  # bounded history of the running best accuracy
    max_accuracy = 0.0
    start_time = time.time()
    with tf.Session(config=config) as session:
        session.run(initialize)
        print '=========='
        print 'GPU ' + ('enabled' if self.use_gpu else 'disabled')
        print

        table = DynamicConsoleTable(layout)
        table.print_header()
        multiple_rows_per_epoch = row_output_interval < num_training_batches

        while not done:
            epoch += 1
            if self.use_sound:
                self.sounds.alert()

            # Trains on the data, in batches
            for i in range(num_training_batches):
                iteration += 1

                data_batch = training_data_batches[i]
                labels_batch = training_label_batches[i]
                _, loss_val = session.run([optimizer, loss],
                                          feed_dict={
                                              self.x: data_batch,
                                              y: labels_batch
                                          })
                # Smoothed loss drives the termination condition below.
                sustained_loss = sustained_loss_decay_rate * sustained_loss + \
                    (1.0 - sustained_loss_decay_rate) * loss_val

                # Keep bounded, aligned histories of raw and smoothed loss.
                if len(loss_values) == self.loss_display_saved_iterations:
                    loss_values.pop(0)
                    sustained_loss_values.pop(0)
                # Re-seed the EMA at the first displayed iteration so the
                # graph does not start from the 0.0 warm-up bias.
                if iteration == self.loss_display_starting_iteration:
                    sustained_loss = loss_val
                if iteration >= self.loss_display_starting_iteration:
                    loss_values.append(loss_val)
                    sustained_loss_values.append(sustained_loss)

                # Evaluate accuracy on a fresh random validation sample every
                # validation_interval iterations; reuse the cached value in
                # between.
                validation_accuracy = last_validation_accuracy
                if not skip_evaluation and iteration % validation_interval == 0:
                    validation_set_indices = np.random.choice(
                        np.arange(len(validation_data)),
                        size=validation_set_size,
                        replace=False)
                    validation_data_batch = validation_data[
                        validation_set_indices]
                    validation_labels_batch = validation_labels[
                        validation_set_indices]
                    validation_accuracy = session.run(
                        accuracy,
                        feed_dict={
                            self.x: validation_data_batch,
                            y: validation_labels_batch
                        })
                    last_validation_accuracy = validation_accuracy

                # Bounded per-iteration accuracy histories.
                # NOTE(review): nesting reconstructed from collapsed source;
                # placed at loop level to mirror train_net() -- confirm.
                if len(validation_accuracy_values
                       ) == self.accuracy_display_saved_iterations:
                    validation_accuracy_values.pop(0)
                if iteration >= self.accuracy_display_starting_iteration:
                    validation_accuracy_values.append(validation_accuracy)
                if validation_accuracy > max_accuracy:
                    max_accuracy = validation_accuracy
                    if self.use_sound:
                        self.sounds.success()
                if len(max_accuracy_values
                       ) == self.accuracy_display_saved_iterations:
                    max_accuracy_values.pop(0)
                if iteration >= self.accuracy_display_starting_iteration:
                    max_accuracy_values.append(max_accuracy)

                # Progress bar within the epoch; replaced by a timestamp on
                # row-output iterations. NOTE(review): divides by
                # (num_training_batches - 1) without the max(1, ...) guard
                # train_net() uses -- ZeroDivisionError when there is exactly
                # one training batch. TODO confirm.
                progress = int(math.ceil(self.progress_bar_size * \
                    float((iteration - 1) % num_training_batches) / \
                    (num_training_batches - 1)))
                elapsed = time.time() - start_time
                progress_string = '[' + '#' * progress + ' ' * \
                    (self.progress_bar_size - progress) + ']'
                if iteration % num_training_batches == 0 or \
                        iteration % row_output_interval == 0:
                    progress_string = time.strftime(
                        "%I:%M:%S %p", time.localtime())
                if not self.skip_evaluation:
                    table.update(
                        epoch, (iteration - 1) % num_training_batches + 1,
                        sustained_loss, validation_accuracy * 100,
                        max_accuracy * 100, progress_string, elapsed)
                else:
                    table.update(
                        epoch, (iteration - 1) % num_training_batches + 1,
                        sustained_loss, progress_string, elapsed)

                # Finalize the row at epoch end / every row_output_interval;
                # a heavy divider marks epoch boundaries when an epoch spans
                # multiple rows.
                if iteration % num_training_batches == 0 or \
                        iteration % row_output_interval == 0:
                    heavy = False
                    if multiple_rows_per_epoch and iteration % num_training_batches == 0:
                        heavy = True
                    table.finalize(heavy=heavy)

                # Termination condition
                if sustained_loss < loss_threshold:
                    done = True
                    break

                self.update_output(iteration, loss_values,
                                   sustained_loss_values,
                                   validation_accuracy_values,
                                   max_accuracy_values)

            # Termination condition
            if epoch >= max_epochs or sustained_loss < loss_threshold:
                done = True

        # Final (forced) display update after training stops.
        self.update_output(iteration, loss_values, sustained_loss_values,
                           validation_accuracy_values, max_accuracy_values,
                           override=True)
        plt.pause(0)
date.strftime("%m/%d/%Y") if last_date != date else '', # months[date.month-1] + ' ' + str(date.day) + ' ' + str(date.year) if last_date != date else '', meta[i-1][1][0], ('+' if increased else '') + format_balance(total_balance_diffs[i]), ' --- ' if previous_balance == 0 else \ ('+' if increased else '') + format_percent((balance - previous_balance) * 100 / previous_balance), format_balance(balance), format_balance(checking_balance), format_balance(savings_balance), ] + ([ format_balance(abs(credit_balance)), ] if include_credit else []) + [ format_percent(balance * 100 / max_balance), format_percent(balance * 100 / current_balance), ] table.update(*args) last_date = date table.finalize(heavy=False, divider=True) #table.print_header() print print 'Most recent '+str(trend_days)+'-day savings trend:', '$' + format_balance(trends[-1]/12) + ' per month,', \ '$' + format_balance(trends[-1]) + ' per year' (x_data, total_balances_data, checking_balances_data, savings_balances_data, credit_balances_data, bank_balances_data, total_balance_diffs, bank_balance_diffs, credit_balance_diffs, bank_rises, bank_small_falls, bank_large_falls) = collapsed_calculated if plot_collapse_days else calculated ##### Plot output