validation_accuracy, finished=True) if show_confusion_matrix: predicted = np.argmax(output, axis=1) actual = np.argmax(val_labels, axis=1) output = [[] for _ in range(num_classes)] count_matrix = [[0] * num_classes for _ in range(num_classes)] for i in range(len(actual)): output[actual[i]].append(predicted[i]) count_matrix[actual[i]][predicted[i]] += 1 print print max_length = max( 2, len(str(max(map(len, validation_sequence_groups))))) print ' ' * (max_length - 2) + 'Predicted ', ''.join( map(lambda x: str(x) + ' ' * (max_length - len(str(x)) + 1), range(num_classes))) print 'Actual\t', '-' * ((num_classes + 1) * (max_length + 1) + 1) for i in range(num_classes): print ' '*(5-len(str(i))), i, '\t|' + ' ' * (max_length - 1), \ ''.join(map(lambda x: (str(x) if x else '-') + ' ' * (max_length - len(str(x)) + 1), \ count_matrix[i])) + '| ', np.mean(np.array(output[i]) == i) reprint_header = ( epoch + 1) % 10 == 0 and epoch < num_epochs - 1 or show_confusion_matrix table.finalize(divider=not reprint_header) if reprint_header: table.print_header()
def train_net(filename, overwrite, should_graph):
    """Train the model on the data in `filename`, checkpointing the best weights.

    The data returned by `load_data(filename)` is split into a training part
    and a trailing validation part of `validation_set_size` examples. The
    graph returned by `model(learning_rate, dropout)` is trained batch by
    batch until `epoch >= max_epochs` or the exponentially smoothed loss
    drops below `loss_threshold`. Every time the validation accuracy reaches
    a new maximum the session is saved under the name 'model'. When training
    stops, a summary of the best 'out' weights is plotted.

    Besides its arguments the function reads these module-level settings:
    validation_set_size, batch_size, learning_rate, dropout, use_GPU,
    show_weights, progress_bar_size, decay_rate, loss_saved_iterations,
    loss_starting_iteration, accuracy_saved_iterations,
    accuracy_starting_iteration, max_epochs, loss_threshold, num_kernels.

    Args:
        filename: Passed unchanged to `load_data`.
        overwrite: When falsy and `model_exists()` is truthy, nothing is
            trained and the function returns immediately.
        should_graph: Not used by this function.

    Raises:
        AssertionError: If `validation_set_size` exceeds the number of
            examples, or `batch_size` does not evenly divide the number of
            training examples.
    """
    # Bail out early rather than overwrite an existing saved model.
    if not overwrite and model_exists():
        print("Model already exists (did you mean to include --overwrite?)")
        return

    # Otherwise, we train and save the model.
    data, labels = load_data(filename)

    # The last `validation_set_size` examples are held out for validation.
    assert validation_set_size <= len(
        data), 'validation_set_size must be smaller than len(data)'
    training_data = data[:len(data) - validation_set_size]
    training_labels = labels[:len(labels) - validation_set_size]
    validation_data = data[len(data) - validation_set_size:]
    validation_labels = labels[len(labels) - validation_set_size:]

    assert len(training_data) % batch_size == 0, \
        'batch_size must evenly divide len(training_data)'

    # Floor division keeps the count an int on both Python 2 and Python 3;
    # range() below rejects floats.
    num_training_batches = len(training_data) // batch_size
    # Only one validation batch is evaluated, i.e. the first `batch_size`
    # examples of the validation split.
    num_validation_batches = 1

    training_data_batches = [
        training_data[b * batch_size:(b + 1) * batch_size]
        for b in range(num_training_batches)
    ]
    training_label_batches = [
        training_labels[b * batch_size:(b + 1) * batch_size]
        for b in range(num_training_batches)
    ]
    validation_data_batches = [
        validation_data[b * batch_size:(b + 1) * batch_size]
        for b in range(num_validation_batches)
    ]
    validation_label_batches = [
        validation_labels[b * batch_size:(b + 1) * batch_size]
        for b in range(num_validation_batches)
    ]

    # Build model and get variable handles
    train_op, x, y, out, loss, accuracy, weights, biases = model(
        learning_rate, dropout)

    # Register the tensors needed to reuse the saved model and build the
    # Saver exactly once. Doing this on every checkpoint (as before) keeps
    # adding save/restore ops to the graph and duplicate collection entries.
    tf.add_to_collection("vars", out)
    tf.add_to_collection("vars", x)
    saver = tf.train.Saver()

    # Initialize environment
    initialize = tf.global_variables_initializer()

    # Session config; uses the same truthiness test as the banner below.
    config = tf.ConfigProto(device_count={'GPU': 1 if use_GPU else 0})

    # Run model
    done = False
    epoch = 0
    iteration = 0
    sustained_loss = 0.0
    loss_values = []
    validation_accuracy_values = []
    max_accuracy_values = []
    max_accuracy = 0.0
    max_accuracy_weights = None
    max_accuracy_biases = None

    with tf.Session(config=config) as session:
        session.run(initialize)

        print('==========')
        print('GPU ' + ('enabled' if use_GPU else 'disabled'))
        print('')

        # Always fetch the initial weights so `weights_val` is bound even if
        # the validation accuracy never improves; update_output() below
        # needs it on every iteration.
        weights_val = session.run(weights)
        if show_weights:
            display_weights(weights_val)

        layout = [
            dict(name='Ep.', width=4, align='center'),
            dict(name='Batch',
                 width=2 * len(str(num_training_batches)) + 1,
                 suffix='/' + str(num_training_batches)),
            dict(name='Loss', width=8),
            dict(name='Val Acc', width=6, suffix='%'),
            dict(name='Max Acc', width=6, suffix='%'),
            dict(name='Time', width=progress_bar_size + 2, align='center'),
        ]
        table = DynamicConsoleTable(layout)
        table.print_header()

        while not done:
            epoch += 1

            # Trains on the data, in batches
            for i in range(num_training_batches):
                iteration += 1

                _, loss_val = session.run(
                    [train_op, loss],
                    feed_dict={
                        x: training_data_batches[i],
                        y: training_label_batches[i]
                    })

                # Exponential moving average of the loss, used for stopping.
                sustained_loss = decay_rate * sustained_loss + (
                    1.0 - decay_rate) * loss_val

                # Sliding window of recent losses for update_output().
                if len(loss_values) == loss_saved_iterations:
                    loss_values.pop(0)
                if iteration >= loss_starting_iteration:
                    loss_values.append(loss_val)

                # Mean accuracy over the validation batches.
                validation_accuracy = 0.0
                for j in range(num_validation_batches):
                    label_batch = validation_label_batches[j]
                    accuracy_val, out_val = session.run(
                        [accuracy, out],
                        feed_dict={
                            x: validation_data_batches[j],
                            y: label_batch
                        })
                    if epoch >= max_epochs:
                        # During the last epoch, dump each label next to the
                        # predicted class index.
                        for label, prediction in zip(label_batch, out_val):
                            print('%s %s' % (label, np.argmax(prediction)))
                    validation_accuracy += accuracy_val
                validation_accuracy /= num_validation_batches

                if len(validation_accuracy_values
                       ) == accuracy_saved_iterations:
                    validation_accuracy_values.pop(0)
                if iteration >= accuracy_starting_iteration:
                    validation_accuracy_values.append(validation_accuracy)

                if validation_accuracy > max_accuracy:
                    # New best: remember the parameters and checkpoint.
                    weights_val, biases_val = session.run([weights, biases])
                    max_accuracy = validation_accuracy
                    max_accuracy_weights = weights_val
                    max_accuracy_biases = biases_val
                    saver.save(session, 'model')

                if len(max_accuracy_values) == accuracy_saved_iterations:
                    max_accuracy_values.pop(0)
                if iteration >= accuracy_starting_iteration:
                    max_accuracy_values.append(max_accuracy)

                # Progress bar for the current epoch; replaced by a
                # timestamp on the epoch's last batch.
                progress = int(
                    math.ceil(progress_bar_size * float(
                        (iteration - 1) % num_training_batches) / max(
                            1, num_training_batches - 1)))
                progress_string = '[' + '#' * progress + ' ' * (
                    progress_bar_size - progress) + ']'
                if iteration % num_training_batches == 0:
                    progress_string = time.strftime("%I:%M:%S %p",
                                                    time.localtime())

                table.update(epoch,
                             (iteration - 1) % num_training_batches + 1,
                             sustained_loss, validation_accuracy * 100,
                             max_accuracy * 100, progress_string)

                # Termination condition
                if sustained_loss < loss_threshold:
                    done = True
                    break

                update_output(iteration, weights_val, loss_values,
                              validation_accuracy_values,
                              max_accuracy_values)

            table.finalize()

            # Termination condition
            # NOTE(review): the source's indentation was lost; the forced
            # update is assumed to belong to this branch - confirm.
            if epoch >= max_epochs or sustained_loss < loss_threshold:
                done = True
                update_output(iteration,
                              weights_val,
                              loss_values,
                              validation_accuracy_values,
                              max_accuracy_values,
                              override=True)

    # Plot column 1 of the best 'out' weights, summed over `num_kernels`
    # consecutive slices of `n` rows each.
    # NOTE(review): n = 100 presumably is the per-kernel feature count -
    # confirm against the model definition.
    n = 100
    if max_accuracy_weights is not None:
        # Skipped when accuracy never rose above 0.0 and no best weights
        # were recorded (indexing None would raise TypeError).
        plt.figure('FC weights')
        plt.plot(
            np.sum([
                max_accuracy_weights['out'][k * n:(k + 1) * n, 1]
                for k in range(num_kernels)
            ],
                   axis=0))
    plt.show()
    plt.pause(0)
def train(self,
          training_data,
          training_labels,
          validation_data=None,
          validation_labels=None,
          skip_evaluation=False,
          loss_fn=None,
          optimizer_fn=None,
          accuracy_fn=None,
          max_epochs=float('inf'),
          batch_size=1,
          validation_set_size=None,
          validation_interval=1,
          loss_threshold=0.0,
          sustained_loss_decay_rate=0.9,
          row_output_interval=None):
    """Train `self.out` on the given data, reporting progress in a console table.

    The training set is shuffled once, cut into batches of `batch_size`
    (a trailing partial batch is dropped) and replayed every epoch. Training
    stops once `epoch >= max_epochs` or the smoothed loss falls below
    `loss_threshold`; afterwards `plt.pause(0)` is called.

    Args:
        training_data: Training inputs, fed to `self.x`.
        training_labels: Training labels, same length as `training_data`;
            fed to a 1-D int64 placeholder.
        validation_data: Optional validation inputs.
        validation_labels: Optional validation labels.
        skip_evaluation: If true, or if neither validation argument is
            given, no validation accuracy is computed or displayed.
        loss_fn: Called as `loss_fn(self.out, y)`; required.
        optimizer_fn: Called as `optimizer_fn(loss)`; its result is run once
            per batch; required.
        accuracy_fn: Called as `accuracy_fn(self.out, y)`; required when
            evaluation is not skipped.
        max_epochs: Upper bound on the number of epochs.
        batch_size: Number of examples per training batch.
        validation_set_size: Number of validation examples sampled (without
            replacement) per evaluation; capped at, and defaulting to,
            `len(validation_data)`.
        validation_interval: Evaluate every this many iterations.
        loss_threshold: Stop when the smoothed loss drops below this value.
        sustained_loss_decay_rate: Decay factor of the exponential moving
            average of the loss.
        row_output_interval: Finalize a table row every this many
            iterations; defaults to once per epoch.

    Raises:
        AssertionError: If a required function is missing or data and label
            counts do not match.
    """
    assert loss_fn, 'Must specify a loss_fn (a function that takes (out, y) as input)'
    assert optimizer_fn, 'Must specify a optimizer_fn (a function that takes loss as input)'

    if validation_data is not None:
        # Cap the requested sample size; fall back to the full set when no
        # size (or 0) was requested.
        validation_set_size = (
            min(validation_set_size, len(validation_data))
            if validation_set_size else len(validation_data))

    assert len(training_data) == len(training_labels), \
        'Number of training data and training labels do not match'

    if not skip_evaluation and (validation_data is not None or
                                validation_labels is not None):
        assert validation_data is not None and validation_labels is not None and \
            len(validation_data) == len(validation_labels), \
            'Number of validation data and validation labels do not match'
    else:
        skip_evaluation = True

    if not skip_evaluation:
        assert accuracy_fn, \
            'Must specify an accuracy_fn (a function that takes (out, y) as input),' + \
            ' in order to evaluate the validation set'

    if len(training_data) % batch_size != 0:
        print('WARNING: batch_size does not evenly divide len(training_data). '
              'Some training data will be unused')

    validation_data = np.array(validation_data)
    validation_labels = np.array(validation_labels)

    # Floor division keeps the count an int on both Python 2 and Python 3;
    # range() and the 'Batch' column suffix below depend on that.
    num_training_batches = len(training_data) // batch_size

    # Shuffle once, applying the same permutation to data and labels.
    shuffled_indices = np.random.permutation(len(training_data))
    training_data_permuted = np.array(training_data)[shuffled_indices]
    training_labels_permuted = np.array(training_labels)[shuffled_indices]

    training_data_batches = [
        training_data_permuted[b * batch_size:(b + 1) * batch_size]
        for b in range(num_training_batches)
    ]
    training_label_batches = [
        training_labels_permuted[b * batch_size:(b + 1) * batch_size]
        for b in range(num_training_batches)
    ]

    row_output_interval = row_output_interval or num_training_batches

    y = tf.placeholder(tf.int64, [None])
    loss = loss_fn(self.out, y)
    optimizer = optimizer_fn(loss)
    accuracy = accuracy_fn(self.out, y) if accuracy_fn else None

    self.skip_evaluation = skip_evaluation

    # The accuracy columns only exist when evaluation is enabled.
    accuracy_columns = [] if self.skip_evaluation else [
        dict(name='Val Acc', width=7, suffix='%', align='center'),
        dict(name='Max Acc', width=7, suffix='%', align='center'),
    ]
    layout = [
        dict(name='Ep.', width=3, align='center'),
        dict(name='Batch',
             width=2 * len(str(num_training_batches)) + 1,
             suffix='/' + str(num_training_batches),
             align='center'),
        dict(name='Loss', width=8, align='center'),
    ] + accuracy_columns + [
        dict(name='Progress/Timestamp',
             width=self.progress_bar_size + 2,
             align='center'),
        dict(name='Elapsed (s)', width=7, align='center'),
    ]

    # Initialize environment
    initialize = tf.global_variables_initializer()

    # Session config; uses the same truthiness test as the banner below.
    config = tf.ConfigProto(device_count={'GPU': 1 if self.use_gpu else 0})

    # Run model
    done = False
    epoch = 0
    iteration = 0
    sustained_loss = 0.0
    loss_values = []
    sustained_loss_values = []
    last_validation_accuracy = 0.0
    validation_accuracy_values = []
    max_accuracy_values = []
    max_accuracy = 0.0
    start_time = time.time()

    with tf.Session(config=config) as session:
        session.run(initialize)

        print('==========')
        print('GPU ' + ('enabled' if self.use_gpu else 'disabled'))
        print('')

        table = DynamicConsoleTable(layout)
        table.print_header()

        multiple_rows_per_epoch = row_output_interval < num_training_batches

        while not done:
            epoch += 1
            if self.use_sound:
                self.sounds.alert()

            # Trains on the data, in batches
            for i in range(num_training_batches):
                iteration += 1

                _, loss_val = session.run(
                    [optimizer, loss],
                    feed_dict={
                        self.x: training_data_batches[i],
                        y: training_label_batches[i]
                    })

                # Exponential moving average of the loss.
                sustained_loss = sustained_loss_decay_rate * sustained_loss + \
                    (1.0 - sustained_loss_decay_rate) * loss_val

                # Sliding windows of raw and smoothed loss for display.
                if len(loss_values) == self.loss_display_saved_iterations:
                    loss_values.pop(0)
                    sustained_loss_values.pop(0)
                if iteration == self.loss_display_starting_iteration:
                    # Restart the average from the first displayed loss.
                    sustained_loss = loss_val
                if iteration >= self.loss_display_starting_iteration:
                    loss_values.append(loss_val)
                    sustained_loss_values.append(sustained_loss)

                # Between evaluations the table keeps showing the most
                # recent accuracy.
                validation_accuracy = last_validation_accuracy
                if not skip_evaluation and iteration % validation_interval == 0:
                    # Evaluate on a random sample of the validation set.
                    validation_set_indices = np.random.choice(
                        np.arange(len(validation_data)),
                        size=validation_set_size,
                        replace=False)
                    validation_accuracy = session.run(
                        accuracy,
                        feed_dict={
                            self.x: validation_data[validation_set_indices],
                            y: validation_labels[validation_set_indices]
                        })
                    last_validation_accuracy = validation_accuracy

                    if len(validation_accuracy_values
                           ) == self.accuracy_display_saved_iterations:
                        validation_accuracy_values.pop(0)
                    if iteration >= self.accuracy_display_starting_iteration:
                        validation_accuracy_values.append(
                            validation_accuracy)

                    if validation_accuracy > max_accuracy:
                        max_accuracy = validation_accuracy
                        if self.use_sound:
                            self.sounds.success()

                    if len(max_accuracy_values
                           ) == self.accuracy_display_saved_iterations:
                        max_accuracy_values.pop(0)
                    if iteration >= self.accuracy_display_starting_iteration:
                        max_accuracy_values.append(max_accuracy)

                end_of_epoch = iteration % num_training_batches == 0
                row_due = end_of_epoch or \
                    iteration % row_output_interval == 0

                # max(1, ...) avoids a ZeroDivisionError when an epoch
                # consists of a single batch.
                progress = int(
                    math.ceil(self.progress_bar_size *
                              float((iteration - 1) % num_training_batches) /
                              max(1, num_training_batches - 1)))
                elapsed = time.time() - start_time
                if row_due:
                    # A finished row shows a timestamp instead of the bar.
                    progress_string = time.strftime("%I:%M:%S %p",
                                                    time.localtime())
                else:
                    progress_string = '[' + '#' * progress + ' ' * \
                        (self.progress_bar_size - progress) + ']'

                # Cell values follow the column order of `layout`.
                row = [
                    epoch, (iteration - 1) % num_training_batches + 1,
                    sustained_loss
                ]
                if not self.skip_evaluation:
                    row += [validation_accuracy * 100, max_accuracy * 100]
                row += [progress_string, elapsed]
                table.update(*row)

                if row_due:
                    # With several rows per epoch, the row that closes the
                    # epoch is finalized with heavy=True.
                    table.finalize(
                        heavy=bool(multiple_rows_per_epoch and end_of_epoch))

                # Termination condition
                if sustained_loss < loss_threshold:
                    done = True
                    break

                self.update_output(iteration, loss_values,
                                   sustained_loss_values,
                                   validation_accuracy_values,
                                   max_accuracy_values)

            # Termination condition
            # NOTE(review): the source's indentation was lost; the forced
            # update is assumed to belong to this branch - confirm.
            if epoch >= max_epochs or sustained_loss < loss_threshold:
                done = True
                self.update_output(iteration,
                                   loss_values,
                                   sustained_loss_values,
                                   validation_accuracy_values,
                                   max_accuracy_values,
                                   override=True)

    plt.pause(0)
last_date = None for i in range(len(total_balances)): (date, balance) = total_balances[i] current_month = date.year * 12 + date.month current_year = date.year new_month = current_month != last_month new_year = current_year != last_year last_month = current_month last_year = current_year # if i > 0: # table.finalize(heavy=new_month and output_table_dividers, divider=last_date != date) # if new_year and output_table_dividers: # table.print_divider(heavy=True) if i > 0: table.finalize(divider=last_date != date and not (new_month and output_table_dividers)) if new_month and output_table_dividers: table.print_message(months_full[date.month - 1] + ' ' + str(date.year), heavy=True) table.print_header() previous_balance = total_balances[i - 1][1] if i > 0 else 0 checking_balance = checking_balances[i][1] savings_balance = savings_balances[i][1] credit_balance = credit_balances[i][1] increased = total_balance_diffs[i] >= 0 args = [ date.strftime("%m/%d/%Y") if last_date != date else '', # months[date.month-1] + ' ' + str(date.day) + ' ' + str(date.year) if last_date != date else '', meta[i-1][1][0], ('+' if increased else '') + format_balance(total_balance_diffs[i]),