Example #1
import math
import time

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# model(), load_data(), model_exists(), display_weights(), update_output(),
# DynamicConsoleTable, and the module-level hyperparameters (batch_size,
# learning_rate, dropout, decay_rate, use_GPU, validation_set_size, etc.)
# are defined elsewhere in the source file.


def train_net(filename, overwrite, should_graph):

    # If overwrite is false and a saved model already exists, then simply
    # return
    if not overwrite and model_exists():
        print("Model already exists (did you mean to include --overwrite?)")
        return

    # Otherwise, we train and save the model

    # Load the data
    data, labels = load_data(filename)

    # Create batches
    assert validation_set_size <= len(data), \
        'validation_set_size must not exceed len(data)'

    training_data = data[:len(data) - validation_set_size]
    training_labels = labels[:len(labels) - validation_set_size]
    validation_data = data[len(data) - validation_set_size:]
    validation_labels = labels[len(labels) - validation_set_size:]
    #print('Training data: ' + str(len(training_data)))
    #print('Validation data: ' + str(len(validation_data)))

    assert len(training_data) % batch_size == 0, \
        'batch_size must evenly divide len(training_data)'
    #assert validation_set_size % batch_size == 0, 'batch_size must evenly divide validation_set_size'
    num_training_batches = len(training_data) // batch_size
    #num_validation_batches = validation_set_size // batch_size
    num_validation_batches = 1

    training_data_batches = []
    training_label_batches = []
    validation_data_batches = []
    validation_label_batches = []
    for i in range(num_training_batches):
        training_data_batches.append(training_data[i * batch_size:(i + 1) *
                                                   batch_size])
        training_label_batches.append(training_labels[i * batch_size:(i + 1) *
                                                      batch_size])
    for i in range(num_validation_batches):
        validation_data_batches.append(validation_data[i * batch_size:(i + 1) *
                                                       batch_size])
        validation_label_batches.append(
            validation_labels[i * batch_size:(i + 1) * batch_size])

    # Build model and get variable handles
    train_op, x, y, out, loss, accuracy, weights, biases = model(
        learning_rate, dropout)

    # Initialize environment
    initialize = tf.global_variables_initializer()

    # Session config
    config = tf.ConfigProto(device_count={'GPU': 1 if use_GPU else 0})

    # Run model
    done = False
    epoch = 0
    iteration = 0
    sustained_loss = 0.0
    loss_values = []
    validation_accuracy_values = []
    max_accuracy_values = []

    max_accuracy = 0.0
    max_accuracy_weights = None
    max_accuracy_biases = None

    with tf.Session(config=config) as session:
        session.run(initialize)

        print('==========')
        print('GPU ' + ('enabled' if use_GPU else 'disabled'))
        print()

        # Fetch the initial weights (so weights_val is defined before the
        # first update_output call below) and optionally display them
        weights_val = session.run(weights)
        if show_weights:
            display_weights(weights_val)

        layout = [
            dict(name='Ep.', width=4, align='center'),
            dict(name='Batch',
                 width=2 * len(str(num_training_batches)) + 1,
                 suffix='/' + str(num_training_batches)),
            dict(name='Loss', width=8),
            dict(name='Val Acc', width=6, suffix='%'),
            dict(name='Max Acc', width=6, suffix='%'),
            dict(name='Time', width=progress_bar_size + 2, align='center'),
        ]
        table = DynamicConsoleTable(layout)
        table.print_header()

        while not done:
            epoch += 1

            # Trains on the data, in batches
            for i in range(num_training_batches):
                iteration += 1

                data_batch = training_data_batches[i]
                label_batch = training_label_batches[i]
                _, loss_val = session.run([train_op, loss],
                                          feed_dict={
                                              x: data_batch,
                                              y: label_batch
                                          })
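                # Smooth the raw loss with an exponential moving average:
                # sustained_loss_t = decay_rate * sustained_loss_{t-1}
                #                    + (1 - decay_rate) * loss_t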
                sustained_loss = decay_rate * sustained_loss + (
                    1.0 - decay_rate) * loss_val

                if len(loss_values) == loss_saved_iterations:
                    loss_values.pop(0)
                if iteration >= loss_starting_iteration:
                    loss_values.append(loss_val)


                validation_accuracy = 0.0
                for j in range(num_validation_batches):
                    data_batch = validation_data_batches[j]
                    label_batch = validation_label_batches[j]
                    accuracy_val, out_val = session.run([accuracy, out],
                                                        feed_dict={
                                                            x: data_batch,
                                                            y: label_batch
                                                        })

                    # After the final epoch, dump (label, prediction) pairs
                    # for inspection
                    if epoch >= max_epochs:
                        for k in range(len(label_batch)):
                            print(label_batch[k], np.argmax(out_val[k]))

                    validation_accuracy += accuracy_val
                validation_accuracy /= num_validation_batches

                if len(validation_accuracy_values) == accuracy_saved_iterations:
                    validation_accuracy_values.pop(0)
                if iteration >= accuracy_starting_iteration:
                    validation_accuracy_values.append(validation_accuracy)

                if validation_accuracy > max_accuracy:
                    weights_val, biases_val = session.run([weights, biases])
                    max_accuracy = validation_accuracy
                    max_accuracy_weights = weights_val
                    max_accuracy_biases = biases_val
                    # Register graph endpoints for retrieval after restore,
                    # then checkpoint whenever validation accuracy hits a
                    # new maximum
                    tf.add_to_collection("vars", out)
                    tf.add_to_collection("vars", x)
                    saver = tf.train.Saver()
                    saver.save(session, 'model')

                if len(max_accuracy_values) == accuracy_saved_iterations:
                    max_accuracy_values.pop(0)
                if iteration >= accuracy_starting_iteration:
                    max_accuracy_values.append(max_accuracy)

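                # Map the position within the epoch onto a fixed-width ASCII
                # progress bar; on the last batch of an epoch the bar is
                # replaced by a wall-clock timestamp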
                progress = int(math.ceil(
                    progress_bar_size *
                    float((iteration - 1) % num_training_batches) /
                    max(1, num_training_batches - 1)))
                progress_string = ('[' + '#' * progress +
                                   ' ' * (progress_bar_size - progress) + ']')
                if iteration % num_training_batches == 0:
                    progress_string = time.strftime("%I:%M:%S %p",
                                                    time.localtime())
                table.update(epoch, (iteration - 1) % num_training_batches + 1,
                             sustained_loss, validation_accuracy * 100,
                             max_accuracy * 100, progress_string)

                # Termination condition
                if sustained_loss < loss_threshold:
                    done = True
                    break

                update_output(iteration, weights_val, loss_values,
                              validation_accuracy_values, max_accuracy_values)

            table.finalize()

            # Termination condition
            if epoch >= max_epochs or sustained_loss < loss_threshold:
                done = True
                update_output(iteration,
                              weights_val,
                              loss_values,
                              validation_accuracy_values,
                              max_accuracy_values,
                              override=True)
                # Plot the class-1 output-layer weights, summed across the
                # num_kernels kernels (n = 100 weights per kernel slice)
                n = 100
                plt.figure('FC weights')
                #plt.plot(weights_val['out'][:,1])
                plt.plot(
                    np.sum([
                        max_accuracy_weights['out'][k * n:(k + 1) * n, 1]
                        for k in range(num_kernels)
                    ], axis=0))
                plt.show()
                plt.pause(0)
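
# A minimal usage sketch for train_net (hypothetical CLI wiring; the real
# project may expose these flags differently). --overwrite corresponds to
# the `overwrite` parameter referenced in the message at the top of
# train_net, and --graph to `should_graph`.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Train and save the model.')
    parser.add_argument('filename', help='path to the training data file')
    parser.add_argument('--overwrite', action='store_true',
                        help='retrain even if a saved model already exists')
    parser.add_argument('--graph', action='store_true',
                        help='plot diagnostics while training')
    args = parser.parse_args()
    train_net(args.filename, args.overwrite, args.graph)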
Example #2
    def train(self,
              training_data,
              training_labels,
              validation_data=None,
              validation_labels=None,
              skip_evaluation=False,
              loss_fn=None,
              optimizer_fn=None,
              accuracy_fn=None,
              max_epochs=float('inf'),
              batch_size=1,
              validation_set_size=None,
              validation_interval=1,
              loss_threshold=0.0,
              sustained_loss_decay_rate=0.9,
              row_output_interval=None):

        assert loss_fn, 'Must specify a loss_fn (a function that takes (out, y) as input)'
        assert optimizer_fn, 'Must specify an optimizer_fn (a function that takes loss as input)'

        if validation_data is not None:
            validation_set_size = (validation_set_size and \
                                   min(validation_set_size, len(validation_data))) or \
                                   len(validation_data)

        assert len(training_data) == len(training_labels), \
            'Number of training data and training labels do not match'
        if not skip_evaluation and (validation_data is not None
                                    or validation_labels is not None):
            assert validation_data is not None and validation_labels is not None and \
                len(validation_data) == len(validation_labels), \
                'Number of validation data and validation labels do not match'
        else:
            skip_evaluation = True
        if not skip_evaluation:
            assert accuracy_fn, \
                'Must specify an accuracy_fn (a function that takes (out, y) as input),' + \
                ' in order to evaluate the validation set'

        if len(training_data) % batch_size != 0:
            print('WARNING: batch_size does not evenly divide '
                  'len(training_data). Some training data will be unused')

        validation_data = np.array(validation_data)
        validation_labels = np.array(validation_labels)

        num_training_batches = len(training_data) // batch_size
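        # Shuffle once up front, then slice the permuted arrays into
        # contiguous batches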
        training_data_indices = np.random.permutation(len(training_data))
        training_data_permuted = np.array(training_data)[training_data_indices]
        training_labels_permuted = np.array(
            training_labels)[training_data_indices]
        training_data_batches = []
        training_label_batches = []
        for i in range(num_training_batches):
            training_data_batches.append(
                training_data_permuted[i * batch_size:(i + 1) * batch_size])
            training_label_batches.append(
                training_labels_permuted[i * batch_size:(i + 1) * batch_size])

        row_output_interval = row_output_interval or num_training_batches

        y = tf.placeholder(tf.int64, [None])
        loss = loss_fn(self.out, y)
        optimizer = optimizer_fn(loss)
        accuracy = accuracy_fn(self.out, y) if accuracy_fn else None

        self.skip_evaluation = skip_evaluation

        layout = [
            dict(name='Ep.', width=3, align='center'),
            dict(name='Batch', width=2*len(str(num_training_batches))+1,
                 suffix='/'+str(num_training_batches), align='center'),
            dict(name='Loss', width=8, align='center')] + \
            ([dict(name='Val Acc', width=7, suffix='%', align='center'),
              dict(name='Max Acc', width=7, suffix='%', align='center')] \
                if not self.skip_evaluation else []) + \
            [dict(name='Progress/Timestamp', width=self.progress_bar_size+2, align='center'),
             dict(name='Elapsed (s)', width=7, align='center')]

        # Initialize environment
        initialize = tf.global_variables_initializer()

        # Session config
        config = tf.ConfigProto(
            device_count={'GPU': 1 if self.use_gpu else 0})

        # Run model
        done = False
        epoch = 0
        iteration = 0
        sustained_loss = 0.0
        loss_values = []
        sustained_loss_values = []
        last_validation_accuracy = 0.0
        validation_accuracy_values = []
        max_accuracy_values = []
        max_accuracy = 0.0
        start_time = time.time()

        with tf.Session(config=config) as session:
            session.run(initialize)
            print('==========')
            print('GPU ' + ('enabled' if self.use_gpu else 'disabled'))
            print()
            table = DynamicConsoleTable(layout)
            table.print_header()
            multiple_rows_per_epoch = row_output_interval < num_training_batches

            while not done:
                epoch += 1
                if self.use_sound:
                    self.sounds.alert()

                # Trains on the data, in batches
                for i in range(num_training_batches):
                    iteration += 1
                    data_batch = training_data_batches[i]
                    labels_batch = training_label_batches[i]

                    _, loss_val = session.run([optimizer, loss],
                                              feed_dict={
                                                  self.x: data_batch,
                                                  y: labels_batch
                                              })
                    sustained_loss = sustained_loss_decay_rate * sustained_loss + \
                        (1.0 - sustained_loss_decay_rate) * loss_val

                    if len(loss_values) == self.loss_display_saved_iterations:
                        loss_values.pop(0)
                        sustained_loss_values.pop(0)
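                    # Re-seed the moving average with the raw loss at the
                    # first displayed iteration to remove the cold-start bias
                    # of initializing sustained_loss at 0.0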
                    if iteration == self.loss_display_starting_iteration:
                        sustained_loss = loss_val
                    if iteration >= self.loss_display_starting_iteration:
                        loss_values.append(loss_val)
                        sustained_loss_values.append(sustained_loss)

                    validation_accuracy = last_validation_accuracy
                    if not skip_evaluation and iteration % validation_interval == 0:
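                        # Score a random subset of the held-out validation
                        # set, sampled without replacement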
                        validation_set_indices = np.random.choice(
                            np.arange(len(validation_data)),
                            size=validation_set_size,
                            replace=False)
                        validation_data_batch = validation_data[
                            validation_set_indices]
                        validation_labels_batch = validation_labels[
                            validation_set_indices]
                        validation_accuracy = session.run(
                            accuracy,
                            feed_dict={
                                self.x: validation_data_batch,
                                y: validation_labels_batch
                            })
                        last_validation_accuracy = validation_accuracy

                        if len(validation_accuracy_values) == \
                                self.accuracy_display_saved_iterations:
                            validation_accuracy_values.pop(0)
                        if iteration >= self.accuracy_display_starting_iteration:
                            validation_accuracy_values.append(
                                validation_accuracy)

                        if validation_accuracy > max_accuracy:
                            max_accuracy = validation_accuracy
                            if self.use_sound:
                                self.sounds.success()

                        if len(max_accuracy_values) == \
                                self.accuracy_display_saved_iterations:
                            max_accuracy_values.pop(0)
                        if iteration >= self.accuracy_display_starting_iteration:
                            max_accuracy_values.append(max_accuracy)

                    progress = int(math.ceil(
                        self.progress_bar_size *
                        float((iteration - 1) % num_training_batches) /
                        max(1, num_training_batches - 1)))
                    elapsed = time.time() - start_time
                    progress_string = '[' + '#' * progress + ' ' * \
                        (self.progress_bar_size - progress) + ']'
                    if iteration % num_training_batches == 0 or \
                        iteration % row_output_interval == 0:
                        progress_string = time.strftime(
                            "%I:%M:%S %p", time.localtime())

                    if not self.skip_evaluation:
                        table.update(
                            epoch, (iteration - 1) % num_training_batches + 1,
                            sustained_loss, validation_accuracy * 100,
                            max_accuracy * 100, progress_string, elapsed)
                    else:
                        table.update(
                            epoch, (iteration - 1) % num_training_batches + 1,
                            sustained_loss, progress_string, elapsed)

                    if iteration % num_training_batches == 0 or \
                        iteration % row_output_interval == 0:
                        heavy = False
                        if multiple_rows_per_epoch and iteration % num_training_batches == 0:
                            heavy = True
                        table.finalize(heavy=heavy)

                    # Termination condition
                    if sustained_loss < loss_threshold:
                        done = True
                        break

                    self.update_output(iteration, loss_values,
                                       sustained_loss_values,
                                       validation_accuracy_values,
                                       max_accuracy_values)

                # Termination condition
                if epoch >= max_epochs or sustained_loss < loss_threshold:
                    done = True
                    self.update_output(iteration,
                                       loss_values,
                                       sustained_loss_values,
                                       validation_accuracy_values,
                                       max_accuracy_values,
                                       override=True)
                    plt.pause(0)
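
# A minimal usage sketch for train() (hypothetical: the owning class and its
# constructor are not shown; `net` stands for an instance exposing the
# self.x and self.out attributes referenced above). The lambdas match the
# loss_fn/optimizer_fn/accuracy_fn signatures asserted at the top of train().
#
# net.train(train_x, train_y,
#           validation_data=val_x, validation_labels=val_y,
#           loss_fn=lambda out, y: tf.reduce_mean(
#               tf.nn.sparse_softmax_cross_entropy_with_logits(
#                   logits=out, labels=y)),
#           optimizer_fn=lambda loss: tf.train.AdamOptimizer(1e-3).minimize(loss),
#           accuracy_fn=lambda out, y: tf.reduce_mean(
#               tf.cast(tf.equal(tf.argmax(out, 1), y), tf.float32)),
#           max_epochs=50, batch_size=32)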
Example #3
        date.strftime("%m/%d/%Y") if last_date != date else '',
        #        months[date.month-1] + ' ' + str(date.day) + ' ' + str(date.year) if last_date != date else '',
        meta[i-1][1][0],
        ('+' if increased else '') + format_balance(total_balance_diffs[i]),
        ' --- ' if previous_balance == 0 else \
            ('+' if increased else '') + format_percent((balance - previous_balance) * 100 / previous_balance),
        format_balance(balance),
        format_balance(checking_balance),
        format_balance(savings_balance),
    ] + ([
        format_balance(abs(credit_balance)),
    ] if include_credit else []) + [
        format_percent(balance * 100 / max_balance),
        format_percent(balance * 100 / current_balance),
    ]
    table.update(*args)
    last_date = date
table.finalize(heavy=False, divider=True)
#table.print_header()

print()
print('Most recent ' + str(trend_days) + '-day savings trend:',
      '$' + format_balance(trends[-1] / 12) + ' per month,',
      '$' + format_balance(trends[-1]) + ' per year')

(x_data, total_balances_data, checking_balances_data, savings_balances_data,
 credit_balances_data, bank_balances_data, total_balance_diffs,
 bank_balance_diffs, credit_balance_diffs, bank_rises, bank_small_falls,
 bank_large_falls) = collapsed_calculated if plot_collapse_days else calculated

##### Plot output
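
# A minimal plotting sketch (hypothetical; the original plot code is not
# shown). Uses only the series unpacked above and assumes matplotlib.pyplot
# is imported as plt.
# plt.figure('Balances')
# plt.plot(x_data, total_balances_data, label='Total')
# plt.plot(x_data, checking_balances_data, label='Checking')
# plt.plot(x_data, savings_balances_data, label='Savings')
# plt.legend()
# plt.show()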