Esempio n. 1
0
    def test_pickle_save_load_object(self):
        """Tests the methods to save and load objects using pickle.

        Saves a single object and a list of two objects (the second one holds
        the first as its ``child``), checks that both pickle files exist, and
        verifies that the objects loaded back compare equal to the originals.
        The files are removed afterwards even when an assertion fails, so a
        failing run does not leave stale pickles behind for the next one.
        """
        files = ["file{}".format(i) for i in range(1, 6)]
        dummy1 = DummyPickleTester("dummy1", 1, files)
        dummy2 = DummyPickleTester("dummy2", 2, files, child=dummy1)

        # The test expects the save helpers to write "<name>.pkl".
        file_dummy1 = "dummy1.pkl"
        file_dummies = "dummies.pkl"

        try:
            h.save_object(dummy1, "dummy1")
            h.save_objects([dummy1, dummy2], "dummies")

            # Save
            self.assertTrue(os.path.exists(file_dummy1))
            self.assertTrue(os.path.exists(file_dummies))

            # Load
            loaded_dummy = list(h.load_objects(file_dummy1))[0]
            loaded_dummies = list(h.load_objects(file_dummies))
            self.assertEqual(loaded_dummy, dummy1)
            self.assertEqual(loaded_dummies[0], dummy1)
            self.assertEqual(loaded_dummies[1], dummy2)
        finally:
            # Cleanup: guard on existence so a failed save is reported as the
            # original assertion error rather than a FileNotFoundError here.
            for pickle_file in (file_dummy1, file_dummies):
                if os.path.exists(pickle_file):
                    os.remove(pickle_file)
def train_on_copy_task(sess,
                       model,
                       data,
                       batch_size=100,
                       max_batches=None,
                       batches_in_epoch=1000,
                       max_time_diff=float("inf"),
                       verbose=False):
    """
    Train the `Seq2SeqModel` on a copy task

    The model and the loss history are checkpointed on the first batch, every
    `batches_in_epoch` batches after that, and once more when training ends.
    On `KeyboardInterrupt` both are saved as well and the process exits with
    status 0.

    @param sess is a tensorflow session
    @param model is the seq2seq model
    @param data is the data (in batch-major form and not padded); it is handed
        to `helpers.get_batches` and must support `len()`.
        NOTE(review): the previous docstring also mentioned a list of files
        depending on `in_memory`, which is not a parameter of this function.
    @param batch_size is the number of samples per batch
    @param max_batches is the upper bound on the number of training batches;
        when `None` or larger than `len(data) // batch_size`, it is replaced
        by `len(data) // batch_size - 1`
    @param batches_in_epoch is the checkpoint interval, counted in batches
    @param max_time_diff is forwarded unchanged to `model.next_batch`
    @param verbose when true, the loss and one input/prediction pair are
        written to stdout at every checkpoint
    @return the list of per-batch losses, each divided by the `length` value
        returned by `model.next_batch`
    """
    batches = helpers.get_batches(data, batch_size=batch_size)

    loss_track = []

    batches_in_data = len(data) // batch_size
    if max_batches is None or batches_in_data < max_batches:
        max_batches = batches_in_data - 1

    try:
        for batch in range(max_batches):
            print("Batch {}/{}".format(batch, max_batches))
            fd, _, length = model.next_batch(batches, False, max_time_diff)
            _, loss = sess.run([model.train_op, model.loss], fd)
            loss_track.append(loss / length)

            if batch == 0 or batch % batches_in_epoch == 0:
                model.save(sess, 'seq2seq_model')
                helpers.save_object(loss_track, 'loss_track.pkl')

                if verbose:
                    stdout.write('  minibatch loss: {}\n'.format(
                        sess.run(model.loss, fd)))
                    predict_ = sess.run(model.decoder_outputs, fd)
                    for i, (inp, pred) in enumerate(
                            zip(fd[model.encoder_inputs].swapaxes(0, 1),
                                predict_.swapaxes(0, 1))):
                        stdout.write('  sample {}:\n'.format(i + 1))
                        stdout.write('    input     > {}\n'.format(inp))
                        stdout.write('    predicted > {}\n'.format(pred))
                        # Only the first sample of the batch is shown.
                        break
                    stdout.write('\n')

    except KeyboardInterrupt:
        stdout.write('training interrupted')
        model.save(sess, 'seq2seq_model')
        # Keep the losses gathered since the last checkpoint, as the normal
        # exit path below does.
        helpers.save_object(loss_track, 'loss_track.pkl')
        # `exit()` is injected by the `site` module for interactive use and is
        # not guaranteed to exist; raising SystemExit is the portable form.
        raise SystemExit(0)

    model.save(sess, 'seq2seq_model')
    helpers.save_object(loss_track, 'loss_track.pkl')

    return loss_track
Esempio n. 3
0
 def save(self, path=None):
     """Hand this object to `helpers.save_object` under `path`.

     When `path` is empty or `None`, a default file name under
     `./scrape_results/` is built from the current UTC date and an
     epoch-seconds stamp.

     The stamp used to come from `strftime('%s')`, a platform-specific
     directive that is not among the portable format codes documented by
     Python. `int(now.timestamp())` is used instead; for a naive datetime it
     interprets the value as local time, which is expected to match what
     glibc's `%s` produced, so file names should stay the same where the old
     code worked.
     """
     if not path:
         now = datetime.utcnow()
         path = f"./scrape_results/scraper_{now.strftime('%Y-%m-%d')}_{int(now.timestamp())}.pkl"
     helpers.save_object(path, self)
Esempio n. 4
0
    def test_save_load_obj(self):
        """Check that a list saved to `self.save_path` loads back equal."""
        original = [1, 2, 3]
        helpers.save_object(self.save_path, original)

        restored = helpers.load_object(self.save_path)
        self.assertEqual(original, restored)
Esempio n. 5
0
            # print('Saving the files in a dictionary: ', time.time()-start_time)
        current_acc = np.mean(valid_accuracy_list)
        if current_acc > best_valid_acc:
            best_W = W
            best_valid_acc = current_acc
            best_train_acc = np.median(train_accuracy_list)
            best_pivot = pivot
        print("Current Pivot: ", pivot)
        print("Current Current acc: ", best_valid_acc)
        full_train_accuracy_list = [pivot] + train_accuracy_list
        full_valid_accuracy_list = [pivot] + valid_accuracy_list
        pivot += exploration_step
    pivot = best_pivot
    begin_val = max(begin_val, pivot - exploration_step)
    end_val = min(end_val, pivot + exploration_step)
    exploration_step = exploration_step / 10
# Report the selected pivot and the validation accuracy recorded for it.
print("BEST PIVOT:", best_pivot)
print("BEST VALID ACC:", best_valid_acc)

# Gather everything to be persisted into the `results` mapping.
results['best_lambda'] = best_pivot
results['lambda_train_acc'] = best_train_acc
results['lambda_valid_acc'] = best_valid_acc
results['weight'] = best_W
results['full_train_accuracy_list'] = full_train_accuracy_list
results['full_valid_accuracy_list'] = full_valid_accuracy_list
# test_t = torch.zeros(feature_size,train_size)

# Persist the results and print how long the save call took (wall clock).
start_time = time.time()
save_object(results, obj_name)
print('Saving the pickle: ', time.time() - start_time)