Example #1
0
def train(hyper):
    """Train an Apollo net according to the ``hyper`` settings dictionary.

    The routine seeds Apollo, selects the CPU or a GPU, runs the module-level
    ``forward(net, hyper)`` once so the net can be drawn to
    ``<schematic_prefix>/network.jpg``, optionally loads saved weights, and
    then iterates from ``hyper['start_iter']`` up to (but not including)
    ``hyper['max_iter']``.  Each iteration does a forward pass, a backward
    pass and a parameter update whose learning rate is
    ``base_lr * gamma ** (iteration // stepsize)``.

    Keys read from ``hyper``: ``random_seed``, ``gpu``, ``loglevel``,
    ``schematic_prefix``, ``start_iter``, ``max_iter``, ``base_lr``,
    ``gamma``, ``stepsize``, ``momentum``, ``clip_gradients``,
    ``display_interval``, ``snapshot_interval``, ``snapshot_prefix``,
    ``graph_interval`` and ``graph_prefix``.  ``weights`` (a file to load)
    and ``sub`` (smoothing window, default 100) are optional.

    Nothing is returned; the outputs are log messages, ``<iteration>.h5``
    snapshots and the ``train_loss.jpg`` figure.
    """
    apollo.set_random_seed(hyper['random_seed'])

    # Device selection: a ``gpu`` value of None means "stay on the CPU".
    if hyper['gpu'] is not None:
        apollo.set_mode_gpu()
        apollo.set_device(hyper['gpu'])
        logging.info('Using gpu device %d' % hyper['gpu'])
    else:
        apollo.set_mode_cpu()
        logging.info('Using cpu device (pass --gpu X to train on the gpu)')
    apollo.set_logging_verbosity(hyper['loglevel'])

    # One forward pass populates the net so that it can be drawn; the
    # forward state is reset again before real training starts.
    net = apollo.Net()
    forward(net, hyper)
    schematic_file = '%s/network.jpg' % hyper['schematic_prefix']
    net.draw_to_file(schematic_file)
    logging.info('Drawing network to %s' % schematic_file)
    net.reset_forward()

    if 'weights' in hyper:
        logging.info('Loading weights from %s' % hyper['weights'])
        net.load(hyper['weights'])

    losses = []
    for step in xrange(hyper['start_iter'], hyper['max_iter']):
        # The value returned by forward() is recorded as this step's loss.
        losses.append(forward(net, hyper))
        net.backward()

        # Step decay: the rate is multiplied by gamma every `stepsize` steps.
        base_rate = hyper['base_lr']
        decay = hyper['gamma'] ** (step // hyper['stepsize'])
        update_args = {
            'lr': base_rate * decay,
            'momentum': hyper['momentum'],
            'clip_gradients': hyper['clip_gradients'],
        }
        net.update(**update_args)

        if step % hyper['display_interval'] == 0:
            # Average over (at most) the last `display_interval` losses.
            recent = losses[-hyper['display_interval']:]
            logging.info('Iteration %d: %s' % (step, np.mean(recent)))

        # Snapshots and graphs are skipped on the very first iteration.
        if step % hyper['snapshot_interval'] == 0 and step > hyper['start_iter']:
            snapshot_file = '%s/%d.h5' % (hyper['snapshot_prefix'], step)
            logging.info('Saving net to: %s' % snapshot_file)
            net.save(snapshot_file)

        if step % hyper['graph_interval'] == 0 and step > hyper['start_iter']:
            # Moving average of the loss history; the slice trims `window`
            # samples from both ends of the full convolution.
            window = hyper.get('sub', 100)
            kernel = np.ones(window) / window
            smoothed = np.convolve(losses, kernel)[window:-window]
            plt.plot(smoothed)
            figure_file = '%s/train_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % figure_file)
            plt.savefig(figure_file)
Example #2
0
def train(hyper):
    """Run the Apollo training loop configured by the ``hyper`` dictionary.

    Steps, in order: seed Apollo, pick the compute device, set Apollo's log
    verbosity, build a net with one call to the module-level
    ``forward(net, hyper)`` and draw it to ``<schematic_prefix>/network.jpg``,
    optionally load ``hyper['weights']``, then train for the iterations in
    ``[start_iter, max_iter)``.

    During training the function periodically logs the mean recent loss
    (every ``display_interval`` iterations), saves ``<iteration>.h5`` under
    ``snapshot_prefix`` (every ``snapshot_interval``) and saves a smoothed
    loss curve to ``<graph_prefix>/train_loss.jpg`` (every
    ``graph_interval``).  It returns None.
    """

    def select_device():
        # A ``gpu`` entry of None keeps computation on the CPU.
        if hyper['gpu'] is None:
            apollo.set_mode_cpu()
            logging.info('Using cpu device (pass --gpu X to train on the gpu)')
            return
        apollo.set_mode_gpu()
        apollo.set_device(hyper['gpu'])
        logging.info('Using gpu device %d' % hyper['gpu'])

    def is_due(iteration, interval_key):
        # True on multiples of the interval, except on the first iteration.
        return (iteration % hyper[interval_key] == 0
                and iteration > hyper['start_iter'])

    def current_learning_rate(iteration):
        # Step decay: base_lr scaled by gamma once per `stepsize` iterations.
        return hyper['base_lr'] * hyper['gamma'] ** (iteration // hyper['stepsize'])

    def report_progress(iteration):
        tail = history[-hyper['display_interval']:]
        logging.info('Iteration %d: %s' % (iteration, np.mean(tail)))

    def write_snapshot(iteration):
        target = '%s/%d.h5' % (hyper['snapshot_prefix'], iteration)
        logging.info('Saving net to: %s' % target)
        net.save(target)

    def write_loss_figure():
        # Moving-average smoothing; `width` samples are cut from each end of
        # the full convolution before plotting.
        width = hyper.get('sub', 100)
        averaged = np.convolve(history, np.ones(width) / width)
        plt.plot(averaged[width:-width])
        target = '%s/train_loss.jpg' % hyper['graph_prefix']
        logging.info('Saving figure to: %s' % target)
        plt.savefig(target)

    apollo.set_random_seed(hyper['random_seed'])
    select_device()
    apollo.set_logging_verbosity(hyper['loglevel'])

    # Build the net with a single forward pass so it can be drawn, then
    # discard that pass's forward state.
    net = apollo.Net()
    forward(net, hyper)
    drawing = '%s/network.jpg' % hyper['schematic_prefix']
    net.draw_to_file(drawing)
    logging.info('Drawing network to %s' % drawing)
    net.reset_forward()

    if 'weights' in hyper:
        logging.info('Loading weights from %s' % hyper['weights'])
        net.load(hyper['weights'])

    history = []
    for iteration in xrange(hyper['start_iter'], hyper['max_iter']):
        # forward()'s return value is stored as the loss of this iteration.
        history.append(forward(net, hyper))
        net.backward()
        net.update(
            lr=current_learning_rate(iteration),
            momentum=hyper['momentum'],
            clip_gradients=hyper['clip_gradients'],
        )
        if iteration % hyper['display_interval'] == 0:
            report_progress(iteration)
        if is_due(iteration, 'snapshot_interval'):
            write_snapshot(iteration)
        if is_due(iteration, 'graph_interval'):
            write_loss_figure()
Example #3
0
    def solve(self, train_data, val_data=None):
        """Train ``self.net`` for iterations ``[self.start_iter, self.max_iter)``.

        ``train_data`` (and ``val_data`` when given) are wrapped with
        ``make_batch_generator(..., self.batch_size)``; each batch is pulled
        from the resulting object with ``.next()``.

        Every ``self.val_interval`` iterations, if validation data was
        supplied, ``self.val_iter`` validation batches are run and their
        losses recorded.  Each iteration then runs one training batch,
        backpropagates, and updates the parameters with the learning rate
        ``base_lr * gamma_lr ** (iteration // gamma_stepsize)``.  Finally
        every logger in ``self.loggers`` is called with the iteration number
        and a dict holding the loss histories, the net and ``start_iter``.

        Returns None; the loss histories are only exposed to the loggers.
        """
        if self.random_seed is not None:
            apollo.set_random_seed(self.random_seed)

        train_data = make_batch_generator(train_data, self.batch_size)
        if val_data is not None:
            val_data = make_batch_generator(val_data, self.batch_size)

        train_losses = []
        val_losses = []

        for step in xrange(self.start_iter, self.max_iter):
            # Validation pass (forward only), run before the training step.
            if step % self.val_interval == 0:
                if val_data is not None:
                    for _ in xrange(self.val_iter):
                        val_batch = val_data.next()
                        self.net.forward("val", val_batch)
                        val_losses.append(self.net.apollo_net.loss)
                        self.net.apollo_net.clear_forward()

            # Training step: forward, record loss, backward, update.
            train_batch = train_data.next()
            self.net.forward("train", train_batch)
            train_losses.append(self.net.apollo_net.loss)
            self.net.backward()

            base_rate = self.base_lr
            learning_rate = base_rate * self.gamma_lr ** (step // self.gamma_stepsize)
            self.net.apollo_net.update(
                learning_rate,
                momentum=self.momentum,
                clip_gradients=self.clip_gradients,
                weight_decay=self.weight_decay,
            )

            # Each logger receives its own freshly built payload dict.
            for logger in self.loggers:
                logger.log(
                    step,
                    {
                        "train_loss": train_losses,
                        "val_loss": val_losses,
                        "net": self.net,
                        "start_iter": self.start_iter,
                    },
                )
Example #4
0
def default_train(hyper, forward, test_forward=None):
    """Train (and periodically test) an Apollo net built by ``forward``.

    Args:
        hyper: dict of settings.  It is merged over ``default_hyper()`` and
            checked with ``validate_hyper`` before use.  Keys read here:
            ``random_seed``, ``gpu``, ``loglevel``, ``schematic_prefix``,
            ``start_iter``, ``max_iter``, ``base_lr``, ``display_interval``,
            ``snapshot_interval``, ``snapshot_prefix``, ``graph_interval``,
            ``graph_prefix``, ``test_interval`` and ``test_iter``.  Optional
            keys with their fallbacks: ``gamma`` (1.0), ``stepsize``
            (``sys.maxint``), ``momentum`` (0.0), ``clip_gradients`` (-1),
            ``weight_decay`` (0.0), ``sub`` (100), ``separate_test_net``
            (True) and ``weights`` (no weights loaded when absent).
        forward: callable invoked as ``forward(net, hyper)``; its return
            value is recorded as the training loss of the iteration.
        test_forward: callable invoked as ``test_forward(test_net, hyper)``
            during testing; defaults to ``forward``.

    Returns:
        None.  Outputs are log messages, ``<iteration>.h5`` snapshots and
        the ``train_loss.jpg`` figure.
    """
    if test_forward is None:
        test_forward = forward

    # The backend is selected before pyplot is imported.
    import matplotlib

    matplotlib.use("Agg", warn=False)
    import matplotlib.pyplot as plt

    # Caller-supplied settings override the defaults.
    d = default_hyper()
    d.update(hyper)
    hyper = d
    validate_hyper(hyper)

    apollo.set_random_seed(hyper["random_seed"])
    if hyper["gpu"] is None:
        apollo.set_mode_cpu()
        logging.info("Using cpu device (pass --gpu X to train on the gpu)")
    else:
        apollo.set_mode_gpu()
        apollo.set_device(hyper["gpu"])
        logging.info("Using gpu device %d" % hyper["gpu"])
    apollo.set_logging_verbosity(hyper["loglevel"])

    # One forward pass populates the net so it can be drawn; the forward
    # state is reset before training begins.
    net = apollo.Net()
    forward(net, hyper)
    network_path = "%s/network.jpg" % hyper["schematic_prefix"]
    net.draw_to_file(network_path)
    logging.info("Drawing network to %s" % network_path)
    net.reset_forward()

    # The `== True` comparison is kept as-is for backward compatibility:
    # only values equal to True (e.g. True or 1) select a separate test net.
    if hyper.get("separate_test_net", True) == True:
        test_net = apollo.Net()
        test_forward(test_net, hyper)
        test_net.reset_forward()
    else:
        test_net = net

    if "weights" in hyper:
        # Fixed: this message previously formatted the undefined name
        # `weights`, raising NameError before any weights could be loaded.
        logging.info("Loading weights from %s" % hyper["weights"])
        net.load(hyper["weights"])

    train_loss_hist = []
    for i in xrange(hyper["start_iter"], hyper["max_iter"]):
        train_loss_hist.append(forward(net, hyper))
        net.backward()

        # Step decay: base_lr is scaled by gamma once per `stepsize`
        # iterations; with the fallbacks the rate stays constant.
        lr = hyper["base_lr"] * hyper.get("gamma", 1.0) ** (i // hyper.get("stepsize", sys.maxint))
        net.update(
            lr=lr,
            momentum=hyper.get("momentum", 0.0),
            clip_gradients=hyper.get("clip_gradients", -1),
            weight_decay=hyper.get("weight_decay", 0.0),
        )

        if i % hyper["display_interval"] == 0:
            # Mean over (at most) the last `display_interval` losses.
            logging.info("Iteration %d: %s" % (i, np.mean(train_loss_hist[-hyper["display_interval"] :])))

        # Snapshots and graphs are skipped on the first iteration.
        if i % hyper["snapshot_interval"] == 0 and i > hyper["start_iter"]:
            filename = "%s/%d.h5" % (hyper["snapshot_prefix"], i)
            logging.info("Saving net to: %s" % filename)
            net.save(filename)

        if i % hyper["graph_interval"] == 0 and i > hyper["start_iter"]:
            # Moving average of the loss history; `sub` samples are trimmed
            # from both ends of the full convolution.
            sub = hyper.get("sub", 100)
            plt.plot(np.convolve(train_loss_hist, np.ones(sub) / sub)[sub:-sub])
            filename = "%s/train_loss.jpg" % hyper["graph_prefix"]
            logging.info("Saving figure to: %s" % filename)
            plt.savefig(filename)

        if hyper["test_interval"] is not None and i % hyper["test_interval"] == 0:
            test_loss = []
            accuracy = []
            test_net.phase = "test"
            # Bring the test net's parameters in line with the trained net.
            test_net.copy_params_from(net)
            for _ in xrange(hyper["test_iter"]):
                test_loss.append(test_forward(test_net, hyper))
                test_net.reset_forward()
                if "accuracy" in test_net.tops:
                    accuracy.append(test_net.tops["accuracy"].data.flatten()[0])
            if len(accuracy) > 0:
                logging.info("Accuracy: %.5f" % np.mean(accuracy))
            logging.info("Test loss: %f" % np.mean(test_loss))
            # Restore the phase; test_net may be the training net itself.
            test_net.phase = "train"
Example #5
0
def default_train(hyper, forward, test_forward=None):
    """Train an Apollo net with ``forward`` and evaluate it at intervals.

    ``hyper`` is laid over the values from ``default_hyper()`` and checked by
    ``validate_hyper``.  ``forward(net, hyper)`` builds and runs the training
    net and its return value is stored as the iteration's loss;
    ``test_forward`` plays the same role for the test net and falls back to
    ``forward`` when omitted.

    For every iteration in ``[start_iter, max_iter)`` the function runs
    forward, backward and update (step-decayed learning rate), and at the
    configured intervals logs the recent mean loss, saves an ``.h5``
    snapshot, saves the smoothed loss curve to ``train_loss.jpg`` and runs
    ``test_iter`` test passes, logging the mean test loss and, when the net
    exposes an ``accuracy`` top, the mean accuracy.  Returns None.
    """
    test_forward = forward if test_forward is None else test_forward

    # The backend is selected before pyplot is imported.
    import matplotlib
    matplotlib.use('Agg', warn=False)
    import matplotlib.pyplot as plt

    # Defaults first, caller-supplied values on top.
    settings = default_hyper()
    settings.update(hyper)
    hyper = settings
    validate_hyper(hyper)

    apollo.set_random_seed(hyper['random_seed'])
    if hyper['gpu'] is not None:
        apollo.set_mode_gpu()
        apollo.set_device(hyper['gpu'])
        logging.info('Using gpu device %d' % hyper['gpu'])
    else:
        apollo.set_mode_cpu()
        logging.info('Using cpu device (pass --gpu X to train on the gpu)')
    apollo.set_logging_verbosity(hyper['loglevel'])

    # A single forward pass fills in the net so it can be drawn.
    net = apollo.Net()
    forward(net, hyper)
    schematic_file = '%s/network.jpg' % hyper['schematic_prefix']
    net.draw_to_file(schematic_file)
    logging.info('Drawing network to %s' % schematic_file)
    net.reset_forward()

    # Either a dedicated test net or the training net itself is used for
    # evaluation.
    wants_separate_net = hyper.get('separate_test_net', True) == True
    if wants_separate_net:
        test_net = apollo.Net()
        test_forward(test_net, hyper)
        test_net.reset_forward()
    else:
        test_net = net

    if 'weights' in hyper:
        logging.info('Loading weights from %s' % hyper['weights'])
        net.load(hyper['weights'])

    losses = []
    for iteration in xrange(hyper['start_iter'], hyper['max_iter']):
        losses.append(forward(net, hyper))
        net.backward()

        # Step decay; with the fallbacks the rate stays at base_lr.
        base_rate = hyper['base_lr']
        decay = hyper.get('gamma', 1.0) ** (iteration // hyper.get('stepsize', sys.maxint))
        update_args = {
            'lr': base_rate * decay,
            'momentum': hyper.get('momentum', 0.0),
            'clip_gradients': hyper.get('clip_gradients', -1),
            'weight_decay': hyper.get('weight_decay', 0.0),
        }
        net.update(**update_args)

        if iteration % hyper['display_interval'] == 0:
            recent = losses[-hyper['display_interval']:]
            logging.info('Iteration %d: %s' % (iteration, np.mean(recent)))

        # Snapshots and graphs are skipped on the first iteration.
        if iteration % hyper['snapshot_interval'] == 0 and iteration > hyper['start_iter']:
            snapshot_file = '%s/%d.h5' % (hyper['snapshot_prefix'], iteration)
            logging.info('Saving net to: %s' % snapshot_file)
            net.save(snapshot_file)

        if iteration % hyper['graph_interval'] == 0 and iteration > hyper['start_iter']:
            # Moving average of the losses, trimmed by `window` at both ends.
            window = hyper.get('sub', 100)
            kernel = np.ones(window) / window
            plt.plot(np.convolve(losses, kernel)[window:-window])
            figure_file = '%s/train_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % figure_file)
            plt.savefig(figure_file)

        # Testing is the last step of an iteration, so skip ahead when it
        # is disabled or not due.
        if hyper['test_interval'] is None or iteration % hyper['test_interval'] != 0:
            continue

        test_losses = []
        accuracies = []
        test_net.phase = 'test'
        # Bring the test net's parameters in line with the trained net.
        test_net.copy_params_from(net)
        for _ in xrange(hyper['test_iter']):
            test_losses.append(test_forward(test_net, hyper))
            test_net.reset_forward()
            if 'accuracy' in test_net.tops:
                accuracies.append(test_net.tops['accuracy'].data.flatten()[0])
        if accuracies:
            logging.info('Accuracy: %.5f' % np.mean(accuracies))
        logging.info('Test loss: %f' % np.mean(test_losses))
        # Restore the phase; test_net may be the training net itself.
        test_net.phase = 'train'