def train():
    """Train the net built by the global `forward`, starting from AlexNet weights.

    Reads schedule/IO settings from the module-level `hyper` dict; logs the
    running loss, snapshots weights, and saves a smoothed loss graph.
    """
    net = apollo.Net()
    # One forward pass builds the net's structure so weights can be loaded.
    forward(net)
    net.reset_forward()
    net.load(alexnet.weights_file())
    loss_history = []
    for it in range(hyper['max_iter']):
        loss_history.append(forward(net))
        net.backward()
        # Step-decay schedule: lr drops by `gamma` every `stepsize` iterations.
        lr = hyper['base_lr'] * hyper['gamma'] ** (it // hyper['stepsize'])
        net.update(lr=lr,
                   momentum=hyper['momentum'],
                   clip_gradients=hyper.get('clip_gradients', -1),
                   weight_decay=hyper['weight_decay'])
        if it % hyper['display_interval'] == 0:
            recent = loss_history[-hyper['display_interval']:]
            logging.info('Iteration %d: %s' % (it, np.mean(recent)))
        if it > 0 and it % hyper['snapshot_interval'] == 0:
            filename = '%s_%d.h5' % (hyper['snapshot_prefix'], it)
            logging.info('Saving net to: %s' % filename)
            net.save(filename)
            # Dump the raw loss history alongside the snapshot.
            with open('/tmp/log.txt', 'w') as f:
                f.write(json.dumps(loss_history))
        if it > 0 and it % hyper['graph_interval'] == 0:
            sub = 100
            # Moving-average smoothing; trim the convolution edge artifacts.
            smoothed = np.convolve(loss_history, np.ones(sub) / sub)[sub:-sub]
            plt.plot(smoothed)
            filename = '%strain_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % filename)
            plt.savefig(filename)
def eval(hyper): eval_net = apollo.Net() # evaluate the net once to set up structure before loading parameters evaluate_forward(eval_net) eval_net.load('%s/%d.h5' % (hyper['snapshot_prefix'], hyper['max_iter'] - 1)) print evaluate_forward(eval_net)
def eval_performance(net, hyper):
    """Run one test-phase evaluation of `net`'s parameters on a scratch net."""
    scratch = apollo.Net()
    # Build the scratch net's structure first so parameters can be copied in.
    eval_forward(scratch, hyper)
    scratch.reset_forward()
    scratch.phase = 'test'
    scratch.copy_params_from(net)
    eval_forward(scratch, hyper)
    scratch.reset_forward()
def run(hyper): net = apollo.Net() arch = apollo.Architecture() arch.load_from_proto(hyper['net_prototxt']) test_net = apollo.Net(phase='test') test_arch = apollo.Architecture(phase='test') test_arch.load_from_proto(hyper['net_prototxt']) arch.forward(net) if hyper['weights']: print 'loading' net.load(hyper['weights']) net.reset_forward() test_arch.forward(test_net) test_net.reset_forward() def test_performance(net, test_net): test_error = [] test_net.copy_params_from(net) for _ in range(hyper['test_iter']): test_error.append(test_arch.forward(test_net)) test_net.reset_forward() logging.info('Test Error: %f' % np.mean(test_error)) error = [] for i in range(hyper['max_iter']): error.append(arch.forward(net)) net.backward() lr = (hyper['base_lr'] / (hyper.get('gamma', 1.))**(i // hyper['stepsize'])) net.update(lr=lr, momentum=hyper['momentum'], clip_gradients=hyper.get('clip_gradients', -1), weight_decay=hyper['weight_decay']) if i % hyper['display_interval'] == 0: logging.info('Iteration %d: %s' % (i, np.mean(error))) error = [] if i % hyper['test_interval'] == 0: test_performance(net, test_net) if i % hyper['snapshot_interval'] == 0 and i > 0: net.save('%s_%d.h5' % (hyper['snapshot_prefix'], i))
def train(hyper):
    """Full training driver: seeding, device selection, snapshots, loss graph.

    Resumes from `hyper['start_iter']` and optionally from `hyper['weights']`.
    """
    apollo.set_random_seed(hyper['random_seed'])
    if hyper['gpu'] is None:
        apollo.set_mode_cpu()
        logging.info('Using cpu device (pass --gpu X to train on the gpu)')
    else:
        apollo.set_mode_gpu()
        apollo.set_device(hyper['gpu'])
        logging.info('Using gpu device %d' % hyper['gpu'])
    apollo.set_logging_verbosity(hyper['loglevel'])
    net = apollo.Net()
    forward(net, hyper)
    # Draw the network schematic once, before training starts.
    network_path = '%s/network.jpg' % hyper['schematic_prefix']
    net.draw_to_file(network_path)
    logging.info('Drawing network to %s' % network_path)
    net.reset_forward()
    if 'weights' in hyper:
        logging.info('Loading weights from %s' % hyper['weights'])
        net.load(hyper['weights'])
    loss_hist = []
    for step in xrange(hyper['start_iter'], hyper['max_iter']):
        loss_hist.append(forward(net, hyper))
        net.backward()
        # Step-decay learning rate schedule.
        rate = hyper['base_lr'] * hyper['gamma'] ** (step // hyper['stepsize'])
        net.update(lr=rate,
                   momentum=hyper['momentum'],
                   clip_gradients=hyper['clip_gradients'])
        if step % hyper['display_interval'] == 0:
            logging.info('Iteration %d: %s' % (
                step, np.mean(loss_hist[-hyper['display_interval']:])))
        if step % hyper['snapshot_interval'] == 0 and step > hyper['start_iter']:
            filename = '%s/%d.h5' % (hyper['snapshot_prefix'], step)
            logging.info('Saving net to: %s' % filename)
            net.save(filename)
        if step % hyper['graph_interval'] == 0 and step > hyper['start_iter']:
            sub = hyper.get('sub', 100)
            # Moving-average smoothing; trim the convolution edge artifacts.
            plt.plot(np.convolve(loss_hist, np.ones(sub) / sub)[sub:-sub])
            filename = '%s/train_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % filename)
            plt.savefig(filename)
def main():
    """Parse args, load the solver config, and launch the default train loop."""
    parser = apollo.utils.training.default_parser()
    parser.add_argument('--solver')
    args = parser.parse_args()
    # The solver file is a python module expected to supply get_hyper().
    config = imp.load_source('module.name', args.solver)
    hyper = {}
    hyper.update(config.get_hyper())
    # Command-line flags override solver hyperparameters when provided.
    hyper.update({k: v for k, v in vars(args).iteritems() if v is not None})
    arch = Architecture()
    arch.load_from_proto(hyper['net_prototxt'])
    # CLEANUP: removed an unused `apollo.Net(phase='test')` local — only the
    # test *architecture*'s forward is handed to default_train, which is
    # presumably responsible for constructing its own test net.
    test_arch = Architecture(phase='test')
    test_arch.load_from_proto(hyper['net_prototxt'])
    apollo.utils.training.default_train(hyper, forward=arch.forward,
                                        test_forward=test_arch.forward)
def main(): parser = argparse.ArgumentParser() parser.add_argument('--loglevel', default=0) args = parser.parse_args() apollo.Caffe.set_logging_verbosity(int(args.loglevel)) # turn off logging sentences = get_data() sentence_batches = get_data_batch(sentences) net = apollo.Net() net.set_phase_train() apollo.Caffe.set_random_seed(10) apollo.Caffe.set_mode_gpu() apollo.Caffe.set_device(0) #import time #shape = [3,256,256] #numpy_layer = NumpyDataLayer(name='numpy', tops=['numpy'], data=np.reshape(np.arange(shape[0] * shape[1] * shape[2]), shape)) #net.forward_layer(numpy_layer) #net.backward() #net.update(lr=20) #print net.blobs['numpy'].data().shape #shape = [2,256,256] #numpy_layer = NumpyDataLayer(name='numpy', tops=['numpy'], data=np.reshape(np.arange(shape[0] * shape[1] * shape[2]), shape)) #net.forward_layer(numpy_layer) #net.backward() #net.update(lr=20) #print net.blobs['numpy'].data().shape #return #print time.time() - start #print net.blobs['numpy'].data() net.reshape_only = True iter(net, sentence_batches) net.reshape_only = False error = 0 display_interval = 10 for i in range(100000): loss = iter(net, sentence_batches) error += loss / display_interval if i % display_interval == 0 and i > 0: print 'Iteration %d: %s' % (i, np.mean(error)) error = 0
def train():
    """Training loop that also snapshots on the very last iteration."""
    net = apollo.Net()
    history = []
    for i in range(hyper['max_iter']):
        history.append(forward(net))
        net.backward()
        # Learning rate decays by `gamma` every `stepsize` iterations.
        decay_steps = i // hyper['stepsize']
        net.update(lr=hyper['base_lr'] * hyper['gamma'] ** decay_steps,
                   momentum=hyper['momentum'],
                   clip_gradients=hyper['clip_gradients'],
                   weight_decay=hyper['weight_decay'])
        if i % hyper['display_interval'] == 0:
            logging.info('Iteration %d: %s' % (
                i, np.mean(history[-hyper['display_interval']:])))
        # Snapshot periodically, and always on the final iteration.
        periodic_snapshot = i % hyper['snapshot_interval'] == 0 and i > 0
        if periodic_snapshot or i == hyper['max_iter'] - 1:
            filename = '%s_%d.h5' % (hyper['snapshot_prefix'], i)
            logging.info('Saving net to: %s' % filename)
            net.save(filename)
        if i % hyper['graph_interval'] == 0 and i > 0:
            sub = 100
            # Moving-average smoothing; trim the convolution edge artifacts.
            plt.plot(np.convolve(history, np.ones(sub) / sub)[sub:-sub])
            plt.savefig('%strain_loss.jpg' % hyper['graph_prefix'])
def main():
    """Evaluate a saved char-model snapshot using hand-set hyperparameters."""
    hyper = {
        'max_iter': 10000,
        'snapshot_prefix': './char/',
        'schematic_prefix': './graph/',
        'snapshot_interval': 1000,
        'random_seed': 22,
        'gamma': 0.8,
        'stepsize': 2500,
        'graph_interval': 1000,
        'graph_prefix': './graph/',
        'mem_cells': 250,
        'batch_size': 32,
        'init_range': 0.1,
        'test_interval': 100,
        'test_iter': 1,
        'base_lr': 0.2,
        'weight_decay': 0,
        'momentum': 0.0,
        'clip_gradients': 100,
        'display_interval': 100,
        'length': 2000,
        'i_temperature': 1.5,
        # Reserved symbols live just past the real vocabulary.
        'unknown_symbol': len(vocab) + 1,
        'zero_symbol': len(vocab) + 2,
        # The original also set a hard-coded 256 first and immediately
        # overwrote it with this value; only the final value is kept.
        'vocab_size': len(vocab) + 2,
    }
    # Command-line flags override the defaults above when provided.
    args = apollo.utils.training.default_parser().parse_args()
    hyper.update({k: v for k, v in vars(args).iteritems() if v is not None})
    net = apollo.Net()
    eval_forward(net, hyper)
    net.load('./char/4000.h5')
    eval_performance(net, hyper)
'-n', '--name', default="latest", type=str, help="Session name; prefix used to save snapshots and loss history") args = parser.parse_args() random.seed(0) # --- Use them to init caffe state --- apollo.Caffe.set_random_seed(hyper['random_seed']) apollo.Caffe.set_mode_gpu() apollo.Caffe.set_device(args.gpu) apollo.Caffe.set_logging_verbosity(args.loglevel) # --- Initialize network --- net = apollo.Net() batch_iter = get_batch_iterator(hyper['batch_size'], data_type="extra") test_batch_iter = get_batch_iterator(hyper['batch_size'], data_type="extra") batch = batch_iter.next() overfeat_net = OverfeatNet(net, batch) if args.parameter_file: overfeat_net.net.load(args.parameter_file) # --- Do the training --- train_loss_hist = [] binary_softmax_hist = [] label_softmax_hist = [] bbox_loss_hist = [] for i in range(hyper['max_iter']):
def train():
    """Train the seq2seq net, periodically logging a decoded sample sentence.

    CLEANUP: removed an unused `test_net = apollo.Net()` local.
    """
    net = apollo.Net()
    sentences = get_data()
    sentence_batches = get_data_batch(sentences)
    # First forward pass builds the net so weights can be loaded into it.
    forward(net, sentence_batches)
    if hyper['weights'] is not None:
        net.load(hyper['weights'])
    net.reset_forward()
    train_loss_hist = []
    for i in range(hyper['max_iter']):
        train_loss_hist.append(forward(net, sentence_batches))
        net.backward()
        lr = (hyper['base_lr'] * (hyper['gamma'])**(i // hyper['stepsize']))
        net.update(lr=lr, momentum=hyper['momentum'],
                   clip_gradients=hyper['clip_gradients'],
                   weight_decay=hyper['weight_decay'])
        if i % hyper['display_interval'] == 0:
            logging.info(
                'Iteration %d: %s' %
                (i, np.mean(train_loss_hist[-hyper['display_interval']:])))
            # Decode the current batch's first sentence for a progress sample.
            output = []
            target = []
            source = []
            for step in range(hyper['max_len']):
                try:
                    output.append(np.argmax(
                        net.tops['softmax%d' % step].data[0].flatten()))
                    target.append(np.int(
                        net.tops['word%d' % step].data[0].flatten()))
                    source.append(int(
                        net.tops['source_word%d' % step].data[0].flatten()[0]))
                except Exception:
                    # BUGFIX: was a bare `except:`, which also swallows
                    # KeyboardInterrupt/SystemExit. A missing per-step top
                    # (sentence shorter than max_len) ends the readout.
                    break
            logging.info('input:\t' + ' '.join(es_ivocab[x] for x in source))
            logging.info('output:\t' + ' '.join(
                en_ivocab[x] for x in padded_reverse(output)))
            logging.info('target:\t' + ' '.join(
                en_ivocab[x] for x in padded_reverse(target)))
        if i % hyper['snapshot_interval'] == 0 and i > 0:
            filename = '%s_%d.h5' % (hyper['snapshot_prefix'], i)
            logging.info('Saving net to: %s' % filename)
            net.save(filename)
            with open('/tmp/log.txt', 'w') as f:
                f.write(json.dumps(train_loss_hist))
        if i % hyper['graph_interval'] == 0 and i > 0:
            sub = 100
            plt.plot(
                np.convolve(train_loss_hist, np.ones(sub) / sub)[sub:-sub])
            filename = '%strain_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % filename)
            plt.savefig(filename)
net.forward_layer( layers.InnerProduct(name='ip%d' % step, bottoms=['dropout%d' % step], param_names=['ip_weight', 'ip_bias'], num_output=hyper['vocab_size'], weight_filler=filler)) loss.append( net.forward_layer( layers.SoftmaxWithLoss( name='softmax_loss%d' % step, ignore_label=hyper['zero_symbol'], bottoms=['ip%d' % step, 'label%d' % step]))) return np.mean(loss) net = apollo.Net() test_net = apollo.Net() sentences = get_data() sentence_batches = get_data_batch(sentences) forward(net, sentence_batches) net.draw_to_file('/tmp/s2s.png') net.reset_forward() train_loss_hist = [] for i in range(hyper['max_iter']): train_loss_hist.append(forward(net, sentence_batches)) net.backward() lr = (hyper['base_lr'] * (hyper['gamma'])**(i // hyper['stepsize'])) net.update(lr=lr,
def _run_test(net, test_net, test_forward, hyper):
    """Run one test evaluation: copy params, run test_iter passes, log metrics."""
    test_loss = []
    accuracy = []
    test_net.phase = 'test'
    test_net.copy_params_from(net)
    for j in xrange(hyper['test_iter']):
        test_loss.append(test_forward(test_net, hyper))
        test_net.reset_forward()
        # Report accuracy only if the test net exposes an 'accuracy' top.
        if 'accuracy' in test_net.tops:
            accuracy.append(test_net.tops['accuracy'].data.flatten()[0])
    if len(accuracy) > 0:
        logging.info('Accuracy: %.5f' % np.mean(accuracy))
    logging.info('Test loss: %f' % np.mean(test_loss))
    test_net.phase = 'train'

def default_train(hyper, forward, test_forward=None):
    """Generic training loop shared by the example drivers.

    Args:
        hyper: dict of hyperparameters; merged over default_hyper() and
            checked by validate_hyper().
        forward: callable(net, hyper) -> loss that builds/runs the train net.
        test_forward: optional callable for the test net; defaults to forward.

    CLEANUP: the periodic test evaluation is split into _run_test, and the
    `== True` comparison was replaced with plain truthiness.
    """
    if test_forward is None:
        test_forward = forward
    # Import lazily with the Agg backend so training works headless.
    import matplotlib; matplotlib.use('Agg', warn=False)
    import matplotlib.pyplot as plt
    d = default_hyper()
    d.update(hyper)
    hyper = d
    validate_hyper(hyper)
    apollo.set_random_seed(hyper['random_seed'])
    if hyper['gpu'] is None:
        apollo.set_mode_cpu()
        logging.info('Using cpu device (pass --gpu X to train on the gpu)')
    else:
        apollo.set_mode_gpu()
        apollo.set_device(hyper['gpu'])
        logging.info('Using gpu device %d' % hyper['gpu'])
    apollo.set_logging_verbosity(hyper['loglevel'])
    net = apollo.Net()
    forward(net, hyper)
    network_path = '%s/network.jpg' % hyper['schematic_prefix']
    net.draw_to_file(network_path)
    logging.info('Drawing network to %s' % network_path)
    net.reset_forward()
    if hyper.get('separate_test_net', True):
        test_net = apollo.Net()
        test_forward(test_net, hyper)
        test_net.reset_forward()
    else:
        test_net = net
    if 'weights' in hyper:
        logging.info('Loading weights from %s' % hyper['weights'])
        net.load(hyper['weights'])
    train_loss_hist = []
    for i in xrange(hyper['start_iter'], hyper['max_iter']):
        train_loss_hist.append(forward(net, hyper))
        net.backward()
        # Step decay; with no 'stepsize' the exponent stays 0 (constant lr).
        lr = (hyper['base_lr'] *
              hyper.get('gamma', 1.)**(i // hyper.get('stepsize', sys.maxint)))
        net.update(lr=lr, momentum=hyper.get('momentum', 0.0),
                   clip_gradients=hyper.get('clip_gradients', -1),
                   weight_decay=hyper.get('weight_decay', 0.0))
        if i % hyper['display_interval'] == 0:
            logging.info('Iteration %d: %s' % (
                i, np.mean(train_loss_hist[-hyper['display_interval']:])))
        if i % hyper['snapshot_interval'] == 0 and i > hyper['start_iter']:
            filename = '%s/%d.h5' % (hyper['snapshot_prefix'], i)
            logging.info('Saving net to: %s' % filename)
            net.save(filename)
        if i % hyper['graph_interval'] == 0 and i > hyper['start_iter']:
            sub = hyper.get('sub', 100)
            # Moving-average smoothing; trim the convolution edge artifacts.
            plt.plot(np.convolve(train_loss_hist, np.ones(sub) / sub)[sub:-sub])
            filename = '%s/train_loss.jpg' % hyper['graph_prefix']
            logging.info('Saving figure to: %s' % filename)
            plt.savefig(filename)
        if hyper['test_interval'] is not None and i % hyper['test_interval'] == 0:
            _run_test(net, test_net, test_forward, hyper)