def run():
    """Train the 'coco-nopeep' image model on the MSCoco caption data.

    Steps, in order:
      1. create a timestamped output directory,
      2. build sequential train/validation data streams,
      3. build the ``ImageModel`` graph and name its outputs,
      4. attach hand-written RMSProp-style updates with per-parameter
         gradient-norm clipping,
      5. compile the sampling function,
      6. run ``MainLoop`` with the tracking/plotting/saving extensions.

    Takes no arguments and returns nothing; all results are produced by the
    main-loop extensions.
    """
    name = 'coco-nopeep'
    epochs = 200
    subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
    if not os.path.isdir(subdir):
        os.mkdir(subdir)

    bs = 200
    data_train = MSCoco(dataset='train', num=82611, bs=bs)
    data_valid = MSCoco(dataset='val', num=4989, bs=bs)

    train_stream = DataStream.default_stream(
        data_train,
        iteration_scheme=SequentialScheme(data_train.num_examples, bs))
    valid_stream = DataStream.default_stream(
        data_valid,
        iteration_scheme=SequentialScheme(data_valid.num_examples, bs))

    # Single source of truth for the image dimensions used below.
    img_height, img_width = (32, 32)

    x = T.matrix('features')
    # Debug aid:
    # x.tag.test_value = np.random.rand(bs, 3*32*32).astype('float32')
    y = T.lmatrix('captions')
    # y.tag.test_value = np.random.rand(bs, 57).astype(int)
    mask = T.lmatrix('mask')
    # mask.tag.test_value = np.ones((bs,57)).astype(int)

    # Model hyperparameters, passed positionally to ImageModel.
    K = 25323
    lang_N = 57
    N = 32
    read_size = 9
    write_size = 9
    m = 256
    gen_dim = 550
    infer_dim = 550
    z_dim = 275
    l = 512

    model = ImageModel(bs, K, lang_N, N, read_size, write_size, m,
                       gen_dim, infer_dim, z_dim, l,
                       image_size=img_height * img_width,
                       channels=3, cinit=0.0)
    model._inputs = [x, y, mask]

    kl, log_recons, log_likelihood, c = model.train(x, y, mask)
    # The names are what the Track/DataStreamTrack extensions refer to.
    kl.name = 'kl'
    log_recons.name = 'log_recons'
    log_likelihood.name = 'log_likelihood'
    c.name = 'c'

    model._outputs = [kl, log_recons, log_likelihood, c]

    params = model.params

    lr = theano.shared(np.asarray(0.001).astype(theano.config.floatX))

    # Hand-rolled optimiser used in place of the earlier call
    #   solver(log_likelihood, params, lr=lr, clipnorm=10.0)
    # from solvers.RMSProp.
    # NOTE(review): the update below *descends* on `log_likelihood`, so it
    # is presumably a cost to be minimised -- confirm against the model.
    grads = T.grad(log_likelihood, params)

    # One zero-initialised running average of squared gradients per param.
    history = [theano.shared(p.get_value() * 0) for p in params]

    threshold = 10.0
    decay = 0.9
    updates = OrderedDict()

    for p, ph, g in zip(params, history, grads):
        # Rescale the gradient so its L2 norm never exceeds `threshold`.
        l2_norm = T.sqrt(T.sqr(g).sum())
        # Named `clip_scale` (not `m`) so the hyperparameter above is not
        # silently overwritten.
        clip_scale = T.switch(l2_norm < threshold, 1, threshold / l2_norm)
        grad = clip_scale * g

        ph_n = decay * ph + (1 - decay) * grad ** 2
        updates[ph] = ph_n
        updates[p] = p - (lr / T.sqrt(ph_n + 1e-6)) * grad

    model._updates = updates

    logger.info('Compiling sample function')
    model.build_sample_function(y, mask)
    logger.info('Compiled sample function')

    # ============= TRAIN =========
    plots = [['train_kl', 'valid_kl'],
             ['train_log_recons', 'valid_log_recons'],
             ['train_log_likelihood', 'valid_log_likelihood']]
    main_loop = MainLoop(
        model,
        train_stream,
        [
            FinishAfter(epochs),
            Track(variables=['kl', 'log_recons', 'log_likelihood'],
                  prefix='train'),
            # TrackBest(variables=['kl'], prefix='train'),  # disabled
            DataStreamTrack(valid_stream,
                            ['kl', 'log_recons', 'log_likelihood'],
                            prefix='valid'),
            # Presumably the image dimensions of the samples; both are 32.
            SampleSentences(subdir, bs, img_height, img_width),
            DropLearningRate(lr, 25, 0.0001),
            Plot(name, plots, 'http://nameless-wave-6526.herokuapp.com/'),
            SaveModel(subdir, name + '.model'),
            TimeProfile(),
            Printing(),
        ])
    main_loop.run()
def run():
    """Train the 'colored-mnist' image model on the CaptionedMNIST data.

    Steps, in order:
      1. create a timestamped output directory,
      2. build sequential train/validation data streams,
      3. build the ``ImageModel`` graph and name its outputs,
      4. obtain parameter updates from the project's RMSProp solver,
      5. compile the sampling function,
      6. run ``MainLoop`` with the tracking/plotting/saving extensions.

    Takes no arguments and returns nothing; all results are produced by the
    main-loop extensions.
    """
    name = 'colored-mnist'
    epochs = 200
    subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
    if not os.path.isdir(subdir):
        os.mkdir(subdir)

    bs = 150
    # `range` rather than the Python-2-only `xrange`: with 12 items the
    # result is the same and the code runs on either interpreter.
    # NOTE(review): train and valid each draw their OWN random `banned`
    # list (train first) -- confirm that this is intended.
    data_train = CaptionedMNIST(
        banned=[np.random.randint(0, 10) for _ in range(12)],
        dataset='train', num=50000, bs=bs)
    data_valid = CaptionedMNIST(
        banned=[np.random.randint(0, 10) for _ in range(12)],
        dataset='valid', num=10000, bs=bs)

    train_stream = DataStream.default_stream(
        data_train,
        iteration_scheme=SequentialScheme(data_train.num_examples, bs))
    valid_stream = DataStream.default_stream(
        data_valid,
        iteration_scheme=SequentialScheme(data_valid.num_examples, bs))

    # Single source of truth for the image dimensions used below.
    img_height, img_width = (60, 60)

    x = T.matrix('features')
    # Debug aid:
    # x.tag.test_value = np.random.rand(bs, 60*60).astype('float32')
    y = T.lmatrix('captions')
    # y.tag.test_value = np.random.rand(bs, 12).astype(int)
    mask = T.lmatrix('mask')
    # mask.tag.test_value = np.ones((bs,12)).astype(int)

    # Model hyperparameters, passed positionally to ImageModel.
    K = 29
    lang_N = 14
    N = 32
    read_size = 8
    write_size = 8
    m = 256
    gen_dim = 300
    infer_dim = 300
    z_dim = 150
    l = 512

    model = ImageModel(bs, K, lang_N, N, read_size, write_size, m,
                       gen_dim, infer_dim, z_dim, l,
                       image_size=img_height * img_width,
                       cinit=-10, channels=3)
    model._inputs = [x, y, mask]

    kl, log_recons, log_likelihood, c = model.train(x, y, mask)
    # The names are what the Track/DataStreamTrack extensions refer to.
    kl.name = 'kl'
    log_recons.name = 'log_recons'
    log_likelihood.name = 'log_likelihood'
    c.name = 'c'

    model._outputs = [kl, log_recons, log_likelihood, c]

    params = model.params

    # Kept as a function-local import, exactly as in the original.
    from solvers.RMSProp import RMSProp as solver
    lr = theano.shared(np.asarray(0.001).astype(theano.config.floatX))
    # Gradient clipping (clipnorm=10.0) was disabled in the original call.
    updates = solver(log_likelihood, params, lr=lr)
    model._updates = updates

    logger.info('Compiling sample function')
    model.build_sample_function(y, mask)
    logger.info('Compiled sample function')

    # ============= TRAIN =========
    plots = [['train_kl', 'valid_kl'],
             ['train_log_recons', 'valid_log_recons'],
             ['train_log_likelihood', 'valid_log_likelihood']]
    main_loop = MainLoop(
        model,
        train_stream,
        [
            FinishAfter(epochs),
            Track(variables=['kl', 'log_recons', 'log_likelihood'],
                  prefix='train'),
            # TrackBest(variables=['kl'], prefix='train'),  # disabled
            DataStreamTrack(valid_stream,
                            ['kl', 'log_recons', 'log_likelihood'],
                            prefix='valid'),
            # Presumably the image dimensions of the samples; both are 60.
            SampleSentences(subdir, bs, img_height, img_width),
            DropLearningRate(lr, 110, 0.00001),
            Plot(name, plots, 'http://nameless-wave-6526.herokuapp.com/'),
            SaveModel(subdir, name + '.model'),
            TimeProfile(),
            Printing(),
        ])
    main_loop.run()
def run():
    """Train the 'coco-nopeep' image model on the MSCoco caption data.

    Creates a timestamped output directory, builds sequential train and
    validation streams, constructs the ``ImageModel`` graph, attaches
    hand-written RMSProp-style updates with per-parameter gradient-norm
    clipping, compiles the sampling function and then runs ``MainLoop``
    with the tracking/plotting/saving extensions.

    Takes no arguments and returns nothing; all results are produced by the
    main-loop extensions.
    """
    name = 'coco-nopeep'
    epochs = 200
    subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
    if not os.path.isdir(subdir):
        os.mkdir(subdir)

    bs = 200
    data_train = MSCoco(dataset='train', num=82611, bs=bs)
    data_valid = MSCoco(dataset='val', num=4989, bs=bs)

    train_scheme = SequentialScheme(data_train.num_examples, bs)
    train_stream = DataStream.default_stream(
        data_train, iteration_scheme=train_scheme)
    valid_scheme = SequentialScheme(data_valid.num_examples, bs)
    valid_stream = DataStream.default_stream(
        data_valid, iteration_scheme=valid_scheme)

    # Image dimensions; reused for `image_size` and the sampler below so
    # the literal 32 is not repeated.
    img_height, img_width = (32, 32)

    x = T.matrix('features')
    # Debug aid:
    # x.tag.test_value = np.random.rand(bs, 3*32*32).astype('float32')
    y = T.lmatrix('captions')
    # y.tag.test_value = np.random.rand(bs, 57).astype(int)
    mask = T.lmatrix('mask')
    # mask.tag.test_value = np.ones((bs,57)).astype(int)

    # Model hyperparameters, passed positionally to ImageModel.
    K = 25323
    lang_N = 57
    N = 32
    read_size = 9
    write_size = 9
    m = 256
    gen_dim = 550
    infer_dim = 550
    z_dim = 275
    l = 512

    model = ImageModel(bs, K, lang_N, N, read_size, write_size, m,
                       gen_dim, infer_dim, z_dim, l,
                       image_size=img_height * img_width,
                       channels=3, cinit=0.0)
    model._inputs = [x, y, mask]

    kl, log_recons, log_likelihood, c = model.train(x, y, mask)
    # The names are what the Track/DataStreamTrack extensions refer to.
    kl.name = 'kl'
    log_recons.name = 'log_recons'
    log_likelihood.name = 'log_likelihood'
    c.name = 'c'

    model._outputs = [kl, log_recons, log_likelihood, c]

    params = model.params

    lr = theano.shared(np.asarray(0.001).astype(theano.config.floatX))

    # Hand-rolled optimiser used in place of the earlier call
    #   solver(log_likelihood, params, lr=lr, clipnorm=10.0)
    # from solvers.RMSProp.
    # NOTE(review): the step below subtracts the gradient of
    # `log_likelihood`, i.e. it minimises it -- confirm that the model
    # returns a cost rather than a quantity to maximise.
    grads = T.grad(log_likelihood, params)

    # Zero-initialised running averages of squared gradients, one per param.
    sq_grad_avgs = [theano.shared(p.get_value() * 0) for p in params]

    threshold = 10.0
    decay = 0.9
    updates = OrderedDict()

    for p, avg, g in zip(params, sq_grad_avgs, grads):
        # Clip: shrink the gradient when its L2 norm exceeds `threshold`.
        l2_norm = T.sqrt(T.sqr(g).sum())
        # Deliberately not called `m`, which is a model hyperparameter above.
        clip_scale = T.switch(l2_norm < threshold, 1, threshold / l2_norm)
        grad = clip_scale * g

        new_avg = decay * avg + (1 - decay) * grad ** 2
        updates[avg] = new_avg
        updates[p] = p - (lr / T.sqrt(new_avg + 1e-6)) * grad

    model._updates = updates

    logger.info('Compiling sample function')
    model.build_sample_function(y, mask)
    logger.info('Compiled sample function')

    # ============= TRAIN =========
    plots = [['train_kl', 'valid_kl'],
             ['train_log_recons', 'valid_log_recons'],
             ['train_log_likelihood', 'valid_log_likelihood']]
    main_loop = MainLoop(
        model,
        train_stream,
        [
            FinishAfter(epochs),
            Track(variables=['kl', 'log_recons', 'log_likelihood'],
                  prefix='train'),
            # TrackBest(variables=['kl'], prefix='train'),  # disabled
            DataStreamTrack(valid_stream,
                            ['kl', 'log_recons', 'log_likelihood'],
                            prefix='valid'),
            # Presumably the image dimensions of the samples; both are 32.
            SampleSentences(subdir, bs, img_height, img_width),
            DropLearningRate(lr, 25, 0.0001),
            Plot(name, plots, 'http://nameless-wave-6526.herokuapp.com/'),
            SaveModel(subdir, name + '.model'),
            TimeProfile(),
            Printing(),
        ])
    main_loop.run()