import logging
import os
import time
from collections import OrderedDict

import numpy as np
import theano
import theano.tensor as T

# The data streams below match Fuel's API (assumed import paths).
from fuel.schemes import SequentialScheme
from fuel.streams import DataStream

# ImageModel, MSCoco, CaptionedMNIST, MainLoop and the training extensions
# (FinishAfter, Track, DataStreamTrack, SampleSentences, DropLearningRate,
# Plot, SaveModel, TimeProfile, Printing) are project-specific and assumed to
# be importable from the surrounding codebase; they are not shown here.

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def run():
    name = 'coco-nopeep'
    epochs = 200
    subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
    if not os.path.isdir(subdir):
        os.mkdir(subdir)
    

    bs = 200
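    # MS-COCO caption data served by the project's MSCoco wrapper; the
    # DataStream + SequentialScheme below iterate over it in fixed-size
    # sequential minibatches.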
    data_train = MSCoco(dataset='train', num=82611, bs=bs)
    data_valid = MSCoco(dataset='val', num=4989, bs=bs)

    train_stream = DataStream.default_stream(
        data_train, iteration_scheme=SequentialScheme(data_train.num_examples, bs))
    valid_stream = DataStream.default_stream(
        data_valid, iteration_scheme=SequentialScheme(data_valid.num_examples, bs))


    img_height, img_width = (32,32)
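
    # Symbolic inputs: flattened RGB images, integer caption word indices,
    # and a 0/1 mask marking the real (non-padding) caption positions.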

    
    x = T.matrix('features')
    #x.tag.test_value = np.random.rand(bs, 3*32*32).astype('float32')
    y = T.lmatrix('captions')
    #y.tag.test_value = np.random.rand(bs, 57).astype(int)
    mask = T.lmatrix('mask')
    #mask.tag.test_value = np.ones((bs,57)).astype(int)
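
    # Model hyper-parameters: K is the caption vocabulary size and lang_N the
    # padded caption length (matching the (bs, 57) caption shape above); the
    # remaining sizes configure the DRAW-style read/write attention and the
    # recurrent generator, inference, and latent dimensions.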

    K = 25323
    lang_N = 57
    N = 32
    read_size = 9
    write_size = 9
    m = 256
    gen_dim = 550
    infer_dim = 550
    z_dim = 275
    l = 512

    model = ImageModel(bs, K, lang_N, N, read_size, write_size, m,
                       gen_dim, infer_dim, z_dim, l,
                       image_size=32 * 32, channels=3, cinit=0.0)
    model._inputs = [x, y, mask]
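
    # Build the training graph: the KL term, the reconstruction term, the
    # overall objective minimized by the updates below, and the final canvas c.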

    kl, log_recons, log_likelihood, c = model.train(x,y,mask)
    kl.name = 'kl'
    log_recons.name = 'log_recons'
    log_likelihood.name = 'log_likelihood'
    c.name = 'c'

    model._outputs = [kl, log_recons, log_likelihood, c]

    params = model.params

    #from solvers.RMSProp import RMSProp as solver
    lr = theano.shared(np.asarray(0.001).astype(theano.config.floatX))
    #updates = solver(log_likelihood, params, lr=lr, clipnorm=10.0)#0.001)
    #lr = 0.001
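
    # Hand-rolled RMSProp with per-parameter gradient clipping, standing in
    # for the commented-out solver call above.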
    grads = T.grad(log_likelihood, params)
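    # One running average of squared gradients per parameter, initialized to
    # zeros of the same shape.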
    his = []
    for p in params:
        pz = p.get_value()*0
        his.append(theano.shared(pz))

    threshold = 10.0
    decay = 0.9
    updates = OrderedDict()

    for p, ph, g in zip(params, his, grads):
        # Clip each gradient to an L2 norm of at most `threshold`.
        l2_norm = T.sqrt(T.sqr(g).sum())
        scale = T.switch(l2_norm < threshold, 1.0, threshold / l2_norm)
        grad = scale * g

        # RMSProp: update the running average of squared gradients, then take
        # a step scaled by the root of that average.
        ph_n = decay * ph + (1 - decay) * grad ** 2
        updates[ph] = ph_n
        updates[p] = p - (lr / T.sqrt(ph_n + 1e-6)) * grad

    model._updates = updates

    logger.info('Compiling sample function')
    model.build_sample_function(y, mask)
    logger.info('Compiled sample function')

    # ============= TRAIN =========
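    # Track the KL/reconstruction/likelihood terms on the training and
    # validation streams, dump samples and model checkpoints to `subdir`,
    # lower the learning rate via DropLearningRate, and push live plots.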
    plots = [['train_kl','valid_kl'],
             ['train_log_recons','valid_log_recons'],
             ['train_log_likelihood','valid_log_likelihood']]
    main_loop = MainLoop(model, train_stream,
                         [FinishAfter(epochs),
                          Track(variables=['kl','log_recons','log_likelihood'], prefix='train'),
                          #TrackBest(variables=['kl'], prefix='train'),
                          DataStreamTrack(valid_stream, ['kl','log_recons','log_likelihood'], prefix='valid'),
                          SampleSentences(subdir, bs, 32, 32),
                          DropLearningRate(lr, 25, 0.0001),
                          Plot(name, plots, 'http://nameless-wave-6526.herokuapp.com/'),
                          SaveModel(subdir, name+'.model'),
                          TimeProfile(),
                          Printing()])
    main_loop.run()
Example #2
def run():
    name = 'colored-mnist'
    epochs = 200
    subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
    if not os.path.isdir(subdir):
        os.mkdir(subdir)
    

    bs = 150
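    # Captioned-MNIST data; `banned` passes a random list of digits for the
    # dataset wrapper to exclude. The streams again serve fixed-size
    # sequential minibatches.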
    data_train = CaptionedMNIST(banned=[np.random.randint(0, 10) for i in range(12)],
                                dataset='train', num=50000, bs=bs)
    data_valid = CaptionedMNIST(banned=[np.random.randint(0, 10) for i in range(12)],
                                dataset='valid', num=10000, bs=bs)

    train_stream = DataStream.default_stream(
        data_train, iteration_scheme=SequentialScheme(data_train.num_examples, bs))
    valid_stream = DataStream.default_stream(
        data_valid, iteration_scheme=SequentialScheme(data_valid.num_examples, bs))


    img_height, img_width = (60,60)

    
    x = T.matrix('features')
    #x.tag.test_value = np.random.rand(bs, 60*60).astype('float32')
    y = T.lmatrix('captions')
    #y.tag.test_value = np.random.rand(bs, 12).astype(int)
    mask = T.lmatrix('mask')
    #mask.tag.test_value = np.ones((bs,12)).astype(int)
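
    # Same hyper-parameter layout as the COCO example above, scaled down for
    # captioned MNIST: smaller vocabulary (K), shorter captions (lang_N), and
    # smaller recurrent/latent dimensions.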

    K = 29
    lang_N = 14
    N = 32
    read_size = 8
    write_size = 8
    m = 256
    gen_dim = 300
    infer_dim = 300
    z_dim = 150
    l = 512

    model = ImageModel(bs, K, lang_N, N, read_size, write_size, m,
                       gen_dim, infer_dim, z_dim, l,
                       image_size=60 * 60, cinit=-10, channels=3)
    model._inputs = [x, y, mask]

    kl, log_recons, log_likelihood, c = model.train(x,y,mask)
    kl.name = 'kl'
    log_recons.name = 'log_recons'
    log_likelihood.name = 'log_likelihood'
    c.name = 'c'

    model._outputs = [kl, log_recons, log_likelihood, c]

    params = model.params
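
    # This example uses the project's RMSProp solver directly instead of the
    # hand-rolled update loop from the previous example.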

    from solvers.RMSProp import RMSProp as solver
    lr = theano.shared(np.asarray(0.001).astype(theano.config.floatX))
    updates = solver(log_likelihood, params, lr=lr)#0.001)#, clipnorm=10.0)
    model._updates = updates

    logger.info('Compiling sample function')
    model.build_sample_function(y, mask)
    logger.info('Compiled sample function')

    # ============= TRAIN =========
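    # Same training driver as the COCO example, but sampling 60x60 images and
    # lowering the learning rate later in training.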
    plots = [['train_kl','valid_kl'],
             ['train_log_recons','valid_log_recons'],
             ['train_log_likelihood','valid_log_likelihood']]
    main_loop = MainLoop(model, train_stream,
                         [FinishAfter(epochs),
                          Track(variables=['kl','log_recons','log_likelihood'], prefix='train'),
                          #TrackBest(variables=['kl'], prefix='train'),
                          DataStreamTrack(valid_stream, ['kl','log_recons','log_likelihood'], prefix='valid'),
                          SampleSentences(subdir, bs, 60, 60),
                          DropLearningRate(lr, 110, 0.00001),
                          Plot(name, plots, 'http://nameless-wave-6526.herokuapp.com/'),
                          SaveModel(subdir, name+'.model'),
                          TimeProfile(),
                          Printing()])
    main_loop.run()