Example #1
def train(params):

    try:

        # GRID SEARCH: override defaults with any hyperparameters passed in
        print(params)

        global model_config
        for key in ('margin', 'output_dim', 'max_cap_length', 'optimizer', 'dim_word'):
            if key in params:
                model_config[key] = params[key]


        # Load training and development sets
        print('Loading dataset')
        dataset = load_dataset(model_config['data'], cnn=model_config['cnn'])

        train = dataset['train']
        #train['ims'] = train['ims'][0:1000]
        #train['caps'] = train['caps'][0:5000]
        for key, value in train.items():
            print('Size: ' + key + ': ')
            print(len(value))
        test = dataset['test']
        val = dataset['dev']

        # Create dictionary
        print('Creating dictionary')

        worddict = build_dictionary(train['caps'] + val['caps'])
        print('Dictionary size: ' + str(len(worddict)))
        model_config['worddict'] = len(worddict)

        # Build a word -> vector index from the pretrained GloVe file
        embeddings_index = {}
        num_nonreg = 0
        with open(model_config['glove.path']) as f:
            for line in f:
                values = line.split()
                try:
                    word = values[0].encode('ascii')
                except UnicodeEncodeError:
                    # word = values[0].encode('ascii', 'ignore')
                    print(values[0])
                    num_nonreg += 1
                    continue
                try:
                    coefs = numpy.asarray(values[1:], dtype='float32')
                except ValueError:
                    # skip malformed vector lines
                    print(values[1:])
                    continue
                embeddings_index[word] = coefs
        print('Found %s word vectors.' % len(embeddings_index))
        print('Found %s non-regular words.' % num_nonreg)

        embedding_matrix = numpy.zeros((len(worddict) + 2, model_config['dim_word']))
        for word, i in worddict.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
            else:
                print(word)
            if i < 5:
              print(i)
              print(word)
              print(embedding_matrix[i+2])

        print(embedding_matrix[0])
        print(embedding_matrix[4])

        print('Loading data')
        train_iter = datasource.Datasource(train, batch_size=model_config['batch_size'], worddict=worddict)
        val_iter = datasource.Datasource(val, batch_size=model_config['batch_size'], worddict=worddict)
        test_iter = datasource.Datasource(test, batch_size=model_config['batch_size'], worddict=worddict)

        print ("Image model loading")
        # # this returns a tensor of emb_image
        image_input = Input(shape=(model_config['dim_cnn'],), name='image_input')
        X = Dense(model_config['output_dim'])(image_input)
        emb_image = Lambda(lambda x: l2norm(x))(X)
        #emb_image = Lambda(lambda x: abs(x))(X)

        print ("Text model loading")
        # this returns a tensor of emb_cap
        cap_input = Input(shape=(model_config['max_cap_length'],), dtype='int32', name='cap_input')
        # note: padded timesteps are not masked here; a Masking layer placed before the
        # Embedding has no effect, so mask_zero=True on the Embedding would be needed
        
        # from scratch
        #X = Embedding(output_dim=model_config['dim_word'], input_dim=model_config['worddict']+2, input_length=model_config['max_cap_length'])(cap_input)
        
        # pretrained GloVe
        X = Embedding(output_dim=model_config['dim_word'], input_dim=len(worddict)+2, input_length=model_config['max_cap_length'], weights=[embedding_matrix], trainable=True)(cap_input)
        
        # GRU activation
        X = GRU(model_config['output_dim'], return_sequences=False)(X)
        
        # LSTM activation
        #X = LSTM(output_dim=model_config['output_dim'], return_sequences=False)(X)
        
        emb_cap = Lambda(lambda x: l2norm(x))(X)
        #emb_cap = Lambda(lambda x: abs(x))(X)

        print ("loading the joined model")
        # merged = _Merge( mode='concat')([emb_cap, emb_image])
        merged = concatenate([emb_cap, emb_image])
        model = Model(inputs=[cap_input, image_input], outputs=[merged])

        print ("compiling the model")
        model.compile(optimizer=model_config['optimizer'][0], loss=contrastive_loss)
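        # train_generator below pairs each [caption, image] batch with an all-zero
        # dummy target: Keras requires a target array, but the contrastive loss is
        # expected to be computed from the concatenated embeddings alone.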
        def train_generator(batch_size):
            while True:
                batches = [[x, im] for x, im in train_iter]
                dummy = numpy.zeros(shape=(batch_size, model_config['output_dim'] * 2))
                for batch in batches:
                    yield (batch, dummy)

        print(model_config['worddict'] / model_config['batch_size'] / 100)

        # uncomment in order to load model weights
        #model.load_weights('my_model_weights.h5')

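        # eval_model assumes model.get_weights() returns, in order: the Embedding
        # matrix, the three GRU arrays (kernel, recurrent kernel, bias), and the
        # kernel and bias of the image-branch Dense layer.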
        def eval_model():
            print('evaluating model...')
            weights = model.get_weights()
            for j in range(len(weights)):
                print(weights[j].shape)
            emb_w = weights[0]
            im_w = weights[4]
            im_b = weights[5]
            gru_weights = weights[1:4]

            test_model_im = Model(inputs=image_input, outputs=emb_image)
            test_model_im.set_weights([im_w, im_b])
            test_model_im.compile(optimizer='adam', loss=contrastive_loss)
            test_model_cap = Model(inputs=cap_input, outputs=emb_cap)
            test_model_cap.set_weights([emb_w]+ gru_weights)
            test_model_cap.compile(optimizer='adam', loss=contrastive_loss)

            test_cap, test_im = test_iter.all()
            all_caps = numpy.zeros(shape=(len(test_cap),model_config['max_cap_length']))
            all_images = numpy.zeros(shape=(len(test_cap), model_config['dim_cnn']))
            pred_cap = test_model_cap.predict(test_cap)
            pred_im = test_model_im.predict(test_im)
            test_errs = compute_errors(pred_cap, pred_im)
            
            r10_c, rmean_c = t2i(test_errs)
            r10_i, rmean_i = i2t(test_errs)
            print ("Image to text: %.1f %.1f" % (r10_i, rmean_i))
            print ("Text to image: %.1f %.1f" % (r10_c, rmean_c))


        for ip in range(model_config['epoch']):

            print('Epoch: %s ...' % str(ip+1))
            train_hist = model.fit_generator(train_generator(batch_size=model_config['batch_size']),
                                             steps_per_epoch=len(train['ims']) // model_config['batch_size'],
                                             #steps_per_epoch=5,
                                             epochs=1,  # one Keras epoch per outer-loop iteration
                                             verbose=1, class_weight=None, max_queue_size=1)
            model.save_weights('../results/from_scratch/my_model_weights_' + str(ip) + '.h5')
            print(train_hist.history)

        #evaluate model - recall@10 & mean_rank metric
        eval_model()

    except:
        raise
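Note: the example above compiles with l2norm and contrastive_loss, which are not defined in the listing. A minimal Keras-backend sketch is given below, assuming the usual pairwise ranking loss for visual-semantic embeddings and the [caption | image] layout of the concatenated output built above, with margin and output_dim read from the global model_config; the original helpers may differ.

from keras import backend as K

def l2norm(x):
    # scale each row to unit L2 norm
    return x / (K.sqrt(K.sum(K.square(x), axis=-1, keepdims=True)) + K.epsilon())

def contrastive_loss(y_true, y_pred):
    # y_pred is the concatenated [caption | image] embedding produced by the model;
    # y_true is the dummy zero target from train_generator and is ignored
    margin = model_config['margin']
    dim = model_config['output_dim']
    cap = y_pred[:, :dim]
    im = y_pred[:, dim:]
    sims = K.dot(cap, K.transpose(im))             # (batch, batch) caption-image similarities
    pos = K.sum(cap * im, axis=-1, keepdims=True)  # matching-pair similarities, (batch, 1)
    # hinge over every in-batch negative; the diagonal terms equal the constant
    # margin and carry no gradient, so they are left unmasked in this sketch
    cost_cap = K.maximum(0., margin - pos + sims)               # caption i vs. image j
    cost_im = K.maximum(0., margin - K.transpose(pos) + sims)   # image j vs. caption i
    return K.sum(cost_cap) + K.sum(cost_im)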
Example #2
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print 'reloading...' + load_from
        with open('%s.pkl' % load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v

    model_options = curr_model['options']

    # initialize logger
    import datetime
    timestampedName = datetime.datetime.now().strftime(
        '%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName,
              hyperparams=model_options,
              saveDir='vis/training',
              xLabel='Examples Seen',
              saveFrequency=1)

    print curr_model['options']

    # Load training and development sets
    print 'Loading dataset'
    dataset = load_dataset(model_options['data'],
                           cnn=model_options['cnn'],
                           load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print 'Dictionary size: ' + str(len(worddict))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb'))

    print 'Loading data'
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print 'Building sentence encoder'
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print 'Building errors..'
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    print 'Optimization'

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in xrange(model_options['max_epochs']):

        print 'Epoch ', eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
                log.update({'Error': float(cost)}, n_samples)

            if numpy.mod(uidx, model_options['validFreq']) == 0:

                print 'Computing results...'

                # encode sentences efficiently
                dev_s = encode_sentences(
                    curr_model,
                    dev_caps,
                    batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs,
                                                              vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (
                    r1, r5, r10, medr, meanr)
                log.update(
                    {
                        'R@1': r1,
                        'R@5': r5,
                        'R@10': r10,
                        'median_rank': medr,
                        'mean_rank': meanr
                    }, n_samples)
                print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (
                    r1i, r5i, r10i, medri, meanri)
                log.update(
                    {
                        'Image2Caption_R@1': r1i,
                        'Image2Caption_R@5': r5i,
                        'Image2Caption_R@10': r10i,
                        'Image2Caption_median_rank': medri,
                        'Image2Caption_mean_rank': meanri
                    }, n_samples)

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print 'Saving...',
                    numpy.savez('%s/%s' % (save_dir, name), **unzip(tparams))
                    print 'Done'
                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open(
                            'vis/roc/%s/%s.json' %
                        (model_options['data'], timestampedName), 'w') as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        index = json.load(open('vis/roc/index.json', 'r'))
                    except IOError:
                        index = {model_options['data']: []}

                    models = index[model_options['data']]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)

        print 'Seen %d samples' % n_samples
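Note: t2i, i2t and compute_errors are external helpers. For reference, the text-to-image ranking metric is typically computed as in the numpy sketch below, assuming dev_errs is a (captions x images) error matrix with 5 captions per image; the vis_details bookkeeping used above is omitted and the original implementation may differ.

import numpy

def t2i(errs):
    # errs[i, j]: error (distance) between caption i and image j, 5 captions per image
    ranks = numpy.zeros(errs.shape[0])
    for i in range(errs.shape[0]):
        order = numpy.argsort(errs[i])                 # images from best to worst match
        ranks[i] = numpy.where(order == i // 5)[0][0]  # rank of the ground-truth image
    r1 = 100.0 * numpy.mean(ranks < 1)
    r5 = 100.0 * numpy.mean(ranks < 5)
    r10 = 100.0 * numpy.mean(ranks < 10)
    medr = numpy.floor(numpy.median(ranks)) + 1
    meanr = ranks.mean() + 1
    return r1, r5, r10, medr, meanr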
Example #3
def train(params):

    try:

        # GRID SEARCH: override defaults with any hyperparameters passed in
        print(params)

        global model_config
        for key in ('margin', 'output_dim', 'max_cap_length', 'optimizer', 'dim_word'):
            if key in params:
                model_config[key] = params[key]

        # Load training and development sets
        print('Loading dataset')
        dataset = load_dataset(model_config['data'], cnn=model_config['cnn'])

        train = dataset['train']
        test = dataset['test']
        val = dataset['dev']

        # Create dictionary
        print('Creating dictionary')

        worddict = build_dictionary(train['caps'] + val['caps'])
        print('Dictionary size: ' + str(len(worddict)))
        model_config['worddict'] = len(worddict)

        print('Loading data')
        train_iter = datasource.Datasource(
            train, batch_size=model_config['batch_size'], worddict=worddict)
        val_iter = datasource.Datasource(val,
                                         batch_size=model_config['batch_size'],
                                         worddict=worddict)
        test_iter = datasource.Datasource(
            test, batch_size=model_config['batch_size'], worddict=worddict)

        print("Image model loading")
        # # this returns a tensor of emb_image
        image_input = Input(shape=(model_config['dim_cnn'], ),
                            name='image_input')
        X = Dense(model_config['output_dim'])(image_input)
        X = Lambda(lambda x: l2norm(x))(X)
        emb_image = Lambda(lambda x: abs(x))(X)

        print("Text model loading")
        # this returns a tensor of emb_cap
        cap_input = Input(shape=(model_config['max_cap_length'], ),
                          dtype='int32',
                          name='cap_input')
        # note: a Masking layer applied before the Embedding is discarded; to mask
        # padded timesteps, mask_zero=True would have to be set on the Embedding
        X = Embedding(output_dim=model_config['dim_word'],
                      input_dim=model_config['worddict'] + 2,  # +2 for the padding/OOV indices
                      input_length=model_config['max_cap_length'])(cap_input)
        X = GRU(output_dim=model_config['output_dim'],
                return_sequences=False)(X)
        X = Lambda(lambda x: l2norm(x))(X)
        emb_cap = Lambda(lambda x: abs(x))(X)

        print("loading the joined model")
        merged = Merge(mode='concat')([emb_cap, emb_image])
        model = Model(input=[cap_input, image_input], output=[merged])

        print("compiling the model")
        model.compile(optimizer=model_config['optimizer'][0],
                      loss=contrastive_loss)

        # uncomment for model selection and add  validation_data=(gen_val_data()) when calling fit_generator
        # def gen_val_data():
        #     val_bacthes = [[x, im] for x, im in val_iter]
        #     x1 = []
        #     x2 = []
        #     for batch in val_bacthes:
        #         x1.append(batch[0])
        #         x2.append(batch[1])
        #     mat_x1 = numpy.array(x1).reshape(7*model_config['batch_size'],model_config['max_cap_length'])
        #     mat_x2 = numpy.array(x2).reshape(7*model_config['batch_size'], model_config['dim_cnn'])
        #     dummy = numpy.zeros(shape=(len(mat_x1), model_config['output_dim'] * 2))
        #     return [mat_x1,mat_x2], dummy
        #

        def train_generator(batch_size):
            # loop forever so Keras can keep drawing batches; each batch is paired
            # with a dummy zero target that the contrastive loss is expected to ignore
            while True:
                batches = [[x, im] for x, im in train_iter]
                dummy = numpy.zeros(shape=(batch_size,
                                           model_config['output_dim'] * 2))
                for batch in batches:
                    yield (batch, dummy)

        #uncomment for model selection and add  callbacks=[early_stopping] when calling fit_generator
        #ModelCheckpoint('/home/igor/PycharmProjects/GRU/models', monitor='val_loss', verbose=0, save_best_only=False, mode='auto')
        #early_stopping = EarlyStopping(monitor='val_loss', patience=50)

        train_hist = model.fit_generator(
            train_generator(batch_size=model_config['batch_size']),
            samples_per_epoch=(model_config['worddict'] /
                               model_config['batch_size'] *
                               model_config['batch_size']),
            nb_epoch=model_config['epoch'],
            verbose=2,
            class_weight=None,
            max_q_size=0)

        model.save_weights('my_model_weights.h5')

        print(train_hist.history)

        # uncomment in order to load model weights
        #model.load_weights('my_model_weights.h5')

        def eval_model():
            print('evaluating model...')
            weights = model.get_weights()
            emb_w = weights[0]
            im_w = weights[1]
            im_b = weights[2]
            gru_weights = weights[3:12]

            test_model_im = Model(input=image_input, output=emb_image)
            test_model_im.set_weights([im_w, im_b])
            test_model_im.compile(optimizer='adam', loss=contrastive_loss)
            test_model_cap = Model(input=cap_input, output=emb_cap)
            test_model_cap.set_weights([emb_w] + gru_weights)
            test_model_cap.compile(optimizer='adam', loss=contrastive_loss)

            test_cap, test_im = test_iter.all()
            all_caps = numpy.zeros(shape=(len(test_cap),
                                          model_config['max_cap_length']))
            all_images = numpy.zeros(shape=(len(test_cap),
                                            model_config['dim_cnn']))
            pred_cap = test_model_cap.predict(test_cap)
            pred_im = test_model_im.predict(test_im)
            test_errs = compute_errors(pred_cap, pred_im)

            r10_c, rmean_c = t2i(test_errs)
            r10_i, rmean_i = i2t(test_errs)
            print("Image to text: %.1f %.1f" % (r10_i, rmean_i))
            print("Text to image: %.1f %.1f" % (r10_c, rmean_c))

        #evaluate model - recall@10 & mean_rank metric
        eval_model()

        # uncomment for model selection
        #return {'loss': train_hist.history['loss'][0], 'status': STATUS_OK, 'model': model}

    except:
        raise
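Note: the commented-out return value ({'loss': ..., 'status': STATUS_OK, 'model': model}) suggests train() is meant to be driven by hyperopt for the grid search mentioned at the top. A hypothetical driver is sketched below; the search-space values are illustrative only, and the return statement in train() would have to be re-enabled for fmin to read the loss.

from hyperopt import fmin, tpe, hp, Trials

# illustrative search space; the real ranges are not given in the listing
search_space = {
    'margin': hp.choice('margin', [0.1, 0.2, 0.5]),
    'output_dim': hp.choice('output_dim', [512, 1024]),
    'optimizer': hp.choice('optimizer', ['adam', 'rmsprop']),
}

trials = Trials()
best = fmin(fn=train, space=search_space, algo=tpe.suggest,
            max_evals=10, trials=trials)
print(best)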
Example #4
def train(params):

    try:

        # GRID SEARCH: override defaults with any hyperparameters passed in
        print(params)

        global model_config
        for key in ('margin', 'output_dim', 'max_cap_length', 'optimizer', 'dim_word'):
            if key in params:
                model_config[key] = params[key]

        # Load training and development sets
        print('Loading dataset')
        dataset = load_dataset(model_config['data'], cnn=model_config['cnn'])

        train = dataset['train']
        #train['ims'] = train['ims'][0:1000]
        #train['caps'] = train['caps'][0:5000]
        for key, value in train.items():
            print('Size: ' + key + ': ')
            print(len(value))
        test = dataset['test']
        val = dataset['dev']

        # Create dictionary
        print('Creating dictionary')

        worddict = build_dictionary(train['caps'] + val['caps'])
        print('Dictionary size: ' + str(len(worddict)))
        model_config['worddict'] = len(worddict)

        # Build a word -> vector index from the pretrained GloVe file
        embeddings_index = {}
        with open(model_config['glove.path']) as f:
            for line in f:
                values = line.split()
                word = values[0]
                try:
                    coefs = numpy.asarray(values[1:], dtype='float32')
                except ValueError:
                    # skip malformed vector lines
                    print(values[1:])
                    continue
                embeddings_index[word] = coefs
        print('Found %s word vectors.' % len(embeddings_index))

        embedding_matrix = numpy.zeros(
            (len(worddict) + 1, model_config['dim_word']))
        for word, i in worddict.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector

        print('Loading data')
        train_iter = datasource.Datasource(
            train, batch_size=model_config['batch_size'], worddict=worddict)
        val_iter = datasource.Datasource(val,
                                         batch_size=model_config['batch_size'],
                                         worddict=worddict)
        test_iter = datasource.Datasource(
            test, batch_size=model_config['batch_size'], worddict=worddict)

        print("Image model loading")
        # # this returns a tensor of emb_image
        image_input = Input(shape=(model_config['dim_cnn'], ),
                            name='image_input')
        X = Dense(model_config['output_dim'])(image_input)
        X = Lambda(lambda x: l2norm(x))(X)
        emb_image = Lambda(lambda x: abs(x))(X)

        print("Text model loading")
        # this returns a tensor of emb_cap
        cap_input = Input(shape=(model_config['max_cap_length'], ),
                          dtype='int32',
                          name='cap_input')
        # note: a Masking layer applied before the Embedding is discarded; to mask
        # padded timesteps, mask_zero=True would have to be set on the Embedding
        X = Embedding(output_dim=model_config['dim_word'],
                      input_dim=model_config['worddict'] + 2,
                      input_length=model_config['max_cap_length'])(cap_input)
        #X = Embedding(output_dim=model_config['dim_word'], input_dim=len(worddict)+1, input_length=model_config['max_cap_length'], weights=[embedding_matrix], trainable=False)(cap_input)
        X = GRU(model_config['output_dim'], return_sequences=False)(X)
        X = Lambda(lambda x: l2norm(x))(X)
        emb_cap = Lambda(lambda x: abs(x))(X)

        print("loading the joined model")
        # merged = _Merge( mode='concat')([emb_cap, emb_image])
        merged = concatenate([emb_cap, emb_image])
        model = Model(inputs=[cap_input, image_input], outputs=[merged])

        print("compiling the model")
        model.compile(optimizer=model_config['optimizer'][0],
                      loss=contrastive_loss)

        # uncomment for model selection and add  validation_data=(gen_val_data()) when calling fit_generator
        # def gen_val_data():
        #     val_bacthes = [[x, im] for x, im in val_iter]
        #     x1 = []
        #     x2 = []
        #     for batch in val_bacthes:
        #         x1.append(batch[0])
        #         x2.append(batch[1])
        #     mat_x1 = numpy.array(x1).reshape(7*model_config['batch_size'],model_config['max_cap_length'])
        #     mat_x2 = numpy.array(x2).reshape(7*model_config['batch_size'], model_config['dim_cnn'])
        #     dummy = numpy.zeros(shape=(len(mat_x1), model_config['output_dim'] * 2))
        #     return [mat_x1,mat_x2], dummy
        #

        #def train_generator(batch_size):
        #    def gen(batch_size):
        #        batches = [[x, im] for x, im in train_iter]
        #        dummy = numpy.zeros(shape=(batch_size, model_config['output_dim'] * 2))
        #        for batch in batches:
        #            yield (batch, dummy)
        #    return gen

        def train_generator(batch_size):
            while True:
                batches = [[x, im] for x, im in train_iter]
                dummy = numpy.zeros(shape=(batch_size,
                                           model_config['output_dim'] * 2))
                for batch in batches:
                    yield (batch, dummy)

        #uncomment for model selection and add  callbacks=[early_stopping] when calling fit_generator
        #ModelCheckpoint('/home/igor/PycharmProjects/GRU/models', monitor='val_loss', verbose=0, save_best_only=False, mode='auto')
        #early_stopping = EarlyStopping(monitor='val_loss', patience=50)

        print(model_config['worddict'] / model_config['batch_size'] / 100)

        # uncomment in order to load model weights
        #model.load_weights('my_model_weights.h5')

        def eval_model():
            print('evaluating model...')
            weights = model.get_weights()
            for j in range(len(weights)):
                print(weights[j].shape)
            emb_w = weights[0]
            im_w = weights[4]
            im_b = weights[5]
            gru_weights = weights[1:4]

            test_model_im = Model(inputs=image_input, outputs=emb_image)
            test_model_im.set_weights([im_w, im_b])
            test_model_im.compile(optimizer='adam', loss=contrastive_loss)
            test_model_cap = Model(inputs=cap_input, outputs=emb_cap)
            test_model_cap.set_weights([emb_w] + gru_weights)
            test_model_cap.compile(optimizer='adam', loss=contrastive_loss)

            test_cap, test_im = test_iter.all()
            all_caps = numpy.zeros(shape=(len(test_cap),
                                          model_config['max_cap_length']))
            all_images = numpy.zeros(shape=(len(test_cap),
                                            model_config['dim_cnn']))
            pred_cap = test_model_cap.predict(test_cap)
            pred_im = test_model_im.predict(test_im)
            test_errs = compute_errors(pred_cap, pred_im)

            r10_c, rmean_c = t2i(test_errs)
            r10_i, rmean_i = i2t(test_errs)
            print("Image to text: %.1f %.1f" % (r10_i, rmean_i))
            print("Text to image: %.1f %.1f" % (r10_c, rmean_c))

        for ip in range(10):

            model.load_weights('my_model_weights_' + str(ip) + '.h5',
                               by_name=True)
            #      print(train_hist.history)

            #evaluate model - recall@10 & mean_rank metric
            eval_model()

        # uncomment for model selection
        #return {'loss': train_hist.history['loss'][0], 'status': STATUS_OK, 'model': model}

    except:
        raise
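Note: compute_errors(pred_cap, pred_im) is not shown in these listings. Since both branches end in an L2 normalisation, one plausible definition is a cosine-distance matrix between every caption and image embedding, as sketched below; the original helper may use a different error measure.

import numpy

def compute_errors(pred_cap, pred_im):
    # rows: captions, columns: images; smaller value = better match
    return 1.0 - numpy.dot(pred_cap, pred_im.T)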
Example #5
def trainer(**kwargs):
    """
    Train the model according to input params
    Info about input params is available in parameters.py
    """
    # Timing
    print('Starting time:', datetime.now())
    sys.stdout.flush()
    t_start_train = time.time()

    # Model options
    # load old model, including parameters, but overwrite with new options

    # Extract model options from arguments
    model_options = {}
    for k, v in kwargs.items():
        model_options[k] = v

    # Print input options
    print('PARAMETERS BEFORE LOADING:')
    for k, v in model_options.items():
        print('{:>26}: {}'.format(k, v))
    sys.stdout.flush()

    # Reload options if required
    curr_model = dict()
    if model_options['reload_']:
        # Reload model parameters
        opt_filename_reload = get_opt_filename(model_options, previous=True)
        print('reloading...', opt_filename_reload)
        sys.stdout.flush()
        try:
            with open(opt_filename_reload, 'rb') as f:
                curr_model = pkl.load(f)
        except Exception:
            print('Failed to reload parameters, falling back to the supplied parameters')
            curr_model['options'] = {}

        # Check if we reload from best model or last model
        if model_options['load_from'] in ['Best', 'best', 'B', 'b']:
            load_from_best = True
            print('Loading from Best saved model in validation results')
        elif model_options['load_from'] in ['Last', 'last', 'L', 'l']:
            load_from_best = False
            print('Loading from Last saved model')
        else:
            print('Unknown choice for "load_from" parameter',
                  model_options['load_from'])
            print('Please choose one of:', ['Best', 'best', 'B', 'b'],
                  ['Last', 'last', 'L', 'l'])
            print('Using Last as default')
            load_from_best = False

        # Reload end-point parameters
        state_filename = get_sol_filename(model_options,
                                          best=load_from_best,
                                          previous=True)
        print('reloading...', state_filename)
        sys.stdout.flush()
        try:
            with open(state_filename, 'rb') as f:
                state_params = pkl.load(f)
            if load_from_best:
                init_epoch = state_params['epoch']
                solution = state_params
            else:
                init_epoch = state_params['epoch_done'] + 1
                solution = state_params['solution']
            best_val_score = solution['best_val_score']
            n_samples = solution['samples_seen']
        except Exception:
            print('Failed to reload state parameters, starting from 0')
            init_epoch = 0
            best_val_score = 0
            n_samples = 0

    else:
        curr_model['options'] = {}
        init_epoch = 0
        best_val_score = 0
        n_samples = 0

    # Overwrite loaded options with input options
    for k, v in kwargs.items():
        curr_model['options'][k] = v
    model_options = curr_model['options']

    # Print final options loaded
    if model_options['reload_']:
        print('PARAMETERS AFTER LOADING:')
        for k, v in model_options.items():
            print('{:>26}: {}'.format(k, v))
        sys.stdout.flush()

    # Load training and development sets
    print('Loading dataset')
    sys.stdout.flush()

    dataset = load_dataset(dataset_name=model_options['data'],
                           embedding=model_options['embedding'],
                           path_to_data=model_options['data_path'],
                           test_subset=model_options['test_subset'],
                           load_train=True,
                           fold=0)
    train = dataset['train']
    dev = dataset['val']

    # Create word dictionary
    print('Creating dictionary')
    sys.stdout.flush()
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print('Dictionary size: ' + str(len(worddict)))
    sys.stdout.flush()
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    opt_filename_save = get_opt_filename(model_options, previous=False)
    print('Saving model parameters in', opt_filename_save)
    sys.stdout.flush()
    try:
        os.makedirs(os.path.dirname(opt_filename_save))
    except:
        pass
    pkl.dump(curr_model, open(opt_filename_save, 'wb'))

    # Load data from dataset
    print('Loading data')
    sys.stdout.flush()
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print('Building model')
    sys.stdout.flush()
    params = init_params(model_options)

    # reload network parameters, ie. weights
    if model_options['reload_']:
        params_filename = get_npz_filename(model_options,
                                           best=load_from_best,
                                           previous=True)
        params = load_params(params_filename, params)

    tparams = init_tparams(params)
    inps, cost = build_model(tparams, model_options)

    print('Building sentence encoder')
    sys.stdout.flush()
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    sys.stdout.flush()
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print('Building f_grad...')
    sys.stdout.flush()
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print('Building errors...')
    sys.stdout.flush()
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print('Building optimizers...')
    sys.stdout.flush()
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    # Get names for the files to save model and solution
    sol_filename_best = get_sol_filename(model_options,
                                         best=True,
                                         previous=False)
    sol_filename_last = get_sol_filename(model_options,
                                         best=False,
                                         previous=False)
    params_filename_best = get_npz_filename(model_options,
                                            best=True,
                                            previous=False)
    params_filename_last = get_npz_filename(model_options,
                                            best=False,
                                            previous=False)

    print('PATHS TO MODELS:')
    for filename in [
            sol_filename_best, sol_filename_last, params_filename_best,
            params_filename_last
    ]:
        print(filename)
        sys.stdout.flush()
        try:
            os.makedirs(os.path.dirname(filename))
        except:
            pass

    # Start optimization
    print('Optimization')
    sys.stdout.flush()

    uidx = 0

    # Timing
    t_start = time.time()
    print('Starting time:', datetime.now())

    for eidx in range(init_epoch, model_options['max_epochs']):
        t_start_epoch = time.time()
        print('Epoch ', eidx)
        sys.stdout.flush()

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                sys.stdout.flush()
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ',
                      ud)
                sys.stdout.flush()

            if numpy.mod(uidx, model_options['validFreq']) == 0:
                print('Computing results...')
                sys.stdout.flush()

                # encode sentences efficiently
                dev_s = encode_sentences(
                    curr_model,
                    dev_caps,
                    batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr) = i2t(dev_errs)
                (r1i, r5i, r10i, medri, meanri) = t2i(dev_errs)
                print("Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1i, r5i, r10i, medri, meanri))
                sys.stdout.flush()
                print("Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1, r5, r10, medr, meanr))
                sys.stdout.flush()

                # Score
                val_score = r1 + r5 + r10 + r1i + r5i + r10i
                if val_score > best_val_score:

                    print('BEST MODEL FOUND')
                    print('Score:', val_score)
                    print('Previous best score:', best_val_score)
                    best_val_score = val_score
                    # Join in a results dict
                    results_dict = build_results_dict(r1, r5, r10, medr, r1i,
                                                      r5i, r10i, medri)

                    # Save parameters
                    print('Saving...', end=' ')
                    sys.stdout.flush()
                    numpy.savez(params_filename_best, **unzip(tparams))
                    print('Done')
                    sys.stdout.flush()

                    # Update solution
                    solution = OrderedDict([
                        ('epoch', eidx), ('update', uidx),
                        ('samples_seen', n_samples),
                        ('best_val_score', best_val_score),
                        ('best_val_res', results_dict),
                        ('time_until_results',
                         str(timedelta(seconds=(time.time() - t_start_train))))
                    ])
                    pkl.dump(solution, open(sol_filename_best, 'wb'))

        print('Seen %d samples' % n_samples)
        sys.stdout.flush()

        # Timing
        t_epoch = time.time() - t_start_epoch
        t_epoch_avg = (time.time() - t_start) / (eidx + 1 - (init_epoch))
        print('Time for this epoch:', str(timedelta(seconds=t_epoch)),
              'Average:', str(timedelta(seconds=t_epoch_avg)))
        t_2_complete = t_epoch_avg * (model_options['max_epochs'] - (eidx + 1))
        print('Time since start session:',
              str(timedelta(seconds=time.time() - t_start)),
              'Estimated time to complete training:',
              str(timedelta(seconds=t_2_complete)))
        print('Current time:', datetime.now())
        sys.stdout.flush()

        # Save current model state; 'solution' is undefined until the first
        # validation improvement, so fall back to rebuilding it here
        try:
            state_params = OrderedDict([('epoch_done', eidx),
                                        ('solution', solution)])
        except NameError:
            solution = OrderedDict([
                ('epoch', eidx), ('update', uidx), ('samples_seen', n_samples),
                ('best_val_score', best_val_score),
                ('time_until_results',
                 str(timedelta(seconds=(time.time() - t_start_train))))
            ])
            state_params = OrderedDict([('epoch_done', eidx),
                                        ('solution', solution)])
        pkl.dump(state_params, open(sol_filename_last, 'wb'))

        # Save parameters
        print('Saving LAST npz...', end=' ')
        sys.stdout.flush()
        numpy.savez(params_filename_last, **unzip(tparams))
        print('Done')
        sys.stdout.flush()

    return solution
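Note: maxnorm(g, grad_clip) is used for gradient clipping above but not defined. A Theano sketch consistent with how it is called (tensor refers to theano.tensor, as in the rest of the example) could be:

def maxnorm(g, clip):
    # rescale gradient g so that its L2 norm does not exceed clip
    norm = tensor.sqrt(tensor.sum(tensor.sqr(g)))
    return tensor.switch(norm > clip, g * clip / norm, g)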
Example #6
        def eval_model():

            print('evaluating model...')
            weights = model.get_weights()

            # weights
            emb_w = weights[0]
            im_w = weights[4]
            im_b = weights[5]
            gru_weights = weights[1:4]

            # image model
            test_model_im = Model(inputs=image_input, outputs=emb_image)
            test_model_im.set_weights([im_w, im_b])
            test_model_im.compile(optimizer='adam', loss=contrastive_loss)

            test_iter = datasource.Datasource(test, worddict=worddict)
            _, test_ims = test_iter.all()

            # predicted images
            pred_ims = test_model_im.predict(test_ims)

            # caption model
            test_model_cap = Model(inputs=cap_input, outputs=emb_cap)
            test_model_cap.set_weights([emb_w] + gru_weights)
            test_model_cap.compile(optimizer='adam', loss=contrastive_loss)

            caps = []
            #input_cap = test['caps'][100]
            input_cap = input("Insert caption: ").encode('ascii')
            caps.append(input_cap.strip())
            print(input_cap)

            test_input = {}
            test_input['ims'] = []
            test_input['caps'] = caps

            test_iter = datasource.Datasource(test_input,
                                              batch_size=1,
                                              worddict=worddict)
            test_cap, _ = test_iter.all()

            # predicted caption
            pred_cap = test_model_cap.predict(test_cap)

            # compute error matrix
            test_errs = compute_errors(pred_cap, pred_ims)

            # indices of 10 most likely pictures in test set
            ind_ims = input2image(test_errs)
            print(ind_ims)

            for i in ind_ims[0]:
                print(val['caps'][5 * i])

            directory = '../data/coco/'
            #imgs = sorted(os.listdir(directory))

            with open('../data/coco/test_path.txt', 'r') as f:
                imgs = f.readlines()

            for i in ind_ims[0]:

                img_path = directory + imgs[i][1:-2]
                print('Image: ', os.fsdecode(imgs[i]))

                img = image.load_img(img_path, target_size=(224, 224))

                plt.imshow(img)
                plt.show()
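Note: input2image(test_errs) is an external helper that returns, for each query caption, the indices of the best-matching test images. A hypothetical numpy version consistent with how its result is used above:

import numpy

def input2image(errs, k=10):
    # errs: (queries x images) error matrix; return the k lowest-error image
    # indices per query, best match first
    return numpy.argsort(errs, axis=1)[:, :k]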