def train(params):
    try:
        # GRID SEARCH
        print(params)
        global model_config
        model_config['margin'] = params['margin'] if 'margin' in params else model_config['margin']
        model_config['output_dim'] = params['output_dim'] if 'output_dim' in params else model_config['output_dim']
        model_config['max_cap_length'] = params['max_cap_length'] if 'max_cap_length' in params else model_config['max_cap_length']
        # NOTE: the trailing comma wraps the optimizer in a 1-element tuple, which is why
        # model.compile() below indexes it with [0]
        model_config['optimizer'] = params['optimizer'] if 'optimizer' in params else model_config['optimizer'],
        model_config['dim_word'] = params['dim_word'] if 'dim_word' in params else model_config['dim_word']

        # Load training and development sets
        print('Loading dataset')
        dataset = load_dataset(model_config['data'], cnn=model_config['cnn'])
        train = dataset['train']
        #train['ims'] = train['ims'][0:1000]
        #train['caps'] = train['caps'][0:5000]
        for key, value in train.items():
            print('Size: ' + key + ': ')
            print(len(value))
        test = dataset['test']
        val = dataset['dev']

        # Create dictionary
        print('Creating dictionary')
        worddict = build_dictionary(train['caps'] + val['caps'])
        print('Dictionary size: ' + str(len(worddict)))
        model_config['worddict'] = len(worddict)

        # Load pretrained GloVe vectors
        embeddings_index = {}
        f = open(model_config['glove.path'])
        num_nonreg = 0
        for line in f:
            values = line.split()
            try:
                word = values[0].encode('ascii')
            except:
                print(values[0])
                num_nonreg += 1
                # word = values[0].encode('ascii', 'ignore')
                continue  # skip tokens that are not plain ASCII (otherwise `word` keeps its previous value)
            try:
                coefs = numpy.asarray(values[1:], dtype='float32')
            except:
                print(values[1:])
            embeddings_index[word] = coefs
        f.close()
        print('Found %s word vectors.' % len(embeddings_index))
        print('Found %s non-regular words.' % num_nonreg)

        embedding_matrix = numpy.zeros((len(worddict) + 2, model_config['dim_word']))
        for word, i in worddict.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
            else:
                print(word)
            if i < 5:
                print(i)
                print(word)
                print(embedding_matrix[i + 2])
        print(embedding_matrix[0])
        print(embedding_matrix[4])

        print('Loading data')
        train_iter = datasource.Datasource(train, batch_size=model_config['batch_size'], worddict=worddict)
        val_iter = datasource.Datasource(val, batch_size=model_config['batch_size'], worddict=worddict)
        test_iter = datasource.Datasource(test, batch_size=model_config['batch_size'], worddict=worddict)

        print("Image model loading")
        # this returns a tensor of emb_image
        image_input = Input(shape=(model_config['dim_cnn'],), name='image_input')
        X = Dense(model_config['output_dim'])(image_input)
        emb_image = Lambda(lambda x: l2norm(x))(X)
        #emb_image = Lambda(lambda x: abs(x))(X)

        print("Text model loading")
        # this returns a tensor of emb_cap
        cap_input = Input(shape=(model_config['max_cap_length'],), dtype='int32', name='cap_input')
        # NOTE: this Masking layer is applied to the image-branch tensor and its output is never
        # used; the Embedding below reads cap_input directly (mask_zero=True on the Embedding
        # would be the usual way to mask padding)
        X = Masking(mask_value=0, input_shape=(model_config['max_cap_length'], model_config['output_dim']))(X)
        # from scratch
        #X = Embedding(output_dim=model_config['dim_word'], input_dim=model_config['worddict']+2, input_length=model_config['max_cap_length'])(cap_input)
        # pretrained GloVe
        X = Embedding(output_dim=model_config['dim_word'], input_dim=len(worddict) + 2,
                      input_length=model_config['max_cap_length'],
                      weights=[embedding_matrix], trainable=True)(cap_input)
        # GRU activation (Keras 1-style argument name; units= in Keras 2)
        X = GRU(output_dim=model_config['output_dim'], return_sequences=False)(X)
        # LSTM activation
        #X = LSTM(output_dim=model_config['output_dim'], return_sequences=False)(X)
        emb_cap = Lambda(lambda x: l2norm(x))(X)
        #emb_cap = Lambda(lambda x: abs(x))(X)

        print("loading the joined model")
        # merged = _Merge(mode='concat')([emb_cap, emb_image])
        merged = concatenate([emb_cap, emb_image])
        model = Model(inputs=[cap_input, image_input], outputs=[merged])

        print("compiling the model")
        model.compile(optimizer=model_config['optimizer'][0], loss=contrastive_loss)

        def train_generator(batch_size):
            while True:
                batches = [[x, im] for x, im in train_iter]
                dummy = numpy.zeros(shape=(batch_size, model_config['output_dim'] * 2))
                for batch in batches:
                    yield (batch, dummy)

        print(model_config['worddict'] / model_config['batch_size'] / 100)

        # uncomment in order to load model weights
        #model.load_weights('my_model_weights.h5')

        def eval_model():
            print('evaluating model...')
            weights = model.get_weights()
            for j in range(len(weights)):
                print(weights[j].shape)
            emb_w = weights[0]
            im_w = weights[4]
            im_b = weights[5]
            gru_weights = weights[1:4]

            test_model_im = Model(inputs=image_input, outputs=emb_image)
            test_model_im.set_weights([im_w, im_b])
            test_model_im.compile(optimizer='adam', loss=contrastive_loss)

            test_model_cap = Model(inputs=cap_input, outputs=emb_cap)
            test_model_cap.set_weights([emb_w] + gru_weights)
            test_model_cap.compile(optimizer='adam', loss=contrastive_loss)

            test_cap, test_im = test_iter.all()
            all_caps = numpy.zeros(shape=(len(test_cap), model_config['max_cap_length']))
            all_images = numpy.zeros(shape=(len(test_cap), model_config['dim_cnn']))
            pred_cap = test_model_cap.predict(test_cap)
            pred_im = test_model_im.predict(test_im)
            test_errs = compute_errors(pred_cap, pred_im)
            r10_c, rmean_c = t2i(test_errs)
            r10_i, rmean_i = i2t(test_errs)
            print("Image to text: %.1f %.1f" % (r10_i, rmean_i))
            print("Text to image: %.1f %.1f" % (r10_c, rmean_c))

        for ip in range(model_config['epoch']):
            print('Epoch: %s ...' % str(ip + 1))
            train_hist = model.fit_generator(
                train_generator(batch_size=model_config['batch_size']),
                steps_per_epoch=(len(train['ims']) // model_config['batch_size']),
                #steps_per_epoch=5,
                epochs=1,  # the original expression model_config['epoch']/model_config['epoch'] always evaluates to 1; the outer loop drives the total epoch count
                verbose=1,
                class_weight=None,
                max_queue_size=1)
            model.save_weights('../results/from_scratch/my_model_weights_' + str(ip) + '.h5')
            print(train_hist.history)
            # evaluate model - recall@10 & mean_rank metric
            eval_model()
    except:
        raise
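# ---------------------------------------------------------------------------
# Editor's note: l2norm() and contrastive_loss() are called above but are not
# defined in this section. The definitions below are only a minimal sketch of
# what such helpers usually look like for a joint caption/image embedding
# trained with a margin-based ranking objective over the concatenated
# [emb_cap, emb_image] output and a dummy target; they are an assumption, not
# the project's actual implementation, and the margin value is a placeholder.
# ---------------------------------------------------------------------------
from keras import backend as K


def l2norm(x):
    # scale each row of x to unit L2 norm
    return x / K.sqrt(K.maximum(K.sum(K.square(x), axis=-1, keepdims=True), K.epsilon()))


def contrastive_loss(y_true, y_pred, margin=0.2):
    # y_pred is the concatenation [emb_cap, emb_image]; y_true is the dummy target
    dim = K.int_shape(y_pred)[-1] // 2
    cap = y_pred[:, :dim]
    img = y_pred[:, dim:]
    scores = K.dot(cap, K.transpose(img))            # (batch, batch) caption/image similarities
    pos = K.sum(cap * img, axis=1, keepdims=True)    # (batch, 1) scores of the matching pairs
    cost_cap = K.maximum(0., margin + scores - pos)               # rank the true image above the others
    cost_img = K.maximum(0., margin + scores - K.transpose(pos))  # rank the true caption above the others
    # the diagonal terms each contribute a constant `margin`, which does not affect gradients
    return K.sum(cost_cap) + K.sum(cost_img)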
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """
    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print 'reloading...' + load_from
        with open('%s.pkl' % load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v
    model_options = curr_model['options']

    # initialize logger
    import datetime
    timestampedName = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName, hyperparams=model_options,
              saveDir='vis/training', xLabel='Examples Seen', saveFrequency=1)

    print curr_model['options']

    # Load training and development sets
    print 'Loading dataset'
    dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print 'Dictionary size: ' + str(len(worddict))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb'))

    print 'Loading data'
    train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)
    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print 'Building sentence encoder'
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print 'Building errors..'
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost)

    print 'Optimization'

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in xrange(model_options['max_epochs']):
        print 'Epoch ', eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
                log.update({'Error': float(cost)}, n_samples)

            if numpy.mod(uidx, model_options['validFreq']) == 0:
                print 'Computing results...'

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr)
                log.update({'R@1': r1, 'R@5': r5, 'R@10': r10,
                            'median_rank': medr, 'mean_rank': meanr}, n_samples)
                print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)
                log.update({'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i,
                            'Image2CaptionR@10': r10i, 'Image2Caption_median_rank': medri,
                            'Image2Caption_mean_rank': meanri}, n_samples)

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot

                    # Save parameters
                    print 'Saving...',
                    numpy.savez('%s/%s' % (save_dir, name), **unzip(tparams))
                    print 'Done'

                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open('vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f:
                        json.dump(vis_details, f)

                    # Add the new model to the index
                    try:
                        index = json.load(open('vis/roc/index.json', 'r'))
                    except IOError:
                        index = {model_options['data']: []}

                    models = index[model_options['data']]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)

        print 'Seen %d samples' % n_samples
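# ---------------------------------------------------------------------------
# Editor's note: illustrative call only. The keyword names below are the
# option keys this trainer() actually reads (data, cnn, batch_size, optimizer,
# lrate, grad_clip, max_epochs, dispFreq, validFreq); the concrete values are
# placeholders, and further hyperparameters consumed inside
# init_params()/build_model() (embedding sizes, margin, ...) are omitted.
# ---------------------------------------------------------------------------
# trainer(name='coco_gru',
#         save_dir='snapshots',
#         data='coco',
#         cnn='10crop',
#         batch_size=128,
#         optimizer='adam',
#         lrate=0.0002,
#         grad_clip=2.,
#         max_epochs=15,
#         dispFreq=10,
#         validFreq=300)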
def train(params):
    try:
        # GRID SEARCH
        print(params)
        global model_config
        model_config['margin'] = params['margin'] if 'margin' in params else model_config['margin']
        model_config['output_dim'] = params['output_dim'] if 'output_dim' in params else model_config['output_dim']
        model_config['max_cap_length'] = params['max_cap_length'] if 'max_cap_length' in params else model_config['max_cap_length']
        model_config['optimizer'] = params['optimizer'] if 'optimizer' in params else model_config['optimizer'],
        model_config['dim_word'] = params['dim_word'] if 'dim_word' in params else model_config['dim_word']

        # Load training and development sets
        print('Loading dataset')
        dataset = load_dataset(model_config['data'], cnn=model_config['cnn'])
        train = dataset['train']
        test = dataset['test']
        val = dataset['dev']

        # Create dictionary
        print('Creating dictionary')
        worddict = build_dictionary(train['caps'] + val['caps'])
        print('Dictionary size: ' + str(len(worddict)))
        model_config['worddict'] = len(worddict)

        print('Loading data')
        train_iter = datasource.Datasource(train, batch_size=model_config['batch_size'], worddict=worddict)
        val_iter = datasource.Datasource(val, batch_size=model_config['batch_size'], worddict=worddict)
        test_iter = datasource.Datasource(test, batch_size=model_config['batch_size'], worddict=worddict)

        print("Image model loading")
        # this returns a tensor of emb_image
        image_input = Input(shape=(model_config['dim_cnn'],), name='image_input')
        X = Dense(model_config['output_dim'])(image_input)
        X = Lambda(lambda x: l2norm(x))(X)
        emb_image = Lambda(lambda x: abs(x))(X)

        print("Text model loading")
        # this returns a tensor of emb_cap
        cap_input = Input(shape=(model_config['max_cap_length'],), dtype='int32', name='cap_input')
        X = Masking(mask_value=0, input_shape=(model_config['max_cap_length'], model_config['output_dim']))(cap_input)
        X = Embedding(output_dim=model_config['dim_word'], input_dim=model_config['worddict'],
                      input_length=model_config['max_cap_length'])(cap_input)
        X = GRU(output_dim=model_config['output_dim'], return_sequences=False)(X)
        X = Lambda(lambda x: l2norm(x))(X)
        emb_cap = Lambda(lambda x: abs(x))(X)

        print("loading the joined model")
        merged = Merge(mode='concat')([emb_cap, emb_image])
        model = Model(input=[cap_input, image_input], output=[merged])

        print("compiling the model")
        model.compile(optimizer=model_config['optimizer'][0], loss=contrastive_loss)

        # uncomment for model selection and add validation_data=(gen_val_data()) when calling fit_generator
        # def gen_val_data():
        #     val_bacthes = [[x, im] for x, im in val_iter]
        #     x1 = []
        #     x2 = []
        #     for batch in val_bacthes:
        #         x1.append(batch[0])
        #         x2.append(batch[1])
        #     mat_x1 = numpy.array(x1).reshape(7*model_config['batch_size'], model_config['max_cap_length'])
        #     mat_x2 = numpy.array(x2).reshape(7*model_config['batch_size'], model_config['dim_cnn'])
        #     dummy = numpy.zeros(shape=(len(mat_x1), model_config['output_dim'] * 2))
        #     return [mat_x1, mat_x2], dummy

        def train_generator(batch_size):
            def gen(batch_size):
                batches = [[x, im] for x, im in train_iter]
                dummy = numpy.zeros(shape=(batch_size, model_config['output_dim'] * 2))
                for batch in batches:
                    yield (batch, dummy)
            return gen(batch_size)  # return the generator object expected by fit_generator

        # uncomment for model selection and add callbacks=[early_stopping] when calling fit_generator
        #ModelCheckpoint('/home/igor/PycharmProjects/GRU/models', monitor='val_loss', verbose=0, save_best_only=False, mode='auto')
        #early_stopping = EarlyStopping(monitor='val_loss', patience=50)

        train_hist = model.fit_generator(
            train_generator(batch_size=model_config['batch_size']),
            samples_per_epoch=(model_config['worddict'] / model_config['batch_size'] * model_config['batch_size']),
            nb_epoch=model_config['epoch'],
            verbose=2,
            class_weight=None,
            max_q_size=0)
        model.save_weights('my_model_weights.h5')
        print(train_hist.history)

        # uncomment in order to load model weights
        #model.load_weights('my_model_weights.h5')

        def eval_model():
            print('evaluating model...')
            weights = model.get_weights()
            emb_w = weights[0]
            im_w = weights[1]
            im_b = weights[2]
            gru_weights = weights[3:12]

            test_model_im = Model(input=image_input, output=emb_image)
            test_model_im.set_weights([im_w, im_b])
            test_model_im.compile(optimizer='adam', loss=contrastive_loss)

            test_model_cap = Model(input=cap_input, output=emb_cap)
            test_model_cap.set_weights([emb_w] + gru_weights)
            test_model_cap.compile(optimizer='adam', loss=contrastive_loss)

            test_cap, test_im = test_iter.all()
            all_caps = numpy.zeros(shape=(len(test_cap), model_config['max_cap_length']))
            all_images = numpy.zeros(shape=(len(test_cap), model_config['dim_cnn']))
            pred_cap = test_model_cap.predict(test_cap)
            pred_im = test_model_im.predict(test_im)
            test_errs = compute_errors(pred_cap, pred_im)
            r10_c, rmean_c = t2i(test_errs)
            r10_i, rmean_i = i2t(test_errs)
            print("Image to text: %.1f %.1f" % (r10_i, rmean_i))
            print("Text to image: %.1f %.1f" % (r10_c, rmean_c))

        # evaluate model - recall@10 & mean_rank metric
        eval_model()

        # uncomment for model selection
        #return {'loss': train_hist.history['loss'][0], 'status': STATUS_OK, 'model': model}
    except:
        raise
def train(params):
    try:
        # GRID SEARCH
        print(params)
        global model_config
        model_config['margin'] = params['margin'] if 'margin' in params else model_config['margin']
        model_config['output_dim'] = params['output_dim'] if 'output_dim' in params else model_config['output_dim']
        model_config['max_cap_length'] = params['max_cap_length'] if 'max_cap_length' in params else model_config['max_cap_length']
        model_config['optimizer'] = params['optimizer'] if 'optimizer' in params else model_config['optimizer'],
        model_config['dim_word'] = params['dim_word'] if 'dim_word' in params else model_config['dim_word']

        # Load training and development sets
        print('Loading dataset')
        dataset = load_dataset(model_config['data'], cnn=model_config['cnn'])
        train = dataset['train']
        #train['ims'] = train['ims'][0:1000]
        #train['caps'] = train['caps'][0:5000]
        for key, value in train.items():
            print('Size: ' + key + ': ')
            print(len(value))
        test = dataset['test']
        val = dataset['dev']

        # Create dictionary
        print('Creating dictionary')
        worddict = build_dictionary(train['caps'] + val['caps'])
        print('Dictionary size: ' + str(len(worddict)))
        model_config['worddict'] = len(worddict)

        embeddings_index = {}
        f = open(model_config['glove.path'])
        for line in f:
            values = line.split()
            word = values[0]
            try:
                coefs = numpy.asarray(values[1:], dtype='float32')
            except:
                print(values[1:])
            embeddings_index[word] = coefs
        f.close()
        print('Found %s word vectors.' % len(embeddings_index))

        embedding_matrix = numpy.zeros((len(worddict) + 1, model_config['dim_word']))
        for word, i in worddict.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector

        print('Loading data')
        train_iter = datasource.Datasource(train, batch_size=model_config['batch_size'], worddict=worddict)
        val_iter = datasource.Datasource(val, batch_size=model_config['batch_size'], worddict=worddict)
        test_iter = datasource.Datasource(test, batch_size=model_config['batch_size'], worddict=worddict)

        print("Image model loading")
        # this returns a tensor of emb_image
        image_input = Input(shape=(model_config['dim_cnn'],), name='image_input')
        X = Dense(model_config['output_dim'])(image_input)
        X = Lambda(lambda x: l2norm(x))(X)
        emb_image = Lambda(lambda x: abs(x))(X)

        print("Text model loading")
        # this returns a tensor of emb_cap
        cap_input = Input(shape=(model_config['max_cap_length'],), dtype='int32', name='cap_input')
        X = Masking(mask_value=0, input_shape=(model_config['max_cap_length'], model_config['output_dim']))(cap_input)
        X = Embedding(output_dim=model_config['dim_word'], input_dim=model_config['worddict'] + 2,
                      input_length=model_config['max_cap_length'])(cap_input)
        #X = Embedding(output_dim=model_config['dim_word'], input_dim=len(worddict)+1, input_length=model_config['max_cap_length'], weights=[embedding_matrix], trainable=False)(cap_input)
        X = GRU(output_dim=model_config['output_dim'], return_sequences=False)(X)
        X = Lambda(lambda x: l2norm(x))(X)
        emb_cap = Lambda(lambda x: abs(x))(X)

        print("loading the joined model")
        # merged = _Merge(mode='concat')([emb_cap, emb_image])
        merged = concatenate([emb_cap, emb_image])
        model = Model(inputs=[cap_input, image_input], outputs=[merged])

        print("compiling the model")
        model.compile(optimizer=model_config['optimizer'][0], loss=contrastive_loss)

        # uncomment for model selection and add validation_data=(gen_val_data()) when calling fit_generator
        # def gen_val_data():
        #     val_bacthes = [[x, im] for x, im in val_iter]
        #     x1 = []
        #     x2 = []
        #     for batch in val_bacthes:
        #         x1.append(batch[0])
        #         x2.append(batch[1])
        #     mat_x1 = numpy.array(x1).reshape(7*model_config['batch_size'], model_config['max_cap_length'])
        #     mat_x2 = numpy.array(x2).reshape(7*model_config['batch_size'], model_config['dim_cnn'])
        #     dummy = numpy.zeros(shape=(len(mat_x1), model_config['output_dim'] * 2))
        #     return [mat_x1, mat_x2], dummy

        #def train_generator(batch_size):
        #    def gen(batch_size):
        #        batches = [[x, im] for x, im in train_iter]
        #        dummy = numpy.zeros(shape=(batch_size, model_config['output_dim'] * 2))
        #        for batch in batches:
        #            yield (batch, dummy)
        #    return gen

        def train_generator(batch_size):
            while True:
                batches = [[x, im] for x, im in train_iter]
                dummy = numpy.zeros(shape=(batch_size, model_config['output_dim'] * 2))
                for batch in batches:
                    yield (batch, dummy)

        # uncomment for model selection and add callbacks=[early_stopping] when calling fit_generator
        #ModelCheckpoint('/home/igor/PycharmProjects/GRU/models', monitor='val_loss', verbose=0, save_best_only=False, mode='auto')
        #early_stopping = EarlyStopping(monitor='val_loss', patience=50)

        print(model_config['worddict'] / model_config['batch_size'] / 100)

        # uncomment in order to load model weights
        #model.load_weights('my_model_weights.h5')

        def eval_model():
            print('evaluating model...')
            weights = model.get_weights()
            for j in range(len(weights)):
                print(weights[j].shape)
            emb_w = weights[0]
            im_w = weights[4]
            im_b = weights[5]
            gru_weights = weights[1:4]

            test_model_im = Model(inputs=image_input, outputs=emb_image)
            test_model_im.set_weights([im_w, im_b])
            test_model_im.compile(optimizer='adam', loss=contrastive_loss)

            test_model_cap = Model(inputs=cap_input, outputs=emb_cap)
            test_model_cap.set_weights([emb_w] + gru_weights)
            test_model_cap.compile(optimizer='adam', loss=contrastive_loss)

            test_cap, test_im = test_iter.all()
            all_caps = numpy.zeros(shape=(len(test_cap), model_config['max_cap_length']))
            all_images = numpy.zeros(shape=(len(test_cap), model_config['dim_cnn']))
            pred_cap = test_model_cap.predict(test_cap)
            pred_im = test_model_im.predict(test_im)
            test_errs = compute_errors(pred_cap, pred_im)
            r10_c, rmean_c = t2i(test_errs)
            r10_i, rmean_i = i2t(test_errs)
            print("Image to text: %.1f %.1f" % (r10_i, rmean_i))
            print("Text to image: %.1f %.1f" % (r10_c, rmean_c))

        for ip in range(10):
            model.load_weights('my_model_weights_' + str(ip) + '.h5', by_name=True)
            # print(train_hist.history)
            # evaluate model - recall@10 & mean_rank metric
            eval_model()

        # uncomment for model selection
        #return {'loss': train_hist.history['loss'][0], 'status': STATUS_OK, 'model': model}
    except:
        raise
def trainer(**kwargs):
    """
    Train the model according to input params
    Info about input params is available in parameters.py
    """
    # Timing
    print('Starting time:', datetime.now())
    sys.stdout.flush()
    t_start_train = time.time()

    # Model options
    # load old model, including parameters, but overwrite with new options

    # Extract model options from arguments
    model_options = {}
    for k, v in kwargs.iteritems():
        model_options[k] = v

    # Print input options
    print('PARAMETERS BEFORE LOADING:')
    for k, v in model_options.items():
        print('{:>26}: {}'.format(k, v))
    sys.stdout.flush()

    # Reload options if required
    curr_model = dict()
    if model_options['reload_']:
        # Reload model parameters
        opt_filename_reload = get_opt_filename(model_options, previous=True)
        print('reloading...', opt_filename_reload)
        sys.stdout.flush()
        try:
            with open(opt_filename_reload, 'rb') as f:
                curr_model = pkl.load(f)
        except:
            print('Failed to reload parameters, using only the parameters that were passed in')
            curr_model['options'] = {}

        # Check if we reload from best model or last model
        if model_options['load_from'] in ['Best', 'best', 'B', 'b']:
            load_from_best = True
            print('Loading from Best saved model in validation results')
        elif model_options['load_from'] in ['Last', 'last', 'L', 'l']:
            load_from_best = False
            print('Loading from Last saved model')
        else:
            print('Unknown choice for "load_from" parameter', model_options['load_from'])
            print('Please choose one of:', ['Best', 'best', 'B', 'b'], ['Last', 'last', 'L', 'l'])
            print('Using Last as default')
            load_from_best = False

        # Reload end-point parameters
        state_filename = get_sol_filename(model_options, best=load_from_best, previous=True)
        print('reloading...', state_filename)
        sys.stdout.flush()
        try:
            with open(state_filename, 'rb') as f:
                state_params = pkl.load(f)
            if load_from_best:
                init_epoch = state_params['epoch']
                solution = state_params
            else:
                init_epoch = state_params['epoch_done'] + 1
                solution = state_params['solution']
            best_val_score = solution['best_val_score']
            n_samples = solution['samples_seen']
        except:
            print('Failed to reload state parameters, starting from 0')
            init_epoch = 0
            best_val_score = 0
            n_samples = 0
    else:
        curr_model['options'] = {}
        init_epoch = 0
        best_val_score = 0
        n_samples = 0

    # Overwrite loaded options with input options
    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v
    model_options = curr_model['options']

    # Print final options loaded
    if model_options['reload_']:
        print('PARAMETERS AFTER LOADING:')
        for k, v in model_options.items():
            print('{:>26}: {}'.format(k, v))
        sys.stdout.flush()

    # Load training and development sets
    print('Loading dataset')
    sys.stdout.flush()

    dataset = load_dataset(dataset_name=model_options['data'],
                           embedding=model_options['embedding'],
                           path_to_data=model_options['data_path'],
                           test_subset=model_options['test_subset'],
                           load_train=True,
                           fold=0)
    train = dataset['train']
    dev = dataset['val']

    # Create word dictionary
    print('Creating dictionary')
    sys.stdout.flush()
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print('Dictionary size: ' + str(len(worddict)))
    sys.stdout.flush()
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    opt_filename_save = get_opt_filename(model_options, previous=False)
    print('Saving model parameters in', opt_filename_save)
    sys.stdout.flush()
    try:
        os.makedirs(os.path.dirname(opt_filename_save))
    except:
        pass
    pkl.dump(curr_model, open(opt_filename_save, 'wb'))

    # Load data from dataset
    print('Loading data')
    sys.stdout.flush()
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print('Building model')
    sys.stdout.flush()
    params = init_params(model_options)

    # reload network parameters, i.e. weights
    if model_options['reload_']:
        params_filename = get_npz_filename(model_options, best=load_from_best, previous=True)
        params = load_params(params_filename, params)

    tparams = init_tparams(params)
    inps, cost = build_model(tparams, model_options)

    print('Building sentence encoder')
    sys.stdout.flush()
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    sys.stdout.flush()
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print('Building f_grad...')
    sys.stdout.flush()
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print('Building errors...')
    sys.stdout.flush()
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print('Building optimizers...')
    sys.stdout.flush()
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost)

    # Get names for the files to save model and solution
    sol_filename_best = get_sol_filename(model_options, best=True, previous=False)
    sol_filename_last = get_sol_filename(model_options, best=False, previous=False)
    params_filename_best = get_npz_filename(model_options, best=True, previous=False)
    params_filename_last = get_npz_filename(model_options, best=False, previous=False)

    print('PATHS TO MODELS:')
    for filename in [sol_filename_best, sol_filename_last,
                     params_filename_best, params_filename_last]:
        print(filename)
        sys.stdout.flush()
        try:
            os.makedirs(os.path.dirname(filename))
        except:
            pass

    # Start optimization
    print('Optimization')
    sys.stdout.flush()
    uidx = 0

    # Timing
    t_start = time.time()
    print('Starting time:', datetime.now())

    for eidx in range(init_epoch, model_options['max_epochs']):
        t_start_epoch = time.time()
        print('Epoch ', eidx)
        sys.stdout.flush()

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                sys.stdout.flush()
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud)
                sys.stdout.flush()

            if numpy.mod(uidx, model_options['validFreq']) == 0:
                print('Computing results...')
                sys.stdout.flush()

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr) = i2t(dev_errs)
                (r1i, r5i, r10i, medri, meanri) = t2i(dev_errs)
                print("Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri))
                sys.stdout.flush()
                print("Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr))
                sys.stdout.flush()

                # Score
                val_score = r1 + r5 + r10 + r1i + r5i + r10i
                if val_score > best_val_score:
                    print('BEST MODEL FOUND')
                    print('Score:', val_score)
                    print('Previous best score:', best_val_score)
                    best_val_score = val_score

                    # Join in a results dict
                    results_dict = build_results_dict(r1, r5, r10, medr, r1i, r5i, r10i, medri)

                    # Save parameters
                    print('Saving...', end=' ')
                    sys.stdout.flush()
                    numpy.savez(params_filename_best, **unzip(tparams))
                    print('Done')
                    sys.stdout.flush()

                    # Update solution
                    solution = OrderedDict([
                        ('epoch', eidx),
                        ('update', uidx),
                        ('samples_seen', n_samples),
                        ('best_val_score', best_val_score),
                        ('best_val_res', results_dict),
                        ('time_until_results', str(timedelta(seconds=(time.time() - t_start_train))))
                    ])
                    pkl.dump(solution, open(sol_filename_best, 'wb'))

        print('Seen %d samples' % n_samples)
        sys.stdout.flush()

        # Timing
        t_epoch = time.time() - t_start_epoch
        t_epoch_avg = (time.time() - t_start) / (eidx + 1 - (init_epoch))
        print('Time for this epoch:', str(timedelta(seconds=t_epoch)),
              'Average:', str(timedelta(seconds=t_epoch_avg)))
        t_2_complete = t_epoch_avg * (model_options['max_epochs'] - (eidx + 1))
        print('Time since start session:', str(timedelta(seconds=time.time() - t_start)),
              'Estimated time to complete training:', str(timedelta(seconds=t_2_complete)))
        print('Current time:', datetime.now())
        sys.stdout.flush()

        # Save current model
        try:
            state_params = OrderedDict([('epoch_done', eidx), ('solution', solution)])
        except:
            solution = OrderedDict([
                ('epoch', eidx),
                ('update', uidx),
                ('samples_seen', n_samples),
                ('best_val_score', best_val_score),
                ('time_until_results', str(timedelta(seconds=(time.time() - t_start_train))))
            ])
            state_params = OrderedDict([('epoch_done', eidx), ('solution', solution)])
        pkl.dump(state_params, open(sol_filename_last, 'wb'))

        # Save parameters
        print('Saving LAST npz...', end=' ')
        sys.stdout.flush()
        numpy.savez(params_filename_last, **unzip(tparams))
        print('Done')
        sys.stdout.flush()

    return solution
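# ---------------------------------------------------------------------------
# Editor's note: illustrative call only. These are the option keys that this
# trainer() reads directly in the code above; the values are placeholders,
# and the remaining hyperparameters (see parameters.py, as the docstring
# states) are omitted.
# ---------------------------------------------------------------------------
# params = {
#     'data': 'coco',
#     'embedding': 'glove',
#     'data_path': '../data',
#     'test_subset': -1,
#     'reload_': False,
#     'load_from': 'Best',
#     'batch_size': 128,
#     'optimizer': 'adam',
#     'lrate': 0.0002,
#     'grad_clip': 2.,
#     'max_epochs': 30,
#     'dispFreq': 50,
#     'validFreq': 500,
# }
# solution = trainer(**params)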
def eval_model():
    print('evaluating model...')
    weights = model.get_weights()

    # weights
    emb_w = weights[0]
    im_w = weights[4]
    im_b = weights[5]
    gru_weights = weights[1:4]

    # image model
    test_model_im = Model(inputs=image_input, outputs=emb_image)
    test_model_im.set_weights([im_w, im_b])
    test_model_im.compile(optimizer='adam', loss=contrastive_loss)

    test_iter = datasource.Datasource(test, worddict=worddict)
    _, test_ims = test_iter.all()

    # predicted images
    pred_ims = test_model_im.predict(test_ims)

    # caption model
    test_model_cap = Model(inputs=cap_input, outputs=emb_cap)
    test_model_cap.set_weights([emb_w] + gru_weights)
    test_model_cap.compile(optimizer='adam', loss=contrastive_loss)

    caps = []
    #input_cap = test['caps'][100]
    input_cap = input("Insert caption: ").encode('ascii')
    caps.append(input_cap.strip())
    print(input_cap)

    test_input = {}
    test_input['ims'] = []
    test_input['caps'] = caps
    test_iter = datasource.Datasource(test_input, batch_size=1, worddict=worddict)
    test_cap, _ = test_iter.all()

    # predicted caption
    pred_cap = test_model_cap.predict(test_cap)

    # compute error matrix
    test_errs = compute_errors(pred_cap, pred_ims)

    # indices of 10 most likely pictures in test set
    ind_ims = input2image(test_errs)
    print(ind_ims)
    for i in ind_ims[0]:
        print(val['caps'][5 * i])

    directory = '../data/coco/'
    #imgs = sorted(os.listdir(directory))
    with open('../data/coco/test_path.txt', 'r') as f:
        imgs = f.readlines()
    for i in ind_ims[0]:
        img_path = directory + imgs[i][1:-2]
        print('Image: ', os.fsdecode(imgs[i]))
        img = image.load_img(img_path, target_size=(224, 224))
        plt.imshow(img)
        plt.show()