def ranking_eval_5fold(model, split='dev'):
    """
    Evaluate a trained model on either dev or test of the dataset it was
    trained on.

    Evaluate separately on 5 1000-image splits, and average the metrics.
    The per-fold metrics and their mean are printed; nothing is returned.

    :param model: dict providing ``model['options']['data']``,
        ``model['options']['cnn']`` and ``model['worddict']``; it is also
        handed to the ``tools`` encoders
    :param split: key of the loaded dataset to evaluate on
    """
    data = model['options']['data']
    cnn = model['options']['cnn']

    results = []

    for fold in range(5):
        # Call-form prints with a single argument behave the same with the
        # print statement and with the print function.
        print('Loading fold ' + str(fold))
        dataset = datasets.load_dataset(data, cnn, load_train=False, fold=fold)
        caps, ims = Datasource(dataset[split], model['worddict']).all()

        print('Computing results...')
        c_emb = tools.encode_sentences(model, caps)
        i_emb = tools.encode_images(model, ims)

        errs = tools.compute_errors(model, c_emb, i_emb)

        # Materialise each metric sequence once as a tuple so that `r + ri`
        # below is always a concatenation (never an element-wise sum) and a
        # one-shot iterator is not consumed by the print.
        r = tuple(t2i(errs))
        print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % r)

        ri = tuple(i2t(errs))
        print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % ri)
        results.append(r + ri)

    print("-----------------------------------")
    print("Mean metrics: ")
    # One row per fold: columns 0-4 are text-to-image, 5-9 image-to-text.
    mean_metrics = numpy.array(results).mean(axis=0).flatten()
    print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
          tuple(mean_metrics[:5]))
    print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
          tuple(mean_metrics[5:]))
def ranking_eval_5fold(model, split='dev'):
    """
    Score a trained model on the dev or test split of its training dataset.

    The evaluation runs on 5 1000-image folds, one after the other. The
    metrics of every fold are printed, followed by their average over folds.
    """
    dataset_name = model['options']['data']
    cnn_name = model['options']['cnn']

    text_to_image = "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f"
    image_to_text = "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f"

    per_fold = []
    for fold_idx in range(5):
        print('Loading fold ' + str(fold_idx))
        fold_data = datasets.load_dataset(
            dataset_name, cnn_name, load_train=False, fold=fold_idx)[split]
        captions, images = Datasource(fold_data, model['worddict']).all()

        print('Computing results...')
        caption_emb = tools.encode_sentences(model, captions)
        image_emb = tools.encode_images(model, images)
        error_matrix = tools.compute_errors(model, caption_emb, image_emb)

        t2i_metrics = t2i(error_matrix)
        print(text_to_image % tuple(t2i_metrics))

        i2t_metrics = i2t(error_matrix)
        print(image_to_text % tuple(i2t_metrics))

        # Text-to-image metrics first, image-to-text metrics second.
        per_fold.append(t2i_metrics + i2t_metrics)

    print("-----------------------------------")
    print("Mean metrics: ")
    stacked = numpy.array(per_fold)
    averaged = stacked.mean(axis=0).flatten()
    print(text_to_image % tuple(averaged[:5]))
    print(image_to_text % tuple(averaged[5:]))
def ranking_eval_Nfold(model, n_fold=1, subset='val'):
    """
    Evaluate a trained model on either val or test of the dataset it was
    trained on.

    Evaluate separately on n_fold image splits, and average the metrics.

    Parameters:
    -----------
    model: dict
        Dictionary containing the parameters of the current model
    n_fold: int
        Number of image splits to be evaluated on. Only supported n_fold=1
        with provided datasets.
    subset: str
        subset to perform the evaluation on. One of: 'val', 'test'

    Returns:
    --------
    results_dict: dict
        Dictionary containing the evaluation results. Structured as
        results_dict['cap_ret', 'img_ret']['r1', 'r5', 'r10', 'medr']
    score: float
        Score obtained, the sum of recalls for both problems caption
        retrieval and image retrieval.

    Raises:
    -------
    ValueError
        If n_fold is smaller than 1 (there would be nothing to average).
    """
    if n_fold < 1:
        raise ValueError('n_fold must be at least 1, got %r' % (n_fold,))

    results = []

    for fold in range(n_fold):
        # Call-form prints with a single argument behave the same with the
        # print statement and with the print function.
        print('Loading fold ' + str(fold))
        dataset = load_dataset(dataset_name=model['options']['data'],
                               embedding=model['options']['embedding'],
                               path_to_data=model['options']['data_path'],
                               test_subset=model['options']['test_subset'],
                               load_train=False,
                               fold=fold)
        caps, ims = Datasource(dataset[subset], model['worddict']).all()

        print('Computing results...')
        c_emb = tools.encode_sentences(model, caps)
        i_emb = tools.encode_images(model, ims)

        errs = tools.compute_errors(model, c_emb, i_emb)

        # Materialise each metric sequence once as a tuple so that `r + ri`
        # is always a concatenation, never an element-wise sum.
        r = tuple(t2i(errs))
        print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % r)

        ri = tuple(i2t(errs))
        print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % ri)
        results.append(r + ri)

    print("-----------------------------------")
    print("Mean metrics: ")
    # One row per fold: columns 0-4 are text-to-image, 5-9 image-to-text.
    mean_metrics = numpy.array(results).mean(axis=0).flatten()
    print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
          tuple(mean_metrics[:5]))
    print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
          tuple(mean_metrics[5:]))

    # Join everything in a dict
    metric_names = ('r1', 'r5', 'r10', 'medr')
    results_dict = OrderedDict([
        # Caption retrieval (image to text)
        ('cap_ret', OrderedDict(zip(metric_names, mean_metrics[5:9]))),
        # Image retrieval (text to image)
        ('img_ret', OrderedDict(zip(metric_names, mean_metrics[0:4]))),
    ])

    # Only the three recalls of each direction enter the score.
    score = mean_metrics[0:3].sum() + mean_metrics[5:8].sum()

    return results_dict, score
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs):
    """
    Train a model, validating on the dev set and snapshotting the best one.

    :param load_from: location to load parameters + options from
    :param save_dir: directory the options pickle and the parameter archive
        are written to
    :param name: name of model, used as location to save parameters + options
    :param kwargs: model options; these overwrite any reloaded options
    :return: ``(1., 1., 1.)`` as soon as the cost becomes NaN or infinite,
        otherwise ``None`` once all epochs have run
    """
    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print('reloading...' + load_from)
        # NOTE(review): unpickling runs arbitrary code -- only reload
        # snapshots that come from a trusted location.
        with open('%s.pkl' % load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    curr_model['options'].update(kwargs)
    model_options = curr_model['options']

    # initialize logger
    import datetime
    import sys  # only needed for the newline-free progress messages below
    timestampedName = datetime.datetime.now().strftime(
        '%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName,
              hyperparams=model_options,
              saveDir='vis/training',
              xLabel='Examples Seen',
              saveFrequency=1)

    print(curr_model['options'])

    # Load training and development sets
    print('Loading dataset')
    dataset = load_dataset(model_options['data'],
                           cnn=model_options['cnn'],
                           load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print('Creating dictionary')
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print('Dictionary size: ' + str(len(worddict)))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model (the with-block closes the file even if pickling fails)
    with open('%s/%s.pkl' % (save_dir, name), 'wb') as f:
        pkl.dump(curr_model, f)

    print('Loading data')
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print('Building model')
    params = init_params(model_options)
    # reload parameters
    # NOTE(review): the options are read from '<load_from>.pkl' but the
    # parameters are only reloaded when `load_from` itself exists -- confirm
    # that this is the intended path.
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print('Building sentence encoder')
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    # Written without a newline so the next message continues the line,
    # exactly like the original trailing-comma print statement.
    sys.stdout.write('Building f_grad... ')
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print('Building errors..')
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    sys.stdout.write('Building optimizers... ')
    # (compute gradients), (updates parameters)
    # NOTE(review): eval() executes the 'optimizer' option as code; it must
    # never come from untrusted input.
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    print('Optimization')

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in range(model_options['max_epochs']):

        # The double spaces reproduce the separator that the original
        # comma-separated print statements emitted.
        print('Epoch  %s' % (eidx,))

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print('Epoch  %s Update  %s Cost  %s UD  %s' %
                      (eidx, uidx, cost, ud))
                log.update({'Error': float(cost)}, n_samples)

            if numpy.mod(uidx, model_options['validFreq']) == 0:

                print('Computing results...')

                # encode sentences efficiently
                dev_s = encode_sentences(
                    curr_model,
                    dev_caps,
                    batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(
                    dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1, r5, r10, medr, meanr))
                log.update(
                    {
                        'R@1': r1,
                        'R@5': r5,
                        'R@10': r10,
                        'median_rank': medr,
                        'mean_rank': meanr
                    }, n_samples)
                print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1i, r5i, r10i, medri, meanri))
                log.update(
                    {
                        'Image2Caption_R@1': r1i,
                        'Image2Caption_R@5': r5i,
                        'Image2CaptionR@10': r10i,
                        'Image2Caption_median_rank': medri,
                        'Image2Caption_mean_rank': meanri
                    }, n_samples)

                # Model selection uses the text-to-image recalls only.
                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    sys.stdout.write('Saving... ')
                    numpy.savez('%s/%s' % (save_dir, name), **unzip(tparams))
                    print('Done')
                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open(
                            'vis/roc/%s/%s.json' %
                            (model_options['data'], timestampedName),
                            'w') as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        with open('vis/roc/index.json', 'r') as f:
                            index = json.load(f)
                    except IOError:
                        # No index yet: start a new one.
                        index = {}
                    # setdefault also covers an existing index that has no
                    # entry for this dataset yet.
                    models = index.setdefault(model_options['data'], [])
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)

        print('Seen %d samples' % n_samples)
def trainer(load_from=None, save_dir="snapshots", name="anon", **kwargs):
    """
    Train a model, validating on the dev set and snapshotting the best one.

    :param load_from: location to load parameters + options from
    :param save_dir: directory the options pickle and the parameter archive
        are written to
    :param name: name of model, used as location to save parameters + options
    :param kwargs: model options; these overwrite any reloaded options
    :return: ``(1.0, 1.0, 1.0)`` as soon as the cost becomes NaN or infinite,
        otherwise ``None`` once all epochs have run
    """
    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print("reloading..." + load_from)
        # NOTE(review): unpickling runs arbitrary code -- only reload
        # snapshots that come from a trusted location.
        with open("%s.pkl" % load_from, "rb") as f:
            curr_model = pkl.load(f)
    else:
        curr_model["options"] = {}

    curr_model["options"].update(kwargs)
    model_options = curr_model["options"]

    # initialize logger
    import datetime
    import sys  # only needed for the newline-free progress messages below

    timestampedName = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + "_" + name

    from logger import Log

    log = Log(
        name=timestampedName,
        hyperparams=model_options,
        saveDir="vis/training",
        xLabel="Examples Seen",
        saveFrequency=1,
    )

    print(curr_model["options"])

    # Load training and development sets
    print("Loading dataset")
    dataset = load_dataset(model_options["data"], cnn=model_options["cnn"], load_train=True)
    train = dataset["train"]
    dev = dataset["dev"]

    # Create dictionary
    print("Creating dictionary")
    worddict = build_dictionary(train["caps"] + dev["caps"])
    print("Dictionary size: " + str(len(worddict)))
    curr_model["worddict"] = worddict
    curr_model["options"]["n_words"] = len(worddict) + 2

    # save model (the with-block closes the file even if pickling fails)
    with open("%s/%s.pkl" % (save_dir, name), "wb") as f:
        pkl.dump(curr_model, f)

    print("Loading data")
    train_iter = datasource.Datasource(train, batch_size=model_options["batch_size"], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print("Building model")
    params = init_params(model_options)
    # reload parameters
    # NOTE(review): the options are read from "<load_from>.pkl" but the
    # parameters are only reloaded when `load_from` itself exists -- confirm
    # that this is the intended path.
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print("Building sentence encoder")
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print("Building image encoder")
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    # Written without a newline so the next message continues the line,
    # exactly like the original trailing-comma print statement.
    sys.stdout.write("Building f_grad... ")
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print("Building errors..")
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model["f_senc"] = f_senc
    curr_model["f_ienc"] = f_ienc
    curr_model["f_err"] = f_err

    if model_options["grad_clip"] > 0.0:
        grads = [maxnorm(g, model_options["grad_clip"]) for g in grads]

    lr = tensor.scalar(name="lr")
    sys.stdout.write("Building optimizers... ")
    # (compute gradients), (updates parameters)
    # NOTE(review): eval() executes the "optimizer" option as code; it must
    # never come from untrusted input.
    f_grad_shared, f_update = eval(model_options["optimizer"])(lr, tparams, grads, inps, cost)

    print("Optimization")

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in range(model_options["max_epochs"]):

        # The double spaces reproduce the separator that the original
        # comma-separated print statements emitted.
        print("Epoch  %s" % (eidx,))

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options["lrate"])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print("NaN detected")
                return 1.0, 1.0, 1.0

            if numpy.mod(uidx, model_options["dispFreq"]) == 0:
                print("Epoch  %s Update  %s Cost  %s UD  %s" % (eidx, uidx, cost, ud))
                log.update({"Error": float(cost)}, n_samples)

            if numpy.mod(uidx, model_options["validFreq"]) == 0:

                print("Computing results...")

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options["batch_size"])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print("Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr))
                log.update({"R@1": r1, "R@5": r5, "R@10": r10, "median_rank": medr, "mean_rank": meanr}, n_samples)
                print("Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri))
                log.update(
                    {
                        "Image2Caption_R@1": r1i,
                        "Image2Caption_R@5": r5i,
                        "Image2CaptionR@10": r10i,
                        "Image2Caption_median_rank": medri,
                        "Image2Caption_mean_rank": meanri,
                    },
                    n_samples,
                )

                # Model selection uses the text-to-image recalls only.
                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    sys.stdout.write("Saving... ")
                    numpy.savez("%s/%s" % (save_dir, name), **unzip(tparams))
                    print("Done")
                    vis_details["hyperparams"] = model_options
                    # Save visualization details
                    with open("vis/roc/%s/%s.json" % (model_options["data"], timestampedName), "w") as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        with open("vis/roc/index.json", "r") as f:
                            index = json.load(f)
                    except IOError:
                        # No index yet: start a new one.
                        index = {}
                    # setdefault also covers an existing index that has no
                    # entry for this dataset yet.
                    models = index.setdefault(model_options["data"], [])
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open("vis/roc/index.json", "w") as f:
                        json.dump(index, f)

        print("Seen %d samples" % n_samples)
def trainer(**kwargs):
    """
    Train the model according to input params.

    Info about input params is available in parameters.py

    When the 'reload_' option is set, the options, the training state and
    the network weights of a previous run are reloaded first (from the best
    or the last saved model, depending on the 'load_from' option); the input
    params then overwrite the reloaded options.

    After every epoch the current state and weights are saved as the "last"
    model; whenever the validation score improves they are also saved as the
    "best" model.

    :param kwargs: model options
    :return: the `solution` dict of the best model seen so far (a placeholder
        built after the first epoch if none was found, ``None`` if no epoch
        ran at all), or ``(1., 1., 1.)`` as soon as the cost becomes NaN or
        infinite
    """
    # Timing
    print('Starting time:', datetime.now())
    sys.stdout.flush()
    t_start_train = time.time()

    # Model options
    # load old model, including parameters, but overwrite with new options

    # Extract model options from arguments
    model_options = dict(kwargs)

    # Print input options
    print('PARAMETERS BEFORE LOADING:')
    for k, v in model_options.items():
        print('{:>26}: {}'.format(k, v))
    sys.stdout.flush()

    # State of a fresh run; replaced below when reloading succeeds.
    # `solution is None` means "no solution available yet".
    curr_model = dict()
    init_epoch = 0
    best_val_score = 0
    n_samples = 0
    solution = None
    load_from_best = False

    # Reload options if required
    if model_options['reload_']:
        # Reload model parameters
        opt_filename_reload = get_opt_filename(model_options, previous=True)
        print('reloading...', opt_filename_reload)
        sys.stdout.flush()
        try:
            # NOTE(review): unpickling runs arbitrary code -- only reload
            # files that come from a trusted location.
            with open(opt_filename_reload, 'rb') as f:
                curr_model = pkl.load(f)
        except Exception:
            # Best effort: fall back to the input options only.
            print(
                'Failed to reload parameters, try to use only feeded parameters'
            )
            curr_model['options'] = {}

        # Check if we reload from best model or last model
        if model_options['load_from'] in ['Best', 'best', 'B', 'b']:
            load_from_best = True
            print('Loading from Best saved model in validation results')
        elif model_options['load_from'] in ['Last', 'last', 'L', 'l']:
            load_from_best = False
            print('Loading from Last saved model')
        else:
            print('Unkown choice for "load_from" parameter',
                  model_options['load_from'])
            print('Please choose one of:', ['Best', 'best', 'B', 'b'],
                  ['Last', 'last', 'L', 'l'])
            print('Using Last as default')
            load_from_best = False

        # Reload end-point parameters
        state_filename = get_sol_filename(model_options,
                                          best=load_from_best,
                                          previous=True)
        print('reloading...', state_filename)
        sys.stdout.flush()
        try:
            with open(state_filename, 'rb') as f:
                state_params = pkl.load(f)
            if load_from_best:
                # The "best" file stores the solution dict itself.
                init_epoch = state_params['epoch']
                solution = state_params
            else:
                # The "last" file wraps the solution; resume after the
                # epoch that was completed.
                init_epoch = state_params['epoch_done'] + 1
                solution = state_params['solution']
            best_val_score = solution['best_val_score']
            n_samples = solution['samples_seen']
        except Exception:
            # Best effort: a missing or malformed state restarts from 0.
            print('Failed to reload state parameters, starting from 0')
            init_epoch = 0
            best_val_score = 0
            n_samples = 0
            solution = None
    else:
        curr_model['options'] = {}

    # Overwrite loaded options with input options
    curr_model['options'].update(kwargs)
    model_options = curr_model['options']

    # Print final options loaded
    if model_options['reload_']:
        print('PARAMETERS AFTER LOADING:')
        for k, v in model_options.items():
            print('{:>26}: {}'.format(k, v))
        sys.stdout.flush()

    # Load training and development sets
    print('Loading dataset')
    sys.stdout.flush()

    dataset = load_dataset(dataset_name=model_options['data'],
                           embedding=model_options['embedding'],
                           path_to_data=model_options['data_path'],
                           test_subset=model_options['test_subset'],
                           load_train=True,
                           fold=0)
    train = dataset['train']
    dev = dataset['val']

    # Create word dictionary
    print('Creating dictionary')
    sys.stdout.flush()
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print('Dictionary size: ' + str(len(worddict)))
    sys.stdout.flush()
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    opt_filename_save = get_opt_filename(model_options, previous=False)
    print('Saving model parameters in', opt_filename_save)
    sys.stdout.flush()
    try:
        os.makedirs(os.path.dirname(opt_filename_save))
    except OSError:
        # Directory already exists (or the path has no directory part).
        pass
    with open(opt_filename_save, 'wb') as f:
        pkl.dump(curr_model, f)

    # Load data from dataset
    print('Loading data')
    sys.stdout.flush()
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print('Building model')
    sys.stdout.flush()
    params = init_params(model_options)

    # reload network parameters, ie. weights
    if model_options['reload_']:
        params_filename = get_npz_filename(model_options,
                                           best=load_from_best,
                                           previous=True)
        params = load_params(params_filename, params)

    tparams = init_tparams(params)
    inps, cost = build_model(tparams, model_options)

    print('Building sentence encoder')
    sys.stdout.flush()
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    sys.stdout.flush()
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print('Building f_grad...')
    sys.stdout.flush()
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print('Building errors...')
    sys.stdout.flush()
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print('Building optimizers...')
    sys.stdout.flush()
    # (compute gradients), (updates parameters)
    # NOTE(review): eval() executes the 'optimizer' option as code; it must
    # never come from untrusted input.
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    # Get names for the files to save model and solution
    sol_filename_best = get_sol_filename(model_options,
                                         best=True,
                                         previous=False)
    sol_filename_last = get_sol_filename(model_options,
                                         best=False,
                                         previous=False)
    params_filename_best = get_npz_filename(model_options,
                                            best=True,
                                            previous=False)
    params_filename_last = get_npz_filename(model_options,
                                            best=False,
                                            previous=False)
    print('PATHS TO MODELS:')
    for filename in [
            sol_filename_best, sol_filename_last, params_filename_best,
            params_filename_last
    ]:
        print(filename)
        sys.stdout.flush()
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError:
            # Directory already exists (or the path has no directory part).
            pass

    # Start optimization
    print('Optimization')
    sys.stdout.flush()

    uidx = 0

    # Timing
    t_start = time.time()
    print('Starting time:', datetime.now())

    for eidx in range(init_epoch, model_options['max_epochs']):
        t_start_epoch = time.time()
        print('Epoch ', eidx)
        sys.stdout.flush()

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                sys.stdout.flush()
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ',
                      ud)
                sys.stdout.flush()

            if numpy.mod(uidx, model_options['validFreq']) == 0:
                print('Computing results...')
                sys.stdout.flush()

                # encode sentences efficiently
                dev_s = encode_sentences(
                    curr_model,
                    dev_caps,
                    batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                # (here the "i"-suffixed names hold the text-to-image values)
                (r1, r5, r10, medr, meanr) = i2t(dev_errs)
                (r1i, r5i, r10i, medri, meanri) = t2i(dev_errs)
                print("Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1i, r5i, r10i, medri, meanri))
                sys.stdout.flush()
                print("Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1, r5, r10, medr, meanr))
                sys.stdout.flush()

                # Score
                val_score = r1 + r5 + r10 + r1i + r5i + r10i
                if val_score > best_val_score:

                    print('BEST MODEL FOUND')
                    print('Score:', val_score)
                    print('Previous best score:', best_val_score)
                    best_val_score = val_score
                    # Join in a results dict
                    results_dict = build_results_dict(r1, r5, r10, medr, r1i,
                                                      r5i, r10i, medri)

                    # Save parameters
                    print('Saving...', end=' ')
                    sys.stdout.flush()
                    numpy.savez(params_filename_best, **unzip(tparams))
                    print('Done')
                    sys.stdout.flush()

                    # Update solution
                    solution = OrderedDict([
                        ('epoch', eidx), ('update', uidx),
                        ('samples_seen', n_samples),
                        ('best_val_score', best_val_score),
                        ('best_val_res', results_dict),
                        ('time_until_results',
                         str(timedelta(seconds=(time.time() -
                                                t_start_train))))
                    ])
                    with open(sol_filename_best, 'wb') as f:
                        pkl.dump(solution, f)

        print('Seen %d samples' % n_samples)
        sys.stdout.flush()

        # Timing
        t_epoch = time.time() - t_start_epoch
        t_epoch_avg = (time.time() - t_start) / (eidx + 1 - (init_epoch))
        print('Time for this epoch:', str(timedelta(seconds=t_epoch)),
              'Average:', str(timedelta(seconds=t_epoch_avg)))
        t_2_complete = t_epoch_avg * (model_options['max_epochs'] -
                                      (eidx + 1))
        print('Time since start session:',
              str(timedelta(seconds=time.time() - t_start)),
              'Estimated time to complete training:',
              str(timedelta(seconds=t_2_complete)))
        print('Current time:', datetime.now())
        sys.stdout.flush()

        # Save current model
        if solution is None:
            # Nothing reloaded and no best model found yet: store a
            # placeholder solution (it has no 'best_val_res' entry).
            solution = OrderedDict([
                ('epoch', eidx), ('update', uidx),
                ('samples_seen', n_samples),
                ('best_val_score', best_val_score),
                ('time_until_results',
                 str(timedelta(seconds=(time.time() - t_start_train))))
            ])
        state_params = OrderedDict([('epoch_done', eidx),
                                    ('solution', solution)])
        with open(sol_filename_last, 'wb') as f:
            pkl.dump(state_params, f)

        # Save parameters
        print('Saving LAST npz...', end=' ')
        sys.stdout.flush()
        numpy.savez(params_filename_last, **unzip(tparams))
        print('Done')
        sys.stdout.flush()

    return solution
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs):
    """
    Train a model, validating on the dev set and snapshotting the best one.

    :param load_from: location to load parameters + options from
    :param save_dir: directory the options pickle and the parameter archive
        are written to
    :param name: name of model, used as location to save parameters + options
    :param kwargs: model options; these overwrite any reloaded options
    :return: ``(1., 1., 1.)`` as soon as the cost becomes NaN or infinite,
        otherwise ``None`` once all epochs have run
    """
    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print('reloading...' + load_from)
        # NOTE(review): unpickling runs arbitrary code -- only reload
        # snapshots that come from a trusted location.
        with open('%s.pkl' % load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    curr_model['options'].update(kwargs)
    model_options = curr_model['options']

    # initialize logger
    import datetime
    import sys  # only needed for the newline-free progress messages below
    timestampedName = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName, hyperparams=model_options, saveDir='vis/training',
              xLabel='Examples Seen', saveFrequency=1)

    print(curr_model['options'])

    # Load training and development sets
    print('Loading dataset')
    dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print('Creating dictionary')
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print('Dictionary size: ' + str(len(worddict)))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model (the with-block closes the file even if pickling fails)
    with open('%s/%s.pkl' % (save_dir, name), 'wb') as f:
        pkl.dump(curr_model, f)

    print('Loading data')
    train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print('Building model')
    params = init_params(model_options)
    # reload parameters
    # NOTE(review): the options are read from '<load_from>.pkl' but the
    # parameters are only reloaded when `load_from` itself exists -- confirm
    # that this is the intended path.
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print('Building sentence encoder')
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    # Written without a newline so the next message continues the line,
    # exactly like the original trailing-comma print statement.
    sys.stdout.write('Building f_grad... ')
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print('Building errors..')
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    sys.stdout.write('Building optimizers... ')
    # (compute gradients), (updates parameters)
    # NOTE(review): eval() executes the 'optimizer' option as code; it must
    # never come from untrusted input.
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost)

    print('Optimization')

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in range(model_options['max_epochs']):

        # The double spaces reproduce the separator that the original
        # comma-separated print statements emitted.
        print('Epoch  %s' % (eidx,))

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print('Epoch  %s Update  %s Cost  %s UD  %s' % (eidx, uidx, cost, ud))
                log.update({'Error': float(cost)}, n_samples)

            if numpy.mod(uidx, model_options['validFreq']) == 0:

                print('Computing results...')

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr))
                log.update({'R@1': r1, 'R@5': r5, 'R@10': r10, 'median_rank': medr, 'mean_rank': meanr}, n_samples)
                print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri))
                log.update({'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i, 'Image2CaptionR@10': r10i, 'Image2Caption_median_rank': medri, 'Image2Caption_mean_rank': meanri}, n_samples)

                # Model selection uses the text-to-image recalls only.
                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    sys.stdout.write('Saving... ')
                    numpy.savez('%s/%s' % (save_dir, name), **unzip(tparams))
                    print('Done')
                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open('vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        with open('vis/roc/index.json', 'r') as f:
                            index = json.load(f)
                    except IOError:
                        # First model ever saved: there is no index yet, so
                        # start a new one instead of crashing.
                        index = {}
                    # setdefault also covers an existing index that has no
                    # entry for this dataset yet.
                    models = index.setdefault(model_options['data'], [])
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)

        print('Seen %d samples' % n_samples)