def train_model(params):
    """ Main function """

    if params['RELOAD'] > 0:
        logging.info('Resuming training.')

    ########### Load data
    dataset = build_dataset(params)
    ###########

    ########### Build model
    if params['REUSE_MODEL_NAME'] is not None and params['REUSE_MODEL_RELOAD'] > 0:
        ing_model = loadModel(params['REUSE_MODEL_NAME'], params['REUSE_MODEL_RELOAD'])
        ing_model.setName(model_name=params['MODEL_NAME'], store_path=params['STORE_PATH'])
        ing_model.changeClassifier(params, last_layer=params['LAST_LAYER'])
        ing_model.updateLogger(force=True)
    elif params['RELOAD'] == 0:  # build new model
        ing_model = Ingredients_Model(params, type=params['MODEL_TYPE'],
                                      verbose=params['VERBOSE'],
                                      model_name=params['MODEL_NAME'],
                                      store_path=params['STORE_PATH'])
        # Define the inputs and outputs mapping from our Dataset instance to our model
        ing_model.setInputsMapping(params['INPUTS_MAPPING'])
        ing_model.setOutputsMapping(params['OUTPUTS_MAPPING'])
    else:  # resume from previously trained model
        ing_model = loadModel(params['STORE_PATH'], params['RELOAD'])

    # Update optimizer either if we are loading or building a model
    ing_model.params = params
    ing_model.setOptimizer()
    ###########

    ########### Callbacks
    callbacks = buildCallbacks(params, ing_model, dataset)
    ###########

    ########### Training
    total_start_time = timer()
    logger.debug('Starting training!')
    training_params = {'n_epochs': params['MAX_EPOCH'],
                       'batch_size': params['BATCH_SIZE'],
                       'lr_decay': params['LR_DECAY'],
                       'lr_gamma': params['LR_GAMMA'],
                       'epochs_for_save': params['EPOCHS_FOR_SAVE'],
                       'verbose': params['VERBOSE'],
                       'n_parallel_loaders': params['PARALLEL_LOADERS'],
                       'extra_callbacks': callbacks,
                       'reload_epoch': params['RELOAD'],
                       'epoch_offset': params['RELOAD'],
                       'data_augmentation': params['DATA_AUGMENTATION'],
                       'patience': params['PATIENCE'],
                       'metric_check': params['STOP_METRIC']}
    ing_model.trainNet(dataset, training_params)

    total_end_time = timer()
    time_difference = total_end_time - total_start_time
    logging.info('Total time spent {0:.2f}s = {1:.2f}m'.format(time_difference, time_difference / 60.0))
def InceptionResNetV2_PlusFC_LMW(self, params):
    assert len(params['INPUTS'].keys()) == 1, 'Number of inputs must be one.'
    assert params['INPUTS'][params['INPUTS'].keys()[0]]['type'] == 'raw-image', 'Input must be of type "raw-image".'

    self.ids_inputs = params['INPUTS'].keys()
    self.ids_outputs = params['OUTPUTS'].keys()

    model_folder = '/home/eduardo/Documents/workspaces/ifood2019_recognition/models/inception_resnet_v2_logmealv3/models'
    model_reload_epoch = 9

    # Load model
    base_model = loadModel(model_folder, model_reload_epoch).model

    ##################################################
    x = base_model.get_layer('avg_pool').output
    ##################################################

    # Define outputs
    outputs_list = []
    for id_name, data in params['OUTPUTS'].iteritems():
        # Count the number of output classes
        num_classes = 0
        with open(params['DATA_ROOT_PATH'] + '/' + data['classes'], 'r') as f:
            for line in f:
                num_classes += 1
        if params['EMPTY_LABEL']:
            num_classes += 1

        # Define only a FC output layer (+ activation) per output
        out = Dense(num_classes)(x)
        out_act = Activation(data['activation'], name=id_name)(out)
        outputs_list.append(out_act)

    self.model = Model(input=base_model.input, output=outputs_list)
def InceptionResNetV2_Ensemble(self, params):
    # assert len(params['INPUTS'].keys()) == 1, 'Number of inputs must be one.'
    # assert params['INPUTS'][params['INPUTS'].keys()[0]]['type'] == 'raw-image', 'Input must be of type "raw-image".'

    self.ids_inputs = params['INPUTS'].keys()
    self.ids_outputs = params['OUTPUTS'].keys()

    input_shape = params['INPUTS'][params['INPUTS'].keys()[0]]['img_size_crop']
    image = Input(name=self.ids_inputs[0], shape=input_shape)

    models_folder = ['inceptionresnetv2_LMW_adam_1',
                     'inceptionresnetv2_LMW_adam_2',
                     'inceptionresnetv2_LMW_adam_3',
                     'inceptionresnetv2_LMW_adam_4',
                     'inceptionresnetv2_LMW_adam_5',
                     'inceptionresnetv2_LMW_adam_6',
                     'inceptionresnetv2_LMW_adam_7']
    models_reload_epoch = [9, 13, 14, 11, 17, 10, 12]

    models = []
    for idx, bmodel_folder in enumerate(models_folder):
        model_folder = '/home/eduardo/Documents/workspaces/ifood2019_recognition/models/' + bmodel_folder
        model_reload_epoch = models_reload_epoch[idx]
        # Load model
        base_model = loadModel(model_folder, model_reload_epoch).model
        base_model.name = "emodel_" + str(idx)
        models.append(base_model)

    # models = [base_model_1, base_model_2]
    merged_models = []
    for j in range(len(models)):
        base_model = models[j]
        for i, layer in enumerate(base_model.layers[1:]):
            layer.trainable = False
            print layer.name
        merged_models.append(base_model(image))

    x = Merge()(merged_models)
    ##################################################

    # Define outputs
    outputs_list = []
    for id_name, data in params['OUTPUTS'].iteritems():
        # Count the number of output classes
        num_classes = 0
        with open(params['DATA_ROOT_PATH'] + '/' + data['classes'], 'r') as f:
            for line in f:
                num_classes += 1
        if data['type'] == 'binary':
            num_classes += 1  # empty label

        # Define only a FC output layer (+ activation) per output
        x = Dense(num_classes * len(models), activation="relu")(x)
        out = Dense(num_classes, kernel_initializer="ones")(x)
        out_act = Activation(data['activation'], name=id_name)(out)
        outputs_list.append(out_act)

    self.model = Model(input=image, output=outputs_list)
def test_models_allclose(model, model_init=None, model_next=None, rtol=1e-05, atol=1e-08, verbose=0):
    if isinstance(model, str):
        from keras_wrapper.cnn_model import loadModel
        model = loadModel(model, -1, full_path=True)
        model_init = model.model_init
        model_next = model.model_next
        model = model.model

    logging.info("Checking all models are close")
    model_names = map(str, model.weights)

    if model_init is None and model_next is None:
        logging.warning("Checking of models_allclose won't be performed, because model_init and model_next are None")
        return True

    if verbose > 0:
        print("Checking model next weights")
    if model_next is not None:
        model_next_names = map(str, model_next.weights)
        for (index_next, name) in list(enumerate(model_next_names)):
            index_model = model_names.index(name)
            if not np.allclose(model.weights[index_model].get_value(),
                               model_next.weights[index_next].get_value(), rtol=rtol, atol=atol):
                raise AssertionError('Parameters ' + name + ' are not close! (model index: ' +
                                     str(index_model) + ' model_next index ' + str(index_next) + ')')
            if verbose > 0:
                print("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close")

    if verbose > 0:
        print("Checking model init weights")
    if model_init is not None:
        model_init_names = map(str, model_init.weights)
        for (index_init, name) in list(enumerate(model_init_names)):
            index_model = model_names.index(name)
            if not np.allclose(model.weights[index_model].get_value(),
                               model_init.weights[index_init].get_value(), rtol=rtol, atol=atol):
                raise AssertionError('Parameters ' + name + ' are not close! (model index: ' +
                                     str(index_model) + ' model_init index ' + str(index_init) + ')')
            if verbose > 0:
                print("Weights", name, "(position ", index_init, "at model_init - position", index_model, "at model are close")
    return True
def score_corpus(args, params): print "Using an ensemble of %d models" % len(args.models) models = [loadModel(m, -1, full_path=True) for m in args.models] dataset = loadDataset(args.dataset) if args.source is not None: dataset = update_dataset_from_file(dataset, args.source, params, splits=args.splits, output_text_filename=args.target, compute_state_below=True) params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]] params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]] # Apply scoring extra_vars = dict() extra_vars['tokenize_f'] = eval('dataset.' + params['TOKENIZATION_METHOD']) for s in args.splits: # Apply model predictions params_prediction = {'max_batch_size': params['BATCH_SIZE'], 'n_parallel_loaders': params['PARALLEL_LOADERS'], 'predict_on_sets': [s]} if params['BEAM_SEARCH']: params_prediction['beam_size'] = params['BEAM_SIZE'] params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST'] params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH'] params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL'] params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL'] params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET'] params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET'] params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False) params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0) params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False) params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False) params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0) params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0) params_prediction['pos_unk'] = params.get('POS_UNK', False) params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \ else params.get('MAX_OUTPUT_TEXT_LEN', 50) params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True) params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3) params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True) params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2) beam_searcher = BeamSearchEnsemble(models, dataset, params_prediction, verbose=args.verbose) scores = beam_searcher.scoreNet()[s] # Store result if args.dest is not None: filepath = args.dest # results file if params['SAMPLING_SAVE_MODE'] == 'list': list2file(filepath, scores) elif params['SAMPLING_SAVE_MODE'] == 'numpy': numpy2file(filepath, scores) else: raise Exception('The sampling mode ' + params['SAMPLING_SAVE_MODE'] + ' is not currently supported.') else: print scores
def apply_model(params):
    """
        Function for using a previously trained model for predicting.
    """
    ########### Load data
    dataset = build_dataset(params)
    ###########

    ########### Load model
    ing_model = loadModel(params['STORE_PATH'], params['RELOAD'])
    ing_model.setOptimizer()
    ###########

    ########### Apply sampling
    for s in params["EVAL_ON_SETS"]:
        # Apply model predictions
        params_prediction = {'batch_size': params['BATCH_SIZE'],
                             'n_parallel_loaders': params['PARALLEL_LOADERS'],
                             'predict_on_sets': [s],
                             'normalize': params['NORMALIZE_IMAGES'],
                             'mean_substraction': params['MEAN_SUBSTRACTION']}
        predictions = ing_model.predictNet(dataset, params_prediction)[s]

        # Format predictions
        predictions = decode_multilabel(predictions,  # not used
                                        dataset.extra_variables['idx2word_binary'],
                                        min_val=params['MIN_PRED_VAL'],
                                        verbose=1)

        # Store result
        filepath = ing_model.model_path + '/' + s + '_labels.pred'  # results file
        listoflists2file(filepath, predictions)

        ## Evaluate result
        extra_vars = dict()
        extra_vars[s] = dict()
        extra_vars[s]['word2idx'] = dataset.extra_variables['word2idx_binary']
        exec("extra_vars[s]['references'] = dataset.Y_" + s + "[params['OUTPUTS_IDS_DATASET'][0]]")

        for metric in params['METRICS']:
            logging.info('Evaluating on metric ' + metric)
            # Evaluate on the chosen metric
            metrics = evaluation.select[metric](pred_list=predictions,
                                                verbose=1,
                                                extra_vars=extra_vars,
                                                split=s)
def __init__(self):
    self.session = tf.Session()
    self.graph = tf.get_default_graph()
    with self.graph.as_default():
        with self.session.as_default():
            dataset = loadDataset("dataset/Dataset_tutorial_dataset.pkl")
            nmt_model = loadModel("", epoch_num)
            params = nmt_model.params

            inputMapping = dict()
            for i, id_in in enumerate(params['INPUTS_IDS_DATASET']):
                pos_source = dataset.ids_inputs.index(id_in)
                id_dest = nmt_model.ids_inputs[i]
                inputMapping[id_dest] = pos_source
            nmt_model.setInputsMapping(inputMapping)

            outputMapping = dict()
            for i, id_out in enumerate(params['OUTPUTS_IDS_DATASET']):
                pos_target = dataset.ids_outputs.index(id_out)
                id_dest = nmt_model.ids_outputs[i]
                outputMapping[id_dest] = pos_target
            nmt_model.setOutputsMapping(outputMapping)

            params_prediction = {
                'language': 'en',
                'tokenize_f': eval('dataset.' + 'tokenize_basic'),
                'beam_size': 2,
                'optimized_search': True,
                'model_inputs': params['INPUTS_IDS_MODEL'],
                'model_outputs': params['OUTPUTS_IDS_MODEL'],
                'dataset_inputs': params['INPUTS_IDS_DATASET'],
                'dataset_outputs': params['OUTPUTS_IDS_DATASET'],
                'n_parallel_loaders': 1,
                'maxlen': 50,
                'model_inputs': ['source_text', 'state_below'],
                'model_outputs': ['target_text'],
                'dataset_inputs': ['source_text', 'state_below'],
                'dataset_outputs': ['target_text'],
                'normalize': True,
                'pos_unk': True,
                'heuristic': 0,
                'state_below_maxlen': -1,
                'length_norm_factor': 1.0,
                'length_penalty': True,
                'predict_on_sets': ['test'],
                'verbose': 0,
            }
            self.params = params
            self.dataset = dataset
            self.nmt_model = nmt_model
            self.params_prediction = params_prediction
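The attributes stored above are enough to run beam-search decoding later. A minimal sketch of such a method, assuming the user text is registered as a 'test' split (the method name translate, the input file handling and the overwrite_split flag are assumptions, not part of the original class):

def translate(self, text_path):
    with self.graph.as_default():
        with self.session.as_default():
            # Register the new text as the 'test' split (hypothetical file and settings)
            self.dataset.setInput(text_path, 'test', type='text', id='source_text',
                                  pad_on_batch=True, tokenization='tokenize_basic',
                                  fill='end', max_text_len=50, min_occ=0, overwrite_split=True)
            self.dataset.setInput(None, 'test', type='ghost', id='state_below', required=False)
            # Beam search with the stored prediction settings
            predictions = self.nmt_model.predictBeamSearchNet(self.dataset, self.params_prediction)['test']
            vocab = self.dataset.vocabulary['target_text']['idx2words']
            return decode_predictions_beam_search(predictions['samples'], vocab, verbose=0)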
def apply_VQA_model(params):
    """
        Function for using a previously trained model for sampling.
    """
    ########### Load data
    dataset = build_dataset(params)
    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]
    ###########

    ########### Load model
    vqa = loadModel(params['STORE_PATH'], params['RELOAD'])
    vqa.setOptimizer()
    ###########

    ########### Apply sampling
    for s in params["EVAL_ON_SETS"]:
        # Apply model predictions
        params_prediction = {'batch_size': params['BATCH_SIZE'],
                             'n_parallel_loaders': params['PARALLEL_LOADERS'],
                             'predict_on_sets': [s]}
        predictions = vqa.predictNet(dataset, params_prediction)[s]

        # Convert predictions into sentences
        vocab = dataset.vocabulary[params['OUTPUTS_IDS_DATASET'][0]]['idx2words']
        predictions = vqa.decode_predictions(predictions,
                                             1,  # always set temperature to 1
                                             vocab,
                                             params['SAMPLING'],
                                             verbose=params['VERBOSE'])

        # Store result
        filepath = vqa.model_path + '/' + s + '_sampling.txt'  # results file
        if params['SAMPLING_SAVE_MODE'] == 'list':
            list2file(filepath, predictions)
        elif params['SAMPLING_SAVE_MODE'] == 'vqa':
            exec('question_ids = dataset.X_' + s + '["' + params['INPUTS_IDS_DATASET'][0] + '_ids"]')
            list2vqa(filepath, predictions, question_ids)
def test_model(params, s, i):
    food_model = loadModel(params['STORE_PATH'], i)
    food_model.setOptimizer()

    dataset = build_dataset_val_test(params)
    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][1]]

    params_prediction = {'predict_on_sets': [s],
                         'normalize': False,
                         'n_parallel_loaders': 1,
                         'verbose': True}
    predictions = food_model.predictNet(dataset, params_prediction)[s]

    with open("%s/data/predictions_%s.txt" % (Path.DATA_FOLDER, s), "wb") as fp:
        pickle.dump(predictions, fp)

    total, acc, r_loss = 0, 0, list()
    index = open("%s/data/index_%s.txt" % (Path.DATA_FOLDER, s), 'r')
    outs = open("%s/data/new_outs_%s.txt" % (Path.DATA_FOLDER, s), 'r')
    i_content = [int(x.strip()) for x in index.readlines()]
    o_content = [int(x.strip()) for x in outs.readlines()]

    prev_i = 0
    for i in i_content:
        total += 1
        r_loss.append(label_ranking_loss([o_content[prev_i:prev_i + i]],
                                         [[x[0] for x in predictions[prev_i:prev_i + i].tolist()]]))
        max_o = np.argmax(o_content[prev_i:prev_i + i])
        sorted_i = np.argsort([-x[0] for x in predictions[prev_i:prev_i + i]])
        acc += (i - list(sorted_i).index(max_o)) * 1.0 / i
        prev_i += i

    print("Acc: %s" % (acc / total))
    print("Ranking Loss: %s" % np.mean(r_loss))
    print("Total: %s" % total)
    return (acc / total), np.mean(r_loss)
def apply_model(params):
    """
        Function for using a previously trained model for predicting.
    """
    ########### Load data
    dataset = build_dataset(params)
    ###########

    ########### Load model
    ing_model = loadModel(params['STORE_PATH'], params['RELOAD'])
    ing_model.setOptimizer()
    ###########

    ########### Apply sampling
    callbacks = buildCallbacks(params, ing_model, dataset)
    callbacks[0].evaluate(params['RELOAD'], 'epoch')
def apply_model(params):
    """
        Function for using a previously trained model for sampling.
    """
    ########### Load data
    dataset = build_dataset(params)
    # Keep original images size if IMAGE_RESIZE == False
    if not params['IMAGE_CROPPING']:
        dataset.img_size_crop = dataset.img_size
    ###########

    ########### Load model
    model = loadModel(params['STORE_PATH'], params['RELOAD'],
                      custom_objects={"AttentionComplex": AttentionComplex})
    model.setOptimizer()
    ###########

    ########### Apply sampling
    callbacks = buildCallbacks(params, model, dataset)
    callbacks[0].evaluate(params['RELOAD'], 'epoch')
import re

"""## 3. Decoding with a trained Neural Machine Translation Model

Now, we'll load from disk the model we just trained and apply it to translate new text. In this case, we want to translate the 'test' split of our dataset.

Since we want to translate a new data split ('test'), we must add it to the dataset instance, just as we did before (in the first tutorial). If we also had the references of the test split and wanted to evaluate against them, we could add them to the dataset as well. Note that this is not mandatory: we could just predict without evaluating.
"""

DATA_PATH = os.path.join(os.getcwd(), 'data/PersonaChat/')
MODEL_PATH = os.path.join(os.getcwd(), 'models/persona_chat_context_lstm_13_de_layers')

dataset = loadDataset(os.path.join(MODEL_PATH, "dataset/Dataset_tutorial_dataset.pkl"))

epoch_choice = 17

# Load model
nmt_model = loadModel(MODEL_PATH, epoch_choice)

params = load_parameters()
params_prediction = {
    'language': 'en',
    'tokenize_f': eval('dataset.' + 'tokenize_basic'),
    'beam_size': 6,
    'optimized_search': True,
    'model_inputs': params['INPUTS_IDS_MODEL'],
    'model_outputs': params['OUTPUTS_IDS_MODEL'],
    'dataset_inputs': params['INPUTS_IDS_DATASET'],
    'dataset_outputs': params['OUTPUTS_IDS_DATASET'],
    'n_parallel_loaders': 1,
    'maxlen': 50,
    'model_inputs': ['source_text', 'state_below'],
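Before running the prediction above, the 'test' split has to be registered in the dataset instance. A minimal sketch, assuming a plain-text source file (the file name and length settings below are illustrative, not from the tutorial):

dataset.setInput(os.path.join(DATA_PATH, 'test_input.txt'),  # hypothetical source file
                 'test', type='text', id='source_text',
                 pad_on_batch=True, tokenization='tokenize_basic',
                 fill='end', max_text_len=50, min_occ=0)
# The decoder input is built at prediction time, so it is registered as a 'ghost' input
dataset.setInput(None, 'test', type='ghost', id='state_below', required=False)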
def average_models(models, output_model, weights=None):
    from keras_wrapper.cnn_model import loadModel, saveModel

    if not isinstance(models, list):
        raise AssertionError('You must give a list of models to average.')
    if len(models) == 0:
        raise AssertionError('You provided an empty list of models to average!')

    model_weights = np.asarray([1. / len(models)] * len(models), dtype=np.float32) \
        if (weights is None) or (weights == []) else np.asarray(weights, dtype=np.float32)
    if len(model_weights) != len(models):
        raise AssertionError('You must give a list of weights of the same size than the list of models.')

    loaded_models = [loadModel(m, -1, full_path=True) for m in models]

    # Check that all models are compatible
    if not all([hasattr(loaded_model, 'model') for loaded_model in loaded_models]):
        raise AssertionError('Not all models have the attribute "model".')
    if not (all([hasattr(loaded_model, 'model_init') for loaded_model in loaded_models]) or
            all([not hasattr(loaded_model, 'model_init') for loaded_model in loaded_models])):
        raise AssertionError('Not all models have the attribute "model_init".')
    if not (all([hasattr(loaded_model, 'model_next') for loaded_model in loaded_models]) or
            all([not hasattr(loaded_model, 'model_next') for loaded_model in loaded_models])):
        raise AssertionError('Not all models have the attribute "model_next".')

    # Check all layers are the same
    if not (all([[str(loaded_models[0].model.weights[i]) == str(loaded_model.model.weights[i])
                  for i in range(len(loaded_models[0].model.weights))]
                 for loaded_model in loaded_models])):
        raise AssertionError('Not all models have the same weights!')

    if hasattr(loaded_models[0], 'model_init'):
        if not all([[str(loaded_models[0].model_init.weights[i]) == str(loaded_model.model_init.weights[i])
                     for i in range(len(loaded_models[0].model.weights))]
                    for loaded_model in loaded_models]):
            raise AssertionError('Not all models have the same weights!')
        if not all([[str(loaded_models[0].model.weights[i]) == str(loaded_model.model.weights[i])
                     for i in range(len(loaded_models[0].model_init.weights))]
                    for loaded_model in loaded_models]):
            raise AssertionError('Not all model_inits have the same weights!')

    if hasattr(loaded_models[0], 'model_next'):
        if not all([[str(loaded_models[0].model_next.weights[i]) == str(loaded_model.model_next.weights[i])
                     for i in range(len(loaded_models[0].model_next.weights))]
                    for loaded_model in loaded_models]):
            raise AssertionError('Not all model_nexts have the same weights!')

    # Retrieve weights, weigh them and overwrite in model[0].
    current_weights = loaded_models[0].model.get_weights()
    loaded_models[0].model.set_weights([current_weights[matrix_index] * model_weights[0]
                                        for matrix_index in range(len(current_weights))])
    # We have model_init
    if hasattr(loaded_models[0], 'model_init'):
        current_weights = loaded_models[0].model_init.get_weights()
        loaded_models[0].model_init.set_weights([current_weights[matrix_index] * model_weights[0]
                                                 for matrix_index in range(len(current_weights))])
    # We have model_next
    if hasattr(loaded_models[0], 'model_next'):
        current_weights = loaded_models[0].model_next.get_weights()
        loaded_models[0].model_next.set_weights([current_weights[matrix_index] * model_weights[0]
                                                 for matrix_index in range(len(current_weights))])

    # Weighted sum of all models
    for m in range(1, len(models)):
        current_weights = loaded_models[m].model.get_weights()
        prev_weights = loaded_models[0].model.get_weights()
        loaded_models[0].model.set_weights([current_weights[matrix_index] * model_weights[m] + prev_weights[matrix_index]
                                            for matrix_index in range(len(current_weights))])
        # We have model_init
        if hasattr(loaded_models[0], 'model_init'):
            current_weights = loaded_models[m].model_init.get_weights()
            prev_weights = loaded_models[0].model_init.get_weights()
            loaded_models[0].model_init.set_weights([current_weights[matrix_index] * model_weights[m] + prev_weights[matrix_index]
                                                     for matrix_index in range(len(current_weights))])
        # We have model_next
        if hasattr(loaded_models[0], 'model_next'):
            current_weights = loaded_models[m].model_next.get_weights()
            prev_weights = loaded_models[0].model_next.get_weights()
            loaded_models[0].model_next.set_weights([current_weights[matrix_index] * model_weights[m] + prev_weights[matrix_index]
                                                     for matrix_index in range(len(current_weights))])

    # Save averaged model
    saveModel(loaded_models[0], -1, path=output_model, full_path=True, store_iter=False)
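A possible call to the routine above, averaging three checkpoints into a single model (paths and weights are illustrative):

average_models(['trained_models/nmt_epoch_10',
                'trained_models/nmt_epoch_11',
                'trained_models/nmt_epoch_12'],
               output_model='trained_models/nmt_averaged',
               weights=[0.5, 0.3, 0.2])  # omit weights to use uniform 1/N weighting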
def sample_ensemble(args, params):
    from data_engine.prepare_data import update_dataset_from_file
    from keras_wrapper.model_ensemble import BeamSearchEnsemble
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.dataset import loadDataset
    from keras_wrapper.utils import decode_predictions_beam_search

    logging.info("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    dataset = update_dataset_from_file(dataset, args.text, params, splits=args.splits, remove_outputs=True)
    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]

    # For converting predictions into sentences
    index2word_y = dataset.vocabulary[params['OUTPUTS_IDS_DATASET'][0]]['idx2words']

    if params.get('APPLY_DETOKENIZATION', False):
        detokenize_function = eval('dataset.' + params['DETOKENIZATION_METHOD'])

    params_prediction = dict()
    params_prediction['max_batch_size'] = params.get('BATCH_SIZE', 20)
    params_prediction['n_parallel_loaders'] = params.get('PARALLEL_LOADERS', 1)
    params_prediction['beam_size'] = params.get('BEAM_SIZE', 6)
    params_prediction['maxlen'] = params.get('MAX_OUTPUT_TEXT_LEN_TEST', 100)
    params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
    params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
    params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
    params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
    params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
    params_prediction['search_pruning'] = params.get('SEARCH_PRUNING', False)
    params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
    params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
    params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
    params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
    params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
    params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
    params_prediction['pos_unk'] = params.get('POS_UNK', False)
    params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
        else params.get('MAX_OUTPUT_TEXT_LEN', 50)
    params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
    params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
    params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
    params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
    params_prediction['attend_on_output'] = params.get('ATTEND_ON_OUTPUT', 'transformer' in params['MODEL_TYPE'].lower())

    heuristic = params.get('HEURISTIC', 0)
    mapping = None if dataset.mapping == dict() else dataset.mapping
    model_weights = args.weights
    if model_weights is not None and model_weights != []:
        assert len(model_weights) == len(models), \
            'You should give a weight to each model. You gave %d models and %d weights.' % (len(models), len(model_weights))
        model_weights = map(lambda x: float(x), model_weights)
        if len(model_weights) > 1:
            logger.info('Giving the following weights to each model: %s' % str(model_weights))

    for s in args.splits:
        # Apply model predictions
        params_prediction['predict_on_sets'] = [s]
        beam_searcher = BeamSearchEnsemble(models,
                                           dataset,
                                           params_prediction,
                                           model_weights=model_weights,
                                           n_best=args.n_best,
                                           verbose=args.verbose)
        if args.n_best:
            predictions, n_best = beam_searcher.predictBeamSearchNet()[s]
        else:
            predictions = beam_searcher.predictBeamSearchNet()[s]
            n_best = None

        if params_prediction['pos_unk']:
            samples = predictions[0]
            alphas = predictions[1]
            sources = [x.strip() for x in open(args.text, 'r').read().split('\n')]
            sources = sources[:-1] if len(sources[-1]) == 0 else sources
        else:
            samples = predictions
            alphas = None
            heuristic = None
            sources = None

        predictions = decode_predictions_beam_search(samples,
                                                     index2word_y,
                                                     alphas=alphas,
                                                     x_text=sources,
                                                     heuristic=heuristic,
                                                     mapping=mapping,
                                                     verbose=args.verbose)
        # Apply detokenization function if needed
        if params.get('APPLY_DETOKENIZATION', False):
            predictions = map(detokenize_function, predictions)

        if args.n_best:
            n_best_predictions = []
            for i, (n_best_preds, n_best_scores, n_best_alphas) in enumerate(n_best):
                n_best_sample_score = []
                for n_best_pred, n_best_score, n_best_alpha in zip(n_best_preds, n_best_scores, n_best_alphas):
                    pred = decode_predictions_beam_search([n_best_pred],
                                                          index2word_y,
                                                          alphas=[n_best_alpha] if params_prediction['pos_unk'] else None,
                                                          x_text=[sources[i]] if params_prediction['pos_unk'] else None,
                                                          heuristic=heuristic,
                                                          mapping=mapping,
                                                          verbose=args.verbose)
                    # Apply detokenization function if needed
                    if params.get('APPLY_DETOKENIZATION', False):
                        pred = map(detokenize_function, pred)
                    n_best_sample_score.append([i, pred, n_best_score])
                n_best_predictions.append(n_best_sample_score)

        # Store result
        if args.dest is not None:
            filepath = args.dest  # results file
            if params.get('SAMPLING_SAVE_MODE', 'list'):
                list2file(filepath, predictions)
                if args.n_best:
                    nbest2file(filepath + '.nbest', n_best_predictions)
            else:
                raise Exception('Only "list" is allowed in "SAMPLING_SAVE_MODE"')
        else:
            list2stdout(predictions)
            if args.n_best:
                logging.info('Storing n-best sentences in ./' + s + '.nbest')
                nbest2file('./' + s + '.nbest', n_best_predictions)
    logging.info('Sampling finished')
Since we want to translate a new data split ('test'), we must add it to the dataset instance, just as we did before (in the first tutorial). If we also had the references of the test split and wanted to evaluate against them, we could add them to the dataset. Note that this is not mandatory and we could just predict without evaluating.
"""

MODEL_PATH1 = os.path.join(os.getcwd(), 'models/empathy_100_hidden')
MODEL_PATH2 = os.path.join(os.getcwd(), 'models/persona_chat_lstm')
epoch_choice1 = 6
epoch_choice2 = 8

dataset1 = loadDataset(os.path.join(MODEL_PATH1, "dataset/Dataset_tutorial_dataset.pkl"))
dataset2 = loadDataset(os.path.join(MODEL_PATH1, "dataset/Dataset_tutorial_dataset.pkl"))
dataset2 = update_dataset_from_file(dataset2, args.text, params, splits=args.splits, remove_outputs=True)

# Load model
nmt_model1 = loadModel(MODEL_PATH1, epoch_choice1)
nmt_model2 = loadModel(MODEL_PATH2, epoch_choice2)

params = nmt_model1.params

# Define the inputs and outputs mapping from our Dataset instance to our model
inputMapping = dict()
for i, id_in in enumerate(params['INPUTS_IDS_DATASET']):
    pos_source = dataset.ids_inputs.index(id_in)
    id_dest = nmt_model.ids_inputs[i]
    inputMapping[id_dest] = pos_source
nmt_model.setInputsMapping(inputMapping)

outputMapping = dict()
for i, id_out in enumerate(params['OUTPUTS_IDS_DATASET']):
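With both models loaded, they would typically be queried jointly through a BeamSearchEnsemble rather than one at a time. A minimal sketch under that assumption (the prediction settings below are illustrative):

from keras_wrapper.model_ensemble import BeamSearchEnsemble

params_prediction = {'max_batch_size': 50, 'n_parallel_loaders': 1,
                     'beam_size': 6, 'maxlen': 50, 'optimized_search': True,
                     'model_inputs': params['INPUTS_IDS_MODEL'],
                     'model_outputs': params['OUTPUTS_IDS_MODEL'],
                     'dataset_inputs': params['INPUTS_IDS_DATASET'],
                     'dataset_outputs': params['OUTPUTS_IDS_DATASET'],
                     'pos_unk': False, 'predict_on_sets': ['test']}
beam_searcher = BeamSearchEnsemble([nmt_model1, nmt_model2], dataset2, params_prediction, verbose=1)
predictions = beam_searcher.predictBeamSearchNet()['test']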
def test_2models_allclose(model1, model2, rtol=1e-05, atol=1e-08, verbose=0):
    if isinstance(model1, str):
        from keras_wrapper.cnn_model import loadModel
        model1 = loadModel(model1, -1, full_path=True)
        model1_init = model1.model_init
        model1_next = model1.model_next
        model1 = model1.model
    if isinstance(model2, str):
        from keras_wrapper.cnn_model import loadModel
        model2 = loadModel(model2, -1, full_path=True)
        model2_init = model2.model_init
        model2_next = model2.model_next
        model2 = model2.model

    logging.info("Checking all models (from model 1) are close...")
    if test_models_allclose(model1, model1_init, model1_next, rtol=rtol, atol=atol, verbose=verbose):
        logging.info("All close")
    else:
        logging.info("Not close!")

    logging.info("Checking all models (from model 2) are close...")
    if test_models_allclose(model2, model2_init, model2_next, rtol=rtol, atol=atol, verbose=verbose):
        logging.info("All close")
    else:
        logging.info("Not close!")

    model1_names = map(str, model1.weights)
    model2_names = map(str, model2.weights)

    if verbose > 0:
        print ("===========================")
        print ("Checking model weights")
    logging.info("Checking model_next 1 is close to model_next 2")
    if model1 is not None:
        model_next_names = map(str, model1.weights)
        for (index_next, name) in list(enumerate(model_next_names)):
            index_model = model2_names.index(name)
            if not np.allclose(model2.weights[index_model].get_value(), model1.weights[index_next].get_value(), rtol=rtol, atol=atol):
                raise AssertionError('Parameters ' + name + ' are not close! (model2 index: ' + str(index_model) + ' model1 index ' + str(index_next) + ')')
            if verbose > 0:
                print ("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close")
    if model2 is not None:
        model_next_names = map(str, model2.weights)
        for (index_next, name) in list(enumerate(model_next_names)):
            index_model = model1_names.index(name)
            if not np.allclose(model1.weights[index_model].get_value(), model2.weights[index_next].get_value(), rtol=rtol, atol=atol):
                raise AssertionError('Parameters ' + name + ' are not close! (model1 index: ' + str(index_model) + ' model2 index ' + str(index_next) + ')')
            if verbose > 0:
                print ("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close")

    if verbose > 0:
        print ("Checking model next weights")
    logging.info("Checking model_next 1 is close to model_next 2")
    if model1_next is not None:
        model_next_names = map(str, model1_next.weights)
        for (index_next, name) in list(enumerate(model_next_names)):
            index_model = model2_names.index(name)
            if not np.allclose(model2.weights[index_model].get_value(), model1_next.weights[index_next].get_value(), rtol=rtol, atol=atol):
                raise AssertionError('Parameters ' + name + ' are not close! (model2 index: ' + str(index_model) + ' model1_next index ' + str(index_next) + ')')
            if verbose > 0:
                print ("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close")
    if model2_next is not None:
        model_next_names = map(str, model2_next.weights)
        for (index_next, name) in list(enumerate(model_next_names)):
            index_model = model1_names.index(name)
            if not np.allclose(model1.weights[index_model].get_value(), model2_next.weights[index_next].get_value(), rtol=rtol, atol=atol):
                raise AssertionError('Parameters ' + name + ' are not close! (model1 index: ' + str(index_model) + ' model2_next index ' + str(index_next) + ')')
            if verbose > 0:
                print ("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close")

    if verbose > 0:
        print ("Checking model init weights")
    logging.info("Checking model_init 1 is close to model_init 2")
    if model1_init is not None:
        model_next_names = map(str, model1_init.weights)
        for (index_next, name) in list(enumerate(model_next_names)):
            index_model = model2_names.index(name)
            if not np.allclose(model2.weights[index_model].get_value(), model1_init.weights[index_next].get_value(), rtol=rtol, atol=atol):
                raise AssertionError('Parameters ' + name + ' are not close! (model2 index: ' + str(index_model) + ' model1_init index ' + str(index_next) + ')')
            if verbose > 0:
                print ("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close")
    if model2_init is not None:
        model_next_names = map(str, model2_init.weights)
        for (index_next, name) in list(enumerate(model_next_names)):
            index_model = model1_names.index(name)
            if not np.allclose(model1.weights[index_model].get_value(), model2_init.weights[index_next].get_value(), rtol=rtol, atol=atol):
                raise AssertionError('Parameters ' + name + ' are not close! (model1 index: ' + str(index_model) + ' model2_init index ' + str(index_next) + ')')
            if verbose > 0:
                print ("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close")
    return True
def test_train_and_load(self): if theano.config.device == 'gpu': def test_train(): params = load_parameters() params['REBUILD_DATASET'] = True params['DATASET_STORE_PATH'] = './' dataset = build_dataset(params) params['INPUT_VOCABULARY_SIZE'] = \ dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]] params['OUTPUT_VOCABULARY_SIZE'] = \ dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]] params['SOURCE_TEXT_EMBEDDING_SIZE'] = 2 params['TARGET_TEXT_EMBEDDING_SIZE'] = 2 params['ENCODER_HIDDEN_SIZE'] = 2 params['DECODER_HIDDEN_SIZE'] = 2 params['ATTENTION_SIZE'] = 2 params['SKIP_VECTORS_HIDDEN_SIZE'] = 2 params['DEEP_OUTPUT_LAYERS'] = [('linear', 2)] params['STORE_PATH'] = './' nmt_model = \ TranslationModel(params, model_type=params['MODEL_TYPE'], verbose=params['VERBOSE'], model_name=params['MODEL_NAME'], vocabularies=dataset.vocabulary, store_path=params['STORE_PATH'], clear_dirs=False) # Check Inputs inputMapping = dict() for i, id_in in enumerate(params['INPUTS_IDS_DATASET']): pos_source = dataset.ids_inputs.index(id_in) id_dest = nmt_model.ids_inputs[i] inputMapping[id_dest] = pos_source nmt_model.setInputsMapping(inputMapping) outputMapping = dict() for i, id_out in enumerate(params['OUTPUTS_IDS_DATASET']): pos_target = dataset.ids_outputs.index(id_out) id_dest = nmt_model.ids_outputs[i] outputMapping[id_dest] = pos_target nmt_model.setOutputsMapping(outputMapping) callbacks = buildCallbacks(params, nmt_model, dataset) training_params = { 'n_epochs': 1, 'batch_size': 50, 'homogeneous_batches': False, 'maxlen': 10, 'joint_batches': params['JOINT_BATCHES'], 'lr_decay': params['LR_DECAY'], 'lr_gamma': params['LR_GAMMA'], 'epochs_for_save': 1, 'verbose': params['VERBOSE'], 'eval_on_sets': params['EVAL_ON_SETS_KERAS'], 'n_parallel_loaders': params['PARALLEL_LOADERS'], 'extra_callbacks': callbacks, 'reload_epoch': 0, 'epoch_offset': 0, 'data_augmentation': False, 'patience': 1, # early stopping parameters 'metric_check': 'Bleu_4', 'eval_on_epochs': True, 'each_n_epochs': 1, 'start_eval_on_epoch': 0 } nmt_model.trainNet(dataset, training_params) return True test_train() params = load_parameters() params['REBUILD_DATASET'] = True params['DATASET_STORE_PATH'] = './' dataset = build_dataset(params) params['INPUT_VOCABULARY_SIZE'] = \ dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]] params['OUTPUT_VOCABULARY_SIZE'] = \ dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]] # Load model nmt_model = loadModel('./', 1, reload_epoch=True) nmt_model.setOptimizer() for s in ['val']: # Evaluate training extra_vars = { 'language': params.get('TRG_LAN', 'en'), 'n_parallel_loaders': params['PARALLEL_LOADERS'], 'tokenize_f': eval('dataset.' + params['TOKENIZATION_METHOD']), 'detokenize_f': eval('dataset.' 
+ params['DETOKENIZATION_METHOD']), 'apply_detokenization': params['APPLY_DETOKENIZATION'], 'tokenize_hypotheses': params['TOKENIZE_HYPOTHESES'], 'tokenize_references': params['TOKENIZE_REFERENCES'] } vocab = dataset.vocabulary[params['OUTPUTS_IDS_DATASET'] [0]]['idx2words'] extra_vars[s] = dict() extra_vars[s]['references'] = dataset.extra_variables[s][ params['OUTPUTS_IDS_DATASET'][0]] input_text_id = None vocab_src = None if params['BEAM_SIZE']: extra_vars['beam_size'] = params.get('BEAM_SIZE', 6) extra_vars['state_below_index'] = params.get( 'BEAM_SEARCH_COND_INPUT', -1) extra_vars['maxlen'] = params.get( 'MAX_OUTPUT_TEXT_LEN_TEST', 30) extra_vars['optimized_search'] = params.get( 'OPTIMIZED_SEARCH', True) extra_vars['model_inputs'] = params['INPUTS_IDS_MODEL'] extra_vars['model_outputs'] = params['OUTPUTS_IDS_MODEL'] extra_vars['dataset_inputs'] = params['INPUTS_IDS_DATASET'] extra_vars['dataset_outputs'] = params[ 'OUTPUTS_IDS_DATASET'] extra_vars['normalize_probs'] = params.get( 'NORMALIZE_SAMPLING', False) extra_vars['alpha_factor'] = params.get( 'ALPHA_FACTOR', 1.0) extra_vars['coverage_penalty'] = params.get( 'COVERAGE_PENALTY', False) extra_vars['length_penalty'] = params.get( 'LENGTH_PENALTY', False) extra_vars['length_norm_factor'] = params.get( 'LENGTH_NORM_FACTOR', 0.0) extra_vars['coverage_norm_factor'] = params.get( 'COVERAGE_NORM_FACTOR', 0.0) extra_vars['pos_unk'] = params['POS_UNK'] if params['POS_UNK']: extra_vars['heuristic'] = params['HEURISTIC'] input_text_id = params['INPUTS_IDS_DATASET'][0] vocab_src = dataset.vocabulary[input_text_id][ 'idx2words'] if params['HEURISTIC'] > 0: extra_vars['mapping'] = dataset.mapping callback_metric = PrintPerformanceMetricOnEpochEndOrEachNUpdates( nmt_model, dataset, gt_id=params['OUTPUTS_IDS_DATASET'][0], metric_name=params['METRICS'], set_name=params['EVAL_ON_SETS'], batch_size=params['BATCH_SIZE'], each_n_epochs=params['EVAL_EACH'], extra_vars=extra_vars, reload_epoch=1, is_text=True, input_text_id=input_text_id, save_path=nmt_model.model_path, index2word_y=vocab, index2word_x=vocab_src, sampling_type=params['SAMPLING'], beam_search=params['BEAM_SEARCH'], start_eval_on_epoch=0, write_samples=True, write_type=params['SAMPLING_SAVE_MODE'], eval_on_epochs=params['EVAL_EACH_EPOCHS'], save_each_evaluation=False, verbose=params['VERBOSE']) callback_metric.evaluate( 1, counter_name='epoch' if params['EVAL_EACH_EPOCHS'] else 'update') return True else: pass
def bpe_loading(args):
    logging.info("Using an ensemble of %d models" % len(args["models"]))
    models = [loadModel(m, -1, full_path=True) for m in args["models"]]
    dataset = loadDataset(args["dataset"])
    return models, dataset
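A possible call site for the loader above (paths are illustrative):

args = {'models': ['trained_models/bpe_model_1', 'trained_models/bpe_model_2'],
        'dataset': 'datasets/Dataset_bpe.pkl'}
models, dataset = bpe_loading(args)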
                 id='source_text',
                 pad_on_batch=True,
                 tokenization='tokenize_basic',
                 fill='end',
                 max_text_len=100,
                 min_occ=0)
dataset.setInput(None, 'test', type='ghost', id='state_below', required=False)

## get model predictions
params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]

Control_model = loadModel('trained_models/Control_M7', 36)

params_prediction = {'max_batch_size': 50,
                     'predict_on_sets': ['test'],
                     'beam_size': 12,
                     'maxlen': 50,
                     'model_inputs': ['source_text', 'state_below'],
                     'model_outputs': ['target_text'],
                     'dataset_inputs': ['source_text', 'state_below'],
                     'dataset_outputs': ['target_text'],
                     'normalize': True,
                     'alpha_factor': 0.6}

Control_predictions = Control_model.predictBeamSearchNet(
def classifyFood101():
    from keras_wrapper.cnn_model import CNN_Model, loadModel, saveModel

    logging.info('Defining CNN model and training it.')

    # Load food classification dataset
    dataset_name = 'Food101'
    ds = loadDataset('Datasets/Dataset_' + dataset_name + '.pkl')
    # The network we are going to use needs an image input size of [224,224,3]
    # for this reason we have to communicate this to the dataset instance in charge of loading the data
    ds.img_size_crop['images'] = [224, 224, 3]

    # Create VGG model and load weights
    model_name = 'VGG_16_FunctionalAPI'
    net = CNN_Model(type='VGG_16_FunctionalAPI', model_name=model_name,
                    input_shape=[224, 224, 3],
                    weights_path='/media/HDD_2TB/CNN_MODELS/VGG/vgg16_weights.h5',
                    seq_to_functional=True)  # we are setting the weights of a Sequential model into a FunctionalAPI one

    # Reformat net output layer for the number of classes in our dataset
    n_classes = len(ds.classes['labels'])
    vis_input = net.model.get_layer('vis_input').output  # input layer
    drop = net.model.get_layer('last_dropout').output  # layer before final FC
    output = Dense(n_classes, activation='softmax', name='output')(drop)  # redefine FC-softmax layer
    net.model = Model(input=vis_input, output=output)  # define inputs and outputs

    # Compile
    net.setOptimizer(lr=0.001, metrics=['accuracy'])

    # Define the inputs and outputs mapping from our Dataset instance to our CNN_Model instance
    # set input and output mappings from dataset to network
    pos_images = ds.types_inputs.index('image')
    pos_labels = ds.types_outputs.index('categorical')

    # the first input of our dataset (pos_images) will also be the first input of our model (named vis_input)
    inputMapping = {'vis_input': pos_images}
    net.setInputsMapping(inputMapping)

    # the first output of our dataset (pos_labels) will also be the first output of our model (named output)
    outputMapping = {'output': pos_labels}
    net.setOutputsMapping(outputMapping, acc_output='output')

    # Save model
    saveModel(net, 0)
    # Load model
    net = loadModel('Models/' + model_name, 0)
    # the model must be compiled again when loaded
    net.setOptimizer(lr=0.001, metrics=['accuracy'])

    # Apply short training (1 epoch)
    # training_params = {'n_epochs': 1, 'batch_size': 50,
    #                    'lr_decay': 2, 'lr_gamma': 0.8,
    #                    'epochs_for_save': 1, 'verbose': 1, 'eval_on_sets': ['val']}
    # net.trainNet(ds, training_params)

    # Test network on test set
    test_params = {'batch_size': 50}
    # net.testNet(ds, test_params)

    # Predict network on all sets
    test_params['predict_on_sets'] = ['val']
    predictions = net.predictNet(ds, test_params)
    logging.info("Predicted %d samples." % (len(predictions)))
    logging.info("Done")
def apply_Video_model(params): """ Function for using a previously trained model for sampling. """ ########### Load data dataset = build_dataset(params) params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[ params['OUTPUTS_IDS_DATASET'][0]] ########### ########### Load model video_model = loadModel(params['STORE_PATH'], params['RELOAD']) video_model.setOptimizer() ########### ########### Apply sampling extra_vars = dict() extra_vars['tokenize_f'] = eval('dataset.' + params['TOKENIZATION_METHOD']) extra_vars['language'] = params.get('TRG_LAN', 'en') for s in params["EVAL_ON_SETS"]: # Apply model predictions params_prediction = { 'max_batch_size': params['BATCH_SIZE'], 'n_parallel_loaders': params['PARALLEL_LOADERS'], 'predict_on_sets': [s] } # Convert predictions into sentences vocab = dataset.vocabulary[params['OUTPUTS_IDS_DATASET'] [0]]['idx2words'] if params['BEAM_SEARCH']: params_prediction['beam_size'] = params['BEAM_SIZE'] params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST'] params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH'] params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL'] params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL'] params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET'] params_prediction['dataset_outputs'] = params[ 'OUTPUTS_IDS_DATASET'] params_prediction['normalize_probs'] = params['NORMALIZE_SAMPLING'] params_prediction['alpha_factor'] = params['ALPHA_FACTOR'] predictions = video_model.predictBeamSearchNet( dataset, params_prediction)[s] predictions = video_model.decode_predictions_beam_search( predictions, vocab, verbose=params['VERBOSE']) else: predictions = video_model.predictNet(dataset, params_prediction)[s] predictions = video_model.decode_predictions( predictions, 1, # always set temperature to 1 vocab, params['SAMPLING'], verbose=params['VERBOSE']) # Store result filepath = video_model.model_path + '/' + s + '_sampling.pred' # results file if params['SAMPLING_SAVE_MODE'] == 'list': list2file(filepath, predictions) else: raise Exception, 'Only "list" is allowed in "SAMPLING_SAVE_MODE"' # Evaluate if any metric in params['METRICS'] for metric in params['METRICS']: logging.info('Evaluating on metric ' + metric) filepath = video_model.model_path + '/' + s + '_sampling.' + metric # results file # Evaluate on the chosen metric extra_vars[s] = dict() extra_vars[s]['references'] = dataset.extra_variables[s][ params['OUTPUTS_IDS_DATASET'][0]] metrics = evaluation.select[metric](pred_list=predictions, verbose=1, extra_vars=extra_vars, split=s) # Print results to file with open(filepath, 'w') as f: header = '' line = '' for metric_ in sorted(metrics): value = metrics[metric_] header += metric_ + ',' line += str(value) + ',' f.write(header + '\n') f.write(line + '\n') logging.info('Done evaluating on metric ' + metric)
def train_model(params):
    """
    Training function. Sets the training parameters from params.
    Build or loads the model and launches the training.

    :param params: Dictionary of network hyperparameters.
    :return: None
    """
    if params['RELOAD'] > 0:
        logging.info('Resuming training.')
    check_params(params)

    # Load data
    dataset = build_dataset(params)
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]

    # Build model
    if params['RELOAD'] == 0:  # build new model
        video_model = VideoDesc_Model(params, type=params['MODEL_TYPE'],
                                      verbose=params['VERBOSE'],
                                      model_name=params['MODEL_NAME'],
                                      vocabularies=dataset.vocabulary,
                                      store_path=params['STORE_PATH'])
        dict2pkl(params, params['STORE_PATH'] + '/config')

        # Define the inputs and outputs mapping from our Dataset instance to our model
        inputMapping = dict()
        for i, id_in in enumerate(params['INPUTS_IDS_DATASET']):
            if len(video_model.ids_inputs) > i:
                pos_source = dataset.ids_inputs.index(id_in)
                id_dest = video_model.ids_inputs[i]
                inputMapping[id_dest] = pos_source
        video_model.setInputsMapping(inputMapping)

        outputMapping = dict()
        for i, id_out in enumerate(params['OUTPUTS_IDS_DATASET']):
            if len(video_model.ids_outputs) > i:
                pos_target = dataset.ids_outputs.index(id_out)
                id_dest = video_model.ids_outputs[i]
                outputMapping[id_dest] = pos_target
        video_model.setOutputsMapping(outputMapping)
    else:  # resume from previously trained model
        video_model = loadModel(params['STORE_PATH'], params['RELOAD'])
        video_model.setOptimizer()
    ###########

    ########### Callbacks
    callbacks = buildCallbacks(params, video_model, dataset)
    ###########

    ########### Training
    total_start_time = timer()
    logger.debug('Starting training!')
    training_params = {'n_epochs': params['MAX_EPOCH'],
                       'batch_size': params['BATCH_SIZE'],
                       'homogeneous_batches': params['HOMOGENEOUS_BATCHES'],
                       'maxlen': params['MAX_OUTPUT_TEXT_LEN'],
                       'lr_decay': params['LR_DECAY'],
                       'lr_gamma': params['LR_GAMMA'],
                       'epochs_for_save': params['EPOCHS_FOR_SAVE'],
                       'verbose': params['VERBOSE'],
                       'eval_on_sets': params['EVAL_ON_SETS_KERAS'],
                       'n_parallel_loaders': params['PARALLEL_LOADERS'],
                       'extra_callbacks': callbacks,
                       'reload_epoch': params['RELOAD'],
                       'epoch_offset': params['RELOAD'],
                       'data_augmentation': params['DATA_AUGMENTATION'],
                       'patience': params.get('PATIENCE', 0),
                       'metric_check': params.get('STOP_METRIC', None)}
    video_model.trainNet(dataset, training_params)

    total_end_time = timer()
    time_difference = total_end_time - total_start_time
    logging.info('In total is {0:.2f}s = {1:.2f}m'.format(time_difference, time_difference / 60.0))
def get_model_predictions(asts_path):
    print("os.getcwd()", os.getcwd())
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    print("cur_dir", cur_dir)

    # if not os.path.isdir(os.path.join(os.getcwd(), 'keras')):
    #     print(subprocess.run(f'git clone https://github.com/MarcBS/keras.git', shell=True,
    #                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True))
    #
    # nmt_keras_dir = os.path.join(os.getcwd, 'nmt-keras')
    # if not os.path.isdir(os.path.join(os.getcwd(), 'nmt-keras')):
    #     print(subprocess.run(f'git clone https://github.com/lvapeab/nmt-keras && cd "nmt-keras" && pipenv install -e .', shell=True,
    #                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True))
    #     # print(subprocess.run(f'cd {nmt_keras_dir} && pipenv install -e .', shell=True,
    #     #                      stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True))
    # print("ran cmds!!!")
    # sys.path.insert(0, os.path.join(os.getcwd(), 'nmt-keras'))
    # print("sys path!!!", sys.path)

    dataset = loadDataset(f'{cur_dir}/assets/epoch_{MODEL_EPOCH}_model_wrapper.pkl')

    with open(f'{cur_dir}/assets/params.json', 'r') as params_file:
        params = json.load(params_file)

    dataset.setInput(asts_path,
                     'test',
                     type='text',
                     id='source_text',
                     pad_on_batch=True,
                     tokenization=params['tokenize_x'],
                     fill='end',
                     max_text_len=params['x_max_text_len'],
                     min_occ=0)
    dataset.setInput(None, 'test', type='ghost', id='state_below', required=False)
    dataset.setRawInput(asts_path, 'test', type='file-name', id='raw_source_text', overwrite_split=True)

    nmt_model = loadModel(f'{cur_dir}/assets', MODEL_EPOCH)

    prediction_params = get_prediction_params()
    predictions = nmt_model.predictBeamSearchNet(dataset, prediction_params)['test']

    vocab = dataset.vocabulary['target_text']['idx2words']
    samples = predictions['samples']  # Get word indices from the samples.
    predictions = decode_predictions_beam_search(samples, vocab, verbose=params['VERBOSE'])
    return predictions
def score_corpus(args, params):
    """
    Use one or several translation models for scoring source--target pairs.

    :param argparse.Namespace args: Arguments given to the method:

        * dataset: Dataset instance with data.
        * source: Text file with source sentences.
        * target: Text file with target sentences.
        * splits: Splits to sample. Should be already included in the dataset object.
        * dest: Output file to save scores.
        * weights: Weight given to each model in the ensemble. You should provide the same number of weights than models. By default, it applies the same weight to each model (1/N).
        * verbose: Be verbose or not.
        * config: Config .pkl for loading the model configuration. If not specified, hyperparameters are read from config.py.
        * models: Path to the models.

    :param dict params: parameters of the translation model.
    """
    from data_engine.prepare_data import update_dataset_from_file
    from keras_wrapper.dataset import loadDataset
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.model_ensemble import BeamSearchEnsemble

    logging.info("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    dataset = update_dataset_from_file(dataset, args.source, params,
                                       splits=args.splits,
                                       output_text_filename=args.target,
                                       compute_state_below=True)
    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]

    # Apply scoring
    extra_vars = dict()
    extra_vars['tokenize_f'] = eval('dataset.' + params['TOKENIZATION_METHOD'])

    model_weights = args.weights
    if model_weights is not None and model_weights != []:
        assert len(model_weights) == len(models), \
            'You should give a weight to each model. You gave %d models and %d weights.' % (len(models), len(model_weights))
        model_weights = map(float, model_weights)
        if len(model_weights) > 1:
            logger.info('Giving the following weights to each model: %s' % str(model_weights))

    for s in args.splits:
        # Apply model predictions
        params_prediction = {'max_batch_size': params['BATCH_SIZE'],
                             'n_parallel_loaders': params['PARALLEL_LOADERS'],
                             'predict_on_sets': [s]}
        if params['BEAM_SEARCH']:
            params_prediction['beam_size'] = params['BEAM_SIZE']
            params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST']
            params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
            params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
            params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
            params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
            params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
            params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
            params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
            params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
            params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
            params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
            params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
            params_prediction['pos_unk'] = params.get('POS_UNK', False)
            params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
                else params.get('MAX_OUTPUT_TEXT_LEN', 50)
            params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
            params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
            params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
            params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
            params_prediction['attend_on_output'] = params.get('ATTEND_ON_OUTPUT', 'transformer' in params['MODEL_TYPE'].lower())
            beam_searcher = BeamSearchEnsemble(models, dataset, params_prediction,
                                               model_weights=model_weights, verbose=args.verbose)
            scores = beam_searcher.scoreNet()[s]

        # Store result
        if args.dest is not None:
            filepath = args.dest  # results file
            if params['SAMPLING_SAVE_MODE'] == 'list':
                list2file(filepath, scores)
            elif params['SAMPLING_SAVE_MODE'] == 'numpy':
                numpy2file(filepath, scores)
            else:
                raise Exception('The sampling mode ' + params['SAMPLING_SAVE_MODE'] + ' is not currently supported.')
        else:
            print(scores)
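score_corpus expects an argparse.Namespace plus a params dictionary. A sketch of calling it programmatically (paths and values are illustrative; params would normally come from load_parameters() or a stored config .pkl):

import argparse

params = load_parameters()
args = argparse.Namespace(models=['trained_models/nmt_epoch_10'],
                          dataset='datasets/Dataset_corpus.pkl',
                          source='data/test.src', target='data/test.trg',
                          splits=['test'], dest='scores.txt',
                          weights=[], verbose=0, config=None)
score_corpus(args, params)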
def sample_ensemble(args, params): """ Use several translation models for obtaining predictions from a source text file. :param argparse.Namespace args: Arguments given to the method: * dataset: Dataset instance with data. * text: Text file with source sentences. * splits: Splits to sample. Should be already included in the dataset object. * dest: Output file to save scores. * weights: Weight given to each model in the ensemble. You should provide the same number of weights as models. By default, it applies the same weight to each model (1/N). * n_best: Write n-best list (n = beam size). * config: Config .pkl for loading the model configuration. If not specified, hyperparameters are read from config.py. * models: Path to the models. * verbose: Be verbose or not. :param params: parameters of the translation model. """ from data_engine.prepare_data import update_dataset_from_file from keras_wrapper.model_ensemble import BeamSearchEnsemble from keras_wrapper.cnn_model import loadModel from keras_wrapper.dataset import loadDataset from keras_wrapper.utils import decode_predictions_beam_search logger.info("Using an ensemble of %d models" % len(args.models)) models = [loadModel(m, -1, full_path=True) for m in args.models] dataset = loadDataset(args.dataset) dataset = update_dataset_from_file(dataset, args.text, params, splits=args.splits, remove_outputs=True) params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]] params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]] # For converting predictions into sentences index2word_y = dataset.vocabulary[params['OUTPUTS_IDS_DATASET'][0]]['idx2words'] if params.get('APPLY_DETOKENIZATION', False): detokenize_function = eval('dataset.' + params['DETOKENIZATION_METHOD']) params_prediction = dict() params_prediction['max_batch_size'] = params.get('BATCH_SIZE', 20) params_prediction['n_parallel_loaders'] = params.get('PARALLEL_LOADERS', 1) params_prediction['beam_size'] = params.get('BEAM_SIZE', 6) params_prediction['maxlen'] = params.get('MAX_OUTPUT_TEXT_LEN_TEST', 100) params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH'] params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL'] params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL'] params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET'] params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET'] params_prediction['search_pruning'] = params.get('SEARCH_PRUNING', False) params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False) params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0) params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False) params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False) params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0) params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0) params_prediction['pos_unk'] = params.get('POS_UNK', False) params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) else params.get('MAX_OUTPUT_TEXT_LEN', 50) params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True) params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3) params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True) params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2) 
params_prediction['attend_on_output'] = params.get('ATTEND_ON_OUTPUT', 'transformer' in params['MODEL_TYPE'].lower()) params_prediction['glossary'] = params.get('GLOSSARY', None) heuristic = params.get('HEURISTIC', 0) mapping = None if dataset.mapping == dict() else dataset.mapping model_weights = args.weights if args.glossary is not None: glossary = pkl2dict(args.glossary) elif params_prediction['glossary'] is not None: glossary = pkl2dict(params_prediction['glossary']) else: glossary = None if model_weights: assert len(model_weights) == len( models), 'You should give a weight to each model. You gave %d models and %d weights.' % ( len(models), len(model_weights)) model_weights = list(map(float, model_weights)) if len(model_weights) > 1: logger.info('Giving the following weights to each model: %s' % str(model_weights)) for s in args.splits: # Apply model predictions params_prediction['predict_on_sets'] = [s] beam_searcher = BeamSearchEnsemble(models, dataset, params_prediction, model_weights=model_weights, n_best=args.n_best, verbose=args.verbose) predictions = beam_searcher.predictBeamSearchNet()[s] samples = predictions['samples'] alphas = predictions['alphas'] if params_prediction['pos_unk'] else None if params_prediction['pos_unk']: sources = [x.strip() for x in open(args.text, 'r').read().split('\n')] sources = sources[:-1] if len(sources[-1]) == 0 else sources else: sources = None decoded_predictions = decode_predictions_beam_search(samples, index2word_y, glossary=glossary, alphas=alphas, x_text=sources, heuristic=heuristic, mapping=mapping, verbose=args.verbose) # Apply detokenization function if needed if params.get('APPLY_DETOKENIZATION', False): decoded_predictions = list(map(detokenize_function, decoded_predictions)) if args.n_best: n_best_predictions = [] for i, (n_best_preds, n_best_scores, n_best_alphas) in enumerate(predictions['n_best']): n_best_sample_score = [] for n_best_pred, n_best_score, n_best_alpha in zip(n_best_preds, n_best_scores, n_best_alphas): pred = decode_predictions_beam_search([n_best_pred], index2word_y, glossary=glossary, alphas=[n_best_alpha] if params_prediction[ 'pos_unk'] else None, x_text=[sources[i]] if params_prediction['pos_unk'] else None, heuristic=heuristic, mapping=mapping, verbose=args.verbose) # Apply detokenization function if needed if params.get('APPLY_DETOKENIZATION', False): pred = list(map(detokenize_function, pred)) n_best_sample_score.append([i, pred, n_best_score]) n_best_predictions.append(n_best_sample_score) # Store result if args.dest is not None: filepath = args.dest # results file if params.get('SAMPLING_SAVE_MODE', 'list'): list2file(filepath, decoded_predictions) if args.n_best: nbest2file(filepath + '.nbest', n_best_predictions) else: raise Exception('Only "list" is allowed in "SAMPLING_SAVE_MODE"') else: list2stdout(decoded_predictions) if args.n_best: logger.info('Storing n-best sentences in ./' + s + '.nbest') nbest2file('./' + s + '.nbest', n_best_predictions) logger.info('Sampling finished')
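# Example usage sketch (placeholder paths; hypothetical weights): the same
# Namespace-style invocation works for sample_ensemble(), here giving the first
# model twice the weight of the second and skipping the n-best output.
def example_sample_ensemble():
    import argparse
    from config import load_parameters  # hyperparameters from config.py

    args = argparse.Namespace(
        dataset='datasets/Dataset.pkl',
        text='data/test.src',                # source text file to translate
        splits=['test'],
        dest='hypotheses.txt',
        weights=['0.66', '0.33'],            # converted to float inside sample_ensemble
        n_best=False,                        # True would also write hypotheses.txt.nbest
        glossary=None,
        config=None,
        verbose=1,
        models=['trained_models/model1/epoch_7',
                'trained_models/model2/epoch_9'])
    sample_ensemble(args, load_parameters())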
def apply_NMT_model(params, load_dataset=None): """ Sample from a previously trained model. :param params: Dictionary of network hyperparameters. :param load_dataset: Load dataset from file or build it from the parameters. :return: None """ # Load data if load_dataset is None: dataset = build_dataset(params) else: dataset = loadDataset(load_dataset) params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[ params['INPUTS_IDS_DATASET'][0]] params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[ params['OUTPUTS_IDS_DATASET'][0]] # Load model nmt_model = loadModel(params['STORE_PATH'], params['RELOAD'], reload_epoch=params['RELOAD_EPOCH']) # Evaluate training extra_vars = { 'language': params.get('TRG_LAN', 'en'), 'n_parallel_loaders': params['PARALLEL_LOADERS'], 'tokenize_f': eval('dataset.' + params['TOKENIZATION_METHOD']), 'detokenize_f': eval('dataset.' + params['DETOKENIZATION_METHOD']), 'apply_detokenization': params['APPLY_DETOKENIZATION'], 'tokenize_hypotheses': params['TOKENIZE_HYPOTHESES'], 'tokenize_references': params['TOKENIZE_REFERENCES'], } input_text_id = params['INPUTS_IDS_DATASET'][0] vocab_x = dataset.vocabulary[input_text_id]['idx2words'] vocab_y = dataset.vocabulary[params['OUTPUTS_IDS_DATASET'][0]]['idx2words'] if params['BEAM_SEARCH']: extra_vars['beam_size'] = params.get('BEAM_SIZE', 6) extra_vars['state_below_index'] = params.get('BEAM_SEARCH_COND_INPUT', -1) extra_vars['maxlen'] = params.get('MAX_OUTPUT_TEXT_LEN_TEST', 30) extra_vars['optimized_search'] = params.get('OPTIMIZED_SEARCH', True) extra_vars['model_inputs'] = params['INPUTS_IDS_MODEL'] extra_vars['model_outputs'] = params['OUTPUTS_IDS_MODEL'] extra_vars['dataset_inputs'] = params['INPUTS_IDS_DATASET'] extra_vars['dataset_outputs'] = params['OUTPUTS_IDS_DATASET'] extra_vars['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False) extra_vars['search_pruning'] = params.get('SEARCH_PRUNING', False) extra_vars['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0) extra_vars['coverage_penalty'] = params.get('COVERAGE_PENALTY', False) extra_vars['length_penalty'] = params.get('LENGTH_PENALTY', False) extra_vars['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0) extra_vars['coverage_norm_factor'] = params.get( 'COVERAGE_NORM_FACTOR', 0.0) extra_vars['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \ else params.get('MAX_OUTPUT_TEXT_LEN', 50) extra_vars['pos_unk'] = params['POS_UNK'] extra_vars['output_max_length_depending_on_x'] = params.get( 'MAXLEN_GIVEN_X', True) extra_vars['output_max_length_depending_on_x_factor'] = params.get( 'MAXLEN_GIVEN_X_FACTOR', 3) extra_vars['output_min_length_depending_on_x'] = params.get( 'MINLEN_GIVEN_X', True) extra_vars['output_min_length_depending_on_x_factor'] = params.get( 'MINLEN_GIVEN_X_FACTOR', 2) extra_vars['attend_on_output'] = params.get( 'ATTEND_ON_OUTPUT', 'transformer' in params['MODEL_TYPE'].lower()) if params['POS_UNK']: extra_vars['heuristic'] = params['HEURISTIC'] if params['HEURISTIC'] > 0: extra_vars['mapping'] = dataset.mapping for s in params["EVAL_ON_SETS"]: extra_vars[s] = dict() extra_vars[s]['references'] = dataset.extra_variables[s][ params['OUTPUTS_IDS_DATASET'][0]] callback_metric = PrintPerformanceMetricOnEpochEndOrEachNUpdates( nmt_model, dataset, gt_id=params['OUTPUTS_IDS_DATASET'][0], metric_name=params['METRICS'], set_name=params['EVAL_ON_SETS'], batch_size=params['BATCH_SIZE'], each_n_epochs=params['EVAL_EACH'], extra_vars=extra_vars, reload_epoch=params['RELOAD'], is_text=True, input_text_id=input_text_id, 
save_path=nmt_model.model_path, index2word_y=vocab_y, index2word_x=vocab_x, sampling_type=params['SAMPLING'], beam_search=params['BEAM_SEARCH'], start_eval_on_epoch=params['START_EVAL_ON_EPOCH'], write_samples=True, write_type=params['SAMPLING_SAVE_MODE'], eval_on_epochs=params['EVAL_EACH_EPOCHS'], save_each_evaluation=False, verbose=params['VERBOSE']) callback_metric.evaluate( params['RELOAD'], counter_name='epoch' if params['EVAL_EACH_EPOCHS'] else 'update')
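# Example usage sketch (placeholder values): evaluating a stored checkpoint with
# apply_NMT_model(). STORE_PATH, METRICS, EVAL_ON_SETS, etc. come from the
# project's config.py; RELOAD selects which saved checkpoint to restore, and
# RELOAD_EPOCH is assumed to be the config flag that says RELOAD counts epochs.
def example_apply_nmt_model():
    from config import load_parameters  # hyperparameters from config.py

    params = load_parameters()
    params['RELOAD'] = 10                  # checkpoint to restore (placeholder)
    params['RELOAD_EPOCH'] = True          # assumed: interpret RELOAD as an epoch number
    params['EVAL_ON_SETS'] = ['val']       # splits with stored references
    apply_NMT_model(params, load_dataset='datasets/Dataset.pkl')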
def test_2models_allclose(model1, model2, rtol=1e-05, atol=1e-08, verbose=0): if isinstance(model1, str): from keras_wrapper.cnn_model import loadModel model1 = loadModel(model1, -1, full_path=True) model1_init = model1.model_init model1_next = model1.model_next model1 = model1.model if isinstance(model2, str): from keras_wrapper.cnn_model import loadModel model2 = loadModel(model2, -1, full_path=True) model2_init = model2.model_init model2_next = model2.model_next model2 = model2.model logging.info("Checking all models (from model 1) are close...") if test_models_allclose(model1, model1_init, model1_next, rtol=rtol, atol=atol, verbose=verbose): logging.info("All close") else: logging.info("Not close!") logging.info("Checking all models (from model 2) are close...") if test_models_allclose(model2, model2_init, model2_next, rtol=rtol, atol=atol, verbose=verbose): logging.info("All close") else: logging.info("Not close!") model1_names = list(map(str, model1.weights)) model2_names = list(map(str, model2.weights)) if verbose > 0: print("===========================") print("Checking model weights") logging.info("Checking model 1 is close to model 2") if model1 is not None: model_next_names = map(str, model1.weights) for (index_next, name) in list(enumerate(model_next_names)): index_model = model2_names.index(name) if not np.allclose(model2.weights[index_model].get_value(), model1.weights[index_next].get_value(), rtol=rtol, atol=atol): raise AssertionError('Parameters ' + name + ' are not close! (model2 index: ' + str(index_model) + ' model1 index ' + str(index_next) + ')') if verbose > 0: print("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close") if model2 is not None: model_next_names = map(str, model2.weights) for (index_next, name) in list(enumerate(model_next_names)): index_model = model1_names.index(name) if not np.allclose(model1.weights[index_model].get_value(), model2.weights[index_next].get_value(), rtol=rtol, atol=atol): raise AssertionError('Parameters ' + name + ' are not close! (model1 index: ' + str(index_model) + ' model2 index ' + str(index_next) + ')') if verbose > 0: print("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close") if verbose > 0: print("Checking model next weights") logging.info("Checking model_next 1 is close to model_next 2") if model1_next is not None: model_next_names = map(str, model1_next.weights) for (index_next, name) in list(enumerate(model_next_names)): index_model = model2_names.index(name) if not np.allclose(model2.weights[index_model].get_value(), model1_next.weights[index_next].get_value(), rtol=rtol, atol=atol): raise AssertionError('Parameters ' + name + ' are not close! (model2 index: ' + str(index_model) + ' model1_next index ' + str(index_next) + ')') if verbose > 0: print("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close") if model2_next is not None: model_next_names = map(str, model2_next.weights) for (index_next, name) in list(enumerate(model_next_names)): index_model = model1_names.index(name) if not np.allclose(model1.weights[index_model].get_value(), model2_next.weights[index_next].get_value(), rtol=rtol, atol=atol): raise AssertionError('Parameters ' + name + ' are not close! (model1 index: ' + str(index_model) + ' model2_next index ' + str(index_next) + ')') if verbose > 0: print("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close")
if verbose > 0: print("Checking model init weights") logging.info("Checking model_init 1 is close to model_init 2") if model1_init is not None: model_next_names = map(str, model1_init.weights) for (index_next, name) in list(enumerate(model_next_names)): index_model = model2_names.index(name) if not np.allclose(model2.weights[index_model].get_value(), model1_init.weights[index_next].get_value(), rtol=rtol, atol=atol): raise AssertionError('Parameters ' + name + ' are not close! (model2 index: ' + str(index_model) + ' model1_init index ' + str(index_next) + ')') if verbose > 0: print("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close") if model2_init is not None: model_next_names = map(str, model2_init.weights) for (index_next, name) in list(enumerate(model_next_names)): index_model = model1_names.index(name) if not np.allclose(model1.weights[index_model].get_value(), model2_init.weights[index_next].get_value(), rtol=rtol, atol=atol): raise AssertionError('Parameters ' + name + ' are not close! (model1 index: ' + str(index_model) + ' model2_init index ' + str(index_next) + ')') if verbose > 0: print("Weights", name, "(position ", index_next, "at model_next - position", index_model, "at model are close") return True
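# Example usage sketch (placeholder paths): comparing two stored checkpoints.
# Passing strings makes test_2models_allclose() load each model itself through
# keras_wrapper's loadModel before checking that all shared weights are close.
def example_compare_checkpoints():
    test_2models_allclose('trained_models/model_a/epoch_5',
                          'trained_models/model_b/epoch_5',
                          rtol=1e-5, atol=1e-8, verbose=1)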
help="Path to the models") parser.add_argument( "-ch", "--changes", nargs="*", help="Changes to the config. Following the syntax Key=Value", default="") return parser.parse_args() if __name__ == "__main__": args = parse_args() models = args.models logging.info("Using an ensemble of %d models" % len(args.models)) models = [loadModel(m, -1, full_path=True) for m in args.models] if args.config is None: logging.info("Reading parameters from config.py") from config import load_parameters params = load_parameters() else: logging.info("Loading parameters from %s" % str(args.config)) params = pkl2dict(args.config) try: for arg in args.changes: try: k, v = arg.split('=') except ValueError: print 'Overwritten arguments must have the form key=Value. \n Currently are: %s' % str( args.changes) exit(1)
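# The fragment above is cut off after exit(1); the complete "-ch/--changes Key=Value"
# pattern (splitting on '=' and parsing the value with ast.literal_eval) appears in
# full in the server script further below. A self-contained sketch of that override
# logic; apply_changes() is an illustrative helper, not part of the project:
import ast

def apply_changes(params, changes):
    """Apply 'Key=Value' override strings to a parameter dictionary."""
    for arg in changes:
        k, v = arg.split('=')
        try:
            params[k] = ast.literal_eval(v)   # numbers, lists, booleans, ...
        except (ValueError, SyntaxError):
            params[k] = v                     # keep the raw string otherwise
    return params

# e.g. apply_changes(params, ['BEAM_SIZE=12', 'POS_UNK=False'])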
id='source_text', pad_on_batch=True, tokenization='tokenize_basic', fill='end', max_text_len=100, min_occ=0) dataset.setInput(None, 'test', type='ghost', id='state_below', required=False) ## get model predictions params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[ params['INPUTS_IDS_DATASET'][0]] params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[ params['OUTPUTS_IDS_DATASET'][0]] Ross_model = loadModel('trained_models/Ross_M7', 36) params_prediction = { 'max_batch_size': 50, 'predict_on_sets': ['test'], 'beam_size': 12, 'maxlen': 50, 'model_inputs': ['source_text', 'state_below'], 'model_outputs': ['target_text'], 'dataset_inputs': ['source_text', 'state_below'], 'dataset_outputs': ['target_text'], 'normalize': True, 'alpha_factor': 0.6 } Ross_predictions = Ross_model.predictBeamSearchNet(dataset,
def main(params): """ Main function """ if(params['RELOAD'] > 0): logging.info('Resuming training.') check_params(params) ########### Load data dataset = build_dataset(params) params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]] params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]] ########### ########### Build model if(params['RELOAD'] == 0): # build new model vqa = VQA_Model(params, type=params['MODEL_TYPE'], verbose=params['VERBOSE'], model_name=params['MODEL_NAME'], vocabularies=dataset.vocabulary, store_path=params['STORE_PATH']) # Define the inputs and outputs mapping from our Dataset instance to our model inputMapping = dict() for i, id_in in enumerate(params['INPUTS_IDS_DATASET']): pos_source = dataset.ids_inputs.index(id_in) id_dest = vqa.ids_inputs[i] inputMapping[id_dest] = pos_source vqa.setInputsMapping(inputMapping) outputMapping = dict() for i, id_out in enumerate(params['OUTPUTS_IDS_DATASET']): pos_target = dataset.ids_outputs.index(id_out) id_dest = vqa.ids_outputs[i] outputMapping[id_dest] = pos_target vqa.setOutputsMapping(outputMapping) else: # resume from previously trained model vqa = loadModel(params['STORE_PATH'], params['RELOAD']) vqa.setOptimizer() ########### ########### Callbacks callbacks = buildCallbacks(params, vqa, dataset) ########### ########### Training total_start_time = timer() logger.debug('Starting training!') training_params = {'n_epochs': params['MAX_EPOCH'], 'batch_size': params['BATCH_SIZE'], 'lr_decay': params['LR_DECAY'], 'lr_gamma': params['LR_GAMMA'], 'epochs_for_save': params['EPOCHS_FOR_SAVE'], 'verbose': params['VERBOSE'], 'eval_on_sets': params['EVAL_ON_SETS'], 'n_parallel_loaders': params['PARALLEL_LOADERS'], 'extra_callbacks': callbacks, 'reload_epoch': params['RELOAD']} vqa.trainNet(dataset, training_params) total_end_time = timer() time_difference = total_end_time - total_start_time logging.info('Total time spent {0:.2f}s = {1:.2f}m'.format(time_difference, time_difference / 60.0))
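# Small illustration (hypothetical ids) of the mapping dictionaries built above:
# each model input/output name is mapped to the position of the matching id in
# the Dataset instance, which is what setInputsMapping()/setOutputsMapping()
# consume.
def example_io_mappings():
    dataset_input_ids = ['question', 'image']    # hypothetical Dataset ids, in order
    model_input_ids = ['text_in', 'img_in']      # hypothetical model ids, same order
    input_mapping = {model_id: dataset_input_ids.index(ds_id)
                     for ds_id, model_id in zip(dataset_input_ids, model_input_ids)}
    return input_mapping                         # -> {'text_in': 0, 'img_in': 1}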
def train_model(params, specific_params): """ Main function """ ########### Load data if 'SPLITS' in specific_params.keys(): dataset, inputs_mapping, outputs_mapping, class_weights = build_dataset( params, specific_params, empty_label=specific_params['EMPTY_LABEL'], splits=specific_params['SPLITS']) # dataset used for evaluation else: dataset, inputs_mapping, outputs_mapping, class_weights = build_dataset( params, specific_params, empty_label=specific_params['EMPTY_LABEL']) ########### ########### Build model params = mergeParams(params, specific_params) if params['RELOAD'] == 0: food_model = Food_Model(params, type=specific_params['MODEL_TYPE'], verbose=params['VERBOSE'], empty_label=params['EMPTY_LABEL'], model_name=specific_params['MODEL_NAME'], store_path=specific_params['STORE_PATH']) else: # Reload previously trained model food_model = loadModel(params['STORE_PATH'], params['RELOAD']) # Define the inputs and outputs mapping from our Dataset instance to our model food_model.setInputsMapping(inputs_mapping) food_model.setOutputsMapping(outputs_mapping) # Update the optimizer, whether we are loading or building a model food_model.params = params food_model.setOptimizer() ########### ########### Callbacks callbacks = buildCallbacks(params, specific_params, food_model, dataset) ########### ########### Training total_start_time = timer() logger.debug('Starting training!') training_params = { 'n_epochs': specific_params['MAX_EPOCH'], 'batch_size': specific_params['BATCH_SIZE'], 'lr_decay': specific_params['LR_DECAY'], 'lr_gamma': specific_params['LR_GAMMA'], 'epochs_for_save': params['EPOCHS_FOR_SAVE'], 'verbose': params['VERBOSE'], 'n_parallel_loaders': params['PARALLEL_LOADERS'], 'extra_callbacks': callbacks, 'reload_epoch': params['RELOAD'], 'epoch_offset': params['RELOAD'], 'data_augmentation': params['DATA_AUGMENTATION'], 'patience': specific_params['PATIENCE'], 'metric_check': specific_params['STOP_METRIC'], 'class_weights': class_weights, } food_model.trainNet(dataset, training_params) total_end_time = timer() time_difference = total_end_time - total_start_time logging.info('Total time spent {0:.2f}s = {1:.2f}m'.format( time_difference, time_difference / 60.0))
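# Sketch of the per-experiment dictionary consumed above (placeholder values;
# only keys actually read from specific_params in this snippet are shown).
example_specific_params = {
    'MODEL_TYPE': 'InceptionResNetV2',        # any builder defined on the model class
    'MODEL_NAME': 'food_model_1',
    'STORE_PATH': 'models/food_model_1',
    'EMPTY_LABEL': False,
    'SPLITS': ['train', 'val'],
    'MAX_EPOCH': 20,
    'BATCH_SIZE': 32,
    'LR_DECAY': 1,
    'LR_GAMMA': 0.8,
    'PATIENCE': 5,
    'STOP_METRIC': 'accuracy',
}
# train_model(load_parameters(), example_specific_params)  # hypothetical call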
def main(): args = parse_args() server_address = (args.address, args.port) httpd = HTTPServer(server_address, NMTHandler) logger.setLevel(args.logging_level) parameters = load_parameters() if args.config is not None: logger.info("Loading parameters from %s" % str(args.config)) parameters = update_parameters(parameters, pkl2dict(args.config)) if args.online: online_parameters = load_parameters_online() parameters = update_parameters(parameters, online_parameters) try: for arg in args.changes: try: k, v = arg.split('=') except ValueError: print( 'Overwritten arguments must have the form key=Value. \n Currently are: %s' % str(args.changes)) exit(1) try: parameters[k] = ast.literal_eval(v) except ValueError: parameters[k] = v except ValueError: print('Error processing arguments: (', k, ",", v, ")") exit(2) dataset = loadDataset(args.dataset) # For converting predictions into sentences # Dataset backwards compatibility bpe_separator = dataset.BPE_separator if hasattr( dataset, "BPE_separator") and dataset.BPE_separator is not None else '@@' # Build BPE tokenizer if necessary if 'bpe' in parameters['TOKENIZATION_METHOD'].lower(): logger.info('Building BPE') if not dataset.BPE_built: dataset.build_bpe(parameters.get( 'BPE_CODES_PATH', parameters['DATA_ROOT_PATH'] + '/training_codes.joint'), separator=bpe_separator) # Build tokenization function tokenize_f = eval('dataset.' + parameters.get('TOKENIZATION_METHOD', 'tokenize_bpe')) detokenize_function = eval( 'dataset.' + parameters.get('DETOKENIZATION_METHOD', 'detokenize_bpe')) dataset.build_moses_tokenizer(language=parameters['SRC_LAN']) dataset.build_moses_detokenizer(language=parameters['TRG_LAN']) tokenize_general = dataset.tokenize_moses detokenize_general = dataset.detokenize_moses # Prediction parameters params_prediction = dict() params_prediction['max_batch_size'] = parameters.get('BATCH_SIZE', 20) params_prediction['n_parallel_loaders'] = parameters.get( 'PARALLEL_LOADERS', 1) params_prediction['beam_size'] = parameters.get('BEAM_SIZE', 6) params_prediction['maxlen'] = parameters.get('MAX_OUTPUT_TEXT_LEN_TEST', 100) params_prediction['optimized_search'] = parameters['OPTIMIZED_SEARCH'] params_prediction['model_inputs'] = parameters['INPUTS_IDS_MODEL'] params_prediction['model_outputs'] = parameters['OUTPUTS_IDS_MODEL'] params_prediction['dataset_inputs'] = parameters['INPUTS_IDS_DATASET'] params_prediction['dataset_outputs'] = parameters['OUTPUTS_IDS_DATASET'] params_prediction['search_pruning'] = parameters.get( 'SEARCH_PRUNING', False) params_prediction['normalize_probs'] = True params_prediction['alpha_factor'] = parameters.get('ALPHA_FACTOR', 1.0) params_prediction['coverage_penalty'] = True params_prediction['length_penalty'] = True params_prediction['length_norm_factor'] = parameters.get( 'LENGTH_NORM_FACTOR', 0.0) params_prediction['coverage_norm_factor'] = parameters.get( 'COVERAGE_NORM_FACTOR', 0.0) params_prediction['pos_unk'] = parameters.get('POS_UNK', False) params_prediction['heuristic'] = parameters.get('HEURISTIC', 0) params_prediction['state_below_index'] = -1 params_prediction['output_text_index'] = 0 params_prediction['state_below_maxlen'] = -1 if parameters.get( 'PAD_ON_BATCH', True) else parameters.get('MAX_OUTPUT_TEXT_LEN', 50) params_prediction['output_max_length_depending_on_x'] = parameters.get( 'MAXLEN_GIVEN_X', True) params_prediction[ 'output_max_length_depending_on_x_factor'] = parameters.get( 'MAXLEN_GIVEN_X_FACTOR', 3) params_prediction['output_min_length_depending_on_x'] = parameters.get( 'MINLEN_GIVEN_X', 
True) params_prediction[ 'output_min_length_depending_on_x_factor'] = parameters.get( 'MINLEN_GIVEN_X_FACTOR', 2) params_prediction['attend_on_output'] = parameters.get( 'ATTEND_ON_OUTPUT', 'transformer' in parameters['MODEL_TYPE'].lower()) # Manage pos_unk strategies if parameters['POS_UNK']: mapping = None if dataset.mapping == dict() else dataset.mapping else: mapping = None if 'transformer' in parameters['MODEL_TYPE'].lower(): params_prediction['pos_unk'] = False params_prediction['coverage_penalty'] = False # Training parameters parameters_training = dict() if args.online: logger.info('Loading models from %s' % str(args.models)) parameters_training = { # Traning parameters 'n_epochs': parameters['MAX_EPOCH'], 'shuffle': False, 'loss': parameters.get('LOSS', 'categorical_crossentropy'), 'batch_size': parameters.get('BATCH_SIZE', 1), 'homogeneous_batches': False, 'optimizer': parameters.get('OPTIMIZER', 'SGD'), 'lr': parameters.get('LR', 0.1), 'lr_decay': parameters.get('LR_DECAY', None), 'lr_gamma': parameters.get('LR_GAMMA', 1.), 'epochs_for_save': -1, 'verbose': args.verbose, 'eval_on_sets': parameters.get('EVAL_ON_SETS_KERAS', None), 'n_parallel_loaders': parameters['PARALLEL_LOADERS'], 'extra_callbacks': [], # callbacks, 'reload_epoch': parameters['RELOAD'], 'epoch_offset': parameters['RELOAD'], 'data_augmentation': parameters['DATA_AUGMENTATION'], 'patience': parameters.get('PATIENCE', 0), 'metric_check': parameters.get('STOP_METRIC', None), 'eval_on_epochs': parameters.get('EVAL_EACH_EPOCHS', True), 'each_n_epochs': parameters.get('EVAL_EACH', 1), 'start_eval_on_epoch': parameters.get('START_EVAL_ON_EPOCH', 0), 'additional_training_settings': { 'k': parameters.get('K', 1), 'tau': parameters.get('TAU', 1), 'lambda': parameters.get('LAMBDA', 0.5), 'c': parameters.get('C', 0.5), 'd': parameters.get('D', 0.5) } } model_instances = [ TranslationModel( parameters, model_type=parameters['MODEL_TYPE'], verbose=parameters['VERBOSE'], model_name=parameters['MODEL_NAME'] + '_' + str(i), vocabularies=dataset.vocabulary, store_path=parameters['STORE_PATH'], set_optimizer=False) for i in range(len(args.models)) ] models = [ updateModel(model, path, -1, full_path=True) for (model, path) in zip(model_instances, args.models) ] else: models = [loadModel(m, -1, full_path=True) for m in args.models] for nmt_model in models: nmt_model.setParams(parameters) nmt_model.setOptimizer() parameters['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[ parameters['INPUTS_IDS_DATASET'][0]] parameters['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[ parameters['OUTPUTS_IDS_DATASET'][0]] # Get word2index and index2word dictionaries index2word_y = dataset.vocabulary[parameters['OUTPUTS_IDS_DATASET'] [0]]['idx2words'] word2index_y = dataset.vocabulary[parameters['OUTPUTS_IDS_DATASET'] [0]]['words2idx'] index2word_x = dataset.vocabulary[parameters['INPUTS_IDS_DATASET'] [0]]['idx2words'] word2index_x = dataset.vocabulary[parameters['INPUTS_IDS_DATASET'] [0]]['words2idx'] excluded_words = None interactive_beam_searcher = NMTSampler(models, dataset, parameters, params_prediction, parameters_training, tokenize_f, detokenize_function, tokenize_general, detokenize_general, mapping=mapping, word2index_x=word2index_x, word2index_y=word2index_y, index2word_y=index2word_y, eos_symbol=args.eos_symbol, excluded_words=excluded_words, online=args.online, verbose=args.verbose) httpd.sampler = interactive_beam_searcher logger.info('Server starting at %s' % str(server_address)) httpd.serve_forever()
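# Minimal, self-contained sketch of the server pattern used above: a request
# handler class is bound to an address, the sampler object is attached to the
# server instance, and serve_forever() blocks. The real NMTHandler/NMTSampler
# logic is project-specific and not reproduced here; the host, port and echo
# behaviour below are placeholders (Python 3 stdlib naming).
from http.server import HTTPServer, BaseHTTPRequestHandler

class EchoHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        # A real handler would call self.server.sampler here to translate the query.
        self.send_response(200)
        self.end_headers()
        self.wfile.write(self.path.encode('utf-8'))

def example_run_echo_server(address='127.0.0.1', port=6542):
    httpd = HTTPServer((address, port), EchoHandler)
    httpd.sampler = None   # the original script attaches an NMTSampler instance here
    httpd.serve_forever()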
def classifyFood101(): from keras_wrapper.cnn_model import CNN_Model, loadModel, saveModel logging.info('Defining CNN model and training it.') # Load food classification dataset dataset_name = 'Food101' ds = loadDataset('Datasets/Dataset_' + dataset_name + '.pkl') # The network we are going to use needs an image input size of [224,224,3] # for this reason we have to communicate this to the dataset instance in charge of loading the data ds.img_size_crop['images'] = [224, 224, 3] # Create VGG model and load weights model_name = 'VGG_16_FunctionalAPI' net = CNN_Model( type='VGG_16_FunctionalAPI', model_name=model_name, input_shape=[224, 224, 3], weights_path='/media/HDD_2TB/CNN_MODELS/VGG/vgg16_weights.h5', seq_to_functional=True ) # we are setting the weights of a Sequential model into a FunctionalAPI one # Reformat net output layer for the number of classes in our dataset n_classes = len(ds.classes['labels']) vis_input = net.model.get_layer('vis_input').output # input layer drop = net.model.get_layer('last_dropout').output # layer before final FC output = Dense(n_classes, activation='softmax', name='output')(drop) # redefine FC-softmax layer net.model = Model(input=vis_input, output=output) # define inputs and outputs # Compile net.setOptimizer(lr=0.001, metrics=['accuracy']) # Define the inputs and outputs mapping from our Dataset instance to our CNN_Model instance # set input and output mappings from dataset to network pos_images = ds.types_inputs.index('image') pos_labels = ds.types_outputs.index('categorical') # the first input of our dataset (pos_images) will also be the first input of our model (named vis_input) inputMapping = {'vis_input': pos_images} net.setInputsMapping(inputMapping) # the first output of our dataset (pos_labels) will also be the first output of our model (named output) outputMapping = {'output': pos_labels} net.setOutputsMapping(outputMapping, acc_output='output') # Save model saveModel(net, 0) # Load model net = loadModel('Models/' + model_name, 0) # the model must be compiled again when loaded net.setOptimizer(lr=0.001, metrics=['accuracy']) # Apply short training (1 epoch) # training_params = {'n_epochs': 1, 'batch_size': 50, # 'lr_decay': 2, 'lr_gamma': 0.8, # 'epochs_for_save': 1, 'verbose': 1, 'eval_on_sets': ['val']} # net.trainNet(ds, training_params) # Test network on test set test_params = {'batch_size': 50} # net.testNet(ds, test_params) # Predict network on all sets test_params['predict_on_sets'] = ['val'] predictions = net.predictNet(ds, test_params) logging.info("Predicted %d samples." % (len(predictions))) logging.info("Done")
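# Hypothetical post-processing of the predictions returned by predictNet() above:
# map each softmax row's argmax back to a class name. `predictions` is assumed to
# be array-like of shape (n_samples, n_classes), as the len() call above suggests;
# adapt if your keras_wrapper version returns a dict keyed by split instead.
import numpy as np

def top1_labels(predictions, class_names, n_show=5):
    """Print the top-1 class name and probability for the first few samples."""
    probs = np.asarray(predictions)
    best = probs.argmax(axis=1)
    for i in range(min(n_show, len(best))):
        print('sample %d -> %s (p=%.3f)' % (i, class_names[best[i]], probs[i, best[i]]))
    return best

# e.g. top1_labels(predictions, ds.classes['labels']) inside classifyFood101()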