def interact(dic_path, model_full_name, dir_model, debug=False, model_specific_dictionary=True,
             beam_size=2, word_decoding=False, extra_arg_specific_label="",
             save_attention=False, show_attention=False, beam_decode=False,
             max_len=MAX_LEN, verbose=2):
    from model.seq2seq import LexNormalizer
    assert model_specific_dictionary
    char_dictionary = None
    voc_size = None
    if not debug:
        pdb.set_trace = lambda: 1
    model = LexNormalizer(generator=Generator, voc_size=voc_size, load=True,
                          model_full_name=model_full_name,
                          model_specific_dictionary=model_specific_dictionary,
                          dict_path=dic_path, dir_model=dir_model,
                          extra_arg_specific_label=extra_arg_specific_label,
                          loading_sanity_test=True,
                          word_decoding=word_decoding, char_decoding=not word_decoding,
                          verbose=verbose)
    model.eval()
    if show_attention or save_attention:
        assert model.decoder.attn_layer is not None, "ERROR : no attention to plot"
    if save_attention:
        dir_attention = os.path.join(dir_model, "attention_plot")
        if os.path.isdir(dir_attention):
            info = "existing"
        else:
            os.mkdir(dir_attention)
            info = "created"
        printing("Saving attention plots to {} ({} directory)", var=[dir_attention, info],
                 verbose_level=1, verbose=verbose)
    else:
        dir_attention = None
    decode_interacively(max_len=max_len, model=model, char_dictionary=char_dictionary,
                        sent_mode=True, dir_attention=dir_attention,
                        save_attention=save_attention, show_attention=show_attention,
                        beam_decode=beam_decode, beam_size=beam_size,
                        showing_attention=show_attention, verbose=verbose)
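# Example usage (a minimal sketch ; the dictionary path, model name and checkpoint
# directory below are hypothetical) :
# interact(dic_path="./dictionaries", model_full_name="normalizer_demo",
#          dir_model="./checkpoints/normalizer_demo-folder",
#          beam_decode=True, beam_size=3)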
def predict(batch_size, data_path, dict_path, model_full_name, bucket=False,
            model_specific_dictionary=True, print_raw=False,
            dir_normalized=None, dir_original=None, get_batch_mode=False,
            normalization=True, debug=False, use_gpu=None, verbose=0):
    assert model_specific_dictionary, "ERROR : only model_specific_dictionary = True supported now"
    # NB : for now the dictionary has to be loaded when evaluating (it cannot be recomputed) ;
    # the ability to recompute it could be added to LexNormalizer
    use_gpu = use_gpu_(use_gpu)
    hardware_chosen = "GPU" if use_gpu else "CPU"
    printing("{} mode ", var=[hardware_chosen], verbose_level=0, verbose=verbose)
    if not debug:
        pdb.set_trace = lambda: 1
    model = LexNormalizer(generator=Generator, load=True, model_full_name=model_full_name,
                          voc_size=None, use_gpu=use_gpu, dict_path=dict_path,
                          model_specific_dictionary=True,
                          dir_model=os.path.join(PROJECT_PATH, "checkpoints",
                                                 model_full_name + "-folder"),
                          char_decoding=True, word_decoding=False, verbose=verbose)
    data_read = conllu_data.read_data_to_variable(data_path, model.word_dictionary,
                                                  model.char_dictionary, model.pos_dictionary,
                                                  model.xpos_dictionary, model.type_dictionary,
                                                  use_gpu=use_gpu,
                                                  norm_not_norm=model.auxilliary_task_norm_not_norm,
                                                  symbolic_end=True, symbolic_root=True,
                                                  dry_run=0, lattice=False, verbose=verbose,
                                                  normalization=normalization, bucket=bucket,
                                                  add_start_char=1, add_end_char=1)
    batchIter = data_gen_conllu(data_read, model.word_dictionary, model.char_dictionary,
                                batch_size=batch_size, get_batch_mode=get_batch_mode,
                                normalization=normalization, print_raw=print_raw,
                                verbose=verbose)
    model.eval()
    greedy_decode_batch(char_dictionary=model.char_dictionary, verbose=verbose,
                        gold_output=False, use_gpu=use_gpu, write_output=True,
                        label_data=REPO_DATASET[data_path], batchIter=batchIter, model=model,
                        dir_normalized=dir_normalized, dir_original=dir_original,
                        batch_size=batch_size)
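# Example usage (a minimal sketch ; data and dictionary paths are hypothetical,
# and data_path must be a key of REPO_DATASET) :
# predict(batch_size=50, data_path="./data/lexnorm-test.conll",
#         dict_path="./dictionaries", model_full_name="normalizer_demo",
#         dir_normalized="./predictions/normalized.conll",
#         dir_original="./predictions/original.conll")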
def evaluate(model_full_name, dir, nbatches=50, V=5, batch=2, seq_len=10,
             generalize_extra=0, verbose=2):
    model = LexNormalizer(load=True, dir_model=dir, model_full_name=model_full_name,
                          generator=Generator, voc_size=9, verbose=verbose)
    model.eval()
    loss = run_epoch(data_gen_dummy(V, batch=batch, nbatches=nbatches, seed=SEED,
                                    sent_len=seq_len + generalize_extra, verbose=verbose),
                     model, LossCompute(model.generator))
    print("Final Loss {} ".format(loss))
    return loss
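# Example usage (a minimal sketch ; model name and directory are hypothetical) :
# evaluate(model_full_name="dummy_model", dir="./test_logs/dummy_model-folder",
#          seq_len=10, generalize_extra=2)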
def train_1_epoch(epochs=EPOCHS, seq_len=10, generalize_extra=0, nbatches=50, verbose=2,
                  lr=0.001, V=5, batch=2):
    model = LexNormalizer(generator=Generator, char_embedding_dim=5, hidden_size_encoder=11,
                          voc_size=9, hidden_size_sent_encoder=9, output_dim=10,
                          hidden_size_decoder=11, verbose=verbose)
    # optimizer
    adam = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.98), eps=1e-9)
    for epoch in tqdm(range(epochs), disable_tqdm_level(verbose=verbose, verbose_level=0)):
        model.train()
        run_epoch(data_gen_dummy(V=V, batch=batch, nbatches=nbatches, sent_len=seq_len,
                                 verbose=verbose, seed=SEED),
                  model, LossCompute(model.generator, opt=adam, verbose=verbose),
                  verbose=verbose, i_epoch=epoch, n_epochs=epochs, n_batches=nbatches)
    loss = run_epoch(data_gen_dummy(V, batch=batch, nbatches=10, seed=SEED,
                                    sent_len=seq_len + generalize_extra, verbose=verbose),
                     model, LossCompute(model.generator))
    print("Final Loss {} ".format(loss))
    dir, model_full_name = model.save("./test_logs", model, verbose=verbose,
                                      info_checkpoint=TEMPLATE_INFO_CHECKPOINT)
    return dir, model_full_name, loss
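# Example usage (a minimal sketch on the dummy copy task) :
# dir, name, final_loss = train_1_epoch(epochs=5, nbatches=20, lr=0.001)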
word_dictionary, char_dictionary, pos_dictionary, \
    xpos_dictionary, type_dictionary = conllu_data.create_dict(dict_path=dict_path,
                                                               train_path=test_path,
                                                               dev_path=test_path,
                                                               test_path=test_path,
                                                               word_embed_dict={}, dry_run=False,
                                                               add_start_char=add_start_char,
                                                               vocab_trim=True)
V = len(char_dictionary.instance2index) + 1
print("Character vocabulary length is {}".format(V))
model = LexNormalizer(generator=Generator, char_embedding_dim=5, voc_size=V,
                      hidden_size_encoder=11, output_dim=10, hidden_size_decoder=11,
                      verbose=verbose)
batchIter = data_gen_conllu(test_path, word_dictionary, char_dictionary, pos_dictionary,
                            xpos_dictionary, type_dictionary, normalization=True,
                            add_start_char=add_start_char, add_end_char=0,
                            batch_size=2, nbatch=nbatches,
from io_.data_iterator import data_gen_dummy
from model.loss import LossCompute
import matplotlib.pyplot as plt
from tracking.plot_loss import simple_plot
from tqdm import tqdm
from io_.info_print import disable_tqdm_level
import pdb

import torch  # needed by torch.optim.Adam below
from model.seq2seq import LexNormalizer
# NB : Generator and run_epoch are assumed to be imported elsewhere in this module

# hyperparameters
V = 5
lr = 0.001
verbose = 1

# model and optimizer
model = LexNormalizer(generator=Generator, char_embedding_dim=5, hidden_size_encoder=12,
                      voc_size=9, output_dim=50, hidden_size_sent_encoder=10,
                      hidden_size_decoder=11, verbose=verbose)
adam = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.98), eps=1e-9)

# reporting
training_loss = []
nbatches = 1
EPOCHS = 20
seq_len = 10
generalize_extra = 5


def _test_overfit_dummy():
    pdb.set_trace = lambda: 1
    for epoch in tqdm(range(EPOCHS), disable_tqdm_level(verbose=verbose, verbose_level=0)):
        model.train()
        run_epoch(data_gen_dummy(V=V, batch=2, nbatches=nbatches, sent_len=seq_len, verbose=verbose),
def train(train_path, dev_path, n_epochs, normalization, dict_path=None, pos_specific_path=None,
          expand_vocab_dev_test=False, checkpointing_metric="loss-dev-all", batch_size=10,
          test_path=None, label_train="", label_dev="", use_gpu=None, lr=0.001,
          n_layers_word_encoder=1, n_layers_sent_cell=1, get_batch_mode_all=True,
          dropout_sent_encoder_cell=0, dropout_word_encoder_cell=0, dropout_word_decoder_cell=0,
          dropout_bridge=0, drop_out_word_encoder_out=0, drop_out_sent_encoder_out=0,
          dir_word_encoder=1, word_embed=False, word_embedding_dim=None,
          word_embedding_projected_dim=None, mode_word_encoding="cat",
          char_level_embedding_projection_dim=0, word_recurrent_cell_encoder=None,
          word_recurrent_cell_decoder=None, drop_out_char_embedding_decoder=0,
          hidden_size_encoder=None, output_dim=None, char_embedding_dim=None,
          hidden_size_decoder=None, hidden_size_sent_encoder=None, freq_scoring=5,
          compute_scoring_curve=False, score_to_compute_ls=None, mode_norm_ls=None,
          checkpointing=True, freq_checkpointing=None, freq_writer=None, model_dir=None,
          reload=False, model_full_name=None, model_id_pref="", print_raw=False,
          model_specific_dictionary=False, dir_sent_encoder=1, add_start_char=None,
          add_end_char=1, overall_label="DEFAULT", overall_report_dir=CHECKPOINT_DIR,
          compute_mean_score_per_sent=False, weight_binary_loss=1, dense_dim_auxilliary=None,
          dense_dim_auxilliary_2=None, unrolling_word=False, char_src_attention=False,
          debug=False, timing=False, dev_report_loss=True, bucketing=True, policy=None,
          teacher_force=True, proportion_pred_train=None, shared_context="all", clipping=None,
          extend_n_batch=1, stable_decoding_state=False, init_context_decoder=True,
          dense_dim_auxilliary_pos=None, dense_dim_auxilliary_pos_2=None, tasks=None,
          word_decoding=False, char_decoding=True, dense_dim_word_pred=None,
          dense_dim_word_pred_2=None, dense_dim_word_pred_3=None, symbolic_root=False,
          symbolic_end=False, extern_emb_dir=None, activation_word_decoder=None,
          activation_char_decoder=None, extra_arg_specific_label="", freezing_mode=False,
          freeze_ls_param_prefix=None, multi_task_loss_ponderation=None, max_char_len=None,
          attention_tagging=False, dropout_input=None, optimizer="adam", verbose=1):
    if multi_task_loss_ponderation is not None:
        sanity_check_loss_poneration(multi_task_loss_ponderation, verbose=verbose)
    if teacher_force:
        assert proportion_pred_train is None, "proportion_pred_train should be None in teacher_force mode"
    else:
        assert 100 > proportion_pred_train > 0, "proportion_pred_train should be between 0 and 100"
    auxilliary_task_norm_not_norm = "norm_not_norm" in tasks
    auxilliary_task_pos = "pos" in tasks
    if "normalize" not in tasks:
        word_decoding = False
        char_decoding = False
    if not unrolling_word:
        assert not char_src_attention, "ERROR : attention requires step-by-step unrolling"
    printing("WARNING : bucketing is {} ", var=bucketing, verbose=verbose, verbose_level=1)
    if freq_writer is None:
        freq_writer = freq_checkpointing
        printing("REPORTING : freq_writer set to freq_checkpointing {}", var=[freq_checkpointing],
                 verbose=verbose, verbose_level=1)
    if auxilliary_task_norm_not_norm:
        printing("MODEL : training model with auxiliary task (loss weighted with {})",
                 var=[weight_binary_loss], verbose=verbose, verbose_level=1)
    #if compute_scoring_curve:
    #    assert score_to_compute_ls is not None and mode_norm_ls is not None and freq_scoring is not None, \
    #        "ERROR score_to_compute_ls and mode_norm_ls should not be None"
    use_gpu = use_gpu_(use_gpu)
    hardware_chosen = "GPU" if use_gpu else "CPU"
hardware mode ", var=([hardware_choosen]), verbose_level=0, verbose=verbose) freq_checkpointing = int( n_epochs / 10 ) if checkpointing and freq_checkpointing is None else freq_checkpointing assert add_start_char == 1, "ERROR : add_start_char must be activated due decoding behavior of output_text_" printing("WARNING : add_start_char is {} and add_end_char {} ".format( add_start_char, add_end_char), verbose=verbose, verbose_level=0) printing("TRAINING : checkpointing every {} epoch", var=freq_checkpointing, verbose=verbose, verbose_level=1) if reload: assert model_full_name is not None and len( model_id_pref ) == 0 and model_dir is not None and dict_path is not None else: assert model_full_name is None and model_dir is None if not debug: pdb.set_trace = lambda: None loss_training = [] loss_developing = [] # was not able to use the template cause no more reinitialization of the variable loss_details_template = { 'loss_seq_prediction': [], 'other': {}, 'loss_binary': [], 'loss_overall': [] } if auxilliary_task_norm_not_norm else None # used for computed scores for early stoping if checkpoint_metric != loss and for curves plot evaluation_set_reporting = dev_path mode_norm_ls = ["all"] score_to_compute_ls = ["exact_match"] print( "WARNING :train.py overwriting mode_norm_ls score_to_compute_ls argument " ) curve_scores = { score + "-" + mode_norm + "-" + REPO_DATASET[data]: [] for score in score_to_compute_ls for mode_norm in mode_norm_ls for data in evaluation_set_reporting } if compute_scoring_curve else None printing("WARNING : lr {} ".format(lr, add_start_char, add_end_char), verbose=verbose, verbose_level=0) printing( "INFO : dictionary is computed (re)created from scratch on train_path {} and dev_path {}" .format(train_path, dev_path), verbose=verbose, verbose_level=1) if not model_specific_dictionary: word_dictionary, char_dictionary, pos_dictionary, \ xpos_dictionary, type_dictionary = \ conllu_data.load_dict(dict_path=dict_path, train_path=train_path, dev_path=dev_path, test_path=test_path, word_embed_dict={}, dry_run=False, force_new_dic=True, add_start_char=add_start_char, verbose=1) voc_size = len(char_dictionary.instance2index) + 1 word_voc_input_size = len(word_dictionary.instance2index) + 1 printing("DICTIONARY ; character vocabulary is len {} : {} ", var=str( len(char_dictionary.instance2index) + 1, char_dictionary.instance2index), verbose=verbose, verbose_level=0) _train_path, _dev_path, _add_start_char = None, None, None else: voc_size = None word_voc_input_size = 0 if not reload: # we need to feed the model the data so that it computes the model_specific_dictionary _train_path = train_path _dev_path = dev_path _test_path = test_path _add_start_char = add_start_char else: # as it reload : we don't need data _train_path, _dev_path, _test_path, _add_start_char = None, None, None, None model = LexNormalizer( generator=Generator, expand_vocab_dev_test=expand_vocab_dev_test, dense_dim_auxilliary=dense_dim_auxilliary, dense_dim_auxilliary_2=dense_dim_auxilliary_2, tasks=tasks, weight_binary_loss=weight_binary_loss, dense_dim_auxilliary_pos=dense_dim_auxilliary_pos, dense_dim_auxilliary_pos_2=dense_dim_auxilliary_pos_2, load=reload, char_embedding_dim=char_embedding_dim, voc_size=voc_size, dir_model=model_dir, use_gpu=use_gpu, dict_path=dict_path, word_recurrent_cell_decoder=word_recurrent_cell_decoder, word_recurrent_cell_encoder=word_recurrent_cell_encoder, train_path=_train_path, dev_path=_dev_path, pos_specific_path=pos_specific_path, add_start_char=_add_start_char, 
        model_specific_dictionary=model_specific_dictionary, dir_word_encoder=dir_word_encoder,
        drop_out_sent_encoder_cell=dropout_sent_encoder_cell,
        drop_out_word_encoder_cell=dropout_word_encoder_cell,
        drop_out_word_decoder_cell=dropout_word_decoder_cell,
        drop_out_bridge=dropout_bridge,
        drop_out_char_embedding_decoder=drop_out_char_embedding_decoder,
        drop_out_word_encoder_out=drop_out_word_encoder_out,
        drop_out_sent_encoder_out=drop_out_sent_encoder_out,
        n_layers_word_encoder=n_layers_word_encoder, dir_sent_encoder=dir_sent_encoder,
        n_layers_sent_cell=n_layers_sent_cell,
        hidden_size_encoder=hidden_size_encoder, output_dim=output_dim,
        model_id_pref=model_id_pref, model_full_name=model_full_name,
        hidden_size_sent_encoder=hidden_size_sent_encoder, shared_context=shared_context,
        unrolling_word=unrolling_word, char_src_attention=char_src_attention,
        word_decoding=word_decoding, dense_dim_word_pred=dense_dim_word_pred,
        dense_dim_word_pred_2=dense_dim_word_pred_2, dense_dim_word_pred_3=dense_dim_word_pred_3,
        char_decoding=char_decoding, mode_word_encoding=mode_word_encoding,
        char_level_embedding_projection_dim=char_level_embedding_projection_dim,
        stable_decoding_state=stable_decoding_state, init_context_decoder=init_context_decoder,
        symbolic_root=symbolic_root, symbolic_end=symbolic_end,
        word_embed=word_embed, word_embedding_dim=word_embedding_dim,
        word_embedding_projected_dim=word_embedding_projected_dim,
        word_embed_dir=extern_emb_dir, word_voc_input_size=word_voc_input_size,
        teacher_force=teacher_force,
        activation_char_decoder=activation_char_decoder,
        activation_word_decoder=activation_word_decoder,
        test_path=_test_path, extend_vocab_with_test=_test_path is not None,
        attention_tagging=attention_tagging,
        multi_task_loss_ponderation=multi_task_loss_ponderation,  # needed for save/reload purposes
        hidden_size_decoder=hidden_size_decoder, verbose=verbose, timing=timing)
    pos_batch = auxilliary_task_pos
    if use_gpu:
        model = model.cuda()
        printing("TYPE model is cuda : {} ", var=(next(model.parameters()).is_cuda),
                 verbose=verbose, verbose_level=4)
        #model.decoder.attn_layer = model.decoder.attn_layer.cuda()
    if not model_specific_dictionary:
        model.word_dictionary, model.char_dictionary, model.pos_dictionary, \
            model.xpos_dictionary, model.type_dictionary = \
            word_dictionary, char_dictionary, pos_dictionary, xpos_dictionary, type_dictionary
    starting_epoch = model.arguments["info_checkpoint"]["n_epochs"] if reload else 1
    reloading = "" if not reload else " reloaded from " + str(starting_epoch)
    n_epochs += starting_epoch
    if freezing_mode:
        assert freeze_ls_param_prefix is not None, "freeze_ls_param_prefix should not be None"
        printing("TRAINING : freezing is on for layers {} ", var=[freeze_ls_param_prefix],
                 verbose=verbose, verbose_level=1)
        for name, param in model.named_parameters():
            for freeze_param in freeze_ls_param_prefix:
                if name.startswith(freeze_param):
                    param.requires_grad = False
                    printing("TRAINING : freezing {} parameter ", var=[name],
                             verbose=verbose, verbose_level=1)
    _loss_dev = 1000
    checkpoint_score_saved = 1000
    _loss_train = 1000
    counter_no_decrease = 0
    saved_epoch = 1
    if reload:
        printing("TRAINING : RELOADED MODE , starting from checkpointed epoch {} ",
                 var=starting_epoch, verbose_level=0, verbose=verbose)
    printing("TRAINING : Running from {} to {} epochs : training on {} evaluating on {}",
             var=(starting_epoch, n_epochs, train_path, dev_path), verbose=verbose, verbose_level=0)
    starting_time = time.time()
    total_time = 0
    x_axis_epochs = []
    epoch_ls_dev = []
    epoch_ls_train = []
    train_path = [train_path] if isinstance(train_path, str) else train_path
    dev_path = [dev_path] if isinstance(dev_path, str) else dev_path
    readers_train = readers_load(datasets=train_path, tasks=tasks,
                                 word_dictionary=model.word_dictionary,
                                 word_dictionary_norm=model.word_nom_dictionary,
                                 char_dictionary=model.char_dictionary,
                                 pos_dictionary=model.pos_dictionary,
                                 xpos_dictionary=model.xpos_dictionary,
                                 type_dictionary=model.type_dictionary,
                                 use_gpu=use_gpu, norm_not_norm=auxilliary_task_norm_not_norm,
                                 word_decoder=word_decoding,
                                 add_start_char=add_start_char, add_end_char=add_end_char,
                                 symbolic_end=symbolic_end, symbolic_root=symbolic_root,
                                 bucket=bucketing, max_char_len=max_char_len, verbose=verbose)
    readers_dev = readers_load(datasets=dev_path, tasks=tasks,
                               word_dictionary=model.word_dictionary,
                               word_dictionary_norm=model.word_nom_dictionary,
                               char_dictionary=model.char_dictionary,
                               pos_dictionary=model.pos_dictionary,
                               xpos_dictionary=model.xpos_dictionary,
                               type_dictionary=model.type_dictionary,
                               use_gpu=use_gpu, norm_not_norm=auxilliary_task_norm_not_norm,
                               word_decoder=word_decoding,
                               add_start_char=add_start_char, add_end_char=add_end_char,
                               symbolic_end=symbolic_end, symbolic_root=symbolic_root,
                               bucket=bucketing, max_char_len=max_char_len, verbose=verbose)
    dir_writer = os.path.join(overall_report_dir, "runs", "{}-model".format(model.model_full_name))
    writer = SummaryWriter(log_dir=dir_writer)
    printing("REPORT : run \ntensorboard --logdir={} --host=localhost --port=9101 "
             "(run tensorboard remotely : sh $EXPERIENCE/track/run_tensorboard_serveo.sh $log_dir $port ) ",
             var=[dir_writer], verbose=verbose, verbose_level=1)
    printing("REPORT : summary writer will be located in {}", var=[dir_writer],
             verbose_level=1, verbose=verbose)
    step_train = 0
    step_dev = 0
    if ADAPTABLE_SCORING:
        printing("WARNING : scoring epochs are not regular (more frequent at the beginning)",
                 verbose_level=1, verbose=verbose)
        freq_scoring = 1
    checkpoint_dir_former = None
    for epoch in tqdm(range(starting_epoch, n_epochs),
                      disable_tqdm_level(verbose=verbose, verbose_level=0)):
        index_look = 25
        #parameters = filter(lambda p: p.requires_grad, model.parameters())
        decay_rate = 1
        opt = dptx.get_optimizer(model.parameters(), lr=lr * decay_rate ** epoch,
                                 optimizer=optimizer)
        assert policy in AVAILABLE_SCHEDULING_POLICIES
        policy_dic = eval(policy)(epoch) if policy is not None else None
        # TODO : no need to re-output multi_task_mode : tasks should be harmonized to read
        multi_task_mode, ponderation_normalize_loss, weight_binary_loss, weight_pos_loss = \
            scheduling_policy(epoch=epoch, phases_ls=policy_dic, tasks=tasks)
        printing("TRAINING Tasks scheduling : ponderation_normalize_loss is {} weight_binary_loss is {}"
                 " weight_pos_loss is {} mode is {} ",
                 var=[ponderation_normalize_loss, weight_binary_loss, weight_pos_loss, multi_task_mode],
                 verbose=verbose, verbose_level=2)
        printing("TRAINING : Starting {} epoch out of {} ", var=(epoch + 1, n_epochs),
                 verbose=verbose, verbose_level=1)
        model.train()
        #batchIter = data_gen_conllu(data_read_train, model.word_dictionary, model.char_dictionary,
        #                            normalization=normalization, get_batch_mode=get_batch_mode_all,
        #                            batch_size=batch_size, extend_n_batch=extend_n_batch,
        #                            print_raw=print_raw, timing=timing,
        #                            pos_dictionary=model.pos_dictionary, verbose=verbose)
        batchIter = data_gen_multi_task_sampling_batch(
            tasks=tasks, readers=readers_train, batch_size=batch_size,
            word_dictionary=model.word_dictionary,
            char_dictionary=model.char_dictionary,
            pos_dictionary=model.pos_dictionary,
            word_dictionary_norm=model.word_nom_dictionary,
            get_batch_mode=get_batch_mode_all, extend_n_batch=extend_n_batch,
            dropout_input=dropout_input, verbose=verbose)
        start = time.time()
        printing("TRAINING : TEACHER FORCE : Scheduled Sampling proportion of train on prediction is {} ",
                 var=[proportion_pred_train], verbose=verbose, verbose_level=2)
        #rep_tl.checkout_layer_name("encoder.seq_encoder.weight_ih_l0", model.named_parameters(),
        #                           info_epoch=epoch)
        loss_train, loss_details_train, step_train = run_epoch(
            batchIter, model,
            LossCompute(model.generator, opt=opt,
                        multi_task_loss_ponderation=model.multi_task_loss_ponderation,
                        auxilliary_task_norm_not_norm=auxilliary_task_norm_not_norm,
                        model=model, writer=writer, use="train", use_gpu=use_gpu,
                        verbose=verbose, tasks=tasks,
                        char_decoding=char_decoding, word_decoding=word_decoding,
                        pos_pred=auxilliary_task_pos,
                        vocab_char_size=len(list(model.char_dictionary.instance2index.keys())) + 1,
                        timing=timing),
            verbose=verbose, i_epoch=epoch, multi_task_mode=multi_task_mode,
            n_epochs=n_epochs, timing=timing,
            weight_binary_loss=weight_binary_loss, weight_pos_loss=weight_pos_loss,
            ponderation_normalize_loss=ponderation_normalize_loss,
            step=step_train, clipping=clipping, pos_batch=pos_batch,
            proportion_pred_train=proportion_pred_train, log_every_x_batch=100)
        writer_weights_and_grad(model=model, freq_writer=freq_writer, epoch=epoch,
                                writer=writer, verbose=verbose)
        _train_ep_time, start = get_timing(start)
        model.eval()
        # TODO : should be moved inside the freq_checkpointing check, otherwise it is useless
        #batchIter_eval = data_gen_conllu(data_read_dev, model.word_dictionary, model.char_dictionary,
        #                                 batch_size=batch_size, get_batch_mode=False,
        #                                 normalization=normalization, extend_n_batch=1,
        #                                 pos_dictionary=model.pos_dictionary, verbose=verbose)
        batchIter_eval = data_gen_multi_task_sampling_batch(
            tasks=tasks, readers=readers_dev, batch_size=batch_size,
            word_dictionary=model.word_dictionary,
            char_dictionary=model.char_dictionary,
            word_dictionary_norm=model.word_nom_dictionary,
            pos_dictionary=model.pos_dictionary,
            dropout_input=0, extend_n_batch=1, get_batch_mode=False, verbose=verbose)
        _create_iter_time, start = get_timing(start)
        # TODO : should be able to factorize this into a single run_epoch() for train and dev
        #        (the computation should be the same)
        # TODO : should not evaluate at every epoch : evaluate every x epochs,
        #        check whether the loss decreases and checkpoint
        if (dev_report_loss and (epoch % freq_checkpointing == 0)) or (epoch + 1 == n_epochs):
            printing("EVALUATION : computing loss on dev epoch {} ", var=epoch,
                     verbose=verbose, verbose_level=1)
            loss_obj = LossCompute(model.generator, use_gpu=use_gpu, verbose=verbose,
                                   multi_task_loss_ponderation=model.multi_task_loss_ponderation,
                                   writer=writer, use="dev",
                                   vocab_char_size=len(list(model.char_dictionary.instance2index.keys())) + 1,
                                   pos_pred=auxilliary_task_pos, tasks=tasks,
                                   char_decoding=char_decoding, word_decoding=word_decoding,
                                   auxilliary_task_norm_not_norm=auxilliary_task_norm_not_norm)
            loss_dev, loss_details_dev, step_dev = run_epoch(
                batchIter_eval, model, loss_compute=loss_obj,
                i_epoch=epoch, n_epochs=n_epochs, verbose=verbose, timing=timing, step=step_dev,
                weight_binary_loss=weight_binary_loss,
                ponderation_normalize_loss=ponderation_normalize_loss,
                weight_pos_loss=weight_pos_loss, pos_batch=pos_batch, log_every_x_batch=100)
            loss_developing.append(loss_dev)
            epoch_ls_dev.append(epoch)
            if auxilliary_task_norm_not_norm:
                # in this case we report the loss details
                for ind, loss_key in enumerate(loss_details_dev.keys()):
                    if loss_key != "other":
                        loss_details_template[loss_key].append(loss_details_dev[loss_key])
            else:
                loss_details_template = None
            _eval_time, start = get_timing(start)
        loss_training.append(loss_train)
        epoch_ls_train.append(epoch)
        time_per_epoch = time.time() - starting_time
        total_time += time_per_epoch
        starting_time = time.time()
        # computing exact/edit score
        exact_only = False
        overall_report_ls = None
        # MODIFIED FREQ SCORING TO FREQ CHECKPOINTING
        if compute_scoring_curve and \
                ((epoch % (freq_checkpointing if checkpointing_metric != "loss-dev-all" else freq_scoring) == 0)
                 or (epoch + 1 == n_epochs)):
            if epoch < 1 and ADAPTABLE_SCORING:
                freq_scoring *= 5
            if 4 < epoch < 6 and ADAPTABLE_SCORING:
                freq_scoring *= 3
            if 14 < epoch < 15 and ADAPTABLE_SCORING:  # NB : never true for integer epochs
                freq_scoring *= 2
            if epoch + 1 == n_epochs:
                printing("EVALUATION : final scoring ", verbose=verbose, verbose_level=0)
            x_axis_epochs.append(epoch)
            printing("EVALUATION : Computing score on {} and {} ",
                     var=(score_to_compute_ls, mode_norm_ls), verbose=verbose, verbose_level=1)
            overall_report_ls = []
            for task, eval_data in zip(tasks, evaluation_set_reporting):
                eval_label = REPO_DATASET[eval_data]
                assert len(set(evaluation_set_reporting)) == len(evaluation_set_reporting), \
                    "ERROR : the same dataset was provided twice for reporting, which would mess up the loss"
                printing("EVALUATION on {} ", var=[eval_data], verbose=verbose, verbose_level=1)
                scores = evaluate(data_path=eval_data, use_gpu=use_gpu,
                                  overall_label=overall_label, overall_report_dir=overall_report_dir,
                                  score_to_compute_ls=score_to_compute_ls, mode_norm_ls=mode_norm_ls,
                                  label_report=eval_label, model=model,
                                  normalization=normalization, print_raw=False,
                                  model_specific_dictionary=True, get_batch_mode_evaluate=False,
                                  compute_mean_score_per_sent=compute_mean_score_per_sent,
                                  batch_size=batch_size, word_decoding=word_decoding,
                                  dir_report=model.dir_model, debug=debug,
                                  evaluated_task=task, tasks=tasks, verbose=verbose)
                # we keep everything here in case we want to do some fancy early-stopping metric
                overall_report_ls.extend(scores)
            # dirty but does the job
            exact_only = True
            DEPRECATED = False
            if DEPRECATED:
                curve_scores = update_curve_dic(score_to_compute_ls=score_to_compute_ls,
                                                mode_norm_ls=mode_norm_ls, eval_data=eval_label,
                                                former_curve_scores=curve_scores, scores=scores,
                                                exact_only=exact_only)
                curve_ls_tuple = [(loss_ls, label) for label, loss_ls in curve_scores.items()
                                  if isinstance(loss_ls, list)]
                curves = [tupl[0] for tupl in curve_ls_tuple]
                val_ls = [tupl[1] + "({}tok)".format(info_token) for tupl in curve_ls_tuple
                          for data, info_token in curve_scores.items()
                          if not isinstance(info_token, list) if tupl[1].endswith(data)]
            score_to_compute_ls = ["exact"] if exact_only else score_to_compute_ls
            if DEPRECATED:
                for score_plot in score_to_compute_ls:
                    # dirty but does the job
                    print(val_ls)
                    if exact_only:
                        val_ls = [val for val in val_ls if val.startswith("exact-all")
                                  or val.startswith("exact-NORMED")
                                  or val.startswith("exact-NEED_NORM")]
                    #val_ls = ["{}-all-{}".format(metric, REPO_DATASET[eval])
                    #          for eval in evaluation_set_reporting for metric in ["exact", "edit"]]
                    curves = [curve for curve in curves if len(curve) > 0]
                    simple_plot_ls(losses_ls=curves, labels=val_ls, final_loss="", save=True,
                                   filter_by_label=score_plot, x_axis=x_axis_epochs,
                                   dir=model.dir_model, prefix=model.model_full_name,
                                   epochs=str(epoch) + reloading, verbose=verbose, lr=lr,
                                   label_color_0=REPO_DATASET[evaluation_set_reporting[0]],
                                   label_color_1=REPO_DATASET[evaluation_set_reporting[1]])
        # WARNING : only saving if the metric improves ; when reloading, the former model is not loaded back
        if (checkpointing and epoch % freq_checkpointing == 0) or (epoch + 1 == n_epochs):
            if checkpointing_metric != "loss-dev-all" and epoch < STARTING_CHECKPOINTING_WITH_SCORE:
                _checkpointing_metric = "loss-dev-all"
            elif checkpointing_metric != "loss-dev-all":
                _checkpointing_metric = checkpointing_metric
                if epoch == STARTING_CHECKPOINTING_WITH_SCORE:
                    checkpoint_score_saved = -report["score"]
                    printing("Checkpoint info : switching checkpoint_score_saved to {} : {}"
                             .format(checkpointing_metric, checkpoint_score_saved),
                             verbose_level=1, verbose=verbose)
            elif checkpointing_metric == "loss-dev-all":
                _checkpointing_metric = checkpointing_metric
            else:
                raise Exception("You missed a case")
            dir_plot_detailed = simple_plot(final_loss=0,
                                            epoch_ls_1=epoch_ls_dev, epoch_ls_2=epoch_ls_dev,
                                            loss_2=loss_details_template.get("loss_binary", None),
                                            loss_ls=loss_details_template["loss_seq_prediction"],
                                            epochs=str(epoch) + reloading,
                                            label="dev-seq_prediction", label_2="dev-binary",
                                            save=True, dir=model.dir_model,
                                            verbose=verbose, verbose_level=1, lr=lr,
                                            prefix=model.model_full_name + "-details",
                                            show=False) if loss_details_template is not None else None
            dir_plot = simple_plot(final_loss=loss_train, loss_2=loss_developing,
                                   loss_ls=loss_training, epochs=str(epoch) + reloading,
                                   epoch_ls_1=epoch_ls_train, epoch_ls_2=epoch_ls_dev,
                                   label=label_train + "-train", label_2=label_dev + "-dev",
                                   save=True, dir=model.dir_model, verbose=verbose,
                                   verbose_level=1, lr=lr, prefix=model.model_full_name,
                                   show=False)
            sanity_check_checkpointing_metric(tasks, checkpointing_metric=_checkpointing_metric)
            if _checkpointing_metric != "loss-dev-all" or \
                    (epoch == (STARTING_CHECKPOINTING_WITH_SCORE - 1)
                     and checkpointing_metric != "loss-dev-all"):
                # for now only useful when different from loss --> compute metric on dev all and default always
                # assuming unitask thanks to the sanity check
                assert overall_report_ls is not None, "ERROR overall_report_ls was not defined"
                report = rep_tl.get_score(overall_report_ls,
                                          metric=TASKS_PARAMETER[tasks[0]].get("default_metric"),
                                          data=REPO_DATASET[dev_path[0]],
                                          info_score="all", task=tasks[0])
                # negative because it is an accuracy
                checkpoint_score = -report["score"]
            else:
                checkpoint_score = loss_dev
            model, checkpoint_score_saved, counter_no_decrease, saved_epoch, checkpoint_dir_former = \
                checkpoint(
                    loss_saved=checkpoint_score_saved, loss=checkpoint_score,
                    checkpointing_metric=_checkpointing_metric, model=model,
                    counter_no_decrease=counter_no_decrease,
                    checkpoint_dir_former=checkpoint_dir_former,
                    saved_epoch=saved_epoch, model_dir=model.dir_model,
                    extra_checkpoint_label="1st_train" if not reload
                    else "start_{}_ep-{}".format(starting_epoch, extra_arg_specific_label),
                    extra_arg_specific_label=extra_arg_specific_label,
                    info_checkpoint={
                        "n_epochs": epoch, "batch_size": batch_size,
                        "optimizer": optimizer, "gradient_clipping": clipping,
                        "tasks_schedule_policy": policy, "teacher_force": teacher_force,
                        "proportion_pred_train": proportion_pred_train,
                        "train_data_path": train_path, "dev_data_path": dev_path,
                        "other": {
                            "error_curves": dir_plot, "loss": loss_dev,
                            "sanity_test": {"loss": loss_dev,
                                            "data": [REPO_DATASET[_dev_path] for _dev_path in dev_path],
                                            "batch_size": batch_size},
                            "error_curves_details": dir_plot_detailed,
                            "dropout_input": dropout_input,
                            "checkpointing_metric": _checkpointing_metric,
                            "multi_task_loss_ponderation": multi_task_loss_ponderation,
                            "weight_binary_loss": weight_binary_loss * int(auxilliary_task_norm_not_norm),
                            "weight_pos_loss": weight_pos_loss * int(auxilliary_task_pos),
"ponderation_normalize_loss": ponderation_normalize_loss, "data": "dev", "seed(np/torch)": (SEED_NP, SEED_TORCH), "extend_n_batch": extend_n_batch, "lr": lr, "optim_strategy": "lr_constant", "time_training(min)": "{0:.2f}".format(total_time/60), "average_per_epoch(min)": "{0:.2f}".format((total_time/n_epochs)/60)}}, epoch=epoch, epochs=n_epochs-1, keep_all_checkpoint=False if epoch > starting_epoch else True,# we have nothing to remove after 1st epoch verbose=verbose) if counter_no_deacrease * freq_checkpointing >= BREAKING_NO_DECREASE: printing( "CHECKPOINTING : Breaking training : loss did not decrease on dev for 10 checkpoints " "so keeping model from {} epoch ".format(saved_epoch), verbose=verbose, verbose_level=0) break printing( "LOSS train {:.3f}, dev {:.3f} for epoch {} out of {} epochs ", var=(loss_train, loss_dev, epoch, n_epochs), verbose=verbose, verbose_level=1) if timing: print("Summary : {}".format( OrderedDict([("_train_ep_time", _train_ep_time), ("_create_iter_time", _create_iter_time), ("_eval_time", _eval_time)]))) writer.close() printing( "REPORT : run : \n tensorboard --logdir={} --host=localhost --port=9101 ", var=[dir_writer], verbose=verbose, verbose_level=1) #rep_tl.checkout_layer_name("encoder.seq_encoder.weight_ih_l0", model.named_parameters(), info_epoch="LAST") simple_plot(final_loss=loss_dev, loss_ls=loss_training, loss_2=loss_developing, epoch_ls_1=epoch_ls_train, epoch_ls_2=epoch_ls_dev, epochs=n_epochs, save=True, dir=model.dir_model, label=label_train, label_2=label_dev, lr=lr, prefix=model.model_full_name + "-LAST", verbose=verbose) return model.model_full_name
word_dictionary, char_dictionary, pos_dictionary, \
    xpos_dictionary, type_dictionary = conllu_data.create_dict(dict_path=dict_path,
                                                               train_path=test_path,
                                                               dev_path=test_path,
                                                               test_path=None,
                                                               word_embed_dict={}, dry_run=False,
                                                               vocab_trim=True,
                                                               add_start_char=add_start_char)
printing("char_dictionary : {}".format(char_dictionary.instance2index),
         verbose=verbose, verbose_level=0)
V = len(char_dictionary.instance2index) + 1
printing("Character vocabulary length is {}".format(V), verbose=verbose, verbose_level=0)
model = LexNormalizer(generator=Generator, char_embedding_dim=20, voc_size=V,
                      hidden_size_encoder=50, output_dim=50, hidden_size_sent_encoder=10,
                      hidden_size_decoder=50, verbose=verbose)
adam = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.98), eps=1e-9)
loss_former = 1000
checkpointing_freq = 10
if checkpointing and False:  # checkpointing is disabled in this test script
    model_dir = os.path.join("../checkpoints", "{}-folder".format(model.model_full_name))
    os.mkdir(model_dir)
    printing("Dir {} created".format(model_dir), verbose=verbose, verbose_level=0)
for epoch in tqdm(range(epochs), disable_tqdm_level(verbose=verbose, verbose_level=0)):
    printing("Starting new epoch {} ".format(epoch), verbose=verbose, verbose_level=1)
    model.train()
    batchIter = data_gen_conllu(test_path, word_dictionary, char_dictionary, pos_dictionary,
                                xpos_dictionary,
def evaluate(batch_size, data_path, tasks, evaluated_task, write_report=True, dir_report=None,
             dict_path=None, model_full_name=None, score_to_compute_ls=None, mode_norm_ls=None,
             get_batch_mode_evaluate=True, overall_label="ALL_MODELS",
             overall_report_dir=CHECKPOINT_DIR, bucket=False, model_specific_dictionary=True,
             label_report="", print_raw=False, model=None, compute_mean_score_per_sent=False,
             write_output=False, word_decoding=False, char_decoding=True,
             extra_arg_specific_label="", scoring_func_sequence_pred="BLUE", max_char_len=None,
             normalization=True, debug=False, force_new_dic=False, use_gpu=None, verbose=0):
    assert model_specific_dictionary, "ERROR : only model_specific_dictionary = True supported now"
    # NB : for now the dictionary has to be loaded when evaluating (it cannot be recomputed) ;
    # the ability to recompute it could be added to LexNormalizer
    use_gpu = use_gpu_(use_gpu)
    hardware_chosen = "GPU" if use_gpu else "CPU"
    printing("{} mode ", var=[hardware_chosen], verbose_level=0, verbose=verbose)
    printing("EVALUATION : evaluating with compute_mean_score_per_sent {}".format(compute_mean_score_per_sent),
             verbose=verbose, verbose_level=1)
    if mode_norm_ls is None:
        mode_norm_ls = ["all", "NORMED", "NEED_NORM"]
    if write_report:
        assert dir_report is not None
    if model is not None:
        assert model_full_name is None and dict_path is None, \
            "ERROR as model is provided : model_full_name and dict_path should be None"
    else:
        assert model_full_name is not None and dict_path is not None, \
            "ERROR : model_full_name and dict_path required to load model"
    voc_size = None
    if not debug:
        pdb.set_trace = lambda: 1
    model = LexNormalizer(generator=Generator, load=True, model_full_name=model_full_name,
                          tasks=tasks, word_decoding=word_decoding, char_decoding=char_decoding,
                          voc_size=voc_size, use_gpu=use_gpu, dict_path=dict_path,
                          model_specific_dictionary=True,
                          dir_model=os.path.join(PROJECT_PATH, "checkpoints",
                                                 model_full_name + "-folder"),
                          extra_arg_specific_label=extra_arg_specific_label,
                          loading_sanity_test=True,
                          verbose=verbose) if model is None else model
    if score_to_compute_ls is None:
        score_to_compute_ls = ["edit", "exact"]
        if model.auxilliary_task_norm_not_norm:
            score_to_compute_ls.extend(SCORE_AUX)
    printing("EVALUATION : Evaluating {} metric with details {} ",
             var=[score_to_compute_ls, mode_norm_ls], verbose=verbose, verbose_level=3)
    #rep_tl.checkout_layer_name("encoder.seq_encoder.weight_ih_l0", model.named_parameters(),
    #                           info_epoch="EVAL")
    readers_eval = readers_load(datasets=[data_path], tasks=[evaluated_task],
                                word_dictionary=model.word_dictionary,
                                word_dictionary_norm=model.word_nom_dictionary,
                                char_dictionary=model.char_dictionary,
                                pos_dictionary=model.pos_dictionary,
                                xpos_dictionary=model.xpos_dictionary,
                                type_dictionary=model.type_dictionary,
                                use_gpu=use_gpu,
                                norm_not_norm=model.auxilliary_task_norm_not_norm,
                                word_decoder=word_decoding, bucket=bucket,
                                max_char_len=max_char_len,
                                add_start_char=1, add_end_char=1,
                                symbolic_end=model.symbolic_end,
                                symbolic_root=model.symbolic_root, verbose=verbose)
    batchIter = data_gen_multi_task_sampling_batch(tasks=[evaluated_task], readers=readers_eval,
                                                   batch_size=batch_size,
                                                   word_dictionary=model.word_dictionary,
                                                   char_dictionary=model.char_dictionary,
                                                   pos_dictionary=model.pos_dictionary,
                                                   get_batch_mode=get_batch_mode_evaluate,
                                                   word_dictionary_norm=model.word_nom_dictionary,
                                                   extend_n_batch=1, dropout_input=0,
                                                   verbose=verbose)
    model.eval()
    # the formulas come from the normalization_errors functions
    score_dic_new, formulas = greedy_decode_batch(char_dictionary=model.char_dictionary,
                                                  verbose=verbose, gold_output=True,
                                                  score_to_compute_ls=score_to_compute_ls,
                                                  use_gpu=use_gpu, write_output=write_output,
                                                  eval_new=True,
                                                  task_simultaneous_eval=[evaluated_task],
                                                  stat="sum", mode_norm_score_ls=mode_norm_ls,
                                                  label_data=REPO_DATASET[data_path],
                                                  batchIter=batchIter, model=model,
                                                  scoring_func_sequence_pred=scoring_func_sequence_pred,
                                                  compute_mean_score_per_sent=compute_mean_score_per_sent,
                                                  batch_size=batch_size)
    for score_name, formula in formulas.items():
        if isinstance(formula, tuple) and len(formula) > 1:
            (num, denom) = formula
            score_value = score_dic_new[num] / score_dic_new[denom] if score_dic_new[denom] > 0 else None
            #score_value_per_sent =
            if score_dic_new[denom] == 0:
                print("WARNING : score {} has a null denominator {} (numerator {} equal to {})"
                      .format(score_name, denom, num, score_dic_new[num]))
            reg = re.match("([^-]+)-([^-]+)-.*", num)
            mode_norm = reg.group(1)
            task = reg.group(2)
            # report everything in a dictionary
            # NB : reportint_unavailable is assumed to be a flag defined elsewhere in this module
            if not reportint_unavailable:
                report = report_template(metric_val=score_name, info_score_val=mode_norm,
                                         score_val=score_value,
                                         n_sents=score_dic_new["n_sents"], avg_per_sent=0,
                                         n_tokens_score=score_dic_new.get(mode_norm + "-" + task + "-gold-count", -1),
                                         model_full_name_val=model.model_full_name, task=task,
                                         report_path_val=model.arguments["checkpoint_dir"],
                                         evaluation_script_val="exact_match",
                                         model_args_dir=model.args_dir,
                                         data_val=REPO_DATASET[data_path])
            else:
                report = {"report ": 0}
            over_all_report_dir = os.path.join(dir_report,
                                               model.model_full_name + "-report-" + label_report + ".json")
            over_all_report_dir_all_models = os.path.join(overall_report_dir,
                                                          overall_label + "-report.json")
            writing_mode = "w" if not os.path.isfile(over_all_report_dir) else "a"
            writing_mode_all_models = "w" if not os.path.isfile(over_all_report_dir_all_models) else "a"
            for dir, writing_mode in zip([over_all_report_dir, over_all_report_dir_all_models],
                                         [writing_mode, writing_mode_all_models]):
                if writing_mode == "w":
                    _all_report = [report]
                    json.dump([report], open(dir, writing_mode))
                    printing("REPORT : Creating new report {} ".format(dir),
                             verbose=verbose, verbose_level=1)
                else:
                    all_report = json.load(open(dir, "r"))
                    all_report.append(report)
                    json.dump(all_report, open(dir, "w"))
    printing("NEW REPORT metric : {} ", var=[" ".join(list(formulas.keys()))],
             verbose=verbose, verbose_level=1)
    try:
        printing("NEW REPORT : model specific report saved {} ".format(over_all_report_dir),
                 verbose=verbose, verbose_level=1)
        printing("NEW REPORT : overall report saved {} ".format(over_all_report_dir_all_models),
                 verbose=verbose, verbose_level=1)
    except Exception as e:
        print(Exception(e))
    if writing_mode == "w":
        all_report = _all_report
    return all_report
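# Example usage (a minimal sketch ; paths are hypothetical and data_path must be a key
# of REPO_DATASET) :
# reports = evaluate(batch_size=50, data_path="./data/lexnorm-test.conll",
#                    tasks=["normalize"], evaluated_task="normalize",
#                    model_full_name="normalizer_demo", dict_path="./dictionaries",
#                    dir_report="./checkpoints/normalizer_demo-folder",
#                    write_report=True, get_batch_mode_evaluate=False)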