Example 1
def interact(dic_path,
             model_full_name,
             dir_model,
             debug=False,
             model_specific_dictionary=True,
             beam_size=2,
             word_decoding=False,
             extra_arg_specific_label="",
             save_attention=False,
             show_attention=False,
             beam_decode=False,
             max_len=MAX_LEN,
             verbose=2):
    from model.seq2seq import LexNormalizer

    assert model_specific_dictionary
    char_dictionary = None
    voc_size = None

    if not debug:
        pdb.set_trace = lambda: 1  # neutralize pdb.set_trace() breakpoints outside debug mode
    model = LexNormalizer(generator=Generator,
                          voc_size=voc_size,
                          load=True,
                          model_full_name=model_full_name,
                          model_specific_dictionary=model_specific_dictionary,
                          dict_path=dic_path,
                          dir_model=dir_model,
                          extra_arg_specific_label=extra_arg_specific_label,
                          loading_sanity_test=True,
                          word_decoding=word_decoding,
                          char_decoding=not word_decoding,
                          verbose=verbose)
    model.eval()
    if show_attention or save_attention:
        assert model.decoder.attn_layer is not None, "ERROR : no attention to plot "
    if save_attention:
        dir_attention = os.path.join(dir_model, "attention_plot")
        if os.path.isdir(dir_attention):
            info = "existing"
        else:
            os.mkdir(dir_attention)
            info = "created"
        printing("Saving to {} {}",
                 var=[info, dir_attention],
                 verbose_level=1,
                 verbose=verbose)
    else:
        dir_attention = None
    decode_interacively(max_len=max_len,
                        model=model,
                        char_dictionary=char_dictionary,
                        sent_mode=True,
                        dir_attention=dir_attention,
                        save_attention=save_attention,
                        show_attention=show_attention,
                        beam_decode=beam_decode,
                        beam_size=beam_size,
                        showing_attention=show_attention,
                        verbose=verbose)
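
A minimal invocation could look like the sketch below ; the model name and directories are hypothetical placeholders, and the "-folder" suffix mirrors the checkpoint layout used in Example 2 :

if __name__ == "__main__":
    interact(dic_path="./dictionaries",
             model_full_name="MODEL_ID",
             dir_model="./checkpoints/MODEL_ID-folder",
             beam_decode=False,
             show_attention=False,
             verbose=1)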
Example 2
def predict(batch_size, data_path,
            dict_path, model_full_name,
            bucket=False, model_specific_dictionary=True,
            print_raw=False, dir_normalized=None, dir_original=None,
            get_batch_mode=False,
            normalization=True, debug=False, use_gpu=None, verbose=0):

    assert model_specific_dictionary, "ERROR : only model_specific_dictionary = True supported now"
    # NB : for now the dictionary must be loaded when evaluating (it cannot be recomputed) (this ability could be added to LexNormalizer)
    use_gpu = use_gpu_(use_gpu)
    hardware_chosen = "GPU" if use_gpu else "CPU"
    printing("{} mode ", var=[hardware_chosen], verbose_level=0, verbose=verbose)

    if not debug:
        pdb.set_trace = lambda: 1

    model = LexNormalizer(generator=Generator, load=True, model_full_name=model_full_name,
                          voc_size=None, use_gpu=use_gpu, dict_path=dict_path, model_specific_dictionary=True,
                          dir_model=os.path.join(PROJECT_PATH, "checkpoints",
                                                 model_full_name + "-folder"),
                          char_decoding=True, word_decoding=False,
                          verbose=verbose
                          )

    data_read = conllu_data.read_data_to_variable(data_path, model.word_dictionary, model.char_dictionary,
                                                  model.pos_dictionary,
                                                  model.xpos_dictionary, model.type_dictionary,
                                                  use_gpu=use_gpu,
                                                  norm_not_norm=model.auxilliary_task_norm_not_norm,
                                                  symbolic_end=True, symbolic_root=True,
                                                  dry_run=0, lattice=False, verbose=verbose,
                                                  normalization=normalization,
                                                  bucket=bucket,
                                                  add_start_char=1, add_end_char=1)

    batchIter = data_gen_conllu(data_read, model.word_dictionary, model.char_dictionary,
                                batch_size=batch_size,
                                get_batch_mode=get_batch_mode,  # was hardcoded to False, silently ignoring the argument
                                normalization=normalization,
                                print_raw=print_raw,  verbose=verbose)
    model.eval()
    greedy_decode_batch(char_dictionary=model.char_dictionary, verbose=verbose,
                        gold_output=False,
                        use_gpu=use_gpu,
                        write_output=True,
                        label_data=REPO_DATASET[data_path],
                        batchIter=batchIter, model=model, dir_normalized=dir_normalized, dir_original=dir_original,
                        batch_size=batch_size)
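
A hypothetical call sketch (paths and model name are placeholders ; data_path must be a key of REPO_DATASET for the labelling above to work) :

predict(batch_size=2,
        data_path="./data/test.conll",
        dict_path="./dictionaries",
        model_full_name="MODEL_ID",
        dir_normalized="./predictions/normalized",
        dir_original="./predictions/original",
        verbose=1)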
Example 3
def evaluate(model_full_name,
             dir,
             nbatches=50,
             V=5,
             batch=2,
             seq_len=10,
             generalize_extra=0,
             verbose=2):
    model = LexNormalizer(load=True,
                          dir_model=dir,
                          model_full_name=model_full_name,
                          generator=Generator,
                          voc_size=9,
                          verbose=verbose)
    model.eval()
    loss = run_epoch(
        data_gen_dummy(V,
                       batch=batch,
                       nbatches=10,  # NB : hardcoded ; the nbatches argument of evaluate is unused
                       seed=SEED,
                       sent_len=seq_len + generalize_extra,
                       verbose=verbose), model, LossCompute(model.generator))
    print("Final Loss {} ".format(loss))
    return loss
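
A call sketch ; the directory and model name are hypothetical placeholders pointing at a previously saved checkpoint :

loss = evaluate(model_full_name="MODEL_ID", dir="./checkpoints/MODEL_ID-folder", verbose=1)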
Example 4
def train_1_epoch(epochs=EPOCHS,
                  seq_len=10,
                  generalize_extra=0,
                  nbatches=50,
                  verbose=2,
                  lr=0.001,
                  V=5,
                  batch=2):

    model = LexNormalizer(generator=Generator,
                          char_embedding_dim=5,
                          hidden_size_encoder=11,
                          voc_size=9,
                          hidden_size_sent_encoder=9,
                          output_dim=10,
                          hidden_size_decoder=11,
                          verbose=verbose)
    # optimizer
    adam = torch.optim.Adam(model.parameters(),
                            lr=lr,
                            betas=(0.9, 0.98),
                            eps=1e-9)
    for epoch in tqdm(range(epochs),
                      disable_tqdm_level(verbose=verbose, verbose_level=0)):
        model.train()
        run_epoch(data_gen_dummy(V=V,
                                 batch=batch,
                                 nbatches=nbatches,
                                 sent_len=seq_len,
                                 verbose=verbose,
                                 seed=SEED),
                  model,
                  LossCompute(model.generator, opt=adam, verbose=verbose),
                  verbose=verbose,
                  i_epoch=epoch,
                  n_epochs=epochs,  # was n_epochs=EPOCHS, which is wrong when the epochs argument differs
                  n_batches=nbatches)

    loss = run_epoch(
        data_gen_dummy(V,
                       batch=batch,
                       nbatches=10,
                       seed=SEED,
                       sent_len=seq_len + generalize_extra,
                       verbose=verbose), model, LossCompute(model.generator))
    print("Final Loss {} ".format(loss))
    dir, model_full_name = model.save("./test_logs",
                                      model,
                                      verbose=verbose,
                                      info_checkpoint=TEMPLATE_INFO_CHECKPOINT)
    return dir, model_full_name, loss
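
A sketch of how this overfitting test might be driven (SEED, EPOCHS and TEMPLATE_INFO_CHECKPOINT are module-level constants assumed to be defined elsewhere) :

dir_model, name, final_loss = train_1_epoch(epochs=20, nbatches=50, lr=0.001)
print("model saved under {} as {} with final loss {}".format(dir_model, name, final_loss))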
Example 5
    # NB : the left-hand side of this assignment was truncated in the source ; restored following the parallel call in Example 8
    word_dictionary, char_dictionary, pos_dictionary, \
        xpos_dictionary, type_dictionary = conllu_data.create_dict(dict_path=dict_path,
                                                                   train_path=test_path,
                                                                   dev_path=test_path,
                                                                   test_path=test_path,
                                                                   word_embed_dict={},
                                                                   dry_run=False,
                                                                   add_start_char=add_start_char,
                                                                   vocab_trim=True)

    V = len(char_dictionary.instance2index) + 1
    print("Character vocabulary is {} length".format(V))

    model = LexNormalizer(generator=Generator,
                          char_embedding_dim=5,
                          voc_size=V,
                          hidden_size_encoder=11,
                          output_dim=10,
                          hidden_size_decoder=11,
                          verbose=verbose)

    batchIter = data_gen_conllu(test_path,
                                word_dictionary,
                                char_dictionary,
                                pos_dictionary,
                                xpos_dictionary,
                                type_dictionary,
                                normalization=True,
                                add_start_char=add_start_char,
                                add_end_char=0,
                                batch_size=2,
                                nbatch=nbatches)  # remaining arguments truncated in the source snippet
Example 6
from io_.data_iterator import data_gen_dummy
from io_.info_print import disable_tqdm_level
from model.loss import LossCompute
from model.seq2seq import LexNormalizer  # import path as in Example 1 ; Generator and run_epoch are assumed imported alongside
from tracking.plot_loss import simple_plot
from tqdm import tqdm
import matplotlib.pyplot as plt
import pdb
import torch
# hyperparameters
V = 5
lr = 0.001
verbose = 1

model = LexNormalizer(generator=Generator, char_embedding_dim=5, hidden_size_encoder=12, voc_size=9, output_dim=50,
                      hidden_size_sent_encoder=10,
                      hidden_size_decoder=11, verbose=verbose)
# optimizer
adam = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.98), eps=1e-9)
# reporting
training_loss = []
nbatches = 1
EPOCHS = 20
seq_len = 10
generalize_extra = 5


def _test_overfit_dummy():
    pdb.set_trace = lambda: 1
    for epoch in tqdm(range(EPOCHS), disable_tqdm_level(verbose=verbose, verbose_level=0)):
        model.train()
        run_epoch(data_gen_dummy(V=V, batch=2, nbatches=nbatches, sent_len=seq_len, verbose=verbose),
                  # the call was truncated in the source ; completed following the analogous loop in Example 4
                  model,
                  LossCompute(model.generator, opt=adam, verbose=verbose),
                  verbose=verbose, i_epoch=epoch, n_epochs=EPOCHS, n_batches=nbatches)
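
A direct call sketch ; the function trains the module-level model defined above :

if __name__ == "__main__":
    _test_overfit_dummy()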
Example 7
def train(train_path,
          dev_path,
          n_epochs,
          normalization,
          dict_path=None,
          pos_specific_path=None,
          expand_vocab_dev_test=False,
          checkpointing_metric="loss-dev-all",
          batch_size=10,
          test_path=None,
          label_train="",
          label_dev="",
          use_gpu=None,
          lr=0.001,
          n_layers_word_encoder=1,
          n_layers_sent_cell=1,
          get_batch_mode_all=True,
          dropout_sent_encoder_cell=0,
          dropout_word_encoder_cell=0,
          dropout_word_decoder_cell=0,
          dropout_bridge=0,
          drop_out_word_encoder_out=0,
          drop_out_sent_encoder_out=0,
          dir_word_encoder=1,
          word_embed=False,
          word_embedding_dim=None,
          word_embedding_projected_dim=None,
          mode_word_encoding="cat",
          char_level_embedding_projection_dim=0,
          word_recurrent_cell_encoder=None,
          word_recurrent_cell_decoder=None,
          drop_out_char_embedding_decoder=0,
          hidden_size_encoder=None,
          output_dim=None,
          char_embedding_dim=None,
          hidden_size_decoder=None,
          hidden_size_sent_encoder=None,
          freq_scoring=5,
          compute_scoring_curve=False,
          score_to_compute_ls=None,
          mode_norm_ls=None,
          checkpointing=True,
          freq_checkpointing=None,
          freq_writer=None,
          model_dir=None,
          reload=False,
          model_full_name=None,
          model_id_pref="",
          print_raw=False,
          model_specific_dictionary=False,
          dir_sent_encoder=1,
          add_start_char=None,
          add_end_char=1,
          overall_label="DEFAULT",
          overall_report_dir=CHECKPOINT_DIR,
          compute_mean_score_per_sent=False,
          weight_binary_loss=1,
          dense_dim_auxilliary=None,
          dense_dim_auxilliary_2=None,
          unrolling_word=False,
          char_src_attention=False,
          debug=False,
          timing=False,
          dev_report_loss=True,
          bucketing=True,
          policy=None,
          teacher_force=True,
          proportion_pred_train=None,
          shared_context="all",
          clipping=None,
          extend_n_batch=1,
          stable_decoding_state=False,
          init_context_decoder=True,
          dense_dim_auxilliary_pos=None,
          dense_dim_auxilliary_pos_2=None,
          tasks=None,
          word_decoding=False,
          char_decoding=True,
          dense_dim_word_pred=None,
          dense_dim_word_pred_2=None,
          dense_dim_word_pred_3=None,
          symbolic_root=False,
          symbolic_end=False,
          extern_emb_dir=None,
          activation_word_decoder=None,
          activation_char_decoder=None,
          extra_arg_specific_label="",
          freezing_mode=False,
          freeze_ls_param_prefix=None,
          multi_task_loss_ponderation=None,
          max_char_len=None,
          attention_tagging=False,
          dropout_input=None,
          optimizer="adam",
          verbose=1):

    if multi_task_loss_ponderation is not None:
        sanity_check_loss_poneration(multi_task_loss_ponderation,
                                     verbose=verbose)
    if teacher_force:
        assert proportion_pred_train is None, "proportion_pred_train must be None in teacher_force mode"
    else:
        assert 100 > proportion_pred_train > 0, "proportion_pred_train should be between 0 and 100"
    auxilliary_task_norm_not_norm = "norm_not_norm" in tasks  # auxilliary_task_norm_not_norm
    auxilliary_task_pos = "pos" in tasks
    if "normalize" not in tasks:
        word_decoding = False
        char_decoding = False
    if not unrolling_word:
        assert not char_src_attention, "ERROR : char_src_attention requires step-by-step unrolling"
    printing("WARNING bucketing is {} ",
             var=bucketing,
             verbose=verbose,
             verbose_level=1)
    if freq_writer is None:
        freq_writer = freq_checkpointing
        printing("REPORTING freq_writer set to freq_checkpointing {}",
                 var=[freq_checkpointing],
                 verbose=verbose,
                 verbose_level=1)
    if auxilliary_task_norm_not_norm:
        printing(
            "MODEL : training model with auxillisary task (loss weighted with {})",
            var=[weight_binary_loss],
            verbose=verbose,
            verbose_level=1)
    #if compute_scoring_curve:
    #assert score_to_compute_ls is not None and mode_norm_ls is not None and freq_scoring is not None, \
    #    "ERROR score_to_compute_ls and mode_norm_ls should not be None"
    use_gpu = use_gpu_(use_gpu)
    hardware_chosen = "GPU" if use_gpu else "CPU"
    printing("{} hardware mode ",
             var=[hardware_chosen],
             verbose_level=0,
             verbose=verbose)
    freq_checkpointing = int(
        n_epochs / 10
    ) if checkpointing and freq_checkpointing is None else freq_checkpointing
    assert add_start_char == 1, "ERROR : add_start_char must be activated due to the decoding behavior of output_text_"
    printing("WARNING : add_start_char is {} and add_end_char {}  ".format(
        add_start_char, add_end_char),
             verbose=verbose,
             verbose_level=0)
    printing("TRAINING : checkpointing every {} epoch",
             var=freq_checkpointing,
             verbose=verbose,
             verbose_level=1)
    if reload:
        assert model_full_name is not None and len(
            model_id_pref
        ) == 0 and model_dir is not None and dict_path is not None
    else:
        assert model_full_name is None and model_dir is None

    if not debug:
        pdb.set_trace = lambda: None

    loss_training = []
    loss_developing = []
    # the template could not be reused because the variable was never reinitialized
    loss_details_template = {
        'loss_seq_prediction': [],
        'other': {},
        'loss_binary': [],
        'loss_overall': []
    } if auxilliary_task_norm_not_norm else None

    # used to compute scores for early stopping when checkpointing_metric != loss, and for curve plots
    evaluation_set_reporting = dev_path
    mode_norm_ls = ["all"]
    score_to_compute_ls = ["exact_match"]
    print("WARNING : train.py overwrites the mode_norm_ls and score_to_compute_ls arguments")
    curve_scores = {
        score + "-" + mode_norm + "-" + REPO_DATASET[data]: []
        for score in score_to_compute_ls for mode_norm in mode_norm_ls
        for data in evaluation_set_reporting
    } if compute_scoring_curve else None

    printing("WARNING :  lr {} ".format(lr, add_start_char, add_end_char),
             verbose=verbose,
             verbose_level=0)
    printing(
        "INFO : dictionary is computed (re)created from scratch on train_path {} and dev_path {}"
        .format(train_path, dev_path),
        verbose=verbose,
        verbose_level=1)

    if not model_specific_dictionary:
        word_dictionary, char_dictionary, pos_dictionary, \
        xpos_dictionary, type_dictionary = \
        conllu_data.load_dict(dict_path=dict_path,
                              train_path=train_path,
                              dev_path=dev_path,
                              test_path=test_path,
                              word_embed_dict={},
                              dry_run=False,
                              force_new_dic=True,
                              add_start_char=add_start_char, verbose=1)

        voc_size = len(char_dictionary.instance2index) + 1
        word_voc_input_size = len(word_dictionary.instance2index) + 1
        printing("DICTIONARY ; character vocabulary is len {} : {} ",
                 var=str(
                     len(char_dictionary.instance2index) + 1,
                     char_dictionary.instance2index),
                 verbose=verbose,
                 verbose_level=0)
        _train_path, _dev_path, _test_path, _add_start_char = None, None, None, None  # _test_path must be set here too : it is used below
    else:
        voc_size = None
        word_voc_input_size = 0
        if not reload:
            # we need to feed the model the data so that it computes the model_specific_dictionary
            _train_path = train_path
            _dev_path = dev_path
            _test_path = test_path
            _add_start_char = add_start_char
        else:
            # as we reload, no data is needed
            _train_path, _dev_path, _test_path, _add_start_char = None, None, None, None

    model = LexNormalizer(
        generator=Generator,
        expand_vocab_dev_test=expand_vocab_dev_test,
        dense_dim_auxilliary=dense_dim_auxilliary,
        dense_dim_auxilliary_2=dense_dim_auxilliary_2,
        tasks=tasks,
        weight_binary_loss=weight_binary_loss,
        dense_dim_auxilliary_pos=dense_dim_auxilliary_pos,
        dense_dim_auxilliary_pos_2=dense_dim_auxilliary_pos_2,
        load=reload,
        char_embedding_dim=char_embedding_dim,
        voc_size=voc_size,
        dir_model=model_dir,
        use_gpu=use_gpu,
        dict_path=dict_path,
        word_recurrent_cell_decoder=word_recurrent_cell_decoder,
        word_recurrent_cell_encoder=word_recurrent_cell_encoder,
        train_path=_train_path,
        dev_path=_dev_path,
        pos_specific_path=pos_specific_path,
        add_start_char=_add_start_char,
        model_specific_dictionary=model_specific_dictionary,
        dir_word_encoder=dir_word_encoder,
        drop_out_sent_encoder_cell=dropout_sent_encoder_cell,
        drop_out_word_encoder_cell=dropout_word_encoder_cell,
        drop_out_word_decoder_cell=dropout_word_decoder_cell,
        drop_out_bridge=dropout_bridge,
        drop_out_char_embedding_decoder=drop_out_char_embedding_decoder,
        drop_out_word_encoder_out=drop_out_word_encoder_out,
        drop_out_sent_encoder_out=drop_out_sent_encoder_out,
        n_layers_word_encoder=n_layers_word_encoder,
        dir_sent_encoder=dir_sent_encoder,
        n_layers_sent_cell=n_layers_sent_cell,
        hidden_size_encoder=hidden_size_encoder,
        output_dim=output_dim,
        model_id_pref=model_id_pref,
        model_full_name=model_full_name,
        hidden_size_sent_encoder=hidden_size_sent_encoder,
        shared_context=shared_context,
        unrolling_word=unrolling_word,
        char_src_attention=char_src_attention,
        word_decoding=word_decoding,
        dense_dim_word_pred=dense_dim_word_pred,
        dense_dim_word_pred_2=dense_dim_word_pred_2,
        dense_dim_word_pred_3=dense_dim_word_pred_3,
        char_decoding=char_decoding,
        mode_word_encoding=mode_word_encoding,
        char_level_embedding_projection_dim=char_level_embedding_projection_dim,
        stable_decoding_state=stable_decoding_state,
        init_context_decoder=init_context_decoder,
        symbolic_root=symbolic_root,
        symbolic_end=symbolic_end,
        word_embed=word_embed,
        word_embedding_dim=word_embedding_dim,
        word_embedding_projected_dim=word_embedding_projected_dim,
        word_embed_dir=extern_emb_dir,
        word_voc_input_size=word_voc_input_size,
        teacher_force=teacher_force,
        activation_char_decoder=activation_char_decoder,
        activation_word_decoder=activation_word_decoder,
        test_path=_test_path,
        extend_vocab_with_test=_test_path is not None,
        attention_tagging=attention_tagging,
        multi_task_loss_ponderation=multi_task_loss_ponderation,  # needed for save/reloading purposes
        hidden_size_decoder=hidden_size_decoder,
        verbose=verbose,
        timing=timing)

    pos_batch = auxilliary_task_pos

    if use_gpu:
        model = model.cuda()
        printing("TYPE model is cuda : {} ",
                 var=(next(model.parameters()).is_cuda),
                 verbose=verbose,
                 verbose_level=4)
        #model.decoder.attn_layer = model.decoder.attn_layer.cuda()
    if not model_specific_dictionary:
        model.word_dictionary, model.char_dictionary, model.pos_dictionary, \
        model.xpos_dictionary, model.type_dictionary = word_dictionary, char_dictionary, pos_dictionary, xpos_dictionary, type_dictionary

    starting_epoch = model.arguments["info_checkpoint"][
        "n_epochs"] if reload else 1
    reloading = "" if not reload else " reloaded from " + str(starting_epoch)
    n_epochs += starting_epoch
    if freezing_mode:
        assert freeze_ls_param_prefix is not None, "freeze_ls_param_prefix should not be None"
        printing("TRAINING : freezing is on for layers {} ",
                 var=[freeze_ls_param_prefix],
                 verbose=verbose,
                 verbose_level=1)
        for name, param in model.named_parameters():
            for freeze_param in freeze_ls_param_prefix:
                if name.startswith(freeze_param):
                    param.requires_grad = False
                    printing("TRAINING : freezing {} parameter ",
                             var=[name],
                             verbose=verbose,
                             verbose_level=1)

    _loss_dev = 1000
    checkpoint_score_saved = 1000
    _loss_train = 1000
    counter_no_decrease = 0
    saved_epoch = 1
    if reload:
        printing(
            "TRAINING : RELOADED MODE , starting from checkpointed epoch {} ",
            var=starting_epoch,
            verbose_level=0,
            verbose=verbose)
    printing(
        "TRAINING : Running from {} to {} epochs : training on {} evaluating on {}",
        var=(starting_epoch, n_epochs, train_path, dev_path),
        verbose=verbose,
        verbose_level=0)
    starting_time = time.time()
    total_time = 0
    x_axis_epochs = []
    epoch_ls_dev = []
    epoch_ls_train = []

    train_path = [train_path] if isinstance(train_path, str) else train_path
    dev_path = [dev_path] if isinstance(dev_path, str) else dev_path

    readers_train = readers_load(
        datasets=train_path,
        tasks=tasks,
        word_dictionary=model.word_dictionary,
        word_dictionary_norm=model.word_nom_dictionary,
        char_dictionary=model.char_dictionary,
        pos_dictionary=model.pos_dictionary,
        xpos_dictionary=model.xpos_dictionary,
        type_dictionary=model.type_dictionary,
        use_gpu=use_gpu,
        norm_not_norm=auxilliary_task_norm_not_norm,
        word_decoder=word_decoding,
        add_start_char=add_start_char,
        add_end_char=add_end_char,
        symbolic_end=symbolic_end,
        symbolic_root=symbolic_root,
        bucket=bucketing,
        max_char_len=max_char_len,
        verbose=verbose)

    readers_dev = readers_load(datasets=dev_path,
                               tasks=tasks,
                               word_dictionary=model.word_dictionary,
                               word_dictionary_norm=model.word_nom_dictionary,
                               char_dictionary=model.char_dictionary,
                               pos_dictionary=model.pos_dictionary,
                               xpos_dictionary=model.xpos_dictionary,
                               type_dictionary=model.type_dictionary,
                               use_gpu=use_gpu,
                               norm_not_norm=auxilliary_task_norm_not_norm,
                               word_decoder=word_decoding,
                               add_start_char=add_start_char,
                               add_end_char=add_end_char,
                               symbolic_end=symbolic_end,
                               symbolic_root=symbolic_root,
                               bucket=bucketing,
                               max_char_len=max_char_len,
                               verbose=verbose)

    dir_writer = os.path.join(overall_report_dir, "runs",
                              "{}-model".format(model.model_full_name))
    writer = SummaryWriter(log_dir=dir_writer)
    printing(
        "REPORT : run \ntensorboard --logdir={} --host=localhost --port=9101 "
        "(run tensorboard remotely : sh $EXPERIENCE/track/run_tensorboard_serveo.sh $log_dir $port )  ",
        var=[dir_writer],
        verbose=verbose,
        verbose_level=1)
    printing("REPORT : summary writer will be located {}",
             var=[dir_writer],
             verbose_level=1,
             verbose=verbose)
    step_train = 0
    step_dev = 0
    if ADAPTABLE_SCORING:
        printing("WARNING : scoring epochs not regular (more at the begining ",
                 verbose_level=1,
                 verbose=verbose)
        freq_scoring = 1
    checkpoint_dir_former = None

    for epoch in tqdm(range(starting_epoch, n_epochs),
                      disable_tqdm_level(verbose=verbose, verbose_level=0)):
        #parameters = filter(lambda p: p.requires_grad, model.parameters())
        decay_rate = 1  # a decay_rate of 1 keeps the learning rate constant across epochs
        opt = dptx.get_optimizer(model.parameters(),
                                 lr=lr * decay_rate**epoch,
                                 optimizer=optimizer)  # was hardcoded to "adam", ignoring the optimizer argument
        assert policy is None or policy in AVAILABLE_SCHEDULING_POLICIES
        policy_dic = eval(policy)(epoch) if policy is not None else None
        # TODO : no need to re-output multi_task_mode : tasks should be harmonized instead
        multi_task_mode, ponderation_normalize_loss, weight_binary_loss, weight_pos_loss = scheduling_policy(
            epoch=epoch, phases_ls=policy_dic, tasks=tasks)
        printing(
            "TRAINING Tasks scheduling : ponderation_normalize_loss is {} weight_binary_loss is {}"
            " weight_pos_loss is {} mode is {} ",
            var=[
                ponderation_normalize_loss, weight_binary_loss,
                weight_pos_loss, multi_task_mode
            ],
            verbose=verbose,
            verbose_level=2)

        printing("TRAINING : Starting {} epoch out of {} ",
                 var=(epoch + 1, n_epochs),
                 verbose=verbose,
                 verbose_level=1)
        model.train()
        #batchIter = data_gen_conllu(data_read_train,model.word_dictionary, model.char_dictionary,normalization=normalization,get_batch_mode=get_batch_mode_all,batch_size=batch_size, extend_n_batch=extend_n_batch,print_raw=print_raw, timing=timing, pos_dictionary=model.pos_dictionary,verbose=verbose)
        batchIter = data_gen_multi_task_sampling_batch(
            tasks=tasks,
            readers=readers_train,
            batch_size=batch_size,
            word_dictionary=model.word_dictionary,
            char_dictionary=model.char_dictionary,
            pos_dictionary=model.pos_dictionary,
            word_dictionary_norm=model.word_nom_dictionary,
            get_batch_mode=get_batch_mode_all,
            extend_n_batch=extend_n_batch,
            dropout_input=dropout_input,
            verbose=verbose)
        start = time.time()
        printing(
            "TRAINING : TEACHER FORCE : Schedule Sampling proportion of train on prediction is {} ",
            var=[proportion_pred_train],
            verbose=verbose,
            verbose_level=2)

        #rep_tl.checkout_layer_name("encoder.seq_encoder.weight_ih_l0", model.named_parameters(), info_epoch=epoch)

        loss_train, loss_details_train, step_train = run_epoch(
            batchIter,
            model,
            LossCompute(
                model.generator,
                opt=opt,
                multi_task_loss_ponderation=model.multi_task_loss_ponderation,
                auxilliary_task_norm_not_norm=auxilliary_task_norm_not_norm,
                model=model,
                writer=writer,
                use="train",
                use_gpu=use_gpu,
                verbose=verbose,
                tasks=tasks,
                char_decoding=char_decoding,
                word_decoding=word_decoding,
                pos_pred=auxilliary_task_pos,
                vocab_char_size=len(
                    list(model.char_dictionary.instance2index.keys())) + 1,
                timing=timing),
            verbose=verbose,
            i_epoch=epoch,
            multi_task_mode=multi_task_mode,
            n_epochs=n_epochs,
            timing=timing,
            weight_binary_loss=weight_binary_loss,
            weight_pos_loss=weight_pos_loss,
            ponderation_normalize_loss=ponderation_normalize_loss,
            step=step_train,
            clipping=clipping,
            pos_batch=pos_batch,
            proportion_pred_train=proportion_pred_train,
            log_every_x_batch=100)

        writer_weights_and_grad(model=model,
                                freq_writer=freq_writer,
                                epoch=epoch,
                                writer=writer,
                                verbose=verbose)

        _train_ep_time, start = get_timing(start)
        model.eval()
        # TODO : should be moved inside the freq_checkpointing branch, otherwise useless
        #batchIter_eval = data_gen_conllu(data_read_dev,model.word_dictionary, model.char_dictionary,batch_size=batch_size, get_batch_mode=False,normalization=normalization, extend_n_batch=1,pos_dictionary=model.pos_dictionary, verbose=verbose)
        batchIter_eval = data_gen_multi_task_sampling_batch(
            tasks=tasks,
            readers=readers_dev,
            batch_size=batch_size,
            word_dictionary=model.word_dictionary,
            char_dictionary=model.char_dictionary,
            word_dictionary_norm=model.word_nom_dictionary,
            pos_dictionary=model.pos_dictionary,
            dropout_input=0,
            extend_n_batch=1,
            get_batch_mode=False,
            verbose=verbose)
        _create_iter_time, start = get_timing(start)
        # TODO : should be able to factorize this into a single run_epoch() for train and dev (the computation should be the same)
        # TODO : should not evaluate at every epoch : evaluate every x epochs, check whether the loss decreased and checkpoint
        if (dev_report_loss and
            (epoch % freq_checkpointing == 0)) or (epoch + 1 == n_epochs):
            printing("EVALUATION : computing loss on dev epoch {}  ",
                     var=epoch,
                     verbose=verbose,
                     verbose_level=1)
            loss_obj = LossCompute(
                model.generator,
                use_gpu=use_gpu,
                verbose=verbose,
                multi_task_loss_ponderation=model.multi_task_loss_ponderation,
                writer=writer,
                use="dev",
                vocab_char_size=len(
                    list(model.char_dictionary.instance2index.keys())) + 1,
                pos_pred=auxilliary_task_pos,
                tasks=tasks,
                char_decoding=char_decoding,
                word_decoding=word_decoding,
                auxilliary_task_norm_not_norm=auxilliary_task_norm_not_norm)
            loss_dev, loss_details_dev, step_dev = run_epoch(
                batchIter_eval,
                model,
                loss_compute=loss_obj,
                i_epoch=epoch,
                n_epochs=n_epochs,
                verbose=verbose,
                timing=timing,
                step=step_dev,
                weight_binary_loss=weight_binary_loss,
                ponderation_normalize_loss=ponderation_normalize_loss,
                weight_pos_loss=weight_pos_loss,
                pos_batch=pos_batch,
                log_every_x_batch=100)

            loss_developing.append(loss_dev)
            epoch_ls_dev.append(epoch)

            if auxilliary_task_norm_not_norm:
                # in this case we report loss detail
                for ind, loss_key in enumerate(loss_details_dev.keys()):
                    if loss_key != "other":
                        loss_details_template[loss_key].append(
                            loss_details_dev[loss_key])
            else:
                loss_details_template = None

        _eval_time, start = get_timing(start)
        loss_training.append(loss_train)
        epoch_ls_train.append(epoch)
        time_per_epoch = time.time() - starting_time
        total_time += time_per_epoch
        starting_time = time.time()

        # computing exact/edit score
        exact_only = False
        overall_report_ls = None
        # MODIFIED FREQ SCORING TO FREQ CHECKPOINTING

        if compute_scoring_curve and (
            (epoch %
             (freq_checkpointing if checkpointing_metric != "loss-dev-all" else
              freq_scoring) == 0) or (epoch + 1 == n_epochs)):
            if epoch < 1 and ADAPTABLE_SCORING:
                freq_scoring *= 5
            if epoch == 5 and ADAPTABLE_SCORING:
                freq_scoring *= 3
            if epoch == 15 and ADAPTABLE_SCORING:
                # NB : the original condition (epoch > 14 and epoch < 15) could never hold
                freq_scoring *= 2
            if epoch + 1 == n_epochs:
                printing("EVALUATION : final scoring ",
                         verbose=verbose,
                         verbose_level=0)
            x_axis_epochs.append(epoch)
            printing("EVALUATION : Computing score on {} and {}  ",
                     var=(score_to_compute_ls, mode_norm_ls),
                     verbose=verbose,
                     verbose_level=1)
            overall_report_ls = []
            for task, eval_data in zip(tasks, evaluation_set_reporting):
                eval_label = REPO_DATASET[eval_data]
                assert len(set(evaluation_set_reporting)) == len(evaluation_set_reporting),\
                    "ERROR : twice the same dataset has been provided for reporting which will mess up the loss"
                printing("EVALUATION on {} ",
                         var=[eval_data],
                         verbose=verbose,
                         verbose_level=1)
                scores = evaluate(
                    data_path=eval_data,
                    use_gpu=use_gpu,
                    overall_label=overall_label,
                    overall_report_dir=overall_report_dir,
                    score_to_compute_ls=score_to_compute_ls,
                    mode_norm_ls=mode_norm_ls,
                    label_report=eval_label,
                    model=model,
                    normalization=normalization,
                    print_raw=False,
                    model_specific_dictionary=True,
                    get_batch_mode_evaluate=False,
                    compute_mean_score_per_sent=compute_mean_score_per_sent,
                    batch_size=batch_size,
                    word_decoding=word_decoding,
                    dir_report=model.dir_model,
                    debug=debug,
                    evaluated_task=task,
                    tasks=tasks,
                    verbose=verbose)
                # we keep everything here in case we want to use some fancy early-stopping metric
                overall_report_ls.extend(scores)

                # dirty but does the job
                exact_only = True
                DEPRECATED = False
                if DEPRECATED:
                    curve_scores = update_curve_dic(
                        score_to_compute_ls=score_to_compute_ls,
                        mode_norm_ls=mode_norm_ls,
                        eval_data=eval_label,
                        former_curve_scores=curve_scores,
                        scores=scores,
                        exact_only=exact_only)
                    curve_ls_tuple = [
                        (loss_ls, label)
                        for label, loss_ls in curve_scores.items()
                        if isinstance(loss_ls, list)
                    ]
                    curves = [tupl[0] for tupl in curve_ls_tuple]
                    val_ls = [
                        tupl[1] + "({}tok)".format(info_token)
                        for tupl in curve_ls_tuple
                        for data, info_token in curve_scores.items()
                        if not isinstance(info_token, list)
                        if tupl[1].endswith(data)
                    ]
            score_to_compute_ls = ["exact"
                                   ] if exact_only else score_to_compute_ls
            if DEPRECIATED:
                for score_plot in score_to_compute_ls:
                    # dirty but does the job
                    print(val_ls)
                    if exact_only:
                        val_ls = [
                            val for val in val_ls
                            if val.startswith("exact-all")
                            or val.startswith("exact-NORMED")
                            or val.startswith("exact-NEED_NORM")
                        ]
                        #val_ls = ["{}-all-{}".format(metric,REPO_DATASET[eval]) for eval in evaluation_set_reporting for metric in ["exact", "edit"]]
                        curves = [curve for curve in curves if len(curve) > 0]

                    simple_plot_ls(losses_ls=curves,
                                   labels=val_ls,
                                   final_loss="",
                                   save=True,
                                   filter_by_label=score_plot,
                                   x_axis=x_axis_epochs,
                                   dir=model.dir_model,
                                   prefix=model.model_full_name,
                                   epochs=str(epoch) + reloading,
                                   verbose=verbose,
                                   lr=lr,
                                   label_color_0=REPO_DATASET[
                                       evaluation_set_reporting[0]],
                                   label_color_1=REPO_DATASET[
                                       evaluation_set_reporting[1]])

        # WARNING : we only save when the metric improves ; when reloading, the former model is not loaded back
        if (checkpointing
                and epoch % freq_checkpointing == 0) or (epoch + 1
                                                         == n_epochs):
            if checkpointing_metric != "loss-dev-all" and epoch < STARTING_CHECKPOINTING_WITH_SCORE:
                _checkpointing_metric = "loss-dev-all"
            elif checkpointing_metric != "loss-dev-all":
                _checkpointing_metric = checkpointing_metric
                if epoch == STARTING_CHECKPOINTING_WITH_SCORE:
                    checkpoint_score_saved = -report["score"]
                    printing("Checkoint info : switching "
                             "checkpoint_score_saved to {} : {}".format(
                                 checkpointing_metric, checkpoint_score_saved),
                             verbose_level=1,
                             verbose=verbose)
            elif checkpointing_metric == "loss-dev-all":
                _checkpointing_metric = checkpointing_metric
            else:
                raise Exception("You missed a case")

            dir_plot_detailed = simple_plot(
                final_loss=0,
                epoch_ls_1=epoch_ls_dev,
                epoch_ls_2=epoch_ls_dev,
                loss_2=loss_details_template.get("loss_binary", None),
                loss_ls=loss_details_template["loss_seq_prediction"],
                epochs=str(epoch) + reloading,
                label="dev-seq_prediction",
                label_2="dev-binary",
                save=True,
                dir=model.dir_model,
                verbose=verbose,
                verbose_level=1,
                lr=lr,
                prefix=model.model_full_name + "-details",
                show=False) if loss_details_template is not None else None

            dir_plot = simple_plot(final_loss=loss_train,
                                   loss_2=loss_developing,
                                   loss_ls=loss_training,
                                   epochs=str(epoch) + reloading,
                                   epoch_ls_1=epoch_ls_train,
                                   epoch_ls_2=epoch_ls_dev,
                                   label=label_train + "-train",
                                   label_2=label_dev + "-dev",
                                   save=True,
                                   dir=model.dir_model,
                                   verbose=verbose,
                                   verbose_level=1,
                                   lr=lr,
                                   prefix=model.model_full_name,
                                   show=False)

            sanity_check_checkpointing_metric(
                tasks, checkpointing_metric=_checkpointing_metric)

            if _checkpointing_metric != "loss-dev-all" or \
                    (epoch == (STARTING_CHECKPOINTING_WITH_SCORE-1) and checkpointing_metric != "loss-dev-all"):
                # for now only useful when the metric differs from the loss --> compute the metric on dev all and default always
                # assuming a single task thanks to the sanity check
                assert overall_report_ls is not None, "ERROR : overall_report_ls was not defined"
                report = rep_tl.get_score(
                    overall_report_ls,
                    metric=TASKS_PARAMETER[tasks[0]].get("default_metric"),
                    data=REPO_DATASET[dev_path[0]],
                    info_score="all",
                    task=tasks[0])
                # negated because it is an accuracy (checkpointing selects the minimum)
                checkpoint_score = -report["score"]
            else:
                checkpoint_score = loss_dev

            model, checkpoint_score_saved, counter_no_decrease, saved_epoch, checkpoint_dir_former = \
                    checkpoint(loss_saved=checkpoint_score_saved, loss=checkpoint_score,
                               checkpointing_metric=_checkpointing_metric,
                               model=model, counter_no_decrease=counter_no_decrease,
                               checkpoint_dir_former=checkpoint_dir_former,
                               saved_epoch=saved_epoch, model_dir=model.dir_model,
                               extra_checkpoint_label="1st_train" if not reload else "start_{}_ep-{}".format(starting_epoch, extra_arg_specific_label),
                               extra_arg_specific_label=extra_arg_specific_label,
                               info_checkpoint={"n_epochs": epoch, "batch_size": batch_size, "optimizer": optimizer,
                                                "gradient_clipping": clipping,
                                                "tasks_schedule_policy": policy,
                                                "teacher_force": teacher_force,
                                                "proportion_pred_train": proportion_pred_train,
                                                "train_data_path": train_path, "dev_data_path": dev_path,
                                                "other": {"error_curves": dir_plot, "loss": loss_dev,
                                                          "sanity_test": {"loss": loss_dev,
                                                                          "data": [REPO_DATASET[_dev_path] for _dev_path in dev_path],
                                                                          "batch_size": batch_size},
                                                          "error_curves_details": dir_plot_detailed,
                                                          "dropout_input": dropout_input,
                                                          "checkpointing_metric": _checkpointing_metric,
                                                          "multi_task_loss_ponderation": multi_task_loss_ponderation,
                                                          "weight_binary_loss": weight_binary_loss*int(auxilliary_task_norm_not_norm),
                                                          "weight_pos_loss": weight_pos_loss*int(auxilliary_task_pos),
                                                          "ponderation_normalize_loss": ponderation_normalize_loss,
                                                          "data": "dev", "seed(np/torch)": (SEED_NP, SEED_TORCH),
                                                          "extend_n_batch": extend_n_batch,
                                                          "lr": lr, "optim_strategy": "lr_constant",
                                                          "time_training(min)": "{0:.2f}".format(total_time/60),
                                                          "average_per_epoch(min)": "{0:.2f}".format((total_time/n_epochs)/60)}},
                               epoch=epoch, epochs=n_epochs-1,
                               keep_all_checkpoint=epoch <= starting_epoch,  # nothing to remove after the 1st epoch
                               verbose=verbose)
            if counter_no_decrease * freq_checkpointing >= BREAKING_NO_DECREASE:
                printing(
                    "CHECKPOINTING : Breaking training : loss did not decrease on dev for {} epochs "
                    "so keeping model from epoch {} ".format(BREAKING_NO_DECREASE, saved_epoch),
                    verbose=verbose,
                    verbose_level=0)
                break
        printing(
            "LOSS train {:.3f}, dev {:.3f} for epoch {} out of {} epochs ",
            var=(loss_train, loss_dev, epoch, n_epochs),
            verbose=verbose,
            verbose_level=1)

        if timing:
            print("Summary : {}".format(
                OrderedDict([("_train_ep_time", _train_ep_time),
                             ("_create_iter_time", _create_iter_time),
                             ("_eval_time", _eval_time)])))

    writer.close()
    printing(
        "REPORT : run : \n tensorboard --logdir={} --host=localhost --port=9101  ",
        var=[dir_writer],
        verbose=verbose,
        verbose_level=1)

    #rep_tl.checkout_layer_name("encoder.seq_encoder.weight_ih_l0", model.named_parameters(), info_epoch="LAST")

    simple_plot(final_loss=loss_dev,
                loss_ls=loss_training,
                loss_2=loss_developing,
                epoch_ls_1=epoch_ls_train,
                epoch_ls_2=epoch_ls_dev,
                epochs=n_epochs,
                save=True,
                dir=model.dir_model,
                label=label_train,
                label_2=label_dev,
                lr=lr,
                prefix=model.model_full_name + "-LAST",
                verbose=verbose)

    return model.model_full_name
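
A minimal training call sketch ; paths are hypothetical placeholders, and the arguments satisfy the asserts above (add_start_char == 1, a tasks list, no reload) :

model_name = train(train_path="./data/train.conll",
                   dev_path="./data/dev.conll",
                   n_epochs=10,
                   normalization=True,
                   tasks=["normalize"],
                   add_start_char=1,
                   model_id_pref="demo",
                   model_specific_dictionary=True,
                   verbose=1)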
Example 8
    word_dictionary, char_dictionary, pos_dictionary,\
    xpos_dictionary, type_dictionary = \
            conllu_data.create_dict(dict_path=dict_path,
                                    train_path=test_path,
                                    dev_path=test_path,
                                    test_path=None,
                                    word_embed_dict={},
                                    dry_run=False,
                                    vocab_trim=True, add_start_char=add_start_char,
                                    )

    printing("char_dictionary".format(char_dictionary.instance2index), verbose=verbose, verbose_level=0)
    V = len(char_dictionary.instance2index)+1
    printing("Character vocabulary is {} length".format(V), verbose=verbose, verbose_level=0)
    model = LexNormalizer(generator=Generator, char_embedding_dim=20, voc_size=V, hidden_size_encoder=50, output_dim=50,
                          hidden_size_sent_encoder=10,
                          hidden_size_decoder=50, verbose=verbose)
    adam = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.98), eps=1e-9)
    loss_former = 1000
    checkpointing_freq = 10

    if checkpointing and False:  # NB : the "and False" keeps this checkpointing block disabled
        model_dir = os.path.join("../checkpoints","{}-folder".format(model.model_full_name))
        os.mkdir(model_dir)
        printing("Dir {} created".format(model_dir), verbose=verbose, verbose_level=0)

    for epoch in tqdm(range(epochs), disable_tqdm_level(verbose=verbose, verbose_level=0)):

        printing("Starting new epoch {} ".format(epoch), verbose=verbose, verbose_level=1)
        model.train()
        batchIter = data_gen_conllu(test_path, word_dictionary, char_dictionary, pos_dictionary, xpos_dictionary,
                                    type_dictionary)  # remaining arguments truncated in the source snippet
Example 9
def evaluate(batch_size, data_path, tasks, evaluated_task,
             write_report=True, dir_report=None,
             dict_path=None, model_full_name=None,
             score_to_compute_ls=None, mode_norm_ls=None, get_batch_mode_evaluate=True,
             overall_label="ALL_MODELS", overall_report_dir=CHECKPOINT_DIR, bucket=False,
             model_specific_dictionary=True, label_report="",
             print_raw=False,
             model=None,
             compute_mean_score_per_sent=False, write_output=False,
             word_decoding=False, char_decoding=True,
             extra_arg_specific_label="", scoring_func_sequence_pred="BLUE",
             max_char_len=None,
             normalization=True, debug=False,
             force_new_dic=False,
             use_gpu=None, verbose=0):
    assert model_specific_dictionary, "ERROR : only model_specific_dictionary = True supported now"
    # NB : for now the dictionary must be loaded when evaluating (it cannot be recomputed) (this ability could be added to LexNormalizer)
    use_gpu = use_gpu_(use_gpu)
    hardware_chosen = "GPU" if use_gpu else "CPU"
    printing("{} mode ", var=[hardware_chosen], verbose_level=0, verbose=verbose)
    printing("EVALUATION : evaluating with compute_mean_score_per_sent {}".format(compute_mean_score_per_sent), verbose=verbose, verbose_level=1)

    if mode_norm_ls is None:
        mode_norm_ls = ["all", "NORMED", "NEED_NORM"]
    if write_report:
        assert dir_report is not None
    if model is not None:
        assert model_full_name is None and dict_path is None, \
            "ERROR as model is provided : model_full_name and dict_path should be None"
    else:
        assert model_full_name is not None and dict_path is not None,\
            "ERROR : model_full_name and dict_path required to load model "
    voc_size = None
    if not debug:
        pdb.set_trace = lambda: 1

    model = LexNormalizer(generator=Generator, load=True, model_full_name=model_full_name,
                          tasks=tasks,
                          word_decoding=word_decoding, char_decoding=char_decoding,
                          voc_size=voc_size, use_gpu=use_gpu, dict_path=dict_path, model_specific_dictionary=True,
                          dir_model=os.path.join(PROJECT_PATH, "checkpoints", model_full_name + "-folder"),
                          extra_arg_specific_label=extra_arg_specific_label,
                          loading_sanity_test=True,
                          verbose=verbose
                          ) if model is None else model

    if score_to_compute_ls is None:
        score_to_compute_ls = ["edit", "exact"]
        if model.auxilliary_task_norm_not_norm:
            score_to_compute_ls.extend(SCORE_AUX)

    printing("EVALUATION : Evaluating {} metric with details {}  ", var=[score_to_compute_ls, mode_norm_ls], verbose=verbose, verbose_level=3)

    #rep_tl.checkout_layer_name("encoder.seq_encoder.weight_ih_l0", model.named_parameters(), info_epoch="EVAL")

    readers_eval = readers_load(datasets=[data_path], tasks=[evaluated_task], word_dictionary=model.word_dictionary,
                                word_dictionary_norm=model.word_nom_dictionary, char_dictionary=model.char_dictionary,
                                pos_dictionary=model.pos_dictionary, xpos_dictionary=model.xpos_dictionary,
                                type_dictionary=model.type_dictionary, use_gpu=use_gpu,
                                norm_not_norm=model.auxilliary_task_norm_not_norm, word_decoder=word_decoding,
                                bucket=bucket,max_char_len=max_char_len,
                                add_start_char=1, add_end_char=1, symbolic_end=model.symbolic_end, symbolic_root=model.symbolic_root,
                                verbose=verbose)
    batchIter = data_gen_multi_task_sampling_batch(tasks=[evaluated_task], readers=readers_eval, batch_size=batch_size,
                                                   word_dictionary=model.word_dictionary,
                                                   char_dictionary=model.char_dictionary,
                                                   pos_dictionary=model.pos_dictionary,
                                                   get_batch_mode=get_batch_mode_evaluate,
                                                   word_dictionary_norm=model.word_nom_dictionary,
                                                   extend_n_batch=1, dropout_input=0,
                                                   verbose=verbose)

    model.eval()
    # the formulas come from the normalization_errors functions
    score_dic_new, formulas = greedy_decode_batch(char_dictionary=model.char_dictionary, verbose=verbose, gold_output=True,
                                                  score_to_compute_ls=score_to_compute_ls, use_gpu=use_gpu,
                                                  write_output=write_output, eval_new=True,
                                                  task_simultaneous_eval=[evaluated_task],
                                                  stat="sum", mode_norm_score_ls=mode_norm_ls,
                                                  label_data=REPO_DATASET[data_path],
                                                  batchIter=batchIter, model=model,
                                                  scoring_func_sequence_pred=scoring_func_sequence_pred,
                                                  compute_mean_score_per_sent=compute_mean_score_per_sent,
                                                  batch_size=batch_size)
    for score_name, formula in formulas.items():
        if isinstance(formula, tuple) and len(formula) > 1:
            (num, denom) = formula
            score_value = score_dic_new[num]/score_dic_new[denom] if score_dic_new[denom] > 0 else None
            #score_value_per_sent =
            if score_dic_new[denom] == 0:
                print("WARNING Score {} has denumerator {} null and numerator {} equal to  {}".format(score_name, denom,
                                                                                                      num,
                                                                                                      score_dic_new[num]
                                                                                                      ))
            reg = re.match("([^-]+)-([^-]+)-.*", num)
            mode_norm = reg.group(1)
            task = reg.group(2)
            # report all in a dictionary
            # (reportint_unavailable is assumed to be defined at module level ; it is not shown in this snippet)
            if not reportint_unavailable:
                report = report_template(metric_val=score_name,
                                         info_score_val=mode_norm,
                                         score_val=score_value,
                                         n_sents=score_dic_new["n_sents"],
                                         avg_per_sent=0,
                                         n_tokens_score=score_dic_new.get(mode_norm+"-"+task+"-gold-count",-1),
                                         model_full_name_val=model.model_full_name,
                                         task=task,
                                         report_path_val=model.arguments["checkpoint_dir"],
                                         evaluation_script_val="exact_match",
                                         model_args_dir=model.args_dir,
                                         data_val=REPO_DATASET[data_path])
            else:
                report = {"report ":0}
            over_all_report_dir = os.path.join(dir_report, model.model_full_name + "-report-" + label_report + ".json")
            over_all_report_dir_all_models = os.path.join(overall_report_dir, overall_label + "-report.json")
            writing_mode = "w" if not os.path.isfile(over_all_report_dir) else "a"
            writing_mode_all_models = "w" if not os.path.isfile(over_all_report_dir_all_models) else "a"
            for dir, writing_mode in zip([over_all_report_dir, over_all_report_dir_all_models],
                                         [writing_mode, writing_mode_all_models]):
                if writing_mode == "w":
                    _all_report = [report]
                    json.dump([report], open(dir, writing_mode))
                    printing("REPORT : Creating new report  {} ".format(dir), verbose=verbose, verbose_level=1)
                else:
                    all_report = json.load(open(dir, "r"))
                    all_report.append(report)
                    json.dump(all_report, open(dir, "w"))
    printing("NEW REPORT metric : {} ", var=[" ".join(list(formulas.keys()))], verbose=verbose, verbose_level=1)
    try:
        printing("NEW REPORT : model specific report saved {} ".format(over_all_report_dir), verbose=verbose, verbose_level=1)
        printing("NEW REPORT : overall report saved {} ".format(over_all_report_dir_all_models), verbose=verbose,verbose_level=1)
    except Exception as e:
        print(e)
    if writing_mode == "w":
        all_report = _all_report
    return all_report
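
Called standalone, evaluate reloads the model from its checkpoint folder. A hypothetical sketch (placeholders ; alternatively pass model=... and leave model_full_name and dict_path as None, as the asserts at the top require) :

scores = evaluate(batch_size=2,
                  data_path="./data/test.conll",
                  tasks=["normalize"],
                  evaluated_task="normalize",
                  dict_path="./dictionaries",
                  model_full_name="MODEL_ID",
                  dir_report="./reports",
                  verbose=1)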