Example #1
        # Reconstructed opening (the snippet begins mid-function): open the target
        # file and write the "count dim" header, mirroring the source-side block
        # below. pre_tgt_path is an assumed name for the target output path.
        with io.open(pre_tgt_path, 'w', encoding='utf-8') as f:
            f.write(u"%i %i\n" % mapped_tgt_emb.shape)
            for i in range(len(tgt_dict)):
                f.write(u"%s %s\n" % (tgt_dict[i], " ".join('%.5f' % x for x in mapped_tgt_emb[i])))
        print(f'Writing corresponding source embeddings to {pre_src_path}')
        with io.open(pre_src_path, 'w', encoding='utf-8') as f:
            f.write(u"%i %i\n" % src_emb.shape)
            for i in range(len(src_dict)):
                f.write(u"%s %s\n" % (src_dict[i], " ".join('%.5f' % x for x in src_emb[i])))

save_s2t = False
save_t2s = False

# csls_knn_10: induce the bilingual dictionary with CSLS scores over the 10 nearest neighbors.
args.dico_method = "csls_knn_10"
evaluator = Evaluator(model, src_emb, tgt_emb)
to_log = OrderedDict()
print("--------------------------------- Before refinement ---------------------------------" )
evaluator.all_eval(to_log, s2t=s2t, t2s=t2s)

if args.n_refinement > 0:
    print("--------------------------------- Starting Procrustes Refinement ---------------------------------")
    for n_iter in range(args.n_refinement):
        print("Refinement iteration %d" % (n_iter+1))

        model.build_dictionary(src_emb, tgt_emb, s2t=s2t, t2s=t2s)
        model.procrustes(src_emb, tgt_emb, s2t=s2t, t2s=t2s)

        to_log["iters"] = n_iter
        evaluator.all_eval(to_log, s2t=s2t, t2s=t2s)

        if s2t and to_log[VALIDATION_METRIC_SUP_s2t] > best_valid_s2t_metric:
            model.set_save_s2t_path(save_path + "best_" + args.src_lang + "2" + args.tgt_lang + "_params.bin")
            model.save_best_s2t()
            # Record the new best score (the snippet is truncated here; this
            # update mirrors the pattern shown in Example #2 below).
            best_valid_s2t_metric = to_log[VALIDATION_METRIC_SUP_s2t]
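
For orientation, the Procrustes step in the loop above has a closed-form solution: with the induced dictionary pairs stacked as rows of src and tgt, the optimal orthogonal map comes from an SVD. A minimal numpy sketch of that computation (illustrative only; the project's model.procrustes may differ in signature and details):

import numpy as np

def procrustes_sketch(src, tgt):
    # Orthogonal Procrustes: the rotation W minimizing ||src @ W.T - tgt||_F
    # is W = U @ Vt, where U, S, Vt = SVD(tgt.T @ src).
    u, _, vt = np.linalg.svd(tgt.T @ src)
    return u @ vt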
Example #2
            tic = time.time()

    if args.supervise_id:
        sup_src_batch, sup_tgt_batch = src_in_dict, tgt_in_dict
    else:
        sup_src_batch = sup_tgt_batch = None

    model.flow_step(base_src_idx, base_tgt_idx, src_idx, tgt_idx,
                    training_stats, sup_src_batch, sup_tgt_batch)
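    # src_idx and tgt_idx have the same length, so each step appears to process
    # words in both translation directions (hence the factor of 2 below).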
    n_words_proc += len(src_idx) * 2

    if train_step > 0 and train_step % args.valid_steps == 0:
        gc.collect()
        to_log = OrderedDict({'train_iters': train_step, 'exp_path': exp_path})
        evaluator.all_eval(to_log,
                           train=True,
                           unsup_eval=args.valid_option == "unsup")

        if to_log[VALIDATION_METRIC_s2t] > best_valid_s2t_metric:
            model.set_save_s2t_path(exp_path + "best_s2t_params.bin")
            model.save_best_s2t()
            best_valid_s2t_metric = to_log[VALIDATION_METRIC_s2t]
            best_valid_csls_s2t_metric = to_log[VALIDATION_METRIC_SUP_s2t]
            best_valid_density_s2t_metric = to_log[DENSITY_METRIC_SUP_s2t]

        if to_log[VALIDATION_METRIC_t2s] > best_valid_t2s_metric:
            model.set_save_t2s_path(exp_path + "best_t2s_params.bin")
            model.save_best_t2s()
            best_valid_t2s_metric = to_log[VALIDATION_METRIC_t2s]
            best_valid_csls_t2s_metric = to_log[VALIDATION_METRIC_SUP_t2s]
            best_valid_density_t2s_metric = to_log[DENSITY_METRIC_SUP_t2s]
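
The s2t and t2s checkpointing blocks above are symmetric. If more directions or metrics were added, they could be folded into one helper; a hedged sketch, where the metric-name tables and the best dict are illustrative, not the project's API:

def maybe_save_best(direction, to_log, best, model, exp_path,
                    valid_metrics, sup_metrics, density_metrics):
    # Save the model for one direction ('s2t' or 't2s') whenever its
    # validation metric improves, and record the associated scores.
    metric = to_log[valid_metrics[direction]]
    if metric > best[direction]['valid']:
        getattr(model, 'set_save_%s_path' % direction)(exp_path + 'best_%s_params.bin' % direction)
        getattr(model, 'save_best_%s' % direction)()
        best[direction].update(valid=metric,
                               csls=to_log[sup_metrics[direction]],
                               density=to_log[density_metrics[direction]])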
Example #3
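        # Periodic console log: report the running mean of each tracked statistic.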
        stats_log = [
            '%s: %.4f' % (v, np.mean(stats[k])) for k, v in stats_str
            if len(stats[k]) > 0
        ]
        # stats_log.append('%i samples/s' % int(n_words_proc / (time.time() - tic)))
        logger.info(('%06i - ' % n_epoch) + ' - '.join(stats_log))

        # reset
        tic = time.time()
        # n_words_proc = 0
        for k, _ in stats_str:
            del stats[k][:]

    # embeddings / discriminator evaluation
    to_log = OrderedDict({'n_epoch': n_epoch})
    evaluator.all_eval(to_log)
    evaluator.eval_dis(to_log)

    def default(o):
        # json.dumps cannot encode numpy integer types; convert np.int64
        # values (e.g. epoch counters) to plain Python ints.
        if isinstance(o, np.int64):
            return int(o)
        raise TypeError

    # json.dumps({'value': np.int64(42)}, default=default)

    # JSON log / save best model / end of epoch
    # logger.info("__log__:%s" % json.dumps(to_log, default=default))
    # trainer.save_best(to_log, VALIDATION_METRIC)
    # logger.info('End of epoch %i.\n\n' % n_epoch)

    # update the learning rate (stop if too small)
    trainer.update_lr(to_log, VALIDATION_METRIC)
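
The update_lr call implements the usual decay-on-plateau pattern: shrink the learning rate when the validation metric stops improving, and halt once it falls below a floor. A minimal sketch of that logic for a torch-style optimizer (names and constants are illustrative, not trainer's actual implementation):

def update_lr_sketch(optimizer, cur_metric, state, shrink=0.5, min_lr=1e-6):
    # Halve the learning rate when the validation metric fails to beat the
    # best seen so far; tell the caller to stop once it drops below min_lr.
    if cur_metric > state.get('best_metric', float('-inf')):
        state['best_metric'] = cur_metric
    else:
        for group in optimizer.param_groups:
            group['lr'] *= shrink
    return all(group['lr'] >= min_lr for group in optimizer.param_groups)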