def plot_series(errs, dir_out, name):
    """
    Plot the mean word and character errors of a series of networks.

    Two line plots are written, one for the word errors (y-label ``WER``)
    and one for the character errors (y-label ``CER``). Their paths are
    requested from ``sanity_util.unique_file_name`` with the base names
    ``name + '_w_err'`` and ``name + '_c_err'`` and the suffix ``.png``.

    :param errs: tuples of PHOCNet paths and respective errors; every error
        dict has to provide ``['mean_w_err']`` and
        ``['mean_c_err']['mean_pct']``
    :param dir_out: output directory
    :param name: plot base-title
    """
    plot_w_err_path = sanity_util.unique_file_name(dir_out, name + '_w_err',
                                                   '.png')
    plot_c_err_path = sanity_util.unique_file_name(dir_out, name + '_c_err',
                                                   '.png')
    # materialise once, the pairs are traversed several times below
    errs = list(errs)
    # the base name of each network path labels its position on the x-axis
    x_ticks = [os.path.basename(n_path) for n_path, _ in errs]
    w_errs = [e_dict['mean_w_err'] for _, e_dict in errs]
    c_errs = [e_dict['mean_c_err']['mean_pct'] for _, e_dict in errs]
    _plot_error_curve(w_errs, x_ticks, 'WER', plot_w_err_path)
    _plot_error_curve(c_errs, x_ticks, 'CER', plot_c_err_path)


def _plot_error_curve(values, tick_labels, y_label, path):
    """Draw one error curve on a figure of its own and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(15, 10))
    try:
        positions = np.arange(len(values))
        ax.plot(positions, values)
        ax.set_xticks(positions)
        ax.set_xticklabels(tick_labels,
                           rotation=25,
                           rotation_mode="anchor",
                           horizontalalignment='right',
                           verticalalignment='top')
        ax.set_ylabel(y_label)
        fig.savefig(path)
    finally:
        # closing the figure we own is sufficient; calling plt.clf() after
        # the close would make pyplot create a new, empty figure that is
        # never closed
        plt.close(fig)
 def save(self, dir_out, train=None, test=None, pfx=''):
     """
     Store the network weights and the training set-up in ``dir_out``.

     The state dict of ``self.nn`` is written with ``torch.save`` and the
     result of ``self.set_up()`` is dumped as JSON. Both file paths are
     requested from ``sanity_util.unique_file_name``.

     :param dir_out: directory the files are written to
     :param train: accepted, but not used by this method
     :param test: accepted, but not used by this method
     :param pfx: inserted into both base names (``nn_<pfx>``, ``setup_<pfx>``)
     """
     # creating save path
     sanity_util.safe_dir_path(dir_out)
     # requesting unique names, so prior data is not deleted
     weights_path = sanity_util.unique_file_name(dir=dir_out,
                                                 fn=f'nn_{pfx}',
                                                 suffix='.pth')
     setup_path = sanity_util.unique_file_name(dir=dir_out,
                                               fn=f'setup_{pfx}',
                                               suffix='.json')
     # network weights
     torch.save(self.nn.state_dict(), weights_path)
     # training set-up
     with open(setup_path, 'w') as setup_file:
         json.dump(self.set_up(), setup_file)
Exemple #3
0
    def save(self, dir, name='estimator'):
        """
        Pickle this estimator into a file inside ``dir``.

        NOTE: Pre-existing data/ estimators will NOT be overwritten. Please
        clean up outdated estimators manually.

        :param dir: directory to pickle into
        :param name: base name of the pickle file (suffix ``.pkl``)
        """
        target_dir = sanity_util.safe_dir_path(dir)
        pickle_path = sanity_util.unique_file_name(dir=target_dir,
                                                   fn=name,
                                                   suffix='.pkl')
        with open(pickle_path, 'wb') as sink:
            pickle.dump(self, sink)
def new_logger(dir_out, name):
    """
    Initialize a file-backed logger for training.

    The logger is looked up via ``logging.getLogger(name)``, gets a
    ``FileHandler`` attached and is set to level ``INFO``. The log file path
    is requested from ``sanity_util.unique_file_name`` (suffix ``.log``).

    NOTE: ``logging.getLogger`` hands out the same logger object for the same
    name, so calling this function repeatedly with one name attaches an
    additional handler each time.

    :param dir_out: directory of the log file
    :param name: logger name, also used as base name of the log file
    :return: the configured logger
    """
    train_logger = logging.getLogger(name)
    log_dir = sanity_util.safe_dir_path(dir_out)
    log_path = sanity_util.unique_file_name(dir=log_dir,
                                            fn=name,
                                            suffix='.log')
    file_handler = logging.FileHandler(log_path)
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    train_logger.addHandler(file_handler)
    train_logger.setLevel(logging.INFO)
    return train_logger
Exemple #5
0
def save(dir_out, json_dict, name):
    """
    Write the errors to a JSON file.

    The file path is requested from ``sanity_util.unique_file_name`` with the
    base name ``<name>_ERR`` and the suffix ``.json``.

    :param dir_out: Directory to save the output file at
    :param json_dict: JSON object/ dictionary containing the errors
    :param name: The output file's name (without the ``_ERR`` part)
    """
    # safe path
    sanity_util.safe_dir_path(dir_out)
    target_path = sanity_util.unique_file_name(dir=dir_out,
                                               fn=f'{name}_ERR',
                                               suffix='.json')
    # dumping the dictionary
    with open(target_path, 'w') as sink:
        json.dump(json_dict, sink)
    # Evaluation run: read the configuration, build data set, estimator and
    # PHOCNet, collect the error rates and store/plot them.
    # NOTE(review): `args`, `name_dset`, `gt_path`, `imgs_path`, `alphabet`,
    # `scale`, `str_est`, `device` and `s_batch` are bound outside the
    # visible lines.
    dir_out = args['dir_out']
    dir_out = sanity_util.safe_dir_path(dir_out)
    name = args['model_name']
    t_phocnet = args['PHOCNet_type'].lower()
    k_fold = int(args['k_fold'])
    n_codes_lvl = int(args['n_codes_lvl'])
    ## loading dataset
    dset, train, test = get_dsets(name_dset, gt_path, imgs_path, alphabet,
                                  scale, k_fold)
    # union of the train and test words, duplicates removed by the set
    words = list(set(train.words).union(test.words))
    ## estimator
    estimator = get_estimator(str_est, words, alphabet)
    ## loading PHOCNet
    phocnet = get_PHOCNet(t_phocnet, alphabet)
    ## collect error rates
    logger = new_logger(dir_out, name)
    # NOTE(review): `dir` is not assigned in the visible lines; unless it is
    # bound earlier, the builtin `dir` is passed here - confirm.
    errs = evaluate_dir(dir,
                        device,
                        test,
                        estimator,
                        phocnet,
                        logger,
                        s_batch=s_batch,
                        n_codes_lvl=n_codes_lvl)
    # save error rates for plotting
    file_path = sanity_util.unique_file_name(dir_out, name, '.json')
    with open(file_path, 'w') as f_out:
        json.dump(errs, f_out)
    # plotting and saving the plot
    plot_series(errs, dir_out, name)
                      net_log_dir=os.path.join(dir_out, 'tmp', model_pfx, ''),
                      device=device,
                      logger=log,
                      loss=loss,
                      s_batch=s_batch,
                      augmented=augment_dset,
                      tmp_save_mod=intv_save,
                      alphabet=alphabet,
                      phoc_lvls=phoc_lvls,
                      quant_aug=t_augment,
                      FP=FP)
    """run training"""
    # NOTE(review): `shuffle` is parsed from the args here, but it is not
    # referenced in the visible remainder - confirm it is used elsewhere.
    shuffle = args['shuffle'].lower() in ['true', '1', 't', 'yes', 'y']
    trainer.train_on(d_set=train_set, optimizer=optimizer, n_iter=max_iter)
    """save net"""
    # the ids of the train and test split are handed to the trainer's save
    ids_train = train_set.ids
    ids_test = test_set.ids
    trainer.save(dir_out=dir_out,
                 train=ids_train,
                 test=ids_test,
                 pfx=model_pfx)
    """saving args, so you have a reference to the training-config of your model"""
    sanity_util.safe_dir_path(dir_out)
    file_path = sanity_util.unique_file_name(dir=dir_out,
                                             fn='args_{}'.format(model_pfx),
                                             suffix='.json')
    with open(file_path, 'w') as args_out:
        # start and end time are added to `args` before dumping;
        # `t_start_training` is bound outside the visible lines
        t_end_training = time.asctime()
        args['date'] = {'started': t_start_training, 'ended': t_end_training}
        json.dump(args, args_out)
 def save(self, dir, name='estimator'):
     """
     Save the estimator via the parent class and store its configuration.

     After ``super().save(dir, name)`` the attributes ``reg`` and ``n_dim``
     are dumped as JSON; the file path is requested from
     ``sanity_util.unique_file_name`` with the suffix ``.json``.

     :param dir: directory to save to
     :param name: base name of the written files
     """
     super().save(dir, name)
     # additionally keeping track of configuration
     config_path = sanity_util.unique_file_name(dir, name, '.json')
     with open(config_path, 'w') as config_file:
         config = {'reg': self.reg, 'n_dim': self.n_dim}
         json.dump(config, config_file)