Example #1
0
def init_logger(logger_name=__name__, output_path=None, level=logging.INFO):
    """
    Initializes a logger that writes to the console and, optionally, to a file.

    :param logger_name: name passed to ``logging.getLogger``; loggers are
                        process-wide singletons, so the same name always
                        returns the same logger object.
    :param output_path: directory or file path where the logs should be saved.
                        By default it will not store file logs.
    :param level: logging threshold, e.g. ``logging.INFO``.
    :return: the configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)

    formatter = logging.Formatter("%(asctime)s [%(levelname)s]: %(message)s")

    # adding console output; guard against stacking duplicate handlers when
    # the same logger name is initialized more than once (``type is`` rather
    # than ``isinstance`` because FileHandler subclasses StreamHandler).
    if not any(type(h) is logging.StreamHandler for h in logger.handlers):
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)

    if output_path:
        if is_file_path(output_path):
            safe_mkfdir(output_path)
            # start each run with a fresh log file
            if os.path.exists(output_path):
                os.remove(output_path)
        else:
            safe_mkdir(output_path)
            # using the default name of the logger
            default_file_name = "log_" + strftime("%b_%d_%H_%M_%S") + '.txt'
            output_path = os.path.join(output_path, default_file_name)
        # avoid attaching a second FileHandler for the same target file
        if not any(isinstance(h, logging.FileHandler) and
                   h.baseFilename == os.path.abspath(output_path)
                   for h in logger.handlers):
            file_handler = logging.FileHandler(output_path)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

    return logger
    def __call__(self, data_path):
        """Clean every file under `data_path` into `self.output_folder`.

        :param data_path: a single path or a collection of paths to clean;
                          validated up-front so bad input fails fast.
        :return: dict mapping "data_path" to the folder with cleaned files.
        """
        # Let validation errors propagate directly — wrapping them in a
        # try/except that only re-raises adds nothing but a longer traceback.
        validate_data_paths(data_path)

        safe_mkdir(self.output_folder)
        for dp in listify(data_path):
            for file_path in get_file_paths(dp):
                file_name = os.path.basename(file_path)
                output_file_path = os.path.join(self.output_folder, file_name)
                # skip files that were already cleaned on a previous run
                if not os.path.exists(output_file_path):
                    self._clean_and_save_file(file_path, output_file_path)
        return {"data_path": self.output_folder}
Example #3
0
def postprocess_data(data_path,
                     out_dir_path,
                     min_revs_per_file=None,
                     workers=1,
                     max_revs_per_file=9,
                     early_term=None,
                     logging_period=1000):
    """
    Creates `K` reviews per group files, computes ROUGE 1 vs rest. In this case,
    avoids an expensive online computation of ROUGE.

    :param data_path: input data location fed to the post-processing pipeline.
    :param out_dir_path: directory where one CSV chunk per group is written.
    :param min_revs_per_file: minimum reviews a group must have to be kept.
    :param workers: number of pipeline workers.
    :param max_revs_per_file: maximum reviews kept per group.
    :param early_term: optional early-termination limit for the pipeline.
    :param logging_period: log progress every this many written chunks.
    """
    logger = init_logger("", output_path=os.path.dirname(out_dir_path))
    dt = MosesDetokenizer()
    detok_func = lambda x: [
        dt.detokenize(_x.split(" "), unescape=False) for _x in x
    ]
    # NOTE(review): `seed` is not defined in this function or its parameters —
    # presumably a module-level constant; confirm it exists in this module.
    data_pipeline = assemble_postproc_pipeline(
        text_prep_func=detok_func,
        seed=seed,
        min_revs_per_group=min_revs_per_file,
        max_revs_per_group=max_revs_per_file,
        workers=workers)
    logger.info("Writing chunks to: '%s'." % out_dir_path)
    safe_mkdir(out_dir_path)
    chunks_count = 0
    start = time()
    unique_groups = set()
    review_count = 0
    min_rev_per_chunk = float('inf')
    max_rev_per_chunk = float('-inf')
    for dc in data_pipeline.iter(data_path=data_path, early_term=early_term):
        # each chunk must belong to exactly one group
        assert len(np.unique(dc[InpDataF.GROUP_ID])) == 1
        group_id = dc[0, InpDataF.GROUP_ID].split("_")[0]
        unique_groups.add(group_id)
        review_count += len(dc)
        min_rev_per_chunk = min(min_rev_per_chunk, len(dc))
        max_rev_per_chunk = max(max_rev_per_chunk, len(dc))
        fp = comb_paths(out_dir_path, "%s.csv" % dc[0][InpDataF.GROUP_ID])
        # use a context manager so the file handle is closed (the original
        # passed a bare open() whose handle was never closed)
        with open(fp, encoding='utf-8', mode='w') as f:
            dc.to_csv(f)
        chunks_count += 1
        if chunks_count % logging_period == 0:
            logger.info("Wrote %d chunks." % chunks_count)
    logger.info("Totally wrote %d chunks." % chunks_count)
    logger.info("Total time elapsed: %f." % (time() - start))
    logger.info("Unique groups: %d." % len(unique_groups))
    logger.info("Total reviews: %d." % review_count)
    logger.info("Min reviews per chunk: %d." % min_rev_per_chunk)
    logger.info("Max reviews per chunk: %d." % max_rev_per_chunk)
Example #4
0
        def after_ep_func(epoch=None):
            """Post-epoch hook: dumps artifacts, saves state, runs evaluation.

            :param epoch: epoch number used to name the output folder and
                          checkpoint file; when ``None`` a plain 'out' folder
                          is used and no state is saved.
            """
            # NOTE(review): `if epoch` treats epoch == 0 like None (output
            # goes to the plain 'out' folder) — confirm epochs are 1-based.
            new_out_path = comb_paths(out_dir_path, "out_ep%d" % epoch) \
                if epoch else comb_paths(out_dir_path, "out")
            safe_mkdir(new_out_path)

            # saving the state; inside this guard `epoch` is never None, so
            # the checkpoint name is always epoch-prefixed
            if checkpoint_fn is not None and epoch is not None:
                new_checkpoint_fn = "ep%d_%s" % (epoch, checkpoint_fn)
                out_fp = comb_paths(out_dir_path, new_checkpoint_fn)
                self.imodel.save_state(out_fp)

            # running evaluation against gold summaries
            if summ_eval_data_source is not None:
                self.summ_eval(new_out_path, summ_eval_data_source,
                               **summ_eval_kwargs)
    def __call__(self, parent_folder_path, suffix=None):
        """
        Create a fresh experiment folder under `parent_folder_path`.

        :param parent_folder_path: the path to an existing or non-existing
                                   parent folder that should host experiment
                                   folders.
        :param suffix: optional string forwarded to the folder-name builder;
                       presumably appended to the generated name — confirm
                       against ``_create_new_folder_name``.
        :return: a new path where artifacts can be stored.
        """
        # resolve relative paths against the current working directory
        parent_folder_path = os.path.join(os.getcwd(), parent_folder_path)
        safe_mkdir(parent_folder_path)

        # 1. create a new folder's name
        new_folder_name = self._create_new_folder_name(parent_folder_path,
                                                       suffix)

        # 2. create a new folder (os.mkdir: raises if it already exists)
        new_folder_path = os.path.join(parent_folder_path, new_folder_name)
        os.mkdir(new_folder_path)

        return new_folder_path
Example #6
0
 def setUp(self):
     """Prepare a temporary '.tmp' working folder before each test."""
     folder_path = ".tmp"
     self.tmp_folder = folder_path
     safe_mkdir(folder_path)
 def safe_make_folder(self, path):
     """Create the folder at `path` via the project's safe_mkdir helper."""
     safe_mkdir(path)