Example #1
    def _update(self, **kwargs):
        params = kwargs
        params["logger"] = params.pop(
            "logger",
            config_logging(logger=params.get("model_name", self.model_name),
                           console_log_level="info"))

        for key in params:
            if key.endswith("_params") and key + "_update" in params:
                params[key].update(params[key + "_update"])

        self.deep_update(**params)

        _vars = ["ctx"]
        for _var in _vars:
            if _var in kwargs:
                try:
                    setattr(self, _var, eval_var(kwargs[_var]))
                except TypeError:
                    pass

        self.validation_result_file = path_append(self.model_dir,
                                                  RESULT_JSON,
                                                  to_str=True)
        self.cfg_path = path_append(self.model_dir, CFG_JSON, to_str=True)
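For reference, the "*_params" / "*_params_update" merge performed above is a plain dict update keyed by suffix. A minimal standalone sketch of that convention (the optimizer_params names are illustrative, not taken from longling):

params = {
    "optimizer_params": {"lr": 0.01, "wd": 1e-4},
    "optimizer_params_update": {"lr": 0.001},
}
# Every "<name>_params" entry is patched in place by its "<name>_params_update" twin.
for key in params:
    if key.endswith("_params") and key + "_update" in params:
        params[key].update(params[key + "_update"])
# params["optimizer_params"] is now {"lr": 0.001, "wd": 1e-4}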
Example #2
    def toolbox_init(
        self,
        evaluation_formatter_parameters=None,
        validation_logger_mode="w",
        informer_silent=False,
    ):

        from longling.lib.clock import Clock
        from longling.lib.utilog import config_logging
        from longling.ML.toolkit.formatter import MultiClassEvalFormatter \
            as Formatter
        from longling.ML.toolkit.monitor import MovingLoss, \
            ConsoleProgressMonitor as ProgressMonitor

        self.toolbox = {
            "monitor": dict(),
            "timer": None,
            "formatter": dict(),
        }

        mod = self.mod
        cfg = self.mod.cfg

        # 4.1 todo: define the loss function
        # bp_loss_f defines the loss function used for back propagation;
        # there must be exactly one, and its name must not match the *_\d+ pattern

        assert self.loss_function is not None

        loss_monitor = MovingLoss(self.loss_function)

        # 4.1 todo: initialize the interactive information used during training
        timer = Clock()

        progress_monitor = ProgressMonitor(
            loss_index=[name for name in self.loss_function],
            end_epoch=cfg.end_epoch - 1,
            silent=informer_silent)

        validation_logger = config_logging(
            filename=os.path.join(cfg.model_dir, "result.log"),
            logger="%s-validation" % cfg.model_name,
            mode=validation_logger_mode,
            log_format="%(message)s",
        )

        # set evaluation formatter
        evaluation_formatter_parameters = {} \
            if evaluation_formatter_parameters is None \
            else evaluation_formatter_parameters

        evaluation_formatter = Formatter(
            logger=validation_logger,
            dump_file=mod.cfg.validation_result_file,
            **evaluation_formatter_parameters)

        self.toolbox["monitor"]["loss"] = loss_monitor
        self.toolbox["monitor"]["progress"] = progress_monitor
        self.toolbox["timer"] = timer
        self.toolbox["formatter"]["evaluation"] = evaluation_formatter
Example #3
    def toolbox_init(
        self,
        evaluation_formatter_parameters=None,
        validation_logger_mode="w",
        silent=False,
    ):

        from longling import path_append
        from longling.lib.clock import Clock
        from longling.lib.utilog import config_logging
        from longling.ML.toolkit import EpochEvalFMT as Formatter
        from longling.ML.toolkit import MovingLoss, ConsoleProgressMonitor as ProgressMonitor

        self.toolbox = {
            "monitor": dict(),
            "timer": None,
            "formatter": dict(),
        }

        mod = self.mod
        cfg = self.mod.cfg

        assert self.loss_function is not None

        loss_monitor = MovingLoss(self.loss_function)

        # 4 todo: initialize the interactive information used during training
        timer = Clock()

        progress_monitor = ProgressMonitor(
            indexes={"Loss": [name for name in self.loss_function]},
            values={"Loss": loss_monitor.losses},
            silent=silent,
            player_type="epoch",
            total_epoch=cfg.end_epoch - 1)

        validation_logger = config_logging(
            filename=path_append(cfg.model_dir, "result.log"),
            logger="%s-validation" % cfg.model_name,
            mode=validation_logger_mode,
            log_format="%(message)s",
        )

        # set evaluation formatter
        evaluation_formatter_parameters = {} \
            if evaluation_formatter_parameters is None \
            else evaluation_formatter_parameters

        evaluation_formatter = Formatter(
            logger=validation_logger,
            dump_file=mod.cfg.validation_result_file,
            **evaluation_formatter_parameters)

        self.toolbox["monitor"]["loss"] = loss_monitor
        self.toolbox["monitor"]["progress"] = progress_monitor
        self.toolbox["timer"] = timer
        self.toolbox["formatter"]["evaluation"] = evaluation_formatter
Example #4
    def _update(self, **kwargs):
        params = kwargs

        params["logger"] = params.pop(
            "logger",
            config_logging(logger=params.get("model_name", self.model_name),
                           console_log_level="info"))

        for key in params:
            if key.endswith("_params") and key + "_update" in params:
                params[key].update(params[key + "_update"])

        # path_override_check
        path_check_list = [
            "dataset", "root_data_dir", "workspace", "root_model_dir",
            "model_dir"
        ]
        _overridden = {}
        for path_check in path_check_list:
            if kwargs.get(path_check) is None or kwargs[path_check] == getattr(
                    self, "%s" % path_check):
                _overridden[path_check] = False
            else:
                _overridden[path_check] = True

        for param, value in params.items():
            setattr(self, "%s" % param, value)

        def is_overridden(varname):
            return _overridden["%s" % varname]

        # set dataset
        if is_overridden("dataset") and not is_overridden("root_data_dir"):
            kwargs["root_data_dir"] = path_append("$root", "data", "$dataset")
        # set workspace
        if (is_overridden("workspace") or is_overridden("root_model_dir")
            ) and not is_overridden("model_dir"):
            kwargs["model_dir"] = path_append("$root_model_dir", "$workspace")

        # rebuild relevant directory or file path according to the kwargs
        _dirs = [
            "workspace", "root_data_dir", "data_dir", "root_model_dir",
            "model_dir"
        ]
        for _dir in _dirs:
            exp = var2exp(kwargs.get(_dir, getattr(self, _dir)),
                          env_wrap=lambda x: "self.%s" % x)
            setattr(self, _dir, eval(exp))

        self.validation_result_file = path_append(self.model_dir,
                                                  RESULT_JSON,
                                                  to_str=True)
        self.cfg_path = path_append(self.model_dir, CFG_JSON, to_str=True)
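The var2exp/eval pair above expands $-placeholders such as $root_model_dir into attribute lookups on self before each path is rebuilt. A rough, hypothetical stand-in (not longling's actual var2exp) that illustrates the same substitution idea:

import re


def render_path_template(template, obj):
    # Hypothetical helper, for illustration only: replace each $name in the
    # template with the corresponding attribute value of obj.
    return re.sub(r"\$(\w+)", lambda m: str(getattr(obj, m.group(1))), template)


class _Demo:
    root_model_dir = "model"
    workspace = "exp1"


print(render_path_template("$root_model_dir/$workspace", _Demo()))  # -> model/exp1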
Example #5
    def _update(self, **kwargs):
        params = kwargs
        params["logger"] = params.pop(
            "logger",
            config_logging(logger=params.get("model_name", self.model_name),
                           console_log_level="info"))

        for key in params:
            if key.endswith("_params") and key + "_update" in params:
                params[key].update(params[key + "_update"])

        self.deep_update(**params)

        self.validation_result_file = path_append(self.model_dir,
                                                  RESULT_JSON,
                                                  to_str=True)
        self.cfg_path = path_append(self.model_dir, CFG_JSON, to_str=True)
Example #6
# coding: utf-8
# created by tongshiwei on 2019-9-4

from __future__ import absolute_import
from __future__ import print_function

import argparse
import os

from longling.lib.parser import path_append
from longling.lib.stream import wf_open
from longling.lib.utilog import config_logging, LogLevel

logger = config_logging(logger="glue",
                        level=LogLevel.DEBUG,
                        console_log_level=LogLevel.DEBUG)


def new_model(model_name,
              source_dir,
              directory=None,
              level="project",
              skip_existing=False):  # pragma: no cover
    target_dir = os.path.join(
        directory, model_name) if directory is not None else model_name

    target_dir = os.path.abspath(target_dir)
    if level == "project":
        pass
    elif level == "model":
        source_dir = path_append(
Example #7
    def __init__(self, params_path=None, **kwargs):
        """
        Configuration File, including categories:

        * directory setting
        * optimizer setting
        * training parameters
        * equipment
        * parameters saving setting
        * user parameters

        Parameters
        ----------
        params_path: str
            The path to the configuration file, which is in JSON format
        kwargs:
            Parameters to be reset.
        """
        super(Configuration, self).__init__(
            logger=config_logging(
                logger=self.model_name,
                console_log_level=LogLevel.INFO
            )
        )

        params = self.class_var
        if params_path:
            params.update(self.load_cfg(cfg_path=params_path))
        params.update(**kwargs)

        for key in params:
            if key.endswith("_params") and key + "_update" in params:
                params[key].update(params[key + "_update"])

        # path_override_check
        path_check_list = ["dataset", "root_data_dir", "workspace", "root_model_dir", "model_dir"]
        _overridden = {}
        for path_check in path_check_list:
            if kwargs.get(path_check) is None or kwargs[path_check] == getattr(self, "%s" % path_check):
                _overridden[path_check] = False
            else:
                _overridden[path_check] = True

        for param, value in params.items():
            setattr(self, "%s" % param, value)

        def is_overridden(varname):
            return _overridden["%s" % varname]

        # set dataset
        if is_overridden("dataset") and not is_overridden("root_data_dir"):
            kwargs["root_data_dir"] = path_append("$root", "data", "$dataset")
        # set workspace
        if (is_overridden("workspace") or is_overridden("root_model_dir")) and not is_overridden("model_dir"):
            kwargs["model_dir"] = path_append("$root_model_dir", "$workspace")

        # rebuild relevant directory or file path according to the kwargs
        _dirs = [
            "workspace", "root_data_dir", "data_dir", "root_model_dir",
            "model_dir"
        ]
        for _dir in _dirs:
            exp = var2exp(
                kwargs.get(_dir, getattr(self, _dir)),
                env_wrap=lambda x: "self.%s" % x
            )
            setattr(self, _dir, eval(exp))

        _vars = [
            "ctx"
        ]
        for _var in _vars:
            if _var in kwargs:
                try:
                    setattr(self, _var, eval_var(kwargs[_var]))
                except TypeError:
                    pass

        self.validation_result_file = path_append(
            self.model_dir, "result.json", to_str=True
        )
        self.cfg_path = path_append(
            self.model_dir, "configuration.json", to_str=True
        )
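A hedged usage sketch for this Configuration class, assuming (as in the glue template) that the class carries class-level defaults such as model_name, dataset, workspace, root_model_dir and model_dir; the override values below are illustrative, and params_path could likewise point to a saved JSON file:

cfg = Configuration(
    dataset="demo_dataset",  # overriding dataset rebuilds root_data_dir as $root/data/$dataset
    workspace="exp1",        # overriding workspace rebuilds model_dir as $root_model_dir/$workspace
)
print(cfg.model_dir, cfg.validation_result_file)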
Example #8
    def __init__(self, params_json=None, **kwargs):
        """
        Configuration File, including categories:

        * directory setting
        * optimizer setting
        * training parameters
        * equipment
        * parameters saving setting
        * user parameters

        Parameters
        ----------
        params_json: str
            The path to the configuration file, which is in JSON format
        kwargs:
            Parameters to be reset.
        """
        super(Configuration, self).__init__(logger=config_logging(
            logger=self.model_name, console_log_level=LogLevel.INFO))

        params = self.class_var
        if params_json:
            params.update(self.load_cfg(params_json=params_json))
        params.update(**kwargs)

        for param, value in params.items():
            setattr(self, "%s" % param, value)

        # set dataset
        if kwargs.get("dataset") and not kwargs.get("root_data_dir"):
            kwargs["root_data_dir"] = "$root/data/$dataset"
        # set workspace
        if (kwargs.get("workspace") or
                kwargs.get("root_model_dir")) and not kwargs.get("model_dir"):
            kwargs["model_dir"] = "$root_model_dir/$workspace"

        # rebuild relevant directory or file path according to the kwargs
        _dirs = [
            "workspace", "root_data_dir", "data_dir", "root_model_dir",
            "model_dir"
        ]
        for _dir in _dirs:
            exp = var2exp(kwargs.get(_dir, getattr(self, _dir)),
                          env_wrap=lambda x: "self.%s" % x)
            setattr(self, _dir, eval(exp))

        _vars = ["ctx"]
        for _var in _vars:
            if _var in kwargs:
                try:
                    setattr(self, _var, eval_var(kwargs[_var]))
                except TypeError:
                    pass

        self.validation_result_file = path_append(self.model_dir,
                                                  "result.json",
                                                  to_str=True)
        self.cfg_path = path_append(self.model_dir,
                                    "configuration.json",
                                    to_str=True)
Example #9
# coding: utf-8
from __future__ import absolute_import

import logging
import time
from contextlib import contextmanager

from longling.lib.utilog import config_logging

_logger = config_logging(logger="clock", console_log_level=logging.INFO)

__all__ = ["Clock", "print_time", "Timer"]


@contextmanager
def print_time(tips: str = "", logger=_logger):
    """
    Measure and print the running time of a code block, in seconds.

    Parameters
    ----------
    tips: str
    logger: logging.Logger or logging

    Examples
    --------
    >>> with print_time("tips"):
    ...     a = 1 + 1  # The code you want to test

    """
    start_time = time.time()
Example #10
import functools
import os
from collections import OrderedDict
from shutil import copyfile as _copyfile, rmtree, copytree as _copytree

from longling import wf_open, PATH_TYPE
from longling.Architecture.utils import binary_legal_input
from longling.lib.path import abs_current_dir, path_append
from longling.lib.regex import default_variable_replace as dvr
from longling.lib.utilog import config_logging, LogLevel
from longling.lib.yaml_helper import FoldedString, ordered_yaml_load, dump_folded_yaml

from . import config

logger = config_logging(logger="arch", console_log_level=LogLevel.INFO)

META = path_append(abs_current_dir(__file__), "meta_docs")
default_variable_replace = functools.partial(dvr, quotation="\'")


def copytree(src, dst, **kwargs):
    """
    Recursively copy a directory tree.

    Change the OVERRIDE mode to specify the operation when dst already exists.

    Examples
    --------
    .. code-block :: python
Example #11
from __future__ import absolute_import
from __future__ import division

import logging
import os

from collections import OrderedDict

from tqdm import tqdm
import requests

from longling.lib.utilog import config_logging
from longling.lib.stream import check_file, wf_open, wf_close
from dev.spider import conf

logger = config_logging(logger="spider", console_log_level=logging.INFO, propagate=False)

COOKIES = conf.cookies
HEADERS = conf.headers


def conf_request(url, cookies=conf.cookies, headers=conf.headers):
    return requests.get(url, cookies=cookies, headers=headers)


def dyn_ua_requests(urls, cookies=conf.cookies, ua_list="doc/agent_list"):
    import random
    with open(ua_list) as f:
        uas = [line.strip() for line in f if '#' not in line]

    for url in urls:
Example #12
def get_default_toolbox(loss_function=None,
                        evaluation_formatter_parameters=None,
                        progress_monitor_parameters=None,
                        validation_logger_mode="w",
                        silent=False,
                        configuration=None):  # pragma: no cover
    """
    New in version 1.3.16

    todo: consider whether to keep it

    Notice
    ------
    The developer who modifies this function should simultaneously modify the related function in glue.
    """

    from longling import path_append
    from longling.lib.clock import Clock
    from longling.lib.utilog import config_logging
    from longling.ML.toolkit import EpochEvalFMT as Formatter
    from longling.ML.toolkit import MovingLoss, ConsoleProgressMonitor as ProgressMonitor

    cfg = configuration

    toolbox = {
        "monitor": dict(),
        "timer": None,
        "formatter": dict(),
    }

    loss_monitor = MovingLoss(loss_function) if loss_function else None

    timer = Clock()

    progress_monitor = ProgressMonitor(
        indexes={"Loss": [name
                          for name in loss_function]} if loss_function else {},
        values={"Loss": loss_monitor.losses} if loss_monitor else {},
        silent=silent,
        **progress_monitor_parameters
        if progress_monitor_parameters is not None else {})

    validation_logger = config_logging(
        filename=path_append(cfg.model_dir, "result.log") if hasattr(
            cfg, "model_dir") else None,
        logger="%s-validation" %
        cfg.model_name if hasattr(cfg, "model_name") else "model",
        mode=validation_logger_mode,
        log_format="%(message)s",
    )

    # set evaluation formatter
    evaluation_formatter_parameters = {} \
        if evaluation_formatter_parameters is None \
        else evaluation_formatter_parameters

    evaluation_formatter = Formatter(logger=validation_logger,
                                     dump_file=getattr(
                                         cfg, "validation_result_file", False),
                                     **evaluation_formatter_parameters)

    toolbox["monitor"]["loss"] = loss_monitor
    toolbox["monitor"]["progress"] = progress_monitor
    toolbox["timer"] = timer
    toolbox["formatter"]["evaluation"] = evaluation_formatter

    return toolbox
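A hedged usage sketch for get_default_toolbox, assuming loss_function is a name-to-callable mapping (as the iteration over its names above suggests) and that configuration exposes model_dir, model_name and validation_result_file; every concrete name below is illustrative:

class _Cfg:
    # Stand-in configuration object; real code would pass the framework's cfg.
    model_dir = "demo_model"
    model_name = "demo"
    validation_result_file = "demo_model/result.json"


def cross_entropy(y_pred, y_true):
    # Stand-in loss callable.
    raise NotImplementedError


toolbox = get_default_toolbox(
    loss_function={"cross entropy": cross_entropy},
    configuration=_Cfg(),
    # ProgressMonitor arguments as shown in Example #3
    progress_monitor_parameters={"player_type": "epoch", "total_epoch": 10},
)
loss_monitor = toolbox["monitor"]["loss"]
progress_monitor = toolbox["monitor"]["progress"]
timer = toolbox["timer"]
evaluation_formatter = toolbox["formatter"]["evaluation"]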