Exemple #1
0
def train(config) -> None:
    """Run one full training session described by the *config* dict.

    Wires together data loading, model construction, device placement,
    loss/metric resolution, optimizer/scheduler creation, and hands the
    lot to ``Trainer``.
    """
    setup_logging('train')
    log = logging.getLogger()
    log.info(f'Training: {config}')
    seed_everything(config['SEED'])

    # Build the data pipeline: training loader plus its validation split.
    dl_cfg = config["DATA_LOADER"]
    data_loader = eval(dl_cfg["TYPE"])(**dl_cfg["ARGS"])
    valid_data_loader = data_loader.split_validation()

    # Construct the network and log its architecture.
    model_cfg = config["MODEL"]
    model = create_model(model_cfg["TYPE"])(**model_cfg["ARGS"])
    log.info(model)

    # Place the model on the target device; wrap for multi-GPU when possible.
    device, device_ids = prepare_device(config['N_GPU'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # Resolve loss and metric callables from their configured names.
    # NOTE: eval() on config strings assumes the config file is trusted.
    criterion = eval(config['LOSS']).to(device)
    metrics = [eval(name) for name in config['METRICS']]

    # Optimizer and LR scheduler; remove the scheduler lines to disable it.
    opt_cfg = config["OPTIMIZER"]
    optimizer = create_optimizer(opt_cfg["TYPE"])(**opt_cfg["ARGS"], model=model)
    sched_cfg = config["LR_SCHEDULER"]
    lr_scheduler, num_epochs = create_scheduler(sched_cfg["TYPE"])(
        **sched_cfg["ARGS"], optimizer=optimizer)

    trainer = Trainer(model, criterion, metrics, optimizer,
                      config=config,
                      device=device,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)
    trainer.train()
Exemple #2
0
    def __init__(
        self,
        args: Dict,
        options: Optional[List["CustomArgs"]] = None,
        timestamp: bool = True,
    ):
        """Build the run configuration from parsed CLI arguments.

        :param args: mapping of CLI argument values; recognised keys are
            ``device`` (iterable of GPU id strings), ``resume`` (checkpoint
            path) and ``config`` (config-file path).
        :param options: extra ``CustomArgs`` overrides applied to the loaded
            config via ``_apply_options``.
        :param timestamp: when True, nest the output directories under a
            per-run time stamp (reused from the checkpoint's directory on
            resume).
        :raises ValueError: if neither ``resume`` nor ``config`` is supplied.
        """
        # Limit the GPUs visible to CUDA before any device is initialised.
        if "device" in args:
            if args["device"] is not None:
                os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(args["device"])
                self.device = [int(device) for device in args["device"]]

        # Resuming reuses the config stored beside the checkpoint; otherwise
        # an explicit config file is required.
        if args["resume"]:
            self.resume: Union[Path, None] = Path(args["resume"])
            self.cfg_fname = self.resume.parent / DEFAULT_CONFIG_FILE_NAME
        elif args["config"]:
            self.resume: Union[Path, None] = None
            self.cfg_fname = Path(args["config"])
        else:
            msg_no_cfg = ("Configuration file need to be specified.\n"
                          "add -c config.json for example.")
            raise ValueError(msg_no_cfg)

        self._config = self.load(self.cfg_fname)
        self._apply_options(args, options)

        # set save_dir where trained model and log will be saved
        save_dir = Path(self._config["trainer"]["save_dir"])
        expr_name = self._config["name"]
        if self.resume:
            # Reuse the resumed run's folder name as the time stamp; the
            # directories are assumed to already exist (no mkdir here).
            time_stamp = self.resume.parent.stem if timestamp else ""
            self._save_dir = save_dir / "models" / expr_name / time_stamp
            self._log_dir = save_dir / "log" / expr_name / time_stamp
            self._test_dir = save_dir / "test" / expr_name / time_stamp

        else:
            # Fresh run: create timestamped output directories and snapshot
            # the effective config next to the checkpoints.
            time_stamp = datetime.now().strftime(
                r"%m%d_%H%M%S") if timestamp else ""
            self._save_dir = save_dir / "models" / expr_name / time_stamp
            self._log_dir = save_dir / "log" / expr_name / time_stamp
            self._test_dir = save_dir / "test" / expr_name / time_stamp

            self._save_dir.mkdir(parents=True, exist_ok=True)
            self._log_dir.mkdir(parents=True, exist_ok=True)
            self._test_dir.mkdir(parents=True, exist_ok=True)

            self.save(self._save_dir / DEFAULT_CONFIG_FILE_NAME)

        # configurations for logging module
        setup_logging(self.log_dir)
        # Map CLI verbosity (0/1/2) to logging levels.
        self._log_level = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
Exemple #3
0
    def __init__(self, args, options='', timestamp=True):
        """Parse CLI arguments, load the JSON config and set up run dirs.

        :param args: an ``argparse.ArgumentParser``; the custom ``options``
            flags are registered on it before parsing.
        :param options: iterable of custom CLI option descriptors exposing
            ``flags`` and ``type`` attributes.
        :param timestamp: when True, nest save/log dirs under a run
            time stamp.
        """
        # parse default and custom cli options
        for opt in options:
            args.add_argument(*opt.flags, default=None, type=opt.type)
        args = args.parse_args()

        self.resume = None
        # BUG FIX: initialise explicitly so the assert below fails with its
        # intended message instead of raising AttributeError when neither
        # --resume nor --config is supplied.
        self.cfg_fname = None
        if args.device:
            os.environ["CUDA_VISIBLE_DEVICES"] = args.device
        if args.resume:
            self.resume = Path(args.resume)
            self.cfg_fname = self.resume.parent / 'config.json'
        if args.config:
            # An explicit --config overrides the config stored beside the
            # resumed checkpoint.
            self.cfg_fname = Path(args.config)
        msg_no_cfg = ("Configuration file need to be specified. "
                      "Add '-c config.json', for example.")
        assert self.cfg_fname is not None, msg_no_cfg

        # load config file and apply custom cli options
        config = read_json(self.cfg_fname)
        self.__config = _update_config(config, options, args)
        # Keep a pristine copy of the merged config before anything mutates it.
        self.__raw = copy.deepcopy(self.__config)

        # set save_dir where trained model and log will be saved.
        save_dir = Path(
            parse_value(self.config['trainer']['extra_args']['save_dir']))
        timestamp = datetime.now().strftime(
            r'%m%d_%H%M%S') if timestamp else ''

        exper_name = self.config['name']
        self.__save_dir = save_dir / 'models' / exper_name / timestamp
        self.__log_dir = save_dir / 'log' / exper_name / timestamp

        self.save_dir.mkdir(parents=True, exist_ok=True)
        self.log_dir.mkdir(parents=True, exist_ok=True)

        # save updated config file to the checkpoint dir (rank 0 only, to
        # avoid concurrent writes in distributed runs)
        if get_global_rank() == 0:
            write_json(self.config, self.save_dir / 'config.json')

        # configure logging module
        setup_logging(self.log_dir)
        # Map CLI verbosity (0/1/2) to logging levels.
        self.log_levels = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
        logger = self.get_logger('config')
        logger.info(f"Experiment name: {exper_name}")
Exemple #4
0
    def __init__(self, args: Dict, timestamp: bool = True):
        """Build the run configuration from a CLI-argument mapping.

        :param args: mapping of CLI argument values; recognised keys are
            ``device`` (comma-separated GPU ids), ``resume`` (checkpoint
            path) and ``config`` (config-file path).
        :param timestamp: when True, nest the output directories under a
            per-run time stamp (reused from the checkpoint's directory on
            resume).
        :raises ValueError: if neither ``resume`` nor ``config`` holds a
            usable value.
        """
        # Limit the GPUs visible to CUDA. BUG FIX: guard against a key that
        # is present but None (e.g. an unset argparse option) — the old
        # presence-only check crashed on None.split(',').
        if args.get("device") is not None:
            os.environ["CUDA_VISIBLE_DEVICES"] = args['device']
            self.device = [int(device) for device in args['device'].split(',')]

        # BUG FIX: test truthiness, not mere key presence, so a
        # present-but-None entry falls through instead of crashing in Path().
        if args.get('resume'):
            self.resume: Union[Path, None] = Path(args['resume'])
            self.cfg_fname = self.resume.parent / DEFAULT_CONFIG_FILE_NAME
        elif args.get('config'):
            self.resume: Union[Path, None] = None
            self.cfg_fname = Path(args['config'])
        else:
            # BUG FIX: the two fragments previously concatenated without a
            # separator ("...specified.add -c ...").
            msg_no_cfg = ("Configuration file need to be specified.\n"
                          "add -c config.json for example.")
            raise ValueError(msg_no_cfg)

        self._config = self.load(self.cfg_fname)
        self._update(args)

        # set save_dir where trained model and log will be saved
        save_dir = Path(self._config["trainer"]["save_dir"])
        expr_name = self._config["name"]
        if self.resume:
            # Reuse the resumed run's folder name as the time stamp; the
            # directories are assumed to already exist (no mkdir here).
            time_stamp = self.resume.parent.stem if timestamp else ''
            self._save_dir = save_dir / "models" / expr_name / time_stamp
            self._log_dir = save_dir / "log" / expr_name / time_stamp
            self._test_dir = save_dir / "test" / expr_name / time_stamp

        else:
            # Fresh run: create timestamped output directories and snapshot
            # the effective config next to the checkpoints.
            time_stamp = datetime.now().strftime(
                r"%m%d_%H%M%S") if timestamp else ''
            self._save_dir = save_dir / "models" / expr_name / time_stamp
            self._log_dir = save_dir / "log" / expr_name / time_stamp
            self._test_dir = save_dir / "test" / expr_name / time_stamp

            self._save_dir.mkdir(parents=True, exist_ok=True)
            self._log_dir.mkdir(parents=True, exist_ok=True)
            self._test_dir.mkdir(parents=True, exist_ok=True)

            self.save(self._save_dir / DEFAULT_CONFIG_FILE_NAME)

        # configurations for logging module
        setup_logging(self.log_dir)
        # Map CLI verbosity (0/1/2) to logging levels.
        self._log_level = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
    def __init__(self,
                 dataset,
                 batch_size,
                 shuffle,
                 validation_split,
                 num_workers,
                 collate_fn=default_collate,
                 drop_last=False):
        """
        Initiates the DataLoader with the given parameters and initiates the super class.
        :param dataset (Dataset): dataset from which to load the data.
        :param batch_size (int): how many samples per batch to load.
        :param shuffle (bool): set to ``True`` to have the data reshuffled at every epoch.
        :param validation_split (int or float): If integer, treats as amount of validation examples.
                                                If float, treats as percent of validation examples.
        :param num_workers (int): how many subprocesses to use for data loading.
        :param collate_fn (Callable): merges a list of samples to form a mini-batch of Tensor(s).
        :param drop_last (bool): determines behavior of the last non-full batch.
        """
        # Define logger (set up global logging once, lazily).
        if not LOGGER_SETUP:
            setup_logging()
        self.logger = logging.getLogger('BaseDataLoader')
        self.logger.setLevel(logging.INFO)

        self.validation_split = validation_split
        self.shuffle = shuffle

        self.batch_idx = 0
        self.n_samples = len(dataset)

        # Define samplers for the training split and the validation split.
        # NOTE(review): _split_sampler is defined elsewhere in this class;
        # presumably it also clears self.shuffle when a sampler is returned,
        # since passing both a sampler and shuffle=True to the superclass
        # would be rejected — confirm.
        self.sampler, self.valid_sampler = self._split_sampler(
            self.validation_split)
        self.drop_last = drop_last

        # Define arguments to initiate the superclass. Kept on the instance
        # so they can be reused (e.g. to build the validation loader).
        self.init_kwargs = {
            'dataset': dataset,
            'batch_size': batch_size,
            'shuffle': self.shuffle,
            'collate_fn': collate_fn,
            'num_workers': num_workers,
            'drop_last': self.drop_last
        }
        super().__init__(sampler=self.sampler, **self.init_kwargs)
    def __init__(self, config, resume=None, modification=None, run_id=None):
        """Parse the training configuration and prepare run directories.

        Handles hyper-parameters for training, initialisation of modules,
        checkpoint saving and the logging module.

        :param config: dict of configurations and hyper-parameters for
            training, e.g. the contents of a ``config.json`` file.
        :param resume: path to the checkpoint being loaded, if any.
        :param modification: dict of keychain:value pairs overriding entries
            of ``config``.
        :param run_id: unique identifier for this training run, used to name
            the checkpoint/log directories; a timestamp is used by default.
        """
        # Apply the overrides before anything reads from the config.
        self._config = _update_config(config, modification)
        self.resume = resume

        # Default the run id to a timestamp so separate runs never collide.
        if run_id is None:
            run_id = datetime.now().strftime(r'%m%d_%H%M%S')

        base_dir = Path(self.config['trainer']['save_dir'])
        exper_name = self.config['name']
        self._save_dir = base_dir / 'models' / exper_name / run_id
        self._log_dir = base_dir / 'log' / exper_name / run_id
        self._predictions_dir = base_dir / 'predictions' / exper_name / run_id
        self.predictions_file_name = self._predictions_dir / 'predictions.csv'

        # An empty run id deliberately reuses existing directories; any other
        # id must be fresh, so collisions raise.
        exist_ok = run_id == ''
        for directory in (self.save_dir, self.log_dir, self.predictions_dir):
            directory.mkdir(parents=True, exist_ok=exist_ok)

        # Snapshot the effective config next to the checkpoints.
        write_json(self.config, self.save_dir / 'config.json')

        # Configure the logging module and the verbosity-to-level mapping.
        setup_logging(self.log_dir)
        self.log_levels = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
Exemple #7
0
    def init_params(cls, config_fname=None):
        """Initialise and set up params from a config file.

        Parameters
        ----------
        config_fname : str or Path, optional
            Path to the JSON configuration file. Defaults to the
            ``config.json`` that sits two levels above this module.

        Returns
        -------
        An instance of ``cls`` built from the parsed configuration.
        """
        if config_fname is None:
            config_fname = os.path.join(Path(__file__).parent.parent, 'config.json')
            # BUG FIX: logging takes lazy %-style arguments, not print-style
            # comma-separated values — the old call raised a formatting
            # error inside logging and never showed the filename.
            logging.info('Using default configuration file: %s', config_fname)

        # parse the config file
        _config = read_json(config_fname)
        # specify the saving directory
        proj_name = _config['project_name']
        save_root_dir = Path(_config['resparams']['save_dir'])
        run_id = datetime.now().strftime(r'%m%d_%H%M%S')
        save_model_dir = save_root_dir / proj_name / 'models'
        save_log_dir = save_root_dir / proj_name / 'logs'

        # mkdir(exist_ok=True) already tolerates pre-existing directories,
        # so the former os.path.exists() pre-checks were redundant.
        save_model_dir.mkdir(parents=True, exist_ok=True)
        save_log_dir.mkdir(parents=True, exist_ok=True)

        # record the run id and resolved paths back into the config
        _config['resparams']['run_id'] = run_id
        _config['resparams']['save_model_dir'] = str(save_model_dir)
        _config['resparams']['save_log_dir'] = str(save_log_dir)

        # setup logging
        setup_logging(save_log_dir, run_id=run_id)

        return cls(_config)
Exemple #8
0
import sys
import worker
from shutil import rmtree
from time import sleep
from algthm.utils.file import dir_empty
from cfg.loader import cfg
from multiprocessing import Process
from logger import logger
from dex.core.db import MongoConnection
from dex.core.exceptions.indexer import IndexerBootFailure
from logging import CRITICAL, getLogger
from datetime import datetime
from elasticsearch import Elasticsearch, ElasticsearchException


# Configure application-wide logging, then rebind the module name `logger`
# to the actual 'dex' Logger instance for use below.
logger.setup_logging()
logger = logger.get_logger('dex')
# Silence pika's output below CRITICAL.
pika_logger = getLogger('pika')
pika_logger.setLevel(CRITICAL)


def initialize_workers(num_workers, target, daemon=True):
    """
    Initializes the worker processes.
    """
    workers = []
    process = None

    print '> initializing {} workers ..'.format(num_workers),

    for i in range(num_workers):
Exemple #9
0
                criterion = eval("loss_module." + trainer_loss)
                # metrics
                metrics = [
                    eval("metric_module." + met)
                    for met in eval(trainer_metrics)
                ]

                # optimizer
                optimizer = eval("optimizer_module." + trainer_optimizer)(
                    **sessions.trainer_params[trainer_id]["optimizer_params"],
                    params=model.parameters())
                # lr_scheduler
                lr_scheduler = eval("scheduler_module." + trainer_scheduler)(
                    **sessions.trainer_params[trainer_id]['scheduler_params'],
                    optimizer=optimizer)
                setup_logging(trainer_save_path)
                trainer = Trainer(
                    model=model,
                    criterion=criterion,
                    metrics=metrics,
                    optimizer=optimizer,
                    epoch=trainer_epoch,
                    device=device,
                    data_loader=data_loader,
                    valid_data_loader=valid_data_loader,
                    lr_scheduler=lr_scheduler,
                    sts=[trainer_stop_btn, trainer_chart, trainer_save_path])
                trainer.train()

    if cur == "Eval":
        eval_container = st.sidebar.beta_container()
Exemple #10
0
    def __init__(self, config, resume=None, modification=None, run_id=None):
        """
        Parse the JSON configuration file.

        Handles hyperparameters for training, initialization of modules,
        checkpoint saving and the logging module.

        :param config: Dict containing configurations and hyperparameters
            for training, e.g. the contents of the `rnn_config.json` file.
        :param resume: String, path to the checkpoint being loaded.
        :param modification: Dict keychain:value, specifying position values
            to be replaced from the config dict.
        :param run_id: Unique identifier for the training process, used to
            save checkpoints and the training log. A timestamp is used as
            the default.
        """

        # Unique run identifier: fall back to a timestamp when none given.
        if run_id is None:
            run_id = datetime.now().strftime(r'%m%d_%H%M%S')

        # Load the config and apply the overrides first — everything below
        # reads from the merged result.
        self._config = _update_config(config, modification)

        # Experiment name.
        # NOTE(review): computed but never used below — the output paths do
        # not include it, unlike the sibling parsers; confirm intended.
        exper_name = self.config['name']

        # Path of the checkpoint to resume from (may be None).
        self.resume = resume

        # Root directory for models, checkpoints and logs.
        # NOTE(review): reads the hard-coded key 'trainer'->'saved' (not
        # 'save_dir' as elsewhere); the original author flagged this as
        # something to fix.
        save_dir = Path(self.config['trainer']['saved'])

        # Path objects compose with '/', unlike plain strings.
        self._save_dir = save_dir / 'models' / run_id
        self._log_dir = save_dir / 'log' / run_id

        # Create the model and log directories.
        """
        When run_id is '', exist_ok is True.
        pathlib.Path('/my/directory').mkdir(parents=True, exist_ok=True)
        parents: also create any missing parent directories.
        exist_ok: only raise when the directory already exists and
        exist_ok is False.
        """
        exist_ok = run_id == ''
        self.save_dir.mkdir(parents=True, exist_ok=exist_ok)
        self.log_dir.mkdir(parents=True, exist_ok=exist_ok)

        # Snapshot the configuration used by this run.
        """
        save updated config file to the checkpoint dir
        """
        write_json(self.config, self.save_dir / 'config.json')

        # Set up the logging module.
        setup_logging(self.log_dir)
        # Verbosity-to-level mapping.
        self.log_levels = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }
    def __init__(self,
                 data_dir,
                 gold_type,
                 domain_split,
                 length_split,
                 train=True,
                 valid=False,
                 debug=False):
        """
        Initiates the Bread dataset.
        :param data_dir (str):  Path to the data in which to save/ from which to read the dataset.
        :param gold_type (str): Identifier of the gold representation to use;
                                stored on the instance (consumed elsewhere).
        :param domain_split:    If truthy, load the domain-split variant of
                                the dataset instead of the vanilla one.
        :param length_split:    If truthy (and domain_split is falsy), load
                                the length-split variant of the dataset.
        :param train:           True to load the train data, False to load the test data.
        :param valid:           If train is False, ignored. If train is True, then valid=True will load the validation
                                data and valid=False will load the train data.
        :param debug:           True for using a small subset of the dataset.
        """
        # Define logger (set up global logging once, lazily).
        if not LOGGER_SETUP:
            setup_logging()
        self.logger = logging.getLogger('BREAKLogical')
        self.logger.setLevel(logging.INFO)
        self.logger.info("Preparing dataset")
        super(BREAKLogical, self).__init__()

        self.gold_type = gold_type
        self.domain_split = domain_split
        self.length_split = length_split

        # Resolve which split to read: 'test' by default, 'train' when
        # train=True, 'validation' when both train and valid are True.
        self.dataset_split = 'test'
        if train:
            self.dataset_split = 'train'
            if valid:
                self.dataset_split = 'validation'

        self.logger.info('loading data split:' + self.dataset_split)

        # The vanilla dataset is always loaded first; a requested
        # domain/length split then replaces it.
        self.logger.info('loading vanilla dataset')
        self.dataset_logical = self.load_dataset(data_dir, 'logical-forms',
                                                 self.logger)
        if self.domain_split:
            self.logger.info('loading domain split dataset')
            self.dataset_logical = self.load_domain_split_dataset(
                data_dir, self.logger)
        elif self.length_split:
            self.logger.info('loading length split dataset')
            self.dataset_logical = self.load_length_split_dataset(
                data_dir, self.logger)
        self.logger.info('dataset ready.')

        # Download spacy language model
        # NOTE(review): the check looks for 'en_core_web_sm' but the command
        # downloads the 'en' shortcut — confirm these resolve to the same
        # package on the spacy version in use.
        if not spacy.util.is_package("en_core_web_sm"):
            self.logger.info('Downloading spacy english core...')
            run(['python', '-m', 'spacy', 'download', 'en'])

        # Prepare the data parts
        self.ids = self.dataset_logical[self.dataset_split]['question_id']
        self.questions = self.dataset_logical[
            self.dataset_split]['question_text']
        # lexicon is based on vanilla/ domain_split type of dataset_logical
        self.lexicon_str = self.get_lexicon()[self.dataset_split]
        self.logger.info('dataset and lexicon ready.')

        # uses QDMR
        self.qdmrs = [
            format_qdmr(decomp) for decomp in self.dataset_logical[
                self.dataset_split]["decomposition"]
        ]
        self.programs = self.get_programs()

        # Truncate every field to a small prefix for quick debugging runs.
        if debug:
            self.ids = self.ids[:DEBUG_EXAMPLES_AMOUNT]
            self.questions = self.questions[:DEBUG_EXAMPLES_AMOUNT]
            self.qdmrs = self.qdmrs[:DEBUG_EXAMPLES_AMOUNT]
            self.lexicon_str = self.lexicon_str[:DEBUG_EXAMPLES_AMOUNT]
            self.programs = self.programs[:DEBUG_EXAMPLES_AMOUNT]
Exemple #12
0
    def __init__(self, args, options='', timestamp=True):
        """Parse CLI arguments, resolve the config file and set up run dirs.

        :param args: an ``argparse.ArgumentParser``; the custom ``options``
            flags are registered on it before parsing.
        :param options: iterable of custom CLI option descriptors exposing
            ``flags`` and ``type`` attributes.
        :param timestamp: accepted for API symmetry with sibling parsers but
            not used by this implementation.
        """
        # parse default and custom cli options
        for opt in options:
            args.add_argument(*opt.flags, default=None, type=opt.type)
        args = args.parse_args()
        self.args = args

        # set config file from arguments (dataset, lr scheduler, loss fn)
        cfg_fname = None
        if args.dataset and args.lr_scheduler and args.loss_fn:
            cfg_fname = './hyperparams/' + args.lr_scheduler + '/config_' + args.dataset + '_' + args.loss_fn + '_' + args.arch + '.json'

        if args.device:
            os.environ["CUDA_VISIBLE_DEVICES"] = args.device
        if args.resume is None:
            # Fresh run: prefer the hyperparam-derived path, otherwise an
            # explicit -c config is mandatory.
            msg_no_cfg = "Configuration file need to be specified. Add '-c config.json', for example."
            if cfg_fname is not None:
                self.cfg_fname = Path(cfg_fname)
            else:
                assert args.config is not None, msg_no_cfg
                self.cfg_fname = Path(args.config)
            config = read_json(self.cfg_fname)
            self.resume = None
        else:
            # Resume: read the config stored beside the checkpoint; an
            # explicit -c file overrides its top-level keys.
            self.resume = Path(args.resume)
            resume_cfg_fname = self.resume.parent / 'config.json'
            config = read_json(resume_cfg_fname)
            if args.config is not None:
                config.update(read_json(Path(args.config)))

        # load config file and apply custom cli options
        self._config = _update_config(config, options, args)

        # set save_dir where trained model and log will be saved.
        save_dir = Path(self.config['trainer']['save_dir'])

        # Components of the output path, all derived from the merged config.
        dataset_name = self.config['name'].split('_')[0]
        model_type = self.config['arch']['type']
        lr_scheduler = self.config['lr_scheduler']['type']
        loss_fn = self.config['train_loss']['type']
        sym_setting = 'sym' if not self.config['trainer']['asym'] else 'asym'
        percent = str(int(self.config['trainer']['percent'] * 100))

        if args.distillation:
            # Distillation runs are additionally keyed by mode and data seed.
            distill_mode = args.distill_mode
            seed = args.dataseed
            self._save_dir = save_dir / 'models' / dataset_name / model_type / lr_scheduler / loss_fn / sym_setting / percent / distill_mode / str(
                int(seed))
            self._log_dir = save_dir / 'log' / dataset_name / model_type / lr_scheduler / loss_fn / sym_setting / percent / distill_mode / str(
                int(seed))
        else:
            self._save_dir = save_dir / 'models' / dataset_name / model_type / lr_scheduler / loss_fn / sym_setting / percent
            self._log_dir = save_dir / 'log' / dataset_name / model_type / lr_scheduler / loss_fn / sym_setting / percent

        self.save_dir.mkdir(parents=True, exist_ok=True)
        self.log_dir.mkdir(parents=True, exist_ok=True)

        # save updated config file to the checkpoint dir
        config_name = 'config_' + str(self.config['seed']) + '.json'
        write_json(self.config, self.save_dir / config_name)

        # configure logging module
        setup_logging(self.log_dir)
        # Map CLI verbosity (0/1/2) to logging levels.
        self.log_levels = {
            0: logging.WARNING,
            1: logging.INFO,
            2: logging.DEBUG
        }