# ---- predict: parse CLI args, load configs, set up logging/checkpoint/vocabs ----
parser.add_argument('--encoding', default='utf8',
                    help="The encoding for input and output file.")
args = parser.parse_args()

# resolve configuration objects from the parsed command-line arguments
dataset_cfg = DatasetCfg(args.data_dir)
exp_cfg = ExperimentCfg(args.exp_dir)
inputs_file = args.inputs_file
outputs_file = args.outputs_file
restore_checkpoint = args.restore_checkpoint
encoding = args.encoding

# fail fast when the input file is missing
# (message grammar fixed: "not exists" -> "does not exist")
msg = "Inputs file does not exist: {}"
assert os.path.isfile(inputs_file), msg.format(inputs_file)

# log file is written inside the experiment directory
logger = Logger.set(os.path.join(exp_cfg.experiment_dir(), 'predict.log'))

# checkpoint manager used to restore model weights for prediction
checkpoint = Checkpoint(
    checkpoint_dir=exp_cfg.experiment_dir(),
    filename=exp_cfg.checkpoint_filename(),
    best_checkpoint=exp_cfg.best_checkpoint(),
    latest_checkpoint=exp_cfg.latest_checkpoint(),
    logger=logger)

# load params
# NOTE(review): `words_file` and `tags_file` are not defined in this chunk —
# presumably assigned earlier in the file; confirm against the full source.
word_vocab = Vocab(words_file)
tag_vocab = Vocab(tags_file)
params = Params(exp_cfg.params_file())
# merge dataset-level parameters into the experiment parameters
params.update(Params(dataset_cfg.params_file()))
# record whether CUDA is available so downstream code can pick the device
params.set('cuda', torch.cuda.is_available())
# load parser dataset_cfg = DatasetCfg(config.data_dir) exp_cfg = ExperimentCfg(config.base_model_dir) parser = get_parser(data_dir=dataset_cfg.data_dir(), exp_dir=exp_cfg.experiment_dir(), restore_checkpoint=None) # parse command line arguments args = parser.parse_args() restore_checkpoint = args.restore_checkpoint dataset_cfg.set_data_dir(args.data_dir) exp_cfg.set_experiment_dir(args.exp_dir) # set logger # Note: log file will be stored in the `exp_dir` directory logger = Logger.set(exp_cfg.train_log()) # load experiment configuration logger.info("Loading the experiment configurations...") params = Params(exp_cfg.params_file()) logger.info("- done.") # set params params.set('cuda', torch.cuda.is_available()) # load datesets logger.info("Loading the datasets...") # add datasets parameters into params params.update(Params(dataset_cfg.params_file())) trainloader, valloader = load_data(params, dataset_cfg.data_dir(),
args = parser.parse_args() msg = 'Data file {} not found.' assert os.path.isfile(args.data_file), msg.format(args.data_file) msg = '{} directory not found. Please create it first.' assert os.path.isdir(args.data_dir), msg.format(args.data_dir) msg = 'the proportion of dataset to builded must in (0.0, 1.0]' assert (args.data_factor > 0.0) and (args.data_factor <= 1.0), msg msg = 'train factor + val factor + test factor must be equal to 1.0' total = args.train_factor + args.val_factor + args.test_factor assert (1.0 == total), msg dataset_cfg.set_data_dir(args.data_dir) # set and get logger logger = Logger.set(dataset_cfg.log_file()) # build, load and dump datasets builder = Builder(data_factor=args.data_factor, train_factor=args.train_factor, val_factor=args.val_factor, test_factor=args.test_factor, train_name=args.train_name, val_name=args.val_name, test_name=args.test_name, logger=logger) builder.load(args.data_file, encoding='windows-1252') builder.dump(dataset_cfg.data_dir(), dataset_cfg.params_file(), min_count_word=args.min_count_word, min_count_tag=args.min_count_tag,
if __name__ == '__main__':
    # build configuration objects from the config module's defaults
    dataset_cfg = DatasetCfg(config.data_dir)
    exp_cfg = ExperimentCfg(config.base_model_dir)
    # parser defaults: evaluate the best checkpoint on the test split
    parser = get_parser(data_dir=dataset_cfg.data_dir(),
                        exp_dir=exp_cfg.experiment_dir(),
                        restore_checkpoint=exp_cfg.best_checkpoint(),
                        dataset_name=dataset_cfg.test_name())
    args = parser.parse_args()
    dataset_name = args.dataset_name
    restore_checkpoint = args.restore_checkpoint
    # CLI arguments override the configured directories
    dataset_cfg.set_data_dir(args.data_dir)
    exp_cfg.set_experiment_dir(args.exp_dir)
    # set logger
    logger = Logger.set(exp_cfg.evaluate_log())
    # load model configuration
    logger.info("Loading the experiment configurations...")
    params = Params(exp_cfg.params_file())
    # cuda flag: record availability so evaluation can choose the device
    params.set('cuda', torch.cuda.is_available())
    logger.info("- done.")
    # load datasets
    logger.info("Loading the {} dataset...".format(dataset_name))
    # add datasets parameters into params
    params.update(Params(dataset_cfg.params_file()))
    # dataset size is looked up from params by split name, e.g. 'test_size'
    dataset = load_data(params, dataset_cfg.data_dir(), dataset_name,
                        params['{}_size'.format(dataset_name)])
    logger.info("- done.")