Beispiel #1
0
def main():
    """Preprocess the training data, build the NMT Transformer, and train it."""
    # Data preprocessing required for training; yields both vocabulary sizes.
    src_vocab_size, tgt_vocab_size = _pre.train_preprocess()

    # Build the Transformer model and its associated variables.
    model = nmt_model.get_model(src_vocab_size, tgt_vocab_size)

    # Kick off training.
    trainer.train(model)
Beispiel #2
0
def main():
    """Preprocess the data, build the Transformer, and train with validation."""
    # Data preprocessing required for training; yields both vocabulary sizes.
    source_vocab, target_vocab = _pre.train_preprocess()

    # Build the Transformer model and its associated variables.
    model = nmt_model.get_model(source_vocab, target_vocab)

    # Launch training; validation settings come from the project config.
    trainer.train(
        model,
        validation_data=_config.validation_data,
        validation_split=1 - _config.train_size,
        validation_freq=_config.validation_freq)
Beispiel #3
0
def main():
    """Train the Transformer, then average the last checkpoints and print the result path."""
    model = nmt_model.get_model(2894, 1787)
    schedule = trainer.CustomSchedule(_config.d_model)
    optimizer = tf.keras.optimizers.Adam(
        schedule, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    # Objects tracked by the checkpoint averaging utility.
    trackables = {'transformer': model, 'optimizer': optimizer}
    model_dir = _config.checkpoint_path
    output_dir = _config.checkpoint_path + '_avg_ckpts'
    # exist_ok makes the prior exists() check unnecessary.
    os.makedirs(output_dir, exist_ok=True)

    trainer.train(
        model,
        validation_data=_config.validate_from_txt,
        validation_split=1 - _config.train_size,
        validation_freq=_config.validation_freq)

    averaged = average_checkpoints(
        model_dir, output_dir, trackables, max_count=8, model_key='transformer')
    print(averaged)
def start_training():
    """Starts the training asynchronously using the flask executor

    It runs the training based on the DSI_EXECUTE_ON environment variable and at the end,
    removes the future from the executor

    Raises:
        ValueError: wrapping any exception raised by the training code.
    """
    logger = logging.getLogger(__name__)  # fetch once instead of per call
    logger.info("Training execution started...")
    # noinspection PyBroadException
    try:
        environment = execution_environment()
        if environment == DSI_EXECUTE_ON_LOCAL:
            # Call dvc_remote() once and reuse the value (was called twice).
            dvc_repo = dvc_remote()
            if dvc_repo:
                train(dvc_data_repo=dvc_repo,
                      dvc_ssh_user=ssh_username(),
                      dvc_ssh_password=ssh_password())
            else:
                train()
        elif environment == DSI_EXECUTE_ON_SSH:
            connection = SSHRemoteExecutor(host=ssh_host(),
                                           username=ssh_username(),
                                           password=ssh_password(),
                                           debug_mode=debug_mode()
                                           or flask_args.debug,
                                           port=ssh_port(),
                                           dvc_remote=dvc_remote())

            connection.setup_prerequisites()
            connection.run_training()
            connection.save_model_locally()
        else:
            raise Exception("{0} has a unknown value '{1}'".format(
                DSI_EXECUTE_ON, environment))

        logger.info("Training execution ended!!!")
    except Exception as training_exc:
        # This exception is broad because we cannot foresee all possible
        # exceptions in the DS train code. Also, since this train is being
        # executed in a separate thread, all exceptions should be caught.
        logger.info("Training execution raised an exception...")
        # format_exc() replaces the StringIO/print_exc/seek/read dance.
        logger.error(traceback.format_exc())
        # Chain the original exception so the cause is preserved.
        raise ValueError(training_exc) from training_exc
def train_wrapper(model):
    """Run the full training loop for *model* with periodic snapshots and tests."""
    # Optionally resume from a pretrained checkpoint.
    start_epoch = 1
    if args.resume:
        model.load(args.pretrained_model)
        start_epoch = args.resume_count

    # Data loaders for training and validation splits.
    train_loader = DataLoader(
        dataset=SunspotData(args.train_data_paths, args),
        num_workers=args.num_work,
        batch_size=args.batch_size,
        shuffle=True,
        pin_memory=True,
        drop_last=True)
    valid_loader = DataLoader(
        dataset=SunspotData(args.valid_data_paths, args),
        num_workers=0,
        batch_size=args.batch_size,
        shuffle=True,
        pin_memory=False,
        drop_last=True)

    # Summary writers for train/test curves.
    train_summary = SummaryHelper(
        save_path=os.path.join(args.logs_dir, 'train'),
        comment='custom',
        flush_secs=20)
    test_summary = SummaryHelper(
        save_path=os.path.join(args.logs_dir, 'test'),
        comment='custom',
        flush_secs=20)

    eta = args.sampling_start_value

    for epoch in range(start_epoch, args.max_epoch + 1):
        epoch_losses = []
        model.train_mode()
        for itr, (imgs, names) in enumerate(train_loader):
            # Scheduled sampling: eta decays as training progresses.
            eta, real_input_flag = schedule_sampling(eta, epoch)
            real_input_flag = torch.from_numpy(real_input_flag)
            epoch_losses.append(
                trainer.train(model, imgs, real_input_flag, args, epoch, itr))

        train_summary.add_scalar(
            'train/loss', np.mean(epoch_losses), global_step=epoch)

        # Periodic checkpointing.
        if epoch % args.snapshot_interval == 0:
            model.save(epoch)

        # Periodic evaluation on the validation set.
        if epoch % args.test_interval == 0:
            model.eval_mode()
            metrics = trainer.test(model, valid_loader, args, epoch,
                                   args.gen_frm_dir, args.is_sunspots)
            test_summary.add_scalars('test', metrics, global_step=epoch)
Beispiel #6
0
        # Ensure the weight directory exists before any save/load below.
        if os.path.exists("./weight/CNN/") == False:
            os.mkdir("./weight/CNN/")

        # Training branch: build the model, optionally restore, train, save.
        if FLAGS.train:
            model = cnn_model.Model(FLAGS.keep_prob, FLAGS.class_num)
            print(model)

            # Best-effort restore; any failure falls back to fresh training.
            # NOTE(review): bare except hides unrelated errors — consider
            # narrowing to (FileNotFoundError, RuntimeError).
            try:
                model.load_state_dict(torch.load("./weight/CNN/weight.pt"))
                print("\n***\nCheckpoint found\nModel Restored\n***\n")
            except:
                print("\n***\nNo Checkpoint found\nTraining from begining\n***\n")

            # NOTE(review): this rebinds the name `trainer`, shadowing the
            # `trainer` module; if FLAGS.predict also runs, the
            # `trainer.Predictor` lookup below would fail — verify.
            trainer = trainer.Trainer(FLAGS.data, model, FLAGS.bsize, FLAGS.lr, FLAGS.epoch)
            trainer.train()

            torch.save(model.state_dict(), "./weight/CNN/weight.pt")

        # Prediction branch: a checkpoint is mandatory here.
        if FLAGS.predict:
            model = cnn_model.Model(FLAGS.keep_prob, FLAGS.class_num)
            print(model)

            try:
                model.load_state_dict(torch.load("./weight/CNN/weight.pt"))
                print("\n***\nCheckpoint found\nModel Restored\n***\n")
            except:
                print("\n***\nNo Checkpoint found\nPrediction Abort, train the model first.\n***\n")
                sys.exit()
            
            predictor = trainer.Predictor(FLAGS.data, model)
"""
This file contains the script for executing the training on a remote machine
"""

if __name__ == '__main__':
    import argparse
    import os

    from services.infrastructure.logging import initialize_logging
    from model.trainer import train

    # Command-line interface for the remote training entry point.
    parser = argparse.ArgumentParser(description="Remote training script")
    parser.add_argument(
        "--env", "-e", required=True, type=str,
        help="Environment folder/name")
    parser.add_argument(
        "--debug", "-d", action="store_true",
        help="Enables debug mode")
    args = parser.parse_args()

    # Work from the environment directory so relative paths resolve there.
    os.chdir(args.env)
    initialize_logging(
        path="training-remote.log", remote=True, debug=args.debug)
    train()
                        # NOTE(review): the opening
                        # parser.add_argument("--dvc_remote", ... line of this
                        # call is missing from this view — likely a scrape seam.
                        type=str,
                        default=None,
                        help="dvc remote repository name")
    # SSH user used by DVC when pulling data from the remote repository.
    parser.add_argument("--dvc_user",
                        "-u",
                        required=False,
                        type=str,
                        default=None,
                        help="ssh user for the remote dvc repository")
    # SSH password for the same DVC remote.
    parser.add_argument("--dvc_password",
                        "-p",
                        required=False,
                        type=str,
                        default=None,
                        help="ssh password for the remote dvc repository")
    # NOTE(review): default=True means debug mode is always on unless the
    # caller passes an explicit falsy value — confirm this is intended
    # (elsewhere in this file the flag uses action="store_true").
    parser.add_argument("--debug",
                        "-d",
                        required=False,
                        default=True,
                        help="Enables debug mode")

    args = parser.parse_args()

    # Work from the environment directory so relative paths resolve there.
    os.chdir(args.env)
    initialize_logging(path="training-remote.log",
                       remote=True,
                       debug=args.debug)
    train(dvc_data_repo=args.dvc_remote,
          dvc_ssh_user=args.dvc_user,
          dvc_ssh_password=args.dvc_password)
Beispiel #9
0
def main():
    """Entry point: dispatch to train / eval / translate based on the -t option."""
    # Configure the command-line options.
    parser = OptionParser(version='%prog V1.0')

    parser.add_option("-t",
                      "--type",
                      action="store",
                      type="string",
                      dest="type",
                      default="translate",
                      help="TYPE: train/eval/translate")
    # Reject any leading argument that is not the -t option.
    if len(sys.argv) > 1 and sys.argv[1] not in ['-t']:
        print('Error:no option ' + sys.argv[1])
        print(parser.format_option_help())
    (options, args) = parser.parse_args()

    if options.type == 'train':
        # Load the sentence pairs (English/Chinese).
        en, ch = _pre.load_sentences(_config.path_to_train_file,
                                     _config.num_sentences)
        # Preprocess sentences on each side with its configured tokenize mode.
        en = _pre.preprocess_sentences_en(en, mode=_config.en_tokenize_type)
        ch = _pre.preprocess_sentences_ch(ch, mode=_config.ch_tokenize_type)
        # Build and save the tokenizers / vocabularies.
        tokenizer_en, vocab_size_en = _pre.create_tokenizer(
            sentences=en,
            mode=_config.en_tokenize_type,
            save_path=_config.en_bpe_tokenizer_path)
        tokenizer_ch, vocab_size_ch = _pre.create_tokenizer(
            sentences=ch,
            mode=_config.ch_tokenize_type,
            save_path=_config.ch_tokenizer_path)
        print('vocab_size_en:%d' % vocab_size_en)
        print('vocab_size_ch:%d' % vocab_size_ch)
        # Encode sentences into integer tensors.
        tensor_en, max_sequence_length_en = _pre.encode_sentences(
            sentences=en,
            tokenizer=tokenizer_en,
            mode=_config.en_tokenize_type)
        tensor_ch, max_sequence_length_ch = _pre.encode_sentences(
            sentences=ch,
            tokenizer=tokenizer_ch,
            mode=_config.ch_tokenize_type)

        # Create the model and its related training variables.
        optimizer, train_loss, train_accuracy, transformer = network.get_model(
            vocab_size_en, vocab_size_ch)
        # Start training.
        trainer.train(tensor_en, tensor_ch, transformer, optimizer, train_loss,
                      train_accuracy)

    elif options.type == 'eval' or options.type == 'translate':
        if_ckpt = _pre.check_point()  # check whether a checkpoint exists
        if if_ckpt:
            # Load the English and Chinese tokenizers.
            tokenizer_en, vocab_size_en = _pre.get_tokenizer(
                path=_config.en_bpe_tokenizer_path,
                mode=_config.en_tokenize_type)
            tokenizer_ch, vocab_size_ch = _pre.get_tokenizer(
                path=_config.ch_tokenizer_path, mode=_config.ch_tokenize_type)
            print('vocab_size_en:%d' % vocab_size_en)
            print('vocab_size_ch:%d' % vocab_size_ch)
            # Create the model and its related variables.
            optimizer, _, _, transformer = network.get_model(
                vocab_size_en, vocab_size_ch)
            # Restore weights from the checkpoint.
            network.load_checkpoint(transformer, optimizer)
            if options.type == 'eval':
                # Evaluation mode: interactive metric selection (prompts are
                # intentionally in Chinese — user-facing strings).
                print('-' * 30)
                print('可选择评价指标: 1.bleu指标  0.退出程序')
                eval_mode = input('请输入选择指标的序号:')
                if eval_mode == '1':
                    eval.calc_bleu(_config.path_to_eval_file, transformer,
                                   tokenizer_en, tokenizer_ch)
                elif eval_mode == '0':
                    print('感谢您的体验!')
                else:
                    print('请输入正确序号')
            elif options.type == 'translate':
                # Translation mode: interactive loop, '0' exits.
                while True:
                    print('-' * 30)
                    print('输入0可退出程序')
                    sentence = input('请输入要翻译的句子 :')
                    if sentence == '0':
                        break
                    else:
                        print(
                            '翻译结果:',
                            translator.translate(sentence, transformer,
                                                 tokenizer_en, tokenizer_ch))
        else:
            print('请先训练才可使用其它功能...')
    # NOTE(review): this branch only runs when options.type is not one of
    # train/eval/translate AND more than two argv entries exist — confirm
    # it is reachable as intended.
    elif len(sys.argv) > 2:
        print('Error:no TYPE ' + sys.argv[2])
        print(parser.format_option_help())
from model.conv_net import conv_net
from model.trainer import neural_net_trainer, train
from utils.image_handler import load_datasets

# Shared keyword arguments for the trainers (currently unused).
kwargs = {}

# Load the image dataset: inputs X plus object and emotion label sets.
X, Yobj, Yemt = load_datasets(img_dir='Images')

# Object labeller - man, woman or child
net_obj = conv_net(input_shape=X[0].shape)
model_obj = neural_net_trainer(net_obj, best_checkpoint_path='best_obj_model')
train(model_obj, X, Yobj, save_path='obj_model.tfl')

# Emotion labeller - happy, sad etc
# FIX: was conv_net(input_shape=X.shape) — the network input shape must be
# that of a single sample (as for net_obj above), not of the whole dataset.
net_emt = conv_net(input_shape=X[0].shape)
model_emt = neural_net_trainer(net_emt,
                               best_checkpoint_path='best_emotion_model')
train(model_emt, X, Yemt, save_path='emt_model.tfl')