Example #1
import os


def train(model, config):
    runner = KerasModelDatasetRunner(model,
                                     model_dir=config['model_dir'],
                                     model_name='dssm',
                                     configs=config,
                                     logger_name='dssm')
    train_dataset = datasets.build_train_dataset(config['train_files'], config)
    eval_dataset = None
    if config['eval_files']:
        eval_dataset = datasets.build_eval_dataset(config['eval_files'],
                                                   config)
    runner.train(train_dataset, val_dataset=eval_dataset)
    weights = runner.model.get_weights()
    vocab = []
    with open(config['vocab_file'], "r", encoding="utf-8") as f:
        for word in f.readlines():
            vocab.append(word.strip("\n"))
    if len(vocab) != weights[0].shape[0]:
        print("vocab size:{} != weights[0].shape[0]:{}".format(
            len(vocab), weights[0].shape[0]))
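    # Export the embedding matrix (weights[0]) in word2vec text format:
    # a "rows dims" header line, then one "token v1 v2 ..." line per row.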
    with open(os.path.join(config['model_dir'], "embedding.txt"),
              "w",
              encoding="utf-8") as f:
        f.write(" ".join([str(weights[0].shape[0]),
                          str(weights[0].shape[1])]) + "\n")
        for i, vec in enumerate(weights[0]):
            f.write(" ".join([vocab[i]] + [str(x) for x in vec]) + "\n")
    print('weights:{}, {}'.format(len(weights), weights[0].shape))
    print('weights:{}, {}'.format(weights[1].shape, weights[2].shape))
    print('weights:{}, {}'.format(weights[1], weights[2]))
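Because the export above uses the word2vec text format (a "rows dims" header followed by one "token v1 v2 ..." line per row), the resulting embedding.txt can be loaded with any word2vec-compatible reader. A minimal sketch, assuming gensim is installed and the file sits in the current directory; the query token is hypothetical:

from gensim.models import KeyedVectors

# binary=False because the export is plain text, not the binary word2vec format.
kv = KeyedVectors.load_word2vec_format("embedding.txt", binary=False)

# Inspect nearest neighbours of an arbitrary vocabulary token.
print(kv.most_similar("hello", topn=5))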
Example #2
def train(model, config):
    runner = KerasModelDatasetRunner(model,
                                     model_dir=config['model_dir'],
                                     model_name='dssm',
                                     configs=config,
                                     logger_name='dssm')
    train_dataset = datasets.build_train_dataset(config['train_files'], config)
    eval_dataset = None
    if config['eval_files']:
        eval_dataset = datasets.build_eval_dataset(config['eval_files'],
                                                   config)
    runner.train(train_dataset, val_dataset=eval_dataset)
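Both snippets above (and Example #4 below) read the same keys from config. A minimal illustrative dict, assuming local file paths; all values here are placeholders, not taken from the original project:

config = {
    'model_dir': '/tmp/dssm',          # where checkpoints and exports are written
    'train_files': ['train.txt'],      # training data files
    'eval_files': ['eval.txt'],        # optional; a falsy value skips evaluation
    'predict_files': ['predict.txt'],  # consumed by the predict action in Example #4
    'vocab_file': 'vocab.txt',         # one token per line, aligned with embedding rows
}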
Example #3
        model = models.build_cosine_model(models.model_config)
    else:
        raise ValueError('Invalid model: %s' % args.model)

    runner = KerasModelDatasetRunner(model=model,
                                     model_name='mp',
                                     model_dir=args.model_dir,
                                     configs=None)

    if args.action == 'train':
        train_files = [os.path.join(utils.testdat_dir(), 'train.txt')]
        # use train files as validation files; not recommended in actual use
        valid_files = [os.path.join(utils.testdat_dir(), 'train.txt')]
        train_dataset = dataset.build_train_dataset(train_files)
        valid_dataset = dataset.build_eval_dataset(valid_files)
        runner.train(dataset=train_dataset,
                     val_dataset=valid_dataset,
                     ckpt=args.model_dir)
    elif args.action == 'eval':
        eval_files = [os.path.join(utils.testdat_dir(), 'train.txt')]
        eval_dataset = dataset.build_eval_dataset(eval_files)
        runner.eval(dataset=eval_dataset)
    elif args.action == 'predict':
        predict_files = [os.path.join(utils.testdat_dir(), 'train.txt')]
        predict_dataset = dataset.build_predict_dataset(predict_files)
        runner.predict(dataset=predict_dataset)
    elif args.action == 'export':
        runner.export(path=os.path.join(args.model_dir, 'export'), ckpt=None)
    else:
        raise ValueError('Invalid action: %s' % args.action)
Example #4
    else:
        raise ValueError('Invalid model: %s' % args.model)

    dataset = XYZSameFileDataset(x_tokenizer=tokenizer, y_tokenizer=tokenizer, config=None)

    runner = KerasModelDatasetRunner(
        model=model,
        model_name='dssm',
        model_dir=config['model_dir'],
        configs=config)

    if args.action == 'train':
        train_files = config['train_files']
        train_dataset = dataset.build_train_dataset(train_files=train_files)
        eval_dataset = dataset.build_eval_dataset(eval_files=config['eval_files']) if config['eval_files'] else None
        runner.train(dataset=train_dataset, val_dataset=eval_dataset, ckpt=None)

    elif args.action == 'eval':
        if not config['eval_files']:
            raise ValueError('eval_files must not be None in eval mode.')
        eval_dataset = dataset.build_eval_dataset(eval_files=config['eval_files'])
        runner.eval(dataset=eval_dataset, ckpt=None)
        logging.info('Finished evaluating model.')
    elif args.action == 'predict':
        if not config['predict_files']:
            raise ValueError('predict_files must not be None in predict mode.')
        predict_dataset = dataset.build_predict_dataset(predict_files=config['predict_files'])
        runner.predict(dataset=predict_dataset, ckpt=None)
        logging.info('Finished predicting files.')
    elif args.action == 'export':
        runner.export(os.path.join(config['model_dir'], 'export'), ckpt=None)
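Examples #3 and #4 assume an args object carrying model, action, and model_dir attributes. A minimal command-line driver sketch using argparse; the flag names and defaults are assumptions, not taken from the original project:

import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, required=True,
                        help='Which model to build, e.g. a cosine model.')
    parser.add_argument('--action', type=str, default='train',
                        choices=['train', 'eval', 'predict', 'export'])
    parser.add_argument('--model_dir', type=str, default='/tmp/model')
    return parser.parse_args()

args = parse_args()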