예제 #1
0
def train_siamese_bert():
    """Train the Siamese BERT model on the LCQMC data splits.

    Reads the YAML configuration from ``./configs/config_bert.yml``, sets
    ``CUDA_VISIBLE_DEVICES`` to ``"4"``, builds the vocabulary from
    ``./data/vocab.txt``, loads the train/val/test splits through
    ``data_input.get_lcqmc_bert`` and then fits a ``SiamenseBert`` model
    on them.
    """
    # Read the configuration; the context manager guarantees the file
    # handle is closed even if YAML parsing raises.
    cfg_path = "./configs/config_bert.yml"
    with open(cfg_path, encoding='utf-8') as cfg_file:
        cfg = yaml.load(cfg_file, Loader=yaml.FullLoader)
    # Set before the model is built so that device selection sees it.
    os.environ["CUDA_VISIBLE_DEVICES"] = "4"
    # vocab: converts a sequence into ids.
    vocab = Vocabulary(meta_file='./data/vocab.txt', max_len=cfg['max_seq_len'], allow_unk=1, unk='[UNK]', pad='[PAD]')
    # Load the data splits.
    data_train, data_val, data_test = data_input.get_lcqmc_bert(vocab)
    # For a quick debugging run, truncate the training set here,
    # e.g. data_train[:1000].
    print("train size:{},val size:{}, test size:{}".format(
        len(data_train), len(data_val), len(data_test)))
    model = SiamenseBert(cfg)
    model.fit(data_train, data_val, data_test)
예제 #2
0
파일: train.py 프로젝트: hjzf/dssm-1
def train_siamese_bert():
    """Train the Siamese BERT model with NNI-supplied hyper-parameters.

    Reads the YAML configuration from ``./configs/config_bert.yml``,
    overlays the parameter set returned by ``nni.get_next_parameter()``,
    builds the vocabulary from ``./data/vocab.txt``, loads the
    train/val/test splits through ``data_input.get_lcqmc_bert`` and then
    fits a ``SiamenseBert`` model on them.
    """
    # Read the configuration; the context manager guarantees the file
    # handle is closed even if YAML parsing raises.
    cfg_path = "./configs/config_bert.yml"
    with open(cfg_path, encoding='utf-8') as cfg_file:
        cfg = yaml.load(cfg_file, Loader=yaml.FullLoader)
    # Auto-tuning parameters: each trial receives one set of values from
    # the search space, which overrides the matching config entries.
    tuner_params = nni.get_next_parameter()
    cfg.update(tuner_params)
    # vocab: converts a sequence into ids.
    vocab = Vocabulary(meta_file='./data/vocab.txt',
                       max_len=cfg['max_seq_len'],
                       allow_unk=1,
                       unk='[UNK]',
                       pad='[PAD]')
    # Load the data splits.
    data_train, data_val, data_test = data_input.get_lcqmc_bert(vocab)
    # For a quick debugging run, truncate the training set here,
    # e.g. data_train[:100].
    print("train size:{},val size:{}, test size:{}".format(
        len(data_train), len(data_val), len(data_test)))
    model = SiamenseBert(cfg)
    model.fit(data_train, data_val, data_test)