# Code example #1
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    """Build a BERT model, tokenizer, and config from files under BERT_PT_PATH.

    Parameters
    ----------
    BERT_PT_PATH : str
        Directory containing ``bert_config_{bert_type}.json``,
        ``vocab_{bert_type}.txt`` and ``pytorch_model_{bert_type}.bin``.
    bert_type : str
        Suffix selecting which config/vocab/checkpoint triplet to load.
    do_lower_case : bool
        Forwarded to the tokenizer.
    no_pretraining : bool
        When True, skip loading the pre-trained checkpoint and keep the
        randomly initialized weights.

    Returns
    -------
    tuple
        ``(model_bert, tokenizer, bert_config)``.
    """
    bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    # Bug fix: `no_pretraining` was previously ignored — the checkpoint load
    # was commented out, so the model always kept random weights regardless
    # of the flag.  Honor it the way the sibling implementations do.
    if not no_pretraining:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    # `device` is assumed to be a module-level torch.device — TODO confirm.
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
# Code example #2
def get_bert(path_bert):
    """Load the uncased L-12/H-768/A-12 BERT model, tokenizer, and config.

    Parameters
    ----------
    path_bert : str
        Directory containing the config, vocab, and checkpoint files.

    Returns
    -------
    tuple
        ``(model_bert, tokenizer, bert_config)``.
    """
    # os.path.join tolerates a missing or present trailing separator on
    # `path_bert`, unlike the previous raw string concatenation which
    # required the caller to append one.
    bert_config_file = os.path.join(path_bert, 'bert_config_uncased_L-12_H-768_A-12.json')
    vocab_file = os.path.join(path_bert, 'vocab_uncased_L-12_H-768_A-12.txt')
    init_checkpoint = os.path.join(path_bert, 'pytorch_model_uncased_L-12_H-768_A-12.bin')
    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=True)
    model_bert = BertModel(bert_config)
    model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
    print("Load pre-trained parameters.")
    # `gpu` and `device` look like module-level globals — TODO confirm.
    if gpu:
        model_bert.to(device)
    return model_bert, tokenizer, bert_config
# Code example #3
def get_bert(BERT_PATH):
    """Load the uncased L-12/H-768/A-12 BERT model, tokenizer, and config.

    Parameters
    ----------
    BERT_PATH : str
        Directory containing the config, vocab, and checkpoint files.

    Returns
    -------
    tuple
        ``(model_bert, tokenizer, bert_config)``.
    """
    # os.path.join keeps the path portable and avoids a doubled separator
    # when BERT_PATH already ends with one (the previous "+ '/...'" did not).
    bert_config_file = os.path.join(BERT_PATH, "bert_config_uncased_L-12_H-768_A-12.json")
    vocab_file = os.path.join(BERT_PATH, "vocab_uncased_L-12_H-768_A-12.txt")
    init_checkpoint = os.path.join(BERT_PATH, "pytorch_model_uncased_L-12_H-768_A-12.bin")

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=True)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
    print("Load pre-trained BERT parameters.")
    # `device` is assumed to be a module-level torch.device — TODO confirm.
    model_bert.to(device)
    return model_bert, tokenizer, bert_config
# Code example #4
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    """Build a BERT model, tokenizer, and config from files under BERT_PT_PATH.

    Parameters
    ----------
    BERT_PT_PATH : str
        Directory containing ``bert_config_{bert_type}.json``,
        ``vocab_{bert_type}.txt`` and ``pytorch_model_{bert_type}.bin``.
    bert_type : str
        Suffix selecting which config/vocab/checkpoint triplet to load.
    do_lower_case : bool
        Forwarded to the tokenizer.
    no_pretraining : bool
        When True, skip loading the pre-trained checkpoint and keep the
        randomly initialized weights.

    Returns
    -------
    tuple
        ``(model_bert, tokenizer, bert_config)``.
    """
    bert_config_file = os.path.join(BERT_PT_PATH,
                                    f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH,
                                   f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    # Bug fix: `no_pretraining` was accepted but never consulted — the
    # checkpoint was loaded unconditionally.  Honor the flag as the sibling
    # implementations do.
    if not no_pretraining:
        model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
    # `device` is assumed to be a module-level torch.device — TODO confirm.
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
# Code example #5
def get_bert(BERT_PT_PATH):
    """Load BERT config, tokenizer, and pre-trained weights from BERT_PT_PATH.

    Expects ``bert_config.json``, ``vocab.txt`` and ``pytorch_model.bin``
    inside the directory.  Returns ``(model_bert, tokenizer, bert_config)``.
    """
    config_path = os.path.join(BERT_PT_PATH, 'bert_config.json')
    vocab_path = os.path.join(BERT_PT_PATH, 'vocab.txt')
    checkpoint_path = os.path.join(BERT_PT_PATH, 'pytorch_model.bin')

    bert_config = BertConfig.from_json_file(config_path)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_path)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    # Always restore the pre-trained checkpoint (CPU-mapped, then moved).
    model_bert.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
    print("Load pre-trained parameters.")
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
# Code example #6 — file: train.py, project: jaidevd/sqlova
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    """Construct a BERT model, tokenizer, and config.

    Reads ``bert_config_{bert_type}.json``, ``vocab_{bert_type}.txt`` and
    (unless ``no_pretraining``) ``pytorch_model_{bert_type}.bin`` from
    ``BERT_PT_PATH``.  Returns ``(model_bert, tokenizer, bert_config)``.
    """
    config_path = os.path.join(BERT_PT_PATH, f"bert_config_{bert_type}.json")
    vocab_path = os.path.join(BERT_PT_PATH, f"vocab_{bert_type}.txt")
    checkpoint_path = os.path.join(BERT_PT_PATH, f"pytorch_model_{bert_type}.bin")

    bert_config = BertConfig.from_json_file(config_path)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_path, do_lower_case=do_lower_case
    )
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    # Load the checkpoint only when pre-training weights are wanted.
    if not no_pretraining:
        model_bert.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    return model_bert, tokenizer, bert_config
# Code example #7 — file: train.py, project: WILDCHAP/sqlova_hw
def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    """Build a BERT model, tokenizer, and config from files under BERT_PT_PATH.

    Note: unlike the sibling implementations, the file names here are fixed
    (``bert_config.json`` / ``vocab.txt`` / ``pytorch_model.bin``), so the
    ``bert_type`` argument is accepted for interface compatibility but unused.

    Parameters
    ----------
    BERT_PT_PATH : str
        Directory holding the config, vocab, and (optional) checkpoint files.
    bert_type : str
        Unused (see note above).
    do_lower_case : bool
        Forwarded to the tokenizer.
    no_pretraining : bool
        When True, skip loading the pre-trained checkpoint.

    Returns
    -------
    tuple
        ``(model_bert, tokenizer, bert_config)``.
    """
    # Fixed file names — the previous f'' prefixes on these constant strings
    # had no placeholders and were removed (ruff F541).
    bert_config_file = os.path.join(BERT_PT_PATH,
                                    'bert_config.json')  # BERT config file
    vocab_file = os.path.join(BERT_PT_PATH, 'vocab.txt')  # BERT vocabulary file
    init_checkpoint = os.path.join(
        BERT_PT_PATH, 'pytorch_model.bin')  # pre-trained weights (may be absent)

    # BertConfig (bert/modeling): from_json_file reads the JSON config.
    bert_config = BertConfig.from_json_file(bert_config_file)
    # FullTokenizer (bert/tokenization) wraps the vocabulary file.
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)

    bert_config.print_status()  # just prints the configuration

    # BertModel (bert/modeling) is built from the same config object.
    model_bert = BertModel(bert_config)

    # Skip the .bin when only the project's own fine-tuned model is wanted.
    if not no_pretraining:
        # Load the checkpoint, i.e. the pre-trained parameters.
        model_bert.load_state_dict(
            torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    model_bert.to(device)

    # Returns: BERT model, tokenizer (vocabulary), BERT config.
    return model_bert, tokenizer, bert_config