Example #1
#    name_='WW_'+checkpoint_path
#    torch.save(model,os.path.join(checkpoint_DIR,name_))
#    print('model saved to %s / %s' % (checkpoint_DIR,name_))
#    return model

#alphabert  904405 parameters
#lstm       899841 parameters
#bert    108523714 parameters
#bioBERT 108523714 parameters
pretrained_weights = 'bert-base-cased'
bert_model = bert_cls_Model.bert_baseModel(bioBERT=bioBERT)
bert_tokenizer = tokenization_bert.BertTokenizer.from_pretrained(
    pretrained_weights)
try:
    #    bert_model = load_checkpoint('bert_pretrain.pth',bert_model,parallel=parallel)
    bert_model = load_checkpoint(checkpoint_DIR, checkpoint_path, bert_model)
except Exception:
    print('*** No Pretrain_Model ***')

#device_ids = list(range(rank * n, (rank + 1) * n))

filepath = './data'
filename1a = os.path.join(filepath, 'finetune_train.csv')
data_a = pd.read_csv(filename1a, header=None, encoding='utf-8')
data_train = data_a

filename1b = os.path.join(filepath, 'finetune_valtest.csv')
data_val = pd.read_csv(filename1b, header=None, encoding='utf-8')

data_test = data_val.sample(frac=0.5, random_state=1)
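These excerpts rely on a load_checkpoint helper that is not shown. A minimal sketch of what it might look like, assuming the whole model object was saved with torch.save(model, path) as in the commented-out lines at the top of this example (the signature below is an assumption, not the original code):

import os
import torch

def load_checkpoint(checkpoint_dir, checkpoint_name, model):
    # Hypothetical helper: load a full model object that was saved with
    # torch.save(model, os.path.join(checkpoint_dir, checkpoint_name)).
    path = os.path.join(checkpoint_dir, checkpoint_name)
    model = torch.load(path, map_location='cpu')
    print('model loaded from %s' % path)
    return model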
Example #2
                             num_workers=8,
                             collate_fn=task1_dataset.collate_fn_electra)

D2S_test = task1_dataset.task1ds_electra(data_test,
                                         Electratokenizer,
                                         train=False)

D2S_testloader = DataLoader(D2S_test,
                            batch_size=8,
                            shuffle=False,
                            num_workers=8,
                            collate_fn=task1_dataset.collate_fn_electra_test)

try:
    checkpoint_file = './checkpoint_electra'
    Electramodel = load_checkpoint(checkpoint_file, 'electra_task1.pth',
                                   Electramodel)
except Exception:
    print('*** No Pretrain_Model ***')


def test_electra(DS_model, dloader):
    DS_model.to(device)
    DS_model.eval()
    pred_ = []
    with torch.no_grad():

        t0 = time.time()
        #        step_loss = 0
        for batch_idx, sample in enumerate(dloader):
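The example is cut off inside the evaluation loop. A hedged sketch of how such an inference loop typically continues; the batch layout produced by the collate function and the model call are assumptions, not the original code:

            # hypothetical continuation: unpack the batch (exact structure
            # of the collate output is assumed here)
            input_ids, attention_mask = sample
            logits = DS_model(input_ids.to(device),
                              attention_mask=attention_mask.to(device))
            pred_.append(logits.argmax(dim=-1).cpu())
        print('inference done in %.1f s' % (time.time() - t0))
    return pred_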
Example #3
data_test['Cause'] = None
data_test['Effect'] = None

D2S_datatest = task2_dataset.task2_electra_test(ds=data_test,
                                                tokenizer=Electratokenizer)

D2S_testloader = DataLoader(D2S_datatest,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=8,
                            collate_fn=task2_dataset.collate_fn_electra_test)

try:
    checkpoint_file = './checkpoint_bert_task2'
    Electramodel = load_checkpoint(checkpoint_file, 'bert_task2.pth',
                                   Electramodel)
except Exception:
    print('*** No Pretrain_Model ***')


def test_electra(DS_model, dloader, tokenizer, ds):
    DS_model.to(device)
    DS_model.eval()
    pred_ = []
    with torch.no_grad():

        t0 = time.time()
        #        step_loss = 0
        for batch_idx, sample in enumerate(dloader):
Example #4
        pd_total_loss.to_csv('./result/total_loss_finetune_head.csv', sep=',')
    print(total_loss)

try:
    task = sys.argv[1]
    train_val_test = sys.argv[2]
    print('***** task = ', task)
    print('***** mode = ', train_val_test)
except IndexError:
    task = 'test'
    train_val_test = 'test'

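The two positional arguments select the task and the run mode, and both fall back to 'test' when they are missing. A hypothetical invocation (the script name here is an assumption) would be:

# python run_alphabert.py pretrain train   # task='pretrain', mode='train'
# python run_alphabert.py                  # no arguments: task='test', mode='test'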
if task == 'pretrain':
    d2s_stage1_model = alphaBERT_model.alphaBertForMaskedLM(config)
    try:
        d2s_stage1_model = load_checkpoint(checkpoint_file, 'DS_pretrain.pth',
                                           d2s_stage1_model)
    except Exception:
        print('*** No Pretrain_Model ***')
    loader = alphaloader.make_loaders(finetune=False,
                                      head=False,
                                      ahead=True,
                                      pretrain=True,
                                      trvte='test')
    stage1_dataloader = loader['stage1_dataloader']
    stage1_test_dataloader = loader['stage1_dataloader_test']
    train_alphaBert_stage1(d2s_stage1_model,
                           stage1_dataloader,
                           stage1_test_dataloader,
                           lr=1e-5,
                           epoch=50,
Example #5
    data_pretrain,
    tokenize_alphabets,
    clamp_size=config['max_position_embeddings'],
)

D2S_pretrainloader = DataLoader(
    D2S_datapretrain,
    batch_size=batch_size,
    shuffle=True,
    num_workers=8,
    collate_fn=task1_dataset_v02.collate_fn_lstm_pretrain)

lstm_model = gruModel.LSTM_baseModel(config)
try:
    checkpoint_file = './checkpoint_lstm'
    lstm_model = load_checkpoint(checkpoint_file, 'lstm_pretrain.pth',
                                 lstm_model)
except Exception:
    print('*** No Pretrain_Model ***')


def test_lstm(DS_model, dloader):
    DS_model.to(device)
    DS_model.eval()
    pred_ = []
    with torch.no_grad():

        t0 = time.time()
        #        step_loss = 0
        for batch_idx, sample in enumerate(dloader):
Example #6
    print('***** mode = ', train_val_test)
except IndexError:
    task = 'test'
    train_val_test = 'test'

alphaloader = alphabet_loaders(datapath=filepath,
                               config=config,
                               tokenize_alphabets=tokenize_alphabets,
                               num_workers=4,
                               batch_size=batch_size,
                               bioBERTcorpus=0)

if task == 'pretrain':
    d2s_stage1_model = alphaBERT_model.alphaBertForMaskedLM(config)
    try:
        d2s_stage1_model = load_checkpoint(checkpoint_file, 'd2s_total.pth',
                                           d2s_stage1_model)
    except Exception:
        print('*** No Pretrain_Model ***')
    loader = alphaloader.make_loaders(finetune=False,
                                      head=False,
                                      ahead=False,
                                      pretrain=True,
                                      trvte='test')
    stage1_dataloader = loader['stage1_dataloader']
    stage1_test_dataloader = loader['stage1_dataloader_test']
    train_alphaBert_stage1(d2s_stage1_model,
                           stage1_dataloader,
                           stage1_test_dataloader,
                           lr=1e-4,
                           epoch=50,