import json

import numpy as np
import torch

from config import set_args
from model import s_model, po_model

# Use the GPU when it is available; all tensors and models below share this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def extract_items(text_in):
    # Decode (subject, predicate, object) triples from a single sentence.
    R = []
    _s = [char2id.get(c, 1) for c in text_in]  # id 1 marks out-of-vocabulary characters
    _s = np.array([_s])
    _k1, _k2, t, t_max, mask = s_model(torch.LongTensor(_s).to(device))
    _k1, _k2 = _k1[0, :, 0], _k2[0, :, 0]  # subject head/tail scores per position
    _kk1s = []
    for i, _kk1 in enumerate(_k1):
        if _kk1 > 0.5:
            # Position i opens a subject span; take the nearest tail j >= i.
            _subject = ''
            for j, _kk2 in enumerate(_k2[i:]):
                if _kk2 > 0.5:
                    _subject = text_in[i: i + j + 1]
                    break
            if _subject:
                # Fresh names here: rebinding _k1/_k2 would clobber the subject
                # scores still being iterated above.
                _sub_head = torch.LongTensor([[i]]).to(device)
                _sub_tail = torch.LongTensor([[i + j]]).to(device)
                _o1, _o2 = po_model(t, t_max, _sub_head, _sub_tail)
                _o1, _o2 = _o1.cpu().data.numpy(), _o2.cpu().data.numpy()
                _o1, _o2 = np.argmax(_o1[0], 1), np.argmax(_o2[0], 1)

                for oi, _oo1 in enumerate(_o1):
                    if _oo1 > 0:
                        # Object head at oi, predicate class _oo1; the object tail
                        # is the first later position predicting the same class.
                        for oj, _oo2 in enumerate(_o2[oi:]):
                            if _oo2 == _oo1:
                                _object = text_in[oi: oi + oj + 1]
                                _predicate = id2predicate[_oo1]
                                R.append((_subject, _predicate, _object))
                                break
        _kk1s.append(_kk1.data.cpu().numpy())
    _kk1s = np.array(_kk1s)  # collected head scores; unused in this snippet
    return list(set(R))

if __name__ == '__main__':
    args = set_args()
    # Load the predicate schema and the character vocabulary.
    id2predicate, predicate2id = json.load(
        open('./data/all_50_schemas_me.json'))
    id2predicate = {int(i): j for i, j in id2predicate.items()}  # JSON keys are strings
    id2char, char2id = json.load(open('./data/all_chars_me.json'))
    num_classes = len(id2predicate)
    # len(char2id) + 2 reserves ids 0 (padding) and 1 (out-of-vocabulary);
    # 49 is the hard-coded predicate class count the checkpoints expect.
    s_model = s_model(len(char2id) + 2, args.char_size, args.hidden_size)
    po_model = po_model(len(char2id) + 2, args.char_size, args.hidden_size, 49)

    s_model.load_state_dict(torch.load('./save_model/s_0.bin', map_location=device))
    po_model.load_state_dict(torch.load('./save_model/po_0.bin', map_location=device))
    s_model.to(device).eval()  # eval() disables dropout for inference
    po_model.to(device).eval()

    text_in = '周杰伦出演了电影不能说的秘密。我当时是在电影院看的'
    with torch.no_grad():
        R = extract_items(text_in)
    print(R)
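
The nested threshold-and-scan decoding above is easier to see in isolation. Below is a minimal NumPy sketch of the same head/tail pointer scheme; the scores are invented for illustration and do not come from the model.

import numpy as np

# Toy subject head/tail probabilities for a six-character string.
k1 = np.array([0.9, 0.1, 0.2, 0.1, 0.1, 0.1])  # head scores
k2 = np.array([0.1, 0.2, 0.8, 0.1, 0.1, 0.1])  # tail scores

text = 'ABCDEF'
spans = []
for i, head in enumerate(k1):
    if head > 0.5:                       # position i opens a span
        for j, tail in enumerate(k2[i:]):
            if tail > 0.5:               # nearest closing position is i + j
                spans.append(text[i: i + j + 1])
                break
print(spans)  # ['ABC']
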
import numpy as np
import torch
import torch.utils.data as Data

import model

# data_generator, myDataset, collate_fn, train_data, args, char2id and
# num_classes are assumed to be provided by the surrounding project.
dg = data_generator(train_data)
T, S1, S2, K1, K2, O1, O2 = dg.pro_res()


torch_dataset = myDataset(T, S1, S2, K1, K2, O1, O2)
loader = Data.DataLoader(
    dataset=torch_dataset,       # torch Dataset holding the preprocessed arrays
    batch_size=args.batch_size,  # mini-batch size
    shuffle=True,                # random shuffle for training
    num_workers=8,               # subprocesses for loading data
    collate_fn=collate_fn,       # custom batch assembly (not shown here)
)
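
The collate_fn passed to the DataLoader is not shown in any of these snippets. As a rough sketch only, assuming each sample is a (T, S1, S2, K1, K2, O1, O2) tuple with variable-length sequence fields and scalar subject positions K1/K2, a padding collate could look like this:

import torch

def collate_fn(batch):
    # Pad every sequence field to the longest T in the batch (id 0 = padding).
    T, S1, S2, K1, K2, O1, O2 = zip(*batch)
    max_len = max(len(t) for t in T)

    def pad(seqs):
        return torch.LongTensor([list(s) + [0] * (max_len - len(s)) for s in seqs])

    return {
        "T": pad(T), "S1": pad(S1), "S2": pad(S2),
        "K1": torch.LongTensor(K1), "K2": torch.LongTensor(K2),
        "O1": pad(O1), "O2": pad(O2),
    }
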


s_m = model.s_model(len(char2id) + 2, args.char_dim, args.hidden_dim, args).to(args.device)
po_m = model.po_model(args.char_dim, num_classes=num_classes, args=args).to(args.device)
params = list(s_m.parameters())

params += list(po_m.parameters())
optimizer = torch.optim.Adam(params, lr=args.learning_rate)

loss = torch.nn.CrossEntropyLoss().to(args.device)
b_loss = torch.nn.BCEWithLogitsLoss().to(args.device)


def extract_items(text_in):
    # At validation/test time the batch size is 1.
    R = []
    _s = [char2id.get(c, 1) for c in text_in]
    _s = np.array([_s])
    _k1, _k2, t, h, mask = s_m(torch.LongTensor(_s).to(args.device))
    # The decoding below mirrors the first snippet on this page; the po_m
    # call signature is assumed to match.
    _k1, _k2 = _k1[0, :, 0], _k2[0, :, 0]
    for i, _kk1 in enumerate(_k1):
        if _kk1 > 0.5:
            _subject = ''
            for j, _kk2 in enumerate(_k2[i:]):
                if _kk2 > 0.5:
                    _subject = text_in[i: i + j + 1]
                    break
            if _subject:
                _sub_head = torch.LongTensor([[i]]).to(args.device)
                _sub_tail = torch.LongTensor([[i + j]]).to(args.device)
                _o1, _o2 = po_m(t, h, _sub_head, _sub_tail)
                _o1 = np.argmax(_o1.cpu().data.numpy()[0], 1)
                _o2 = np.argmax(_o2.cpu().data.numpy()[0], 1)
                for oi, _oo1 in enumerate(_o1):
                    if _oo1 > 0:
                        for oj, _oo2 in enumerate(_o2[oi:]):
                            if _oo2 == _oo1:
                                _object = text_in[oi: oi + oj + 1]
                                R.append((_subject, id2predicate[_oo1], _object))
                                break
    return list(set(R))


# Data_Generator, MyDataset, collate_fn, train_data, args, char2id and the
# s_model / po_model classes are assumed to be provided by the surrounding project.
from torch.utils.data import DataLoader
from tqdm import tqdm

dg = Data_Generator(train_data)
T, S1, S2, K1, K2, O1, O2 = dg.pro_res()

# print('number of samples:', len(T))    # number of samples: 21
torch_dataset = MyDataset(T, S1, S2, K1, K2, O1, O2)

data_loader = DataLoader(
    dataset=torch_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn,   # custom batch assembly (not shown here)
)

s_model = s_model(len(char2id) + 2, args.char_size, args.hidden_size)
po_model = po_model(len(char2id) + 2, args.char_size, args.hidden_size, 49)

params = list(s_model.parameters())
params += list(po_model.parameters())

optimizer = torch.optim.Adam(params, lr=0.001)

cross_loss = torch.nn.CrossEntropyLoss()
bce_loss = torch.nn.BCEWithLogitsLoss()
best_f1 = 0
best_epoch = 0

for epoch in range(args.num_epochs):
    for step, loader_res in tqdm(enumerate(data_loader)):
        t_s = loader_res["T"]
        k1 = loader_res["K1"]
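
The loop above breaks off after unpacking the first two batch fields. Purely as a sketch of where it is headed, the step below assumes the batch keys S1/S2/K2/O1/O2, the output shapes, and the loss wiring based on the other snippets on this page; none of it is the original author's code, and sequence masking is omitted.

# Hypothetical remainder of one training step.
k2 = loader_res["K2"]
ps1, ps2, t, t_max, mask = s_model(t_s)      # subject head/tail scores
po1, po2 = po_model(t, t_max, k1, k2)        # per-position predicate logits

# BCE on the subject pointers, cross-entropy on the predicate labels.
s_loss = bce_loss(ps1.squeeze(-1), loader_res["S1"].float()) \
       + bce_loss(ps2.squeeze(-1), loader_res["S2"].float())
o_loss = cross_loss(po1.transpose(1, 2), loader_res["O1"]) \
       + cross_loss(po2.transpose(1, 2), loader_res["O2"])

optimizer.zero_grad()
(s_loss + o_loss).backward()
optimizer.step()
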
Example #5
# As above, the data helpers and the BATCH_SIZE / CHAR_SIZE / HIDDEN_SIZE
# constants are assumed to be defined by the surrounding project.
torch_dataset = myDataset(T, S1, S2, K1, K2, O1, O2)
loader = Data.DataLoader(
    dataset=torch_dataset,      # torch Dataset holding the preprocessed arrays
    batch_size=BATCH_SIZE,      # mini-batch size
    shuffle=True,               # random shuffle for training
    num_workers=8,              # subprocesses for loading data
    collate_fn=collate_fn,      # custom batch assembly (not shown here)
)


# print("len", len(id2char))
s_m = model.s_model(len(char2id) + 2, CHAR_SIZE, HIDDEN_SIZE).cuda()
po_m = model.po_model(len(char2id) + 2, CHAR_SIZE, HIDDEN_SIZE, 49).cuda()
params = list(s_m.parameters())

params += list(po_m.parameters())
optimizer = torch.optim.Adam(params, lr=0.001)


loss = torch.nn.CrossEntropyLoss().cuda()
b_loss = torch.nn.BCEWithLogitsLoss().cuda()


def extract_items(text_in):
    R = []
    _s = [char2id.get(c, 1) for c in text_in]
    _s = np.array([_s])
    _k1, _k2, t, t_max, mask = s_m(torch.LongTensor(_s).cuda())