F.pad(t, pad=(0, 77 - t.size(0))).view(1, -1) for t in mask_index_ts
    ]

    words_t = torch.cat(words_ts, dim=0)
    sent_t = torch.cat(sent_ts, dim=0)
    mask_index_t = torch.cat(mask_index_ts, dim=0)

    words_num_t = torch.tensor(words_num, dtype=torch.long)
    mask_num_t = torch.tensor(mask_num, dtype=torch.long)

    return words_t, words_num_t, sent_t, mask_index_t, mask_num_t


if __name__ == "__main__":

    # Smoke test: build the character vocabulary, load the demo test
    # split, and report how many items the dataset yields.
    char2idx, idx2char = get_chars('../corpus/chars.lst')
    data = TestData('../data/demo_test.txt', char2idx)
    print('size of data = ', len(data))
Ejemplo n.º 2
0
import cv2
import utils

# Load the sample image in color and extract the characters for each
# color channel using the project's utils helpers.
image = cv2.imread('1.png', cv2.IMREAD_COLOR)
blue = utils.get_chars(image.copy(), utils.BLUE)
green = utils.get_chars(image.copy(), utils.GREEN)
red = utils.get_chars(image.copy(), utils.RED)

# Display each result in turn, waiting for a key press between images.
# NOTE(review): all three imshow calls reuse the window name 'Image Gray',
# so the same window is redrawn each time — confirm this is intentional
# rather than a copy-paste leftover.
cv2.imshow('Image Gray', blue)
cv2.waitKey(0)
cv2.imshow('Image Gray', green)
cv2.waitKey(0)
cv2.imshow('Image Gray', red)
cv2.waitKey(0)
Ejemplo n.º 3
0
                                     mask_index_t)

        mask_loss, mask_acc = mask_loss_fn(mask_out, mask_label_t, mask_num_t)
        mask_loss = mask_loss / mask_num_t.float().sum()
        sent_cls_loss = sent_cls_loss_fn(sent_cls_out, label_t)
        total_loss = mask_loss + sent_cls_loss

        sent_cls_pred = sent_cls_out.argmax(dim=-1)
        sent_cls_acc = sent_cls_pred.eq(label_t).float().sum() / words_t.size(
            0)

        return total_loss.item(), mask_loss.item(), mask_acc.item(
        ), sent_cls_loss.item(), sent_cls_acc.item()


# Build the character vocabulary and the train/validation datasets
# from the paths given in the config.
char2idx, idx2char = get_chars(cfg.chars_path)
train_data = TrainData(cfg.train_path)
valid_data = ValidData(cfg.valid_path)

# Instantiate the BERT model on the target device and report its size.
net = Bert(cfg).to(device)
print(net)
show_model_size(net)
try:
    model_path = os.path.abspath(cfg.load_model_path)
    net.load_state_dict(
        torch.load(
            os.path.join(
                model_path,
                '%s_%.8f_lr_%d_embeddim_%.2f_dropout_%d_layers.pt' %
                (net.name, cfg.lr, cfg.embed_dim, cfg.dropout,
Ejemplo n.º 4
0
    print('{} = {}'.format(key, val))

# Run on GPU when CUDA is available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def test(net, words_t, words_num_t, sent_t):
    # Run a single inference pass in evaluation mode and return the raw
    # paragraph-classification output of the network. Gradients are
    # disabled for the whole forward pass.
    net.eval()
    with torch.no_grad():
        para_cls_out = net(words_t, words_num_t, sent_t)

        # NOTE(review): para_cls_pred is computed but never used — the raw
        # output is returned below. Confirm whether the argmax predictions
        # were meant to be returned instead.
        para_cls_pred = para_cls_out.argmax(dim=-1)

        return para_cls_out


# Character vocabulary and paragraph-classification label table
# (both stored in the same chars-list file format).
char2idx, idx2char = get_chars(cfg.chars_path)
label2idx, idx2label = get_chars(cfg.para_cls_labels_path)

test_data = TestData(cfg.test_path, char2idx)

# The paragraph classifier is built on top of a BERT backbone.
bert_net = Bert(cfg).to(device)

para_cls_net = Para_cls_model(cfg, bert_net).to(device)
print(para_cls_net)
show_model_size(para_cls_net)

try:
    model_path = os.path.abspath(cfg.para_cls_load_model_path)
    para_cls_net.load_state_dict(
        torch.load(
            os.path.join(
Ejemplo n.º 5
0
    return loss.item()

def valid(net, words_t, label_t, loss_fn):
    """Evaluate the model on one batch.

    Puts the network in eval mode, runs a gradient-free forward pass,
    and returns ``(loss, accuracy)`` as Python floats, where accuracy is
    the fraction of samples whose argmax prediction matches the label.
    """
    net.eval()
    with torch.no_grad():
        n_samples = words_t.size(0)
        logits = net(words_t)
        loss = loss_fn(logits, label_t)
        predictions = logits.argmax(dim=1)
        n_correct = predictions.eq(label_t).float().sum(dim=-1)
        accuracy = n_correct / n_samples
        return loss.item(), accuracy.item()


# Vocabulary and label lookup tables, resolved relative to BASE_PATH.
char2idx, idx2char = get_chars(os.path.join(BASE_PATH, config.chars_path))
label2idx, idx2label = get_labels(os.path.join(BASE_PATH, config.labels_path))
# Datasets and their loaders; training data is shuffled, validation is not.
train_data = TrainData(os.path.join(BASE_PATH, config.train_path), char2idx, label2idx)
valid_data = ValidData(os.path.join(BASE_PATH, config.valid_path), char2idx, label2idx)
train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True, collate_fn=collate_fn)
valid_loader = DataLoader(valid_data, batch_size=config.batch_size, shuffle=False, collate_fn=collate_fn)

# Feed-forward network on the target device; print a summary of its size.
net = FFN(config).to(device)
print(net)
show_model_size(net)
try:
    # Fix: the original line had an unbalanced trailing ')' (syntax error).
    model_path = os.path.join(BASE_PATH, config.load_model_path)
    net.load_state_dict(torch.load(os.path.join(model_path, '%s_%.8f_lr_%d_hidsize.pt' % (net.name, config.lr, config.hidden_size))))
    opt = optim.Adam(net.parameters(), lr=config.cur_lr)
    print('load pre-train model succeed.')
Ejemplo n.º 6
0
    sent_ts = [torch.tensor(idx, dtype=torch.long) for idx in sent_idx]

    words_ts = [F.pad(t, pad=(0, 512-t.size(0))).view(1, -1) for t in words_ts]
    sent_ts = [F.pad(t, pad=(0, 512-t.size(0))).view(1, -1) for t in sent_ts]

    words_t = torch.cat(words_ts, dim=0)
    sent_t = torch.cat(sent_ts, dim=0)

    words_num_t = torch.tensor(words_num, dtype=torch.long)

    return words_t, words_num_t, sent_t


if __name__ == "__main__":

    # Smoke test: load the character and label vocabularies, build the
    # demo test dataset, and report how many items it contains.
    char2idx, idx2char = get_chars('../../corpus/chars.lst')
    label2idx, idx2label = get_chars('../../corpus/labels.lst')
    data = TestData('../data/demo_test.txt', char2idx)
    print('size of data = ', len(data))
        #return words_idx, sent_idx, mask_index, mask_label_idx, label_idx
        res = ' '.join([str(idx) for idx in words_idx]) + '\t'
        res += ' '.join([str(idx) for idx in sent_idx]) + '\t'
        res += ' '.join([str(idx) for idx in mask_index]) + '\t'
        res += ' '.join([str(idx) for idx in mask_label_idx]) + '\t'
        res += label + '\n'
        
        return res
        
    def __len__(self):
        """Return the number of items held by this dataset."""
        item_count = len(self.data)
        return item_count

    def gen_bert_data(self, output_file):
        """Serialize every dataset item to *output_file*, one per line.

        Each line is produced by ``self._gen_item`` and written as UTF-8.
        """
        with open(output_file, 'w', encoding='utf-8') as out:
            for item_idx in range(len(self.data)):
                out.write(self._gen_item(item_idx))


if __name__ == "__main__":

    # CLI: chars_vocab_path raw_text_path output_idx_data_path
    if len(sys.argv) != 4:
        # Bug fix: the usage string had a %s placeholder with no format
        # argument, so it printed a literal "%s" — now interpolates the
        # script name.
        print('Using: python %s chars_vocab_path raw_text_path output_idx_data_path'
              % sys.argv[0])
        sys.exit(1)

    # Build the vocabulary, load the raw text, and emit the indexed
    # BERT training data.
    char2idx, idx2char = get_chars(sys.argv[1])
    data = Dataset(sys.argv[2], char2idx)
    data.gen_bert_data(sys.argv[3])