Beispiel #1
0
def _test_strip(text, unwanted):
    """Return ``text`` without any character contained in ``unwanted``."""
    return ''.join(c for c in text if c not in unwanted)


def _test_write_lines(path, lines):
    """Write the strings in ``lines`` verbatim to the text file at ``path``."""
    with open(path, 'w') as fp:
        fp.writelines(lines)


def test(net,
         data,
         abc,
         cuda,
         visualize,
         batch_size=1,
         tb_writer=None,
         n_iter=0,
         initial_title="",
         loss_function=None,
         is_trian=True,
         output_path=None,
         do_beam_search=False,
         do_results=False,
         word_lexicon=None):
    """Evaluate ``net`` on ``data`` and return accuracy / edit-distance metrics.

    Args:
        net: Model. Called as ``net(imgs, img_seq_lens, decode=True,
            do_beam_search=...)`` and expected to return the decoded strings
            plus the raw sequence output; ``net.decode`` and
            ``net.decode_flatten`` are used when text is collected.
        data: Dataset handed to ``DataLoader``; must provide
            ``get_lexicon()``.
        abc: Indexable mapping from label id to character, used to rebuild the
            ground-truth strings.
        cuda: Move the images to the GPU before the forward pass.
        visualize: Show every image with ``cv2.imshow`` and wait for a key;
            pressing ``q`` stops the evaluation.
        batch_size: Only used to scale the loss. The loader itself always
            uses a batch size of 1.
        tb_writer: Optional writer forwarded to ``print_data_visuals`` for the
            first batch.
        n_iter: Iteration number used in visuals and output file names.
        initial_title: Title used in visuals and output file names.
        loss_function: Optional loss; when given, the average loss is
            returned as a fourth value.
        is_trian: When true, evaluation stops after 501 batches.
        output_path: Prefix for the result files. Predictions, labels and
            image paths are only collected when this is not ``None``.
        do_beam_search: Forwarded to ``net``.
        do_results: Together with ``output_path``, enables writing the
            result files.
        word_lexicon: Optional word list; a wrong prediction is snapped to
            its closest lexicon entry before being compared to the label.

    Returns:
        ``(acc, avg_ed, avg_no_stop_ed)`` or, when ``loss_function`` is
        given, ``(acc, avg_ed, avg_no_stop_ed, avg_loss)``. All values are
        averaged over the number of evaluated samples and are ``0.0`` when
        no sample was evaluated.
    """
    collate = lambda x: text_collate(x, do_mask=False)
    # NOTE(review): the loader ignores the ``batch_size`` argument; the code
    # below (e.g. ``sample["im_path"] + '\n'``) appears to rely on one sample
    # per batch - confirm before changing.
    data_loader = DataLoader(data,
                             batch_size=1,
                             num_workers=2,
                             shuffle=False,
                             collate_fn=collate)
    stop_characters = ['-', '.', '༎', '༑', '།', '་']
    garbage = '-'
    count = 0
    avg_ed = 0
    avg_no_stop_ed = 0
    avg_accuracy = 0
    avg_loss = 0
    iterator = tqdm(data_loader)
    # Three independent lists: chained assignment would bind all three names
    # to one shared list object.
    all_pred_text = []
    all_label_text = []
    all_im_pathes = []
    test_letter_statistics = Statistics()
    im_by_error = {}

    for batch_idx, sample in enumerate(iterator):
        if is_trian and (batch_idx > 500):
            break
        imgs = Variable(sample["img"])
        if cuda:
            imgs = imgs.cuda()
        img_seq_lens = sample["im_seq_len"]
        out, orig_seq = net(imgs,
                            img_seq_lens,
                            decode=True,
                            do_beam_search=do_beam_search)
        labels_flatten = Variable(sample["seq"]).view(-1)
        label_lens = Variable(sample["seq_len"].int())
        if loss_function is not None:
            loss = loss_function(
                orig_seq, labels_flatten,
                Variable(torch.IntTensor(np.array(img_seq_lens))),
                label_lens) / batch_size
            # NOTE(review): ``.data[0]`` assumes the loss is a 1-D tensor;
            # a 0-dim loss on newer PyTorch needs ``.item()`` - confirm.
            avg_loss += loss.data[0]
        gt = (sample["seq"].numpy()).tolist()
        lens = sample["seq_len"].numpy().tolist()

        if output_path is not None:
            preds_text = net.decode(orig_seq, data.get_lexicon())
            all_pred_text.extend(
                _test_strip(pd, garbage) + '\n' for pd in preds_text)
            label_text = net.decode_flatten(labels_flatten, label_lens,
                                            data.get_lexicon())
            all_label_text.extend(lb + '\n' for lb in label_text)
            all_im_pathes.append(sample["im_path"] + '\n')

        if batch_idx == 0 and tb_writer is not None:
            print_data_visuals(net, tb_writer, data.get_lexicon(),
                               sample["img"], labels_flatten, label_lens,
                               orig_seq, n_iter, initial_title)

        pos = 0
        key = ''
        for j, pred in enumerate(out):
            # ``gt`` is the flat label sequence; slice out this sample's part.
            gts = ''.join(abc[c] for c in gt[pos:pos + lens[j]])
            pos += lens[j]

            if gts == pred:
                # An exact match is a correct prediction with zero distance.
                avg_accuracy += 1
            else:
                cur_out = _test_strip(pred, garbage)
                cur_gts = _test_strip(gts, garbage)
                cur_out_no_stops = _test_strip(pred, stop_characters)
                cur_gts_no_stops = _test_strip(gts, stop_characters)
                # Normalise by the label length; ``max(..., 1)`` keeps an
                # empty label from dividing by zero.
                cur_ed = editdistance.eval(cur_out, cur_gts) / max(
                    len(cur_gts), 1)

                if word_lexicon is not None:
                    closest_word = get_close_matches(cur_out,
                                                     word_lexicon,
                                                     n=1,
                                                     cutoff=0.2)
                else:
                    # Keep the same list shape as ``get_close_matches`` so
                    # the comparison below looks at the whole word rather
                    # than at its first character.
                    closest_word = [cur_out]

                if closest_word and closest_word[0] == cur_gts:
                    avg_accuracy += 1

                errors, matches, bp = my_edit_distance_backpointer(
                    cur_out_no_stops, cur_gts_no_stops)
                test_letter_statistics.add_data(bp)
                if do_results:
                    im_by_error[sample["im_path"]] = cur_ed

                no_stop_len = max(len(cur_gts_no_stops), 1)
                my_no_stop_ed = errors / no_stop_len
                cur_no_stop_ed = editdistance.eval(
                    cur_out_no_stops, cur_gts_no_stops) / no_stop_len

                # Sanity check of the in-house edit distance against the
                # ``editdistance`` package.
                if my_no_stop_ed != cur_no_stop_ed:
                    print('old ed: {} , vs. new ed: {}\n'.format(
                        my_no_stop_ed, cur_no_stop_ed))
                avg_no_stop_ed += cur_no_stop_ed
                avg_ed += cur_ed

            count += 1
            if visualize:
                status = "pred: {}; gt: {}".format(pred, gts)
                iterator.set_description(status)
                img = imgs[j].permute(1, 2,
                                      0).cpu().data.numpy().astype(np.uint8)
                cv2.imshow("img", img)
                key = chr(cv2.waitKey() & 255)
                if key == 'q':
                    break
        # 'q' aborts the whole evaluation, not just the current batch.
        if key == 'q':
            break

    if do_results and output_path is not None:
        print('printing results! :)')
        # Create the directory (and thereby its parents) before any file is
        # opened. The result files are written next to it, using
        # ``output_path`` as a file-name prefix.
        os.makedirs(output_path, exist_ok=True)
        prefix = output_path + '_{}_{}'.format(initial_title, n_iter)

        sorted_im_by_error = sorted(im_by_error.items(),
                                    key=operator.itemgetter(1))
        sorted_im = [path for (path, _) in sorted_im_by_error]
        all_im_pathes_no_new_line = [
            im.replace('\n', '') for im in all_im_pathes
        ]

        def describe(paths):
            # One "path / label / prediction" record per image.
            records = []
            for im in paths:
                im_id = all_im_pathes_no_new_line.index(im)
                records.append(im + '\n' + all_label_text[im_id] +
                               all_pred_text[im_id])
            return ''.join(records)

        printed_res_best = describe(sorted_im[:20])
        printed_res_worst = describe(list(reversed(sorted_im))[:20])

        _test_write_lines(prefix + '_sorted_images_by_errors.txt', [
            path + ',' + str(value) + '\n'
            for (path, value) in sorted_im_by_error
        ])
        _test_write_lines(prefix + '_res_on_best.txt', [printed_res_best])
        _test_write_lines(prefix + '_res_on_worst.txt', [printed_res_worst])
        _test_write_lines(prefix + '_pred.txt', all_pred_text)
        _test_write_lines(prefix + '_label.txt', all_label_text)
        _test_write_lines(prefix + '_im.txt', all_im_pathes)
        _test_write_lines(
            prefix + '_pred_no_stopchars.txt',
            [_test_strip(line, stop_characters) for line in all_pred_text])
        _test_write_lines(
            prefix + '_label_no_stopchars.txt',
            [_test_strip(line, stop_characters) for line in all_label_text])

    # Avoid ZeroDivisionError when no sample was evaluated.
    total = float(max(count, 1))
    acc = float(avg_accuracy) / total
    avg_ed = float(avg_ed) / total
    avg_no_stop_ed = float(avg_no_stop_ed) / total
    if loss_function is not None:
        avg_loss = float(avg_loss) / total
        return acc, avg_ed, avg_no_stop_ed, avg_loss
    return acc, avg_ed, avg_no_stop_ed
Beispiel #2
0
def _attn_strip(text, unwanted):
    """Return ``text`` without any character contained in ``unwanted``."""
    return ''.join(c for c in text if c not in unwanted)


def _attn_write_lines(path, lines):
    """Write the strings in ``lines`` verbatim to the text file at ``path``."""
    with open(path, 'w') as fp:
        fp.writelines(lines)


def test_attn(net,
              data,
              abc,
              cuda,
              visualize,
              batch_size=1,
              tb_writer=None,
              n_iter=0,
              initial_title="",
              is_trian=True,
              output_path=None):
    """Evaluate the attention model ``net`` on ``data``.

    The network is switched to eval mode and run without gradient tracking.

    Args:
        net: Model. Called as ``net(imgs, img_seq_lens, mask, None,
            teacher_forcing_ratio=0)``; ``net.predict`` and
            ``net.padded_seq_to_txt`` turn outputs and labels into text.
        data: Dataset handed to ``DataLoader``.
        abc: Unused; kept so the signature matches ``test``.
        cuda: Move images, mask and padded labels to the GPU.
        visualize: Show every image with ``cv2.imshow`` and wait for a key;
            pressing ``q`` stops the evaluation.
        batch_size: Unused; the loader always uses a batch size of 1.
        tb_writer: Optional writer; ``show_images`` is called on the first
            batch.
        n_iter: Iteration number used in visuals and output file names.
        initial_title: Title used in visuals and output file names.
        is_trian: When true, evaluation stops after 1001 batches.
        output_path: Prefix for the result files; nothing is written when it
            is ``None``.

    Returns:
        ``(acc, avg_ed, avg_no_stop_ed, avg_loss)``, each averaged over the
        number of evaluated samples (``0.0`` when none were evaluated).
        ``acc`` is the fraction of predictions equal to their label.
    """
    collate = lambda x: text_collate(x, do_mask=True)
    net.eval()
    data_loader = DataLoader(data,
                             batch_size=1,
                             num_workers=2,
                             shuffle=False,
                             collate_fn=collate)
    stop_characters = ['-', '.', '༎', '༑', '།', '་']
    count = 0
    tp = 0
    avg_ed = 0
    avg_no_stop_ed = 0
    avg_loss = 0
    iterator = tqdm(data_loader)
    # Three independent lists: chained assignment would bind all three names
    # to one shared list object.
    all_pred_text = []
    all_label_text = []
    all_im_pathes = []
    test_letter_statistics = Statistics()
    with torch.no_grad():
        for batch_idx, sample in enumerate(iterator):
            if is_trian and (batch_idx > 1000):
                break
            imgs = Variable(sample["img"])
            mask = sample["mask"]
            padded_labels = sample["padded_seq"]
            if cuda:
                imgs = imgs.cuda()
                mask = mask.cuda()
                padded_labels = padded_labels.cuda()

            img_seq_lens = sample["im_seq_len"]

            # Forward propagation (no teacher forcing during evaluation).
            decoder_outputs, decoder_hidden, other = net(
                imgs, img_seq_lens, mask, None, teacher_forcing_ratio=0)

            # Accumulate the loss step by step. Output step ``s`` is scored
            # against label column ``s + 1``; steps past the label length
            # are scored against zeros.
            loss = NLLLoss()
            loss.reset()
            zero_labels = torch.zeros_like(padded_labels[:, 1])
            max_label_size = padded_labels.size(1)
            n_rows = padded_labels.size(0)
            for step, step_output in enumerate(decoder_outputs):
                if (step + 1) < max_label_size:
                    target = padded_labels[:, step + 1]
                else:
                    target = zero_labels
                loss.eval_batch(step_output.contiguous().view(n_rows, -1),
                                target)
            # NOTE(review): ``.data[0]`` depends on what ``get_loss`` returns
            # (presumably a 1-D tensor) - confirm against ``NLLLoss``.
            total_loss = loss.get_loss().data[0]
            avg_loss += total_loss

            preds_text = net.predict(other)
            label_rows = (sample["padded_seq"].numpy()).tolist()
            lens = sample["seq_len"].numpy().tolist()
            label_text = net.padded_seq_to_txt(label_rows, lens)
            if output_path is not None:
                all_pred_text.extend(pd + '\n' for pd in preds_text)
                all_label_text.extend(lb + '\n' for lb in label_text)
                all_im_pathes.append(sample["im_path"] + '\n')

            if batch_idx == 0 and tb_writer is not None:
                tb_writer.show_images(
                    sample["img"],
                    label_text=[lb + '\n' for lb in label_text],
                    pred_text=[pd + '\n' for pd in preds_text],
                    n_iter=n_iter,
                    initial_title=initial_title)

            key = ''
            for j, label in enumerate(label_text):
                pred = preds_text[j]
                if pred == label:
                    tp += 1
                # "out" is the prediction and "gts" the ground truth, the
                # same argument order ``test`` uses for the backpointer
                # statistics.
                cur_out_no_stops = _attn_strip(pred, stop_characters)
                cur_gts_no_stops = _attn_strip(label, stop_characters)
                # The trailing ``1`` keeps two empty strings from dividing
                # by zero.
                cur_ed = editdistance.eval(pred, label) / max(
                    len(pred), len(label), 1)
                errors, matches, bp = my_edit_distance_backpointer(
                    cur_out_no_stops, cur_gts_no_stops)
                test_letter_statistics.add_data(bp)
                no_stop_len = max(len(cur_out_no_stops),
                                  len(cur_gts_no_stops), 1)
                my_no_stop_ed = errors / no_stop_len
                cur_no_stop_ed = editdistance.eval(
                    cur_out_no_stops, cur_gts_no_stops) / no_stop_len

                # Sanity check of the in-house edit distance against the
                # ``editdistance`` package.
                if my_no_stop_ed != cur_no_stop_ed:
                    print('old ed: {} , vs. new ed: {}\n'.format(
                        my_no_stop_ed, cur_no_stop_ed))
                avg_no_stop_ed += cur_no_stop_ed
                avg_ed += cur_ed

                count += 1
                if visualize:
                    status = "pred: {}; gt: {}".format(pred, label)
                    iterator.set_description(status)
                    img = imgs[j].permute(1, 2, 0).cpu().data.numpy().astype(
                        np.uint8)
                    cv2.imshow("img", img)
                    key = chr(cv2.waitKey() & 255)
                    if key == 'q':
                        break
            if key == 'q':
                break
            if not visualize and count:
                iterator.set_description(
                    "acc: {0:.4f}; avg_ed: {1:.4f}".format(
                        float(tp) / float(count),
                        float(avg_ed) / float(count)))

    if output_path is not None:
        # Create the directory (and thereby its parents) before any file is
        # opened. The result files are written next to it, using
        # ``output_path`` as a file-name prefix.
        os.makedirs(output_path, exist_ok=True)
        prefix = output_path + '_{}_{}'.format(initial_title, n_iter)

        with open(prefix + '_statistics.pkl', 'wb') as sf:
            pkl.dump(test_letter_statistics.total_actions_hists, sf)

        print('writing output')
        _attn_write_lines(prefix + '_pred.txt', all_pred_text)
        _attn_write_lines(prefix + '_label.txt', all_label_text)
        _attn_write_lines(prefix + '_im.txt', all_im_pathes)
        _attn_write_lines(
            prefix + '_pred_no_stopchars.txt',
            [_attn_strip(line, stop_characters) for line in all_pred_text])
        _attn_write_lines(
            prefix + '_label_no_stopchars.txt',
            [_attn_strip(line, stop_characters) for line in all_label_text])

    # Avoid ZeroDivisionError when no sample was evaluated.
    total = float(max(count, 1))
    acc = float(tp) / total
    avg_ed = float(avg_ed) / total
    avg_no_stop_ed = float(avg_no_stop_ed) / total
    avg_loss = float(avg_loss) / total
    return acc, avg_ed, avg_no_stop_ed, avg_loss