Python np_to_variableの例

プログラミング言語: Python

名前空間/パッケージ名: net_utils

メソッド/関数: np_to_variable

hotexamples.comのコード掲載数: 15

Python np_to_variable - 15件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのnet_utils.np_to_variableの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def eval_detection(opts, net=None):
  """Evaluate text detection on ``opts.eval_list`` and print precision/recall/F-score.

  Every image returned by ``load_annotation`` is passed through ``net``; the
  boxes produced by ``get_boxes`` are greedily matched one-to-one against the
  ground-truth boxes of that image.  A detection is a true positive when its
  polygon IoU with a still-unmatched ground-truth polygon exceeds 0.5.

  Args:
    opts: options object; this function reads ``model``, ``cuda``,
      ``eval_list``, ``segm_thresh`` and ``debug``.
    net: network to evaluate.  When ``None`` an ``OctShuffleMLT`` is created,
      loaded from ``opts.model`` and moved to CUDA if ``opts.cuda`` is set.

  Returns:
    None.  The counts and the metrics are printed.
  """
  if net is None:
    net = OctShuffleMLT(attention=True)
    net_utils.load_net(opts.model, net)
    if opts.cuda:
      net.cuda()

  images, gt_boxes = load_annotation(opts.eval_list)
  true_positives = 0
  false_positives = 0
  false_negatives = 0

  for i in range(images.shape[0]):
    image = np.expand_dims(images[i], axis=0)
    image_boxes_gt = np.array(gt_boxes[i])

    im_data = net_utils.np_to_variable(image, is_cuda=opts.cuda).permute(0, 3, 1, 2)
    seg_pred, rboxs, angle_pred, features = net(im_data)

    # Channel-first network output -> channel-last layout for get_boxes.
    rbox = rboxs[0].data.cpu()[0].numpy()
    rbox = rbox.swapaxes(0, 1)
    rbox = rbox.swapaxes(1, 2)
    angle_pred = angle_pred[0].data.cpu()[0].numpy()
    segm = seg_pred[0].data.cpu()[0].numpy()
    segm = segm.squeeze(0)

    boxes = get_boxes(segm, rbox, angle_pred, opts.segm_thresh)

    if opts.debug:
      print(boxes.shape)
      print(image_boxes_gt.shape)
      print("============")

    # Start pessimistic: every detection is a false positive and every
    # ground-truth box is missed; each match below corrects both counters.
    false_positives += boxes.shape[0]
    false_negatives += image_boxes_gt.shape[0]

    # Build the ground-truth polygons once per image instead of once per
    # (detection, ground-truth) pair, and track matches by index so that
    # duplicate ground-truth boxes are not removed together.
    gt_polys = [Polygon.Polygon(box_gt[0:8].reshape(4, -1))
                for box_gt in image_boxes_gt]
    unmatched = list(range(len(gt_polys)))

    for box in boxes:
      poly = Polygon.Polygon(box[0:8].reshape(4, -1))
      for gt_idx in unmatched:
        poly_gt = gt_polys[gt_idx]
        # In the Polygon package `&` is the intersection and `|` the union.
        union_area = (poly_gt | poly).area()
        if union_area <= 0:
          continue
        iou = (poly_gt & poly).area() / float(union_area)
        if iou > 0.5:
          true_positives += 1
          false_negatives -= 1
          false_positives -= 1
          unmatched.remove(gt_idx)  # safe: we leave the loop right away
          break

  print("tp: {} fp: {} fn: {}".format(true_positives, false_positives, false_negatives))

  # Guard every ratio so an empty evaluation set (or a model that detects
  # nothing) reports 0 instead of raising ZeroDivisionError.
  detected = true_positives + false_positives
  expected = true_positives + false_negatives
  precision = true_positives / float(detected) if detected else 0.0
  recall = true_positives / float(expected) if expected else 0.0
  if precision + recall > 0:
    f_score = 2 * precision * recall / (precision + recall)
  else:
    f_score = 0.0
  print("PRECISION: {} \t RECALL: {} \t F SCORE: {}".format(precision, recall, f_score))

コード例 #2

ファイルを表示

def dice_loss(segm_preds, score_maps, training_masks, multi_scale=False):
    """Return the negated, smoothed Dice score of the segmentation prediction.

    Args:
        segm_preds: indexable of prediction tensors; element 0 is the main
            prediction and element 1 the secondary one.  Both are squeezed on
            dimension 1.
        score_maps: ground-truth score maps, converted to ``uint8`` and then
            wrapped with ``net_utils.np_to_variable`` (CPU).
        training_masks: masks multiplied into both prediction and target,
            converted the same way as ``score_maps``.
        multi_scale: when true, the ground truth and the mask are bilinearly
            resized to the size of the secondary prediction and a second Dice
            term is added.

    Returns:
        Tensor holding ``-dice`` (or the sum of both negated Dice terms).
    """
    eps = 1.

    def _negated_dice(prediction, reference):
        # Flatten both operands and compute -(2*|A.B| + eps) / (|A| + |B| + eps).
        pred_flat = prediction.view(-1)
        ref_flat = reference.view(-1)
        overlap = (pred_flat * ref_flat).sum()
        return -((2. * overlap + eps) /
                 (pred_flat.sum() + ref_flat.sum() + eps))

    score_maps = np.asarray(score_maps, dtype=np.uint8)
    training_masks = np.asarray(training_masks, dtype=np.uint8)

    gt_var = net_utils.np_to_variable(score_maps, is_cuda=False)
    mask_var = net_utils.np_to_variable(training_masks, is_cuda=False)

    # Both predictions are unpacked up front, regardless of `multi_scale`.
    fine_pred = segm_preds[0].squeeze(1)
    coarse_pred = segm_preds[1].squeeze(1)

    result = _negated_dice(fine_pred * mask_var, gt_var * mask_var)

    if not multi_scale:
        return result

    # Bring ground truth and mask to the spatial size of the second prediction.
    coarse_gt = F.interpolate(gt_var.unsqueeze(1),
                              size=(coarse_pred.size(1), coarse_pred.size(2)),
                              mode='bilinear',
                              align_corners=True).squeeze(1)
    coarse_mask = F.interpolate(mask_var.unsqueeze(1),
                                size=(coarse_pred.size(1),
                                      coarse_pred.size(2)),
                                mode='bilinear',
                                align_corners=True).squeeze(1)

    result += _negated_dice(coarse_pred * coarse_mask, coarse_gt * coarse_mask)
    return result

コード例 #3

ファイルを表示

ファイル: train.py プロジェクト: josedossantos10/OctShuffle-MLT

def main(opts):
    """Train the OCT-E2E-MLT network on detection batches plus an OCR side task.

    Each iteration draws a detection batch from ``data_gen.get_batch`` and an
    OCR batch from ``ocr_gen.get_batch``, back-propagates ``net.loss`` (plus
    the CTC loss returned by ``process_boxes`` once ``step > 10000``) and the
    OCR CTC loss, then steps an Adam optimizer.  Every ``disp_interval`` steps
    the averaged losses are printed and, if the averaged loss improved, the
    weights are snapshotted; every ``batch_per_epoch`` steps a checkpoint is
    written to ``opts.save_path``.  Training stops early when the averaged
    loss has not improved for more than ``max_patience`` steps, in which case
    the best snapshot is saved instead of the final weights.

    Args:
        opts: options object; this function reads ``base_lr``, ``model``,
            ``cuda``, ``num_readers``, ``input_size``, ``batch_size``,
            ``train_list``, ``geo_type``, ``ocr_batch_size``,
            ``ocr_feed_list``, ``max_iters``, ``debug`` and ``save_path``.
            ``max_iters`` is overwritten with the current step when early
            stopping fires.

    The names ``weight_decay``, ``norm_height``, ``disp_interval`` and
    ``batch_per_epoch`` are not defined here and must exist at module level.
    """
    # Function-scope imports keep this block self-contained.
    import copy
    import sys
    import traceback

    model_name = 'OCT-E2E-MLT'
    net = OctMLT(attention=True)
    print("Using {0}".format(model_name))

    learning_rate = opts.base_lr
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=opts.base_lr,
                                 weight_decay=weight_decay)
    step_start = 0
    if os.path.exists(opts.model):
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net)

    if opts.cuda:
        net.cuda()

    net.train()

    data_generator = data_gen.get_batch(num_workers=opts.num_readers,
                                        input_size=opts.input_size,
                                        batch_size=opts.batch_size,
                                        train_list=opts.train_list,
                                        geo_type=opts.geo_type)

    dg_ocr = ocr_gen.get_batch(num_workers=2,
                               batch_size=opts.ocr_batch_size,
                               train_list=opts.ocr_feed_list,
                               in_train=True,
                               norm_height=norm_height,
                               rgb=True)

    train_loss = 0
    bbox_loss, seg_loss, angle_loss = 0., 0., 0.
    cnt = 0
    ctc_loss = CTCLoss()

    ctc_loss_val = 0
    box_loss_val = 0
    good_all = 0
    gt_all = 0

    best_step = step_start
    best_loss = 1000000
    # state_dict() returns references to the live tensors, so the snapshot
    # must be deep-copied or it silently follows every later update.
    best_model = copy.deepcopy(net.state_dict())
    best_optimizer = copy.deepcopy(optimizer.state_dict())
    best_learning_rate = learning_rate
    max_patience = 3000
    early_stop = False

    # Keeps `step` defined for the final save even if the range is empty.
    step = step_start
    for step in range(step_start, opts.max_iters):

        # batch
        images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs = next(
            data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(
            0, 3, 1, 2)
        # default_timer() reads a clock; timeit.timeit() would run a benchmark.
        start = timeit.default_timer()
        try:
            seg_pred, roi_pred, angle_pred, features = net(im_data)
        except Exception:
            traceback.print_exc(file=sys.stdout)
            continue
        end = timeit.default_timer()

        # backward

        smaps_var = net_utils.np_to_variable(score_maps, is_cuda=opts.cuda)
        training_mask_var = net_utils.np_to_variable(training_masks,
                                                     is_cuda=opts.cuda)
        angle_gt = net_utils.np_to_variable(geo_maps[:, :, :, 4],
                                            is_cuda=opts.cuda)
        geo_gt = net_utils.np_to_variable(geo_maps[:, :, :, [0, 1, 2, 3]],
                                          is_cuda=opts.cuda)

        try:
            loss = net.loss(seg_pred, smaps_var, training_mask_var, angle_pred,
                            angle_gt, roi_pred, geo_gt)
        except Exception:
            traceback.print_exc(file=sys.stdout)
            continue

        bbox_loss += net.box_loss_value.data.cpu().numpy()
        seg_loss += net.segm_loss_value.data.cpu().numpy()
        angle_loss += net.angle_loss_value.data.cpu().numpy()

        train_loss += loss.data.cpu().numpy()
        optimizer.zero_grad()

        try:

            if step > 10000:  # extra augmentation step; in the early stage it only slows training down
                ctcl, gt_b_good, gt_b_all = process_boxes(images,
                                                          im_data,
                                                          seg_pred[0],
                                                          roi_pred[0],
                                                          angle_pred[0],
                                                          score_maps,
                                                          gt_idxs,
                                                          gtso,
                                                          lbso,
                                                          features,
                                                          net,
                                                          ctc_loss,
                                                          opts,
                                                          debug=opts.debug)
                # item() works for both 0-dim and one-element loss tensors.
                ctc_loss_val += ctcl.item()
                loss = loss + ctcl
                gt_all += gt_b_all
                good_all += gt_b_good

            imageso, labels, label_length = next(dg_ocr)
            im_data_ocr = net_utils.np_to_variable(imageso,
                                                   is_cuda=opts.cuda).permute(
                                                       0, 3, 1, 2)
            features = net.forward_features(im_data_ocr)
            labels_pred = net.forward_ocr(features)

            # One entry per batch element, each equal to the sequence length.
            acts = labels_pred.permute(2, 0, 1)
            probs_sizes = torch.IntTensor([acts.size()[0]] * acts.size()[1])
            label_sizes = torch.IntTensor(
                torch.from_numpy(np.array(label_length)).int())
            labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int())
            loss_ocr = ctc_loss(acts, labels, probs_sizes,
                                label_sizes) / im_data_ocr.size(0) * 0.5

            loss_ocr.backward()
            loss.backward()

            optimizer.step()
        except Exception:
            # Best effort: a failing batch is reported and skipped.
            traceback.print_exc(file=sys.stdout)
        cnt += 1
        if step % disp_interval == 0:

            if opts.debug:

                segm = seg_pred[0].data.cpu()[0].numpy()
                segm = segm.squeeze(0)
                cv2.imshow('segm_map', segm)

                segm_res = cv2.resize(score_maps[0],
                                      (images.shape[2], images.shape[1]))
                mask = np.argwhere(segm_res > 0)

                # Undo the channel-first layout and the (x / 128 - 1) scaling.
                x_data = im_data.data.cpu().numpy()[0]
                x_data = x_data.swapaxes(0, 2)
                x_data = x_data.swapaxes(0, 1)

                x_data += 1
                x_data *= 128
                x_data = np.asarray(x_data, dtype=np.uint8)
                x_data = x_data[:, :, ::-1]

                im_show = x_data
                try:
                    im_show[mask[:, 0], mask[:, 1], 1] = 255
                    im_show[mask[:, 0], mask[:, 1], 0] = 0
                    im_show[mask[:, 0], mask[:, 1], 2] = 0
                except Exception:
                    # The overlay is a debugging aid only; never abort for it.
                    pass

                cv2.imshow('img0', im_show)
                cv2.imshow('score_maps', score_maps[0] * 255)
                cv2.imshow('train_mask', training_masks[0] * 255)
                cv2.waitKey(10)

            train_loss /= cnt
            bbox_loss /= cnt
            seg_loss /= cnt
            angle_loss /= cnt
            ctc_loss_val /= cnt
            box_loss_val /= cnt

            if train_loss < best_loss:
                best_step = step
                best_model = copy.deepcopy(net.state_dict())
                best_loss = train_loss
                best_learning_rate = learning_rate
                best_optimizer = copy.deepcopy(optimizer.state_dict())
            # Patience is measured from the best step to now
            # (the reverse difference is never positive).
            if step - best_step > max_patience:
                print("Early stopped criteria achieved.")
                save_name = os.path.join(
                    opts.save_path,
                    'BEST_{}_{}.h5'.format(model_name, best_step))
                state = {
                    'step': best_step,
                    'learning_rate': best_learning_rate,
                    'state_dict': best_model,
                    'optimizer': best_optimizer
                }
                torch.save(state, save_name)
                print('save model: {}'.format(save_name))
                opts.max_iters = step
                early_stop = True
            try:
                print(
                    'epoch %d[%d], loss: %.3f, bbox_loss: %.3f, seg_loss: %.3f, ang_loss: %.3f, ctc_loss: %.3f, rec: %.5f in %.3f'
                    % (step / batch_per_epoch, step, train_loss, bbox_loss,
                       seg_loss, angle_loss, ctc_loss_val,
                       good_all / max(1, gt_all), end - start))
                print('max_memory_allocated {}'.format(
                    torch.cuda.max_memory_allocated()))
            except Exception:
                traceback.print_exc(file=sys.stdout)

            if early_stop:
                # Changing opts.max_iters cannot shorten the range that is
                # already being iterated, so leave the loop explicitly.
                break

            train_loss = 0
            bbox_loss, seg_loss, angle_loss = 0., 0., 0.
            cnt = 0
            ctc_loss_val = 0
            good_all = 0
            gt_all = 0
            box_loss_val = 0

        #if step % valid_interval == 0:
        #  validate(opts.valid_list, net)
        if step > step_start and (step % batch_per_epoch == 0):
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
                'max_memory_allocated': torch.cuda.max_memory_allocated()
            }
            torch.save(state, save_name)
            print('save model: {}\tmax memory: {}'.format(
                save_name, torch.cuda.max_memory_allocated()))
    if not early_stop:
        save_name = os.path.join(opts.save_path, '{}.h5'.format(model_name))
        state = {
            'step': step,
            'learning_rate': learning_rate,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        torch.save(state, save_name)
        print('save model: {}'.format(save_name))

コード例 #4

ファイルを表示

ファイル: ocr_test_utils.py プロジェクト: nhh1501/E2E_MLT_VN

def _dominant_script_index(text, scripts):
    """Return the index in *scripts* of the script most characters of *text* use.

    A character belongs to the first entry of ``scripts[1:]`` that occurs in
    its Unicode name; characters matching none (or having no Unicode name)
    are counted under index 0.  Ties resolve to the lowest index, and an
    empty *text* yields 0.
    """
    counts = np.zeros(len(scripts), dtype=int)
    for c_char in text:
        # The '' default avoids ValueError for characters without a name.
        symbol_name = ud.name(c_char, '')
        for idx, scr in enumerate(scripts):
            if idx == 0:
                continue
            if scr in symbol_name:
                counts[idx] += 1
                break
        else:
            counts[0] += 1
    return int(np.argmax(counts))


def _write_bad_words_report(rows, path):
    """Write the first 1500 *rows* of mis-recognised words as an HTML table to *path*."""
    ww = np.asarray(rows, object)
    ww = ww[0:1500, :]
    df2 = pd.DataFrame({
        'gt': ww[:, 0],
        'pred': ww[:, 1],
        'ed': ww[:, 2],
        'image': ww[:, 3]
    })
    html = df2.to_html(escape=False)
    with open(path, 'w') as report:
        report.write(html)


def test(net,
         codec,
         args,
         list_file='/home/busta/data/icdar_ch8_validation/ocr_valid.txt',
         norm_height=32,
         max_samples=1000000):
    """Run the OCR branch of *net* over the word images listed in *list_file*.

    Each line of *list_file* holds an image name (relative to the directory of
    *list_file*) and its transcription, separated by ``,`` or, when no comma
    is present, by a space.  Every image is resized to *norm_height*, scaled
    to roughly [-1, 1), recognised, and compared with the ground truth.

    Side effects: writes ``/tmp/ch8_valid.txt`` and ``/tmp/ocr_valid.txt``,
    ``per_script_accuracy.csv``, ``conf_matrix.csv`` and
    ``conf_matrix_out.csv`` in the working directory, and, when there are
    mistakes, ``ocr_bad.html`` / ``ocr_not_sobad.html`` next to *list_file*.
    The network is put in eval mode for the run and back in train mode at the
    end.

    Args:
        net: model exposing ``forward_features`` and ``forward_ocr``.
        codec: symbol table handed to ``print_seq_ext`` for decoding.
        args: options object; only ``args.cuda`` is read.
        list_file: path of the ground-truth list.
        norm_height: height every word image is resized to.
        max_samples: lines with an index above this value are not processed.

    Returns:
        Tuple ``(accuracy, total_edit_distance)``; accuracy is 0.0 when no
        sample could be evaluated.
    """
    net = net.eval()
    #list_file = '/mnt/textspotter/tmp/90kDICT32px/train_list.txt'
    #list_file = '/home/busta/data/Challenge2_Test_Task3_Images/gt.txt'
    #list_file = '/home/busta/data/90kDICT32px/train_icdar_ch8.txt'

    dir_name = os.path.dirname(list_file)
    with open(list_file, "r") as ins:
        images = [line.strip() for line in ins]

    scripts = [
        '', 'DIGIT', 'LATIN', 'ARABIC', 'BENGALI', 'HANGUL', 'CJK', 'HIRAGANA',
        'KATAKANA'
    ]

    # np.int / np.float / np.object were aliases of the builtins and have
    # been removed from NumPy; the builtins give the same dtypes.
    conf_matrix = np.zeros((len(scripts), len(scripts)), dtype=int)

    gt_script = {}
    ed_script = {}
    correct_ed1_script = {}
    correct_script = {}
    count_script = {}
    for scr in scripts:
        gt_script[scr] = 0
        ed_script[scr] = 0
        correct_script[scr] = 0
        correct_ed1_script[scr] = 0
        count_script[scr] = 0

    correct = 0
    correct_ed1 = 0
    ted = 0
    gt_all = 0
    images_count = 0
    bad_words = []

    # `with` guarantees both result files are closed even if a sample raises.
    with open('/tmp/ch8_valid.txt', 'w') as fout, \
            open('/tmp/ocr_valid.txt', 'w') as fout_ocr:
        for imageNo, line in enumerate(images):
            if imageNo > max_samples:
                break

            # NOTE(review): only the first two fields after the image name
            # are kept; confirm against the list format whether further
            # delimiters inside the transcription should be preserved.
            spl = line.split(",")
            delim = ","
            if len(spl) == 1:
                spl = line.split(" ")
                delim = " "
            image_name = spl[0].strip()
            gt_txt = ''
            if len(spl) > 1:
                gt_txt = spl[1].strip()
                if len(spl) > 2:
                    gt_txt += delim + spl[2]

                if len(gt_txt) > 1 and gt_txt[0] == '"' and gt_txt[-1] == '"':
                    gt_txt = gt_txt[1:len(gt_txt) - 1]

            if len(gt_txt) == 0:
                print(line)
                continue

            # endswith() is safe for an empty name, unlike image_name[-1].
            if image_name.endswith(','):
                image_name = image_name[0:-1]

            img_nameo = image_name
            image_name = '{0}/{1}'.format(dir_name, image_name)
            img = cv2.imread(image_name)

            if img is None:
                print(image_name)
                continue

            # Keep the aspect ratio; width is a multiple of 4 and at least 8.
            scale = norm_height / float(img.shape[0])
            width = int(img.shape[1] * scale)
            width = max(8, int(round(width / 4)) * 4)

            scaled = cv2.resize(img, (int(width), norm_height))
            #scaled = scaled[:, :, ::-1]
            scaled = np.expand_dims(scaled, axis=0)

            scaled = np.asarray(scaled, dtype=float)
            scaled /= 128
            scaled -= 1

            try:
                scaled_var = net_utils.np_to_variable(
                    scaled, is_cuda=args.cuda).permute(0, 3, 1, 2)
                x = net.forward_features(scaled_var)
                ctc_f = net.forward_ocr(x)
                ctc_f = ctc_f.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)

                labels = ctc_f.argmax(2)
                det_text, conf, dec_s, _ = print_seq_ext(labels[0, :], codec)
            except Exception:
                # A sample the network cannot process counts as an empty
                # prediction instead of aborting the whole evaluation.
                print('bad image')
                det_text = ''

            det_text = det_text.strip()
            gt_txt = gt_txt.strip()

            try:
                if 'ARABIC' in ud.name(gt_txt[0]):
                    #gt_txt = gt_txt[::-1]
                    det_text = det_text[::-1]
            except Exception:
                # Empty ground truth after stripping, or a first character
                # without a Unicode name: skip the sample.
                continue

            script_idx = _dominant_script_index(gt_txt, scripts)
            script = scripts[script_idx]
            script_det_idx = _dominant_script_index(det_text, scripts)

            conf_matrix[script_idx, script_det_idx] += 1

            edit_dist = distance(det_text.lower(), gt_txt.lower())
            ted += edit_dist
            gt_all += len(gt_txt)

            gt_script[script] += len(gt_txt)
            ed_script[script] += edit_dist
            images_count += 1

            fout_ocr.write('{0}, "{1}"\n'.format(os.path.basename(image_name),
                                                 det_text.strip()))

            if det_text.lower() == gt_txt.lower():
                correct += 1
                correct_ed1 += 1
                correct_script[script] += 1
                correct_ed1_script[script] += 1
            else:
                if edit_dist == 1:
                    correct_ed1 += 1
                    correct_ed1_script[script] += 1
                image_prev = "<img src=\"{0}\" height=\"32\" />".format(
                    img_nameo)
                bad_words.append(
                    (gt_txt, det_text, edit_dist, image_prev, img_nameo))
                print('{0} - {1} / {2:.2f} - {3:.2f}'.format(
                    det_text, gt_txt, correct / float(images_count),
                    ted / 3.0))

            count_script[script] += 1
            fout.write('{0}|{1}|{2}|{3}\n'.format(os.path.basename(image_name),
                                                  gt_txt, det_text, edit_dist))

    # max(..., 1) keeps the summary working when nothing was evaluated.
    accuracy = correct / float(max(images_count, 1))
    print('Test accuracy: {0:.3f}, {1:.2f}, {2:.3f}'.format(
        accuracy, ted / 3.0, ted / float(max(gt_all, 1))))

    with open("per_script_accuracy.csv", "w") as itf:
        itf.write(
            'Script & Accuracy & Edit Distance & ed1 & Ch instances & Im Instances \\\\\n'
        )
        for scr in scripts:
            correct_scr = correct_script[scr]
            correct_scr_ed1 = correct_ed1_script[scr]
            count_scr = count_script[scr]
            ted_scr = ed_script[scr]
            gt_all_scr = gt_script[scr]
            print(' Script:{3} Acc : {0:.3f}, {1:.2f}, {2:.3f}, {4}'.format(
                correct_scr / float(max(count_scr, 1)), ted_scr / 3.0,
                ted_scr / float(max(gt_all_scr, 1)), scr, gt_all_scr))

            itf.write(
                '{0} & {1:.3f} & {5:.3f} &  {2:.3f} & {3} & {4} \\\\\n'.format(
                    scr.title(), correct_scr / float(max(count_scr, 1)),
                    ted_scr / float(max(gt_all_scr, 1)), gt_all_scr,
                    count_scr, correct_scr_ed1 / float(max(count_scr, 1))))

        itf.write(
            '{0} & {1:.3f} & {5:.3f} &  {2:.3f} & {3} & {4} \\\\\n'.format(
                'Total', correct / float(max(images_count, 1)),
                ted / float(max(gt_all, 1)), gt_all, images_count,
                correct_ed1 / float(max(images_count, 1))))

    print(conf_matrix)
    np.savetxt("conf_matrix.csv",
               conf_matrix,
               delimiter=' & ',
               fmt='%d',
               newline=' \\\\\n')

    # Re-emit the matrix with a header row and a leading script-name column.
    with open("conf_matrix_out.csv", "w") as itf:
        itf.write(' & ')
        itf.write(" & ".join(scr.title() for scr in scripts))
        itf.write('\\\\\n')
        with open("conf_matrix.csv", "r") as ins:
            for scr, line in zip(scripts, ins):
                itf.write(scr.title() + " & " + line)

    net.train()

    pd.options.display.max_rows = 9999
    #pd.options.display.max_cols = 9999

    if len(bad_words) > 0:
        # Ascending edit distance: the closest misses come first.
        _write_bad_words_report(sorted(bad_words, key=lambda x: x[2]),
                                '{0}/ocr_bad.html'.format(dir_name))
        # Descending edit distance: the largest errors come first.
        _write_bad_words_report(
            sorted(bad_words, key=lambda x: x[2], reverse=True),
            '{0}/ocr_not_sobad.html'.format(dir_name))

    return accuracy, ted

コード例 #5

ファイルを表示

ファイル: train.1.py プロジェクト: windzhougithub/FOTS.pytorch-1

def main(opts):
  """Train the E2E-MLT ``OwnModel`` and checkpoint it once per epoch.

  Every iteration takes a batch from ``data_generator``, runs the network,
  computes the detection loss with ``net.loss`` for logging, and then
  optimises the CTC loss returned by ``process_crnn``.  Averaged losses are
  printed every ``disp_interval`` steps and a checkpoint is written to
  ``opts.save_path`` every ``batch_per_epoch`` steps.

  Args:
    opts: options object; this function reads ``cuda``, ``base_lr``,
      ``model``, ``train_list``, ``max_iters``, ``debug`` and ``save_path``.

  The names ``alphabet``, ``weight_decay``, ``data_generator``,
  ``disp_interval`` and ``batch_per_epoch`` are not defined here and must
  exist at module level.
  """
  # Function-scope imports keep this block self-contained.
  import sys
  import traceback

  nclass = len(alphabet) + 1
  model_name = 'E2E-MLT'
  net = OwnModel(attention=True, nclass=nclass)
  print("Using {0}".format(model_name))
  if opts.cuda:
    net.cuda()
  learning_rate = opts.base_lr
  optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)

  ### Option 1: only change the dimensions of conv11
  # model_dict = net.state_dict()
  # if os.path.exists(opts.model):
  #     # load the pretrained model
  #     print('loading pretrained model from %s' % opts.model)
  #     # pretrained_model = OwnModel(attention=True, nclass=7325)
  #     pretrained_model = ModelResNetSep2(attention=True, nclass=7500)
  #     pretrained_model.load_state_dict(torch.load(opts.model)['state_dict'])
  #     pretrained_dict = pretrained_model.state_dict()
  #
  #     pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and 'conv11' not in k and 'rnn' not in k}
  #     # 2. overwrite entries in the existing state dict
  #     model_dict.update(pretrained_dict)
  #     # 3. load the new state dict
  #     net.load_state_dict(model_dict)

  ### Option 2: resume the previous training run directly
  # The default must be set BEFORE loading; assigning it afterwards would
  # throw away the step restored from the checkpoint.
  step_start = 0
  if os.path.exists(opts.model):
    print('loading model from %s' % opts.model)
    step_start, learning_rate = net_utils.load_net(opts.model, net, optimizer)
  ###

  net.train()

  converter = strLabelConverter(alphabet)
  ctc_loss = CTCLoss()

  # NOTE(review): this loader is built but batches are drawn from
  # `data_generator` below; confirm which data source is intended.
  e2edata = E2Edataset(train_list=opts.train_list)
  e2edataloader = torch.utils.data.DataLoader(e2edata, batch_size=4, shuffle=True, collate_fn=E2Ecollate)

  train_loss = 0
  bbox_loss, seg_loss, angle_loss = 0., 0., 0.
  cnt = 0
  ctc_loss_val = 0
  ctc_loss_val2 = 0
  box_loss_val = 0
  gt_g_target = 0
  gt_g_proc = 0

  for step in range(step_start, opts.max_iters):

    # batch
    images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs = next(data_generator)
    im_data = net_utils.np_to_variable(images.transpose(0, 3, 1, 2), is_cuda=opts.cuda)
    # im_data = torch.from_numpy(images).type(torch.FloatTensor).permute(0, 3, 1, 2).cuda()           # the order of permute(0,3,1,2) and cuda() matters
    try:
      seg_pred, roi_pred, angle_pred, features = net(im_data)
    except Exception:
      traceback.print_exc(file=sys.stdout)
      continue

    # for EAST loss
    smaps_var = net_utils.np_to_variable(score_maps, is_cuda=opts.cuda)
    training_mask_var = net_utils.np_to_variable(training_masks, is_cuda=opts.cuda)
    angle_gt = net_utils.np_to_variable(geo_maps[:, :, :, 4], is_cuda=opts.cuda)
    geo_gt = net_utils.np_to_variable(geo_maps[:, :, :, [0, 1, 2, 3]], is_cuda=opts.cuda)

    try:
      loss = net.loss(seg_pred, smaps_var, training_mask_var, angle_pred, angle_gt, roi_pred, geo_gt)
    except Exception:
      traceback.print_exc(file=sys.stdout)
      continue

    bbox_loss += net.box_loss_value.data.cpu().numpy()
    seg_loss += net.segm_loss_value.data.cpu().numpy()
    angle_loss += net.angle_loss_value.data.cpu().numpy()
    train_loss += loss.data.cpu().numpy()

    try:
      # Original note: before step 10000 training used the annotated text
      # regions.  The recognition step now runs on every iteration.
      # ctcl, gt_target , gt_proc = process_boxes(images, im_data, seg_pred[0], roi_pred[0], angle_pred[0], score_maps, gt_idxs, gtso, lbso, features, net, ctc_loss, opts, converter, debug=opts.debug)
      ctcl = process_crnn(im_data, gtso, lbso, net, ctc_loss, converter, training=True)

      # item() works for 0-dim and one-element tensors alike; indexing the
      # numpy value with [0] raises on a 0-dim loss and would skip the
      # optimiser step below.
      ctc_loss_val += ctcl.item()
      # NOTE(review): only the CTC loss is back-propagated; the detection
      # loss computed above is logged but not optimised here.
      loss = ctcl
      gt_g_target = 1
      gt_g_proc = 1
      train_loss += ctcl.item()

      # - When training the OCR recognition part, batches come from a data generator
      # imageso, labels, label_length = next(dg_ocr)              # this should include rectification of slanted text
      # im_data_ocr = net_utils.np_to_variable(imageso, is_cuda=opts.cuda).permute(0, 3, 1, 2)
      # features = net.forward_features(im_data_ocr)
      # labels_pred = net.forward_ocr(features)
      # probs_sizes =  torch.IntTensor( [(labels_pred.permute(2,0,1).size()[0])] * (labels_pred.permute(2,0,1).size()[1]) )
      # label_sizes = torch.IntTensor( torch.from_numpy(np.array(label_length)).int() )
      # labels = torch.IntTensor( torch.from_numpy(np.array(labels)).int() )
      # loss_ocr = ctc_loss(labels_pred.permute(2,0,1), labels, probs_sizes, label_sizes) / im_data_ocr.size(0) * 0.5
      # loss_ocr.backward()
      # ctc_loss_val2 += loss_ocr.item()

      net.zero_grad()
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    except Exception:
      # Best effort: a failing batch is reported and skipped.
      traceback.print_exc(file=sys.stdout)

    cnt += 1
    if step % disp_interval == 0:
      if opts.debug:

        segm = seg_pred[0].data.cpu()[0].numpy()
        segm = segm.squeeze(0)
        cv2.imshow('segm_map', segm)

        segm_res = cv2.resize(score_maps[0], (images.shape[2], images.shape[1]))
        mask = np.argwhere(segm_res > 0)

        # Undo the channel-first layout and the (x / 128 - 1) scaling.
        x_data = im_data.data.cpu().numpy()[0]
        x_data = x_data.swapaxes(0, 2)
        x_data = x_data.swapaxes(0, 1)

        x_data += 1
        x_data *= 128
        x_data = np.asarray(x_data, dtype=np.uint8)
        x_data = x_data[:, :, ::-1]

        im_show = x_data
        try:
          im_show[mask[:, 0], mask[:, 1], 1] = 255
          im_show[mask[:, 0], mask[:, 1], 0] = 0
          im_show[mask[:, 0], mask[:, 1], 2] = 0
        except Exception:
          # The overlay is a debugging aid only; never abort for it.
          pass

        cv2.imshow('img0', im_show)
        cv2.imshow('score_maps', score_maps[0] * 255)
        cv2.imshow('train_mask', training_masks[0] * 255)
        cv2.waitKey(10)

      train_loss /= cnt
      bbox_loss /= cnt
      seg_loss /= cnt
      angle_loss /= cnt
      ctc_loss_val /= cnt
      ctc_loss_val2 /= cnt
      box_loss_val /= cnt
      try:
        print('epoch %d[%d], loss: %.3f, bbox_loss: %.3f, seg_loss: %.3f, ang_loss: %.3f, ctc_loss: %.3f, gt_t/gt_proc:[%d/%d] lv2 %.3f' % (
          step / batch_per_epoch, step, train_loss, bbox_loss, seg_loss, angle_loss, ctc_loss_val, gt_g_target, gt_g_proc , ctc_loss_val2))
      except Exception:
        traceback.print_exc(file=sys.stdout)

      # Start a fresh averaging window.
      train_loss = 0
      bbox_loss, seg_loss, angle_loss = 0., 0., 0.
      cnt = 0
      ctc_loss_val = 0
      ctc_loss_val2 = 0
      box_loss_val = 0

    # for save mode
    #  validate(opts.valid_list, net)
    if step > step_start and (step % batch_per_epoch == 0):
      save_name = os.path.join(opts.save_path, '{}_{}.h5'.format(model_name, step))
      state = {'step': step,
               'learning_rate': learning_rate,
               'state_dict': net.state_dict(),
               'optimizer': optimizer.state_dict()}
      torch.save(state, save_name)
      print('save model: {}'.format(save_name))

コード例 #6

ファイルを表示

ファイル: align_demo.py プロジェクト: windzhougithub/FOTS.pytorch-1

  if args.cuda:
    print('Using cuda ...')
    net = net.cuda()

  imagelist = glob.glob(args.test_folder)
  with torch.no_grad():
    for path in imagelist:
      # path = '/home/yangna/deepblue/OCR/data/ICDAR2015/ch4_test_images/img_405.jpg'
      im = cv2.imread(path)

      im_resized, (ratio_h, ratio_w) = resize_image(im, scale_up=False)
      images = np.asarray([im_resized], dtype=np.float)
      images /= 128
      images -= 1
      im_data = net_utils.np_to_variable(images.transpose(0, 3, 1, 2), is_cuda=args.cuda)
      seg_pred, rboxs, angle_pred, features = net(im_data)

      rbox = rboxs[0].data.cpu()[0].numpy()                   # 转变成h,w,c
      rbox = rbox.swapaxes(0, 1)
      rbox = rbox.swapaxes(1, 2)

      angle_pred = angle_pred[0].data.cpu()[0].numpy()

      segm = seg_pred[0].data.cpu()[0].numpy()
      segm = segm.squeeze(0)

      draw2 = np.copy(im_resized)
      boxes =  get_boxes(segm, rbox, angle_pred, args.segm_thresh)

      img = Image.fromarray(draw2)

コード例 #7

ファイルを表示

ファイル: eval.py プロジェクト: wisdal/NAVI-STR

            print(img_name)

            img = cv2.imread(img_name)

            #font = cv2.FONT_HERSHEY_SIMPLEX
            #cv2.putText(img,'cs',(10,img.shape[0] -40), font, 0.8,(255,255,255),2,cv2.LINE_AA)

            im_resized, (ratio_h, ratio_w) = resize_image(
                img, max_size=1848 * 1024,
                scale_up=True)  #1348*1024 #1848*1024
            #im_resized = im_resized[:, :, ::-1]
            images = np.asarray([im_resized], dtype=np.float)
            images /= 128
            images -= 1
            im_data = net_utils.np_to_variable(images,
                                               is_cuda=args.cuda).permute(
                                                   0, 3, 1, 2)

            [iou_pred, iou_pred1], rboxs, angle_pred, features = net(im_data)
            iou = iou_pred.data.cpu()[0].numpy()
            iou = iou.squeeze(0)

            iou_pred1 = iou_pred1.data.cpu()[0].numpy()
            iou_pred1 = iou_pred1.squeeze(0)

            #ioud = segm_predd.data.cpu()[0].numpy()
            #ioud = ioud.squeeze(0)

            rbox = rboxs[0].data.cpu()[0].numpy()
            rbox = rbox.swapaxes(0, 1)
            rbox = rbox.swapaxes(1, 2)

コード例 #8

ファイルを表示

ファイル: ocr_tools.py プロジェクト: irynakostyshyn/License-Plate-Recognition

def main(opts):
    """Train ``OCRModel`` with CTC loss, checkpointing once per epoch.

    Args:
        opts: Parsed options. Reads ``cuda``, ``model``, ``num_readers``,
            ``batch_size``, ``train_list`` and ``save_path``.

    Uses the module-level ``base_lr`` and ``batch_per_epoch`` settings and
    calls the module-level ``test`` function after every checkpoint.
    """
    model_name = 'ICCV_OCR'
    net = OCRModel()

    if opts.cuda:
        net.cuda()

    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)
    step_start = 0
    if os.path.exists(opts.model):
        # Read the checkpoint path from the `opts` argument, not from a
        # global `args`, so the function also works when called directly.
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net)
    else:
        learning_rate = base_lr
    print('train')
    net.train()

    ctc_loss = CTCLoss(blank=0).cuda()
    # Set once up front; there is no need to re-assign it on every step.
    torch.backends.cudnn.deterministic = True

    data_generator = ocr_gen.get_batch(num_workers=opts.num_readers,
                                       batch_size=opts.batch_size,
                                       train_list=opts.train_list,
                                       in_train=True)

    train_loss = 0
    cnt = 0
    tq = tqdm(range(step_start, 10000000))
    for step in tq:
        # batch
        images, labels, label_length = next(data_generator)
        im_data = net_utils.np_to_variable(images,
                                           is_cuda=opts.cuda,
                                           volatile=False).permute(0, 3, 1, 2)
        # Reorder the network output so that its last axis comes first,
        # which is the layout handed to the CTC loss below.
        labels_pred = net(im_data).permute(2, 0, 1)

        # CTC loss arguments:
        #   labels_pred: network output, (seqLength x batch x outputDim)
        #   labels:      all targets of the batch
        #   probs_sizes: per-sample length of the network output
        #   label_sizes: per-sample length of the target
        # Every sample uses the full output length (size of axis 0).
        seq_len, batch = labels_pred.size(0), labels_pred.size(1)
        probs_sizes = torch.full((batch,), seq_len, dtype=torch.long)
        label_sizes = torch.from_numpy(np.array(label_length)).long()
        labels = torch.from_numpy(np.array(labels)).long()

        optimizer.zero_grad()
        loss = ctc_loss(labels_pred, labels, probs_sizes,
                        label_sizes) / opts.batch_size  # change 1.9.
        loss_value = loss.item()
        # Skip the update for any non-finite loss (inf *or* NaN); back-
        # propagating either would poison the weights.
        if not np.isfinite(loss_value):
            continue

        loss.backward()
        optimizer.step()

        train_loss += loss_value
        cnt += 1
        tq.set_description(
            'epoch %d[%d], loss: %.3f, lr: %.5f ' %
            (step / batch_per_epoch, step, train_loss / cnt, learning_rate))

        if step > step_start and (step % batch_per_epoch == 0):
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))

            test(net)

コード例 #9

ファイルを表示

ファイル: ocr_tools.py プロジェクト: irynakostyshyn/License-Plate-Recognition

def test(net, list_file='/home/liepieshov/dataset/en_words/test.csv'):
    """Evaluate ``net`` on the word images listed in ``list_file``.

    Each readable image is loaded as grayscale, rescaled with
    ``x / 128 - 1``, run through the network and greedily decoded with
    ``print_seq_ext``. One ``name|gt|prediction|edit_distance`` line per
    image is written to ``./valid.txt`` and a summary is printed at the end.

    Args:
        net: Model to evaluate. It is switched to eval mode for the run and
            back to train mode afterwards, even if evaluation fails.
        list_file: CSV list understood by ``ocr_gen.get_info_csv``.

    Images that cannot be read are reported and skipped; they are not
    counted in the accuracy.
    """
    net = net.eval()
    images, _bucket, label = ocr_gen.get_info_csv(list_file)

    processed = 0
    correct = 0
    ted = 0
    gt_all = 0
    try:
        # Inference only: no autograd graph is needed.
        with open('./valid.txt', 'w') as fout, torch.no_grad():
            for image_name, gt_txt in zip(images, label):
                img = cv2.imread(image_name, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    # Iterating directly over the list guarantees progress
                    # here; an index that is only advanced after a
                    # successful read would retry this image forever.
                    print(image_name)
                    continue
                # Tall crops with longer text are rotated to be horizontal.
                if img.shape[0] > img.shape[1] * 2 and len(gt_txt) > 3:
                    img = np.transpose(img)
                    img = cv2.flip(img, flipCode=1)

                # (H, W) -> (1, H, W, 1). np.float was removed in
                # NumPy 1.24; np.float64 is the same dtype.
                scaled = np.asarray(img[np.newaxis, :, :, np.newaxis],
                                    dtype=np.float64)
                scaled /= 128
                scaled -= 1

                scaled_var = net_utils.np_to_variable(
                    scaled, is_cuda=args.cuda,
                    volatile=False).permute(0, 3, 1, 2)
                ctc_f = net(scaled_var)
                ctc_f = ctc_f.data.cpu().numpy()
                ctc_f = ctc_f.swapaxes(1, 2)

                # Greedy decoding: arg-max over the last axis per position.
                labels = ctc_f.argmax(2)
                det_text, conf, dec_s = print_seq_ext(labels[0, :])

                processed += 1

                predicted = str(det_text).lower()
                expected = str(gt_txt).lower()
                edit_dist = editdistance.eval(predicted, expected)
                ted += edit_dist
                gt_all += len(str(gt_txt))

                if predicted == expected:
                    correct += 1
                else:
                    print('{0} - {1} / {2:.2f} - {3:.2f}'.format(
                        det_text, gt_txt, correct / float(processed),
                        ted / 3.0))

                fout.write('{0}|{1}|{2}|{3}\n'.format(
                    os.path.basename(image_name), gt_txt, det_text,
                    edit_dist))

        # max(..., 1) keeps the summary printable when nothing was processed.
        print('Test accuracy: {0:.3f}, {1:.2f}, {2:.3f}'.format(
            correct / float(max(processed, 1)), ted / 3.0,
            ted / float(max(gt_all, 1))))
    finally:
        net.train()

コード例 #10

ファイルを表示

def main(opts):
  """Training entry point for the 'E2E-MLT' model (snippet is truncated).

  Builds an ``OwnModel``, optionally resumes from ``opts.model``, then on
  every step computes the detection loss via ``net.loss`` and a
  recognition loss via ``process_crnn``.

  NOTE(review): the listing ends on a bare ``except:`` with no body, and
  the statements after ``if step > 10000 or True:`` are dedented relative
  to that ``if``; as shown here the block does not parse.
  """

  nclass = len(alphabet) + 1
  model_name = 'E2E-MLT'
  net = OwnModel(attention=True, nclass=nclass)
  print("Using {0}".format(model_name))
  if opts.cuda:
    net.cuda()
  learning_rate = opts.base_lr
  optimizer = torch.optim.Adam(net.parameters(), lr=opts.base_lr, weight_decay=weight_decay)

  ### Option 1: only change the dimension of conv11
  # model_dict = net.state_dict()
  # if os.path.exists(opts.model):
  #     # load the pretrained model
  #     print('loading pretrained model from %s' % opts.model)
  #     # pretrained_model = OwnModel(attention=True, nclass=7325)
  #     pretrained_model = ModelResNetSep2(attention=True, nclass=7500)
  #     pretrained_model.load_state_dict(torch.load(opts.model)['state_dict'])
  #     pretrained_dict = pretrained_model.state_dict()
  #
  #     pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and 'conv11' not in k and 'rnn' not in k}
  #     # 2. overwrite entries in the existing state dict
  #     model_dict.update(pretrained_dict)
  #     # 3. load the new state dict
  #     net.load_state_dict(model_dict)

  ### Option 2: resume training directly from the previous run
  # NOTE(review): `args` is not a parameter of this function; presumably a
  # module-level global holding the same values as `opts` -- confirm.
  if os.path.exists(opts.model):
    print('loading model from %s' % args.model)
    step_start, learning_rate = net_utils.load_net(args.model, net, optimizer)
  ### 
  
  # The step counter always restarts at 0, overriding any value returned
  # by load_net above.
  step_start = 0
  net.train()

  converter = strLabelConverter(alphabet)
  ctc_loss = CTCLoss()

  # NOTE(review): e2edataloader is created but not used in the visible part
  # of the function; batches below come from `data_generator`, which is not
  # defined in this block.
  e2edata = E2Edataset(train_list=opts.train_list)
  e2edataloader = torch.utils.data.DataLoader(e2edata, batch_size=4, shuffle=True, collate_fn=E2Ecollate)
  
  # Running accumulators for the logged loss values.
  train_loss = 0
  bbox_loss, seg_loss, angle_loss = 0., 0., 0.
  cnt = 0
  ctc_loss_val = 0
  ctc_loss_val2 = 0
  box_loss_val = 0
  gt_g_target = 0
  gt_g_proc = 0
  
  
  for step in range(step_start, opts.max_iters):

    loss = 0

    # batch
    images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs = next(data_generator)
    im_data = net_utils.np_to_variable(images.transpose(0, 3, 1, 2), is_cuda=opts.cuda)
    # im_data = torch.from_numpy(images).type(torch.FloatTensor).permute(0, 3, 1, 2).cuda()       # the order of permute(0,3,1,2) and cuda() matters
    # NOTE(review): timeit.timeit() runs a benchmark of `pass` and returns
    # its duration; it is not a timestamp. `start`/`end` are not used in
    # the visible code.
    start = timeit.timeit()
    try:
      seg_pred, roi_pred, angle_pred, features = net(im_data)
    except:
      # A failed forward pass is logged and the batch is skipped.
      import sys, traceback
      traceback.print_exc(file=sys.stdout)
      continue
    end = timeit.timeit()
    
    # for EAST loss
    smaps_var = net_utils.np_to_variable(score_maps, is_cuda=opts.cuda)
    training_mask_var = net_utils.np_to_variable(training_masks, is_cuda=opts.cuda)
    # The last axis of geo_maps is split: index 4 is used as the angle
    # target, indices 0-3 as the geometry target.
    angle_gt = net_utils.np_to_variable(geo_maps[:, :, :, 4], is_cuda=opts.cuda)
    geo_gt = net_utils.np_to_variable(geo_maps[:, :, :, [0, 1, 2, 3]], is_cuda=opts.cuda)
    
    try:
      loss = net.loss(seg_pred, smaps_var, training_mask_var, angle_pred, angle_gt, roi_pred, geo_gt)
    except:
      import sys, traceback
      traceback.print_exc(file=sys.stdout)
      continue
      
    bbox_loss += net.box_loss_value.data.cpu().numpy() 
    seg_loss += net.segm_loss_value.data.cpu().numpy()
    angle_loss += net.angle_loss_value.data.cpu().numpy()  
    train_loss += loss.data.cpu().numpy()
    
       
    try:
      # before step 10000, training uses the annotated text regions
      # (the `or True` below makes the condition always true)
      if step > 10000 or True: #this is just extra augumentation step ... in early stage just slows down training
    # NOTE(review): the lines below are dedented relative to the `if` above.
    # ctcl, gt_target , gt_proc = process_boxes(images, im_data, seg_pred[0], roi_pred[0], angle_pred[0], score_maps, gt_idxs, gtso, lbso, features, net, ctc_loss, opts, converter, debug=opts.debug)
    ctcl= process_crnn(im_data, gtso, lbso, net, ctc_loss, converter, training=True)
    gt_target = 1
    gt_proc = 1

    ctc_loss_val += ctcl.data.cpu().numpy()[0]
    # The detection loss computed above is replaced by the recognition
    # loss here; only `ctcl` is back-propagated below.
    loss = ctcl
    gt_g_target = gt_target
    gt_g_proc = gt_proc
    train_loss += ctcl.item()
      
      # when training the OCR recognition part, a data generator produces the batches
      # imageso, labels, label_length = next(dg_ocr)          # this should include rectification of slanted text
      # im_data_ocr = net_utils.np_to_variable(imageso, is_cuda=opts.cuda).permute(0, 3, 1, 2)
      # features = net.forward_features(im_data_ocr)
      # labels_pred = net.forward_ocr(features)
      # probs_sizes =  torch.IntTensor( [(labels_pred.permute(2,0,1).size()[0])] * (labels_pred.permute(2,0,1).size()[1]) )
      # label_sizes = torch.IntTensor( torch.from_numpy(np.array(label_length)).int() )
      # labels = torch.IntTensor( torch.from_numpy(np.array(labels)).int() )
      # loss_ocr = ctc_loss(labels_pred.permute(2,0,1), labels, probs_sizes, label_sizes) / im_data_ocr.size(0) * 0.5
      # loss_ocr.backward()
      # ctc_loss_val2 += loss_ocr.item()

      net.zero_grad()
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    except:

コード例 #11

ファイルを表示

ファイル: train_ocr.py プロジェクト: josedossantos10/OctShuffle-MLT

def main(opts):
    """Train the OCR branch of ``OctMLT`` with CTC loss.

    Args:
        opts: Parsed options. Reads ``cuda``, ``model``, the ``load_*`` and
            ``freeze_*`` flags, ``num_readers``, ``batch_size``,
            ``train_list``, ``norm_height``, ``debug`` and ``save_path``.

    Uses the module-level ``base_lr``, ``weight_decay``, ``disp_interval``,
    ``batch_per_epoch`` and ``codec`` settings. A checkpoint is written
    every ``batch_per_epoch`` steps.
    """
    model_name = 'OctGatedMLT'
    net = OctMLT(attention=True)
    acc = []

    if opts.cuda:
        net.cuda()

    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=base_lr,
                                 weight_decay=weight_decay)
    if os.path.exists(opts.model):
        # Read the checkpoint path from the `opts` argument, not from a
        # global `args`, so the function also works when called directly.
        print('loading model from %s' % opts.model)
        _loaded_step, learning_rate = net_utils.load_net(
            opts.model,
            net,
            optimizer,
            load_ocr=opts.load_ocr,
            load_detection=opts.load_detection,
            load_shared=opts.load_shared,
            load_optimizer=opts.load_optimizer,
            reset_step=opts.load_reset_step)
    else:
        learning_rate = base_lr

    # NOTE(review): the step returned by load_net is deliberately kept
    # unused -- the original code always restarts counting from 0.
    step_start = 0

    net.train()

    if opts.freeze_shared:
        net_utils.freeze_shared(net)

    if opts.freeze_ocr:
        net_utils.freeze_ocr(net)

    if opts.freeze_detection:
        net_utils.freeze_detection(net)

    ctc_loss = CTCLoss()

    data_generator = ocr_gen.get_batch(num_workers=opts.num_readers,
                                       batch_size=opts.batch_size,
                                       train_list=opts.train_list,
                                       in_train=True,
                                       norm_height=opts.norm_height,
                                       rgb=True)

    train_loss = 0
    cnt = 0

    for step in range(step_start, 300000):
        # batch
        images, labels, label_length = next(data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(
            0, 3, 1, 2)
        features = net.forward_features(im_data)
        labels_pred = net.forward_ocr(features)

        # CTC loss arguments:
        #   ctc_input:   network output, (seqLength x batch x outputDim)
        #   targets:     all targets of the batch
        #   probs_sizes: per-sample length of the network output
        #   label_sizes: per-sample length of the target
        # All size/target tensors are kept as 32-bit integer tensors, as in
        # the original code.
        ctc_input = labels_pred.permute(2, 0, 1)
        probs_sizes = torch.IntTensor(
            [ctc_input.size(0)] * ctc_input.size(1))
        label_sizes = torch.from_numpy(np.array(label_length)).int()
        targets = torch.from_numpy(np.array(labels)).int()
        loss = ctc_loss(ctc_input, targets, probs_sizes,
                        label_sizes) / im_data.size(0)  # change 1.9.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Only finite losses enter the running average; isfinite also
        # rejects NaN, which a plain isinf test lets through.
        loss_value = float(loss.data.cpu().numpy().reshape(-1)[0])
        if np.isfinite(loss_value):
            train_loss += loss_value
            cnt += 1

        if opts.debug:
            dbg = labels_pred.data.cpu().numpy()
            ctc_f = dbg.swapaxes(1, 2)
            decoded = ctc_f.argmax(2)
            det_text, conf, dec_s = print_seq_ext(decoded[0, :], codec)

            print('{0} \t'.format(det_text))

        if step % disp_interval == 0:
            # max(cnt, 1) avoids a ZeroDivisionError when every loss in
            # the interval was non-finite.
            train_loss /= max(cnt, 1)
            print('epoch %d[%d], loss: %.3f, lr: %.5f ' %
                  (step / batch_per_epoch, step, train_loss, learning_rate))

            train_loss = 0
            cnt = 0

        if step > step_start and (step % batch_per_epoch == 0):
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, save_name)
            print('save model: {}'.format(save_name))

            # `acc` is never filled in this function (validation is
            # disabled), so this stores an empty list.
            np.savez('train_acc_{0}'.format(model_name), acc=acc)

コード例 #12

ファイルを表示

def main(opts):
    """Jointly train the detection and recognition heads of the E2E model.

    Every step computes the detection loss (``net.loss``), adds the CTC
    loss returned by ``process_boxes``, and additionally back-propagates a
    recognition loss on a batch from a separate OCR generator. Progress is
    appended to ``<save_path>/loss.txt`` every ``disp_interval`` steps and
    a checkpoint plus evaluation is produced every ``batch_per_epoch``
    steps.

    Args:
        opts: Parsed options. Reads ``cuda``, ``base_lr``, ``model``,
            ``num_readers``, ``input_size``, ``batch_size``,
            ``train_path``, ``geo_type``, ``normalize``,
            ``ocr_batch_size``, ``ocr_feed_list``, ``max_iters``,
            ``debug``, ``d1``, ``save_path`` and ``eval_path``.

    Uses the module-level ``norm_height``, ``weight_decay``, ``device``,
    ``disp_interval`` and ``batch_per_epoch`` settings.
    """
    import sys
    import traceback

    model_name = 'E2E-MLT'
    net = ModelResNetSep_crnn(
        attention=True,
        multi_scale=True,
        num_classes=400,
        fixed_height=norm_height,
        net='densenet',
    )
    print("Using {0}".format(model_name))
    ctc_loss = nn.CTCLoss()
    if opts.cuda:
        net.to(device)
        ctc_loss.to(device)
    learning_rate = opts.base_lr
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=opts.base_lr,
                                 weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer,
                                                  base_lr=0.0006,
                                                  max_lr=0.001,
                                                  step_size_up=3000,
                                                  cycle_momentum=False)
    step_start = 0
    if os.path.exists(opts.model):
        # Read the checkpoint path from the `opts` argument, not from a
        # global `args`, so the function also works when called directly.
        print('loading model from %s' % opts.model)
        step_start, learning_rate = net_utils.load_net(opts.model, net,
                                                       optimizer)
    net_utils.adjust_learning_rate(optimizer, learning_rate)

    net.train()

    data_generator = data_gen.get_batch(num_workers=opts.num_readers,
                                        input_size=opts.input_size,
                                        batch_size=opts.batch_size,
                                        train_list=opts.train_path,
                                        geo_type=opts.geo_type,
                                        normalize=opts.normalize)

    dg_ocr = ocr_gen.get_batch(num_workers=2,
                               batch_size=opts.ocr_batch_size,
                               train_list=opts.ocr_feed_list,
                               in_train=True,
                               norm_height=norm_height,
                               rgb=True,
                               normalize=opts.normalize)

    # Bound before the loop: both the periodic display branch and the
    # checkpoint branch append to this file.
    save_log = os.path.join(opts.save_path, 'loss.txt')

    train_loss = 0
    train_loss_temp = 0
    bbox_loss, seg_loss, angle_loss = 0., 0., 0.
    cnt = 1

    ctc_loss_val = 0
    ctc_loss_val2 = 0
    ctcl = torch.tensor([0])
    box_loss_val = 0
    good_all = 0
    gt_all = 0
    train_loss_lr = 0
    cntt = 0
    time_total = 0
    now = time.time()

    for step in range(step_start, opts.max_iters):
        # batch
        images, image_fns, score_maps, geo_maps, training_masks, gtso, lbso, gt_idxs = next(
            data_generator)
        im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(
            0, 3, 1, 2)
        # `except Exception` (not a bare except) so that Ctrl-C and
        # SystemExit still stop the training loop.
        try:
            seg_pred, roi_pred, angle_pred, features = net(im_data)
        except Exception:
            traceback.print_exc(file=sys.stdout)
            continue

        # Detection targets. The last axis of geo_maps is split: index 4
        # is used as the angle target, indices 0-3 as the geometry target.
        smaps_var = net_utils.np_to_variable(score_maps, is_cuda=opts.cuda)
        training_mask_var = net_utils.np_to_variable(training_masks,
                                                     is_cuda=opts.cuda)
        angle_gt = net_utils.np_to_variable(geo_maps[:, :, :, 4],
                                            is_cuda=opts.cuda)
        geo_gt = net_utils.np_to_variable(geo_maps[:, :, :, [0, 1, 2, 3]],
                                          is_cuda=opts.cuda)

        try:
            loss = net.loss(seg_pred, smaps_var, training_mask_var, angle_pred,
                            angle_gt, roi_pred, geo_gt)
        except Exception:
            traceback.print_exc(file=sys.stdout)
            continue

        if not (torch.isnan(loss) or torch.isinf(loss)):
            train_loss_temp += loss.data.cpu().numpy()

        optimizer.zero_grad()

        try:
            # Extra augmentation step: recognition loss on the detected /
            # ground-truth boxes of the current batch.
            ctcl, gt_b_good, gt_b_all = process_boxes(images,
                                                      im_data,
                                                      seg_pred[0],
                                                      roi_pred[0],
                                                      angle_pred[0],
                                                      score_maps,
                                                      gt_idxs,
                                                      gtso,
                                                      lbso,
                                                      features,
                                                      net,
                                                      ctc_loss,
                                                      opts,
                                                      debug=opts.debug)

            loss = loss + ctcl
            gt_all += gt_b_all
            good_all += gt_b_good

            # Separate recognition-only batch from the OCR generator.
            imageso, labels, label_length = next(dg_ocr)
            im_data_ocr = net_utils.np_to_variable(imageso,
                                                   is_cuda=opts.cuda).permute(
                                                       0, 3, 1, 2)
            labels_pred = net.forward_ocr(im_data_ocr)

            probs_sizes = torch.IntTensor([(labels_pred.size()[0])] *
                                          (labels_pred.size()[1])).long()
            label_sizes = torch.IntTensor(
                torch.from_numpy(np.array(label_length)).int()).long()
            labels = torch.IntTensor(torch.from_numpy(
                np.array(labels)).int()).long()
            loss_ocr = ctc_loss(labels_pred, labels, probs_sizes,
                                label_sizes) / im_data_ocr.size(0) * 0.5

            # Gradients of both losses accumulate before the single
            # optimizer step below.
            loss_ocr.backward()
            loss.backward()

            clipping_value = 0.5
            torch.nn.utils.clip_grad_norm_(net.parameters(), clipping_value)
            if opts.d1:
                print('loss_nan', torch.isnan(loss))
                print('loss_inf', torch.isinf(loss))
                print('lossocr_nan', torch.isnan(loss_ocr))
                print('lossocr_inf', torch.isinf(loss_ocr))

            # Only apply the update when every loss is finite.
            if not (torch.isnan(loss) or torch.isinf(loss)
                    or torch.isnan(loss_ocr) or torch.isinf(loss_ocr)):
                bbox_loss += net.box_loss_value.data.cpu().numpy()
                seg_loss += net.segm_loss_value.data.cpu().numpy()
                angle_loss += net.angle_loss_value.data.cpu().numpy()
                train_loss += train_loss_temp
                ctc_loss_val2 += loss_ocr.item()
                # reshape(-1)[0] reads the value whether ctcl is 0-d or 1-d;
                # a plain [0] raises on a 0-d tensor and would silently
                # skip optimizer.step() through the handler below.
                ctc_loss_val += float(
                    ctcl.data.cpu().numpy().reshape(-1)[0])
                optimizer.step()
                scheduler.step()
                train_loss_temp = 0
                cnt += 1

        except Exception:
            # Best effort: log the failure and carry on with the next step.
            traceback.print_exc(file=sys.stdout)

        if step % disp_interval == 0:

            if opts.debug:

                segm = seg_pred[0].data.cpu()[0].numpy()
                segm = segm.squeeze(0)
                cv2.imshow('segm_map', segm)

                segm_res = cv2.resize(score_maps[0],
                                      (images.shape[2], images.shape[1]))
                mask = np.argwhere(segm_res > 0)

                # First image of the batch, back to channels-last.
                x_data = im_data.data.cpu().numpy()[0]
                x_data = x_data.swapaxes(0, 2)
                x_data = x_data.swapaxes(0, 1)

                if opts.normalize:
                    # Undo the x / 128 - 1 input scaling.
                    x_data += 1
                    x_data *= 128
                x_data = np.asarray(x_data, dtype=np.uint8)
                x_data = x_data[:, :, ::-1]

                im_show = x_data
                try:
                    im_show[mask[:, 0], mask[:, 1], 1] = 255
                    im_show[mask[:, 0], mask[:, 1], 0] = 0
                    im_show[mask[:, 0], mask[:, 1], 2] = 0
                except Exception:
                    # The overlay is a debugging aid only; ignore failures.
                    pass

                cv2.imshow('img0', im_show)
                cv2.imshow('score_maps', score_maps[0] * 255)
                cv2.imshow('train_mask', training_masks[0] * 255)
                cv2.waitKey(10)

            # cnt is reset to 0 after each report, so guard against an
            # interval in which no step succeeded.
            steps_done = max(1, cnt)
            train_loss /= steps_done
            bbox_loss /= steps_done
            seg_loss /= steps_done
            angle_loss /= steps_done
            ctc_loss_val /= steps_done
            ctc_loss_val2 /= steps_done
            box_loss_val /= steps_done
            train_loss_lr += (train_loss)

            cntt += 1
            time_now = time.time() - now
            time_total += time_now
            now = time.time()
            for param_group in optimizer.param_groups:
                learning_rate = param_group['lr']

            log_line = (
                'epoch %d[%d], lr: %f, loss: %.3f, bbox_loss: %.3f, seg_loss: %.3f, ang_loss: %.3f, ctc_loss: %.3f, rec: %.5f, lv2: %.3f, time: %.2f s, cnt: %d\n'
                % (step / batch_per_epoch, step, learning_rate, train_loss,
                   bbox_loss, seg_loss, angle_loss, ctc_loss_val,
                   good_all / max(1, gt_all), ctc_loss_val2, time_now, cnt))
            with open(save_log, 'a') as f:
                f.write(log_line)
            try:
                print(log_line)
            except Exception:
                traceback.print_exc(file=sys.stdout)

            train_loss = 0
            bbox_loss, seg_loss, angle_loss = 0., 0., 0.
            cnt = 0
            ctc_loss_val = 0
            ctc_loss_val2 = 0
            good_all = 0
            gt_all = 0
            box_loss_val = 0

        if step > step_start and (step % batch_per_epoch == 0):
            for param_group in optimizer.param_groups:
                learning_rate = param_group['lr']
                print('learning_rate', learning_rate)
            save_name = os.path.join(opts.save_path,
                                     '{}_{}.h5'.format(model_name, step))
            state = {
                'step': step,
                'learning_rate': learning_rate,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, save_name)
            # evaluate
            re_tpe2e, re_tp, re_e1, precision = evaluate_e2e_crnn(
                root=opts.eval_path,
                net=net,
                norm_height=norm_height,
                name_model=save_name,
                normalize=opts.normalize,
                save_dir=opts.save_path)

            # Guard against a checkpoint that arrives before any report.
            mean_loss = train_loss_lr / max(1, cntt)
            with open(save_log, 'a') as f:
                f.write(
                    'time epoch [%d]: %.2f s, loss_total: %.3f, lr:%f, re_tpe2e = %f, re_tp = %f, re_e1 = %f, precision = %f\n'
                    % (step / batch_per_epoch, time_total, mean_loss,
                       learning_rate, re_tpe2e, re_tp, re_e1, precision))
            print(
                'time epoch [%d]: %.2f s, loss_total: %.3f, re_tpe2e = %f, re_tp = %f, re_e1 = %f, precision = %f'
                % (step / batch_per_epoch, time_total, mean_loss,
                   re_tpe2e, re_tp, re_e1, precision))
            print('save model: {}'.format(save_name))
            time_total = 0
            cntt = 0
            train_loss_lr = 0
            # Return to training mode after evaluation.
            net.train()

コード例 #13

ファイルを表示

ファイル: demo.py プロジェクト: dipikakhullar/ocr

def run_model_input_image(im, show_boxes=False):
  """Detect and recognise text in a single image.

  Args:
    im: Image data accepted by ``np.asarray``; only the first three
      channels of the last axis are used.
    show_boxes: Forwarded to ``show_image_with_boxes`` as ``show``.

  Returns:
    A dict with one key, ``"frame"``, holding the lower-cased text of
    every box for which the recogniser returned a non-empty string.

  Note:
    The model path, CUDA flag and segmentation threshold are read from
    the process command line via ``argparse``.
  """
  predictions = {}
  parser = argparse.ArgumentParser()
  parser.add_argument('-cuda', type=int, default=1)
  parser.add_argument('-model', default='e2e-mlt-rctw.h5')
  # type=float so a threshold given on the command line is not left as str.
  parser.add_argument('-segm_thresh', type=float, default=0.5)

  font2 = ImageFont.truetype("Arial-Unicode-Regular.ttf", 18)

  args = parser.parse_args()

  net = ModelResNetSep2(attention=True)
  net_utils.load_net(args.model, net)
  net = net.eval()

  if args.cuda:
    print('Using cuda ...')
    net = net.cuda()

  with torch.no_grad():
    im = np.asarray(im)
    im = im[..., :3]
    im_resized, (ratio_h, ratio_w) = resize_image(im, scale_up=False)
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    images = np.asarray([im_resized], dtype=np.float64)
    # In-place rescale x / 128 - 1.
    images /= 128
    images -= 1
    im_data = net_utils.np_to_variable(images, is_cuda=args.cuda).permute(0, 3, 1, 2)
    seg_pred, rboxs, angle_pred, features = net(im_data)

    # The two swaps move axis 0 of the first rbox map to the end.
    rbox = rboxs[0].data.cpu()[0].numpy()
    rbox = rbox.swapaxes(0, 1)
    rbox = rbox.swapaxes(1, 2)

    angle_pred = angle_pred[0].data.cpu()[0].numpy()

    segm = seg_pred[0].data.cpu()[0].numpy()
    segm = segm.squeeze(0)

    draw2 = np.copy(im_resized)
    boxes = get_boxes(segm, rbox, angle_pred, args.segm_thresh)

    img = Image.fromarray(draw2)
    draw = ImageDraw.Draw(img)

    out_boxes = []
    prediction_i = []
    for box in boxes:
      det_text, conf, dec_s = ocr_image(net, codec, im_data, box)
      if len(det_text) == 0:
        continue

      # The text is drawn at the first point of the box. The previous
      # draw.textsize() call was dropped: its result was never used and
      # the method no longer exists in Pillow >= 10.
      draw.text((box[0], box[1]), det_text, fill=(0, 255, 0), font=font2)
      out_boxes.append(box)

      # det_text is one prediction
      prediction_i.append(det_text.lower())

    predictions["frame"] = prediction_i

    # show each image boxes and output in pop up window.
    show_image_with_boxes(img, out_boxes, show=show_boxes)

  print(predictions)
  return predictions

コード例 #14

ファイルを表示

def evaluate_e2e_crnn(root,
                      net,
                      norm_height=48,
                      name_model='E2E',
                      normalize=False,
                      save=False,
                      cuda=True,
                      save_dir='eval'):
    """Evaluate an end-to-end (detection + recognition) model on a folder.

    Every ``*.jpg``, ``*.png`` and ``*.JPG`` file directly inside ``root`` is
    passed through ``net``.  Each box returned by ``get_boxes`` is resampled
    from the network input with an affine grid, decoded with
    ``net.forward_ocr`` and the resulting (box, text) pairs are scored by
    ``evaluate_image`` against the image's ``gt_<stem>.txt`` file.  One
    summary line is appended to ``<save_dir>/note_eval.txt``.

    Args:
        root: Directory holding the images and their ``gt_*.txt`` files.
        net: Model; used as ``net(im_data)`` and ``net.forward_ocr(crop)``.
        norm_height: Height (in pixels) of the crop passed to the recogniser.
        name_model: Label written into the summary line.
        normalize: When true, pixel values are mapped with ``x / 128 - 1``.
        save: When true, the image returned by ``evaluate_image`` is written
            to ``save_dir`` under the source file name.
        cuda: Forwarded to ``net_utils.np_to_variable`` as ``is_cuda``.
        save_dir: Output directory; created (with parents) when missing.

    Returns:
        Tuple of four floats, in the order of the summary line:
        ``tp_e2e / gt_e2e``, ``tp / gt_e2e``, ``tp_e2e_ed1 / gt_e2e`` and
        ``tp / number_of_kept_detections``.  Every denominator is clamped to
        at least 1 so an empty folder yields zeros instead of dividing by 0.
    """
    net = net.eval()

    image_paths = []
    for pattern in ('*.jpg', '*.png', '*.JPG'):
        image_paths.extend(glob.glob(os.path.join(root, pattern)))

    tp_all = 0
    tp_e2e_all = 0
    gt_e2e_all = 0
    tp_e2e_ed1_all = 0
    detections_all = 0
    eval_text_length = 2
    segm_thresh = 0.5
    min_height = 8

    # makedirs also copes with nested paths and an already existing folder.
    os.makedirs(save_dir, exist_ok=True)

    with torch.no_grad():
        for img_name in image_paths:
            base_nam = os.path.basename(img_name)

            gt_path, gt_found = _find_gt_file(root, base_nam)
            if gt_found:
                gt_rect, gt_txts = load_gt(gt_path)
            else:
                # Without annotations the image still counts its detections
                # (they lower precision) but contributes no ground truth.
                print('missing! {0}'.format(gt_path))
                gt_rect, gt_txts = [], []

            img = cv2.imread(img_name)

            im_resized, _ = resize_image(
                img, max_size=1848 * 1024,
                scale_up=False)  # 1348*1024 #1848*1024
            # np.float was only an alias of the builtin float (float64) and
            # no longer exists in NumPy >= 1.24.
            batch = np.asarray([im_resized], dtype=np.float64)
            if normalize:
                batch /= 128
                batch -= 1
            im_data = net_utils.np_to_variable(batch, is_cuda=cuda).permute(
                0, 3, 1, 2)

            (iou_pred, _), rboxs, angle_pred, _ = net(im_data)
            iou = iou_pred.data.cpu()[0].numpy().squeeze(0)

            rbox = rboxs[0].data.cpu()[0].numpy()
            rbox = rbox.swapaxes(0, 1).swapaxes(1, 2)

            detections = get_boxes(iou, rbox,
                                   angle_pred[0].data.cpu()[0].numpy(),
                                   segm_thresh)

            # Ratio between the network input and the original image size.
            im_scalex = im_resized.shape[1] / img.shape[1]
            im_scaley = im_resized.shape[0] / img.shape[0]

            input_W = im_data.size(3)
            input_H = im_data.size(2)
            target_h = norm_height

            detections_out = []
            for box in detections:
                # Corner coordinates stay in network-input space here because
                # the crop below is sampled from im_data.
                boxr = box[0:8].reshape(-1, 2)
                xc, yc, w, h, angle = _box_geometry(boxr)

                # h carries a +1 padding; compare the raw height measured in
                # original-image pixels.
                if ((h - 1) / im_scaley) < min_height:
                    continue

                scale = target_h / h
                target_gw = int(w * scale + target_h / 4)
                # The crop width is rounded to a multiple of 8, at least 8.
                target_gw = max(8, int(round(target_gw / 8)) * 8)

                # Keep the transform on the same device as the tensor it
                # samples from instead of relying on a module-level device.
                theta = _crop_theta(xc, yc, w, h, angle, input_W,
                                    input_H).to(im_data.device)

                grid = F.affine_grid(
                    theta, torch.Size((1, 3, int(target_h), int(target_gw))))
                crop = F.grid_sample(im_data, grid)

                labels_pred = net.forward_ocr(crop).permute(1, 2, 0)
                ctc_f = labels_pred.data.cpu().numpy().swapaxes(1, 2)
                labels = ctc_f.argmax(2)

                # Mean exp(score) over the steps whose best label is > 3.
                conf = np.mean(np.exp(ctc_f.max(2)[labels > 3]))
                if conf < 0.02:
                    continue

                det_text, _, _, _ = print_seq_ext(labels[0, :], codec)
                det_text = det_text.strip()

                if det_text and len(det_text) >= eval_text_length:
                    # Report the box in original-image coordinates.
                    boxw = np.copy(boxr)
                    boxw[:, 1] /= im_scaley
                    boxw[:, 0] /= im_scalex
                    detections_out.append([boxw.reshape(8), det_text])

            tp, tp_e2e, gt_e2e, tp_e2e_ed1, _, annotated = evaluate_image(
                img,
                detections_out,
                gt_rect,
                gt_txts,
                eval_text_length=eval_text_length)
            tp_all += tp
            tp_e2e_all += tp_e2e
            gt_e2e_all += gt_e2e
            tp_e2e_ed1_all += tp_e2e_ed1
            detections_all += len(detections_out)

            if save:
                cv2.imwrite('{0}/{1}'.format(save_dir, base_nam), annotated)

    gt_denominator = float(max(1, gt_e2e_all))
    results = (tp_e2e_all / gt_denominator,
               tp_all / gt_denominator,
               tp_e2e_ed1_all / gt_denominator,
               tp_all / float(max(1, detections_all)))

    # The context manager closes the log even if the write fails.
    with open(os.path.join(save_dir, 'note_eval.txt'), 'a') as note_file:
        note_file.write(
            'Model{4}---E2E recall tp_e2e:{0:.3f} / tp:{1:.3f} / e1:{2:.3f}, precision: {3:.3f} \n'
            .format(results[0], results[1], results[2], results[3],
                    name_model))

    return results


def _find_gt_file(root, base_name):
    """Return ``(path, found)`` for the annotation file of ``base_name``.

    Tries ``gt_<stem>.txt`` first and then the same name with underscores
    removed from the stem.  The extension is stripped with ``splitext`` so
    upper-case suffixes such as ``.JPG`` resolve as well.  When neither file
    exists the last candidate is returned together with ``False``.
    """
    stem = os.path.splitext(base_name)[0]
    candidates = ('{0}/gt_{1}.txt'.format(root, stem),
                  '{0}/gt_{1}.txt'.format(root, stem.replace('_', '')))
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate, True
    return candidates[-1], False


def _box_geometry(boxr):
    """Return ``(xc, yc, w, h, angle)`` of a quadrilateral given as (4, 2).

    Width, height and angle are each the average over the two opposite
    sides; both side heights get +1 before averaging.
    """
    center = (boxr[0, :] + boxr[1, :] + boxr[2, :] + boxr[3, :]) / 4

    dw = boxr[2, :] - boxr[1, :]
    dw2 = boxr[0, :] - boxr[3, :]
    dh = boxr[1, :] - boxr[0, :]
    dh2 = boxr[3, :] - boxr[2, :]

    h_a = math.sqrt(dh[0] * dh[0] + dh[1] * dh[1]) + 1
    h_b = math.sqrt(dh2[0] * dh2[0] + dh2[1] * dh2[1]) + 1
    w_a = math.sqrt(dw[0] * dw[0] + dw[1] * dw[1])
    w_b = math.sqrt(dw2[0] * dw2[0] + dw2[1] * dw2[1])

    angle_a = math.atan2((boxr[2][1] - boxr[1][1]), boxr[2][0] - boxr[1][0])
    angle_b = math.atan2((boxr[3][1] - boxr[0][1]), boxr[3][0] - boxr[0][0])

    return (center[0], center[1], (w_a + w_b) / 2, (h_a + h_b) / 2,
            (angle_a + angle_b) / 2)


def _crop_theta(xc, yc, w, h, angle, input_W, input_H):
    """Build the (1, 2, 3) float32 affine matrix used to sample one box.

    The matrix is expressed in the normalised coordinates expected by
    ``F.affine_grid``; the sampled width is padded by a quarter of the
    box height.
    """
    scalex = (w + h / 4) / input_W
    scaley = h / input_H

    th11 = scalex * math.cos(angle)
    th12 = -math.sin(angle) * scaley * input_H / input_W
    th13 = (2 * xc - input_W - 1) / (input_W - 1)

    th21 = math.sin(angle) * scalex * input_W / input_H
    th22 = scaley * math.cos(angle)
    th23 = (2 * yc - input_H - 1) / (input_H - 1)

    coeffs = np.asarray([th11, th12, th13, th21, th22, th23],
                        dtype=np.float64)
    return torch.from_numpy(coeffs).type(torch.FloatTensor).view(-1, 2, 3)

コード例 #15

ファイルを表示

def main(opts):
  """Run the CTC training loop for the recogniser of ``ModelResNetSep_final``.

  Each step pulls one batch from ``ocr_gen.get_batch``, runs
  ``net.forward_features`` followed by ``net.forward_ocr``, computes
  ``nn.CTCLoss`` and updates the weights with Adam plus a cyclic learning
  rate schedule.  Progress is appended to ``loss_ocr.txt`` every
  ``disp_interval`` steps; every ``batch_per_epoch`` steps the model is
  scored with ``eval_ocr``, a checkpoint is saved and a line is appended to
  ``note_eval.txt`` (all under ``opts.save_path``).

  Fields read from ``opts``: ``cuda``, ``model``, ``num_readers``,
  ``batch_size``, ``train_list``, ``valid_list``, ``norm_height``, ``debug``
  and ``save_path``.

  The function also relies on names that are not defined here and must
  exist at module level: ``base_lr``, ``weight_decay``, ``disp_interval``,
  ``batch_per_epoch``, ``codec`` and ``args``.
  """
  
  model_name = 'E2E-MLT'
  net = ModelResNetSep_final(attention=True)
  acc = []
  ctc_loss = nn.CTCLoss()
  if opts.cuda:
    net.cuda()
    ctc_loss.cuda()
  optimizer = torch.optim.Adam(net.parameters(), lr=base_lr, weight_decay=weight_decay)
  scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0005, max_lr=0.001, step_size_up=3000,
                                                cycle_momentum=False)
  step_start = 0  
  if os.path.exists(opts.model):
    # NOTE(review): the existence check uses `opts.model` but the load below
    # reads the module-level `args.model` — confirm both are the same object.
    print('loading model from %s' % args.model)
    step_start, learning_rate = net_utils.load_net(args.model, net, optimizer)
  else:
    learning_rate = base_lr

  # Every parameter group is forced back to base_lr, so the learning rate
  # returned by load_net above is discarded.
  for param_group in optimizer.param_groups:
    param_group['lr'] = base_lr
    learning_rate = param_group['lr']
    print(param_group['lr'])
  
  # NOTE(review): this resets the step loaded from the checkpoint, so the
  # loop below always starts counting from 0 — confirm this is intended.
  step_start = 0  

  net.train()
  
  #acc_test = test(net, codec, opts, list_file=opts.valid_list, norm_height=opts.norm_height)
  #acc.append([0, acc_test])
    
  # ctc_loss = CTCLoss()
  # NOTE(review): re-creates the loss and replaces the instance on which
  # .cuda() was called above.
  ctc_loss = nn.CTCLoss()

  data_generator = ocr_gen.get_batch(num_workers=opts.num_readers,
          batch_size=opts.batch_size, 
          train_list=opts.train_list, in_train=True, norm_height=opts.norm_height, rgb = True, normalize= True)
  
  val_dataset = ocrDataset(root=opts.valid_list, norm_height=opts.norm_height , in_train=False,is_crnn=False)
  val_generator = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=False,
                                                collate_fn=alignCollate())


  # val_generator1 = torch.utils.data.DataLoader(val_dataset, batch_size=2, shuffle=False,
  #                                              collate_fn=alignCollate())

  # cnt starts at 1 (not 0) and is reset to 1 after every report, so the
  # averaged loss below is divided by (number of accumulated steps + 1).
  cnt = 1
  cntt = 0
  train_loss_lr = 0
  time_total = 0
  train_loss = 0
  now = time.time()

  for step in range(step_start, 300000):
    # batch
    images, labels, label_length = next(data_generator)
    # Move the channel axis from last to second position before the forward
    # pass (presumably NHWC -> NCHW; verify against the generator output).
    im_data = net_utils.np_to_variable(images, is_cuda=opts.cuda).permute(0, 3, 1, 2)
    features = net.forward_features(im_data)
    labels_pred = net.forward_ocr(features)
    
    # backward
    '''
    acts: Tensor of (seqLength x batch x outputDim) containing output from network
        labels: 1 dimensional Tensor containing all the targets of the batch in one sequence
        act_lens: Tensor of size (batch) containing size of each output sequence from the network
        act_lens: Tensor of (batch) containing label length of each example
    '''
    
    # After permute(2, 0, 1): size()[0] is repeated size()[1] times, i.e. one
    # identical length entry per element along the second axis (per the note
    # above: sequence length repeated for each batch element).
    probs_sizes =  torch.IntTensor([(labels_pred.permute(2, 0, 1).size()[0])] * (labels_pred.permute(2, 0, 1).size()[1])).long()
    label_sizes = torch.IntTensor(torch.from_numpy(np.array(label_length)).int()).long()
    labels = torch.IntTensor(torch.from_numpy(np.array(labels)).int()).long()
    # The loss is additionally divided by the first dimension of im_data.
    loss = ctc_loss(labels_pred.permute(2,0,1), labels, probs_sizes, label_sizes) / im_data.size(0) # change 1.9.
    optimizer.zero_grad()
    loss.backward()

    # Clip the total gradient norm before the update.
    clipping_value = 0.5
    torch.nn.utils.clip_grad_norm_(net.parameters(),clipping_value)
    # Skip the optimizer/scheduler step (and the loss accumulation) when the
    # loss is NaN or infinite.
    if not (torch.isnan(loss) or torch.isinf(loss)):
      optimizer.step()
      scheduler.step()
    # if not np.isinf(loss.data.cpu().numpy()):
      train_loss += loss.data.cpu().numpy() #net.bbox_loss.data.cpu().numpy()[0]
      # train_loss += loss.data.cpu().numpy()[0] #net.bbox_loss.data.cpu().numpy()[0]
      cnt += 1
    
    # Debug output: decode and print the prediction for the first sample.
    # Note that `labels` is rebound here to the argmax of the predictions.
    if opts.debug:
      dbg = labels_pred.data.cpu().numpy()
      ctc_f = dbg.swapaxes(1, 2)
      labels = ctc_f.argmax(2)
      det_text, conf, dec_s,_ = print_seq_ext(labels[0, :], codec)
      
      print('{0} \t'.format(det_text))
    
    
    
    # Periodic progress report: averaged loss since the last report, elapsed
    # time and the current learning rate (of the last parameter group).
    if step % disp_interval == 0:
      for param_group in optimizer.param_groups:
        learning_rate = param_group['lr']
        
      train_loss /= cnt
      train_loss_lr += train_loss
      cntt += 1
      time_now = time.time() - now
      time_total += time_now
      now = time.time()
      save_log = os.path.join(opts.save_path, 'loss_ocr.txt')
      f = open(save_log, 'a')
      # `step / batch_per_epoch` is truncated to an integer by the %d format.
      f.write(
        'epoch %d[%d], loss_ctc: %.3f,time: %.2f s, lr: %.5f, cnt: %d\n' % (
          step / batch_per_epoch, step, train_loss, time_now,learning_rate, cnt))
      f.close()

      print('epoch %d[%d], loss_ctc: %.3f,time: %.2f s, lr: %.5f, cnt: %d\n' % (
          step / batch_per_epoch, step, train_loss, time_now,learning_rate, cnt))

      train_loss = 0
      cnt = 1

    # End of an epoch: evaluate, switch back to training mode, save a
    # checkpoint and log the epoch summary.
    if step > step_start and (step % batch_per_epoch == 0):
      CER, WER = eval_ocr(val_generator, net)
      net.train()
      for param_group in optimizer.param_groups:
        learning_rate = param_group['lr']
        # print(learning_rate)

      save_name = os.path.join(opts.save_path, '{}_{}.h5'.format(model_name, step))
      state = {'step': step,
               'learning_rate': learning_rate,
              'state_dict': net.state_dict(),
              'optimizer': optimizer.state_dict()}
      torch.save(state, save_name)
      print('save model: {}'.format(save_name))
      save_logg = os.path.join(opts.save_path, 'note_eval.txt')
      fe = open(save_logg, 'a')
      # loss_total is the mean of the per-report averaged losses accumulated
      # since the previous epoch summary.
      fe.write('time epoch [%d]: %.2f s, loss_total: %.3f, CER = %f, WER = %f\n' % (
      step / batch_per_epoch, time_total, train_loss_lr / cntt, CER, WER))
      fe.close()
      print('time epoch [%d]: %.2f s, loss_total: %.3f, CER = %f, WER = %f' % (
      step / batch_per_epoch, time_total, train_loss_lr / cntt, CER, WER))
      # Reset the per-epoch accumulators.
      time_total = 0
      cntt = 0
      train_loss_lr = 0