def to_string(self, res_crnn):
    """Turn recognition outputs into decoded results, one per entry.

    For each entry of ``res_crnn`` the highest-scoring index along axis 1
    is looked up in ``alphabet`` and the resulting character list is
    handed to ``decode``. Both ``alphabet`` and ``decode`` are defined
    outside this method.

    Args:
        res_crnn: Indexable collection of score arrays, one per word.
            Presumably each is shaped (time steps, classes) -- confirm
            against the recognition model's output.

    Returns:
        list: The value returned by ``decode`` for every entry, in the
        same order as ``res_crnn``.
    """
    return [
        decode([alphabet[k] for k in np.argmax(scores, axis=1)])
        for scores in res_crnn
    ]
if len(words) > 0:
    # Run the recognition model on all cropped words in a single batch.
    res_crnn = rec_model.predict(words)

    # Text anchor per word: take the first two columns of rboxes, move the
    # first one left by half of column 2, then rescale from the network
    # input size to the video frame size.
    # Presumably the columns are (cx, cy, w, ...) -- confirm with the
    # code that builds rboxes.
    # NOTE(review): if rboxes is a NumPy array this slice is a view, so
    # the in-place assignment below also changes rboxes -- confirm that
    # this is intended.
    xy = rboxes[:, :2]
    xy[:, 0] = xy[:, 0] - rboxes[:, 2] / 2
    xy = xy / input_size * [vid_w, vid_h]

    # The last alphabet entry is treated as the blank class.
    blank_idx = len(alphabet) - 1

    for i in range(len(words)):
        scores = res_crnn[i]
        best = np.argmax(scores, axis=1)
        best_scores = scores[np.arange(len(best)), best]
        keep = best != blank_idx

        # Only blanks were predicted: there is nothing to draw.
        if not np.any(keep):
            continue

        # Confidence is averaged over the non-blank positions only.
        mean_conf = np.mean(best_scores[keep])
        res_str = decode([alphabet[k] for k in best])

        # Filter based on recognition threshold. A length-dependent
        # alternative that is currently disabled:
        #   mean_conf > 0.7 - 0.4 * np.exp(-0.1 * np.sum(keep))
        if mean_conf > 0.6:
            # Scale the box polygon to frame size and round to integer
            # pixel coordinates before drawing its closed outline.
            outline = boxes[i].reshape((-1, 1, 2)) * [vid_w, vid_h]
            outline = np.round(outline).astype(np.int32)
            cv2.polylines(img1, [outline], True, (0, 0, 255))
            # Debug aid:
            #   cv2.imwrite('croped_word_%03i.png' % (i), words[i])
            cv2.putText(
                img2,
                res_str,
                tuple(xy[i].astype(int)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.35,
                (255, 255, 255),
                1,
            )
        # Debug aid for rejected words:
        #   print('drop %5.3f %s' % (mean_conf, res_str))