def show(self, page: np.ndarray, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
    """Overlay the result on a given image

    Args:
        page: image encoded as a numpy array in uint8
        interactive: whether the display should be interactive
        preserve_aspect_ratio: pass True if you passed True to the predictor
    """
    visualize_page(self.export(), page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
    plt.show(**kwargs)
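A minimal usage sketch for this show() method, assuming docTR's DocumentFile and ocr_predictor APIs; the file name "sample.jpg" is illustrative only.

from doctr.io import DocumentFile
from doctr.models import ocr_predictor

# Load a page as a uint8 numpy array and run the full OCR pipeline on it
doc = DocumentFile.from_images("sample.jpg")
predictor = ocr_predictor(pretrained=True)
result = predictor(doc)

# Overlay the predicted elements on the original page (opens a matplotlib window)
result.pages[0].show(doc[0])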
def test_visualize_page():
    pages = _mock_pages()
    image = np.ones((300, 200, 3))
    visualization.visualize_page(pages[0].export(), image, words_only=False)
    visualization.visualize_page(pages[0].export(), image, words_only=True, interactive=False)
    # geometry checks
    with pytest.raises(ValueError):
        visualization.create_obj_patch([1, 2], (100, 100))
    with pytest.raises(ValueError):
        visualization.create_obj_patch((1, 2), (100, 100))
    with pytest.raises(ValueError):
        visualization.create_obj_patch((1, 2, 3, 4, 5), (100, 100))
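For contrast with the invalid geometries rejected above, a small sketch of a call that should succeed, assuming create_obj_patch accepts a straight box as ((xmin, ymin), (xmax, ymax)) in relative coordinates together with (height, width) page dimensions:

import matplotlib.pyplot as plt

from doctr.utils import visualization

# Straight bounding box in relative coordinates, drawn on a 100 x 100 page
patch = visualization.create_obj_patch(((0.1, 0.1), (0.4, 0.3)), (100, 100))
fig, ax = plt.subplots()
ax.add_patch(patch)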
def main():
    # Wide mode
    st.set_page_config(layout="wide")

    # Designing the interface
    st.title("docTR: Document Text Recognition")
    # For newline
    st.write('\n')
    # Instructions
    st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")
    # Set the columns
    cols = st.columns((1, 1, 1, 1))
    cols[0].subheader("Input page")
    cols[1].subheader("Segmentation heatmap")
    cols[2].subheader("OCR output")
    cols[3].subheader("Page reconstitution")

    # Sidebar
    # File selection
    st.sidebar.title("Document selection")
    # Disabling warning
    st.set_option('deprecation.showfileUploaderEncoding', False)
    # Choose your own image
    uploaded_file = st.sidebar.file_uploader("Upload files", type=['pdf', 'png', 'jpeg', 'jpg'])
    if uploaded_file is not None:
        if uploaded_file.name.endswith('.pdf'):
            doc = DocumentFile.from_pdf(uploaded_file.read()).as_images()
        else:
            doc = DocumentFile.from_images(uploaded_file.read())
        page_idx = st.sidebar.selectbox("Page selection", [idx + 1 for idx in range(len(doc))]) - 1
        cols[0].image(doc[page_idx])

    # Model selection
    st.sidebar.title("Model selection")
    det_arch = st.sidebar.selectbox("Text detection model", DET_ARCHS)
    reco_arch = st.sidebar.selectbox("Text recognition model", RECO_ARCHS)

    # For newline
    st.sidebar.write('\n')

    if st.sidebar.button("Analyze page"):

        if uploaded_file is None:
            st.sidebar.write("Please upload a document")

        else:
            with st.spinner('Loading model...'):
                predictor = ocr_predictor(det_arch, reco_arch, pretrained=True)

            with st.spinner('Analyzing...'):

                # Forward the image to the model
                processed_batches = predictor.det_predictor.pre_processor([doc[page_idx]])
                out = predictor.det_predictor.model(processed_batches[0], return_model_output=True)
                seg_map = out["out_map"]
                seg_map = tf.squeeze(seg_map[0, ...], axis=[2])
                seg_map = cv2.resize(seg_map.numpy(), (doc[page_idx].shape[1], doc[page_idx].shape[0]),
                                     interpolation=cv2.INTER_LINEAR)

                # Plot the raw heatmap
                fig, ax = plt.subplots()
                ax.imshow(seg_map)
                ax.axis('off')
                cols[1].pyplot(fig)

                # Plot OCR output
                out = predictor([doc[page_idx]])
                fig = visualize_page(out.pages[0].export(), doc[page_idx], interactive=False)
                cols[2].pyplot(fig)

                # Page reconstitution under input page
                page_export = out.pages[0].export()
                img = out.pages[0].synthesize()
                cols[3].image(img, clamp=True)

                # Display JSON
                st.markdown("\nHere are your analysis results in JSON format:")
                st.json(page_export)
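Outside of Streamlit, the same raw detection heatmap can be pulled from the detection branch on its own; a sketch assuming the TensorFlow backend used in the function above and an illustrative file name "sample.jpg":

import cv2
import matplotlib.pyplot as plt
import tensorflow as tf

from doctr.io import DocumentFile
from doctr.models import ocr_predictor

page = DocumentFile.from_images("sample.jpg")[0]
predictor = ocr_predictor(pretrained=True)

# Run only the detection model and keep its raw output map
processed = predictor.det_predictor.pre_processor([page])
out = predictor.det_predictor.model(processed[0], return_model_output=True)
seg_map = tf.squeeze(out["out_map"][0, ...], axis=[2]).numpy()

# Resize the heatmap back to the page resolution and display it
seg_map = cv2.resize(seg_map, (page.shape[1], page.shape[0]), interpolation=cv2.INTER_LINEAR)
plt.imshow(seg_map)
plt.axis("off")
plt.show()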
def main(det_archs, reco_archs):
    """Build a streamlit layout"""
    # Wide mode
    st.set_page_config(layout="wide")

    # Designing the interface
    st.title("docTR: Document Text Recognition")
    # For newline
    st.write("\n")
    # Instructions
    st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")
    # Set the columns
    cols = st.columns((1, 1, 1, 1))
    cols[0].subheader("Input page")
    cols[1].subheader("Segmentation heatmap")
    cols[2].subheader("OCR output")
    cols[3].subheader("Page reconstitution")

    # Sidebar
    # File selection
    st.sidebar.title("Document selection")
    # Disabling warning
    st.set_option("deprecation.showfileUploaderEncoding", False)
    # Choose your own image
    uploaded_file = st.sidebar.file_uploader("Upload files", type=["pdf", "png", "jpeg", "jpg"])
    if uploaded_file is not None:
        if uploaded_file.name.endswith(".pdf"):
            doc = DocumentFile.from_pdf(uploaded_file.read())
        else:
            doc = DocumentFile.from_images(uploaded_file.read())
        page_idx = st.sidebar.selectbox("Page selection", [idx + 1 for idx in range(len(doc))]) - 1
        page = doc[page_idx]
        cols[0].image(page)

    # Model selection
    st.sidebar.title("Model selection")
    st.sidebar.markdown("**Backend**: " + ("TensorFlow" if is_tf_available() else "PyTorch"))
    det_arch = st.sidebar.selectbox("Text detection model", det_archs)
    reco_arch = st.sidebar.selectbox("Text recognition model", reco_archs)

    # For newline
    st.sidebar.write("\n")

    if st.sidebar.button("Analyze page"):

        if uploaded_file is None:
            st.sidebar.write("Please upload a document")

        else:
            with st.spinner("Loading model..."):
                predictor = load_predictor(det_arch, reco_arch, forward_device)

            with st.spinner("Analyzing..."):

                # Forward the image to the model
                seg_map = forward_image(predictor, page, forward_device)
                seg_map = np.squeeze(seg_map)
                seg_map = cv2.resize(seg_map, (page.shape[1], page.shape[0]), interpolation=cv2.INTER_LINEAR)

                # Plot the raw heatmap
                fig, ax = plt.subplots()
                ax.imshow(seg_map)
                ax.axis("off")
                cols[1].pyplot(fig)

                # Plot OCR output
                out = predictor([page])
                fig = visualize_page(out.pages[0].export(), page, interactive=False)
                cols[2].pyplot(fig)

                # Page reconstitution under input page
                page_export = out.pages[0].export()
                if "rotation" not in det_arch:
                    img = out.pages[0].synthesize()
                    cols[3].image(img, clamp=True)

                # Display JSON
                st.markdown("\nHere are your analysis results in JSON format:")
                st.json(page_export)
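A hedged sketch of how this layout function is typically wired up as a script entry point; the DET_ARCHS and RECO_ARCHS globals mirror the architecture lists used by the older main() above, and launching with `streamlit run` is an assumption about how the demo is started.

if __name__ == "__main__":
    # Assumed entry point: launch with e.g. `streamlit run app.py`
    main(DET_ARCHS, RECO_ARCHS)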