def test_get_token_bounding_box():
    """A symbol with one token should get exactly that token's bounding box."""
    expected = BoundingBox(0.01, 0.01, 0.01, 0.01, 0)
    single_token_symbol = symbol(tokens=[Token("x", "atom", 0, 1)])
    locations = {token_id(0, 1): [BoundingBox(0.01, 0.01, 0.01, 0.01, 0)]}

    result = get_symbol_bounding_box(single_token_symbol, symbol_id(), locations)

    assert result == expected
def test_get_character_bounding_box():
    """A symbol with one character should get exactly that character's bounding box."""
    expected = BoundingBox(0.01, 0.01, 0.01, 0.01, 0)
    single_character_symbol = symbol(characters=[0])
    locations = {character_id(0): [BoundingBox(0.01, 0.01, 0.01, 0.01, 0)]}

    result = get_symbol_bounding_box(single_character_symbol, symbol_id(), locations)

    assert result == expected
def load_equation_token_locations(
    arxiv_id: ArxivId,
) -> Optional[Dict[TokenId, List[BoundingBox]]]:
    """
    Load the bounding boxes detected for every equation token of a paper.

    Reads 'entity_locations.csv' from the paper's 'equation-tokens-locations' directory.
    Each record's entity ID has the form '<equation index>-<token index>'. Returns a
    dictionary from token ID to the list of boxes found for that token, or None (after
    logging a warning) if the locations file does not exist.
    """
    csv_path = os.path.join(
        directories.arxiv_subdir("equation-tokens-locations", arxiv_id),
        "entity_locations.csv",
    )
    if not os.path.exists(csv_path):
        logging.warning(
            "Could not find bounding boxes information for %s. Skipping", arxiv_id,
        )
        return None

    locations_by_token: Dict[TokenId, List[BoundingBox]] = {}
    for row in load_from_csv(csv_path, HueLocationInfo):
        indexes = [int(part) for part in row.entity_id.split("-")]
        equation_index, token_index = indexes
        key = TokenId(row.tex_path, equation_index, token_index)
        token_box = BoundingBox(
            page=int(row.page),
            left=row.left,
            top=row.top,
            width=row.width,
            height=row.height,
        )
        locations_by_token.setdefault(key, []).append(token_box)

    return locations_by_token
def _to_pdf_coordinates(
    bounding_box: Rectangle,
    image_width: int,
    image_height: int,
    pdf_page_width: float,
    pdf_page_height: float,
    page: int,
) -> BoundingBox:
    """
    Map a rectangle given in raster-image pixels onto PDF page coordinates.

    Horizontal positions are scaled by the ratio of PDF page width to image width.
    Vertical positions are scaled likewise and then flipped, because image
    y-coordinates grow downward from the top of the image while the PDF
    y-coordinates produced here are measured from the bottom of the page.
    """
    x_scale = pdf_page_width / float(image_width)
    y_scale = pdf_page_height / float(image_height)

    pixel_right = bounding_box.left + bounding_box.width
    pixel_bottom = bounding_box.top + bounding_box.height

    pdf_left = bounding_box.left * x_scale
    pdf_right = pixel_right * x_scale

    # Flip the vertical axis: distance from the image top becomes distance from the page bottom.
    pdf_top = pdf_page_height - (bounding_box.top * y_scale)
    pdf_bottom = pdf_page_height - (pixel_bottom * y_scale)

    return BoundingBox(
        left=pdf_left,
        top=pdf_top,
        width=pdf_right - pdf_left,
        height=pdf_top - pdf_bottom,
        page=page,
    )
def get_symbol_bounding_box(
    symbol: Symbol, symbol_id: SymbolId, character_boxes: CharacterLocations
) -> Optional[BoundingBox]:
    """
    Compute one bounding box that encloses all of a symbol's characters.

    The boxes of every character of the symbol are looked up in 'character_boxes'.
    Returns None if no boxes are found. If the boxes span several pages, a warning is
    logged and only the boxes on the page of the first box found are merged.
    """
    collected = []
    for character_index in symbol.characters:
        key = CharacterId(
            symbol_id.tex_path, symbol_id.equation_index, character_index
        )
        collected.extend(character_boxes.get(key, []))

    if not collected:
        return None

    # A symbol is expected to appear on exactly one page.
    pages = {b.page for b in collected}
    if len(pages) > 1:
        logging.warning(
            "Boxes found on more than one page for symbol %s. "
            "Only the boxes for one page will be considered.",
            symbol,
        )

    page = collected[0].page
    same_page = [b for b in collected if b.page == page]

    left = min(b.left for b in same_page)
    right = max(b.left + b.width for b in same_page)
    top = min(b.top for b in same_page)
    bottom = max(b.top + b.height for b in same_page)

    return BoundingBox(left, top, right - left, bottom - top, page)
def load_locations(
    arxiv_id: ArxivId, entity_name: str
) -> Optional[Dict[EntityId, List[BoundingBox]]]:
    """
    Read the bounding boxes saved for each entity of the given type in a paper.

    One entity may own several boxes (for example, when it is split over multiple
    lines). Returns None, after logging a warning, when the paper has no
    'entity_locations.csv' file for this entity type.
    """
    csv_path = os.path.join(
        directories.arxiv_subdir(f"{entity_name}-locations", arxiv_id),
        "entity_locations.csv",
    )
    if not os.path.exists(csv_path):
        logging.warning(
            "Could not find bounding boxes information for entity of type %s for paper %s. Skipping.",
            entity_name,
            arxiv_id,
        )
        return None

    boxes_by_entity_id: Dict[EntityId, List[BoundingBox]] = defaultdict(list)
    for location in load_from_csv(csv_path, EntityLocationInfo):
        boxes_by_entity_id[location.entity_id].append(
            BoundingBox(
                page=location.page,
                left=location.left,
                top=location.top,
                width=location.width,
                height=location.height,
            )
        )
    return boxes_by_entity_id
def test_merge_bounding_boxes():
    """Boxes of all of a symbol's tokens are merged into one enclosing box."""
    first_token_boxes = [
        BoundingBox(0.01, 0.01, 0.01, 0.01, 0),
        # Extends the merged box downward by .01 of the page.
        BoundingBox(0.01, 0.02, 0.01, 0.01, 0),
    ]
    # Extends the merged box rightward by .01 of the page.
    second_token_boxes = [BoundingBox(0.02, 0.01, 0.01, 0.01, 0)]
    # Belongs to a token that is not part of the symbol, so it must be ignored.
    unrelated_token_boxes = [BoundingBox(0.03, 0.01, 0.01, 0.01, 0)]

    token_locations = {
        token_id(0): first_token_boxes,
        token_id(1): second_token_boxes,
        token_id(2): unrelated_token_boxes,
    }
    two_token_symbol = symbol(tokens=[0, 1])

    box = get_symbol_bounding_box(two_token_symbol, symbol_id(), token_locations)

    assert box.left == 0.01
    assert box.top == 0.01
    assert abs(box.width - 0.02) < 0.0001
    assert abs(box.height - 0.02) < 0.0001
def fetch_boxes(arxiv_id: ArxivId, schema: str, version: Optional[int],
                types: List[str]) -> Optional[RegionsByPageAndType]:
    """
    Fetch the bounding boxes of a paper's entities of the requested types from the database.

    When 'version' is None, the largest version index stored for the paper is used; if
    the paper has no version at all, a warning is logged and None is returned. The result
    maps each (page, entity type) pair to a list holding, for every entity with boxes on
    that page, a frozenset of that entity's rectangles on the page.
    """
    setup_database_connections(schema)

    # Fall back to the most recent version of data in the database for the paper.
    if version is None:
        latest_version = (
            Version.select(fn.Max(Version.index))
            .join(Paper)
            .where(Paper.arxiv_id == arxiv_id)
            .scalar()
        )
        if latest_version is None:
            logging.warning(
                "There are no entities for paper %s in database schema %s",
                arxiv_id,
                schema,
            )
            return None
        version = int(latest_version)

    # One row per (entity, bounding box) pair.
    query = (
        EntityModel.select(
            EntityModel.id,
            EntityModel.type,
            BoundingBoxModel.left,
            BoundingBoxModel.top,
            BoundingBoxModel.width,
            BoundingBoxModel.height,
            BoundingBoxModel.page,
        )
        .join(Paper)
        .switch(EntityModel)
        .join(BoundingBoxModel)
        .where(
            EntityModel.version == version,
            Paper.arxiv_id == arxiv_id,
            EntityModel.type << types,
        )
        .dicts()
    )

    boxes_by_entity_db_id: Dict[str, List[BoundingBox]] = defaultdict(list)
    types_by_entity_db_id: Dict[str, str] = {}
    for row in query:
        entity_db_id = row["id"]
        entity_box = BoundingBox(
            row["left"], row["top"], row["width"], row["height"], row["page"]
        )
        boxes_by_entity_db_id[entity_db_id].append(entity_box)
        types_by_entity_db_id[entity_db_id] = row["type"]

    # Regroup each entity's boxes by the page they appear on.
    regions: RegionsByPageAndType = defaultdict(list)
    for db_id, entity_boxes in boxes_by_entity_db_id.items():
        for page, page_boxes in group_by_page(entity_boxes).items():
            region_key = (page, types_by_entity_db_id[db_id])
            regions[region_key].append(
                frozenset(
                    FloatRectangle(b.left, b.top, b.width, b.height)
                    for b in page_boxes
                )
            )
    return regions
def on_next(self, payload):
    """
    Draw a marker rectangle for every vehicle detection in the payload.

    The plot title shows the number of de-duplicated rows of the 'detections' data
    frame, and the axis limits are set from the frame size with the y-axis inverted.
    For each vehicle detection, a rectangle as wide as its bounding box and 20% as
    tall is drawn starting at the box's y + h.
    """
    detections_df = payload.dfs.get('detections', pd.DataFrame()).drop_duplicates()
    plt.title(f'{len(detections_df)} Detections')

    frame_height, frame_width = payload.frame.shape[0], payload.frame.shape[1]
    plt.ylim(frame_height, 0)
    plt.xlim(0, frame_width)

    detections = list(payload.vehicle_detections)
    from common.types import BoundingBox

    for detection in detections:
        detected = detection.bounding_box
        strip = BoundingBox(
            detected.x,
            detected.y + detected.h,
            detected.w,
            int(round(detected.h * 0.2)),
        )
        # Create a rectangle patch and add it to the axes.
        marker = patches.Rectangle(
            (strip.x, strip.y),
            strip.w,
            strip.h,
            linewidth=1,
            edgecolor='r',
            facecolor='black',
        )
        self.ax.add_patch(marker)

    plt.pause(0.001)
def extract_bounding_boxes(
    diff_image: np.ndarray,
    page_number: int,
    hue: float,
    masks: Optional[Iterable[FloatRectangle]] = None,
) -> List[BoundingBox]:
    """
    Find the boxes of a given hue in a page's difference image.

    See 'PixelMerger' for a description of how bounding boxes are extracted. Masks are
    assumed to be non-intersecting, and are expressed as ratios of the page's width and
    height (left, top, width, and height all in the range 0..1) rather than in pixels.
    The returned boxes are expressed as ratios of the image dimensions in the same way.
    """
    image_height, image_width, _ = diff_image.shape

    # Convert ratio-based masks into pixel rectangles for the color search.
    if masks is None:
        pixel_masks = None
    else:
        pixel_masks = [
            Rectangle(
                left=round(mask.left * image_width),
                top=round(mask.top * image_height),
                width=round(mask.width * image_width),
                height=round(mask.height * image_height),
            )
            for mask in masks
        ]

    found = list(find_boxes_with_color(diff_image, hue, masks=pixel_masks))

    # Convert the pixel boxes that were found back into ratios.
    return [
        BoundingBox(
            float(pixel_box.left) / image_width,
            float(pixel_box.top) / image_height,
            float(pixel_box.width) / image_width,
            float(pixel_box.height) / image_height,
            page_number,
        )
        for pixel_box in found
    ]
def load_hue_locations(
    arxiv_id: ArxivId, entity_name: str
) -> Optional[Dict[HueIteration, List[BoundingBox]]]:
    """
    Load bounding boxes for each entity. Entities are indexed by the hue they were colored
    and the iteration of coloring in which they were assigned that hue. Entities can have
    multiple bounding boxes (e.g., if they are split over multiple lines).

    Returns None, after logging a warning, if the paper has no 'entity_locations.csv'
    file for this type of entity.
    """
    bounding_boxes_path = os.path.join(
        directories.arxiv_subdir(f"{entity_name}-locations", arxiv_id),
        "entity_locations.csv",
    )
    if not os.path.exists(bounding_boxes_path):
        # Message kept in line with the equivalent warning in 'load_locations'.
        logging.warning(
            "Could not find bounding boxes information for entity of type %s for paper %s. Skipping.",
            entity_name,
            arxiv_id,
        )
        return None

    boxes_by_hue_iteration: Dict[HueIteration, List[BoundingBox]] = {}
    for hue_info in load_from_csv(bounding_boxes_path, HueLocationInfo):
        box = BoundingBox(
            page=hue_info.page,
            left=hue_info.left,
            top=hue_info.top,
            width=hue_info.width,
            height=hue_info.height,
        )
        hue_iteration = HueIteration(hue_info.hue, hue_info.iteration)
        boxes_by_hue_iteration.setdefault(hue_iteration, []).append(box)

    return boxes_by_hue_iteration
def upload_symbols(
    processing_summary: PaperProcessingResult, data_version: Optional[int]
) -> None:
    """
    Package up all symbols detected in a paper and upload them as entities.

    In addition to its bounding boxes and TeX, each symbol is uploaded with:
    * near-matching MathML (from 'symbol-matches/matches.csv'),
    * its parent and child symbols (from 'detected-symbols/symbol_children.csv'),
    * the sentence it appears in, and snippets from sentences where symbols with the
      same MathML appear (from 'contexts-for-symbols/contexts.csv'),
    * formulas that define symbols with the same MathML.

    Each of the three files is optional. If one is missing, a warning is logged and
    symbols are uploaded without that kind of data.
    """
    arxiv_id = processing_summary.arxiv_id
    entities = [es.entity for es in processing_summary.entities]
    symbols = cast(List[SerializableSymbol], entities)
    symbols_by_id = {sid(s): s for s in symbols}

    # Load MathML matches for partial matching of symbols.
    matches: Matches = {}
    matches_path = os.path.join(
        directories.arxiv_subdir("symbol-matches", arxiv_id), "matches.csv",
    )
    if os.path.exists(matches_path):
        for match in file_utils.load_from_csv(matches_path, Match):
            matches.setdefault(match.queried_mathml, []).append(match)
    else:
        logging.warning(
            "Could not find symbol matches information for paper %s.", arxiv_id,
        )

    # Load parent-child relationships for symbols. Keys and values are string IDs of
    # the form '<tex path>-<equation index>-<symbol index>'.
    children: Dict[str, List[str]] = defaultdict(list)
    parents: Dict[str, str] = {}
    children_path = os.path.join(
        directories.arxiv_subdir("detected-symbols", arxiv_id), "symbol_children.csv"
    )
    if os.path.exists(children_path):
        for parent in file_utils.load_from_csv(children_path, SerializableChild):
            pid = f"{parent.tex_path}-{parent.equation_index}-{parent.symbol_index}"
            cid = f"{parent.tex_path}-{parent.equation_index}-{parent.child_index}"
            parents[cid] = pid
            children[pid].append(cid)
    else:
        logging.warning(
            "Could not find file mapping from symbol to their children for paper %s.",
            arxiv_id,
        )

    # Load contexts that the symbols appear in, grouped both by symbol and by the
    # symbol's MathML.
    symbol_contexts = {}
    mathml_contexts = defaultdict(list)
    contexts_path = os.path.join(
        directories.arxiv_subdir("contexts-for-symbols", arxiv_id), "contexts.csv",
    )
    if os.path.exists(contexts_path):
        for context in file_utils.load_from_csv(contexts_path, Context):
            symbol_id = f"{context.tex_path}-{context.entity_id}"
            symbol_contexts[symbol_id] = context
            symbol = symbols_by_id[symbol_id]
            mathml_contexts[symbol.mathml].append(context)
    else:
        logging.warning(
            "Contexts have not been found for symbols for arXiv paper %s. "
            "Symbol data will be uploaded without contexts.",
            arxiv_id,
        )

    # Prepare collections of formulae that define symbols, grouped by MathML.
    mathml_formulas: Dict[str, Set[DefiningFormula]] = defaultdict(set)
    for symbol in symbols:
        if (
            symbol.is_definition
            and symbol.equation is not None
            and symbol.relative_start is not None
            and symbol.relative_end is not None
        ):
            highlighted = wrap_span(
                symbol.equation,
                symbol.relative_start,
                symbol.relative_end,
                before=r"\htmlClass{match-highlight}{",
                after="}",
                braces=True,
            )
            formula = DefiningFormula(
                tex=highlighted,
                tex_path=symbol.tex_path,
                equation_id=str(symbol.equation_index),
            )
            mathml_formulas[symbol.mathml].add(formula)

    MAX_BOX_HEIGHT = 0.1
    entity_infos: List[EntityUploadInfo] = []
    for localized_entity in processing_summary.entities:
        symbol = cast(SerializableSymbol, localized_entity.entity)
        boxes = [
            BoundingBox(l.left, l.top, l.width, l.height, l.page)
            for l in localized_entity.locations
        ]

        # Get context and formula of the symbol, and other matching ones. Each sentence
        # and each equation is referenced at most once.
        symbol_context = symbol_contexts.get(sid(symbol))
        other_context_texs = []
        other_context_sentence_ids = []
        for c in mathml_contexts.get(symbol.mathml, []):
            matching_sentence_id = f"{c.tex_path}-{c.sentence_id}"
            if matching_sentence_id not in other_context_sentence_ids:
                other_context_texs.append(c.snippet)
                other_context_sentence_ids.append(matching_sentence_id)

        other_formula_texs = []
        other_formula_ids = []
        for f in mathml_formulas.get(symbol.mathml, set()):
            equation_id = f"{f.tex_path}-{f.equation_id}"
            if equation_id not in other_formula_ids:
                other_formula_texs.append(f.tex)
                other_formula_ids.append(equation_id)

        # Tag symbols that have at least one unexpectedly tall bounding box.
        tags: List[str] = []
        for b in boxes:
            if b.height > MAX_BOX_HEIGHT:
                logging.debug(
                    "Detected large bounding box for symbol with height %f for entity %s of paper "
                    "%s. Entity will be given a tag indicating it is unexpectedly large.",
                    b.height,
                    f"{localized_entity.entity.tex_path}-{localized_entity.entity.id_}",
                    arxiv_id,
                )
                tags.append("large")
                break

        # Package up data for the symbol.
        data: EntityData = {
            "tex": f"${symbol.tex}$",
            "tex_start": symbol.start,
            "tex_end": symbol.end,
            "type": symbol.type_,
            "mathml": symbol.mathml,
            # A symbol may have no recorded matches (for instance, when the matches file
            # is missing), so a plain lookup by key would raise a KeyError here.
            "mathml_near_matches": [
                m.matching_mathml for m in matches.get(symbol.mathml, [])
            ],
            "snippets": other_context_texs,
            "defining_formulas": other_formula_texs,
            "is_definition": symbol.is_definition or False,
            "tags": tags,
        }

        # Create links between this symbol, its sentence, and related symbols.
        sentence_id = (
            f"{symbol_context.tex_path}-{symbol_context.sentence_id}"
            if symbol_context is not None
            else None
        )
        parent_id = parents.get(sid(symbol))
        child_ids = children.get(sid(symbol), [])
        relationships: EntityRelationships = {
            "equation": EntityReference(
                type_="equation", id_=f"{symbol.tex_path}-{symbol.equation_index}",
            ),
            "parent": EntityReference(type_="symbol", id_=parent_id),
            "children": [EntityReference(type_="symbol", id_=id_) for id_ in child_ids],
            # The ID is None when no context was found for the symbol.
            "sentence": EntityReference(type_="sentence", id_=sentence_id),
            "defining_formula_equations": [
                EntityReference(type_="equation", id_=id_) for id_ in other_formula_ids
            ],
            "snippet_sentences": [
                EntityReference(type_="sentence", id_=id_)
                for id_ in other_context_sentence_ids
            ],
        }

        # Save all data for this symbol.
        entity_infos.append(
            EntityUploadInfo(
                id_=sid(symbol),
                type_="symbol",
                bounding_boxes=boxes,
                data=data,
                relationships=relationships,
            )
        )

    upload_entities(
        processing_summary.s2_id, arxiv_id, entity_infos, data_version,
    )
def _execute(self, payload):
    """
    Update and display a per-pixel median image built from the stored grayscale frames.

    The lower 20% of every vehicle detection's bounding box is added to
    'self.detection_mask'. Before the current grayscale frame is appended to
    'self.frames', its masked pixels are overwritten with a single value: the median
    of the masked pixels of the current median image. The median image, when there is
    one, is shown in a window titled 'Median'. Returns the payload unchanged.

    NOTE(review): the original indentation of this function is not recoverable from
    this view. The nesting below is inferred from data flow (for example, 'idxs' is
    only defined inside the 'if len(median)' branch) -- confirm against the file.
    """
    frame = payload.original_frame
    raw_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frames = self.frames
    # Median over the stored frames (axis 0); an empty array until a frame is stored.
    median = np.median(
        frames, axis=0).astype(float) if len(frames) else np.ndarray(
            (0, ))  # .reshape(100, 100)
    # The mask is created only while it is empty, and is never cleared in this
    # method, so masked regions accumulate across calls.
    if len(self.detection_mask) == 0:
        self.detection_mask = np.zeros(raw_gray.shape, dtype='uint8')
    # df = payload.dfs.get('detections', pd.DataFrame()).drop_duplicates()
    # if len(df) > 0:
    vehicle_detections = list(payload.vehicle_detections)
    boxes = (d.bounding_box for d in vehicle_detections)
    # boxes = (b.get_scaled(0.5) for b in boxes)
    # Keep only the bottom 'bb_h_percentage' of each detection's box.
    bb_h_percentage = 0.2
    boxes = [
        BoundingBox(b.x, int(round(b.y + b.h * (1 - bb_h_percentage))), b.w,
                    int(round(b.h * bb_h_percentage))) for b in boxes
    ]
    for box in boxes:
        # y_start = max(0, box.y)
        # y_end = max(box.y + box.h, 0)
        # x_start = max(0, box.x)
        # x_end = max(0, box.x + box.w)
        # self.detection_mask[y_start: y_end, x_start:x_end] = 1
        # NOTE(review): negative coordinates would be interpreted as indexes from the
        # end of the array by the slicing below -- confirm boxes are always in-frame.
        self.detection_mask[box.y:box.y + box.h, box.x:box.x + box.w] = 1
    # cv2.imshow('detection_mask',self.detection_mask*255)
    # cv2.waitKey(0)
    gray = raw_gray.copy()  # * (1 - self.detection_mask)
    if len(median):
        # Replace masked pixels of the new frame with the median value of the
        # masked pixels of the current median image.
        idxs = np.where(self.detection_mask == 1)
        detections_median = np.median(median[idxs])
        gray[idxs] = detections_median
    self.frames.append(gray)
    title = 'Median'
    cv2.namedWindow(title, cv2.WINDOW_NORMAL)
    if len(median):
        cv2.imshow(title, median / 255)
    # cv2.imshow(title, frame)
    # cv2.imshow(title, gray / 255)
    # cv2.imwrite(r'median.jpg', median, )
    # NOTE(review): this early return is assumed to sit at function level, which
    # makes everything below it unreachable experimental code. If it were nested in
    # the 'if' above, the code below would run on an empty median -- confirm.
    return payload
    src = median / 255
    from experimental import demo_erosion_dilatation
    src = (1 - src) * 255
    demo_erosion_dilatation(src, iterations=2)
    erosion_size = 5
    erosion_type = cv2.MORPH_ELLIPSE
    element = cv2.getStructuringElement(
        erosion_type, (2 * erosion_size + 1, 2 * erosion_size + 1),
        (erosion_size, erosion_size))
    erosion_dst = cv2.erode(src, element, iterations=1)
    cv2.imshow('erosion', erosion_dst)
    erosion_dst = cv2.dilate(src, element, iterations=2)
    cv2.imshow('dialation', erosion_dst)
    cv2.waitKey(0)
    element = cv2.getStructuringElement(
        erosion_type, (2 * erosion_size + 1, 2 * erosion_size + 1),
        (erosion_size, erosion_size))
    cv2.waitKey(1)
    # ============================================
    return payload
def load_hues(self, arxiv_id: ArxivId, iteration: str) -> List[HueSearchRegion]:
    """
    Build the list of hue searches for the equation tokens colorized in an iteration.

    Equation bounding boxes are read from the paper's 'hue-locations-for-equations'
    directory, and token colorization records from the given iteration of
    'sources-with-colorized-equation-tokens'. One search region is created for every
    combination of a file in which an equation was located and a token record of
    that equation. The equation's boxes in that file, grouped by page, are the masks.
    """
    # Group equation boxes by equation, then by the file they were found in.
    equation_boxes_path = os.path.join(
        directories.arxiv_subdir("hue-locations-for-equations", arxiv_id),
        "hue_locations.csv",
    )
    bounding_boxes: Dict[EquationId, BoundingBoxesByFile] = {}
    for info in file_utils.load_from_csv(equation_boxes_path, HueLocationInfo):
        equation_id = EquationId(
            tex_path=info.tex_path, equation_index=int(info.entity_id),
        )
        boxes_in_file = bounding_boxes.setdefault(equation_id, {}).setdefault(
            info.relative_file_path, []
        )
        boxes_in_file.append(
            BoundingBox(
                page=info.page,
                left=info.left,
                top=info.top,
                width=info.width,
                height=info.height,
            )
        )

    # Index token colorization records by equation, then by token index.
    token_hues_path = os.path.join(
        directories.iteration(
            "sources-with-colorized-equation-tokens", arxiv_id, iteration,
        ),
        "entity_hues.csv",
    )
    token_records_by_equation: Dict[
        EquationId, Dict[int, EquationTokenColorizationRecord]
    ] = {}
    for token_record in file_utils.load_from_csv(
        token_hues_path, EquationTokenColorizationRecord
    ):
        equation_id = EquationId(
            tex_path=token_record.tex_path,
            equation_index=token_record.equation_index,
        )
        token_index = int(token_record.token_index)
        token_records_by_equation.setdefault(equation_id, {})[
            token_index
        ] = token_record

    hue_searches = []
    for equation_id, boxes_by_file in bounding_boxes.items():
        for file_path, boxes in boxes_by_file.items():
            masks_by_page: MasksForPages = {}
            for box in boxes:
                masks_by_page.setdefault(box.page, []).append(
                    Rectangle(box.left, box.top, box.width, box.height)
                )

            if equation_id not in token_records_by_equation:
                continue
            for token_record in token_records_by_equation[equation_id].values():
                hue_searches.append(
                    HueSearchRegion(
                        hue=token_record.hue,
                        record=token_record,
                        relative_file_path=file_path,
                        masks=masks_by_page,
                    )
                )

    return hue_searches
def box(left: float, top: float, width: float, height: float, page: int):
    """Shorthand for constructing a BoundingBox from its five fields."""
    return BoundingBox(left=left, top=top, width=width, height=height, page=page)
def test_get_token_bounding_box():
    """A symbol with one token should get exactly that token's bounding box."""
    expected = BoundingBox(0.01, 0.01, 0.01, 0.01, 0)
    single_token_symbol = symbol(tokens=[0])
    locations = {token_id(0): [BoundingBox(0.01, 0.01, 0.01, 0.01, 0)]}

    result = get_symbol_bounding_box(single_token_symbol, symbol_id(), locations)

    assert result == expected
def fetch_boxes(arxiv_id: ArxivId, schema: str, version: Optional[int],
                types: List[str]) -> Optional[RegionsByPageAndType]:
    """
    Fetch the bounding boxes of a paper's entities of the requested types from the database.

    When 'version' is None, the largest version index stored for the paper is used; if
    the paper has no version at all, a warning is logged and None is returned. Rows of
    type 'symbol' are skipped when their data has a 'type' value outside of
    GOLD_SYMBOL_TYPES. The result maps each (page, entity type) pair to a list holding,
    for every remaining entity with boxes on that page, a frozenset of that entity's
    rectangles on the page.
    """
    setup_database_connections(schema)

    # Fall back to the most recent version of data in the database for the paper.
    if version is None:
        latest_version = (
            Version.select(fn.Max(Version.index))
            .join(Paper)
            .where(Paper.arxiv_id == arxiv_id)
            .scalar()
        )
        if latest_version is None:
            logging.warning(
                "There are no entities for paper %s in database schema %s",
                arxiv_id,
                schema,
            )
            return None
        version = int(latest_version)

    # Entities are filtered in two places: in the 'where' clause of the query, and in
    # the loop over the returned rows. Prefer the 'where' clause, which keeps
    # conditions in one place and may be faster. Reserve the loop for filters that are
    # tricky to express in a query (e.g., many interrelated conditions).
    box_columns = (
        BoundingBoxModel.left,
        BoundingBoxModel.top,
        BoundingBoxModel.width,
        BoundingBoxModel.height,
        BoundingBoxModel.page,
    )
    query = (
        EntityModel.select(
            EntityModel.id,
            EntityModel.type,
            *box_columns,
            # Aggregate the data of an entity into an array in which each item is a
            # dictionary: {"key": "...", "value": "..."}. All values are strings.
            fn.json_agg(
                fn.json_build_object(
                    "key", EntityDataModel.key, "value", EntityDataModel.value
                )
            ).alias("data"),
        )
        .join(Paper)
        .switch(EntityModel)
        .join(BoundingBoxModel)
        .switch(EntityModel)
        .join(EntityDataModel)
        .where(
            EntityModel.version == version,
            Paper.arxiv_id == arxiv_id,
            EntityModel.type << types,
        )
        .group_by(EntityModel.id, EntityModel.type, *box_columns)
        .dicts()
    )

    boxes_by_entity_db_id: Dict[str, List[BoundingBox]] = defaultdict(list)
    types_by_entity_db_id: Dict[str, str] = {}
    for row in query:
        is_non_gold_symbol = row["type"] == "symbol" and any(
            d["key"] == "type" and d["value"] not in GOLD_SYMBOL_TYPES
            for d in row["data"]
        )
        if is_non_gold_symbol:
            continue

        entity_db_id = row["id"]
        entity_box = BoundingBox(
            row["left"], row["top"], row["width"], row["height"], row["page"]
        )
        boxes_by_entity_db_id[entity_db_id].append(entity_box)
        types_by_entity_db_id[entity_db_id] = row["type"]

    # Regroup each entity's boxes by the page they appear on.
    regions: RegionsByPageAndType = defaultdict(list)
    for db_id, entity_boxes in boxes_by_entity_db_id.items():
        for page, page_boxes in group_by_page(entity_boxes).items():
            region_key = (page, types_by_entity_db_id[db_id])
            regions[region_key].append(
                frozenset(
                    FloatRectangle(b.left, b.top, b.width, b.height)
                    for b in page_boxes
                )
            )
    return regions
def detect(self, image):
    """
    Detect objects in an image with the YOLO network held by this detector.

    Predictions with a class probability of at most 'self.min_confidence' are
    discarded, and overlapping boxes are removed with non-maxima suppression using
    'self.threshold'. Returns a list of 'Detection' objects, each with a label, a
    confidence, and a bounding box given by its top-left corner, width, and height
    in pixels of the input image.
    """
    min_confidence = self.min_confidence
    threshold = self.threshold
    net = self.net
    labels = self.labels
    (H, W) = image.shape[:2]

    # Determine only the *output* layer names that we need from YOLO. Depending on
    # the OpenCV version, getUnconnectedOutLayers() returns either an Nx1 array or a
    # flat array of 1-based layer indexes; flattening handles both shapes.
    layer_names = net.getLayerNames()
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    ln = [layer_names[i - 1] for i in out_layer_ids]

    # Construct a blob from the input image and then perform a forward pass of the
    # YOLO object detector, giving us our bounding boxes and associated probabilities.
    blob = cv2.dnn.blobFromImage(image,
                                 scalefactor=self.scale_factor,
                                 size=self.blob_size,
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(ln)

    boxes = []
    confidences = []
    class_ids = []

    # YOLO returns box coordinates relative to the image size, so they are scaled
    # back up by the image's width and height.
    image_scale = np.array([W, H, W, H])

    for output in layer_outputs:
        for detection in output:
            # Extract the class ID and confidence (i.e., probability) of the
            # current object detection.
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = scores[class_id]

            # Filter out weak predictions.
            if confidence > min_confidence:
                # YOLO returns the center (x, y)-coordinates of the bounding box
                # followed by the box's width and height.
                box = detection[0:4] * image_scale
                (centerX, centerY, width, height) = box.astype("int")

                # Use the center coordinates to derive the top-left corner.
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Apply non-maxima suppression to suppress weak, overlapping bounding boxes.
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, threshold)

    # NMSBoxes returns an empty sequence when nothing is kept, and otherwise an Nx1
    # or flat array of indexes depending on the OpenCV version. Flattening covers
    # all of these cases, including the empty one.
    detections = []
    for i in np.asarray(idxs, dtype=int).flatten():
        (x, y) = (boxes[i][0], boxes[i][1])
        (w, h) = (boxes[i][2], boxes[i][3])
        class_id = class_ids[i]
        label = labels[class_id]
        confidence = confidences[i]
        bounding_box = BoundingBox(x, y, w, h)
        detections.append(Detection(label, confidence, bounding_box))

    return detections
def load(self) -> Iterator[SymbolData]:
    """
    Yield the data needed to upload the symbols of each paper in 'self.arxiv_ids'.

    A paper is skipped if it has no S2 ID, no symbols, no 'symbol_locations.csv'
    file, or no 'matches.csv' file. If symbols have not been aligned to sentences,
    the paper's data is still yielded, with an empty mapping from symbols to sentences.
    """
    for arxiv_id in self.arxiv_ids:

        s2_id = get_s2_id(arxiv_id)
        if s2_id is None:
            continue

        symbols_with_ids = file_utils.load_symbols(arxiv_id)
        if symbols_with_ids is None:
            continue

        # Load the bounding box of each symbol.
        boxes: Dict[SymbolId, BoundingBox] = {}
        boxes_path = os.path.join(
            directories.arxiv_subdir("symbol-locations", arxiv_id),
            "symbol_locations.csv",
        )
        if not os.path.exists(boxes_path):
            logging.warning(
                "Could not find bounding boxes information for %s. Skipping",
                arxiv_id,
            )
            continue
        for location in file_utils.load_from_csv(boxes_path, SymbolLocation):
            symbol_id = SymbolId(
                tex_path=location.tex_path,
                equation_index=location.equation_index,
                symbol_index=location.symbol_index,
            )
            box = BoundingBox(
                page=int(location.page),
                left=location.left,
                top=location.top,
                width=location.width,
                height=location.height,
            )
            boxes[symbol_id] = box

        # Load MathML matches, grouped by the MathML that was queried.
        matches: Matches = {}
        matches_path = os.path.join(
            directories.arxiv_subdir("symbol-matches", arxiv_id), "matches.csv")
        if not os.path.exists(matches_path):
            logging.warning(
                "Could not find symbol matches information for %s. Skipping",
                arxiv_id,
            )
            continue
        for match in file_utils.load_from_csv(matches_path, Match):
            matches.setdefault(match.queried_mathml, []).append(match)

        # Load the sentence each symbol belongs to. The mapping is reset for every
        # paper, so a paper without sentence data is uploaded with no sentence links,
        # rather than failing on an unassigned name or reusing another paper's links.
        symbol_sentences: Dict[SymbolId, SentenceKey] = {}
        sentences_path = os.path.join(
            directories.arxiv_subdir("sentences-for-symbols", arxiv_id),
            "entity_sentences.csv",
        )
        if os.path.exists(sentences_path):
            for pair in file_utils.load_from_csv(sentences_path,
                                                 EntitySentencePairIds):
                equation_index, symbol_index = [
                    int(t) for t in pair.entity_id.split("-")
                ]
                sentence_key = SentenceKey(pair.tex_path, pair.sentence_id)
                symbol_id = SymbolId(pair.tex_path, equation_index, symbol_index)
                symbol_sentences[symbol_id] = sentence_key
        else:
            logging.warning(
                "Symbols for arXiv paper %s have not been aligned to sentences. "
                "Symbol data will be uploaded without links to sentences",
                arxiv_id,
            )

        yield SymbolData(
            arxiv_id,
            s2_id,
            symbols_with_ids,
            boxes,
            symbol_sentences,
            matches,
        )
def load(self) -> Iterator[SymbolData]:
    """
    Yield the data needed to upload the symbols of each paper in 'self.arxiv_ids'.

    A paper is skipped if it has no S2 ID, no symbols, no 'symbol_locations.csv'
    file, or no 'matches.csv' file. If no contexts file exists for the paper, its
    data is still yielded, with empty context collections. Alongside the contexts,
    formulas that define symbols are collected per symbol and per MathML.
    """
    for arxiv_id in self.arxiv_ids:

        s2_id = get_s2_id(arxiv_id)
        if s2_id is None:
            continue

        symbols_with_ids = file_utils.load_symbols(arxiv_id)
        if symbols_with_ids is None:
            continue
        symbols_by_id = {
            with_id.symbol_id: with_id.symbol for with_id in symbols_with_ids
        }

        # Load the bounding box of each symbol.
        boxes_path = os.path.join(
            directories.arxiv_subdir("symbol-locations", arxiv_id),
            "symbol_locations.csv",
        )
        if not os.path.exists(boxes_path):
            logging.warning(
                "Could not find bounding boxes information for %s. Skipping",
                arxiv_id,
            )
            continue
        boxes: Dict[SymbolId, BoundingBox] = {}
        for row in file_utils.load_from_csv(boxes_path, SymbolLocation):
            located_id = SymbolId(
                tex_path=row.tex_path,
                equation_index=row.equation_index,
                symbol_index=row.symbol_index,
            )
            boxes[located_id] = BoundingBox(
                page=int(row.page),
                left=row.left,
                top=row.top,
                width=row.width,
                height=row.height,
            )

        # Load MathML matches, grouped by the MathML that was queried.
        matches_path = os.path.join(
            directories.arxiv_subdir("symbol-matches", arxiv_id), "matches.csv")
        if not os.path.exists(matches_path):
            logging.warning(
                "Could not find symbol matches information for %s. Skipping",
                arxiv_id,
            )
            continue
        matches: Matches = {}
        for found_match in file_utils.load_from_csv(matches_path, Match):
            matches.setdefault(found_match.queried_mathml, []).append(found_match)

        # Load the contexts symbols appear in, by symbol and by the symbol's MathML.
        contexts_path = os.path.join(
            directories.arxiv_subdir("contexts-for-symbols", arxiv_id),
            "contexts.csv",
        )
        contexts_available = os.path.exists(contexts_path)
        if not contexts_available:
            logging.warning(
                "Contexts have not been found for symbols for arXiv paper %s. "
                "Symbol data will be uploaded without contexts.",
                arxiv_id,
            )

        symbol_contexts = {}
        mathml_contexts = defaultdict(list)
        if contexts_available:
            for context in file_utils.load_from_csv(contexts_path, Context):
                # Entity IDs have the form '<equation index>-<symbol index>'.
                id_parts = [int(t) for t in context.entity_id.split("-")]
                equation_index, symbol_index = id_parts
                context_symbol_id = SymbolId(
                    context.tex_path, equation_index, symbol_index
                )
                symbol_contexts[context_symbol_id] = context
                mathml = symbols_by_id[context_symbol_id].mathml
                mathml_contexts[mathml].append(context)

        # Collect formulas that define symbols, with the symbol highlighted.
        symbol_formulas = {}
        mathml_formulas = defaultdict(set)
        for id_, symbol in symbols_by_id.items():
            has_span = (
                symbol.equation is not None
                and symbol.relative_start is not None
                and symbol.relative_end is not None
            )
            if not (symbol.is_definition and has_span):
                continue

            formula = DefiningFormula(
                tex=wrap_span(
                    symbol.equation,
                    symbol.relative_start,
                    symbol.relative_end,
                    before=r"\htmlClass{match-highlight}{",
                    after="}",
                    braces=True,
                ),
                tex_path=id_.tex_path,
                equation_id=id_.equation_index,
            )
            symbol_formulas[id_] = formula
            mathml_formulas[symbol.mathml].add(formula)

        yield SymbolData(
            arxiv_id,
            s2_id,
            symbols_with_ids,
            boxes,
            symbol_contexts,
            symbol_formulas,
            mathml_contexts,
            mathml_formulas,
            matches,
        )