def __record_page_table_and_metadata(self, answers):
    """Record each table marked on this task's page, plus its metadata, on mbdb.

    For every answer a page-table row is recorded (bounding box and the
    local URL of the table image), the row is read back by position to
    obtain its id, and a metadata row is then recorded for that table.

    Args:
        answers: Sequence of answer dicts. Each one is read for the keys
            ``'top'``, ``'left'``, ``'width'``, ``'height'`` and ``'text'``;
            ``answer['text']`` is read for ``'assunto'``, ``'fontes'``,
            ``'titulo'``, ``'subtitulo'``, ``'outros'``, ``'dataInicial'``
            and ``'dataFinal'``.

    Returns:
        None. A warning is logged and the method returns early when the
        page, or a page table that was just recorded, cannot be found.
    """
    # Retrieve the page (it may be None).
    book_id = self.get_book_id()
    page_num = self.task.info["page"]
    page = data_manager.get_page(book_id, page_num)

    if page is None:
        logger.warn("The page from the book " + book_id + " and page number " + str(page_num) + " doesn't exist.")
        return

    for i, answer in enumerate(answers):
        # The answer gives top/left plus width/height; the stored box uses
        # absolute right/bottom coordinates instead.
        page_table_dict = dict(
            bookid=book_id,
            pageid=page.id,
            local_url="/books/" + book_id + "/metadados/tabelasBaixa/image" + str(page_num) + "_" + str(i) + ".png",
            top_pos=answer['top'],
            left_pos=answer['left'],
            right_pos=(answer['left'] + answer['width']),
            bottom_pos=(answer['top'] + answer['height']))
        data_manager.record_page_table(page_table_dict)

        # Read the page table back by position to get its id.
        page_table = data_manager.get_page_table_by_position(
            page.id,
            page_table_dict['top_pos'],
            page_table_dict['left_pos'],
            page_table_dict['right_pos'],
            page_table_dict['bottom_pos'])

        if page_table is None:
            logger.warn("The page table from the book " + book_id + " and page " + str(page.id) + " doesn't exist.")
            # NOTE(review): this stops processing the remaining answers too.
            return

        metadata = answer['text']
        subject_name = priority_task_manager.get_subject(metadata['assunto'])

        data_manager.record_metadata(dict(
            bookid=book_id,
            pageid=page.id,
            pagetableid=page_table.id,
            source=metadata['fontes'],
            title=metadata['titulo'],
            subtitle=metadata['subtitulo'],
            # "Outros" means the free-text subject typed by the user is used.
            subject=(metadata['outros'] if subject_name == "Outros" else subject_name),
            initial_date=metadata['dataInicial'],
            final_date=metadata['dataFinal']))
def add_next_task(self):
    """Record the confirmed page on mbdb and create the follow-up task.

    The follow-up task is added to the app whose short name is this app's
    short name with its last character replaced by ``"2"``, and the
    workflow transaction is updated with the ids of both tasks.

    Returns:
        True when the next task was created. False when a
        ``Meb_exception_tt1`` was raised inside the method, i.e. the task
        answer is not ``"Yes"`` or the next task had already been created;
        the exception is logged as an error.
    """
    try:
        task_info = self.task.info

        # Only a "Yes" answer leads to a new task. The ``in`` test replaces
        # dict.has_key(), which no longer exists on Python 3.
        if not ("answer" in task_info and task_info["answer"] == "Yes"):
            raise Meb_exception_tt1(1, self.task.id)

        # Record the page on mbdb.
        bookId = self.get_book_id()
        archurl = task_info["url_m"]
        pg = task_info["page"]
        data_manager.record_page(dict(bookid=bookId, archiveURL=archurl, page_num=pg))

        if self.__checkIfNextTaskWasCreated():
            logger.warn(Meb_exception_tt1(3, self.task.id))
            raise Meb_exception_tt1(3, self.task.id)

        next_task_info = dict(link=task_info["url_m"], page=task_info["page"])
        tt2_app_short_name = self.app_short_name[:-1] + "2"
        tt2_app = ttapps.Apptt_meta(short_name=tt2_app_short_name)
        tt2_task = tt2_app.add_task(next_task_info)

        workflow_transaction_info = dict(
            task_id_1=self.task.id,
            task_id_2=tt2_task.id,
            task_id_3=None,
            task_id_4=None)
        data_manager.update_workflow_transaction_t2(workflow_transaction_info)

        return True
    except Meb_exception_tt1 as e:
        logger.error(e)
        return False
def __record_cells(self, answer_json):
    """Record every transcribed cell of this task's table on mbdb.

    Args:
        answer_json: Dict read for the keys ``'cells'`` and
            ``'human_values'``. ``cells[i]`` is indexed at positions 0-3 to
            obtain ``x0, y0, x1, y1`` and ``human_values[i]`` is stored as
            the text of that cell.

    Returns:
        None. A warning is logged and nothing is recorded when the page or
        the page table of this task cannot be found.
    """
    book_id = self.get_book_id()
    page_num = self.task.info["page"]
    page = data_manager.get_page(book_id, page_num)

    if page is None:
        logger.warn("The page from the book " + book_id + " and page number " + str(page_num) + " doesn't exist.")
        return

    cells = answer_json['cells']
    human_values = answer_json['human_values']

    # Nothing to record: skip the page-table lookup, as the per-cell loop
    # would never have performed it.
    if len(cells) == 0:
        return

    # The page table depends only on the page and the task image URL, so it
    # is looked up once instead of once per cell.
    page_table = data_manager.get_page_table_by_local_url(page.id, self.task.info["img_url"])
    if page_table is None:
        logger.warn("The page table from the book " + book_id + " and page " + str(page.id) + " doesn't exist.")
        return

    for i, cell in enumerate(cells):
        cell_info_dict = dict(
            bookid=book_id,
            pageid=page.id,
            pagetableid=page_table.id,
            text=human_values[i],
            x0=cell[0],
            y0=cell[1],
            x1=cell[2],
            y1=cell[3])
        data_manager.record_cell(cell_info_dict)
def record_page_table(page_table_info_dict):
    """Insert a page table into mbdb unless one already exists at that position.

    Args:
        page_table_info_dict: Dict read for the keys ``"bookid"``,
            ``"pageid"``, ``"local_url"``, ``"top_pos"``, ``"left_pos"``,
            ``"right_pos"`` and ``"bottom_pos"``.

    Returns:
        None. When a page table with the same page id and position is
        found, a warning is logged and nothing is inserted.

    Raises:
        Exception: Whatever ``db.session`` raises while adding or
            committing; the session is rolled back before re-raising.
    """
    existing = get_page_table_by_position(
        page_table_info_dict["pageid"],
        page_table_info_dict["top_pos"],
        page_table_info_dict["left_pos"],
        page_table_info_dict["right_pos"],
        page_table_info_dict["bottom_pos"],
    )

    if existing is not None:
        logger.warn(
            "The page table "
            + str(existing.id)
            + " from the book "
            + existing.book_id
            + " already exists in mbdb."
        )
        return

    new_page_table = page_table(
        page_table_info_dict["bookid"],
        page_table_info_dict["pageid"],
        page_table_info_dict["local_url"],
        page_table_info_dict["top_pos"],
        page_table_info_dict["left_pos"],
        page_table_info_dict["right_pos"],
        page_table_info_dict["bottom_pos"],
    )

    try:
        db.session.add(new_page_table)
        db.session.commit()
    except Exception:
        db.session.rollback()
        # Bare raise keeps the original traceback intact.
        raise
def record_metadata(metadata_info_dict):
    """Insert the metadata of a page table into mbdb unless it already exists.

    Args:
        metadata_info_dict: Dict read for the keys ``"bookid"``,
            ``"pageid"``, ``"pagetableid"``, ``"source"``, ``"title"``,
            ``"subtitle"``, ``"subject"``, ``"initial_date"`` and
            ``"final_date"``.

    Returns:
        None. When ``get_metadata`` already returns a row for the page
        table id, a warning is logged and nothing is inserted.

    Raises:
        Exception: Whatever ``db.session`` raises while adding or
            committing; the session is rolled back before re-raising.
    """
    existing = get_metadata(metadata_info_dict["pagetableid"])

    if existing is not None:
        logger.warn(
            "The metadata "
            + str(existing.id)
            + " from the page table "
            + str(existing.page_table_id)
            + " and the book "
            + existing.book_id
            + " already exists in mbdb."
        )
        return

    new_metadata = metadata(
        metadata_info_dict["bookid"],
        metadata_info_dict["pageid"],
        metadata_info_dict["pagetableid"],
        metadata_info_dict["source"],
        metadata_info_dict["title"],
        metadata_info_dict["subtitle"],
        metadata_info_dict["subject"],
        metadata_info_dict["initial_date"],
        metadata_info_dict["final_date"],
    )

    try:
        db.session.add(new_metadata)
        db.session.commit()
    except Exception:
        db.session.rollback()
        # Bare raise keeps the original traceback intact.
        raise
def record_workflow_transaction(workflow_transaction_info_dict):
    """Insert a workflow transaction unless an equivalent one already exists.

    Args:
        workflow_transaction_info_dict: Dict read for the keys
            ``"task_id_1"``, ``"task_id_2"``, ``"task_id_3"`` and
            ``"task_id_4"``. The whole dict is also passed to
            ``get_workflow_transaction`` for the existence check.

    Returns:
        None. When a matching transaction is found, a warning is logged and
        nothing is inserted.

    Raises:
        Exception: Whatever ``db.session`` raises while adding or
            committing; the session is rolled back before re-raising.
    """
    existing = get_workflow_transaction(workflow_transaction_info_dict)

    if existing is not None:
        logger.warn(
            "This workflow transaction already exists - Task 1 ID: "
            + str(existing.task_id_1)
            + ", Task 2 ID: "
            + str(existing.task_id_2)
            + ", Task 3 ID: "
            + str(existing.task_id_3)
            + ", Task 4 ID: "
            + str(existing.task_id_4)
            + "."
        )
        return

    new_transaction = workflow_transaction(
        workflow_transaction_info_dict["task_id_1"],
        workflow_transaction_info_dict["task_id_2"],
        workflow_transaction_info_dict["task_id_3"],
        workflow_transaction_info_dict["task_id_4"],
    )

    try:
        db.session.add(new_transaction)
        db.session.commit()
    except Exception:
        db.session.rollback()
        # Bare raise keeps the original traceback intact.
        raise
def add_next_task(self):
    """Run OCR over the table cells and create the follow-up transcription task.

    The cells are built from the loaded answers, OCR is run on them, and a
    task carrying the cells, OCR values and confidences is added to the app
    whose short name is this app's short name with its last character
    replaced by ``"4"``. The workflow transaction is then updated with the
    new task id.

    Returns:
        True when the next task was created. False when the task has zoom
        and no answers map could be loaded.

    Raises:
        Meb_exception_tt3: When the next task was already created.
        Exception: Any error raised while building the next task; it is
            logged and re-raised.
    """
    if self.__checkIfNextTaskWasCreated():
        logger.warn(Meb_exception_tt3(3, self.task.id))
        raise Meb_exception_tt3(3, self.task.id)

    try:
        linesAndColumnsMap = self.__loadAnswers()

        # Did not find a valid task group.
        if self.task.info['hasZoom'] and linesAndColumnsMap is None:
            return False

        # NOTE(review): if __loadAnswers() can return None while 'hasZoom'
        # is falsy, the subscripts below raise TypeError - confirm.
        cells = cells_util.create_cells(
            linesAndColumnsMap["linhas"],
            linesAndColumnsMap["colunas"],
            linesAndColumnsMap["maxX"],
            linesAndColumnsMap["maxY"])

        linkImg = self.task.info['img_url']
        book_id = self.get_book_id()
        page = self.task.info['page']
        table_id = self.task.info['table_id']
        maxX = linesAndColumnsMap["maxX"]
        maxY = linesAndColumnsMap["maxY"]

        self.__runOCR(cells, book_id, page, table_id, maxX, maxY)

        values = self.__loadValues(book_id, page, table_id)
        confidences = self.__loadConfidences(book_id, page, table_id)

        infoDict = {
            'cells': cells,
            'img_url': linkImg,
            'page': page,
            'table_id': table_id,
            'maxX': maxX,
            'maxY': maxY,
            'values': values,
            'confidences': confidences,
        }

        tt4_app_short_name = self.app_short_name[:-1] + "4"
        tt4_app = ttapps.Apptt_transcribe(short_name=tt4_app_short_name)
        tt4_task = tt4_app.add_task(infoDict, priority=self.task.priority_0)

        workflow_transaction_info = dict(
            task_id_1=None,
            task_id_2=None,
            task_id_3=self.task.id,
            task_id_4=tt4_task.id)
        data_manager.update_workflow_transaction_t4(workflow_transaction_info)

        return True
    except Exception as e:
        logger.error(e)
        # Bare raise keeps the original traceback intact.
        raise
def record_page(page_info_dict):
    """Insert a page into mbdb unless the book already has that page number.

    Args:
        page_info_dict: Dict read for the keys ``"bookid"``,
            ``"archiveURL"`` and ``"page_num"``.

    Returns:
        None. When ``get_page`` already returns a row for the book id and
        page number, a warning is logged and nothing is inserted.

    Raises:
        Exception: Whatever ``db.session`` raises while adding or
            committing; the session is rolled back before re-raising.
    """
    existing = get_page(page_info_dict["bookid"], page_info_dict["page_num"])

    if existing is not None:
        # str() guards against a numeric page_num, which would otherwise
        # make the concatenation raise TypeError instead of logging.
        logger.warn(
            "The page "
            + str(existing.page_num)
            + " from the book "
            + existing.book_id
            + " already exists in mbdb."
        )
        return

    new_page = page(
        page_info_dict["bookid"],
        page_info_dict["archiveURL"],
        page_info_dict["page_num"],
    )

    try:
        db.session.add(new_page)
        db.session.commit()
    except Exception:
        db.session.rollback()
        # Bare raise keeps the original traceback intact.
        raise
def __validateTaskGroup(self, similarTasks):
    """Tell whether every task in ``similarTasks`` is completed.

    Only the given tasks are inspected; the state of ``self.task`` and the
    size of the group are not checked here.

    Args:
        similarTasks: Iterable of task objects exposing a ``state``
            attribute.

    Returns:
        True when no task has a state other than ``"completed"``. False as
        soon as one such task is found; that task is logged as a warning.
    """
    first_pending = next(
        (task for task in similarTasks if not task.state == "completed"),
        None,
    )

    if first_pending is None:
        return True

    message = "invalid task (not completed): " + str(first_pending)
    logger.warn(message)
    return False
def record_book(info_book_dict):
    """Insert a book into mbdb unless a book with that id already exists.

    Args:
        info_book_dict: Dict read for the keys ``"bookid"``, ``"title"``,
            ``"publisher"``, ``"contributor"``, ``"volume"`` and ``"img"``.

    Returns:
        None. When ``get_book`` already returns a row for the book id, a
        warning is logged and nothing is inserted.

    Raises:
        Exception: Whatever ``db.session`` raises while adding or
            committing; the session is rolled back before re-raising.
    """
    existing = get_book(info_book_dict["bookid"])

    if existing is not None:
        logger.warn("The book " + existing.title + " already exists in mbdb.")
        return

    new_book = book(
        info_book_dict["bookid"],
        info_book_dict["title"],
        info_book_dict["publisher"],
        info_book_dict["contributor"],
        info_book_dict["volume"],
        info_book_dict["img"],
    )

    try:
        db.session.add(new_book)
        db.session.commit()
    except Exception:
        db.session.rollback()
        # Bare raise keeps the original traceback intact.
        raise
def record_cell(cell_info_dict):
    """Insert a table cell into mbdb unless one already exists at that position.

    Args:
        cell_info_dict: Dict read for the keys ``"bookid"``, ``"pageid"``,
            ``"pagetableid"``, ``"text"``, ``"x0"``, ``"y0"``, ``"x1"`` and
            ``"y1"``.

    Returns:
        None. When a cell with the same page table id and coordinates is
        found, a warning is logged and nothing is inserted.

    Raises:
        Exception: Whatever ``db.session`` raises while adding or
            committing; the session is rolled back before re-raising.
    """
    existing = get_cell_by_position(
        cell_info_dict["pagetableid"],
        cell_info_dict["x0"],
        cell_info_dict["y0"],
        cell_info_dict["x1"],
        cell_info_dict["y1"],
    )

    if existing is not None:
        logger.warn(
            "The cell "
            + str(existing.id)
            + " from the page table "
            + str(existing.page_table_id)
            + " and the book "
            + existing.book_id
            + " already exists in mbdb."
        )
        return

    new_cell = cell(
        cell_info_dict["bookid"],
        cell_info_dict["pageid"],
        cell_info_dict["pagetableid"],
        cell_info_dict["text"],
        cell_info_dict["x0"],
        cell_info_dict["y0"],
        cell_info_dict["x1"],
        cell_info_dict["y1"],
    )

    try:
        db.session.add(new_cell)
        db.session.commit()
    except Exception:
        db.session.rollback()
        # Bare raise keeps the original traceback intact.
        raise
def update_workflow_transaction_t4(workflow_transaction_info_dict):
    """Set ``task_id_4`` on the workflow transaction found by its ``task_id_3``.

    Args:
        workflow_transaction_info_dict: Dict read for the keys
            ``"task_id_1"``, ``"task_id_2"``, ``"task_id_3"`` and
            ``"task_id_4"``. Only ``"task_id_3"`` is used for the lookup
            and only ``"task_id_4"`` is written; the other two appear in
            the warning message only.

    Returns:
        None. When no transaction is found for ``task_id_3``, a warning is
        logged and nothing is updated.

    Raises:
        Exception: Whatever ``db.session.commit()`` raises; the session is
            rolled back before re-raising.
    """
    transaction = get_workflow_transaction_by_task_id_3(workflow_transaction_info_dict["task_id_3"])

    if transaction is None:
        logger.warn(
            "Updating a workflow transaction that doesn't exist - Task 1 ID: "
            + str(workflow_transaction_info_dict["task_id_1"])
            + ", Task 2 ID: "
            + str(workflow_transaction_info_dict["task_id_2"])
            + ", Task 3 ID: "
            + str(workflow_transaction_info_dict["task_id_3"])
            + ", Task 4 ID: "
            + str(workflow_transaction_info_dict["task_id_4"])
            + "."
        )
        return

    transaction.task_id_4 = workflow_transaction_info_dict["task_id_4"]

    try:
        db.session.commit()
    except Exception:
        db.session.rollback()
        # Bare raise keeps the original traceback intact.
        raise
def add_next_task(self):
    """Record the marked tables and create one follow-up task per table piece.

    Only the last task run is used. Its answer is parsed as JSON; unless the
    raw answer is ``"0"``, the page tables and their metadata are recorded,
    the page image is downloaded and processed, and tasks are added to the
    app whose short name is this app's short name with its last character
    replaced by ``"3"``. The first new task updates the existing workflow
    transaction; every further task is recorded as a new transaction.

    Returns:
        True when the follow-up tasks were created, False when the answer
        is ``"0"``.

    Raises:
        Meb_exception_tt2: When the next task was already created, or when
            raised by any of the processing steps.
    """
    # Get the list of task runs and keep only the last answer.
    task_runs = self.get_task_runs()
    task_run = task_runs[-1]

    answer = task_run.info
    answer_info_json = json.loads(answer)

    # An answer of "0" means the page has no table: nothing to create.
    if answer == "0":
        return False

    # Record the page tables and their metadata on mbdb.
    self.__record_page_table_and_metadata(answer_info_json)

    if self.__checkIfNextTaskWasCreated():
        logger.warn(Meb_exception_tt2(6, self.task.id))
        raise Meb_exception_tt2(6, self.task.id)

    tt3_app_short_name = self.app_short_name[:-1] + "3"
    tt3_app = ttapps.Apptt_struct(short_name=tt3_app_short_name)

    bookId = self.get_book_id()
    imgId = self.task.info["page"]
    rotate = answer_info_json[0]["text"]["girar"]

    try:
        self.__downloadArchiveImages(bookId, imgId)
        self.__runLinesRecognition(bookId, imgId, rotate)

        # File with the recognized lines. The context manager closes it
        # once the coordinates are extracted; it was previously left open.
        lines_file_path = "%s/books/%s/metadados/saida/image%s_model%s.txt" % (
            app.config['CV_MODULES'], bookId, imgId, "1")
        with open(lines_file_path) as arch:
            # Get the line recognitions.
            tables_coords = self.__splitFile(arch)

        t3_tasks = []

        for tableId in range(len(tables_coords)):
            self.__runAreaSelection(bookId, imgId, tableId, rotate)

            image_pieces = self.__getAreaSelection(bookId, imgId, tableId)
            table_subject_code = answer_info_json[tableId]['text']['assunto']
            next_task_priority = priority_task_manager.get_priority(table_subject_code)

            if len(image_pieces) > 0:
                # One task per zoomed piece of the table.
                for image_piece in image_pieces:
                    info = dict(
                        hasZoom=True,
                        zoom=image_piece,
                        coords=tables_coords[tableId],
                        table_id=tableId,
                        page=imgId,
                        img_url=self.__url_table(bookId, imgId, tableId))
                    # Add the task to the tt3 backend.
                    t3_tasks.append(tt3_app.add_task(info, priority=next_task_priority))
            else:
                # No pieces: a single task for the whole table, without zoom.
                info = dict(
                    hasZoom=False,
                    coords=tables_coords[tableId],
                    table_id=tableId,
                    page=imgId,
                    img_url=self.__url_table(bookId, imgId, tableId))
                t3_tasks.append(tt3_app.add_task(info, priority=next_task_priority))

        workflow_transaction = data_manager.get_workflow_transaction_by_task_id_2(self.task.id)
        workflow_transaction_info = dict(
            task_id_1=workflow_transaction.task_id_1,
            task_id_2=workflow_transaction.task_id_2,
            task_id_3=workflow_transaction.task_id_3,
            task_id_4=workflow_transaction.task_id_4)

        for i, t3_task in enumerate(t3_tasks):
            workflow_transaction_info['task_id_3'] = t3_task.id

            # The first task fills the existing transaction; each further
            # task gets a transaction of its own.
            if i == 0:
                data_manager.update_workflow_transaction_t3(workflow_transaction_info)
            else:
                data_manager.record_workflow_transaction(workflow_transaction_info)

        return True
    except Meb_exception_tt2:
        # Bare raise keeps the original traceback intact.
        raise