def __create_app(self):
    """
    Return the id of the PyBOSSA app identified by ``self.short_name``.

    If ``pbclient.find_app`` already finds an app with that short name, the
    id of the first match is returned. Otherwise the app is created from
    ``self.name``, ``self.short_name`` and ``self.description``; its ``info``
    is set to a dict holding the ``newtask`` URL, its ``category_id`` is set
    to 1 and the app is saved with ``pbclient.update_app``.

    :returns: app.id, or None when ``pbclient.create_app`` returns a falsy
        result
    :rtype: int
    :raises Exception: any error raised while configuring the freshly
        created app is logged and re-raised
    """
    apps = pbclient.find_app(short_name=self.short_name)
    if apps:
        app = apps[0]
        msg = '{app_name} app is already registered in the DB'.format(
            app_name=app.name.encode('utf-8', 'replace'))
        logger.info(unicode(msg, "utf-8"))
        return app.id

    logger.info("The application is not registered in PyBOSSA. Creating it...")
    ans = pbclient.create_app(name=self.name,
                              short_name=self.short_name,
                              description=self.description)
    if not ans:
        # Previously this path fell through without any trace in the logs.
        logger.error("PyBOSSA did not create the application: %s",
                     self.short_name)
        return None

    try:
        app = pbclient.find_app(short_name=self.short_name)[0]
        app.info = dict(newtask="%s/app/%s/newtask" % (
            flask_app.config['PYBOSSA_URL'], self.short_name))
        app.category_id = 1
        pbclient.update_app(app)
        return app.id
    except Exception:
        logger.error(Meb_apps_exception(4, -1, self.short_name))
        # Bare raise keeps the original traceback (``raise ex`` resets it
        # on Python 2).
        raise
def generate_metadata_file(self, metadata_dict, bookid):
    """
    Render table metadata as a text file and record it via ``data_mngr2``.

    The text is written to a temporary file, read back as bytes and handed
    to ``data_mngr2.record_metadata_file`` together with the book id, page
    number and table number. The temporary file is always removed.

    :param metadata_dict: dict read with the keys ``title``, ``subtitle``,
        ``subject``, ``source``, ``book_title``, ``page_number`` and
        ``table_number``
    :param bookid: value stored as ``book_id`` in the recorded entry
    :raises Exception: any failure is logged and re-raised
    """
    try:
        fd, tmp_path = tempfile.mkstemp()
        try:
            # Wrap the descriptor returned by mkstemp so it is closed
            # instead of leaked.
            with os.fdopen(fd, "w") as f:
                f.write("Título: " + metadata_dict["title"] + "\n")
                f.write("Subtítulo: " + metadata_dict["subtitle"] + "\n")
                f.write("Assunto: " + subject_table_map.get_subject(metadata_dict["subject"]) + "\n")
                f.write("Fontes: " + metadata_dict["source"] + "\n")
                f.write("Título do Livro: " + metadata_dict["book_title"] + "\n")
                f.write("Página do Livro: " + str(metadata_dict["page_number"]) + "\n")
                f.write("Número da Tabela: " + str(metadata_dict["table_number"]) + "\n")

            with open(tmp_path, "rb") as f:
                content = f.read()
        finally:
            # Remove the temporary file even when writing or reading failed.
            os.unlink(tmp_path)

        data_mngr2.record_metadata_file(dict(
            book_id=bookid,
            page_number=metadata_dict["page_number"],
            table_number=metadata_dict["table_number"],
            mt_file=content
        ))

        msg = "Metadata file generated with success. Content: " + str(metadata_dict)
        logger.info(msg)
    except Exception as e:
        logger.error(e)
        # Bare raise preserves the original traceback.
        raise
def __loadAnswers(self):
    """
    Return the answer of this task as a dict, for zoom and non-zoom tasks.

    For a task without zoom, the ``info`` of the last task run is parsed
    as JSON and returned. For a task with zoom, the answers of the group of
    similar tasks are joined and returned under the keys ``linhas``,
    ``colunas``, ``maxX`` and ``maxY``. When the task group does not
    validate, a message is logged and None is returned.
    """
    if not self.task.info['hasZoom']:
        # The last element of the task runs is taken as the answer.
        last_run = self.get_task_runs()[-1]
        return json.loads(last_run.info)

    similar = self.__searchSimilarsTasks()
    if not self.__validateTaskGroup(similar):
        logger.info("** Invalid task group detected **")
        return

    grouped = self.__joinTaskGroupAnswers(similar)
    return {
        'linhas': grouped['lines'],
        'colunas': grouped['columns'],
        'maxX': grouped['maxX'],
        'maxY': grouped['maxY'],
    }
def __downloadArchiveImages(self, bookId, imgId, width=550, height=700,
                            max_width=1650, max_height=2100):
    """
    Download an Internet Archive page image and produce two PNG versions.

    The page is fetched at ``max_width`` x ``max_height`` and saved as a JPG
    under ``<CV_MODULES>/books/<bookId>/alta_resolucao``. ImageMagick
    ``convert`` then turns it into a PNG of that size, the JPG is removed,
    and a second ``convert`` writes a ``width`` x ``height`` PNG under
    ``baixa_resolucao``.

    :param bookId: archive.org item identifier
    :param imgId: page index used in the URL and in the file names
    :returns: True if the download was successful
    :rtype: bool
    :raises Meb_exception_tt2: on any failure, including an HTTP error
        status from archive.org
    """
    try:
        archiveURL = "http://archive.org/download/%s/page/n%s_w%s_h%s" % (
            bookId, imgId, max_width, max_height)
        logger.info("Downloading archive image: " + archiveURL)

        url_request = requests.get(archiveURL)
        # Fail here instead of saving an HTTP error page as the image.
        url_request.raise_for_status()

        fullImgPath = "%s/books/%s/alta_resolucao/image%s" % (
            app.config['CV_MODULES'], bookId, imgId)
        fullImgPathJPG = fullImgPath + ".jpg"
        fullImgPathPNG = fullImgPath + ".png"

        # The payload is binary image data, so write it in binary mode.
        with open(fullImgPathJPG, "wb") as fullImgFile:
            fullImgFile.write(url_request.content)

        lowImgPath = "%s/books/%s/baixa_resolucao/image%s" % (
            app.config['CV_MODULES'], bookId, imgId)
        lowImgPathPNG = lowImgPath + ".png"

        # Argument lists (no shell) so the ids embedded in the paths cannot
        # be interpreted as shell syntax. As with the former ';'-separated
        # shell line, every step runs regardless of the previous exit code.
        commands = [
            ["convert", fullImgPathJPG, "-resize",
             "%dx%d!" % (max_width, max_height), fullImgPathPNG],
            ["rm", fullImgPathJPG],
            ["convert", fullImgPathPNG, "-resize",
             "%dx%d!" % (width, height), lowImgPathPNG],
        ]
        for args in commands:
            logger.info("Command to download archive images: " + " ".join(args))
            call(args)

        return True
    except Exception as ex:
        logger.error(Meb_exception_tt2(5, self.task.id))
        logger.error(ex)
        raise Meb_exception_tt2(5, self.task.id)
def __init__(self, **keyargs):
    """
    Build the page-selection app (logged as task type 1).

    Keyword arguments read:

    * ``short_name`` (required) -- must contain ``"_tt1"``
    * ``title`` (optional) -- prefixed to the app name
    * ``book_info`` (optional) -- passed to ``data_mngr.record_book``

    :raises Meb_ttapps_exception: code 1 when ``short_name`` is missing,
        code 5 when it does not contain ``"_tt1"``
    """
    if "short_name" not in keyargs:
        logger.error(Meb_ttapps_exception(1, -1, "-"))
        raise Meb_ttapps_exception(1, -1, "-")

    short_name = keyargs['short_name']
    if "_tt1" not in short_name:
        logger.error(Meb_ttapps_exception(5, -1, "-"))
        raise Meb_ttapps_exception(5, -1, "-")

    title = keyargs['title'] + " " if "title" in keyargs else ""

    if "book_info" in keyargs:
        data_mngr.record_book(keyargs["book_info"])

    app_name = title + unicode("Seleção", "utf-8")
    parent = super(Apptt_select, self)
    parent.__init__(
        app_name, short_name,
        "Por favor. Selecione as páginas com tabela.")

    def open_resource(relative_path):
        # Open a resource located under the configured templates URL.
        return urllib2.urlopen(urllib2.Request(
            flask_app.config['URL_TEMPLATES'] + relative_path))

    parent.set_template(meb_util.set_url(
        open_resource("/templates/template-select.html"), short_name))

    parent.set_long_description(meb_util.set_url(
        open_resource("/templates/long_description-select.html"),
        short_name))

    parent.add_app_infos(dict(
        thumbnail=flask_app.config['URL_TEMPLATES'] +
        "/images/long_description_selection.png"))

    logger.info("Create task type 1")
def __init__(self, **keyargs):
    """
    Build the transcription app (logged as task type 4).

    Keyword arguments read:

    * ``short_name`` (required) -- must contain ``"_tt4"``
    * ``title`` (optional) -- prefixed to the app name

    The template and long description are fetched from the configured
    ``URL_TEMPLATES`` location, and the app infos are set to the
    ``incremental`` scheduler plus a thumbnail URL.

    :raises Meb_ttapps_exception: code 4 when ``short_name`` is missing,
        code 8 when it does not contain ``"_tt4"``
    """
    if "short_name" not in keyargs:
        # Log before raising, as the invalid-short_name branch below does.
        logger.error(Meb_ttapps_exception(4, -1, "-"))
        raise Meb_ttapps_exception(4, -1, "-")

    if "_tt4" not in keyargs['short_name']:
        logger.error(Meb_ttapps_exception(8, -1, "-"))
        raise Meb_ttapps_exception(8, -1, "-")
    short_name = keyargs['short_name']

    if "title" in keyargs:
        title = keyargs['title'] + " "
    else:
        title = ""

    app_name = title + unicode("Transcrição", "utf-8")

    super(Apptt_transcribe, self).__init__(
        app_name, short_name,
        "Por favor. Corrija o conteúdo das células da tabela.")

    super(Apptt_transcribe, self).set_template(meb_util.set_url(
        urllib2.urlopen(
            urllib2.Request(
                flask_app.config['URL_TEMPLATES'] +
                "/templates/template-transcribe.html")), short_name))

    super(Apptt_transcribe, self).set_long_description(meb_util.set_url(
        urllib2.urlopen(
            urllib2.Request(
                flask_app.config['URL_TEMPLATES'] +
                "/templates" +
                "/long_description-transcribe.html")), short_name))

    super(Apptt_transcribe, self).add_app_infos(
        dict(
            sched="incremental",
            thumbnail=flask_app.config['URL_TEMPLATES'] +
            "/images" +
            "/long_description_transcribe.png"))

    logger.info("Create task type 4")
def __init__(self, **keyargs):
    """
    Build the table-marking app (logged as task type 2).

    Keyword arguments read:

    * ``short_name`` (required) -- must contain ``"_tt2"``
    * ``title`` (optional) -- prefixed to the app name

    The template and long description are fetched from the configured
    ``URL_TEMPLATES`` location, and the app infos are set to the
    ``incremental`` scheduler plus a thumbnail URL.

    :raises Meb_ttapps_exception: code 2 when ``short_name`` is missing,
        code 6 when it does not contain ``"_tt2"``
    """
    if "short_name" not in keyargs:
        # Log before raising, as the invalid-short_name branch below does.
        logger.error(Meb_ttapps_exception(2, -1, "-"))
        raise Meb_ttapps_exception(2, -1, "-")

    if "_tt2" not in keyargs['short_name']:
        logger.error(Meb_ttapps_exception(6, -1, "-"))
        raise Meb_ttapps_exception(6, -1, "-")
    short_name = keyargs['short_name']

    if "title" in keyargs:
        title = keyargs['title'] + " "
    else:
        title = ""

    app_name = title + unicode("Marcação", "utf-8")

    super(Apptt_meta, self).__init__(
        app_name, short_name,
        "Marque e descreva as tabelas ou corrija as marcações.")

    super(Apptt_meta, self).set_template(meb_util.set_url(
        urllib2.urlopen(
            urllib2.Request(
                flask_app.config['URL_TEMPLATES'] +
                "/templates" +
                "/template-meta.html")), short_name))

    super(Apptt_meta, self).set_long_description(meb_util.set_url(
        urllib2.urlopen(
            urllib2.Request(
                flask_app.config['URL_TEMPLATES'] +
                "/templates" +
                "/long_description-meta.html")), short_name))

    super(Apptt_meta, self).add_app_infos(
        dict(
            sched="incremental",
            thumbnail=flask_app.config['URL_TEMPLATES'] +
            "/images" +
            "/long_description_meta.png"))

    logger.info("Create task type 2")
def get_tt_images(bookId):
    """
    Get public book images from internet archive server.

    The item metadata is fetched from ``http://archive.org/metadata/`` and
    the page count is read from ``imagecount`` or, failing that, from
    ``numero_de_paginas_do_item``. One entry is produced per page index.

    :param bookId: archive.org item identifier
    :returns: A list of dicts with the keys ``url_m`` (550x700 image URL),
        ``url_b`` (image URL without size suffix) and ``page`` (index).
        The list is empty when the metadata response is empty.
    :rtype: list
    :raises Archive_book_data_exception: when the metadata holds neither
        page-count field
    """
    WIDTH = 550
    HEIGHT = 700

    logger.info('Contacting archive.org')

    query = "http://archive.org/metadata/" + bookId
    urlobj = urllib2.urlopen(query)
    try:
        data = urlobj.read()
    finally:
        # Close the response even if reading fails.
        urlobj.close()
    output = json.loads(data)

    imgList = []
    if output:
        try:
            metadata = output['metadata']
        except KeyError:
            metadata = {}

        n_pages = None
        for key in ('imagecount', 'numero_de_paginas_do_item'):
            if key in metadata:
                n_pages = metadata[key]
                break

        if n_pages is None:
            # Previously a missing page count reached int(None) and raised
            # TypeError instead of the domain exception.
            error = Archive_book_data_exception(
                1, "imagecount or numero_de_paginas_do_item")
            logger.error(error)
            raise error

        imgUrls = "http://www.archive.org/download/" + bookId + "/page/n"
        for idx in range(int(n_pages)):
            logger.info('Retrieved img: %s' % idx)
            imgList.append({
                'url_m': imgUrls + "%d_w%d_h%d" % (idx, WIDTH, HEIGHT),
                'url_b': imgUrls + str(idx),
                'page': idx,
            })

    return imgList
def __runLinesRecognition(self, bookId, imgId, rotate, model="1"):
    """
    Run the ``tabletranscriber2`` line-recognition program for one page.

    The program is started from ``<CV_MODULES>/TableTranscriber2/`` on the
    low-resolution page image, with the model name and a rotation flag
    (``-r`` when ``rotate`` is truthy, ``-nr`` otherwise). Per the original
    note, it writes line coordinates to
    ``<tt3_backend_dir>/books/bookId/metadados/saida/image<imgId>.txt``.

    :param bookId: book identifier used in the image path
    :param imgId: page identifier used in the image file name
    :param rotate: whether the table is rotated
    :param model: suffix appended to ``"model"`` for the model argument
    :returns: the result of ``self.__checkFile(bookId, imgId)``
    :rtype: bool
    :raises Meb_exception_tt2: re-raised after being logged
    :raises Exception: any other error is logged and re-raised
    """
    try:
        # Both cases run the same command; only the flag differs.
        rotate_flag = "-r" if rotate else "-nr"

        # NOTE(review): this line goes through a shell; bookId, imgId and
        # model must not contain shell metacharacters.
        command = 'cd %s/TableTranscriber2/; sudo ./tabletranscriber2 ' \
                  '"/books/%s/baixa_resolucao/image%s.png" "model%s" "%s"' % (
                      app.config['CV_MODULES'], bookId, imgId, model,
                      rotate_flag)

        msg = "Command to run lines recognition software: " + command
        logger.info(msg)

        call([command], shell=True)  # calls the shell command

        return self.__checkFile(bookId, imgId)
    except Meb_exception_tt2:
        logger.error(Meb_exception_tt2(3, self.task.id))
        raise
    except Exception:
        # The task id belongs inside the exception constructor; it used to
        # be passed to logger.error as a stray format argument.
        logger.error(Meb_exception_tt2(2, self.task.id))
        raise
def __runOCR(self, cells, book_id, page, table_id, maxX, maxY):
    """
    Save the cells, then run the tesseract executor on the table image.

    ``self.__saveCells`` is called first with all arguments. Then
    ``tesseractexecutorapp2`` is started from
    ``<CV_MODULES>/TesseractExecutorApp2/`` on the high-resolution image of
    the table identified by ``book_id``, ``page`` and ``table_id``.

    :raises Exception: an error while building or running the command is
        logged as ``Meb_exception_tt3(4, task id)`` and re-raised
    """
    self.__saveCells(cells, book_id, page, table_id, maxX, maxY)

    try:
        shell_line = (
            'cd %s/TesseractExecutorApp2/; sudo ./tesseractexecutorapp2 '
            '"/books/%s/metadados/tabelasAlta/image%s_%d.png"'
        ) % (app.config['CV_MODULES'], book_id, page, table_id)

        logger.info("Command to run tesseract executor: " + shell_line)

        # The whole line is handed to the shell as a single command string.
        call([shell_line], shell=True)
    except Exception as ex:
        logger.error(Meb_exception_tt3(4, self.task.id))
        raise ex
def __init__(self, **keyargs):
    """
    Build the table-structure app and create its working folders.

    Keyword arguments read:

    * ``short_name`` (required) -- must contain ``"_tt3"``
    * ``title`` (optional) -- prefixed to the app name

    After the template and long description are set, ``__create_dirs`` is
    called with the ``CV_MODULES`` path and ``short_name`` without its last
    four characters. An ``OSError`` from that call is logged and not
    propagated.

    :raises Meb_ttapps_exception: code 3 when ``short_name`` is missing,
        code 7 when it does not contain ``"_tt3"``
    """
    if "short_name" not in keyargs:
        # Log before raising, as the invalid-short_name branch below does.
        logger.error(Meb_ttapps_exception(3, -1, "-"))
        raise Meb_ttapps_exception(3, -1, "-")

    if "_tt3" not in keyargs['short_name']:
        logger.error(Meb_ttapps_exception(7, -1, "-"))
        raise Meb_ttapps_exception(7, -1, "-")
    short_name = keyargs['short_name']

    if "title" in keyargs:
        title = keyargs['title'] + " "
    else:
        title = ""

    app_name = title + unicode("Estrutura", "utf-8")

    super(Apptt_struct, self).__init__(
        app_name, short_name,
        "Por favor. Corrija as linhas e colunas da tabela.")

    super(Apptt_struct, self).set_template(meb_util.set_url(
        urllib2.urlopen(
            urllib2.Request(
                flask_app.config['URL_TEMPLATES'] +
                "/templates/template-struct.html")), short_name))

    super(Apptt_struct, self).set_long_description(meb_util.set_url(
        urllib2.urlopen(
            urllib2.Request(
                flask_app.config['URL_TEMPLATES'] +
                "/templates" +
                "/long_description-struct.html")), short_name))

    try:
        # short_name[:-4] drops the last four characters (presumably the
        # "_tt3" suffix -- confirm, since only containment is checked above).
        self.__create_dirs(flask_app.config['CV_MODULES'], short_name[:-4])
        logger.info("TT folders created")
    except OSError as e:
        # Best effort: a folder-creation failure is only logged.
        logger.error(e)
def __runAreaSelection(self, bookId, imgId, tableId, rotate):
    """
    Run the ZoomingSelector program on one high-resolution table image.

    ``zoomingselector`` is started from ``<CV_MODULES>/ZoomingSelector/``
    on the image identified by ``bookId``, ``imgId`` and ``tableId``. Per
    the original note, it splits the table and writes the pieces at
    ``<tt3_backend_id>/books/bookId/selections/image<imgId>_tableId.txt``.

    The ``rotate`` argument is accepted but not used here, and the method
    returns nothing.

    :raises Exception: an error while building or running the command is
        logged as ``Meb_exception_tt2(4, task id)`` and re-raised
    """
    try:
        shell_line = (
            'cd %s/ZoomingSelector/; sudo ./zoomingselector '
            '"/books/%s/metadados/tabelasAlta/image%s_%d.png"'
        ) % (app.config['CV_MODULES'], bookId, imgId, tableId)

        logger.info(
            "Command to run zoomingselector (area selection software) "
            + shell_line)

        # The whole line is handed to the shell as a single command string.
        call([shell_line], shell=True)
    except Exception as ex:
        logger.error(Meb_exception_tt2(4, self.task.id))
        raise ex
def __fileOutput(self, answer):
    """
    Write the tt2 answers to the input file of the lines recognition.

    One line ``x0,y0,x1,y1`` is written per table to
    ``<CV_MODULES>/books/<bookId>/metadados/entrada/image<imgId>.txt``.
    ``bookId`` is ``self.app_short_name`` without its last four characters
    and ``imgId`` is ``self.task.info["page"]``. An existing file is
    overwritten.

    :param answer: iterable of dicts with the keys ``left``, ``top``,
        ``width`` and ``height``
    :returns: True if the answer is saved at the file
    :rtype: bool
    :raises Meb_file_output_exception_tt2: when the file cannot be opened
        or written
    """
    bookId = self.app_short_name[:-4]
    imgId = self.task.info["page"]
    path = "%s/books/%s/metadados/entrada/image%s.txt" % (
        app.config["CV_MODULES"], bookId, imgId)

    try:
        # A stray ', "a"' used to turn this log message into a tuple.
        logger.info("File path:" + path)

        # The context manager closes the file even if a write fails.
        with open(path, "w") as arch:
            for table in answer:
                x0 = int(table["left"])
                x1 = int(table["width"] + x0)
                y0 = int(table["top"])
                y1 = int(table["height"] + y0)
                arch.write(
                    str(x0) + "," + str(y0) + "," +
                    str(x1) + "," + str(y1) + "\n")

        return True
    except IOError as e:
        logger.error(e)
        logger.error(Meb_file_output_exception_tt2(1, self.task.id, bookId, imgId))
        raise Meb_file_output_exception_tt2(1, self.task.id, bookId, imgId)