def __create_app(self):
     """
     Return the id of the PyBOSSA app identified by ``self.short_name``,
     creating the app first when it is not registered yet.

     A newly created app is built from ``self.name``, ``self.short_name``
     and ``self.description``; it then receives ``category_id = 1`` and an
     ``info`` dict whose ``newtask`` entry points at
     ``<PYBOSSA_URL>/app/<short_name>/newtask``.

     :returns: app.id, or None when PyBOSSA returned an empty answer to
               the creation request
     :rtype: int
     :raises Exception: whatever the PyBOSSA client raised while the new
                        app was being looked up or updated (logged first)

     """

     apps = pbclient.find_app(short_name=self.short_name)
     if apps:
         app = apps[0]
         msg = '{app_name} app is already registered in the DB'.format(app_name=app.name.encode('utf-8', 'replace'))
         logger.info(unicode(msg, "utf-8"))
         return app.id

     logger.info("The application is not registered in PyBOSSA. Creating it...")
     ans = pbclient.create_app(name=self.name, short_name=self.short_name, description=self.description)
     if not ans:
         # Make the failure visible instead of silently returning None.
         logger.error("PyBOSSA returned an empty answer when creating app %s", self.short_name)
         return None

     try:
         app = pbclient.find_app(short_name=self.short_name)[0]
         app.info = dict(newtask="%s/app/%s/newtask" % (flask_app.config['PYBOSSA_URL'], self.short_name))
         app.category_id = 1
         pbclient.update_app(app)
         return app.id
     except Exception:
         logger.error(Meb_apps_exception(4, -1, self.short_name))
         # Bare raise keeps the traceback of the original failure.
         raise
 def generate_metadata_file(self, metadata_dict, bookid):
     """
     Render the table metadata as a small text file and store its content
     through ``data_mngr2.record_metadata_file``.

     The text is written to a temporary file, read back as bytes and the
     temporary file is always removed, even when a step fails.

     :param metadata_dict: dict providing the keys ``title``, ``subtitle``,
                           ``subject``, ``source``, ``book_title``,
                           ``page_number`` and ``table_number``
     :param bookid: value recorded as ``book_id`` alongside the file content
     :raises Exception: any error raised while generating or recording the
                        file (logged, then re-raised unchanged)
     """
     try:
         fd, tmp_path = tempfile.mkstemp()
         try:
             # os.fdopen takes ownership of the descriptor returned by
             # mkstemp, so closing the file object also releases it.
             with os.fdopen(fd, "w") as f:
                 f.write("Título: " + metadata_dict["title"] + "\n")
                 f.write("Subtítulo: " + metadata_dict["subtitle"] + "\n")
                 f.write("Assunto: " + subject_table_map.get_subject(metadata_dict["subject"]) + "\n")
                 f.write("Fontes: " + metadata_dict["source"] + "\n")
                 f.write("Título do Livro: " + metadata_dict["book_title"] + "\n")
                 f.write("Página do Livro: " + str(metadata_dict["page_number"]) + "\n")
                 f.write("Número da Tabela: " + str(metadata_dict["table_number"]) + "\n")

             with open(tmp_path, "rb") as f:
                 content = f.read()
         finally:
             os.unlink(tmp_path)

         data_mngr2.record_metadata_file(dict(
             book_id=bookid,
             page_number=metadata_dict["page_number"],
             table_number=metadata_dict["table_number"],
             mt_file=content))

         msg = "Metadata file generated with success. Content: " + str(metadata_dict)
         logger.info(msg)

     except Exception as e:
         logger.error(e)
         # Bare raise keeps the traceback of the original failure.
         raise
 def __loadAnswers(self):
     """
     Build the answer dict for the current task, whether it has zoom or not.

     For a task without zoom the ``info`` of its last task run is parsed
     as JSON and returned. For a task with zoom the answers of the whole
     group of similar tasks are joined and returned in the same layout as
     a simple task (keys ``linhas``, ``colunas``, ``maxX``, ``maxY``).

     :returns: the answer dict, or None when the group of similar tasks
               does not pass validation
     """

     if not self.task.info['hasZoom']:
         # Simple task: only the most recent answer matters.
         last_run = self.get_task_runs()[-1]
         return json.loads(last_run.info)

     group = self.__searchSimilarsTasks()
     if not self.__validateTaskGroup(group):
         logger.info("** Invalid task group detected **")
         return

     joined = self.__joinTaskGroupAnswers(group)
     return {
         'linhas': joined['lines'],
         'colunas': joined['columns'],
         'maxX': joined['maxX'],
         'maxY': joined['maxY'],
     }
    def __downloadArchiveImages(self, bookId, imgId, width=550, height=700, max_width=1650, max_height=2100):
        """
        Download one internet archive page image and store it as PNG in
        both resolutions under ``<CV_MODULES>/books/<bookId>/``.

        The page is fetched at ``max_width`` x ``max_height``, saved as a
        temporary JPG, converted to ``alta_resolucao/image<imgId>.png`` and
        then resized to ``width`` x ``height`` as
        ``baixa_resolucao/image<imgId>.png``. The conversions are done by
        the external ``convert`` program; its exit status is not inspected.

        :param bookId: internet archive identifier of the book
        :param imgId: index of the page inside the book
        :param width: width of the low resolution image
        :param height: height of the low resolution image
        :param max_width: width of the high resolution image
        :param max_height: height of the high resolution image
        :returns: True if the download was successful
        :rtype: bool
        :raises Meb_exception_tt2: (code 5) when any step fails

        """

        try:
            archiveURL = "http://archive.org/download/%s/page/n%s_w%s_h%s" % (
                bookId, imgId, max_width, max_height)

            logger.info("Downloading archive image: " + archiveURL)

            url_request = requests.get(archiveURL)
            # Do not store an HTTP error page as if it were the image.
            url_request.raise_for_status()

            fullImgPath = "%s/books/%s/alta_resolucao/image%s" % (
                app.config['CV_MODULES'], bookId, imgId)
            fullImgPathJPG = fullImgPath + ".jpg"
            fullImgPathPNG = fullImgPath + ".png"

            lowImgPath = "%s/books/%s/baixa_resolucao/image%s" % (
                app.config['CV_MODULES'], bookId, imgId)
            lowImgPathPNG = lowImgPath + ".png"

            # The payload is binary image data, so write it in binary mode.
            with open(fullImgPathJPG, "wb") as fullImgFile:
                fullImgFile.write(url_request.content)

            # Argument lists (no shell) keep bookId/imgId from being
            # interpreted as shell syntax.
            commands = [
                # convert jpg to png (high resolution)
                ['convert', fullImgPathJPG, '-resize',
                 '%dx%d!' % (max_width, max_height), fullImgPathPNG],
                # drop the intermediate jpg
                ['rm', fullImgPathJPG],
                # create image with low resolution
                ['convert', fullImgPathPNG, '-resize',
                 '%dx%d!' % (width, height), lowImgPathPNG],
            ]

            for argv in commands:
                msg = "Command to download archive images: " + " ".join(argv)
                logger.info(msg)
                call(argv)

            return True

        except Exception as ex:
            logger.error(Meb_exception_tt2(5, self.task.id))
            logger.error(ex)
            raise Meb_exception_tt2(5, self.task.id)
    def __init__(self, **keyargs):
        """
        Set up the selection app (task type 1).

        Recognised keyword arguments:

        * ``short_name`` (required): must contain ``"_tt1"``.
        * ``title`` (optional): prefix of the app name.
        * ``book_info`` (optional): passed to ``data_mngr.record_book``.

        The template and the long description are fetched from
        ``URL_TEMPLATES`` and the thumbnail is registered as app info.

        :raises Meb_ttapps_exception: code 1 when ``short_name`` is missing,
                                      code 5 when it lacks ``"_tt1"``
        """
        if "short_name" not in keyargs:
            error = Meb_ttapps_exception(1, -1, "-")
            logger.error(error)
            raise error

        if "_tt1" not in keyargs['short_name']:
            error = Meb_ttapps_exception(5, -1, "-")
            logger.error(error)
            raise error

        short_name = keyargs['short_name']
        title = (keyargs['title'] + " ") if "title" in keyargs else ""

        if "book_info" in keyargs:
            data_mngr.record_book(keyargs["book_info"])

        app_name = title + unicode("Seleção", "utf-8")

        super(Apptt_select, self).__init__(
            app_name,
            short_name,
            "Por favor. Selecione as páginas com tabela.")

        base_url = flask_app.config['URL_TEMPLATES']

        super(Apptt_select, self).set_template(meb_util.set_url(
            urllib2.urlopen(
                urllib2.Request(
                    base_url + "/templates/template-select.html")),
            short_name))

        super(Apptt_select, self).set_long_description(meb_util.set_url(
            urllib2.urlopen(
                urllib2.Request(
                    base_url + "/templates/long_description-select.html")),
            short_name))

        super(Apptt_select, self).add_app_infos(
            dict(thumbnail=base_url
                 + "/images/long_description_selection.png"))

        logger.info("Create task type 1")
    def __init__(self, **keyargs):
        """
        Set up the transcription app (task type 4).

        Recognised keyword arguments:

        * ``short_name`` (required): must contain ``"_tt4"``.
        * ``title`` (optional): prefix of the app name.

        The template and the long description are fetched from
        ``URL_TEMPLATES``; the ``incremental`` scheduler and the thumbnail
        are registered as app infos.

        :raises Meb_ttapps_exception: code 4 when ``short_name`` is missing,
                                      code 8 when it lacks ``"_tt4"``
        """
        if "short_name" not in keyargs:
            error = Meb_ttapps_exception(4, -1, "-")
            logger.error(error)
            raise error

        if "_tt4" not in keyargs['short_name']:
            error = Meb_ttapps_exception(8, -1, "-")
            logger.error(error)
            raise error

        short_name = keyargs['short_name']
        title = (keyargs['title'] + " ") if "title" in keyargs else ""

        app_name = title + unicode("Transcrição", "utf-8")

        super(Apptt_transcribe, self).__init__(
            app_name, short_name,
            "Por favor. Corrija o conteúdo das células da tabela.")

        base_url = flask_app.config['URL_TEMPLATES']

        super(Apptt_transcribe, self).set_template(meb_util.set_url(
            urllib2.urlopen(
                urllib2.Request(
                    base_url + "/templates/template-transcribe.html")),
            short_name))

        super(Apptt_transcribe, self).set_long_description(meb_util.set_url(
            urllib2.urlopen(
                urllib2.Request(
                    base_url + "/templates/long_description-transcribe.html")),
            short_name))

        super(Apptt_transcribe, self).add_app_infos(
            dict(
                sched="incremental",
                thumbnail=base_url
                + "/images/long_description_transcribe.png"))

        logger.info("Create task type 4")
    def __init__(self, **keyargs):
        """
        Set up the marking app (task type 2).

        Recognised keyword arguments:

        * ``short_name`` (required): must contain ``"_tt2"``.
        * ``title`` (optional): prefix of the app name.

        The template and the long description are fetched from
        ``URL_TEMPLATES``; the ``incremental`` scheduler and the thumbnail
        are registered as app infos.

        :raises Meb_ttapps_exception: code 2 when ``short_name`` is missing,
                                      code 6 when it lacks ``"_tt2"``
        """
        if "short_name" not in keyargs:
            error = Meb_ttapps_exception(2, -1, "-")
            logger.error(error)
            raise error

        if "_tt2" not in keyargs['short_name']:
            error = Meb_ttapps_exception(6, -1, "-")
            logger.error(error)
            raise error

        short_name = keyargs['short_name']
        title = (keyargs['title'] + " ") if "title" in keyargs else ""

        app_name = title + unicode("Marcação", "utf-8")

        super(Apptt_meta, self).__init__(
            app_name, short_name,
            "Marque e descreva as tabelas ou corrija as marcações.")

        base_url = flask_app.config['URL_TEMPLATES']

        super(Apptt_meta, self).set_template(meb_util.set_url(
            urllib2.urlopen(
                urllib2.Request(
                    base_url + "/templates/template-meta.html")),
            short_name))

        super(Apptt_meta, self).set_long_description(meb_util.set_url(
            urllib2.urlopen(
                urllib2.Request(
                    base_url + "/templates/long_description-meta.html")),
            short_name))

        super(Apptt_meta, self).add_app_infos(
            dict(
                sched="incremental",
                thumbnail=base_url
                + "/images/long_description_meta.png"))

        logger.info("Create task type 2")
def get_tt_images(bookId):
    """
    Get public book images from internet archive server

    The number of pages is read from the ``imagecount`` entry of the
    book metadata or, when that is absent, from
    ``numero_de_paginas_do_item``.

    :param bookId: internet archive identifier of the book
    :returns: A list with one dict per page holding ``url_m`` (550x700
              image url), ``url_b`` (page url without size suffix) and
              ``page`` (page index); empty when the metadata answer is
              empty.
    :rtype: list
    :raises Archive_book_data_exception: when the metadata carries no
                                         page count

    """

    WIDTH = 550
    HEIGHT = 700

    logger.info('Contacting archive.org')

    query = "http://archive.org/metadata/" + bookId
    urlobj = urllib2.urlopen(query)
    try:
        data = urlobj.read()
    finally:
        # Release the connection even when reading fails.
        urlobj.close()
    output = json.loads(data)

    if not output:
        return []

    try:
        metadata = output['metadata']
    except KeyError:
        metadata = {}

    n_pages = None
    for key in ('imagecount', 'numero_de_paginas_do_item'):
        if key in metadata:
            n_pages = metadata[key]
            break

    if n_pages is None:
        # Covers both a missing 'metadata' section and a section without
        # any of the page count keys.
        error = Archive_book_data_exception(1, "imagecount or numero_de_paginas_do_item")
        logger.error(error)
        raise error

    imgUrls = "http://www.archive.org/download/" + bookId + "/page/n"
    imgList = []
    for idx in range(int(n_pages)):
        logger.info('Retrieved img: %s' % idx)
        imgList.append({
            'url_m': imgUrls + "%d_w%d_h%d" % (idx, WIDTH, HEIGHT),
            'url_b': imgUrls + str(idx),
            'page': idx,
        })

    return imgList
    def __runLinesRecognition(self, bookId, imgId, rotate, model="1"):
        """
        Run the external ``tabletranscriber2`` program (through sudo, from
        ``<CV_MODULES>/TableTranscriber2/``) on the low resolution image of
        a page and then check its output with ``self.__checkFile``.

        :param bookId: identifier of the book
        :param imgId: index of the page image
        :param rotate: truthy for a rotated table (flag ``-r``), falsy
                       otherwise (flag ``-nr``)
        :param model: suffix of the ``model<N>`` argument given to the
                      program
        :returns: the result of ``self.__checkFile(bookId, imgId)``
        :rtype: bool
        :raises Meb_exception_tt2: re-raised unchanged when raised by one
                                   of the steps
        :raises Exception: any other failure, re-raised unchanged

        """

        try:
            rotate_flag = "-r" if rotate else "-nr"
            workdir = '%s/TableTranscriber2/' % app.config['CV_MODULES']

            # Argument list (no shell) keeps bookId/imgId/model from being
            # interpreted as shell syntax.
            argv = [
                'sudo', './tabletranscriber2',
                '/books/%s/baixa_resolucao/image%s.png' % (bookId, imgId),
                'model%s' % model,
                rotate_flag,
            ]

            msg = "Command to run lines recognition software: " \
                + " ".join(argv) + " (cwd: " + workdir + ")"
            logger.info(msg)

            call(argv, cwd=workdir)

            return self.__checkFile(bookId, imgId)

        except Meb_exception_tt2:
            logger.error(Meb_exception_tt2(3, self.task.id))
            raise
        except Exception:
            logger.error(Meb_exception_tt2(2, self.task.id))
            raise
    def __runOCR(self, cells, book_id, page, table_id, maxX, maxY):
        """
        Save the cells with ``self.__saveCells`` and run the external
        ``tesseractexecutorapp2`` program (through sudo, from
        ``<CV_MODULES>/TesseractExecutorApp2/``) on the high resolution
        image of the table.

        The exit status of the program is not inspected.

        :param cells: forwarded to ``self.__saveCells``
        :param book_id: identifier of the book
        :param page: index of the page image
        :param table_id: integer index of the table inside the page
        :param maxX: forwarded to ``self.__saveCells``
        :param maxY: forwarded to ``self.__saveCells``
        :raises Exception: any failure while launching the program
                           (logged, then re-raised unchanged)
        """

        self.__saveCells(cells, book_id, page, table_id, maxX, maxY)

        try:
            workdir = '%s/TesseractExecutorApp2/' % app.config['CV_MODULES']

            # Argument list (no shell) keeps book_id/page from being
            # interpreted as shell syntax.
            argv = [
                'sudo', './tesseractexecutorapp2',
                '/books/%s/metadados/tabelasAlta/image%s_%d.png' % (
                    book_id, page, table_id),
            ]

            msg = "Command to run tesseract executor: " \
                + " ".join(argv) + " (cwd: " + workdir + ")"
            logger.info(msg)

            call(argv, cwd=workdir)

        except Exception:
            logger.error(Meb_exception_tt3(4, self.task.id))
            # Bare raise keeps the traceback of the original failure.
            raise
    def __init__(self, **keyargs):
        """
        Set up the structure app (task type 3).

        Recognised keyword arguments:

        * ``short_name`` (required): must contain ``"_tt3"``.
        * ``title`` (optional): prefix of the app name.

        The template and the long description are fetched from
        ``URL_TEMPLATES``. The working folders are then created under
        ``CV_MODULES`` for ``short_name`` without its last four
        characters; an OSError raised there is logged and ignored.

        :raises Meb_ttapps_exception: code 3 when ``short_name`` is missing,
                                      code 7 when it lacks ``"_tt3"``
        """
        if "short_name" not in keyargs:
            error = Meb_ttapps_exception(3, -1, "-")
            logger.error(error)
            raise error

        if "_tt3" not in keyargs['short_name']:
            error = Meb_ttapps_exception(7, -1, "-")
            logger.error(error)
            raise error

        short_name = keyargs['short_name']
        title = (keyargs['title'] + " ") if "title" in keyargs else ""

        app_name = title + unicode("Estrutura", "utf-8")

        super(Apptt_struct, self).__init__(
            app_name, short_name,
            "Por favor. Corrija as linhas e colunas da tabela.")

        base_url = flask_app.config['URL_TEMPLATES']

        super(Apptt_struct, self).set_template(meb_util.set_url(
            urllib2.urlopen(
                urllib2.Request(
                    base_url + "/templates/template-struct.html")),
            short_name))

        super(Apptt_struct, self).set_long_description(meb_util.set_url(
            urllib2.urlopen(
                urllib2.Request(
                    base_url + "/templates/long_description-struct.html")),
            short_name))

        try:
            # NOTE(review): [:-4] presumably strips a trailing "_tt3", but
            # the check above only requires "_tt3" somewhere in the name.
            self.__create_dirs(flask_app.config['CV_MODULES'], short_name[:-4])
            logger.info("TT folders created")
        except OSError as e:
            logger.error(e)
    def __runAreaSelection(self, bookId, imgId, tableId, rotate):
        """
        Run the external ``zoomingselector`` program (through sudo, from
        ``<CV_MODULES>/ZoomingSelector/``) on the high resolution image of
        a table.

        The exit status of the program is not inspected and nothing is
        returned.

        :param bookId: identifier of the book
        :param imgId: index of the page image
        :param tableId: integer index of the table inside the page
        :param rotate: not used by this method
        :returns: None
        :raises Exception: any failure while launching the program
                           (logged, then re-raised unchanged)
        """
        try:
            workdir = '%s/ZoomingSelector/' % app.config['CV_MODULES']

            # Argument list (no shell) keeps bookId/imgId from being
            # interpreted as shell syntax.
            argv = [
                'sudo', './zoomingselector',
                '/books/%s/metadados/tabelasAlta/image%s_%d.png' % (
                    bookId, imgId, tableId),
            ]

            msg = "Command to run zoomingselector (area selection software) " \
                + " ".join(argv) + " (cwd: " + workdir + ")"
            logger.info(msg)

            call(argv, cwd=workdir)

        except Exception:
            logger.error(Meb_exception_tt2(4, self.task.id))
            # Bare raise keeps the traceback of the original failure.
            raise
    def __fileOutput(self, answer):
        """
        Write the tt2 answers into the input file of the lines recognition:
        ``<CV_MODULES>/books/<bookId>/metadados/entrada/image<page>.txt``.

        ``bookId`` is ``self.app_short_name`` without its last four
        characters and ``page`` comes from ``self.task.info["page"]``.
        Each table becomes one ``x0,y0,x1,y1`` line, where ``x1`` and
        ``y1`` are ``left + width`` and ``top + height``. An existing file
        is overwritten.

        :param answer: iterable of dicts with the keys ``left``, ``top``,
                       ``width`` and ``height``
        :returns: True if the answer is saved at the file
        :rtype: bool
        :raises Meb_file_output_exception_tt2: when the file cannot be
                                               opened or written
        """

        pb_app_name = self.app_short_name
        bookId = pb_app_name[:-4]
        imgId = self.task.info["page"]

        file_path = "%s/books/%s/metadados/entrada/image%s.txt" % (
            app.config["CV_MODULES"], bookId, imgId)
        logger.info("File path:" + file_path)

        try:
            with open(file_path, "w") as arch:
                for table in answer:
                    x0 = int(table["left"])
                    x1 = int(table["width"] + x0)
                    y0 = int(table["top"])
                    y1 = int(table["height"] + y0)
                    arch.write(
                        ",".join([str(x0), str(y0), str(x1), str(y1)]) + "\n")

        except IOError as e:
            logger.error(e)
            logger.error(Meb_file_output_exception_tt2(1, self.task.id, bookId, imgId))
            raise Meb_file_output_exception_tt2(1, self.task.id, bookId, imgId)

        return True