def create_image_grains_list(self, imageList, timeList):
     """
     Build one image Grain per entry of imageList.

     Each image is saved as PNG into an in-memory buffer. The grain id is
     "shot<index>.png" and its description is str(timeList[index]).

     imageList -- objects offering save(fileobj, format); presumably PIL
                  images (confirm against the caller).
     timeList  -- indexed in step with imageList, so it must hold at least
                  as many entries.

     Returns the list of created Grain instances, in input order.
     """
     grains = []
     for index, frame in enumerate(imageList):
         grain_id = "shot%d.png" % index
         png_buffer = StringIO()
         frame.save(png_buffer, "PNG")
         grain = Grain(id=grain_id, content=png_buffer, graintype='image')
         grain.description = str(timeList[index])
         grains.append(grain)
     return grains
 def create_image_grains_list(self, imageList, timeList):
     """
     Build one image Grain per entry of imageList.

     Each image is saved as PNG into an in-memory buffer. The grain id is
     "shot<index>.png" and its description is str(timeList[index]).

     imageList -- objects offering save(fileobj, format); presumably PIL
                  images (confirm against the caller).
     timeList  -- indexed in step with imageList, so it must hold at least
                  as many entries.

     Returns the list of created Grain instances, in input order.
     """
     grains = []
     for index, frame in enumerate(imageList):
         grain_id = "shot%d.png" % index
         png_buffer = StringIO()
         frame.save(png_buffer, "PNG")
         grain = Grain(id=grain_id, content=png_buffer, graintype='image')
         grain.description = str(timeList[index])
         grains.append(grain)
     return grains
    def granulate(self):
        """ granulate some svg file (if `max` is specified, at most in `max` files)

            Parses the SVG held in the internal file object, walks every
            child of the document element and collects the produced SVG
            fragments as Grain instances.

            Returns a dict {'file_list': [Grain, ...]}; the grains have
            graintype 'svg', mimetype 'image/svg+xml' and ids 'svg1.svg',
            'svg2.svg', ... following the order of the internal grain list.

            NOTE: when the SVG cannot be parsed (ExpatError) an error is
            printed and an empty *list* (not a dict) is returned. This
            mismatch is kept on purpose because callers may depend on it.
        """
        max_files = self.max  # local alias; avoids shadowing the builtin max()
        try:
            xml = PrepareSVG().removeUse(self.__svgfile.getvalue())
        except ExpatError:
            print("\nERROR: Could not parse the svg file!\n")
            return []

        doc = xml.documentElement
        # number of element tags (except block tags)
        num_tags = self.__countTags(xml)
        # stores the remainder
        self.__remainder = self.__setImgsPerFile(doc, max_files, num_tags)
        self.__svgTree = self.__createTreeBased(xml)
        self.__getAllDefinitions(xml)

        for child in doc.childNodes:
            self.__visitNode(self.__svgTree.documentElement, child)

        if self.__lastAdded:  # if the last things aren't written
            self.__writeSvgTree()

        # Release both DOM trees now that every grain has been produced.
        xml.unlink()
        self.__svgTree.unlink()

        grain_list = []
        # Number the grains by position. Looking the position up with
        # list.index() is quadratic and gives equal grains the same id.
        for position, grain in enumerate(self.__list):
            grain_list.append(Grain(id='svg%s.svg' % (position + 1),
                                    content=grain,
                                    mimetype='image/svg+xml',
                                    graintype='svg'))
        return {'file_list': grain_list}
    def __getImageDocumentList(self):
        """
            Extract the images from a document and return a list of Grain instances

            Only draw:image elements whose xlink:href points inside the
            package ("Pictures/..." or "./ObjectReplacements/...") and whose
            name ends in .png, .gif or .jpg are extracted; every other
            reference (e.g. an image linked from a website) is skipped.
            Each image name is extracted at most once. The caption is built
            from the text of the node that follows the image's parent node.

            Returns a (possibly empty) list of Grain instances of
            graintype 'image'.
        """
        image_list = []
        # the image references are kept in the draw:image elements
        for item in self.__parseContent.getElementsByTagName('draw:image'):
            if not item.hasAttribute("xlink:href"):
                continue
            path = item.getAttribute('xlink:href')
            if "Pictures" in path:
                # strip the folder prefix, keeping only the file name
                name = path.replace("Pictures/", "")
            elif "ObjectReplacements" in path:
                name = path.replace("./ObjectReplacements/", "")
                # removes the "./" of the path that could be "./ObjectReplacements/Object 2"
                path = path.replace("./", "")
            else:
                # Anything else (e.g. an image linked from a website) is not
                # stored in the package, so there is nothing to extract.
                continue

            # checks the image extension
            extension = os.path.splitext(name)[1]
            if extension.lower() not in ('.png', '.gif', '.jpg'):
                continue
            # verifies if the image is already in the list
            if name in [image.getId() for image in image_list]:
                continue

            objGran = Grain(graintype='image')
            nChild = item.parentNode.nextSibling
            if nChild:
                text = []
                # node type constants are plain ints: compare by value
                if nChild.nodeType == nChild.TEXT_NODE:
                    text.append(nChild.data)
                pieces = self.__getTextChildNodesImage(nChild, text)
                objGran.setCaption(
                    ''.join([t for t in pieces if t is not None]))
            imagefile = StringIO(self.__zipFile.read(path))
            objGran.setId(name)
            objGran.setContent(imagefile)
            image_list.append(objGran)
        return image_list
 def create_video_grains_list(self):
     """
     Wrap every file found in self.temporaryPathGrain into a Grain.

     The directory entries are processed in sorted name order. The grain
     for the i-th entry gets the id "video_grain<i>.ogv" and graintype
     'nsifile'; its content is an in-memory buffer holding the file's
     bytes, with `name` and `filename` attributes set to that id.

     Returns the list of created Grain instances.
     """
     returnList = []
     video_names = sorted(os.listdir(self.temporaryPathGrain))
     for i, video in enumerate(video_names):
         filename = "video_grain" + str(i) + ".ogv"
         video_path = os.path.join(self.temporaryPathGrain, video)
         # Binary mode: the grains are video data. The with-block makes
         # sure the handle is closed (it used to be leaked).
         with open(video_path, "rb") as video_file:
             content = StringIO(video_file.read())
         content.name = filename
         content.filename = filename
         obj = Grain(id=filename, content=content, graintype='nsifile')
         returnList.append(obj)
     return returnList
Exemple #6
0
    def __getImageDocumentList(self):
        """
           Retrieves images from a PDF document

           Runs the external 'pdfimages -j' tool on the document stored in
           the temporary folder, drops the images flagged as repeated by
           comparaImage() and wraps every remaining file into a Grain of
           graintype 'image'. Files ending in .ppm/.pbm are converted to PNG
           with PIL; if the conversion fails the raw file is used as is.

           Returns a (possibly empty) list of Grain instances. An empty
           list is also returned when pdfimages reports status 256
           (presumably exit code 1, i.e. the PDF could not be opened -
           confirm against the pdfimages documentation).
        """
        pdf_name = self.Document.getFilename()
        # NOTE(review): the command line is built by string concatenation and
        # run through the shell; a file name containing '"' or shell
        # metacharacters breaks (or abuses) it. Consider subprocess with an
        # argument list.
        status = os.system(
            'pdfimages -j "' +
            os.path.join(self.__pathFolder, pdf_name) +
            '" ' + self.__pathFolder + '/imagegrain')
        if status == 256:
            #raise EOFError, "File has not the mandatory ending %EOF. File must be corrupted"
            return []
        # Lists the content of the temporary folder where the files are in.
        images = os.listdir(self.__pathFolder)
        images.remove(pdf_name)

        # A duplicate-detection algorithm flags the images that are equal;
        # the repeated ones are dropped. Filtering (instead of list.remove)
        # tolerates flagged names that are not in the listing.
        repeated = set()
        for imgDict in comparaImage(self.__pathFolder):
            if imgDict.get('flag') is True:
                repeated.add(imgDict.get('filename'))
        images = [name for name in images if name not in repeated]

        image_list = []
        for image in images:
            image_path = os.path.join(self.__pathFolder, image)
            base, ext = os.path.splitext(image)
            #convert the images .ppm or .pbm to files .png
            if ext.lower() in ('.ppm', '.pbm'):
                try:
                    content = StringIO()
                    PIL.Image.open(image_path).save(content, "PNG")
                    image = base + ".png"
                except Exception:
                    # Conversion failed: fall back to the raw file content,
                    # keeping the original file name.
                    with open(image_path, "rb") as fileImage:
                        content = StringIO(fileImage.read())
            else:
                #XXX-In the variable 'images' is coming a directory, it
                # generates the error when trying to open directory as file.
                try:
                    with open(image_path, "rb") as fileImage:
                        content = StringIO(fileImage.read())
                except IOError as err:
                    print(err)
                    continue

            image_list.append(
                Grain(id=image, content=content, graintype='image'))
        # The list used to be built and then dropped (implicit None).
        return image_list
    def extractRegion(self):
        """
            Build an 'svg' Grain from the part of the document selected by
            the region (self.x, self.y, self.w, self.h).

            The document is loaded into an SVG image through self.tool, an
            empty SVG image named "new_<filename>" is created next to it,
            and self.tool.selectGrainsInRegion() is asked to produce the
            selection. The region is Box(Point(x, y), w, h) - presumably an
            origin point plus width and height; confirm against Box.

            Returns a Grain (mimetype "image/svg+xml", graintype 'svg')
            whose content is the content file of the selection.
        """
        source_file = self.tool.makeNewSvgStringIO(
            self.document.getFilename(), self.document.getData())
        source_svg = self.tool.makeNewSvgImage(source_file)

        # empty destination image that receives the selected elements
        target_file = self.tool.makeNewSvgStringIO(
            "new_" + self.document.getFilename(), StringIO.StringIO(''))
        target_svg = self.tool.makeNewSvgImage(target_file)

        origin = Point(self.x, self.y)
        area = Box(origin, self.w, self.h)

        selection = self.tool.selectGrainsInRegion(
            area, source_svg, target_svg)
        return Grain(content=selection.getContentFile(),
                     mimetype="image/svg+xml",
                     graintype='svg')
Exemple #8
0
        pdfFile = os.path.join(self.__pathFolder, self.Document.getFilename())
        outputXMLFolder = os.path.join(self.__pathFolder, "outputXMLFolder")
        try:
            converterObj = ExecuteConverter.ExecuteConverter()
            converterObj.extractTables(pdfFile, outputXMLFolder)
            tableListStr = converterObj.getTableList()
        except Exception, e:
            return tableList

        i = 0
        for table in tableListStr:
            # generate table name
            i += 1
            tableId = "Table" + str(i) + ".html"
            # finally, the Grain is created en added to the list
            grainObj = Grain(graintype='table')
            grainObj.setId(tableId)
            grainObj.setContent(StringIO(table))
            grainObj.setMimetype("text/html")
            tableList.append(grainObj)

        return tableList

    ### Public Methods ###

    def getThumbnailsDocument(self):
        """
            Extracts the metadata from pdf files using 'convert' tool
        """
        os.system(
            'evince-thumbnailer -s 128 "' +
Exemple #9
0
        pdfFile = os.path.join(self.__pathFolder,self.Document.getFilename())
        outputXMLFolder = os.path.join(self.__pathFolder,"outputXMLFolder")
        try:
            converterObj = ExecuteConverter.ExecuteConverter()
            converterObj.extractTables(pdfFile, outputXMLFolder)
            tableListStr = converterObj.getTableList()
        except Exception, e:
            return tableList

        i = 0
        for table in tableListStr:
            # generate table name
            i+=1
            tableId = "Table" + str(i) + ".html"
            # finally, the Grain is created en added to the list
            grainObj = Grain(graintype='table')
            grainObj.setId(tableId)
            grainObj.setContent(StringIO(table))
            grainObj.setMimetype("text/html")
            tableList.append(grainObj)

        return tableList

    ### Public Methods ###

    def getThumbnailsDocument(self):
        """
            Generates a thumbnail of the PDF document with the external
            'evince-thumbnailer' tool (called with '-s 128') and reads the
            resulting 'thumbnail.png' from the temporary folder into an
            in-memory buffer.

            NOTE(review): in the lines visible here the buffer is neither
            returned nor stored and the opened file is never closed; the
            method looks truncated - confirm against the full source.
        """
        # The thumbnail is written next to the document, in the temporary folder.
        os.system('evince-thumbnailer -s 128 "' + os.path.join(self.__pathFolder,self.Document.getFilename())  + '" ' + self.__pathFolder +'/thumbnail.png')
	file_content = StringIO(open(self.__pathFolder +'/thumbnail.png').read())
    def __getTableDocumentList(self):
        """
            Extract the tables from a document and return a list of Grain instances

            For every table:table element a new document is created from an
            empty template; the table node, the styles it uses and the
            package images it references are copied into it. The caption is
            the text of the nodes right before and right after the table,
            joined by a space.

            Images referenced by an http(s) URL are not part of the package
            and are skipped. Tables without a 'table:name' attribute are
            processed but not included in the result.

            Returns a (possibly empty) list of Grain instances of
            graintype 'table'.
        """
        table_list = []
        # create an empty template
        template_str = self.__createNewOOoDocument()
        tables = self.__parseContent.getElementsByTagName('table:table')
        stylesDoc = self.__parseContent.getElementsByTagName('style:style')
        for t in tables:
            styles = self.__getAttributesR(t)
            table_name = t.getAttribute('table:name')

            # collect the package paths of the images used inside the table
            imgHrefs = []
            for img in t.getElementsByTagName("draw:image"):
                if not img.hasAttribute("xlink:href"):
                    continue
                path = img.getAttribute('xlink:href')
                if "ObjectReplacements" in path:
                    # remove the "./" of the path that could be "./ObjectReplacements/Object 2"
                    imgHrefs.append(path.replace("./", ""))
                elif re.match(r"^https?://", path):
                    # Image from a website: it is not stored in the package,
                    # so reading it from the zip file would fail. (The old
                    # pattern used a character class and let some URLs
                    # through.)
                    continue
                else:
                    imgHrefs.append(path)

            # extract legend
            objGran = Grain(graintype='table')
            leg = []
            for sibling in (t.previousSibling, t.nextSibling):
                if sibling is None:
                    continue
                if sibling.hasChildNodes():
                    leg.append(''.join(
                        self.__getTextChildNodesTable(sibling, text=[])))
                else:
                    leg.append(self.__getNodeText(sibling))

            # join the strings to make a single legend
            caption = ' '.join([i for i in leg if i is not None])
            objGran.setCaption(caption)

            # Creating an empty File
            new_table = StringIO()
            new_table.write(template_str)
            template_odt = zipfile.PyZipFile(new_table, 'a')
            try:
                doc = parseString(template_odt.read('content.xml'))
            finally:
                template_odt.close()
            office_text = doc.getElementsByTagName('office:text')[0]

            # copy the table node from a document to a new table grain
            office_text.appendChild(doc.importNode(t, True))

            # copy the styles used by the table; the target element is
            # looked up only once, and only if some style has to be copied
            office_automatic_styles = None
            for sty in stylesDoc:
                if sty.getAttribute('style:name') in styles:
                    if office_automatic_styles is None:
                        office_automatic_styles = doc.getElementsByTagName(
                            'office:automatic-styles')[0]
                    office_automatic_styles.appendChild(
                        doc.importNode(sty, True))

            # add the images and the new content.xml in a single append
            # session, closing the archive even if a read/write fails
            template_odt = zipfile.PyZipFile(new_table, 'a')
            try:
                for image in imgHrefs:
                    template_odt.writestr(str(image),
                                          self.__zipFile.read(image))
                template_odt.writestr('content.xml',
                                      doc.toxml().encode('utf-8'))
            finally:
                template_odt.close()

            if table_name:
                #objGran.setId(plone_utils.normalizeString(table_name))
                objGran.setId(table_name)
                objGran.setContent(new_table)
                objGran.setMimetype("application/vnd.oasis.opendocument.text")
                table_list.append(objGran)
        return table_list