Ejemplo n.º 1
0
 def test_1stpara(self):
     """Load the poem and compare the text of its body with the expected string.

     The body is converted with both ``unicode()`` and ``str()``; each
     conversion must equal the expected text.
     """
     poem_odt = os.path.join(os.path.dirname(__file__), "examples", "serious_poem.odt")
     d = load(poem_odt)
     shouldbe = u"The boy stood on the burning deck,Whence allbuthim had fled.The flames that litthe battle'swreck,Shone o'er him, round the dead. "
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(shouldbe, unicode(d.body))
     self.assertEqual(shouldbe, str(d.body))
Ejemplo n.º 2
0
 def test_metagenerator(self):
     """Load parastyles.odt and look for the original generator in its meta XML.

     The assertion passes when the original OpenOffice.org generator string
     is absent from the output of ``metaxml()``.
     NOTE(review): the failure message reads as if the original string were
     required, which is the opposite of what is asserted -- confirm intent.
     """
     source_path = os.path.join(os.path.dirname(__file__), "examples", "parastyles.odt")
     document = load(source_path)
     meta_text = unicode(document.metaxml(), 'utf-8')
     original_generator = u"""<meta:generator>OpenOffice.org/2.3$Linux OpenOffice.org_project/680m6$Build-9226"""
     position = meta_text.find(original_generator)
     self.assertEqual(-1, position, "Must use the original generator string")
Ejemplo n.º 3
0
def merge(inputfile, textdoc):
    """Move the content of the document stored at *inputfile* into *textdoc*.

    The children of each top-level section of the loaded document are added
    to the section of the same name in *textdoc*, section by section. The
    ``Pictures`` attribute of *textdoc* is then replaced by the one of the
    loaded document.

    Returns *textdoc*.
    """
    source = load(inputfile)

    section_names = (
        'meta',
        'fontfacedecls',
        'styles',
        'automaticstyles',
        'scripts',
        'settings',
        'masterstyles',
        'body',
    )
    for section_name in section_names:
        # Walk a copy of the child list: addElement unlinks each node from
        # the list it came from.
        for node in getattr(source, section_name).childNodes[:]:
            getattr(textdoc, section_name).addElement(node)

    textdoc.Pictures = source.Pictures
    return textdoc
Ejemplo n.º 4
0
    def test_percentage(self):
        """ An automatic style may name a PercentageStyle as its datastylename """
        document = OpenDocumentSpreadsheet()

        # Data style "N11": a number followed by a literal percent sign.
        percent_format = PercentageStyle(name='N11')
        percent_format.addElement(Number(decimalplaces='2', minintegerdigits='1'))
        percent_format.addElement(Text(text='%'))
        document.automaticstyles.addElement(percent_format)

        # Cell style that refers to the data style above.
        cell_style = Style(name='pourcent', family='table-cell', datastylename='N11')
        cell_style.addElement(ParagraphProperties(textalign='center'))
        text_attributes = {'fontsize':"10pt",'fontweight':"bold", 'color':"#000000" }
        cell_style.addElement(TextProperties(attributes=text_attributes))
        document.automaticstyles.addElement(cell_style)

        sheet = Table(name='sheet1')
        row = TableRow()
        cell = TableCell(formula='=AVERAGE(C4:CB62)/2',stylename='pourcent', valuetype='percentage')
        row.addElement(cell)
        sheet.addElement(row)
        document.spreadsheet.addElement(sheet)
        document.save("TEST.odt")
        self.saved = True

        # Reload the saved file and look for the three markers in its content XML.
        content = load("TEST.odt").contentxml()
        expected_fragments = (
            u'''<number:percentage-style''',
            u'''style:data-style-name="N11"''',
            u'''style:name="pourcent"''',
        )
        for fragment in expected_fragments:
            self.assertNotEqual(-1, content.find(fragment))
Ejemplo n.º 5
0
    def __init__(self, input_file_name, output_file_name, processAnnotations=False):
        """
        Constructor

        Stores the arguments, creates the graph with its namespace bindings,
        turns compression on, loads the input document and records, for every
        style that declares one, the name of its parent style.

        :param input_file_name: path of the document to load
        :param output_file_name: stored on the instance; not used here
        :param processAnnotations: stored on the instance; not used here
        """
        # Save the arguments
        self.input_file_name = input_file_name
        self.output_file_name = output_file_name
        self.processAnnotations = processAnnotations

        # Create the graph
        self.graph = ConjunctiveGraph()
        self.graph.bind('tablinker', TABLINKER)
        self.graph.bind('prov', PROV)
        self.graph.bind('dcat', DCAT)
        self.graph.bind('oa', OA)
        self.graph.bind('dcterms', DCTERMS)

        # Set a default namespace
        self.data_ns = Namespace("http://example.org/")
        self.graph.bind('data', self.data_ns)

        # Compress by default
        self.set_compress(True)

        # Text before the first dot of the file name; used as the log prefix.
        self.basename = os.path.basename(input_file_name).split('.')[0]

        logger.info('[{}] Loading {}'.format(self.basename, input_file_name))
        self.book = load(unicode(input_file_name))

        # Map style name -> parent style name, for styles that have a parent.
        self.stylesnames = {}
        for style in self.book.getElementsByType(Style):
            parentname = style.getAttrNS(STYLENS, 'parent-style-name')
            name = style.getAttrNS(STYLENS, 'name')
            # Identity comparison is the correct test against None.
            if parentname is not None:
                self.stylesnames[name] = parentname
Ejemplo n.º 6
0
 def test_extract_with_span(self):
     """ Extract the text of a body that contains bold/italic spans """
     sample_path = os.path.join(os.path.dirname(__file__), u"examples", u"simplestyles.odt")
     document = load(sample_path)
     expected = u'Plain textBoldItalicBold italicUnderlineUnderline italicUnderline bold italicKm2 - superscriptH2O - subscript'
     # The result of this first extraction is discarded; only the second
     # extraction is compared.
     teletype.extractText(document.body)
     extracted = teletype.extractText(document.body)
     self.assertEqual(expected, extracted)
Ejemplo n.º 7
0
def parse_opendocument(fin):
    """
    Read every row of the first sheet of the spreadsheet loaded from «fin».

    Returns a 5-tuple ``(header, rows, inputs, outputs, top_header)``:
    the last header row seen, the list of non-empty non-header rows, the
    two counts obtained from the header row that contains "In" or "Out"
    (both 0 when there is none), and that header row itself (an empty
    tuple when there is none).
    """
    doc = load(fin)
    table = doc.spreadsheet.getElementsByType(Table)[0]   # only the first sheet is read

    cabecera = ()
    cabecera_superior = ()
    numentradas = numsalidas = 0    # defaults in case there are no rows
    res = []
    for row in table.getElementsByType(TableRow):
        fila = convert_odrow(row)
        if not es_cabecera(fila):
            # Data row: keep it unless it is empty.
            if fila:
                res.append(fila)
            continue
        # Every header row overwrites the previous one, so the last header
        # row seen is the one returned.
        cabecera = fila
        if esta_en("In", cabecera) or esta_en("Out", cabecera):
            # This header row gives the number of inputs and outputs.
            numentradas, numsalidas = find_ins_outs(cabecera)
            cabecera_superior = cabecera
    return cabecera, res, numentradas, numsalidas, cabecera_superior
Ejemplo n.º 8
0
 def test_simplelist(self):
     """ The content XML of simplelist.odt must contain the expected list markup """
     source_path = os.path.join(os.path.dirname(__file__), "examples", "simplelist.odt")
     document = load(source_path)
     content = unicode(document.contentxml(), 'utf-8')
     expected_markup = u"""<text:list text:style-name="L1"><text:list-item><text:p text:style-name="P1">Item A</text:p></text:list-item><text:list-item>"""
     self.assertNotEqual(-1, content.find(expected_markup))
Ejemplo n.º 9
0
 def test_chinese(self):
     """ The content XML of a spreadsheet with Chinese text must contain that text """
     source_path = os.path.join(os.path.dirname(__file__), u"examples", u"chinese_spreadsheet.ods")
     document = load(source_path)
     content = unicode(document.contentxml(), 'utf-8')
     position = content.find(u'''工作表1''')
     self.assertNotEqual(-1, position)
Ejemplo n.º 10
0
def import_ods(path):
    """Read every sheet of the spreadsheet at *path* into nested lists.

    Returns a dict that maps each table's ``name`` attribute to a list of
    rows; each row is a list of cell values. A cell value is a ``float``
    when the stripped cell text parses as one; otherwise it is the text
    with U+2026 replaced by ``...`` and U+200B removed, encoded as UTF-8.

    A cell is appended ``numbercolumnsrepeated`` times, except when it has
    no next sibling, in which case it is appended once.
    """
    doc = load(path)

    db = {}

    for table in doc.spreadsheet.getElementsByType(Table):
        db_table = []
        db[table.getAttribute('name')] = db_table
        for row in table.getElementsByType(TableRow):
            db_row = []
            db_table.append(db_row)
            for cell in row.getElementsByType(TableCell):
                db_value = '\n'.join(map(str, cell.getElementsByType(P))).decode('utf-8')
                db_value = db_value.strip()
                try:
                    db_value = float(db_value)
                except ValueError:
                    # Not a number: keep the text, normalised and re-encoded.
                    db_value = db_value.replace(u'\u2026', '...')
                    db_value = db_value.replace(u'\u200b', '')
                    db_value = db_value.encode('utf-8')

                repeat_count = 1
                # A cell without a next sibling is never repeated.
                if cell.nextSibling:
                    try:
                        repeat_count = int(cell.getAttribute('numbercolumnsrepeated'))
                    except (TypeError, ValueError):
                        # Attribute absent or not an integer: single cell.
                        repeat_count = 1

                db_row.extend([db_value] * repeat_count)

    return db
         
Ejemplo n.º 11
0
 def __init__( self, template=None ):
     """Start from *template* when one is given, otherwise from a new text document."""
     if template:
         self.doc = load( template )
     else:
         self.doc = OpenDocumentText()
     # No current paragraph yet.
     self.cur_par = None
     self._create_styles()
Ejemplo n.º 12
0
    def process_workbook(self, input_file_name, output_file_name):
        """
        Run ``_process_sheet`` on every sheet of the workbook, then save it.

        An exception raised while processing one sheet is logged and the
        remaining sheets are still processed.
        """
        # File name without directories, used as the prefix of every log line
        basename = os.path.basename(input_file_name)

        log.info('[{}] Loading {}'.format(basename, input_file_name))
        book = load(unicode(input_file_name))

        log.debug('[{}] Starting RulesInjector'.format(basename))
        sheets = book.getElementsByType(Table)

        log.info('[{}] Found {} sheets to process'.format(basename, len(sheets)))
        for n, sheet in enumerate(sheets):
            log.debug('[{}] Processing sheet {}'.format(basename, n))
            try:
                self._process_sheet(basename, n, sheet)
            except Exception as detail:
                log.error("[{}] Error processing sheet {} : {}".format(basename, n, detail))

        book.save(unicode(output_file_name))
Ejemplo n.º 13
0
 def test_metagenerator(self):
     """ The meta XML of the loaded presentation must contain an ODFPY generator """
     presentation_path = os.path.join(os.path.dirname(__file__), u"examples", u"emb_spreadsheet.odp")
     document = load(presentation_path)
     meta_xml = document.metaxml()
     position = meta_xml.find(u"""<meta:generator>ODFPY""")
     self.assertNotEqual(-1, position, "Must not use the original generator string")
Ejemplo n.º 14
0
def load_styles(path_or_doc):
    """Map style name to style node for the ``styles`` section of a document.

    *path_or_doc* is loaded first when it is a string; any other value is
    used as the document itself.
    """
    doc = load(path_or_doc) if isinstance(path_or_doc, string_types) else path_or_doc
    styles = {}
    for style in doc.styles.childNodes:
        styles[_style_name(style)] = style
    return styles
Ejemplo n.º 15
0
def odf_load(odf_file):
    """Load *odf_file* and attach its styles as ``my_readable_styles``.

    Returns the loaded document.
    """
    document = load(odf_file)
    # Store the styles dict on the document object itself so that other
    # odf_xxx functions can retrieve it from there.
    document.my_readable_styles = odf_get_styles(document)
    return document
Ejemplo n.º 16
0
 def test_formulas_ooo(self):
     """ Formulas without a prefix must appear unchanged in the content XML """
     source_path = os.path.join(os.path.dirname(__file__), "examples", "pythagoras-kspread.ods")
     document = load(source_path)
     content = unicode(document.contentxml(), 'utf-8')
     expected_formulas = (
         u'''table:formula="=SQRT([.A1]*[.A1]+[.A2]*[.A2])"''',
         u'''table:formula="=SUM([.A1]:[.A2])"''',
     )
     for formula in expected_formulas:
         self.assertNotEqual(-1, content.find(formula))
Ejemplo n.º 17
0
 def test_spreadsheet(self):
     """ A presentation with an embedded spreadsheet must expose one child object """
     presentation_path = os.path.join(os.path.dirname(__file__), u"examples", u"emb_spreadsheet.odp")
     document = load(presentation_path)
     self.assertEqual(1, len(document.childobjects))
     # Print the folder of each child object.
     for child in document.childobjects:
         print (child.folder)
Ejemplo n.º 18
0
    def test_extract(self):
        """ Convert the first paragraph of the poem to plain text

        The expected string has runs of spaces, tabs and newlines where the
        paragraph markup below has ``text:s``, ``text:tab`` and
        ``text:line-break`` elements.
        """
        poem_odt = os.path.join(
            os.path.dirname(__file__), u"examples", u"serious_poem.odt")
        d = load(poem_odt)
        allparas = d.getElementsByType(P)
        # Markup of the paragraph, kept for reference only (it used to be an
        # unused local variable):
        # <text:p text:style-name="Standard">The boy stood <text:s text:c="3"/>on the burning deck,<text:line-break/><text:tab/>Whence all<text:tab/>but<text:tab/><text:tab/>him had fled.<text:line-break/>The flames <text:s text:c="2"/>that lit<text:tab/>the battle's<text:tab/>wreck,<text:line-break/> <text:s text:c="11"/>Shone o'er him, round the dead. <text:s text:c="2"/></text:p>

        self.assertEqual(u"The boy stood    on the burning deck,\n\tWhence all\tbut\t\thim had fled.\nThe flames   that lit\tthe battle's\twreck,\n           Shone o'er him, round the dead.   ", teletype.extractText(allparas[0]))
Ejemplo n.º 19
0
 def test_body(self):
     """ The body must be an office:body whose parent is the office:document top node """
     source_path = os.path.join(os.path.dirname(__file__), "examples", "serious_poem.odt")
     document = load(source_path)
     body = document.body
     self.assertTrue(body.isInstanceOf(office.Body))
     self.assertFalse(body.isInstanceOf(text.P))
     self.assertTrue(body.parentNode.isInstanceOf(office.Document))
     self.assertTrue(document.topnode.isInstanceOf(office.Document))
Ejemplo n.º 20
0
    def test_paras(self):
        """ Every element returned for text.P must report itself as a text.P """
        source_path = os.path.join(os.path.dirname(__file__), "examples", "serious_poem.odt")
        document = load(source_path)

        for paragraph in document.getElementsByType(text.P):
            self.assertTrue(paragraph.isInstanceOf(text.P))
Ejemplo n.º 21
0
 def test_formulas_ooo(self):
     """ The ``of:`` namespace and formula prefixes must appear in the content XML """
     source_path = os.path.join(os.path.dirname(__file__), "examples", "pythagoras.ods")
     document = load(source_path)
     content = unicode(document.contentxml(), 'utf-8')
     expected_fragments = (
         u'''xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2"''',
         u'''table:formula="of:=SQRT([.A1]*[.A1]+[.A2]*[.A2])"''',
         u'''table:formula="of:=SUM([.A1:.A2])"''',
     )
     for fragment in expected_fragments:
         self.assertNotEqual(-1, content.find(fragment))
Ejemplo n.º 22
0
 def __init__(self, file=None, content=None, clonespannedcolumns=None):
     """Read every sheet of a document.

     The document is *content* when it is truthy; otherwise it is loaded
     from *file*. Each table of the spreadsheet is passed to ``readSheet``.
     """
     self.clonespannedcolumns = clonespannedcolumns
     self.doc = content if content else opendocument.load(file)
     self.SHEETS = {}
     for sheet in self.doc.spreadsheet.getElementsByType(Table):
         self.readSheet(sheet)
Ejemplo n.º 23
0
    def loaddoc(self):
        """Load ``self.src_file`` into ``self.document``.

        A ``str`` source is treated as a file name and must be a zip file,
        otherwise ``TypeError`` is raised. A ``None`` source is replaced by
        ``sys.stdin`` before loading.
        """
        source = self.src_file
        if source is None:
            # No file given: read from stdin.
            self.src_file = sys.stdin
        elif isinstance(source, str) and not zipfile.is_zipfile(source):
            raise TypeError("%s is no odt file." % source)

        self.document = load(self.src_file)
Ejemplo n.º 24
0
 def test_headerfooter(self):
     """ The styles XML of headerfooter.odt must contain the MP1/MP2 styles and their uses """
     source_path = os.path.join(os.path.dirname(__file__), "examples", "headerfooter.odt")
     document = load(source_path)
     styles_xml = unicode(document.stylesxml(), 'utf-8')
     expected_fragments = (
         u'''style:name="MP1"''',
         u'''style:name="MP2"''',
         u"""<style:header><text:p text:style-name="MP1">Header<text:tab/>""",
         u"""<style:footer><text:p text:style-name="MP2">Footer<text:tab/>""",
     )
     for fragment in expected_fragments:
         self.assertNotEqual(-1, styles_xml.find(fragment))
Ejemplo n.º 25
0
    def loaddoc(self):
        """Load ``self.src_file`` into ``self.document``.

        On Python 3 a ``str`` or ``io.IOBase`` source, and on Python 2 a
        ``basestring`` source, must be a zip file, otherwise ``TypeError``
        is raised. A ``None`` source is replaced by ``sys.stdin`` before
        loading.
        """
        source = self.src_file
        major = sys.version_info[0]
        if major == 3:
            needs_zip_check = isinstance(source, (str, io.IOBase))
        elif major == 2:
            # basestring is only referenced on this branch.
            needs_zip_check = isinstance(source, basestring)
        else:
            needs_zip_check = False

        if needs_zip_check:
            if not zipfile.is_zipfile(source):
                raise TypeError(u"%s is no odt file." % source)
        elif source is None:
            # No file given: read from stdin.
            self.src_file = sys.stdin

        self.document = load(self.src_file)
Ejemplo n.º 26
0
 def test_headings(self):
     """ Save a document with two headings and a paragraph, reload it, check its content """
     document = OpenDocumentText()
     for element in (
         H(outlinelevel=1, text=u"Heading 1"),
         P(text=u"Hello World!"),
         H(outlinelevel=2, text=u"Heading 2"),
     ):
         document.text.addElement(element)
     document.save(u"TEST.odt")
     self.saved = True
     reloaded = load(u"TEST.odt")
     content = reloaded.contentxml()  # compared against a bytes literal below
     expected = b"""<text:h text:outline-level="1">Heading 1</text:h><text:p>Hello World!</text:p><text:h text:outline-level="2">Heading 2</text:h>"""
     self.assertNotEqual(-1, content.find(expected))
Ejemplo n.º 27
0
def odt_to_str(path):
    """Return the text of every paragraph of the document at *path*, one per line.

    NOTE(review): ``load`` and ``text`` are imported only when
    ``options.use_odfpy`` is truthy, and nothing else in this function
    binds them -- confirm that callers always set the flag.
    """
    if options.use_odfpy:
        from odf.opendocument import load
        from odf import text
        from odf.element import Text

    document = load(utf8(path))
    paragraphs = document.getElementsByType(text.P)
    return "\n".join([utf8(para.__str__()) for para in paragraphs])
Ejemplo n.º 28
0
    def test_cli_2odt(self):
        """Run the CLI with ``-f odt`` and check the resulting example.odt file."""
        cli_args = ['-f', 'odt', '-o', self.out_dir, self.FIXTURE_FILE]
        with cli(argv=cli_args, credentials=self.credentials) as app:
            app.run()

        # The file must exist in the output directory ...
        downloaded = os.path.join(self.out_dir, 'example.odt')
        self.assertTrue(os.path.isfile(downloaded))

        # ... and its text must match the expected marker.
        doc = opendocument.load(os.path.join(self.out_dir, 'example.odt'))
        xml_bytes = doc.toXml().encode('utf-8')
        root = ElementTree.fromstring(xml_bytes)
        self.assertRegexpMatches(GDocDown.get_element_text(root), 'gdoc_down example file')
Ejemplo n.º 29
0
 def test_linebreak(self):
     """ A line break (empty element) must be written as a self-closing tag """
     document = OpenDocumentText()
     paragraph = P(text=u"Hello World!")
     document.text.addElement(paragraph)
     paragraph.addElement(LineBreak())
     paragraph.addText(u"Line 2")
     document.save(u"TEST.odt")
     self.saved = True
     reloaded = load(u"TEST.odt")
     content = reloaded.contentxml()  # compared against a bytes literal below
     expected = b"""<text:p>Hello World!<text:line-break/>Line 2</text:p>"""
     self.assertNotEqual(-1, content.find(expected))
Ejemplo n.º 30
0
def load_images(input_file, ods):
    """Load images from the input file into *ods*.

    Pandoc is asked to convert *input_file* to a temporary ``tmp.odt``
    file, so that the images do not have to be extracted by hand from
    each possible input format. The pictures of that temporary document
    are then added to *ods*.

    Args:
        input_file - path of the original input.
        ods - the ods document the images are inserted into.

    Returns:
        hr_list - list of the references returned by ``ods.addPicture``,
            in reverse order of insertion.
        [] - empty list when pandoc could not be run or reported a failure.

    """
    cur_dir = str(sys.argv[0])
    cur_dir = cur_dir.replace('odswriter.py', '')
    output_file = cur_dir + 'tmp.odt'

    # Pass an argument list and no shell, so that spaces or shell
    # metacharacters in the file name cannot alter the command.
    command = ['pandoc', input_file, '-o', output_file]
    try:
        proc = Popen(command, stdout=PIPE, stderr=PIPE)
    except OSError as exc:
        # pandoc is not installed or cannot be executed.
        print('Images can not be loaded, Error:\n', exc)
        return []
    out, err = proc.communicate()
    # A non-zero exit status is the failure signal. Unexpected stdout
    # output is still treated as a failure, as it was before.
    if proc.returncode != 0 or out:
        print('Images can not be loaded, Error:\n', err or out)
        return []

    odffile = load(output_file)
    # NOTE(review): img_dict is not defined in this function; presumably a
    # module-level mapping shared with other code -- verify.
    for k in odffile.Pictures.keys():
        img_dict[k] = odffile.Pictures[k][1]

    # Pictures are added in img_dict order and the references are returned
    # in the opposite order. The order matters because it is the only way
    # images are identified (input and tmp.odt use different file names).
    hr_list = [
        ods.addPicture(filename=img_name, content=img_dict[img_name])
        for img_name in img_dict
    ]
    hr_list.reverse()
    return hr_list
Ejemplo n.º 31
0
    def parse_opendocument(self, file_path, entity):
        """Load the document at *file_path* and copy its metadata onto *entity*.

        Raises ``ProcessingException`` when the document cannot be loaded.
        Returns the loaded document.
        """
        try:
            doc = load(file_path)
        except Exception as exc:
            raise ProcessingException("Cannot open document.") from exc

        # Tag name -> entity property, for values stored as-is ...
        plain_props = {
            'dc:title': 'title',
            'dc:description': 'summary',
            'dc:creator': 'author',
            'meta:generator': 'generator',
        }
        # ... and for values passed through parse_timestamp first.
        timestamp_props = {
            'dc:date': 'date',
            'meta:creation-date': 'authoredAt',
        }

        for child in doc.meta.childNodes:
            value = str(child)
            tag = child.tagName
            if tag in plain_props:
                entity.add(plain_props[tag], value)
            elif tag in timestamp_props:
                entity.add(timestamp_props[tag], self.parse_timestamp(value))

        return doc
Ejemplo n.º 32
0
def parse_odp(presentation_filepath):
    """Return one ``{'title': ..., 'words': ...}`` dict per page of a presentation.

    For each page, the strings collected from title nodes are joined with
    newlines into ``title``. The strings collected from every other node
    are joined with spaces, one newline-terminated line per node, into
    ``words``.
    """
    presentation = opendocument.load(presentation_filepath)

    slides = []
    for page in presentation.getElementsByType(draw.Page):
        title_parts = []
        body_parts = []
        for node in page.childNodes:
            if _is_title(node):
                _walk_children(node, title_parts)
                continue
            collected = []
            _walk_children(node, collected)
            body_parts.append(collected)

        slides.append({
            'title': "\n".join(title_parts),
            'words': "".join(" ".join(part) + "\n" for part in body_parts),
        })
    return slides
Ejemplo n.º 33
0
    def parse_opendocument(self, file_path, entity):
        """Load the document at *file_path* and copy its metadata onto *entity*.

        Raises ``ProcessingException`` when the document cannot be loaded.
        Returns the loaded document.
        """
        try:
            doc = load(file_path)
        except Exception as exc:
            raise ProcessingException("Cannot open document.") from exc

        # (tag name, entity property, whether the value goes through parse_timestamp)
        fields = (
            ("dc:title", "title", False),
            ("dc:description", "summary", False),
            ("dc:creator", "author", False),
            ("dc:date", "date", True),
            ("meta:creation-date", "authoredAt", True),
            ("meta:generator", "generator", False),
        )

        for child in doc.meta.childNodes:
            value = str(child)
            for tag, prop, is_timestamp in fields:
                if child.tagName == tag:
                    entity.add(prop, self.parse_timestamp(value) if is_timestamp else value)

        return doc
Ejemplo n.º 34
0
def reader(filename, fileobj, **kwargs):
    """
    Return the rows of an OpenDocument spreadsheet as lists of cell values.

    ``fileobj`` should be in binary, and at the beginning of the stream.
    When ``fileobj`` is falsy, ``filename`` is loaded instead.

    Each cell contributes the ``data`` of the first child of its first
    paragraph, or ``None`` when the cell has no paragraph or that paragraph
    has no such child. Trailing ``None`` values are removed from every row,
    so a row without any value becomes an empty list.
    """
    book = load(fileobj or filename)
    sheet = book.spreadsheet

    results = []
    for tr in sheet.getElementsByType(TableRow):
        row = []
        for tc in tr.getElementsByType(TableCell):
            paragraphs = tc.getElementsByType(P)
            value = None
            if paragraphs:
                # Only the first paragraph of a cell is read. getattr covers
                # an empty paragraph (no first child) and a first child that
                # carries no ``data``.
                value = getattr(paragraphs[0].firstChild, 'data', None)
            row.append(value)
        # Test ``row`` first: an empty or all-None row must not raise
        # IndexError on row[-1].
        while row and row[-1] is None:
            row.pop()
        results.append(row)
    return results
    def extract(self, row_proc=list.append):
        """Parse the header and data ranges of the first sheet of the workbook.

        The workbook is loaded from ``self.spreadsheet``. The parsed header
        is stored in ``self.header`` and the parsed data in ``self.data``;
        a warning is emitted for every entry left in ``self.unused_keys``.

        :param row_proc: passed through to ``parse_range`` for the data range
        :returns: ``self.data``
        :raises AssertionError: when the workbook has no spreadsheet part or
            no sheets
        """
        # Find the sheet inside the document.
        # For now we just use the first sheet and ignore the rest.
        workbook = opendocument.load(self.spreadsheet)
        try:
            workbook.spreadsheet

        except (AttributeError, NameError):
            # A missing attribute raises AttributeError, which the previous
            # ``except NameError`` could never catch.
            raise AssertionError(
                "instance.extract: Workbook %s does not contain a spreadsheet!"
                % workbook)

        sheets = workbook.spreadsheet.getElementsByType(Table)
        # Raised explicitly so the check is not stripped under ``python -O``.
        if not (len(sheets) > 0):
            raise AssertionError(
                "instance.extract: Workbook %s does not contain any sheets!" %
                workbook)

        sheet1 = sheets[0]

        # Read the header.
        self.header = self.parse_range(sheet1, self.metadata.header,
                                       self.parse_header_cell)

        # Emit warnings for any keys declared in the metadata that were not
        # used in the spreadsheet.
        for (key, value) in self.unused_keys.iteritems():
            warn("Header %s of type %s was declared but not used!" %
                 (key, value))

        # Read the data
        self.data = self.parse_range(sheet1, self.metadata.data,
                                     self.parse_data_cell, row_proc)

        return self.data
Ejemplo n.º 36
0
    def parse_opendocument(self, file_path):
        """Load the document at *file_path* and record its metadata via ``self.update``.

        Raises ``ProcessingException`` when the document cannot be loaded.
        Returns the loaded document.
        """
        try:
            doc = load(file_path)
        except Exception:
            raise ProcessingException("Cannot open document.")

        for child in doc.meta.childNodes:
            value = str(child)
            tag = child.tagName
            if tag == 'dc:title':
                self.update('title', value)
            elif tag == 'dc:description':
                self.update('summary', value)
            elif tag == 'dc:creator':
                self.update('author', value)
            elif tag == 'dc:date':
                self.update('date', self.parse_odf_date(value))
            elif tag == 'meta:creation-date':
                self.update('created_at', self.parse_odf_date(value))
            elif tag == 'meta:generator':
                self.update('generator', value)

        return doc
Ejemplo n.º 37
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2009 Søren Roug, European Environment Agency
#
# This is free software.  You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
# Contributor(s):
#

#
# This script simply loads a document into memory and saves it again.
# It takes the filename as argument
import sys
from odf.opendocument import load
# The document to copy is named by the first command-line argument.
infile = sys.argv[1]
doc = load(infile)
# Insert "-bak" in front of the last four characters of the name.
outfile = "%s-bak%s" % (infile[:-4], infile[-4:])
doc.save(outfile)

Ejemplo n.º 38
0
def import_odf(request, slug):
	"""
	Import the colloscope from an OpenDocument file

	The file must have the same layout as the one produced by the
	colloscope_odf view. The user is only expected to have filled in the
	group numbers for each time slot and each week.
	"""
	classe = get_object_or_404(Classe, slug=slug)

	if not request.user.has_perm('pykol.change_colloscope', classe):
		raise PermissionDenied

	semaines = list(classe.semaine_set.order_by('debut'))
	creneaux = dict([(c.pk, c) for c in classe.creneau_set.all()])
	groupes = {
		constantes.PERIODE_PREMIERE:
			dict([(g.nom, g) for g in classe.trinomes.filter(
				periode__in=(constantes.PERIODE_ANNEE,
					constantes.PERIODE_PREMIERE)
			)]),
		constantes.PERIODE_DEUXIEME:
			dict([(g.nom, g) for g in classe.trinomes.filter(
				periode__in=(constantes.PERIODE_ANNEE,
					constantes.PERIODE_DEUXIEME)
			)]),
	}

	# Errors met while importing the file. Each entry is a triple
	# (error_code, (row, column), message); row, column and/or the whole
	# pair may be None when the error is not tied to a particular
	# position in the file.
	import_erreurs = []

	if request.method == 'POST':
		form = ColloscopeImportForm(request.POST, request.FILES)

		if form.is_valid():
			# Delete the existing colles of this class that are still
			# planned or in draft state
			if form.cleaned_data.get('supprimer'):
				Colle.objects.filter(classe=classe,
						etat__in=(Colle.ETAT_PREVUE,
							Colle.ETAT_BROUILLON)).delete()

			# One big try catches any import error that slipped past the
			# handling done inside.
			try:
				# Create the colles from the file
				colloscope_ods = load(request.FILES['colloscope_ods'])
				table = colloscope_ods.spreadsheet.getElementsByType(Table)[0]
				lignes = table.getElementsByType(TableRow)

				# Fixed columns written by the ODF export
				nb_entetes_fixes = 5

				for ligne_num, ligne in enumerate(lignes[2:], 2):
					cells = iter_columns(ligne)

					try:
						# Rows whose first cell (the slot number) is
						# empty are skipped.
						creneau_text = tablecell_to_text(next(cells)).strip()
						if not creneau_text:
							continue
						id_creneau = int(creneau_text)
						creneau = creneaux[id_creneau]
					except:
						import_erreurs.append(('creneau_invalide',
							(ligne_num, 0),
							"La valeur n'est pas un numéro de créneau "
							"valide pour cette classe."))
						continue

					try:
						# Skip the fixed columns that follow
						for _ in range(nb_entetes_fixes):
							next(cells)
					except:
						# When there are no more cells to walk through,
						# the row is considered empty and we move on to
						# the next one.
						continue

					# Now come the week columns
					for sem_num, (sem_cell, semaine) in enumerate(zip_longest(cells, semaines)):
						# Read the content of the cell and pair it with
						# a week. The handling differs for each of the
						# four possible cases of this pair (each value
						# empty or not).
						if sem_cell is None:
							groupes_text = None
						else:
							groupes_text = tablecell_to_text(sem_cell).strip()

						if semaine is None:
							# There is content in a cell that matches no
							# week of the colloscope: report the error.
							# When the cell is empty nothing is
							# reported: it is just a leftover of the
							# spreadsheet.
							if groupes_text:
								import_erreurs.append(('semaine_invalide',
									(ligne_num, sem_num + nb_entetes_fixes),
									"Case située au-delà de la dernière "
									"semaine de colles."))

							# In every case move on to the next row:
							# no further week of interest can follow on
							# this row.
							break

						elif groupes_text:
							# A list of groups was found for a known
							# week. Update the colles.
							groupes_colles = [g.strip()
									for g in groupes_text.split(",")
									if g.strip()]

							for num_groupe in groupes_colles:
								try:
									groupe = groupes[semaine.periode][num_groupe]
								except:
									# Report the groups that do not exist
									# and go on with the next ones.
									import_erreurs.append(('groupe_invalide',
										(ligne_num, sem_num + nb_entetes_fixes),
										"Identifiant de groupe de colle "
										"inconnu."))
									continue

								try:
									Colle.objects.update_or_create_from_creneau(creneau, semaine, groupe)
								except:
									import_erreurs.append(('update_echoue',
										(ligne_num, sem_num + nb_entetes_fixes),
										"Échec de la mise à jour de cette "
										"colle."))

						else:
							# The cell of this week is empty. Delete the
							# colles already stored in the database for
							# this slot and week, unless they have
							# already taken place.
							for colle in Colle.objects.filter(creneau=creneau, semaine=semaine):
								if not colle.est_effectuee:
									colle.annuler_mouvement()
									colle.delete()

				if not import_erreurs:
					return redirect('colloscope', slug=classe.slug)

			except Exception as e:
				import_erreurs.append(('fichier_invalide', None,
					"Votre fichier n'est pas au format demandé."))
				logger.exception("Erreur inconnue lors de l'importation d'un colloscope",
						exc_info=e)
	else:
		form = ColloscopeImportForm()

	return render(request, 'pykol/colloscope/import_odf.html', context={
		'classe': classe,
		'form': form,
		'import_erreurs': import_erreurs,
	})
Ejemplo n.º 39
0
from generatedata import textfiles
from odf import text, teletype
from odf.opendocument import load
from pathlib import Path

# Text of every paragraph found in the .odt files.
numbers_from_odt = []

odtfiles = textfiles[1]
# Convert each entry to a plain string path before handing it to load().
newodtfiles = [str(odtfile) for odtfile in odtfiles]

for item in newodtfiles:
    doc = load(item)  # parse the .odt document with the odf library

    allrows = doc.getElementsByType(text.P)
    # One list entry per paragraph element.
    numbers_from_odt.extend(teletype.extractText(row) for row in allrows)

# Lines of every .txt file, kept exactly as read (trailing newline included).
numbers_from_txt = []
for item in textfiles[0]:
    # The context manager closes the file even if reading fails part-way.
    with open(str(item), "r") as sourceFile:
        numbers_from_txt.extend(sourceFile)

# Combine once, after every file has been read, so the name is defined even
# when there are no .txt files to iterate over.
all_numbers = numbers_from_odt + numbers_from_txt
Ejemplo n.º 40
0
    def addtolist(self, path, list, textlist):
        """Append an entry for every file found under *path*.

        Each file contributes ``<ratio>_<full path>`` to *list*, where
        ``<ratio>`` is ``width / height`` as reported by ``get_image_size``
        or the literal ``noimage`` when the image format is unknown.

        When *textlist* is not None, one string per file is appended to it as
        well: the text extracted from non-image ``.txt``, ``.docx``, ``.odt``,
        ``.xlsx`` and ``.ods`` files, or ``''`` for everything else
        (including ``.pdf`` files and files whose extraction fails).

        If *path* is not a directory nothing is appended.

        Returns ``(list, textlist)`` when *textlist* is not None, otherwise
        just ``list``.
        """
        if os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                for filename in files:
                    # os.walk() yields directory names without a trailing
                    # separator, so plain concatenation glued the file name
                    # onto the directory name; join them properly.
                    pathandname = os.path.join(root, filename)

                    try:
                        width, height = get_image_size(pathandname)
                        wh = str(width / height) + '_'
                    except UnknownImageFormat:
                        wh = 'noimage_'

                    if textlist is not None:
                        tx = ''
                        if wh == 'noimage_':
                            print('path: ', pathandname)
                            try:
                                if '.pdf' in filename:
                                    pass  # handled with lazypdf

                                elif '.txt' in filename:
                                    # Context manager closes the file even
                                    # when read() raises.
                                    with open(pathandname,
                                              encoding='utf-8') as txf:
                                        tx = txf.read()

                                elif '.docx' in filename:
                                    tx = docx2txt.process(pathandname)

                                elif '.odt' in filename:
                                    textdoc = load(pathandname)
                                    tx = teletype.extractText(textdoc.body)

                                elif '.xlsx' in filename:
                                    wb = xlrd.open_workbook(pathandname)
                                    sh1 = wb.sheet_by_index(0)  # first sheet
                                    for rownum in range(sh1.nrows):
                                        onerow = ' '.join(
                                            sh1.row_values(rownum))
                                        tx = tx + onerow + '\n'

                                elif '.ods' in filename:
                                    doc = ODSReader(pathandname,
                                                    clonespannedcolumns=True)
                                    table = doc.getSheet(u'Sheet1')
                                    for row in table:
                                        for cell in row:
                                            tx = tx + ' ' + cell

                            except Exception:
                                # Best effort: an unreadable file keeps
                                # whatever text was gathered so far.
                                pass

                        textlist.append(tx)

                    list.append(wh + pathandname)

        if textlist is not None:
            return list, textlist
        else:
            return list
Ejemplo n.º 41
0
 def load(self, filename):
     """Open *filename* and keep the document and its tables on the instance.

     Sets ``self.filename``, ``self.document`` (the loaded document) and
     ``self.tables`` (the result of ``getElementsByType(Table)``).
     """
     self.filename = filename
     # The bare name resolves to the module-level load(), not this method.
     parsed = load(filename)
     self.document = parsed
     self.tables = parsed.getElementsByType(Table)
Ejemplo n.º 42
0
 def read_file(self):
     """Load the file at ``self.path`` and store its text in ``self.content``.

     The text of every ``text.P`` element is extracted and the pieces are
     joined with single spaces.
     """
     document = load(self.path)
     paragraphs = document.getElementsByType(text.P)
     pieces = [teletype.extractText(paragraph) for paragraph in paragraphs]
     self.content = " ".join(pieces)
Ejemplo n.º 43
0
 def test_spreadsheet(self):
     """ Load a document containing subobjects """
     # The fixture lives in the "examples" directory next to this module.
     examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
     presentation_path = os.path.join(examples_dir, u"emb_spreadsheet.odp")
     document = load(presentation_path)
     child_count = len(document.childobjects)
     self.assertEqual(1, child_count)
Ejemplo n.º 44
0
def open_odt(file) -> str:
    """Open an odt document and return all of the text it contains.

    The text of each ``odf.text.P`` element is extracted and the results are
    joined with newlines.
    """
    document = load(file)
    lines = []
    for paragraph in document.getElementsByType(odf.text.P):
        lines.append(teletype.extractText(paragraph))
    return "\n".join(lines)
Ejemplo n.º 45
0
 def test_simple(self):
     """ Check that a simple load works """
     document = load(u"TEST.odt")
     # contentxml() is supposed to yield bytes, hence the bytes needle below.
     content = document.contentxml()
     position = content.find(b"""Hello World!""")
     self.assertNotEqual(-1, position)
Ejemplo n.º 46
0
def odf_question_file(filesName):
    """Print the text extracted from the body of the document *filesName*."""
    document = load(filesName)
    print(teletype.extractText(document.body))
Ejemplo n.º 47
0
def parse(document):
    """Return the paragraphs of ``document.file`` as one string.

    Every ``text.P`` element of the loaded document is converted with
    ``str()``; the results are joined with a newline followed by a space.
    """
    odf_document = load(document.file)
    paragraphs = odf_document.getElementsByType(text.P)
    return "\n ".join(str(paragraph) for paragraph in paragraphs)
Ejemplo n.º 48
0
 def _load_from_memory(self):
     """Load ``self._file_stream`` and store the result in ``self._native_book``."""
     stream = self._file_stream
     self._native_book = load(stream)
Ejemplo n.º 49
0
# Copy the configured key/value pairs into a plain dict.
upd_dict = {k: v for (k, v) in config['data'].items()}

templ_dir = u'/mnt/storage/tmp'
templ_filename = u"{0}/{1}".format(templ_dir, 'DogTemp-full.odt')

out_dir = templ_dir
# The formatted path is already text: calling .decode() on it raises
# AttributeError on Python 3 and added nothing for this ASCII path on
# Python 2, so the call is dropped.
out_filename = u"{0}/{1}".format(out_dir, 'DogOutput.odt')

doc = load(templ_filename)

# Register the stamp picture with the document; the returned href is what the
# image elements below are pointed at.
img_path = '/smb/it/tmp/imgStampКИПСПБ.jpg'
href = doc.addPicture(img_path)
# NOTE(review): img_sign is not used in the visible part of this script.
img_sign = Image(href=href, type="simple", show="embed", actuate="onLoad")
for f in doc.getElementsByType(Frame):
    # Only the frame named 'img_stamp_sign' is updated.
    if f.getAttribute('name') != 'img_stamp_sign':
        continue
    for chld in f.childNodes:
        if u'image' not in chld.qname:
            continue
        for img in chld.getElementsByType(Image):
            print('--- img ---')
            img.setAttribute('href', href)
"""        
for tbl in doc.getElementsByType(Table):
Ejemplo n.º 50
0
 def _load_from_file(self):
     """Load ``self._file_name`` and store the result in ``self._native_book``."""
     file_name = self._file_name
     self._native_book = load(file_name)
Ejemplo n.º 51
0
 def __init__(self, presentation_name):
     """Load the presentation *presentation_name*.

     Initialises the ``PresentationBasic`` base, resets ``self.auto_styles``,
     stores the loaded document in ``self.prs`` and then calls
     ``parse_styles()`` followed by ``add_slides()``.
     """
     PresentationBasic.__init__(self, presentation_name)
     self.auto_styles = dict()
     document = opendocument.load(presentation_name)
     self.prs = document
     self.parse_styles()
     self.add_slides()
Ejemplo n.º 52
0
 def test_simple(self):
     """ Check that a simple load works """
     d = load("TEST.odt")
     result = d.contentxml()
     # NOTE(review): contentxml() is expected to return bytes (confirm against
     # the odfpy version in use). Searching bytes with a text needle raises
     # TypeError on Python 3, so the needle is a bytes literal; on Python 2 a
     # bytes literal is a plain str and the search behaves as before.
     self.assertNotEqual(-1, result.find(b"""Hello World!"""))
 def __init__(self):
     """Load the document template under ``root`` into ``self.doc``."""
     template_path = root + r"/documentTemplates/Шаблон.odt"
     self.doc = load(template_path)
Ejemplo n.º 54
0
import random
import os
from odf.opendocument import load
from odf.text import *

# Text that will be inserted as a new paragraph.
with open('wals_intro', 'r', encoding='utf-8') as f:
    pl = f.read()

# Work on the first .odt file listed in the current directory.
odt_names = [name for name in os.listdir() if name.endswith('.odt')]
filename = odt_names[0]

doc = load(filename)

# Append a level-1 heading followed by the paragraph holding the text.
h = H(outlinelevel=1, text="Plagiarism")
doc.text.addElement(h)

p = P(text=pl)
doc.text.addElement(p)

# The document is written back only when the random draw from 0..5 is 4.
pool = range(6)
should_save = random.choice(pool) == 4
if should_save:
    doc.save(filename)
Ejemplo n.º 55
0
from os import listdir
from os.path import isfile, join
from odf.opendocument import load
from odf import teletype
import sys

# Running totals over every .odt file in the requested folder.
count_with_space = 0
count = 0
nb_fic = 0

# Folder to scan: docs/<first command-line argument>.
docs_dir = 'docs/' + sys.argv[1]
# Characters excluded from the "without spaces" total.
ignored_chars = (' ', '\t', '\n', u'\u00A0')

for file in listdir(docs_dir):
    filename = join(docs_dir, file)
    if not (isfile(filename) and filename.endswith('.odt')):
        continue
    nb_fic += 1
    doc = load(filename).text
    txt = teletype.extractText(doc)
    count_with_space += len(txt)
    count += sum(1 for car in txt if car not in ignored_chars)

print(f'Total pour {sys.argv[1]} :')
print(f'    {nb_fic} fichiers')
print(f'    Avec espaces : {count_with_space} caractères')
print(f'    Sans espaces : {count} caractères')
Ejemplo n.º 56
0
from odf.opendocument import load
from odf import text

# Open a document
doc = load("Copia.odt")
# Get all of the document's content (the ``text`` attribute of the loaded
# document)
conteudo = doc.text
print(conteudo)
Ejemplo n.º 57
0
    outputfile = None

    for o, a in opts:
        if o in ("-o", "--output"):
            outputfile = a

    if len(args) != 1:
        usage()
        sys.exit(2)

    inputfile = args[0]
    if outputfile is None:
        outputfile = inputfile[:inputfile.rfind('.')] + ".odt"

    spreadsheetdoc = load(inputfile)

    textdoc = OpenDocumentText()

    # Need to make a copy of the list because addElement unlinks from the original
    for meta in spreadsheetdoc.meta.childNodes[:]:
        textdoc.meta.addElement(meta)

    for font in spreadsheetdoc.fontfacedecls.childNodes[:]:
        textdoc.fontfacedecls.addElement(font)

    for style in spreadsheetdoc.styles.childNodes[:]:
        textdoc.styles.addElement(style)

    for autostyle in spreadsheetdoc.automaticstyles.childNodes[:]:
        textdoc.automaticstyles.addElement(autostyle)
Ejemplo n.º 58
0
    def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
        """Return *filepath_or_buffer* loaded with ``odf.opendocument.load``."""
        # Imported inside the method, so odf is only required when this
        # method is actually called.
        from odf.opendocument import load

        workbook = load(filepath_or_buffer)
        return workbook
Ejemplo n.º 59
0
def get_paragraphs_odt(doc_path):
    """Return the ``odf.text.P`` elements of the document at *doc_path*."""
    paragraphs = load(doc_path).getElementsByType(odf.text.P)
    return paragraphs
Ejemplo n.º 60
-1
    def __init__(self, fname='Purdue-FTA.ods'):
        """Open the ODS file.

        @param fname : Path of the file.
        """

        load(fname)