예제 #1
0
    def mangleData(self, data, index):
        """Mangle ``data`` and then rewrite its two embedded checksum fields.

        The buffer is first passed through ``MangleFile.mangleData``.  After
        that, two digests are recomputed from the mangled bytes and stored
        back into the buffer:

        * ``md5(...)`` of everything from ``header_offset`` to the end,
          written as 16 raw bytes at ``md5_offset``;
        * ``sha(...)`` of the ``header_offset``..``filedata_offset`` range,
          written as its 40-character hex digest at ``sha1_offset``.

        Returns the updated buffer.
        """
        # NOTE(review): these offsets are hard-coded; the container format
        # they describe is not visible from this method -- confirm.
        self.sha1_offset = 208
        self.md5_offset = 256
        self.header_offset = 360
        self.filedata_offset = 3170

        data = MangleFile.mangleData(self, data, index)

        if USE_HACHOIR:
            hachoir_config.quiet = True
            stream = StringInputStream(data.tostring())
            parser = guessParser(stream)
            if parser:
                self.useHachoirParser(parser)

        # MD5 field: raw digest of header + file data.
        md5_end = self.md5_offset + 16
        payload = data[self.header_offset:].tostring()
        data[self.md5_offset:md5_end] = array('B', md5(payload).digest())

        # SHA1 field: hex digest of the header region only.
        sha1_end = self.sha1_offset + 40
        header = data[self.header_offset:self.filedata_offset].tostring()
        data[self.sha1_offset:sha1_end] = array('B', sha(header).hexdigest())

        return data
예제 #2
0
 def __init__(self, project, *args, **kw):
     """Initialise the mangler and choose an XML mangler from ``kw['ext']``.

     ``args[0]`` and ``int(kw['nb_file'])`` are forwarded to
     ``MangleFile.__init__``.

     Keyword arguments read here:

     * ``nb_file`` (required): converted with ``int()``.
     * ``ext`` (optional): ``'.xml'``, ``'.svg'`` or ``'.rdf'`` select a
       ``MangleXML`` with ``xmltype=MangleXML.SVG11``; ``'.html'`` selects
       ``xmltype=MangleXML.XHTML1``.  Any other value, or no ``ext`` at
       all, leaves ``self.xml`` set to ``None``.
     * ``nofile`` (optional): for the SVG11 case ``nofile=True`` is passed
       only when the value is truthy; for the XHTML1 case the value is
       passed through unchanged whenever the key is present.

     Raises ``IndexError`` if no positional argument is given and
     ``KeyError`` if ``nb_file`` is missing.
     """
     MangleFile.__init__(self, project, args[0], int(kw['nb_file']))
     self.hard_max_op = 10000
     self.hard_min_op = 0
     self.aggressivity = None
     self.fixed_size_factor = 1.0

     # Assign the default up front: previously an unrecognised extension
     # left self.xml undefined, so any later read raised AttributeError.
     self.xml = None
     if 'ext' not in kw:
         return

     ext = kw['ext']
     if ext in ('.xml', '.svg', '.rdf'):
         if kw.get('nofile'):
             self.xml = MangleXML(nofile=True, xmltype=MangleXML.SVG11)
         else:
             self.xml = MangleXML(xmltype=MangleXML.SVG11)
     elif ext == ".html":
         if 'nofile' in kw:
             self.xml = MangleXML(nofile=kw['nofile'], xmltype=MangleXML.XHTML1)
         else:
             self.xml = MangleXML(xmltype=MangleXML.XHTML1)
예제 #3
0
 def __init__(self, project, *args, **kw):
     """Set up mangling limits and an optional XML mangler.

     ``args[0]`` and ``int(kw['nb_file'])`` are passed on to
     ``MangleFile.__init__``; a missing positional argument raises
     ``IndexError`` and a missing ``nb_file`` raises ``KeyError``.

     ``self.xml`` is chosen from the optional ``ext`` keyword:

     * ``'.xml'``, ``'.svg'``, ``'.rdf'``: ``MangleXML`` with
       ``xmltype=MangleXML.SVG11`` (``nofile=True`` is added only when
       ``kw['nofile']`` is truthy);
     * ``'.html'``: ``MangleXML`` with ``xmltype=MangleXML.XHTML1``
       (``kw['nofile']`` is forwarded as-is whenever the key exists);
     * anything else, or no ``ext``: ``None``.
     """
     MangleFile.__init__(self, project, args[0], int(kw['nb_file']))
     self.hard_max_op = 10000
     self.hard_min_op = 0
     self.aggressivity = None
     self.fixed_size_factor = 1.0

     # self.xml must always exist.  The old code skipped the assignment
     # when 'ext' was given but matched none of the known extensions,
     # which made later attribute reads fail with AttributeError.
     self.xml = None

     # kw.get() replaces dict.has_key(), which no longer exists on
     # Python 3; a missing 'ext' yields None and matches no branch.
     ext = kw.get('ext')
     if ext in ('.xml', '.svg', '.rdf'):
         if kw.get('nofile'):
             self.xml = MangleXML(nofile=True, xmltype=MangleXML.SVG11)
         else:
             self.xml = MangleXML(xmltype=MangleXML.SVG11)
     elif ext == ".html":
         if 'nofile' in kw:
             self.xml = MangleXML(nofile=kw['nofile'],
                                  xmltype=MangleXML.XHTML1)
         else:
             self.xml = MangleXML(xmltype=MangleXML.XHTML1)
예제 #4
0
def setupProject(project):
    """Configure ``project`` to run ``pdftotext`` on a mangled PDF document.

    Creates, in order: a ``ProcessTimeWatch``, a file mangler
    (``AutoMangle`` or ``MangleFile`` depending on ``AUTO_MANGLE``), a
    ``PopplerProcess`` running ``pdftotext`` with a 5 second timeout, a
    ``WatchProcess`` on it and, while ``USE_STDOUT`` is true, a
    ``WatchStdout`` whose lines have their ``Error: `` /
    ``Error (N): `` prefix removed.  Returns ``None``.
    """
    USE_STDOUT = True

    # Only constructed; the local binding is not used afterwards.
    time_watch = ProcessTimeWatch(
        project,
        too_fast=0.100,
        too_fast_score=-0.80,
        too_slow=3.0,
        too_slow_score=0.10,
    )

    orig_filename = project.application().getInputFilename("PDF document")
    if not AUTO_MANGLE:
        mangle = MangleFile(project, orig_filename)
        mangle.config.max_op = 1000
    else:
        mangle = AutoMangle(project, orig_filename)
        mangle.hard_max_op = 1000

    options = dict(timeout=5.0)
    if not USE_STDOUT:
        options['stdout'] = 'null'
    process = PopplerProcess(project, ['pdftotext'], **options)
    WatchProcess(process, exitcode_score=-0.10)

    if not USE_STDOUT:
        return

    stdout = WatchStdout(process)

    def cleanupLine(line):
        # Keep only the message that follows "Error: " / "Error (N): ".
        match = re.match(r"Error(?: \([0-9]+\))?: (.*)", line)
        return match.group(1) if match else line

    stdout.cleanup_func = cleanupLine
    del stdout.words['unknown']
    # Disabled output filters, kept for reference:
    # stdout.show_not_matching = True
    # stdout.ignoreRegex(r"Unknown operator 'allocate'$")
    # stdout.ignoreRegex(r" operator is wrong type \(error\)$")
    # stdout.ignoreRegex(r'^No current point in lineto$')
    # stdout.ignoreRegex(r'^No current point in lineto')
    # stdout.ignoreRegex(r'^Unknown operator ')
    # stdout.ignoreRegex(r"^Couldn't open 'nameToUnicode' file ")
    # stdout.ignoreRegex(r"^Illegal character ")
    # stdout.ignoreRegex(r"^No font in show$")
    # stdout.ignoreRegex(r"^Element of show/space array must be number or string$")
    # stdout.ignoreRegex(r"^No current point in curveto$")
    # stdout.ignoreRegex(r"^Badly formatted number$")
    # stdout.ignoreRegex(r"^Dictionary key must be a name object$")
    # stdout.ignoreRegex(r"^End of file inside array$")
    # stdout.ignoreRegex(r"^Too few \([0-9]+\) args to .* operator$")
    # stdout.ignoreRegex(r"Too many args in content stream")
    stdout.max_nb_line = (100, 0.20)
예제 #5
0
 def mangleData(self, data, file_index):
     """Mangle ``data`` with the XML mangler if one is set, else generically.

     ``self.setupConf(data)`` is always called first.  When ``self.xml`` is
     truthy, the buffer's string form is handed to ``self.xml.mangleData``
     and the result is wrapped in a new ``array('B', ...)``; otherwise the
     call is delegated to ``MangleFile.mangleData``.
     """
     self.setupConf(data)
     if not self.xml:
         return MangleFile.mangleData(self, data, file_index)
     return array('B', self.xml.mangleData(data.tostring()))
예제 #6
0
 def mangleData(self, data, file_index):
     """Return the mangled form of ``data``.

     After ``self.setupConf(data)``, a truthy ``self.xml`` takes over: it
     receives ``data.tostring()`` and its output is returned as an
     ``array('B', ...)``.  Without an XML mangler the work is done by
     ``MangleFile.mangleData``.
     """
     self.setupConf(data)
     xml_mangler = self.xml
     if xml_mangler:
         mangled = xml_mangler.mangleData(data.tostring())
         return array('B', mangled)
     return MangleFile.mangleData(self, data, file_index)
예제 #7
0
 def __init__(self, project, *args, **kw):
     """Initialise the base ``MangleFile`` and set the default limits.

     All positional and keyword arguments are forwarded unchanged to
     ``MangleFile.__init__``.  Afterwards the operation bounds are set to
     0..10000, ``aggressivity`` to ``None`` and ``fixed_size_factor``
     to 1.0.
     """
     MangleFile.__init__(self, project, *args, **kw)
     # Hard bounds on the number of operations.
     self.hard_max_op, self.hard_min_op = 10000, 0
     self.aggressivity = None
     self.fixed_size_factor = 1.0
예제 #8
0
 def mangleData(self, data, file_index):
     """Call ``self.setupConf(data)``, then mangle via ``MangleFile``.

     Returns whatever ``MangleFile.mangleData`` returns for ``data`` and
     ``file_index``.
     """
     self.setupConf(data)
     mangled = MangleFile.mangleData(self, data, file_index)
     return mangled