Python guessParser Examples, hachoir_parser.guess.guessParser Python Examples

Example #1

0

Show file

File: hachoir.py Project: Woerd88/hachoir

def get_parser(data, streamdata, sessid):
    """Guess or retrieve the parser based on the stream.

    Streams are retrieved from the "data" persistent storage variable, from
    the "streams" key.

    The parser for the main stream ((None, None, filename) in data['streams'])
    is cached for efficiency reasons in data['parser_cache'].

    Arguments:
      data -- session dictionary; data['filename'] is read and
          data['parser_cache'] is read and, on first use, written.
      streamdata -- one stream entry.  Three shapes are handled, selected
          by streamdata[0]:
            None   -> the originally uploaded file;
            tuple  -> a substream stored by reference: streamdata[0] is the
                      parent's stream entry and streamdata[1] the field path
                      inside the parent's parser;
            other  -> a substream stored inline: streamdata[0] is assigned
                      to stream.tags and streamdata[1] is the raw data.
      sessid -- session id; used to build the on-disk name of the uploaded
          file and passed to save_data().

    Returns a (stream, parser) tuple.  (None, None) is returned when no
    parser is found for the uploaded file, or when the parent of a
    referenced substream has no parser.  For substreams, parser is whatever
    guessParser() returned for the stream.

    """
    # must remake parser EVERY TIME because parsers can't be pickled
    # (they contain generators which are currently not pickleable)
    # best I can do here is cache the parser, so at least we're not
    # taking time to re-guess the parser...
    if streamdata[0] is None: # original file
        stream = FileInputStream(data['filename'],
                            real_filename = unicode(tmp_dir+sessid+'.file'))
        if 'parser_cache' in data:
            # only the parser class is cached; instantiate it on the stream
            parser = data['parser_cache'](stream)
        else:
            parser = guessParser(stream)
            if not parser:
                print_parse_error()
                return (None, None)
            data['parser_cache'] = parser.__class__
            save_data(data, sessid)
    elif isinstance(streamdata[0], tuple):
        # substream stored as a reference to a field of its parent stream
        prevparser = get_parser(data, streamdata[0], sessid)[1]
        if prevparser is None:
            # the parent could not be parsed; propagate the failure instead
            # of raising TypeError when subscripting None
            return (None, None)
        stream = prevparser[streamdata[1]].getSubIStream()
        parser = guessParser(stream)
    else:
        # substream stored inline as (tags, raw data, ...)
        stream = StringInputStream(streamdata[1])
        stream.tags = streamdata[0]
        parser = guessParser(stream)
    return stream, parser

Example #2

0

Show file

File: app.py Project: foreni-packages/hachoir-wx

 def on_field_parse_substream(self, dispatcher, field):
     """Open the sub-stream of ``field`` in a new window.

     A parser is guessed for the field's sub-stream; when none is found
     the method returns without doing anything.  ``dispatcher`` is not
     used by this handler.
     """
     substream = field.getSubIStream()
     sub_parser = guessParser(substream)
     if sub_parser:
         # wrap the stream in a file-like object named after the field path
         file_obj = FileFromInputStream(substream)
         file_obj.name = field.path
         new_window(self, file_obj, sub_parser, file_obj.name)

Example #3

0

Show file

File: app.py Project: pombreda/Law-Enforcement-Scripts

 def on_field_parse_substream(self, dispatcher, field):
     """Show the parsed sub-stream of ``field`` in a new window.

     Does nothing when guessParser() finds no parser for the sub-stream.
     The ``dispatcher`` argument is accepted but not used.
     """
     sub_stream = field.getSubIStream()
     guessed = guessParser(sub_stream)
     if not guessed:
         return
     # file-like view of the sub-stream, named after the field's path
     sub_file = FileFromInputStream(sub_stream)
     sub_file.name = field.path
     new_window(self, sub_file, guessed, sub_file.name)

Example #4

0

Show file

def get_parser(data, streamdata, sessid):
    """Guess or retrieve the parser based on the stream.

    Streams are retrieved from the "data" persistent storage variable, from
    the "streams" key.

    The parser for the main stream ((None, None, filename) in data['streams'])
    is cached for efficiency reasons in data['parser_cache'].

    Arguments:
      data -- session dictionary; data['filename'] is read and
          data['parser_cache'] is read and, on first use, written.
      streamdata -- one stream entry.  Three shapes are handled, selected
          by streamdata[0]:
            None   -> the originally uploaded file;
            tuple  -> a substream stored by reference: streamdata[0] is the
                      parent's stream entry and streamdata[1] the field path
                      inside the parent's parser;
            other  -> a substream stored inline: streamdata[0] is assigned
                      to stream.tags and streamdata[1] is the raw data.
      sessid -- session id; used to build the on-disk name of the uploaded
          file and passed to save_data().

    Returns a (stream, parser) tuple.  (None, None) is returned when no
    parser is found for the uploaded file, or when the parent of a
    referenced substream has no parser.  For substreams, parser is whatever
    guessParser() returned for the stream.

    """
    # must remake parser EVERY TIME because parsers can't be pickled
    # (they contain generators which are currently not pickleable)
    # best I can do here is cache the parser, so at least we're not
    # taking time to re-guess the parser...
    if streamdata[0] is None:  # original file
        stream = FileInputStream(data['filename'],
                                 real_filename=unicode(tmp_dir + sessid +
                                                       '.file'))
        if 'parser_cache' in data:
            # only the parser class is cached; instantiate it on the stream
            parser = data['parser_cache'](stream)
        else:
            parser = guessParser(stream)
            if not parser:
                print_parse_error()
                return (None, None)
            data['parser_cache'] = parser.__class__
            save_data(data, sessid)
    elif isinstance(streamdata[0], tuple):
        # substream stored as a reference to a field of its parent stream
        prevparser = get_parser(data, streamdata[0], sessid)[1]
        if prevparser is None:
            # the parent could not be parsed; propagate the failure instead
            # of raising TypeError when subscripting None
            return (None, None)
        stream = prevparser[streamdata[1]].getSubIStream()
        parser = guessParser(stream)
    else:
        # substream stored inline as (tags, raw data, ...)
        stream = StringInputStream(streamdata[1])
        stream.tags = streamdata[0]
        parser = guessParser(stream)
    return stream, parser

Example #5

0

Show file

File: media.py Project: iskracat/Hitotsubashi-University

def parse_raw(raw):
    """Return extract_metadata() of the parser guessed for ``raw``.

    ``raw`` is wrapped in an InputIOStream (presumably a file-like object;
    confirm against callers) and handed to guessParser(); the guessed
    parser is passed to extract_metadata() unchanged.
    """
    return extract_metadata(guessParser(InputIOStream(raw)))

Example #6

0

Show file

def handle_form():
    """Process submitted data.

    See comments for details.

    """
    prune_old()
    form = cgi.FieldStorage()
    if 'file' in form and form['file'].file:
        # compute session id
        sessid = get_sessid()
        if not sessid:
            rand = str(time.time()) + form['file'].filename + str(
                random.random())
            sessid = hashlib.md5(rand).hexdigest()
        # write uploaded file
        f = open(tmp_dir + sessid + '.file', 'wb')
        if form['file'].done == -1:
            raise ValueError("File upload canceled?")
        while f.tell() < 2**22:  # 4MB limit
            chunk = form['file'].file.read(32768)  # 32KB chunks
            if not chunk:
                break
            f.write(chunk)
        if f.tell() == 0:
            f.close()
            print_form('Nothing uploaded.')
            return
        f.close()
        # write session variables
        try:
            fn = unicode(form['file'].filename, 'utf-8')
        except UnicodeDecodeError:
            fn = unicode(form['file'].filename, 'iso-8859-1')
        # stream "None" represents the original stream
        save_data({'filename': fn, 'streams': [(None, None, fn)]}, sessid)
        # send session id and reset variables
        c = SimpleCookie()
        c['sess'] = sessid
        c['hpath'] = '/'  # clear path var.
        c['stream'] = '0'  # clear stream var
        print c  # send cookie to client (headers)
        print_page()  # print AJAX frame page
    elif get_sessid():  # or perhaps you already have a file to parse?
        if not 'hpath' in form:
            print_page()
            return
        # redirect stderr, so we can catch parser errors
        sys.stderr = StringIO()
        # load variables
        hpath = cgi.escape(form.getfirst('hpath', '/'))
        stream_id = int(form.getfirst('stream', '0'))
        path = hpath.split(':')[stream_id]
        sessid = get_sessid()
        try:
            data = cPickle.load(file(tmp_dir + sessid + '.sess', 'rb'))
        except IOError:
            print_error('Your file was deleted due to inactivity. '
                        'Please upload a new one.')
            return
        stream, parser = get_parser(data, data['streams'][stream_id], sessid)
        if parser is None:
            return  # sorry, couldn't parse file!
        if 'save' in form:
            # "Download Raw"
            f = FileFromInputStream(stream)
            fld = parser[path]
            f.seek(fld.absolute_address / 8)
            size = alignValue(fld.size, 8) / 8
            sys.stdout.write('Content-Type: application/octet-stream\r\n')
            sys.stdout.write('Content-Length: %i\r\n' % size)
            sys.stdout.write('Content-Disposition: attachment; '
                             'filename=%s\r\n\r\n' %
                             path.strip('/').split('/')[-1])
            sys.stdout.write(f.read(size))
            return
        elif 'savesub' in form:
            # "Download Substream"
            stream = parser[path.rstrip('/')].getSubIStream()
            filename = path.strip('/').split('/')[-1]
            tags = getattr(stream, 'tags', [])
            for tag in tags:
                if tag[0] == 'filename':
                    filename = tag[1]
            sys.stdout.write('Content-Type: application/octet-stream\r\n')
            sys.stdout.write('Content-Disposition: attachment; '
                             'filename=%s\r\n\r\n' % filename)
            sys.stdout.write(FileFromInputStream(stream).read())
            return
        elif 'addStream' in form:
            # "Parse Substream"
            spath = cgi.escape(form['addStream'].value)
            new_stream = parser[spath.rstrip('/')].getSubIStream()
            streamdata = FileFromInputStream(new_stream).read()
            new_parser = guessParser(new_stream)
            if new_parser:
                stream = new_stream
                parser = new_parser
                tags = getattr(stream, 'tags', [])
                streamname = data['streams'][stream_id][2] + ':'
                data['streams'].append((tags, streamdata, streamname + spath))
                try:
                    if force_substream_ref:
                        raise Exception("Use references for all substreams")
                    save_data(data, sessid)
                except Exception:
                    # many things could go wrong with pickling
                    data['streams'][-1] = (data['streams'][stream_id], spath,
                                           streamname + spath)
                    save_data(data, sessid)
                path = '/'
                hpath += ':/'
                stream_id = len(data['streams']) - 1
            else:
                sys.stderr.write("Cannot parse substream %s: "
                                 "No suitable parser\n" % spath)
        elif 'delStream' in form:
            # "Delete Stream"
            n = int(form['delStream'].value)
            paths = hpath.split(':')
            del paths[n]
            del data['streams'][n]
            if n >= len(data['streams']):
                stream_id = 0
            else:
                stream_id = n
            path = paths[stream_id]
            hpath = ':'.join(paths)
            save_data(data, sessid)
            stream, parser = get_parser(data, data['streams'][stream_id],
                                        sessid)
        # update client's variables
        c = SimpleCookie()
        c['hpath'] = hpath
        c['stream'] = str(stream_id)
        print c  # send cookie to client
        # send headers
        print 'Content-Type: text/html'
        print
        # breadcrumb trail path up top
        print_path(path, data, stream_id)
        # fields
        print '''<table id="maintable" border="1">
<tr class="header">
    <th class="headertext">Offset</th>
    <th class="headertext">Name</th>
    <th class="headertext">Type</th>
    <th class="headertext">Size</th>
    <th class="headertext">Description</th>
    <th class="headertext">Data</th>
    <th class="headertext">Download Field</th>
</tr>'''
        for i in parser[path]:
            # determine options
            display = i.raw_display if form.getfirst('raw','0') == '1'\
                else i.display
            disp_off = bits2hex if form.getfirst('hex','0') == '1'\
                else bits2dec
            addr = i.address if form.getfirst('rel','0') == '1'\
                else i.absolute_address
            if display == 'None':
                display = ''
            # clickable name for field sets
            if i.is_field_set:
                name = '''<span href="#" onClick="goPath('%s%s/')"\
 class="fieldlink">%s/</span>''' % (path, i.name, i.name)
            else:
                name = i.name
            print '<tr class="data">'
            print '<td class="fldaddress">%s</td>' % disp_off(addr)
            print '<td class="fldname">%s</td>' % name
            print '<td class="fldtype">%s</td>' % i.__class__.__name__
            print '<td class="fldsize">%s</td>' % disp_off(i.size)
            print '<td class="flddesc">%s</td>' % i.description
            print '<td class="flddisplay">%s</td>' % display
            print '<td class="flddownload">'
            paths = hpath.split(':')
            paths[stream_id] += i.name
            url = "%s?hpath=%s&stream=%s"%\
                (script_name,':'.join(paths), stream_id)
            # hack to determine if a substream is present
            # the default getSubIStream() returns InputFieldStream()
            # InputFieldStream() then returns an InputSubStream.
            # in all the overrides, the return is a different stream type,
            # but this is certainly not the safest way to check for
            # an overridden method...
            # finally, if the field is a SubFile, then it has a custom
            # substream, and thus gets the substream features.
            if not isinstance(i.getSubIStream(), InputSubStream)\
                or isinstance(i, SubFile):
                print '<a href="javascript:addStream(\'%s\')"\
 class="dllink">Parse Substream</a><br/>' % (path + i.name)
                print '<a href="%s&savesub=1"\
 class="dllink">Download Substream</a><br/>' % url
                print '<a href="%s&save=1"\
 class="dllink">Download Raw</a>' % url
            else:
                print '<a href="%s&save=1"\
 class="dllink">Download</a>' % url
            print '</td>'
            print '</tr>'
        print '</table>'
        print_path(path, data, stream_id)
        if sys.stderr.getvalue():
            print_error('Error(s) encountered:', print_headers=False)
            print '<pre class="parseerror">%s</pre>' % sys.stderr.getvalue()
    else:
        print_form('Note: Cookies MUST be enabled!')

Example #7

0

Show file

def parse_raw(raw):
    """Guess a parser for ``raw`` and return its extracted metadata.

    ``raw`` is wrapped in an InputIOStream (presumably a file-like object;
    confirm against callers).  The result of guessParser() is passed
    straight to extract_metadata(), whose return value is returned.
    """
    guessed = guessParser(InputIOStream(raw))
    return extract_metadata(guessed)

Example #8

0

Show file

File: hachoir.py Project: Woerd88/hachoir

def handle_form():
    """Process submitted data.

    See comments for details.

    """
    prune_old()
    form = cgi.FieldStorage()
    if 'file' in form and form['file'].file:
        # compute session id
        sessid = get_sessid()
        if not sessid:
            rand = str(time.time())+form['file'].filename+str(random.random())
            sessid = hashlib.md5(rand).hexdigest()
        # write uploaded file
        f = open(tmp_dir+sessid+'.file','wb')
        if form['file'].done==-1:
            raise ValueError("File upload canceled?")
        while f.tell()<2**22: # 4MB limit
            chunk = form['file'].file.read(32768) # 32KB chunks
            if not chunk:
                break
            f.write(chunk)
        if f.tell() == 0:
            f.close()
            print_form('Nothing uploaded.')
            return
        f.close()
        # write session variables
        try:
            fn = unicode(form['file'].filename,'utf-8')
        except UnicodeDecodeError:
            fn = unicode(form['file'].filename,'iso-8859-1')
        # stream "None" represents the original stream
        save_data({'filename':fn,'streams':[(None, None, fn)]}, sessid)
        # send session id and reset variables
        c = SimpleCookie()
        c['sess'] = sessid
        c['hpath'] = '/' # clear path var.
        c['stream'] = '0' # clear stream var
        print c # send cookie to client (headers)
        print_page() # print AJAX frame page
    elif get_sessid(): # or perhaps you already have a file to parse?
        if not 'hpath' in form:
            print_page()
            return
        # redirect stderr, so we can catch parser errors
        sys.stderr = StringIO()
        # load variables
        hpath = cgi.escape(form.getfirst('hpath','/'))
        stream_id = int(form.getfirst('stream','0'))
        path = hpath.split(':')[stream_id]
        sessid = get_sessid()
        try:
            data = cPickle.load(file(tmp_dir+sessid+'.sess','rb'))
        except IOError:
            print_error('Your file was deleted due to inactivity. '
                'Please upload a new one.')
            return
        stream, parser = get_parser(data, data['streams'][stream_id], sessid)
        if parser is None:
            return # sorry, couldn't parse file!
        if 'save' in form:
            # "Download Raw"
            f = FileFromInputStream(stream)
            fld = parser[path]
            f.seek(fld.absolute_address/8)
            size = alignValue(fld.size, 8)/8
            sys.stdout.write('Content-Type: application/octet-stream\r\n')
            sys.stdout.write('Content-Length: %i\r\n'%size)
            sys.stdout.write('Content-Disposition: attachment; '
                'filename=%s\r\n\r\n'%path.strip('/').split('/')[-1])
            sys.stdout.write(f.read(size))
            return
        elif 'savesub' in form:
            # "Download Substream"
            stream = parser[path.rstrip('/')].getSubIStream()
            filename = path.strip('/').split('/')[-1]
            tags = getattr(stream,'tags',[])
            for tag in tags:
                if tag[0] == 'filename':
                    filename = tag[1]
            sys.stdout.write('Content-Type: application/octet-stream\r\n')
            sys.stdout.write('Content-Disposition: attachment; '
                'filename=%s\r\n\r\n'%filename)
            sys.stdout.write(FileFromInputStream(stream).read())
            return
        elif 'addStream' in form:
            # "Parse Substream"
            spath = cgi.escape(form['addStream'].value)
            new_stream = parser[spath.rstrip('/')].getSubIStream()
            streamdata = FileFromInputStream(new_stream).read()
            new_parser = guessParser(new_stream)
            if new_parser:
                stream = new_stream
                parser = new_parser
                tags = getattr(stream,'tags',[])
                streamname = data['streams'][stream_id][2]+':'
                data['streams'].append((tags, streamdata, streamname+spath))
                try:
                    if force_substream_ref:
                        raise Exception("Use references for all substreams")
                    save_data(data, sessid)
                except Exception:
                    # many things could go wrong with pickling
                    data['streams'][-1] = (data['streams'][stream_id],
                        spath, streamname+spath)
                    save_data(data, sessid)
                path = '/'
                hpath += ':/'
                stream_id = len(data['streams'])-1
            else:
                sys.stderr.write("Cannot parse substream %s: "
                    "No suitable parser\n"%spath)
        elif 'delStream' in form:
            # "Delete Stream"
            n = int(form['delStream'].value)
            paths = hpath.split(':')
            del paths[n]
            del data['streams'][n]
            if n >= len(data['streams']):
                stream_id = 0
            else:
                stream_id = n
            path = paths[stream_id]
            hpath = ':'.join(paths)
            save_data(data, sessid)
            stream, parser = get_parser(data, data['streams'][stream_id],
                sessid)
        # update client's variables
        c = SimpleCookie()
        c['hpath'] = hpath
        c['stream'] = str(stream_id)
        print c # send cookie to client
        # send headers
        print 'Content-Type: text/html'
        print
        # breadcrumb trail path up top
        print_path(path, data, stream_id)
        # fields
        print '''<table id="maintable" border="1">
<tr class="header">
    <th class="headertext">Offset</th>
    <th class="headertext">Name</th>
    <th class="headertext">Type</th>
    <th class="headertext">Size</th>
    <th class="headertext">Description</th>
    <th class="headertext">Data</th>
    <th class="headertext">Download Field</th>
</tr>'''
        for i in parser[path]:
            # determine options
            display = i.raw_display if form.getfirst('raw','0') == '1'\
                else i.display
            disp_off = bits2hex if form.getfirst('hex','0') == '1'\
                else bits2dec
            addr = i.address if form.getfirst('rel','0') == '1'\
                else i.absolute_address
            if display == 'None':
                display = ''
            # clickable name for field sets
            if i.is_field_set:
                name = '''<span href="#" onClick="goPath('%s%s/')"\
 class="fieldlink">%s/</span>'''%(path, i.name, i.name)
            else:
                name = i.name
            print '<tr class="data">'
            print '<td class="fldaddress">%s</td>'%disp_off(addr)
            print '<td class="fldname">%s</td>'%name
            print '<td class="fldtype">%s</td>'%i.__class__.__name__
            print '<td class="fldsize">%s</td>'%disp_off(i.size)
            print '<td class="flddesc">%s</td>'%i.description
            print '<td class="flddisplay">%s</td>'%display
            print '<td class="flddownload">'
            paths = hpath.split(':')
            paths[stream_id] += i.name
            url = "%s?hpath=%s&stream=%s"%\
                (script_name,':'.join(paths), stream_id)
            # hack to determine if a substream is present
            # the default getSubIStream() returns InputFieldStream()
            # InputFieldStream() then returns an InputSubStream.
            # in all the overrides, the return is a different stream type,
            # but this is certainly not the safest way to check for
            # an overridden method...
            # finally, if the field is a SubFile, then it has a custom
            # substream, and thus gets the substream features.
            if not isinstance(i.getSubIStream(), InputSubStream)\
                or isinstance(i, SubFile):
                print '<a href="javascript:addStream(\'%s\')"\
 class="dllink">Parse Substream</a><br/>'%(path+i.name)
                print '<a href="%s&savesub=1"\
 class="dllink">Download Substream</a><br/>'%url
                print '<a href="%s&save=1"\
 class="dllink">Download Raw</a>'%url
            else:
                print '<a href="%s&save=1"\
 class="dllink">Download</a>'%url
            print '</td>'
            print '</tr>'
        print '</table>'
        print_path(path, data, stream_id)
        if sys.stderr.getvalue():
            print_error('Error(s) encountered:', print_headers=False)
            print '<pre class="parseerror">%s</pre>'%sys.stderr.getvalue()
    else:
        print_form('Note: Cookies MUST be enabled!')