Ejemplo n.º 1
0
Archivo: io.py Proyecto: trucgiao91/woo
def Object_load(typ,inFile,format='auto',overrideHashPercent={}):
    """Load an object from *inFile*, optionally checking its type.

    :param typ: class the loaded object must be an instance of; ``None`` disables the check.
    :param inFile: path of the file to read.
    :param format: one of ``'auto'``, ``'boost::serialization'``, ``'expr'``, ``'pickle'``, ``'json'``.
        With ``'auto'``, the format is guessed from the first 100 bytes of the file
        (transparently looking inside gzip/bzip2 data); when no known signature matches,
        loading as pickle and then as JSON is attempted.
    :param overrideHashPercent: passed to ``wooExprEval`` as ``__overrideHashPercent``; only
        accepted (when non-empty) with the ``'expr'`` format. Note that a file auto-detected
        as pickle or JSON is returned before this check is reached.
    :return: the loaded object.
    :raises ValueError: *format* is not a valid format name, or *overrideHashPercent* is
        non-empty with a format other than ``'expr'``.
    :raises TypeError: the loaded object is not an instance of *typ*.
    :raises RuntimeError: automatic format detection failed.

    .. warning:: pickle data is loaded with ``pickle.load``; do not use on untrusted files.
    """
    def typeChecked(obj,expected):
        # expected is always the outer *typ*; None means "accept anything"
        if expected is None: return obj
        if not isinstance(obj,expected): raise TypeError('Loaded object of type '+obj.__class__.__name__+' is not a '+expected.__name__)
        return obj
    def readUtf8(path):
        # read the whole file as UTF-8 text, closing the handle deterministically
        with codecs.open(path,'rb','utf-8') as f: return f.read()
    validFormats=('auto','boost::serialization','expr','pickle','json')
    if format not in validFormats: raise ValueError('format must be one of '+', '.join(validFormats)+'.')
    if format=='auto':
        format=None
        ## DO NOT use extensions to determine type, that is evil
        # check for compression first
        with open(inFile,'rb') as f: head=f.read(100)
        # we might want to pass this data to ObjectIO, which currently determines format by looking at extension
        if head[:2]==b'\x1f\x8b':
            import gzip
            with gzip.open(inFile,'rb') as f: head=f.read(100)
        elif head[:2]==b'BZ':
            import bz2
            with bz2.BZ2File(inFile,'rb') as f: head=f.read(100)
        # detect real format here (head is uncompressed already)
        # the string between nulls is 'serialization::archive'
        # see http://stackoverflow.com/questions/10614215/magic-for-detecting-boostserialization-file
        # newer versions (1.51 and perhaps greater) put '\n' (\x0a) after serialization::archive instead of null,
        # so let's just not test the byte after "archive"
        if head.startswith(b'\x16\x00\x00\x00\x00\x00\x00\x00\x73\x65\x72\x69\x61\x6c\x69\x7a\x61\x74\x69\x6f\x6e\x3a\x3a\x61\x72\x63\x68\x69\x76\x65'):
            format='boost::serialization'
        elif head.startswith(b'<?xml version="1.0"'):
            format='boost::serialization'
        elif head.startswith(b'##woo-expression##'):
            format='expr'
        else:
            # no signature matched: test pickling by trying to load (file is re-opened to start from the beginning)
            # NOTE: pickle.load executes arbitrary code from the file; only safe on trusted input
            try:
                with open(inFile,'rb') as f: loaded=pickle.load(f)
            except (IOError,KeyError,pickle.UnpicklingError,EOFError): pass
            else: return typeChecked(loaded,typ)
            # not a pickle, try JSON as the last resort
            try: loaded=WooJSONDecoder().decode(readUtf8(inFile))
            except (IOError,ValueError): pass
            else: return typeChecked(loaded,typ)
        # bytearray yields integers under both python 2 and 3
        if not format: raise RuntimeError('File format detection failed on %s (head: %s, bin: %s)'%(inFile,''.join(["\\x%02x"%x for x in bytearray(head)]),str(head)))
    if overrideHashPercent and format!='expr': raise ValueError("overrideHashPercent only applicable with the 'expr' format (not '%s')"%format)
    if format=='boost::serialization':
        # ObjectIO takes care of detecting binary, xml, compression independently
        return typeChecked(Object._boostLoad(str(inFile)),typ) # convert unicode to str, if necessary, as the c++ type is std::string
    elif format=='expr':
        return typeChecked(wooExprEval(readUtf8(inFile),inFile,__overrideHashPercent=overrideHashPercent),typ)
    elif format=='pickle':
        # NOTE: pickle.load executes arbitrary code from the file; only safe on trusted input
        with open(inFile,'rb') as f: loaded=pickle.load(f)
        return typeChecked(loaded,typ)
    elif format=='json':
        return typeChecked(WooJSONDecoder().decode(readUtf8(inFile)),typ)
    # defensive: every valid format is handled above
    raise RuntimeError("format='%s'??"%format)
Ejemplo n.º 2
0
Archivo: io.py Proyecto: CrazyHeex/woo
def Object_load(typ,inFile,format='auto',overrideHashColon={}):
    """Load an object from *inFile*, optionally checking its type.

    :param typ: class the loaded object must be an instance of; ``None`` disables the check.
    :param inFile: path of the file to read.
    :param format: one of ``'auto'``, ``'boost::serialization'``, ``'expr'``, ``'pickle'``, ``'json'``.
        With ``'auto'``, the format is guessed from the first 100 bytes of the file
        (transparently looking inside gzip/bzip2 data); when no known signature matches,
        loading as pickle and then as JSON is attempted.
    :param overrideHashColon: passed to ``wooExprEval`` as ``__overrideHashColon``; only
        accepted (when non-empty) with the ``'expr'`` format. Note that a file auto-detected
        as pickle or JSON is returned before this check is reached.
    :return: the loaded object.
    :raises ValueError: *format* is not a valid format name, or *overrideHashColon* is
        non-empty with a format other than ``'expr'``.
    :raises TypeError: the loaded object is not an instance of *typ*.
    :raises RuntimeError: automatic format detection failed.

    .. warning:: pickle data is loaded with ``pickle.load``; do not use on untrusted files.
    """
    def typeChecked(obj,expected):
        # expected is always the outer *typ*; None means "accept anything"
        if expected is None: return obj
        if not isinstance(obj,expected): raise TypeError('Loaded object of type '+obj.__class__.__name__+' is not a '+expected.__name__)
        return obj
    def readUtf8(path):
        # read the whole file as UTF-8 text, closing the handle deterministically
        with codecs.open(path,'rb','utf-8') as f: return f.read()
    validFormats=('auto','boost::serialization','expr','pickle','json')
    if format not in validFormats: raise ValueError('format must be one of '+', '.join(validFormats)+'.')
    if format=='auto':
        format=None
        ## DO NOT use extensions to determine type, that is evil
        # check for compression first
        with open(inFile,'rb') as f: head=f.read(100)
        # we might want to pass this data to ObjectIO, which currently determines format by looking at extension
        if head[:2]==b'\x1f\x8b':
            import gzip
            with gzip.open(inFile,'rb') as f: head=f.read(100)
        elif head[:2]==b'BZ':
            import bz2
            with bz2.BZ2File(inFile,'rb') as f: head=f.read(100)
        # detect real format here (head is uncompressed already)
        # the string between nulls is 'serialization::archive'
        # see http://stackoverflow.com/questions/10614215/magic-for-detecting-boostserialization-file
        # newer versions (1.51 and perhaps greater) put '\n' (\x0a) after serialization::archive instead of null,
        # so let's just not test the byte after "archive"
        if head.startswith(b'\x16\x00\x00\x00\x00\x00\x00\x00\x73\x65\x72\x69\x61\x6c\x69\x7a\x61\x74\x69\x6f\x6e\x3a\x3a\x61\x72\x63\x68\x69\x76\x65'):
            format='boost::serialization'
        elif head.startswith(b'<?xml version="1.0"'):
            format='boost::serialization'
        elif head.startswith(b'##woo-expression##'):
            format='expr'
        else:
            # no signature matched: test pickling by trying to load (file is re-opened to start from the beginning)
            # NOTE: pickle.load executes arbitrary code from the file; only safe on trusted input
            try:
                with open(inFile,'rb') as f: loaded=pickle.load(f)
            except (IOError,KeyError,pickle.UnpicklingError,EOFError): pass
            else: return typeChecked(loaded,typ)
            # not a pickle, try JSON as the last resort
            try: loaded=WooJSONDecoder().decode(readUtf8(inFile))
            except (IOError,ValueError): pass
            else: return typeChecked(loaded,typ)
        # bytearray yields integers under both python 2 and 3
        if not format: raise RuntimeError('File format detection failed on %s (head: %s, bin: %s)'%(inFile,''.join(["\\x%02x"%x for x in bytearray(head)]),str(head)))
    if overrideHashColon and format!='expr': raise ValueError("overrideHashColon only applicable with the 'expr' format (not '%s')"%format)
    if format=='boost::serialization':
        # ObjectIO takes care of detecting binary, xml, compression independently
        return typeChecked(Object._boostLoad(str(inFile)),typ) # convert unicode to str, if necessary, as the c++ type is std::string
    elif format=='expr':
        return typeChecked(wooExprEval(readUtf8(inFile),inFile,__overrideHashColon=overrideHashColon),typ)
    elif format=='pickle':
        # NOTE: pickle.load executes arbitrary code from the file; only safe on trusted input
        with open(inFile,'rb') as f: loaded=pickle.load(f)
        return typeChecked(loaded,typ)
    elif format=='json':
        return typeChecked(WooJSONDecoder().decode(readUtf8(inFile)),typ)
    # defensive: every valid format is handled above
    raise RuntimeError("format='%s'??"%format)