def test_os_path_to_uri():
    """Check iri.os_path_to_uri() against the file_paths table for both OS flavors.

    Each entry of ``file_paths`` is a ``(path, nt_uri, posix_uri)`` triple.
    For every OS name the matching expected URI is selected; an expected
    value of ``None`` means the conversion must raise ``IriError``.
    """
    for osname in ('posix', 'nt'):
        for path, nt_uri, posix_uri in file_paths:
            # Quote string paths so whitespace/escapes are visible in messages.
            testname = repr(path) if isinstance(path, str) else path
            expected = nt_uri if osname == 'nt' else posix_uri

            if expected is None:
                # pytest.raises takes no message argument; the second
                # positional slot is reserved for a callable, so only the
                # exception type is passed when used as a context manager.
                with pytest.raises(IriError):
                    iri.os_path_to_uri(path, attemptAbsolute=False, osname=osname)
            else:
                actual = iri.os_path_to_uri(path, attemptAbsolute=False, osname=osname)
                # format() rather than '+' so a non-str testname cannot turn
                # an assertion failure into a TypeError.
                assert expected == actual, \
                    '{0}: {1}: {2}'.format(osname, testname, expected)
def __init__(self, obj, siri=None, encoding=None, streamopenmode='rb',
             sourcetype=inputsourcetype.unknown):
    '''
    Set up self.stream, self.iri and self.sourcetype from obj.

    obj - byte string, proper string (only if you really know what you're
        doing), file-like object (stream), file path or URI.
    siri - optional override URI. Base URI for the input source will be set
        to this value
    encoding - accepted but not used by this constructor
    streamopenmode - mode passed to open() when obj is treated as a file path
    sourcetype - inputsourcetype member forcing how obj is interpreted;
        with the default, the kind of source is guessed from obj

    Raises ValueError if obj is an empty string/bytes, or if it cannot be
    recognized as any supported kind of source.

    >>> from amara3 import inputsource
    >>> inp = inputsource('abc')
    >>> inp.stream
    <_io.StringIO object at 0x1056fbf78>
    >>> inp.iri
    >>> print(inp.iri)
    None

    NOTE(review): the multi-source example below is not handled by any
    branch visible in this constructor - confirm where lists are supported.

    >>> inp = inputsource(['abc', 'def']) #Now multiple streams in one source
    >>> inp.stream
    <_io.StringIO object at 0x1011aff78>
    >>> print(inp.iri)
    None
    >>> inp = next(inp)
    >>> inp.stream
    <_io.StringIO object at 0x1011af5e8>
    >>> print(inp.iri)
    None
    >>>
    '''
    # Author's scratch notes on archive input (not implemented here):
    # from amara3 import inputsource; inp = inputsource('foo.zip')
    # from amara3 import inputsource; inp = inputsource('test/resource/std-examples.zip')
    # s = inp.stream.read(100)
    self.stream = None
    self.iri = siri
    self.sourcetype = sourcetype

    if obj in ('', b''):
        raise ValueError("Cannot parse an empty string as XML")

    if hasattr(obj, 'read'):
        # Already a file-like object; use it directly.
        # Possible future work: create a dummy URI (e.g. uuid4().urn) as base
        self.stream = obj
    # A dedicated XML-string source type could go here; see
    # http://www.xml.com/pub/a/2007/02/28/what-does-xml-smell-like.html
    elif self.sourcetype == inputsourcetype.iri or (
            siri and iri.matches_uri_syntax(obj)):
        self.iri = siri or obj
        # Fetch the resource named by obj; siri only overrides the base URI.
        # (Previously the `iri` helper module itself was passed to urlopen.)
        self.stream = urlopen(obj)
    elif self.sourcetype == inputsourcetype.filename or (
            siri and iri.is_absolute(obj) and not os.path.isfile(obj)):
        #FIXME: convert path to URI
        self.iri = siri or iri.os_path_to_uri(obj)
        self.stream = open(obj, streamopenmode)
    elif self.sourcetype == inputsourcetype.string or isinstance(
            obj, (str, bytes)):
        if isinstance(obj, bytes):
            # StringIO rejects bytes with TypeError; wrap them in BytesIO.
            from io import BytesIO
            self.stream = BytesIO(obj)
        else:
            self.stream = StringIO(obj)
        # Idea not yet implemented: for short strings, also try obj as a URI
        # (len(obj) < MAX_URI_LENGTH_FOR_HEURISTIC).
    else:
        raise ValueError("Unable to recognize as an inputsource")
def __init__(self, obj, siri=None, encoding=None, streamopenmode='rb',
             sourcetype=inputsourcetype.unknown):
    '''
    Set up self.stream, self.iri and self.sourcetype from obj.

    obj - byte string, proper string (only if you really know what you're
        doing), file-like object (stream), file path or URI.
    siri - optional override URI. Base URI for the input source will be set
        to this value
    encoding - accepted but not used by this constructor
    streamopenmode - mode passed to open() when obj is treated as a file path
    sourcetype - inputsourcetype member forcing how obj is interpreted;
        with the default, the kind of source is guessed from obj

    Raises ValueError if obj is an empty string/bytes, or if it cannot be
    recognized as any supported kind of source.

    >>> from amara3 import inputsource
    >>> inp = inputsource('abc')
    >>> inp.stream
    <_io.StringIO object at 0x1056fbf78>
    >>> inp.iri
    >>> print(inp.iri)
    None

    NOTE(review): the multi-source example below is not handled by any
    branch visible in this constructor - confirm where lists are supported.

    >>> inp = inputsource(['abc', 'def']) #Now multiple streams in one source
    >>> inp.stream
    <_io.StringIO object at 0x1011aff78>
    >>> print(inp.iri)
    None
    >>> inp = next(inp)
    >>> inp.stream
    <_io.StringIO object at 0x1011af5e8>
    >>> print(inp.iri)
    None
    >>>
    '''
    # Author's scratch notes on archive input (not implemented here):
    # from amara3 import inputsource; inp = inputsource('foo.zip')
    # from amara3 import inputsource; inp = inputsource('test/resource/std-examples.zip')
    # s = inp.stream.read(100)
    self.stream = None
    self.iri = siri
    self.sourcetype = sourcetype

    if obj in ('', b''):
        raise ValueError("Cannot parse an empty string as XML")

    if hasattr(obj, 'read'):
        # Already a file-like object; use it directly.
        # Possible future work: create a dummy URI (e.g. uuid4().urn) as base
        self.stream = obj
    # A dedicated XML-string source type could go here; see
    # http://www.xml.com/pub/a/2007/02/28/what-does-xml-smell-like.html
    elif self.sourcetype == inputsourcetype.iri or (
            siri and iri.matches_uri_syntax(obj)):
        self.iri = siri or obj
        # Fetch the resource named by obj; siri only overrides the base URI.
        # (Previously the `iri` helper module itself was passed to urlopen.)
        self.stream = urlopen(obj)
    elif self.sourcetype == inputsourcetype.filename or (
            siri and iri.is_absolute(obj) and not os.path.isfile(obj)):
        #FIXME: convert path to URI
        self.iri = siri or iri.os_path_to_uri(obj)
        self.stream = open(obj, streamopenmode)
    elif self.sourcetype == inputsourcetype.string or isinstance(
            obj, (str, bytes)):
        if isinstance(obj, bytes):
            # StringIO rejects bytes with TypeError; wrap them in BytesIO.
            from io import BytesIO
            self.stream = BytesIO(obj)
        else:
            self.stream = StringIO(obj)
        # Idea not yet implemented: for short strings, also try obj as a URI
        # (len(obj) < MAX_URI_LENGTH_FOR_HEURISTIC).
    else:
        raise ValueError("Unable to recognize as an inputsource")