Esempio n. 1
0
def test_os_path_to_uri():
    '''
    Check iri.os_path_to_uri() against the file_paths table for each OS name.

    Every file_paths entry is unpacked as (path, nt_uri, posix_uri).  The
    expected URI is picked according to the OS name under test; an expected
    value of None means the conversion must raise IriError.
    '''
    for osname in ('posix', 'nt'):
        for path, nt_uri, posix_uri in file_paths:
            # repr() gives a printable label whether or not path is a str
            testname = repr(path)
            uri = nt_uri if osname == 'nt' else posix_uri
            if uri is None:
                # pytest.raises takes no free-form message argument; a second
                # positional argument is interpreted as a callable to invoke
                with pytest.raises(IriError):
                    iri.os_path_to_uri(path, attemptAbsolute=False, osname=osname)
            else:
                result = iri.os_path_to_uri(path, attemptAbsolute=False, osname=osname)
                # format() avoids a TypeError while building the failure message
                assert uri == result, '{}: {}: {}'.format(osname, testname, uri)
Esempio n. 2
0
def test_os_path_to_uri():
    '''
    Verify iri.os_path_to_uri() for both the 'posix' and 'nt' OS names.

    file_paths supplies (path, nt_uri, posix_uri) triples.  For the OS name
    being tested the matching expected URI is selected; when that expected
    value is None the call is required to raise IriError instead.
    '''
    for osname in ('posix', 'nt'):
        for path, nt_uri, posix_uri in file_paths:
            # Always use repr() so the label is a str even for non-str paths
            testname = repr(path)
            if osname == 'nt':
                expected = nt_uri
            else:
                expected = posix_uri
            if expected is None:
                # No message argument here: pytest.raises would treat a second
                # positional argument as the callable to run
                with pytest.raises(IriError):
                    iri.os_path_to_uri(path, attemptAbsolute=False, osname=osname)
                continue
            actual = iri.os_path_to_uri(path, attemptAbsolute=False, osname=osname)
            # Build the failure message with format() so it cannot itself raise
            assert expected == actual, '{}: {}: {}'.format(osname, testname, expected)
Esempio n. 3
0
    def __init__(self,
                 obj,
                 siri=None,
                 encoding=None,
                 streamopenmode='rb',
                 sourcetype=inputsourcetype.unknown):
        '''
        Set up self.stream, self.iri and self.sourcetype from obj.

        obj - byte string, proper string (only if you really know what you're doing),
            file-like object (stream), file path or URI.
        siri - optional override URI.  Base URI for the input source will be set to
            this value
        encoding - accepted for interface compatibility; not used by this initializer
        streamopenmode - mode passed to open() when obj is handled as a file path
        sourcetype - inputsourcetype member selecting how obj is interpreted; when
            it matches none of the explicit types the choice is guessed from obj
            and siri

        Raises ValueError if obj is an empty str/bytes, or if it cannot be
        recognized as any supported kind of input.

        >>> from amara3 import inputsource
        >>> inp = inputsource('abc')
        >>> inp.stream
        <_io.StringIO object at 0x1056fbf78>
        >>> inp.iri
        >>> print(inp.iri)
        None
        >>> inp = inputsource(['abc', 'def']) #Now multiple streams in one source
        >>> inp.stream
        <_io.StringIO object at 0x1011aff78>
        >>> print(inp.iri)
        None
        >>> inp = next(inp)
        >>> inp.stream
        <_io.StringIO object at 0x1011af5e8>
        >>> print(inp.iri)
        None
        >>>

        NOTE(review): the list example above does not appear to be handled by
        the branches in this initializer (a list would reach the final
        ValueError) - confirm whether it is supported elsewhere.
        '''
        # from amara3 import inputsource; inp = inputsource('foo.zip')
        # from amara3 import inputsource; inp = inputsource('test/resource/std-examples.zip')
        # s = inp.stream.read(100)
        # s
        # b'<?xml version="1.0" encoding="UTF-8"?>\r\n<!-- edited with XML Spy v4.3 U (http://www.xmlspy.com) by M'
        # s
        # b'<?xml version="1.0" encoding="UTF-8"?>\r\n<collection xmlns="http://www.loc.gov/MARC21/slim">\r\n  <reco'

        self.stream = None
        self.iri = siri
        self.sourcetype = sourcetype

        if obj in ('', b''):
            raise ValueError("Cannot parse an empty string as XML")

        if hasattr(obj, 'read'):
            # Already a file-like object: use it directly as the stream
            #Create dummy Uri to use as base
            #uri = uri or uuid4().urn
            self.stream = obj
        #elif sourcetype == inputsourcetype.xmlstring:
        #See this article about XML detection heuristics
        #http://www.xml.com/pub/a/2007/02/28/what-does-xml-smell-like.html
        #uri = uri or uuid4().urn
        elif self.sourcetype == inputsourcetype.iri or (
                siri and iri.matches_uri_syntax(obj)):
            self.iri = siri or obj
            # Fetch from obj itself; siri only overrides the base IRI, in the
            # same way the filename branch opens obj rather than siri
            self.stream = urlopen(obj)
        elif self.sourcetype == inputsourcetype.filename or (
                siri and iri.is_absolute(obj) and not os.path.isfile(obj)):
            #FIXME: convert path to URI
            # NOTE(review): the heuristic requires obj NOT to be an existing
            # file, yet open(obj) follows - confirm the intended condition
            self.iri = siri or iri.os_path_to_uri(obj)
            self.stream = open(obj, streamopenmode)
        elif self.sourcetype == inputsourcetype.string or isinstance(
                obj, (str, bytes)):
            if isinstance(obj, bytes):
                # StringIO only accepts str; bytes need a binary buffer
                from io import BytesIO
                self.stream = BytesIO(obj)
            else:
                self.stream = StringIO(obj)
            #If obj is beyond a certain length, don't even try it as a URI
            #if len(obj) < MAX_URI_LENGTH_FOR_HEURISTIC:
            #    self.iri = iri.os_path_to_uri(obj)
            #    self.stream = urlopen(siri)
        else:
            raise ValueError("Unable to recognize as an inputsource")
Esempio n. 4
0
    def __init__(self, obj, siri=None, encoding=None, streamopenmode='rb',
                    sourcetype=inputsourcetype.unknown):
        '''
        Initialize the stream, IRI and source type of this input source.

        obj - byte string, proper string (only if you really know what you're doing),
            file-like object (stream), file path or URI.
        siri - optional override URI.  Base URI for the input source will be set to
            this value
        encoding - accepted for interface compatibility; not used by this initializer
        streamopenmode - mode handed to open() when obj is treated as a file path
        sourcetype - inputsourcetype member that forces an interpretation of obj;
            if it matches none of the explicit types, obj and siri are inspected
            to pick one

        Raises ValueError when obj is an empty str/bytes or is not recognized
        as any supported kind of input.

        >>> from amara3 import inputsource
        >>> inp = inputsource('abc')
        >>> inp.stream
        <_io.StringIO object at 0x1056fbf78>
        >>> inp.iri
        >>> print(inp.iri)
        None
        >>> inp = inputsource(['abc', 'def']) #Now multiple streams in one source
        >>> inp.stream
        <_io.StringIO object at 0x1011aff78>
        >>> print(inp.iri)
        None
        >>> inp = next(inp)
        >>> inp.stream
        <_io.StringIO object at 0x1011af5e8>
        >>> print(inp.iri)
        None
        >>>

        NOTE(review): a list argument, as in the example above, looks like it
        would fall through to the final ValueError in this initializer -
        confirm whether that example is still valid.
        '''
        # from amara3 import inputsource; inp = inputsource('foo.zip')
        # from amara3 import inputsource; inp = inputsource('test/resource/std-examples.zip')
        # s = inp.stream.read(100)
        # s
        # b'<?xml version="1.0" encoding="UTF-8"?>\r\n<!-- edited with XML Spy v4.3 U (http://www.xmlspy.com) by M'
        # s
        # b'<?xml version="1.0" encoding="UTF-8"?>\r\n<collection xmlns="http://www.loc.gov/MARC21/slim">\r\n  <reco'

        self.stream = None
        self.iri = siri
        self.sourcetype = sourcetype

        if obj in ('', b''):
            raise ValueError("Cannot parse an empty string as XML")

        is_text_or_bytes = isinstance(obj, (str, bytes))

        if hasattr(obj, 'read'):
            # File-like object supplied by the caller; adopt it as the stream
            #Create dummy Uri to use as base
            #uri = uri or uuid4().urn
            self.stream = obj
        #elif sourcetype == inputsourcetype.xmlstring:
            #See this article about XML detection heuristics
            #http://www.xml.com/pub/a/2007/02/28/what-does-xml-smell-like.html
            #uri = uri or uuid4().urn
        elif self.sourcetype == inputsourcetype.iri or (siri and iri.matches_uri_syntax(obj)):
            self.iri = siri or obj
            # Open the address given in obj; siri is only the base IRI override,
            # consistent with how the filename branch opens obj
            self.stream = urlopen(obj)
        elif self.sourcetype == inputsourcetype.filename or (siri and iri.is_absolute(obj) and not os.path.isfile(obj)):
            #FIXME: convert path to URI
            # NOTE(review): this guess fires only when obj is NOT an existing
            # file, but open(obj) comes next - confirm the intended test
            self.iri = siri or iri.os_path_to_uri(obj)
            self.stream = open(obj, streamopenmode)
        elif self.sourcetype == inputsourcetype.string or is_text_or_bytes:
            if isinstance(obj, bytes):
                # io.StringIO rejects bytes, so wrap them in a binary buffer
                from io import BytesIO
                self.stream = BytesIO(obj)
            else:
                self.stream = StringIO(obj)
            #If obj is beyond a certain length, don't even try it as a URI
            #if len(obj) < MAX_URI_LENGTH_FOR_HEURISTIC:
            #    self.iri = iri.os_path_to_uri(obj)
            #    self.stream = urlopen(siri)
        else:
            raise ValueError("Unable to recognize as an inputsource")