def __new__(cls, arg, uri=None, encoding=None, resolver=None, sourcetype=0):
    """
    Build an input source from arg, suitable for passing to Amara APIs.

    arg - the thing to read from.  The checks below are tried in order and
          the first one that matches wins:
            * an InputSource instance - returned as is
            * a urllib2.Request - opened through the resolver; the base URI
              is the request's full URL
            * an object with a 'read' attribute - used directly as the stream
            * XML text (sourcetype == XMLSTRING, or isxml(arg) is true) -
              wrapped in a StringIO
            * a value for which is_absolute(arg) holds and which is not an
              existing local file - opened through the resolver as a URI
            * anything else shorter than MAX_URI_LENGTH_FOR_HEURISTIC -
              converted with os_path_to_uri and opened through the resolver
    uri - base URI for the input source.  Only consulted when arg is a
          stream or XML text (a uuid4 URN is generated if it is omitted);
          in the other cases the base URI is derived from arg itself
    encoding - handed through unchanged to InputSource.__new__
    resolver - object whose resolve() method opens requests and URIs;
          falls back to amara.lib.irihelpers.DEFAULT_RESOLVER
    sourcetype - pass XMLSTRING to force arg to be treated as XML text
          without running the isxml() heuristic

    Returns an input source which can be passed to Amara APIs.

    Raises ValueError if arg is the empty string, or if it matched none of
    the cases above (i.e. it is too long to be tried as a path/URI).
    """
    #These imports are tucked in here because amara.lib.iri is an expensive
    #import, and importing inside the function also avoids circular imports
    #from amara._xmlstring import IsXml as isxml
    from amara.lib.iri import is_absolute, os_path_to_uri
    from amara.lib.irihelpers import DEFAULT_RESOLVER

    if not resolver:
        resolver = DEFAULT_RESOLVER

    def build(stream, base_uri):
        #InputSource.__new__ is in C: expat/input_source.c:inputsource_new
        return InputSource.__new__(cls, stream, base_uri, encoding)

    #Already an input source: hand the very same object back
    if isinstance(arg, InputSource):
        return arg

    #if arg == (u'', ''): -> UnicodeWarning: Unicode equal comparison failed
    #to convert both arguments to Unicode - interpreting them as being unequal
    if arg == '':
        #FIXME L10N
        raise ValueError("Cannot parse an empty string as XML")

    if isinstance(arg, urllib2.Request):
        #get_full_url is one of the rightly labeled "lame" helper methods
        #in urllib2 ;)
        request_uri = arg.get_full_url()
        return build(resolver.resolve(arg), request_uri)

    if hasattr(arg, 'read'):
        #File-like object: create a dummy URI to use as base if none given
        base_uri = uri if uri else uuid4().urn
        return build(arg, base_uri)

    #XXX: Should we at this point refuse to proceed unless it's a basestring?
    if sourcetype == XMLSTRING or isxml(arg):
        #See this article about XML detection heuristics
        #http://www.xml.com/pub/a/2007/02/28/what-does-xml-smell-like.html
        base_uri = uri if uri else uuid4().urn
        return build(StringIO(arg), base_uri)

    if is_absolute(arg) and not os.path.isfile(arg):
        #arg itself serves as the base URI
        return build(resolver.resolve(arg), arg)

    #If the arg is beyond a certain length, don't even try it as a URI
    if len(arg) < MAX_URI_LENGTH_FOR_HEURISTIC:
        path_uri = os_path_to_uri(arg)
        return build(resolver.resolve(path_uri), path_uri)

    #We might add the ability to load zips, gzips & bzip2s
    #http://docs.python.org/lib/module-zlib.html
    #http://docs.python.org/lib/module-gzip.html
    #http://docs.python.org/lib/module-bz2.html
    #http://docs.python.org/lib/zipfile-objects.html

    #FIXME L10N
    raise ValueError("Does not appear to be well-formed XML")
def __new__(cls, arg, uri=None, encoding=None, resolver=None, sourcetype=0):
    """
    Create an input source from arg that can be passed to Amara APIs.

    arg - what to read from.  It is classified by the first matching rule:
            1. InputSource instance: returned unchanged
            2. urllib2.Request: resolved through the resolver, with the
               request's full URL as the base URI
            3. object exposing a 'read' attribute: used as the stream as is
            4. XML text, i.e. sourcetype == XMLSTRING or isxml(arg) is
               true: wrapped in a StringIO
            5. is_absolute(arg) holds and arg is not an existing local
               file: resolved through the resolver as a URI
            6. shorter than MAX_URI_LENGTH_FOR_HEURISTIC: turned into a
               URI with os_path_to_uri and resolved through the resolver
    uri - base URI to assign.  Used only for rules 3 and 4, where a uuid4
          URN is generated when it is not given; rules 2, 5 and 6 take the
          base URI from arg
    encoding - passed as is to InputSource.__new__
    resolver - provides resolve() for requests and URIs; when not given,
          amara.lib.irihelpers.DEFAULT_RESOLVER is used
    sourcetype - XMLSTRING forces rule 4 without consulting isxml()

    Returns an input source which can be passed to Amara APIs.

    Raises ValueError for an empty string, and for any arg that falls
    through all of the rules above.
    """
    #Function-level imports: amara.lib.iri is expensive to import, and
    #deferring it also sidesteps circular imports
    #from amara._xmlstring import IsXml as isxml
    from amara.lib.iri import is_absolute, os_path_to_uri
    from amara.lib.irihelpers import DEFAULT_RESOLVER

    if not resolver:
        resolver = DEFAULT_RESOLVER

    def construct(source_stream, source_uri):
        #InputSource.__new__ is in C: expat/input_source.c:inputsource_new
        return InputSource.__new__(cls, source_stream, source_uri, encoding)

    #Nothing to do when given a ready-made input source
    if isinstance(arg, InputSource):
        return arg

    #if arg == (u'', ''): -> UnicodeWarning: Unicode equal comparison failed
    #to convert both arguments to Unicode - interpreting them as being unequal
    if arg == '':
        #FIXME L10N
        raise ValueError("Cannot parse an empty string as XML")

    if isinstance(arg, urllib2.Request):
        #One of the rightly labeled "lame" helper methods in urllib2 ;)
        full_url = arg.get_full_url()
        return construct(resolver.resolve(arg), full_url)

    if hasattr(arg, 'read'):
        #Stream without a caller-supplied URI gets a dummy one as base
        stream_uri = uri if uri else uuid4().urn
        return construct(arg, stream_uri)

    #XXX: Should we at this point refuse to proceed unless it's a basestring?
    if sourcetype == XMLSTRING or isxml(arg):
        #See this article about XML detection heuristics
        #http://www.xml.com/pub/a/2007/02/28/what-does-xml-smell-like.html
        text_uri = uri if uri else uuid4().urn
        return construct(StringIO(arg), text_uri)

    if is_absolute(arg) and not os.path.isfile(arg):
        #The argument doubles as the base URI
        return construct(resolver.resolve(arg), arg)

    #If the arg is beyond a certain length, don't even try it as a URI
    if len(arg) < MAX_URI_LENGTH_FOR_HEURISTIC:
        file_uri = os_path_to_uri(arg)
        return construct(resolver.resolve(file_uri), file_uri)

    #We might add the ability to load zips, gzips & bzip2s
    #http://docs.python.org/lib/module-zlib.html
    #http://docs.python.org/lib/module-gzip.html
    #http://docs.python.org/lib/module-bz2.html
    #http://docs.python.org/lib/zipfile-objects.html

    #FIXME L10N
    raise ValueError("Does not appear to be well-formed XML")
raise except Exception, e: pass if imt in EXCEL_IMTS: source = speadsheet.read(body) dataprofile = {} try: data = ss_data or [ row for row in source.rows() ] except (KeyboardInterrupt, SystemExit): raise except Exception, e: raise #print >> sys.stderr, e #print >> sys.stderr, 'Spreadsheet processing failure. No data to return.' imt = EXCEL_IMTS[0] elif isxml(body): if MODS_NAMESPACE in body: try: data, diag_info = mods2json(body, diagnostics) imt = 'application/x-mods+xml' except amara.ReaderError: raise ValueError('Unable to process content') else: try: data = atomparse(body) logger.debug("ATOM: " + repr(data)) except ValueError: data = webfeed(body) imt = 'application/rss+xml' if data is None: raise ValueError('Unable to process content')