Exemple #1
0
def _parseRDFFromString(contents, baseuri, type='unknown', scope=None,
                       options=None):
    """Parse ``contents`` into statements and report the format that was used.

    contents -- the data to parse: a byte string, a unicode string (encoded
        to UTF-8 before parsing), a list/tuple, or a dict.
    baseuri -- base URI handed through to the underlying parsers.
    type -- format name ('ntriples', 'ntjson', 'rdfxml', 'turtle',
        'rss-tag-soup', 'json', 'pjson', 'yaml', 'mjson'). A leading
        'http://rx4rdf.sf.net/ns/wiki#rdfformat-' is stripped. With the
        default 'unknown' the format is sniffed from ``contents``.
    scope -- scope passed to the parsers; ``None`` is replaced by ''.
    options -- optional dict of extra keyword arguments forwarded to
        ``NTriples2Statements`` and ``pjson.tostatements``. The dict is
        copied, so the caller's object is never modified.

    Returns a ``(statements, type)`` pair on every path. ``type`` is the
    format actually used, or 'statements' when ``contents`` was already a
    list of statements (or was empty).

    Raises ParseException if the format is unsupported, no suitable parser
    is installed, or parsing fails unexpectedly.
    """
    from vesper.data import base
    if scope is None:
        scope = ''  #workaround 4suite bug
    # Copy: 'scope' and 'generateBnode' are added below and must not leak
    # back into the dict the caller passed in.
    options = dict(options) if options else {}

    def _parse_with_redland():
        # Raises ImportError when the Redland bindings (RDF) are missing;
        # reads the current values of type/contents at call time.
        import RDF
        parser = RDF.Parser(type)
        stream = parser.parse_string_as_stream(contents, baseuri)
        return base.redland2Statements(stream, scope), type

    if type.startswith('http://rx4rdf.sf.net/ns/wiki#rdfformat-'):
        type = type.split('-', 1)[1]

    if isinstance(contents, unicode):
        contents = contents.encode('utf8')

    try:
        # Format sniffing. Every pass either returns, breaks, raises or
        # assigns a concrete type, so the loop body runs at most once.
        while type == 'unknown':
            if isinstance(contents, (list, tuple)):
                if not contents:
                    return contents, 'statements'
                if isinstance(contents[0], (tuple, BaseStatement)):
                    #looks like already a list of statements
                    return contents, 'statements'
                #otherwise assume pjson
                type = 'pjson'
                break
            elif isinstance(contents, dict):
                type = 'pjson' #assume pjson
                break

            startcontents = contents[:256].lstrip()
            if not startcontents: #empty
                return [], 'statements'

            if startcontents[0] in '{[':
                type = 'pjson' #assume pjson
                break
            else:
                from vesper import multipartjson
                if multipartjson.looks_like_multipartjson(startcontents):
                    type = 'mjson'
                    break
            try:
                from vesper.utils import htmlfilter
                ns, prefix, local = htmlfilter.getRootElementName(contents)
                if ns == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#':
                    type = 'rdfxml'
                elif local == 'rx':
                    # NOTE(review): no branch below handles 'rxml_xml', so
                    # this ends in the "unsupported type" error.
                    type = 'rxml_xml'
                elif ns == 'http://purl.org/atom/ns#' or local == 'rss':
                    type = "rss-tag-soup"
                else:
                    raise ParseException(
                        "RDF parse error: Unsupported XML vocabulary: " + local)
            except Exception:
                # Not recognisable XML (this deliberately also catches the
                # ParseException raised just above): try our NTriples parser.
                try:
                    #convert generator to list to force parsing now
                    stmts = list(NTriples2Statements(
                                StringIO.StringIO(contents), scope,
                                baseuri, **options))
                except Exception:
                    raise ParseException("unrecognized or invalid file contents")
                # Return a pair like every other path (was a bare list).
                return stmts, 'ntriples'

        if type in ['ntriples', 'ntjson']:
            #use our parser
            return NTriples2Statements(StringIO.StringIO(contents), scope,
                                       baseuri, **options), type
        elif type == 'rss-tag-soup':
            try: #only redland's parser supports these
                return _parse_with_redland()
            except ImportError:
                raise ParseException("RDF parse error: "+ type+
                    " is only supported by Redland, which isn't installed")
        elif type == 'rdfxml' or type == 'turtle':
            if rdfParser:
                return rdfParser(contents, baseuri, type, scope)
            try:
                return _parseRDFWithRdfLib(contents, baseuri, type, scope)
            except ImportError:
                # rdflib is unavailable: fall back to Redland. The previous
                # code retried the rdflib helper, which could never succeed.
                try: #try redland's parser
                    return _parse_with_redland()
                except ImportError:
                    raise ParseException("no %s parser installed" % type)
        elif type == 'json':
            return _parseRDFJSON(contents, scope), type
        elif type == 'pjson' or type == 'yaml' or type == 'mjson':
            from vesper import pjson
            if isinstance(contents, str):
                if type == 'yaml':
                    import yaml
                    contents = yaml.safe_load(contents)
                elif type == 'mjson':
                    from vesper import multipartjson
                    # Was assigned to a misspelled name ("content"), so the
                    # decoded value was thrown away.
                    # NOTE(review): assumes loads(..., False) returns only
                    # the decoded JSON value -- confirm against multipartjson.
                    contents = multipartjson.loads(contents, False)
                else:
                    contents = json.loads(contents)

            options['scope'] = scope
            #XXX generateBnode doesn't detect collisions, maybe gen UUID instead
            if 'generateBnode' not in options:
                options['generateBnode'] = 'uuid'
            if not contents:
                return [], type
            return pjson.tostatements(contents, **options), type
        else:
            raise ParseException('unsupported type: ' + type)
    except ParseException:
        raise
    except Exception: #an unexpected Exception
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into parse errors.
        raise ParseException("error parsing "+type)