Exemple #1
0
def _toString(x):
        """ In N3 everything is represented as a string (at the moment), so we need
        to turn everything into a string without Python encoding.
        """
        if type(x) is type(' '): return stringToN3(stripCR(x))
        if type(x) is type(u' '): return stringToN3(stripCR(x))  # @@ sure?
        if type(x) is type(6): return '"'+`x`+'"'
        y = `x`
        if y[:5]=="<time":  # Must be better way
                str = x.Format("%Y-%m-%dT%H:%M:%S%z")
                if str[:2] == "45": return ""   # Null date value - must be better way! @@
                return '"'  + str + '"'
        return `x`   # @@@ unhandled things
Exemple #2
0
def _toString(x):
        """ In N3 everything is represented as a string (at the moment), so we need
        to turn everything into a string without Python encoding.
        """
        if type(x) is type(' '): return stringToN3(stripCR(x))
        if type(x) is type(u' '): return stringToN3(stripCR(x))  # @@ sure?
        if type(x) is type(6): return '"'+`x`+'"'
        y = `x`
        if y[:5]=="<time":  # Must be better way
                str = x.Format("%Y-%m-%dT%H:%M:%S%z")
                if str[:2] == "45": return ""   # Null date value - must be better way! @@
                return '"'  + str + '"'
        return `x`   # @@@ unhandled things
Exemple #3
0
 def orderedFields(value, map):
     """Render a semicolon-structured value as a run of ' v:name "text";' items.

     `value` is cut at each ';' that is not directly preceded by a
     backslash.  The pieces are paired, in order, with the names in `map`;
     at most len(map) pieces are consumed.  Each piece is split with
     splitBy(piece, ','), re-joined with single spaces, and, if the result
     is not empty, appended to the output through lineFold() as
     ' v:<name> <stringToN3(text)>;'.  Returns the accumulated string
     (empty when nothing was emitted).
     """
     result = ""
     start = 0
     valueLength = len(value)
     for fieldName in map:
         # Find the next separator, stepping over backslash-escaped ones.
         stop = value.find(";", start)
         while stop > 0 and value[stop - 1] == "\\":
             stop = value.find(";", stop + 1)
         if stop < 0:
             # No further separator: the piece runs to the end of value.
             stop = valueLength
         text = " ".join(splitBy(value[start:stop], ','))
         if text:
             n3Text = stringToN3(text, singleLine=1)
             result = lineFold(result, ' v:%s %s;' % (fieldName, n3Text))
         start = stop + 1
         if start > valueLength:
             break
     return result
Exemple #4
0
 def orderedFields(value, map):
     """Render a semicolon-structured value as a run of ' v:name "text";' items.

     `value` is cut at each ';' that is not directly preceded by a
     backslash.  The pieces are paired, in order, with the names in `map`;
     at most len(map) pieces are consumed.  Each piece is split with
     splitBy(piece, ','), re-joined with single spaces, and, if the result
     is not empty, appended to the output through lineFold() as
     ' v:<name> <stringToN3(text)>;'.  Returns the accumulated string
     (empty when nothing was emitted).
     """
     result = ""
     start = 0
     valueLength = len(value)
     for fieldName in map:
         # Find the next separator, stepping over backslash-escaped ones.
         stop = value.find(";", start)
         while stop > 0 and value[stop - 1] == "\\":
             stop = value.find(";", stop + 1)
         if stop < 0:
             # No further separator: the piece runs to the end of value.
             stop = valueLength
         text = " ".join(splitBy(value[start:stop], ','))
         if text:
             n3Text = stringToN3(text, singleLine=1)
             result = lineFold(result, ' v:%s %s;' % (fieldName, n3Text))
         start = stop + 1
         if start > valueLength:
             break
     return result
Exemple #5
0
def representationOf(pair):
    """Return the N3 text for one term given as a (type, value) pair.

    The argument is first normalised with auPair().  Then:

    * the N3_nil pair is written as "()";
    * LITERAL values are quoted with stringToN3();
    * XMLLITERAL values are canonicalized, quoted, and suffixed with
      "^^" plus the rdf:XMLLiteral datatype reference;
    * LITERAL_DT values are written as bare true/false, integer, float
      (always with an exponent) or decimal (always with a ".") tokens for
      those datatypes, otherwise as a quoted string plus "^^<datatype>";
    * LITERAL_LANG values are quoted and suffixed with "@" + language;
    * everything else is written as "<" + backslashUify(value) + ">".

    No output flags are enabled in this version.  The genid regeneration,
    prefix (qname) abbreviation and relative-URI handling that used to sit
    before the final fallback were already commented out and are not
    performed.
    """
    term = auPair(pair)
    flags = ''        # flag string is fixed: no options switched on
    oneLine = False

    if "t" not in flags and term == N3_nil:
        return "()"

    kind, value = term

    if kind == LITERAL:
        return stringToN3(value, singleLine=oneLine, flags=flags)

    if kind == XMLLITERAL:
        canonical = Canonicalize(value, None, unsuppressedPrefixes=['foo'])
        quoted = stringToN3(canonical, singleLine=oneLine, flags=flags)
        xmlLiteralType = representationOf(
            (SYMBOL, "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"))
        return quoted + "^^" + xmlLiteralType

    if kind == LITERAL_DT:
        lexical, datatype = value
        if "b" not in flags and datatype == BOOLEAN_DATATYPE:
            if toBool(lexical):
                return "true"
            return "false"
        if "n" not in flags:
            if datatype == INTEGER_DATATYPE:
                return str(long(lexical))
            if datatype == FLOAT_DATATYPE:
                # Numeric value as normalised by Python; force an exponent.
                text = str(float(lexical))
                if 'e' not in text:
                    text += 'e+00'
                return text
            if datatype == DECIMAL_DATATYPE:
                # Force a decimal point so the token is not read as integer.
                text = str(Decimal(lexical))
                if '.' not in text:
                    text += '.0'
                return text
        quoted = stringToN3(lexical, singleLine=oneLine, flags=flags)
        return quoted + "^^" + representationOf((SYMBOL, datatype))

    if kind == LITERAL_LANG:
        lexical, lang = value
        quoted = stringToN3(lexical, singleLine=oneLine, flags=flags)
        return quoted + "@" + lang

    # Everything else is written as a full reference in angle brackets.
    return "<" + backslashUify(value) + ">"
Exemple #6
0
    def predicateObject(n, props, value):
        """Return a pair of the predicate and object as N3 strings.

        n     -- field name; compared against lower-case tags such as
                 'tel', 'email', 'n', 'version'.
        props -- iterable of (property, value) parameter pairs of the field.
        value -- raw text of the field.

        Returns (predicate, object).  Either may be the empty string: the
        'version' and 'x.ablabel' fields return ("", ""), and the 'n' field
        returns ("", <naked property list>).

        Raises ValueError for an unhandled type, an unimplemented data
        type, an unknown property, an unescaped semicolon in a simple
        value, a class on a single-text field, or an unknown tag.

        NOTE(review): `line` and `wr` are not defined in this function;
        presumably they come from the enclosing scope -- confirm.
        """
        modifiers = ""
        classes = []
        for prop, val in props:
            if prop == 'type':
                for kind in splitBy(val.lower(), ','):
                    if kind == 'internet' and n == 'email':
                        pass
                    elif kind == 'pref':   # Preferred @@ - how represent?
                        pass
                    elif kind in typeFields.get(n, []):
                        if relationshipModifiers.get(kind, 0):
                            if modifiers:
                                print("# @@ multiple modifiers in: " + line)
                            modifiers = kind + '-' + modifiers
                        else:
                            classes.append('vc:' + kind[0].upper() + kind[1:])
                    else:
                        raise ValueError("Unhandled type %s in: %s"
                                         % (kind, line))
            elif prop == 'value':  # This means datatype
                # Only validated; the datatype does not alter the output.
                # Date-times from AB certainly look like w3c not iCal dates.
                if val not in ('date', 'uri'):
                    raise ValueError('Unimplemented data type:' + val)
            elif prop == 'base64' or (prop == 'encoding'
                                      and val.lower() == 'b'):
                # Special case: re-wrap the blob at lineLength characters.
                value = value.replace(' ', '')
                res = "".join([value[i:i + lineLength] + "\n"
                               for i in range(0, len(value), lineLength)])
                return 'v:' + n, '[ v:base64 """%s"""]\n' % (res)
            else:
                raise ValueError('Unknown property %s with value %s'
                                 % (prop, val))

        classSpec = ""
        if classes:
            classSpec = 'a ' + (', '.join(classes))

        map = fieldProperties.get(n, None)
        pred = 'v:%s%s' % (modifiers, n)
        if map:
            if classSpec:
                classSpec = '\n\t' + classSpec
            if n == 'n':  # Special case
                assert classSpec == ""
                return '', orderedFields(value, map)  # Naked fields - see notes
            return pred, '[' + orderedFields(value, map) + classSpec + ']'
        if n == 'version':
            assert value == "3.0", "value found: " + repr(value)
            return "", ""
        if n == 'x.ablabel':
            return "", ""  # used elsewhere

        if n == 'categories':
            # Really should relate these to classes, but this roundtrips
            obj = ", ".join(['"' + x + '"' for x in splitBy(value, ',')])
            return pred, obj

        unesc = splitBy(value, ';')
        if len(unesc) != 1:
            raise ValueError("Unescaped semicolon in value: " + value)
        unesc = unesc[0]

        obj = None
        if n == 'tel':
            # Slice instead of index so an empty value warns, not crashes.
            if value[:1] != '+':
                print("# @@ Warning: not international form tel: " + value)
            obj = '<tel:%s>' % (value.replace(' ', '-'))
        elif n == 'url':
            obj = '<%s>' % (value)
        elif n == 'email':
            obj = '<mailto:%s>' % (value)

        if obj:  # Any case so far is a form of URI
            if classSpec:
                wr('%s %s.\n' % (obj, classSpec))
            return pred, obj

        elif n in singleTextField:  # Single text
            if classSpec:
                raise ValueError("Unexpected class on %s: %s"
                                 % (n, repr(classSpec)))
            return pred, stringToN3(unesc, singleLine=0)  # @@@ N3 escaping

        raise ValueError('Unknown tag:' + n)
Exemple #7
0
def representationOf(pair):
    """Return the N3 text for one term given as a (type, value) pair.

    The argument is first normalised with auPair().  Then:

    * the N3_nil pair is written as "()";
    * LITERAL values are quoted with stringToN3();
    * XMLLITERAL values are canonicalized, quoted, and suffixed with
      "^^" plus the rdf:XMLLiteral datatype reference;
    * LITERAL_DT values are written as bare true/false, integer, float
      (always with an exponent) or decimal (always with a ".") tokens for
      those datatypes, otherwise as a quoted string plus "^^<datatype>";
    * LITERAL_LANG values are quoted and suffixed with "@" + language;
    * everything else is written as "<" + backslashUify(value) + ">".

    No output flags are enabled in this version.  The genid regeneration,
    prefix (qname) abbreviation and relative-URI handling that used to sit
    before the final fallback were already commented out and are not
    performed.
    """
    term = auPair(pair)
    flags = ''        # flag string is fixed: no options switched on
    oneLine = False

    if "t" not in flags and term == N3_nil:
        return "()"

    kind, value = term

    if kind == LITERAL:
        return stringToN3(value, singleLine=oneLine, flags=flags)

    if kind == XMLLITERAL:
        canonical = Canonicalize(value, None, unsuppressedPrefixes=['foo'])
        quoted = stringToN3(canonical, singleLine=oneLine, flags=flags)
        xmlLiteralType = representationOf(
            (SYMBOL, "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"))
        return quoted + "^^" + xmlLiteralType

    if kind == LITERAL_DT:
        lexical, datatype = value
        if "b" not in flags and datatype == BOOLEAN_DATATYPE:
            if toBool(lexical):
                return "true"
            return "false"
        if "n" not in flags:
            if datatype == INTEGER_DATATYPE:
                return str(long(lexical))
            if datatype == FLOAT_DATATYPE:
                # Numeric value as normalised by Python; force an exponent.
                text = str(float(lexical))
                if 'e' not in text:
                    text += 'e+00'
                return text
            if datatype == DECIMAL_DATATYPE:
                # Force a decimal point so the token is not read as integer.
                text = str(Decimal(lexical))
                if '.' not in text:
                    text += '.0'
                return text
        quoted = stringToN3(lexical, singleLine=oneLine, flags=flags)
        return quoted + "^^" + representationOf((SYMBOL, datatype))

    if kind == LITERAL_LANG:
        lexical, lang = value
        quoted = stringToN3(lexical, singleLine=oneLine, flags=flags)
        return quoted + "@" + lang

    # Everything else is written as a full reference in angle brackets.
    return "<" + backslashUify(value) + ">"
def convert(path):
    """Convert LDIF format to n3.

    Reads the whole file at `path` and prints an N3 document on standard
    output: a comment header, the foaf: and ldif: prefix declarations, and
    one "[ ... ]." blank-node description per LDIF record.

    Line handling:
    * a line ending followed by a space is treated as a folded
      continuation and joined to the previous line;
    * unfolded lines containing "jpeg" and lines starting with "#" are
      skipped;
    * a blank line closes the record currently open, if any; a record still
      open at end of input is closed as well;
    * "field: value", "field:: base64" and "field:< url" lines become
      properties; anything else is reported as an "# ERROR:" comment.
      URL-valued lines are currently written the same way as literals.

    Uses the module globals `hideMailbox` (suppresses foaf:mbox and the dn
    property) and `version`.
    """
    global nochange
    global verbose
    global hideMailbox

    print("# http://www.w3.org/DesignIssues/Notation3")
    print("# Generated from %s" % (path,))
    print("# Generated by   %s" % (version,))
    print("")
    print("@prefix foaf: <http://xmlns.com/foaf/0.1/>.")
    print("@prefix ldif: <http://www.w3.org/2007/ont/ldif#>.")
    print("")

    # Read the file; close it even if the read fails.
    ldifFile = open(path, "r")
    try:
        buf = ldifFile.read()
    finally:
        ldifFile.close()

    nextLine = 0

    blank = re.compile(r' *$')
    inPerson = 0
    dataline = re.compile(r'([a-zA-Z0-9_]*): +(.*)')
    base64line = re.compile(r'([a-zA-Z0-9_]*):: +(.*)')
    urlline = re.compile(r'([a-zA-Z0-9_]*):<+(.*)')
    commentLine = re.compile(r'^#.*')

    asFoaf = {"cn": "foaf:name"}

    while nextLine < len(buf):  # Iterate over lines
        line = ""
        while 1:  # unfold continuation lines
            eol = buf.find("\n", nextLine)
            if eol < 0:
                line += buf[nextLine:]
                nextLine = len(buf)
                break
            if eol + 1 < len(buf) and buf[eol + 1] == ' ':  # DOES LDIF fold lines??
                line += buf[nextLine:eol]
                nextLine = eol + 2  # After the '\n '
                continue
            line += buf[nextLine:eol]
            nextLine = eol + 1
            break

        # Skip lines with encoded jpeg (encoding error in stringToN3).
        if "jpeg" in line:
            continue
        line = line.rstrip("\r\n")

        if commentLine.match(line):
            continue

        if blank.match(line):
            # Close a record only if one is open, so repeated or leading
            # blank lines do not emit a stray "]." into the output.
            if inPerson:
                print("    ].")
                inPerson = 0
            continue

        m = dataline.match(line)
        if m:
            field = m.group(1)
            value = m.group(2)
        else:
            m = base64line.match(line)
            if m:
                field = m.group(1)
                value = base64.decodestring(m.group(2))
            else:
                m = urlline.match(line)
                if m:
                    field = m.group(1)
                    value = m.group(2)

        if not m:
            print("# ERROR: Unknown line format: %s" % (line,))
            continue

        if not inPerson:
            # print subject here?
            # use dn path to parse into foaf:name or a sioc:User
            print("    [")
            inPerson = 1

        if field == "objectclass":
            if value == "top":
                continue  # Zero content info
            print('\ta ldif:%s; ' % (value[0:1].upper() + value[1:]))

        elif field in ["mail", "email", "mozillaSecondEmail"]:  ## @@ distinguish?
            mboxUri = "mailto:" + value
            digest = binascii.hexlify(sha.new(mboxUri).digest())
            print('\tfoaf:mbox_sha1sum %s;' % (stringToN3(digest, singleLine=1)))
            if not hideMailbox:
                print('\tfoaf:mbox <%s>;' % (mboxUri))

        elif field in ["telephoneNumber", "homePhone", 'fax', 'pager', 'mobile']:
            print('\tldif:%s <tel:%s>;' % (field, value.replace(' ', '-')))

        else:
            if field == "modifytimestamp" and value == "0Z":
                continue  # ignore

            obj = stringToN3(value, singleLine=0)
            pred = asFoaf.get(field, '\tldif:' + field)
            if not (hideMailbox and field == "dn"):
                print('\t%s %s; ' % (pred, obj))

    # A final record not followed by a blank line must still be closed.
    if inPerson:
        print("    ].")
Exemple #9
0
    def predicateObject(n, props, value):
        """Return a pair of the predicate and object as N3 strings.

        n     -- field name; compared against lower-case tags such as
                 'tel', 'email', 'n', 'version'.
        props -- iterable of (property, value) parameter pairs of the field.
        value -- raw text of the field.

        Returns (predicate, object).  Either may be the empty string: the
        'version' and 'x.ablabel' fields return ("", ""), and the 'n' field
        returns ("", <naked property list>).

        Raises ValueError for an unhandled type, an unimplemented data
        type, an unknown property, an unescaped semicolon in a simple
        value, a class on a single-text field, or an unknown tag.

        NOTE(review): `line` and `wr` are not defined in this function;
        presumably they come from the enclosing scope -- confirm.
        """
        modifiers = ""
        classes = []
        for prop, val in props:
            if prop == 'type':
                for kind in splitBy(val.lower(), ','):
                    if kind == 'internet' and n == 'email':
                        pass
                    elif kind == 'pref':  # Preferred @@ - how represent?
                        pass
                    elif kind in typeFields.get(n, []):
                        if relationshipModifiers.get(kind, 0):
                            if modifiers:
                                print("# @@ multiple modifiers in: " + line)
                            modifiers = kind + '-' + modifiers
                        else:
                            classes.append('vc:' + kind[0].upper() + kind[1:])
                    else:
                        raise ValueError("Unhandled type %s in: %s" %
                                         (kind, line))
            elif prop == 'value':  # This means datatype
                # Only validated; the datatype does not alter the output.
                # Date-times from AB certainly look like w3c not iCal dates.
                if val not in ('date', 'uri'):
                    raise ValueError('Unimplemented data type:' + val)
            elif prop == 'base64' or (prop == 'encoding'
                                      and val.lower() == 'b'):
                # Special case: re-wrap the blob at lineLength characters.
                value = value.replace(' ', '')
                res = "".join([value[i:i + lineLength] + "\n"
                               for i in range(0, len(value), lineLength)])
                return 'v:' + n, '[ v:base64 """%s"""]\n' % (res)
            else:
                raise ValueError('Unknown property %s with value %s'
                                 % (prop, val))

        classSpec = ""
        if classes:
            classSpec = 'a ' + (', '.join(classes))

        map = fieldProperties.get(n, None)
        pred = 'v:%s%s' % (modifiers, n)
        if map:
            if classSpec:
                classSpec = '\n\t' + classSpec
            if n == 'n':  # Special case
                assert classSpec == ""
                return '', orderedFields(value, map)  # Naked fields - see notes
            return pred, '[' + orderedFields(value, map) + classSpec + ']'
        if n == 'version':
            assert value == "3.0", "value found: " + repr(value)
            return "", ""
        if n == 'x.ablabel':
            return "", ""  # used elsewhere

        if n == 'categories':
            # Really should relate these to classes, but this roundtrips
            obj = ", ".join(['"' + x + '"' for x in splitBy(value, ',')])
            return pred, obj

        unesc = splitBy(value, ';')
        if len(unesc) != 1:
            raise ValueError("Unescaped semicolon in value: " + value)
        unesc = unesc[0]

        obj = None
        if n == 'tel':
            # Slice instead of index so an empty value warns, not crashes.
            if value[:1] != '+':
                print("# @@ Warning: not international form tel: " + value)
            obj = '<tel:%s>' % (value.replace(' ', '-'))
        elif n == 'url':
            obj = '<%s>' % (value)
        elif n == 'email':
            obj = '<mailto:%s>' % (value)

        if obj:  # Any case so far is a form of URI
            if classSpec:
                wr('%s %s.\n' % (obj, classSpec))
            return pred, obj

        elif n in singleTextField:  # Single text
            if classSpec:
                raise ValueError("Unexpected class on %s: %s" %
                                 (n, repr(classSpec)))
            return pred, stringToN3(unesc, singleLine=0)  # @@@ N3 escaping

        raise ValueError('Unknown tag:' + n)