def _toString(x): """ In N3 everything is represented as a string (at the moment), so we need to turn everything into a string without Python encoding. """ if type(x) is type(' '): return stringToN3(stripCR(x)) if type(x) is type(u' '): return stringToN3(stripCR(x)) # @@ sure? if type(x) is type(6): return '"'+`x`+'"' y = `x` if y[:5]=="<time": # Must be better way str = x.Format("%Y-%m-%dT%H:%M:%S%z") if str[:2] == "45": return "" # Null date value - must be better way! @@ return '"' + str + '"' return `x` # @@@ unhandled things
def orderedFields(value, map):
    """Render a ';'-separated structured value as N3 property text.

    The i-th ';'-delimited component of `value` is paired with map[i].
    A ';' that directly follows a backslash is not treated as a
    delimiter.  Each component is passed through splitBy(..., ',') and
    the pieces are joined with single spaces; every non-empty result is
    appended through lineFold() as ' v:<name> <literal>;'.

    Returns the accumulated text, which is "" when nothing was emitted.
    """
    total = len(value)
    folded = ""
    start = 0
    for name in map:
        # Find the next ';' that is not preceded by a backslash.
        stop = value.find(";", start)
        while stop > 0 and value[stop - 1] == "\\":
            stop = value.find(";", stop + 1)
        if stop < 0:
            stop = total  # last component runs to the end of the value

        text = " ".join(splitBy(value[start:stop], ','))
        if text:
            statement = ' v:%s %s;' % (name, stringToN3(text, singleLine=1))
            folded = lineFold(folded, statement)

        start = stop + 1
        if start > total:
            break  # value exhausted before the field list
    return folded
def orderedFields(value, map):
    """Render a ';'-separated structured value as N3 property text.

    The i-th ';'-delimited component of `value` is paired with map[i].
    A ';' that directly follows a backslash is not treated as a
    delimiter.  Each component is passed through splitBy(..., ',') and
    the pieces are joined with single spaces; every non-empty result is
    appended through lineFold() as ' v:<name> <literal>;'.

    Returns the accumulated text, which is "" when nothing was emitted.
    """
    total = len(value)
    folded = ""
    start = 0
    for name in map:
        # Find the next ';' that is not preceded by a backslash.
        stop = value.find(";", start)
        while stop > 0 and value[stop - 1] == "\\":
            stop = value.find(";", stop + 1)
        if stop < 0:
            stop = total  # last component runs to the end of the value

        text = " ".join(splitBy(value[start:stop], ','))
        if text:
            statement = ' v:%s %s;' % (name, stringToN3(text, singleLine=1))
            folded = lineFold(folded, statement)

        start = stop + 1
        if start > total:
            break  # value exhausted before the field list
    return folded
def representationOf(pair):
    """Return the N3 text for one term of the output stream.

    `pair` is first normalised with auPair() and then treated as a
    (type, value) tuple:

    - a pair equal to N3_nil is written "()";
    - LITERAL: the value quoted by stringToN3();
    - XMLLITERAL: the value canonicalised with Canonicalize(), quoted, and
      followed by "^^" and the rdf:XMLLiteral datatype symbol;
    - LITERAL_DT, value (string, datatype): booleans become true/false,
      integers, floats and decimals are written bare (floats always carry
      an exponent, decimals always carry a '.'); any other datatype is
      written as quoted string + "^^" + datatype symbol;
    - LITERAL_LANG, value (string, lang): quoted string + "@" + lang;
    - everything else: "<" + value + ">".

    NOTE: a commented-out variant of this code regenerated genids and
    abbreviated URIs using a prefix dictionary.  None of that is active
    here, and _flags is fixed to '', so every flag test below always takes
    its "flag not set" branch.
    """
    xmlLiteralUri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
    _flags = ''

    pair = auPair(pair)
    if "t" not in _flags and pair == N3_nil:
        return "()"

    ty, value = pair
    singleLine = False

    def quoted(text):
        # Every literal is quoted with the same line mode and flags.
        return stringToN3(text, singleLine=singleLine, flags=_flags)

    if ty == LITERAL:
        return quoted(value)

    if ty == XMLLITERAL:
        canonical = Canonicalize(value, None, unsuppressedPrefixes=['foo'])
        return quoted(canonical) + "^^" + representationOf(
            (SYMBOL, xmlLiteralUri))

    if ty == LITERAL_DT:
        lexical, datatype = value
        if "b" not in _flags and datatype == BOOLEAN_DATATYPE:
            if toBool(lexical):
                return "true"
            return "false"
        if "n" not in _flags:
            if datatype == INTEGER_DATATYPE:
                return str(long(lexical))
            if datatype == FLOAT_DATATYPE:
                number = str(float(lexical))  # numeric value python-normalized
                if 'e' not in number:
                    number = number + 'e+00'
                return number
            if datatype == DECIMAL_DATATYPE:
                number = str(Decimal(lexical))
                if '.' not in number:
                    number = number + '.0'
                return number
        # Not a datatype with a bare form: typed, quoted literal.
        return quoted(lexical) + "^^" + representationOf((SYMBOL, datatype))

    if ty == LITERAL_LANG:
        lexical, lang = value
        return quoted(lexical) + "@" + lang

    return "<" + value + ">"  # Everything else
def predicateObject(n, props, value):
    """Return a pair of the predicate and object as N3 strings.

    n      -- the tag name being converted (compared against lower-case
              names such as 'email', 'tel', 'n', 'version').
    props  -- iterable of (property, value) pairs attached to the tag.
    value  -- the raw text value of the tag.

    Property handling:
    - 'type': each comma-separated, lower-cased entry is either ignored
      ('internet' on email, 'pref'), turned into a modifier prefix of the
      predicate (when relationshipModifiers marks it), or turned into a
      'vc:' class; anything not listed in typeFields for this tag raises
      ValueError.
    - 'value': only 'date' and 'uri' are accepted.
    - 'base64' (or encoding 'b'): returns at once with the data re-wrapped
      at lineLength characters inside a [ v:base64 ... ] node.
    - any other property raises ValueError.

    Either element of the returned pair may be '' (see the 'n',
    'version' and 'x.ablabel' cases).

    Raises ValueError for unknown properties, types, datatypes or tags, and
    for an unescaped semicolon in a single-valued field.
    """
    modifiers = ""
    classes = []
    for prop, val in props:
        if prop == 'type':
            for typeName in splitBy(val.lower(), ','):
                if typeName == 'internet' and n == 'email':
                    pass
                elif typeName == 'pref':  # Preferred @@ - how represent?
                    pass
                elif typeName in typeFields.get(n, []):
                    if relationshipModifiers.get(typeName, 0):
                        # NOTE(review): `line` is not a parameter; it is
                        # presumably a module-level global holding the
                        # current input line - confirm.
                        if modifiers:
                            print("# @@ multiple modifiers in: " + line)
                        modifiers = typeName + '-' + modifiers
                    else:
                        classes.append(
                            'vc:' + typeName[0].upper() + typeName[1:])
                else:
                    raise ValueError("Unhandled type %s in: %s"
                                     % (typeName, line))
        elif prop == 'value':  # This means datatype
            # Date-times from AB certainly look like w3c not iCal dates
            if val not in ('date', 'uri'):
                raise ValueError('Unimplemented data type:' + val)
        elif prop == 'base64' or (prop == 'encoding' and val.lower() == 'b'):
            value = value.replace(' ', '')
            res = "".join([value[i:i + lineLength] + "\n"
                           for i in range(0, len(value), lineLength)])
            return 'v:' + n, '[ v:base64 """%s"""]\n' % (res)  # Special case
        else:
            # Was "'...' & (prop, val)", which raised TypeError instead of
            # the intended ValueError.
            raise ValueError('Unknown property %s with value %s'
                             % (prop, val))

    classSpec = ""
    if classes:
        classSpec = 'a ' + (', '.join(classes))
    fieldMap = fieldProperties.get(n, None)
    pred = 'v:%s%s' % (modifiers, n)

    if fieldMap:
        if classSpec:
            classSpec = '\n\t' + classSpec
        if n == 'n':  # Special case
            assert classSpec == ""
            return '', orderedFields(value, fieldMap)  # Naked fields - see notes
        return pred, '[' + orderedFields(value, fieldMap) + classSpec + ']'

    if n == 'version':
        assert value == "3.0", "value found: " + repr(value)
        return "", ""
    if n == 'x.ablabel':
        return "", ""  # used elsewhere
    if n == 'categories':
        # Really should relate these to classes, but this roundtrips
        obj = ", ".join(['"' + x + '"' for x in splitBy(value, ',')])
        return pred, obj

    unesc = splitBy(value, ';')
    if len(unesc) != 1:
        raise ValueError("Unescaped semicolon in value: " + value)
    unesc = unesc[0]

    obj = None
    if n == 'tel':
        # startswith() instead of value[0] so an empty value warns rather
        # than raising IndexError.
        if not value.startswith('+'):
            print("# @@ Warning: not international form tel: " + value)
        obj = '<tel:%s>' % (value.replace(' ', '-'))
    elif n == 'url':
        obj = '<%s>' % (value)
    elif n == 'email':
        obj = '<mailto:%s>' % (value)

    if obj:  # Any case so far is a form of URI
        if classSpec:
            wr('%s %s.\n' % (obj, classSpec))
        return pred, obj
    elif n in singleTextField:  # Single text
        if classSpec:
            # Was "raiseValueError(...)", a NameError at run time.
            raise ValueError("Unexpected class on %s: %s"
                             % (n, repr(classSpec)))
        return pred, stringToN3(unesc, singleLine=0)  # @@@ N3 escaping
    raise ValueError('Unknown tag:' + n)
def representationOf(pair):
    """Return the N3 text for one term of the output stream.

    `pair` is first normalised with auPair() and then treated as a
    (type, value) tuple:

    - a pair equal to N3_nil is written "()";
    - LITERAL: the value quoted by stringToN3();
    - XMLLITERAL: the value canonicalised with Canonicalize(), quoted, and
      followed by "^^" and the rdf:XMLLiteral datatype symbol;
    - LITERAL_DT, value (string, datatype): booleans become true/false,
      integers, floats and decimals are written bare (floats always carry
      an exponent, decimals always carry a '.'); any other datatype is
      written as quoted string + "^^" + datatype symbol;
    - LITERAL_LANG, value (string, lang): quoted string + "@" + lang;
    - everything else: "<" + value + ">".

    NOTE: a commented-out variant of this code regenerated genids and
    abbreviated URIs using a prefix dictionary.  None of that is active
    here, and _flags is fixed to '', so every flag test below always takes
    its "flag not set" branch.
    """
    xmlLiteralUri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
    _flags = ''

    pair = auPair(pair)
    if "t" not in _flags and pair == N3_nil:
        return "()"

    ty, value = pair
    singleLine = False

    def quoted(text):
        # Every literal is quoted with the same line mode and flags.
        return stringToN3(text, singleLine=singleLine, flags=_flags)

    if ty == LITERAL:
        return quoted(value)

    if ty == XMLLITERAL:
        canonical = Canonicalize(value, None, unsuppressedPrefixes=['foo'])
        return quoted(canonical) + "^^" + representationOf(
            (SYMBOL, xmlLiteralUri))

    if ty == LITERAL_DT:
        lexical, datatype = value
        if "b" not in _flags and datatype == BOOLEAN_DATATYPE:
            if toBool(lexical):
                return "true"
            return "false"
        if "n" not in _flags:
            if datatype == INTEGER_DATATYPE:
                return str(long(lexical))
            if datatype == FLOAT_DATATYPE:
                number = str(float(lexical))  # numeric value python-normalized
                if 'e' not in number:
                    number = number + 'e+00'
                return number
            if datatype == DECIMAL_DATATYPE:
                number = str(Decimal(lexical))
                if '.' not in number:
                    number = number + '.0'
                return number
        # Not a datatype with a bare form: typed, quoted literal.
        return quoted(lexical) + "^^" + representationOf((SYMBOL, datatype))

    if ty == LITERAL_LANG:
        lexical, lang = value
        return quoted(lexical) + "@" + lang

    return "<" + value + ">"  # Everything else
def convert(path):
    """Convert LDIF format to n3.

    Reads the whole file at `path` and prints N3 to standard output: a
    comment header, the foaf: and ldif: prefix declarations, then one
    " [ ... ]." blank node per run of attribute lines.  A blank line ends
    the current node.

    Line handling, in order:
    - a line followed by a line starting with a space is joined with it
      (continuation unfolding);
    - any line containing "jpeg" is skipped (encoding error in stringToN3);
    - lines starting with '#' are skipped;
    - "name: value", "name:: base64" and "name:<url" forms are recognised;
      anything else is reported as a "# ERROR" comment in the output.

    Field handling:
    - objectclass: written as "a ldif:<Class>" ("top" is dropped);
    - mail / email / mozillaSecondEmail: written as foaf:mbox_sha1sum and,
      unless the global hideMailbox is set, foaf:mbox;
    - telephone-like fields: written as tel: URIs with spaces replaced
      by '-';
    - everything else: written as a literal under foaf:name (for cn) or
      ldif:<field>; "dn" is suppressed when hideMailbox is set.

    Uses the module-level names version, hideMailbox, stringToN3, re,
    base64, binascii and sha.
    """
    global nochange
    global verbose
    global hideMailbox

    # Single-argument print(...) produces the same output whether print is
    # a statement or a function.
    print("# http://www.w3.org/DesignIssues/Notation3")
    print("# Generated from %s" % (path,))
    print("# Generated by  %s" % (version,))
    print("")
    print("@prefix foaf: <http://xmlns.com/foaf/0.1/>.")
    print("@prefix ldif: <http://www.w3.org/2007/ont/ldif#>.")
    print("")

    infile = open(path, "r")
    try:
        buf = infile.read()  # Read the file
    finally:
        infile.close()  # closed even if read() fails

    nextLine = 0
    inPerson = 0

    blank = re.compile(r' *$')
    dataline = re.compile(r'([a-zA-Z0-9_]*): +(.*)')
    base64line = re.compile(r'([a-zA-Z0-9_]*):: +(.*)')
    urlline = re.compile(r'([a-zA-Z0-9_]*):<+(.*)')
    commentLine = re.compile(r'^#.*')

    asFoaf = {"cn": "foaf:name"}

    while nextLine < len(buf):  # Iterate over lines
        ldifLine = ""
        while 1:  # unfold continuation lines
            eol = buf.find("\n", nextLine)
            if eol < 0:
                ldifLine += buf[nextLine:]
                nextLine = len(buf)
                break
            ldifLine += buf[nextLine:eol]
            if eol + 1 < len(buf) and buf[eol + 1] == ' ':  # DOES LDIF fold lines??
                nextLine = eol + 2  # After the '\n '
                continue
            nextLine = eol + 1
            break

        # if the line contains encoded jpeg, skip it
        # (encoding error in stringToN3)
        if "jpeg" in ldifLine:
            continue
        while ldifLine and ldifLine[-1:] in "\r\n":
            ldifLine = ldifLine[:-1]
        if commentLine.match(ldifLine):
            continue

        if blank.match(ldifLine):
            # Close a node only when one is open; repeated or leading blank
            # lines used to emit an unmatched " ].".
            if inPerson:
                print(" ].")
                inPerson = 0
            continue

        m = dataline.match(ldifLine)
        if m:
            field = m.group(1)
            value = m.group(2)
        else:
            m = base64line.match(ldifLine)
            if m:
                field = m.group(1)
                value = base64.decodestring(m.group(2))
            else:
                m = urlline.match(ldifLine)
                if m:
                    field = m.group(1)
                    value = m.group(2)

        if not m:
            print("# ERROR: Unknown line format: %s" % (ldifLine,))
            continue

        if not inPerson:
            # print subject here?
            # use dn path to parse into foaf:name or a sioc:User
            print(" [")
            inPerson = 1

        if field == "objectclass":
            if value == "top":
                continue  # Zero content info
            print('\ta ldif:%s; ' % (value[0:1].upper() + value[1:]))
        elif field in ["mail", "email", "mozillaSecondEmail"]:  ## @@ distinguish?
            mboxUri = "mailto:" + value
            digest = binascii.hexlify(sha.new(mboxUri).digest())
            print('\tfoaf:mbox_sha1sum %s;' % (stringToN3(digest, singleLine=1)))
            if not hideMailbox:
                print('\tfoaf:mbox <%s>;' % (mboxUri))
        elif field in ["telephoneNumber", "homePhone", 'fax', 'pager', 'mobile']:
            print('\tldif:%s <tel:%s>;' % (field, value.replace(' ', '-')))
        else:
            if field == "modifytimestamp" and value == "0Z":
                continue  # ignore
            obj = stringToN3(value, singleLine=0)
            pred = asFoaf.get(field, '\tldif:' + field)
            if not (hideMailbox and field == "dn"):
                print('\t%s %s; ' % (pred, obj))

    # A final record that is not followed by a blank line used to be left
    # unterminated.
    if inPerson:
        print(" ].")
def predicateObject(n, props, value):
    """Return a pair of the predicate and object as N3 strings.

    n      -- the tag name being converted (compared against lower-case
              names such as 'email', 'tel', 'n', 'version').
    props  -- iterable of (property, value) pairs attached to the tag.
    value  -- the raw text value of the tag.

    Property handling:
    - 'type': each comma-separated, lower-cased entry is either ignored
      ('internet' on email, 'pref'), turned into a modifier prefix of the
      predicate (when relationshipModifiers marks it), or turned into a
      'vc:' class; anything not listed in typeFields for this tag raises
      ValueError.
    - 'value': only 'date' and 'uri' are accepted.
    - 'base64' (or encoding 'b'): returns at once with the data re-wrapped
      at lineLength characters inside a [ v:base64 ... ] node.
    - any other property raises ValueError.

    Either element of the returned pair may be '' (see the 'n',
    'version' and 'x.ablabel' cases).

    Raises ValueError for unknown properties, types, datatypes or tags, and
    for an unescaped semicolon in a single-valued field.
    """
    modifiers = ""
    classes = []
    for prop, val in props:
        if prop == 'type':
            for typeName in splitBy(val.lower(), ','):
                if typeName == 'internet' and n == 'email':
                    pass
                elif typeName == 'pref':  # Preferred @@ - how represent?
                    pass
                elif typeName in typeFields.get(n, []):
                    if relationshipModifiers.get(typeName, 0):
                        # NOTE(review): `line` is not a parameter; it is
                        # presumably a module-level global holding the
                        # current input line - confirm.
                        if modifiers:
                            print("# @@ multiple modifiers in: " + line)
                        modifiers = typeName + '-' + modifiers
                    else:
                        classes.append(
                            'vc:' + typeName[0].upper() + typeName[1:])
                else:
                    raise ValueError("Unhandled type %s in: %s"
                                     % (typeName, line))
        elif prop == 'value':  # This means datatype
            # Date-times from AB certainly look like w3c not iCal dates
            if val not in ('date', 'uri'):
                raise ValueError('Unimplemented data type:' + val)
        elif prop == 'base64' or (prop == 'encoding' and val.lower() == 'b'):
            value = value.replace(' ', '')
            res = "".join([value[i:i + lineLength] + "\n"
                           for i in range(0, len(value), lineLength)])
            return 'v:' + n, '[ v:base64 """%s"""]\n' % (res)  # Special case
        else:
            # Was "'...' & (prop, val)", which raised TypeError instead of
            # the intended ValueError.
            raise ValueError('Unknown property %s with value %s'
                             % (prop, val))

    classSpec = ""
    if classes:
        classSpec = 'a ' + (', '.join(classes))
    fieldMap = fieldProperties.get(n, None)
    pred = 'v:%s%s' % (modifiers, n)

    if fieldMap:
        if classSpec:
            classSpec = '\n\t' + classSpec
        if n == 'n':  # Special case
            assert classSpec == ""
            return '', orderedFields(value, fieldMap)  # Naked fields - see notes
        return pred, '[' + orderedFields(value, fieldMap) + classSpec + ']'

    if n == 'version':
        assert value == "3.0", "value found: " + repr(value)
        return "", ""
    if n == 'x.ablabel':
        return "", ""  # used elsewhere
    if n == 'categories':
        # Really should relate these to classes, but this roundtrips
        obj = ", ".join(['"' + x + '"' for x in splitBy(value, ',')])
        return pred, obj

    unesc = splitBy(value, ';')
    if len(unesc) != 1:
        raise ValueError("Unescaped semicolon in value: " + value)
    unesc = unesc[0]

    obj = None
    if n == 'tel':
        # startswith() instead of value[0] so an empty value warns rather
        # than raising IndexError.
        if not value.startswith('+'):
            print("# @@ Warning: not international form tel: " + value)
        obj = '<tel:%s>' % (value.replace(' ', '-'))
    elif n == 'url':
        obj = '<%s>' % (value)
    elif n == 'email':
        obj = '<mailto:%s>' % (value)

    if obj:  # Any case so far is a form of URI
        if classSpec:
            wr('%s %s.\n' % (obj, classSpec))
        return pred, obj
    elif n in singleTextField:  # Single text
        if classSpec:
            # Was "raiseValueError(...)", a NameError at run time.
            raise ValueError("Unexpected class on %s: %s"
                             % (n, repr(classSpec)))
        return pred, stringToN3(unesc, singleLine=0)  # @@@ N3 escaping
    raise ValueError('Unknown tag:' + n)