def findComponents(lines, container, components=None):
    """Collect one BEGIN...END component from *lines* into *components*.

    Content lines are read and parsed with ``parseLine`` until the ``END``
    line that closes *container*.  At that point the tuple
    ``(container, props, subs)`` is appended to *components*, where *props*
    is the list of ``(n, p, v)`` tuples returned by ``parseLine`` for lines
    directly inside the component and *subs* holds nested components
    gathered recursively from ``BEGIN`` lines.

    :param lines: iterator of raw content lines.  It is consumed in place,
        so the caller can keep reading from it after this call returns.
    :param container: name of the component being read (the value of the
        ``BEGIN`` line the caller has already consumed).
    :param components: list the finished component is appended to.  A fresh
        list is created when omitted, so separate calls never share state.
    :returns: *components*.
    :raises ValueError: if an ``END`` line names a different component.

    If the input runs out before the closing ``END`` line, nothing is
    appended for this component.

    # @@TODO: make this a generator
    """
    if components is None:
        components = []
    props = []
    subs = []
    while 1:
        # Only the iterator read is guarded: end of input ends the scan.
        try:
            raw = next(lines)
        except StopIteration:
            break
        n, p, v = parseLine(raw, downcase=False)
        #print "finding...", n, p, v
        #print >>sys.stderr, "began", container, n, p, v
        if n == 'END':
            # @@hmm... found an extra space after END:DAYLIGHT
            # in test/20030410querymtg.ics
            # where did that come from? allow it, or fix test data?
            v = v.rstrip().upper()
            if v != container:
                raise ValueError('expected "%s" but found "%s"'
                                 % (container, v))
            components.append((container, props, subs))
            return components
        elif n == 'BEGIN':
            # Nested component: its result lands in this component's subs.
            findComponents(lines, v, subs)
        else:
            props.append((n, p, v))
    return components
def findComponents(lines, container, components=None):
    """Read one component body from *lines* and append it to *components*.

    Each line is parsed with ``parseLine``.  Ordinary lines are stored as
    ``(n, p, v)`` property tuples, a ``BEGIN`` line starts a nested
    component that is read recursively, and the ``END`` line matching
    *container* finishes the component by appending
    ``(container, props, subs)`` to *components*.

    :param lines: iterator of raw content lines; consumed in place so the
        caller can continue reading from it afterwards.
    :param container: name of the component whose ``BEGIN`` line has
        already been consumed by the caller.
    :param components: output list.  Created fresh when omitted so that
        calls do not share a single default list.
    :returns: *components*.
    :raises ValueError: if the ``END`` line names another component.

    When the input ends before the closing ``END`` line, the partially read
    component is not appended.

    # @@TODO: make this a generator
    """
    if components is None:
        components = []
    props = []
    subs = []
    while 1:
        # Guard just the read; exhaustion of the input stops the loop.
        try:
            raw = next(lines)
        except StopIteration:
            break
        n, p, v = parseLine(raw, downcase=False)
        # print "finding...", n, p, v
        # print >>sys.stderr, "began", container, n, p, v
        if n == "END":
            # @@hmm... found an extra space after END:DAYLIGHT
            # in test/20030410querymtg.ics
            # where did that come from? allow it, or fix test data?
            v = v.rstrip().upper()
            if v != container:
                raise ValueError(
                    'expected "%s" but found "%s"' % (container, v))
            components.append((container, props, subs))
            return components
        elif n == "BEGIN":
            # Sub-component: collected into this component's subs list.
            findComponents(lines, v, subs)
        else:
            props.append((n, p, v))
    return components
def _test():
    """Run the module doctests, then dump the components of an input file.

    The file named by ``sys.argv[1]`` is passed through ``unbreak``, its
    first line is parsed to obtain the outermost component name, and the
    component tree built by ``findComponents`` is pretty-printed.
    """
    import sys
    from pprint import pprint
    import doctest
    import fromIcal

    doctest.testmod(fromIcal)

    # Keep the file open for as long as `lines` is being consumed, and
    # close it deterministically afterwards.
    with open(sys.argv[1]) as fp:
        lines = unbreak(fp)
        n, p, v = parseLine(next(lines))
        c = []
        findComponents(lines, v, c)
    pprint(c)
def interpret(sx, fp, base=None, suppressed=None):
    """Read calendar data from *fp* and write it to *sx* inside rdf:RDF.

    The first parsed line must carry the value ``VCALENDAR``.  The rest of
    the input is collected with ``findComponents`` and handed to
    ``doComponents`` between ``sx.startElement("rdf:RDF", ...)`` and
    ``sx.endElement("rdf:RDF")``.

    :param sx: output object providing ``startElement(name, attrs)`` and
        ``endElement(name)``.
    :param fp: input passed to ``unbreak`` to obtain the line iterator.
    :param base: if truthy, emitted as the ``xml:base`` attribute.
    :param suppressed: forwarded to ``doComponents``; an empty list is used
        when omitted (a fresh one per call, never a shared default).
    :raises SyntaxError: if the first line's value is not ``VCALENDAR``.
    """
    if suppressed is None:
        suppressed = []

    lines = unbreak(fp)
    n, p, v = parseLine(next(lines))
    if v != "VCALENDAR":
        raise SyntaxError("Expected CALENDAR but found: %s" % (v,))

    calendars = []
    findComponents(lines, v, calendars)

    # Namespace bindings for the root element.
    attrs = {}
    RDF.bindAttr("rdf", attrs)
    iCalendar.bindAttr("", attrs)
    if base:
        attrs["xml:base"] = base

    sx.startElement("rdf:RDF", attrs)
    doComponents(sx, calendars, iCalendarDefs, suppressed=suppressed)
    sx.endElement("rdf:RDF")
def interpret(sx, fp, base=None, suppressed=None):
    """Convert the calendar read from *fp* into an rdf:RDF element on *sx*.

    After checking that the first parsed line has the value ``VCALENDAR``,
    the remaining lines are gathered by ``findComponents`` and written by
    ``doComponents``, wrapped in ``sx.startElement('rdf:RDF', ...)`` and
    ``sx.endElement('rdf:RDF')``.

    :param sx: output object providing ``startElement(name, attrs)`` and
        ``endElement(name)``.
    :param fp: input passed to ``unbreak`` to obtain the line iterator.
    :param base: if truthy, emitted as the ``xml:base`` attribute.
    :param suppressed: forwarded to ``doComponents``; defaults to a new
        empty list on every call rather than one shared default object.
    :raises SyntaxError: if the first line's value is not ``VCALENDAR``.
    """
    if suppressed is None:
        suppressed = []

    lines = unbreak(fp)
    n, p, v = parseLine(next(lines))
    if v != 'VCALENDAR':
        raise SyntaxError('Expected CALENDAR but found: %s' % (v,))

    calendars = []
    findComponents(lines, v, calendars)

    # Namespace bindings for the root element.
    attrs = {}
    RDF.bindAttr('rdf', attrs)
    iCalendar.bindAttr('', attrs)
    if base:
        attrs['xml:base'] = base

    sx.startElement('rdf:RDF', attrs)
    doComponents(sx, calendars, iCalendarDefs, suppressed=suppressed)
    sx.endElement('rdf:RDF')
# extract(path): read the file at `path`, write an N3 header naming it (via
# wr), then walk the decoded text line by line and write one
# "<cardID> <cardData>." statement each time an 'end' line is reached.
#
# What the visible code does:
#   * The raw data is decoded as UTF-16 (after prepending "\xfe\xff") when its
#     first two bytes are 0 and 'B'; otherwise it is decoded as UTF-8.
#   * readBareLine() returns the next '\n'-delimited slice of `buf`, keeping
#     its position in the module-level global `bufpo`, and strips trailing
#     '\r'.  It returns None at end of input.
#   * A following line that starts with a space is appended (minus that
#     space) to the current line.
#   * A leading "name." prefix matched by `group_line` selects the current
#     group; startGroup() resets groupData[g]/groupPred[g] and endGroup()
#     renders the (predicate, object) pairs stored in groupData[g] as N3.
#   * predicateObject() turns a parsed (n, props, value) into a pair of N3
#     predicate and object strings; orderedFields() splits a ';'-separated
#     value against a list of field names; lineFold() inserts "\n\t" when the
#     current output line would exceed `lineLength`.
#
# NOTE(review): this definition appears whitespace-collapsed (Python 2 code
#   folded onto a few physical lines) and is not runnable as written.
# NOTE(review): the span `obj = "user:"******"" cardID = "[]"` looks like
#   corrupted/elided text; the code between the x-ablabel branch and the
#   `elif n == "uid"` branch cannot be recovered from here -- restore it
#   from version control before editing.
# NOTE(review): `'Unknown property %s with value %s' & (prop, val)` uses `&`
#   where `%` formatting appears intended; as written it would raise
#   TypeError instead of the ValueError.
# NOTE(review): `raiseValueError(...)` is missing the space after `raise`
#   and would fail with NameError when that branch is reached.
# NOTE(review): `input.close()` is called again at the very end although the
#   file was already closed right after reading; `total`, `field_value` and
#   `datatype` are assigned but not read in the visible code.
def extract(path): global nochange global verbose total = 0 wr( """# n3 http://www.w3.org/DesignIssues/Notation3. # From vCard data in %s # Extracted by $Id: ics2txt.py,v 1.1 2014-07-23 13:23:05 timbl Exp $ @prefix : <#>. @prefix loc: <#loc_>. @prefix s: <http://www.w3.org/2000/01/rdf-schema#> . @prefix log: <http://www.w3.org/2000/10/swap/log#>. @prefix v: <http://www.w3.org/2006/vcard/ns#>. @prefix vc: <http://www.w3.org/2006/vcard/class#>. @prefix abl: <http://www.w3.org/2006/vcard/abl#>. @prefix user: <#>. """ % path) input = open(path, "r") b = input.read() input.close() wr("# Length: " + `len(b)`+ "starts ") wr(" ".join(["%2x"%ord(ch) for ch in b[:8]])) wr("\n") if ord(b[0])==0 and ord(b[1]) == ord('B'): # UTF16 with MSB byte order unmarked d = "\xfe\xff" # Add byte order mark buf = (d+b).decode('utf-16') wr( " #Warning: UTF-16 was not byte order marked.\n") else: buf = b.decode('utf-8') group_line = re.compile(r'^([a-zA-Z0-9]+)\.(.*)') field_value = re.compile(r'^([A-Za-z0-9_-]*):(.*)') group = None # cardData = "" groupData = {} groupPred = {} def readBareLine(buf): "Return None for EOF or a line, even including a final widowed line" global bufpo # can't pass by ref begin = bufpo if begin == len(buf): return None bufpo = buf.find('\n', begin) if bufpo < 0: bufpo = len(buf) line = buf[begin:] else: line = buf[begin:bufpo] bufpo += 1 # After \n while line [-1:] == "\r": line = line[:-1] # Strip triling CRs return line def startGroup(g): # print "# Start group <%s>" % `g` groupData[g] = [] # Pairs of p and o groupPred[g] = "loc:%s" % munge(g) # Unless overwritten def endGroup(g): #print "# End group <%s> data:%s" % (`g`, groupData[g]) pos = groupData[g] kludges, data = [], [] for i in range(len(pos)): p, o = pos[i] if p in kludgeTags: kludges.append((p,o)) else: data.append((p,o)) if len(data) == 1: # The AddressBook model, one data item + kludges dp, do = data[0] for p,o in kludges: if p == 'v:x-ablabel': dp = o # Override predicate if p == 'v:x-abadr': 
assert do[-1:] == ']'; do = do[:-1]+' '+ p +' '+ o +';]' # annnotate object return "%s %s;\n" % (dp, do) if len(kludges) != 0: raise ValueError("Unknown Group pattern:"+`pos`) res = "" for p,o in pos: res += " %s %s;" %(p,o) return "%s [ # %s\n%s];\n" % (groupPred[g], g, res) def lineFold(str1, str2): x = str1.rfind('\n') if x < 0: x = 0 if len(str1) - x + len(str2) > lineLength: return str1+ "\n\t" + str2 return str1 + str2 def orderedFields(value, map): cardData = "" beg = 0 for i in range(len(map)): end = beg while 1: end = value.find(";", end) if end>0 and value[end-1] == "\\": end += 1 continue break if end < 0: end = len(value) st = " ".join(splitBy(value[beg:end], ',')) if st: cardData = lineFold(cardData, ' v:%s %s;' % \ (map[i], stringToN3(st, singleLine=1))) beg=end+1 if beg > len(value): break return cardData def predicateObject(n, props, value): "Return a pair of the predicate and object as N3 strings" modifiers = "" datatype = None classes = [] for prop, val in props: if prop == 'type': vals = val.lower() for val in splitBy(vals, ','): if val == 'internet' and n == 'email': pass elif val == 'pref': # Preferred @@ - how represent? 
pass elif val in typeFields.get(n, []): if relationshipModifiers.get(val, 0): if modifiers: print "# @@ multiple modifiers in: "+line modifiers = val + '-' + modifiers else: classes.append('vc:'+val[0].upper()+val[1:]) else: raise ValueError("Unhandled type %s in: %s" %(val, line)) elif prop == 'value': # This means datatype datatype = val if val == 'date': pass # Date-times from AB certainly look like w3c not iCal dates elif val == 'uri': pass else: raise ValueError ('Unimplemented data type:'+val) elif prop == 'base64' or (prop == 'encoding' and val.lower() == 'b'): value = value.replace(' ','') res = "" while value: res += value[:lineLength] + "\n" value = value[lineLength:] return 'v:'+n, '[ v:base64 """%s"""]\n' % (res) # Special case else: raise ValueError('Unknown property %s with value %s' & (prop, val)) classSpec = "" if classes: classSpec = 'a '+(', '.join(classes)) map = fieldProperties.get(n,None) pred = 'v:%s%s' % (modifiers, n) if map: if classSpec: classSpec = '\n\t'+classSpec if n == 'n': # Special case assert classSpec == "" return '', orderedFields(value, map) # Naked fields - see notes return pred, '[' + orderedFields(value, map) + classSpec + ']' if n == 'version': assert value == "3.0", "value found: "+`value` return "", "" if n == 'x.ablabel': return "", "" # used elsewhere if n == 'categories': # Really should relate these to classes, but this roundtrips obj = ", ".join(['"'+x+'"' for x in splitBy(value, ',')]) return pred, obj unesc = splitBy(value, ';') if len(unesc) != 1: raise ValueError("Unescaped semicolon in value: "+ value) unesc = unesc[0] obj = None if n == 'tel': if value[0] != '+': print "# @@ Warning: not international form tel: "+value obj = '<tel:%s>' % (value.replace(' ','-')) elif n == 'url': obj = '<%s>' % (value) elif n == 'email': obj = '<mailto:%s>' % (value) if obj: # Any case so far is a form of URI if classSpec: wr('%s %s.\n' %(obj, classSpec)) return pred, obj elif n in singleTextField : # Single text if classSpec: 
raiseValueError("Unexpected class on %s: %s"%(n,`classSpec`)) return pred, stringToN3(unesc, singleLine=0) # @@@ N3 escaping raise ValueError('Unknown tag:'+n) global bufpo bufpo = 0 nextLine = readBareLine(buf) while 1: line = nextLine while 1: nextLine = readBareLine(buf) if not nextLine or nextLine[0] != ' ': break line += nextLine[1:] if line is None : break # EOF # wr( "# line: " +line[:100]) m = group_line.match(line) if m: g = m.group(1) line = m.group(2) if group != g: if group is not None: cardData += endGroup(group) if g is not None: startGroup(g) group = g n, props, value = icslex.parseLine(line) # for prop, val in props: # if prop == 'type': # val = val.lower() # if group .startswith("item"): # AB hack # groupPred[group] = "loc:"+val if n == 'x-ablabel': pred = 'v:'+n if value[:4] == "_$!<" and value[-4:] == ">!$_": # [sic] obj = "abl:"+munge(value[4:-4]).lower() else: # User generated obj = "user:"******"" cardID = "[]" elif n == "uid": cardID = "<uid:%s>" % value elif n == 'end': wr("%s %s." % (cardID, cardData)) else: if n == 'n': # ugh special case map = fieldProperties.get(n,None) cardData += orderedFields(value, map) +'\n' # Naked fields - see notes else: p, o = predicateObject(n, props, value) if p: cardData+= " %s %s;\n" %(p, o) wr("\n\n#ends\n") input.close()
# extract(path): read the file at `path`, write an N3 header naming it (via
# wr), then walk the decoded text line by line and write one
# "<cardID> <cardData>." statement each time an 'end' line is reached.
#
# What the visible code does:
#   * The raw data is decoded as UTF-16 (after prepending "\xfe\xff") when its
#     first two bytes are 0 and 'B'; otherwise it is decoded as UTF-8.
#   * readBareLine() returns the next '\n'-delimited slice of `buf`, keeping
#     its position in the module-level global `bufpo`, and strips trailing
#     '\r'.  It returns None at end of input.
#   * A following line that starts with a space is appended (minus that
#     space) to the current line.
#   * A leading "name." prefix matched by `group_line` selects the current
#     group; startGroup() resets groupData[g]/groupPred[g] and endGroup()
#     renders the (predicate, object) pairs stored in groupData[g] as N3.
#   * predicateObject() turns a parsed (n, props, value) into a pair of N3
#     predicate and object strings; orderedFields() splits a ';'-separated
#     value against a list of field names; lineFold() inserts "\n\t" when the
#     current output line would exceed `lineLength`.
#
# NOTE(review): this definition appears whitespace-collapsed (Python 2 code
#   folded onto a few physical lines) and is not runnable as written.
# NOTE(review): the span `obj = "user:"******"" cardID = "[]"` looks like
#   corrupted/elided text; the code between the x-ablabel branch and the
#   `elif n == "uid"` branch cannot be recovered from here -- restore it
#   from version control before editing.
# NOTE(review): `'Unknown property %s with value %s' & (prop, val)` uses `&`
#   where `%` formatting appears intended; as written it would raise
#   TypeError instead of the ValueError.
# NOTE(review): `raiseValueError(...)` is missing the space after `raise`
#   and would fail with NameError when that branch is reached.
# NOTE(review): `input.close()` is called again at the very end although the
#   file was already closed right after reading; `total`, `field_value` and
#   `datatype` are assigned but not read in the visible code.
def extract(path): global nochange global verbose total = 0 wr("""# n3 http://www.w3.org/DesignIssues/Notation3. # From vCard data in %s # Extracted by $Id: ics2txt.py,v 1.1 2014-07-23 13:23:05 timbl Exp $ @prefix : <#>. @prefix loc: <#loc_>. @prefix s: <http://www.w3.org/2000/01/rdf-schema#> . @prefix log: <http://www.w3.org/2000/10/swap/log#>. @prefix v: <http://www.w3.org/2006/vcard/ns#>. @prefix vc: <http://www.w3.org/2006/vcard/class#>. @prefix abl: <http://www.w3.org/2006/vcard/abl#>. @prefix user: <#>. """ % path) input = open(path, "r") b = input.read() input.close() wr("# Length: " + ` len(b) ` + "starts ") wr(" ".join(["%2x" % ord(ch) for ch in b[:8]])) wr("\n") if ord(b[0]) == 0 and ord( b[1]) == ord('B'): # UTF16 with MSB byte order unmarked d = "\xfe\xff" # Add byte order mark buf = (d + b).decode('utf-16') wr(" #Warning: UTF-16 was not byte order marked.\n") else: buf = b.decode('utf-8') group_line = re.compile(r'^([a-zA-Z0-9]+)\.(.*)') field_value = re.compile(r'^([A-Za-z0-9_-]*):(.*)') group = None # cardData = "" groupData = {} groupPred = {} def readBareLine(buf): "Return None for EOF or a line, even including a final widowed line" global bufpo # can't pass by ref begin = bufpo if begin == len(buf): return None bufpo = buf.find('\n', begin) if bufpo < 0: bufpo = len(buf) line = buf[begin:] else: line = buf[begin:bufpo] bufpo += 1 # After \n while line[-1:] == "\r": line = line[:-1] # Strip triling CRs return line def startGroup(g): # print "# Start group <%s>" % `g` groupData[g] = [] # Pairs of p and o groupPred[g] = "loc:%s" % munge(g) # Unless overwritten def endGroup(g): #print "# End group <%s> data:%s" % (`g`, groupData[g]) pos = groupData[g] kludges, data = [], [] for i in range(len(pos)): p, o = pos[i] if p in kludgeTags: kludges.append((p, o)) else: data.append((p, o)) if len(data) == 1: # The AddressBook model, one data item + kludges dp, do = data[0] for p, o in kludges: if p == 'v:x-ablabel': dp = o # Override predicate if p == 
'v:x-abadr': assert do[-1:] == ']' do = do[:-1] + ' ' + p + ' ' + o + ';]' # annnotate object return "%s %s;\n" % (dp, do) if len(kludges) != 0: raise ValueError("Unknown Group pattern:" + ` pos `) res = "" for p, o in pos: res += " %s %s;" % (p, o) return "%s [ # %s\n%s];\n" % (groupPred[g], g, res) def lineFold(str1, str2): x = str1.rfind('\n') if x < 0: x = 0 if len(str1) - x + len(str2) > lineLength: return str1 + "\n\t" + str2 return str1 + str2 def orderedFields(value, map): cardData = "" beg = 0 for i in range(len(map)): end = beg while 1: end = value.find(";", end) if end > 0 and value[end - 1] == "\\": end += 1 continue break if end < 0: end = len(value) st = " ".join(splitBy(value[beg:end], ',')) if st: cardData = lineFold(cardData, ' v:%s %s;' % \ (map[i], stringToN3(st, singleLine=1))) beg = end + 1 if beg > len(value): break return cardData def predicateObject(n, props, value): "Return a pair of the predicate and object as N3 strings" modifiers = "" datatype = None classes = [] for prop, val in props: if prop == 'type': vals = val.lower() for val in splitBy(vals, ','): if val == 'internet' and n == 'email': pass elif val == 'pref': # Preferred @@ - how represent? 
pass elif val in typeFields.get(n, []): if relationshipModifiers.get(val, 0): if modifiers: print "# @@ multiple modifiers in: " + line modifiers = val + '-' + modifiers else: classes.append('vc:' + val[0].upper() + val[1:]) else: raise ValueError("Unhandled type %s in: %s" % (val, line)) elif prop == 'value': # This means datatype datatype = val if val == 'date': pass # Date-times from AB certainly look like w3c not iCal dates elif val == 'uri': pass else: raise ValueError('Unimplemented data type:' + val) elif prop == 'base64' or (prop == 'encoding' and val.lower() == 'b'): value = value.replace(' ', '') res = "" while value: res += value[:lineLength] + "\n" value = value[lineLength:] return 'v:' + n, '[ v:base64 """%s"""]\n' % (res ) # Special case else: raise ValueError('Unknown property %s with value %s' & (prop, val)) classSpec = "" if classes: classSpec = 'a ' + (', '.join(classes)) map = fieldProperties.get(n, None) pred = 'v:%s%s' % (modifiers, n) if map: if classSpec: classSpec = '\n\t' + classSpec if n == 'n': # Special case assert classSpec == "" return '', orderedFields(value, map) # Naked fields - see notes return pred, '[' + orderedFields(value, map) + classSpec + ']' if n == 'version': assert value == "3.0", "value found: " + ` value ` return "", "" if n == 'x.ablabel': return "", "" # used elsewhere if n == 'categories': # Really should relate these to classes, but this roundtrips obj = ", ".join(['"' + x + '"' for x in splitBy(value, ',')]) return pred, obj unesc = splitBy(value, ';') if len(unesc) != 1: raise ValueError("Unescaped semicolon in value: " + value) unesc = unesc[0] obj = None if n == 'tel': if value[0] != '+': print "# @@ Warning: not international form tel: " + value obj = '<tel:%s>' % (value.replace(' ', '-')) elif n == 'url': obj = '<%s>' % (value) elif n == 'email': obj = '<mailto:%s>' % (value) if obj: # Any case so far is a form of URI if classSpec: wr('%s %s.\n' % (obj, classSpec)) return pred, obj elif n in singleTextField: # 
Single text if classSpec: raiseValueError("Unexpected class on %s: %s" % (n, ` classSpec `)) return pred, stringToN3(unesc, singleLine=0) # @@@ N3 escaping raise ValueError('Unknown tag:' + n) global bufpo bufpo = 0 nextLine = readBareLine(buf) while 1: line = nextLine while 1: nextLine = readBareLine(buf) if not nextLine or nextLine[0] != ' ': break line += nextLine[1:] if line is None: break # EOF # wr( "# line: " +line[:100]) m = group_line.match(line) if m: g = m.group(1) line = m.group(2) if group != g: if group is not None: cardData += endGroup(group) if g is not None: startGroup(g) group = g n, props, value = icslex.parseLine(line) # for prop, val in props: # if prop == 'type': # val = val.lower() # if group .startswith("item"): # AB hack # groupPred[group] = "loc:"+val if n == 'x-ablabel': pred = 'v:' + n if value[:4] == "_$!<" and value[-4:] == ">!$_": # [sic] obj = "abl:" + munge(value[4:-4]).lower() else: # User generated obj = "user:"******"" cardID = "[]" elif n == "uid": cardID = "<uid:%s>" % value elif n == 'end': wr("%s %s." % (cardID, cardData)) else: if n == 'n': # ugh special case map = fieldProperties.get(n, None) cardData += orderedFields( value, map) + '\n' # Naked fields - see notes else: p, o = predicateObject(n, props, value) if p: cardData += " %s %s;\n" % (p, o) wr("\n\n#ends\n") input.close()