def parse_fast(instr, maxrecs, progress):
    """Rebuild a Footprint feed from a pre-validated XML string, adding defaults.

    Fast parser that does not check correctness, i.e. the input must be
    pre-checked by the caller.  The FeedInfo, Organization and
    VolunteerOpportunity sections are located with non-greedy regexes, each
    match is parsed with xmlh.simple_parser, default values are applied, and
    the pretty-printed fragments are concatenated into one output document.

    Args:
      instr: the feed XML as a single string.
      maxrecs: maximum number of VolunteerOpportunity records to emit;
        a value <= 0 means no limit.
      progress: accepted for interface compatibility; currently unused.

    Returns:
      A tuple (xml_string, numorgs, numopps) where numorgs and numopps are
      the numbers of Organization and VolunteerOpportunity records written
      to xml_string.
    """
    # NOTE(review): another definition of parse_fast appears later in this
    # file and rebinds the name, so that later copy is the one callers get.
    # Consider removing one of the two.
    feedinfo_re = re.compile('<FeedInfo>.+?</FeedInfo>', re.DOTALL)
    org_re = re.compile('<Organization>.+?</Organization>', re.DOTALL)
    opp_re = re.compile('<VolunteerOpportunity>.+?</VolunteerOpportunity>',
                        re.DOTALL)

    numorgs = numopps = 0
    outstr_list = ['<?xml version="1.0" ?>',
                   '<FootprintFeed schemaVersion="0.1">']

    # note: processes Organizations first, so ID lookups work
    for match in feedinfo_re.finditer(instr):
        node = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
        xmlh.set_default_value(node, node.firstChild, "feedID", "0")
        set_default_time_elem(node, node.firstChild, "createdDateTime")
        outstr_list.append(xmlh.prettyxml(node, True))

    outstr_list.append('<Organizations>')
    for match in org_re.finditer(instr):
        node = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
        numorgs += 1
        outstr_list.append(xmlh.prettyxml(node, True))
    outstr_list.append('</Organizations>')

    outstr_list.append('<VolunteerOpportunities>')
    for match in opp_re.finditer(instr):
        # Check the limit before parsing/counting so that numopps equals the
        # number of records actually emitted (previously it ended up at
        # maxrecs + 1 when the limit cut the loop short).
        if maxrecs > 0 and numopps >= maxrecs:
            break
        opp = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
        numopps += 1

        # Original author's note: these set_default_* functions don't do
        # anything if the field doesn't already exist.
        # NOTE(review): the helper names suggest the opposite (fill in the
        # value when it is missing) -- confirm against xmlh.
        xmlh.set_default_value(opp, opp, "volunteersNeeded", -8888)
        xmlh.set_default_value(opp, opp, "paid", "No")
        xmlh.set_default_value(opp, opp, "sexRestrictedTo", "Neither")
        xmlh.set_default_value(opp, opp, "language", "English")
        set_default_time_elem(opp, opp, "lastUpdated")
        set_default_time_elem(opp, opp, "expires",
                              xmlh.current_ts(DEFAULT_EXPIRATION))

        # Element lookups are best-effort: a failed lookup is treated as
        # "no such elements" rather than aborting the whole feed.
        try:
            opplocs = list(opp.getElementsByTagName("location"))
        except Exception:
            opplocs = []
        for loc in opplocs:
            xmlh.set_default_value(opp, loc, "virtual", "No")
            xmlh.set_default_value(opp, loc, "country", "US")

        try:
            dttms = list(opp.getElementsByTagName("dateTimeDurations"))
        except Exception:
            dttms = []
        for dttm in dttms:
            xmlh.set_default_value(opp, dttm, "iCalRecurrence", "")
            # getElementsByTagName yields an empty collection (not None) when
            # nothing matches, so test for emptiness; comparing with None
            # made the "Yes" branch unreachable.
            if (not dttm.getElementsByTagName("startTime") and
                    not dttm.getElementsByTagName("endTime")):
                flexible = "Yes"
            else:
                flexible = "No"
            # NOTE(review): this also tags timeFlexible with an olsonTZ
            # attribute, as the original did -- confirm that is intended.
            set_default_time_elem(opp, dttm, "timeFlexible", flexible)
            xmlh.set_default_value(opp, dttm, "openEnded", "No")

        try:
            time_elems = (list(opp.getElementsByTagName("startTime")) +
                          list(opp.getElementsByTagName("endTime")))
        except Exception:
            time_elems = []
        for el in time_elems:
            xmlh.set_default_attr(opp, el, "olsonTZ", "America/Los_Angeles")

        outstr_list.append(xmlh.prettyxml(opp, True))
    outstr_list.append('</VolunteerOpportunities>')

    outstr_list.append('</FootprintFeed>')
    return "".join(outstr_list), numorgs, numopps
def parse_fast(instr, maxrecs, progress):
    """Rebuild a Footprint feed from a pre-validated XML string, adding defaults.

    Fast parser that does not check correctness, i.e. the input must be
    pre-checked by the caller.  The FeedInfo, Organization and
    VolunteerOpportunity sections are located with non-greedy regexes, each
    match is parsed with xmlh.simple_parser, default values are applied, and
    the pretty-printed fragments are concatenated into one output document.

    Args:
      instr: the feed XML as a single string.
      maxrecs: maximum number of VolunteerOpportunity records to emit;
        a value <= 0 means no limit.
      progress: accepted for interface compatibility; currently unused.

    Returns:
      A tuple (xml_string, numorgs, numopps) where numorgs and numopps are
      the numbers of Organization and VolunteerOpportunity records written
      to xml_string.
    """
    # NOTE(review): an earlier definition of parse_fast appears in this file;
    # this later definition rebinds the name and is the one callers get.
    # Consider removing the earlier copy.
    feedinfo_re = re.compile('<FeedInfo>.+?</FeedInfo>', re.DOTALL)
    org_re = re.compile('<Organization>.+?</Organization>', re.DOTALL)
    opp_re = re.compile('<VolunteerOpportunity>.+?</VolunteerOpportunity>',
                        re.DOTALL)

    numorgs = numopps = 0
    outstr_list = ['<?xml version="1.0" ?>',
                   '<FootprintFeed schemaVersion="0.1">']

    # note: processes Organizations first, so ID lookups work
    for match in feedinfo_re.finditer(instr):
        node = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
        xmlh.set_default_value(node, node.firstChild, "feedID", "0")
        set_default_time_elem(node, node.firstChild, "createdDateTime")
        outstr_list.append(xmlh.prettyxml(node, True))

    outstr_list.append('<Organizations>')
    for match in org_re.finditer(instr):
        node = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
        numorgs += 1
        outstr_list.append(xmlh.prettyxml(node, True))
    outstr_list.append('</Organizations>')

    outstr_list.append('<VolunteerOpportunities>')
    for match in opp_re.finditer(instr):
        # Check the limit before parsing/counting so that numopps equals the
        # number of records actually emitted (previously it ended up at
        # maxrecs + 1 when the limit cut the loop short).
        if maxrecs > 0 and numopps >= maxrecs:
            break
        opp = xmlh.simple_parser(match.group(0), KNOWN_ELEMENTS, False)
        numopps += 1

        # Original author's note: these set_default_* functions don't do
        # anything if the field doesn't already exist.
        # NOTE(review): the helper names suggest the opposite (fill in the
        # value when it is missing) -- confirm against xmlh.
        xmlh.set_default_value(opp, opp, "volunteersNeeded", -8888)
        xmlh.set_default_value(opp, opp, "paid", "No")
        xmlh.set_default_value(opp, opp, "sexRestrictedTo", "Neither")
        xmlh.set_default_value(opp, opp, "language", "English")
        set_default_time_elem(opp, opp, "lastUpdated")
        set_default_time_elem(opp, opp, "expires",
                              xmlh.current_ts(DEFAULT_EXPIRATION))

        # Element lookups are best-effort: a failed lookup is treated as
        # "no such elements" rather than aborting the whole feed.
        try:
            opplocs = list(opp.getElementsByTagName("location"))
        except Exception:
            opplocs = []
        for loc in opplocs:
            xmlh.set_default_value(opp, loc, "virtual", "No")
            xmlh.set_default_value(opp, loc, "country", "US")

        try:
            dttms = list(opp.getElementsByTagName("dateTimeDurations"))
        except Exception:
            dttms = []
        for dttm in dttms:
            xmlh.set_default_value(opp, dttm, "iCalRecurrence", "")
            # getElementsByTagName yields an empty collection (not None) when
            # nothing matches, so test for emptiness; comparing with None
            # made the "Yes" branch unreachable.
            if (not dttm.getElementsByTagName("startTime") and
                    not dttm.getElementsByTagName("endTime")):
                flexible = "Yes"
            else:
                flexible = "No"
            # NOTE(review): this also tags timeFlexible with an olsonTZ
            # attribute, as the original did -- confirm that is intended.
            set_default_time_elem(opp, dttm, "timeFlexible", flexible)
            xmlh.set_default_value(opp, dttm, "openEnded", "No")

        try:
            time_elems = (list(opp.getElementsByTagName("startTime")) +
                          list(opp.getElementsByTagName("endTime")))
        except Exception:
            time_elems = []
        for el in time_elems:
            xmlh.set_default_attr(opp, el, "olsonTZ", "America/Los_Angeles")

        outstr_list.append(xmlh.prettyxml(opp, True))
    outstr_list.append('</VolunteerOpportunities>')

    outstr_list.append('</FootprintFeed>')
    return "".join(outstr_list), numorgs, numopps
def set_default_time_elem(parent, entity, tagname, timest=None):
    """Footprint macro: default a time element and its olsonTZ attribute.

    Calls xmlh.set_default_value for tagname with timest as the default
    value, then calls xmlh.set_default_attr on the node it returns to
    default the "olsonTZ" attribute to "America/Los_Angeles".

    Args:
      parent: forwarded as the first argument to both xmlh helpers.
      entity: forwarded to xmlh.set_default_value as the node to operate on.
      tagname: name of the time element to default.
      timest: default timestamp value; when None (the default),
        xmlh.current_ts() is evaluated at call time.
    """
    if timest is None:
        # Resolve "now" on every call.  Writing xmlh.current_ts() directly in
        # the signature would evaluate it only once, when the function is
        # defined, freezing every defaulted timestamp at module import time.
        timest = xmlh.current_ts()
    cdt = xmlh.set_default_value(parent, entity, tagname, timest)
    xmlh.set_default_attr(parent, cdt, "olsonTZ", "America/Los_Angeles")