def register_org(item): """register the organization info, for lookup later.""" global MAX_ORGID # SponsoringOrganization/Name -- fortunately, no conflicts # but there's no data except the name orgname = xmlh.get_tag_val(item, "Name") if orgname in ORGIDS: return ORGIDS[orgname] MAX_ORGID = MAX_ORGID + 1 orgstr = '<Organization>' orgstr += '<organizationID>%d</organizationID>' % (MAX_ORGID) orgstr += '<nationalEIN />' orgstr += '<name>%s</name>' % (xml.sax.saxutils.escape(orgname)) orgstr += '<missionStatement />' orgstr += '<description />' orgstr += '<location>' orgstr += xmlh.output_node("city", item, "City") orgstr += xmlh.output_node("region", item, "StateOrProvince") orgstr += xmlh.output_node("postalCode", item, "ZipOrPostalCode") orgstr += '</location>' orgstr += '<organizationURL />' orgstr += '<donateURL />' orgstr += '<logoURL />' orgstr += '<detailURL />' orgstr += '</Organization>' ORGS[MAX_ORGID] = orgstr ORGIDS[orgname] = MAX_ORGID return MAX_ORGID
def parse(instr, maxrecs, progress): numorgs = numopps = 0 instr = re.sub(r'<(/?db):', r'<\1_', instr) opps = re.findall(r'<VolunteerOpportunity>.+?</VolunteerOpportunity>', instr, re.DOTALL) volopps = "" for i, oppstr in enumerate(opps): #if progress and i > 0 and i % 250 == 0: # print str(datetime.now())+": ", i, " opportunities processed." if (maxrecs > 0 and i > maxrecs): break xmlh.print_rps_progress("opps", progress, i, maxrecs) item = xmlh.simple_parser(oppstr, known_elnames, progress=False) orgid = register_org(item) # logoURL -- sigh, this is for the opportunity not the org volopps += '<VolunteerOpportunity>' volopps += xmlh.output_val('volunteerOpportunityID', str(i)) volopps += xmlh.output_val('sponsoringOrganizationID', str(orgid)) volopps += xmlh.output_node('volunteerHubOrganizationID', item, "LocalID") volopps += xmlh.output_node('title', item, "Title") volopps += xmlh.output_node('abstract', item, "Description") volopps += xmlh.output_node('description', item, "Description") volopps += xmlh.output_node('detailURL', item, "DetailURL") volopps += xmlh.output_val('volunteersNeeded', "-8888") try: oppdates = item.getElementsByTagName("OpportunityDate") except: oppdates = [] if len(oppdates) > 1: print datetime.now(), \ "parse_servenet.py: only 1 OpportunityDate supported." #return None oppdate = oppdates[0] elif len(oppdates) == 0: oppdate = None else: oppdate = oppdates[0] volopps += '<dateTimeDurations><dateTimeDuration>' if oppdate: volopps += xmlh.output_val('openEnded', 'No') volopps += xmlh.output_val( 'duration', 'P%s%s' % (xmlh.get_tag_val(oppdate, "DurationQuantity"), xmlh.get_tag_val(oppdate, "DurationUnit"))) volopps += xmlh.output_val('commitmentHoursPerWeek', '0') volopps += xmlh.output_node('startDate', oppdate, "StartDate") volopps += xmlh.output_node('endDate', oppdate, "EndDate") else: volopps += xmlh.output_val('openEnded', 'Yes') volopps += xmlh.output_val('commitmentHoursPerWeek', '0') volopps += '</dateTimeDuration></dateTimeDurations>' volopps += '<locations>' try: opplocs = item.getElementsByTagName("Location") except: opplocs = [] for opploc in opplocs: volopps += '<location>' virtual_tag = opploc.getElementsByTagName("Virtual") if virtual_tag and xmlh.get_tag_val( opploc, "Virtual").lower() == "yes": volopps += xmlh.output_val('virtual', 'Yes') else: volopps += xmlh.output_node('region', opploc, "StateOrProvince") volopps += xmlh.output_node('country', opploc, "Country") volopps += xmlh.output_node('postalCode', opploc, "ZipOrPostalCode") volopps += '</location>' volopps += '</locations>' volopps += '<categoryTags/>' volopps += '</VolunteerOpportunity>' numopps += 1 # convert to footprint format outstr = '<?xml version="1.0" ?>' outstr += '<FootprintFeed schemaVersion="0.1">' outstr += '<FeedInfo>' outstr += xmlh.output_val('providerID', providerID) outstr += xmlh.output_val('providerName', providerName) outstr += xmlh.output_val('feedID', feedID) outstr += xmlh.output_val('createdDateTime', xmlh.current_ts()) outstr += xmlh.output_val('providerURL', providerURL) outstr += xmlh.output_val('description', feedDescription) # TODO: capture ts -- use now?! outstr += '</FeedInfo>' # hardcoded: Organization outstr += '<Organizations>' for key in ORGS: outstr += ORGS[key] numorgs += 1 outstr += '</Organizations>' outstr += '<VolunteerOpportunities>' outstr += volopps outstr += '</VolunteerOpportunities>' outstr += '</FootprintFeed>' #outstr = re.sub(r'><([^/])', r'>\n<\1', outstr) return outstr, numorgs, numopps
def parse(instr, maxrecs, progress): numorgs = numopps = 0 instr = re.sub(r'<(/?db):', r'<\1_', instr) opps = re.findall(r'<VolunteerOpportunity>.+?</VolunteerOpportunity>', instr, re.DOTALL) volopps = "" for i, oppstr in enumerate(opps): #if progress and i > 0 and i % 250 == 0: # print str(datetime.now())+": ", i, " opportunities processed." if (maxrecs > 0 and i > maxrecs): break xmlh.print_rps_progress("opps", progress, i, maxrecs) item = xmlh.simple_parser(oppstr, known_elnames, progress=False) orgid = register_org(item) # logoURL -- sigh, this is for the opportunity not the org volopps += '<VolunteerOpportunity>' volopps += xmlh.output_val('volunteerOpportunityID', str(i)) volopps += xmlh.output_val('sponsoringOrganizationID', str(orgid)) volopps += xmlh.output_node('volunteerHubOrganizationID', item, "LocalID") volopps += xmlh.output_node('title', item, "Title") volopps += xmlh.output_node('abstract', item, "Description") volopps += xmlh.output_node('description', item, "Description") volopps += xmlh.output_node('detailURL', item, "DetailURL") volopps += xmlh.output_val('volunteersNeeded', "-8888") try: oppdates = item.getElementsByTagName("OpportunityDate") except: oppdates = [] if len(oppdates) > 1: print datetime.now(), \ "parse_servenet.py: only 1 OpportunityDate supported." #return None oppdate = oppdates[0] elif len(oppdates) == 0: oppdate = None else: oppdate = oppdates[0] volopps += '<dateTimeDurations><dateTimeDuration>' if oppdate: volopps += xmlh.output_val('openEnded', 'No') volopps += xmlh.output_val('duration', 'P%s%s' % (xmlh.get_tag_val(oppdate, "DurationQuantity"), xmlh.get_tag_val(oppdate, "DurationUnit"))) volopps += xmlh.output_val('commitmentHoursPerWeek', '0') volopps += xmlh.output_node('startDate', oppdate, "StartDate") volopps += xmlh.output_node('endDate', oppdate, "EndDate") else: volopps += xmlh.output_val('openEnded', 'Yes') volopps += xmlh.output_val('commitmentHoursPerWeek', '0') volopps += '</dateTimeDuration></dateTimeDurations>' volopps += '<locations>' try: opplocs = item.getElementsByTagName("Location") except: opplocs = [] for opploc in opplocs: volopps += '<location>' virtual_tag = opploc.getElementsByTagName("Virtual") if virtual_tag and xmlh.get_tag_val(opploc, "Virtual").lower() == "yes": volopps += xmlh.output_val('virtual', 'Yes') else: volopps += xmlh.output_node('region', opploc, "StateOrProvince") volopps += xmlh.output_node('country', opploc, "Country") volopps += xmlh.output_node('postalCode', opploc, "ZipOrPostalCode") volopps += '</location>' volopps += '</locations>' volopps += '<categoryTags/>' volopps += '</VolunteerOpportunity>' numopps += 1 # convert to footprint format outstr = '<?xml version="1.0" ?>' outstr += '<FootprintFeed schemaVersion="0.1">' outstr += '<FeedInfo>' outstr += xmlh.output_val('providerID', providerID) outstr += xmlh.output_val('providerName', providerName) outstr += xmlh.output_val('feedID', feedID) outstr += xmlh.output_val('createdDateTime', xmlh.current_ts()) outstr += xmlh.output_val('providerURL', providerURL) outstr += xmlh.output_val('description', feedDescription) # TODO: capture ts -- use now?! outstr += '</FeedInfo>' # hardcoded: Organization outstr += '<Organizations>' for key in ORGS: outstr += ORGS[key] numorgs += 1 outstr += '</Organizations>' outstr += '<VolunteerOpportunities>' outstr += volopps outstr += '</VolunteerOpportunities>' outstr += '</FootprintFeed>' #outstr = re.sub(r'><([^/])', r'>\n<\1', outstr) return outstr, numorgs, numopps