def register_org(item):
    """Register the organization described by *item* and return its ID.

    The organization is keyed by the text of its ``Name`` tag.  A name that
    is already present in ``ORGIDS`` returns the stored ID.  Otherwise
    ``MAX_ORGID`` is incremented, an ``<Organization>`` fragment is stored
    in ``ORGS`` under the new ID and the name is recorded in ``ORGIDS``.
    """
    global MAX_ORGID

    # SponsoringOrganization/Name -- fortunately, no conflicts,
    # but there's no data except the name.
    name = xmlh.get_tag_val(item, "Name")
    try:
        return ORGIDS[name]
    except KeyError:
        pass

    MAX_ORGID += 1
    new_id = MAX_ORGID

    pieces = [
        '<Organization>',
        '<organizationID>%d</organizationID>' % (new_id),
        '<nationalEIN />',
        '<name>%s</name>' % (xml.sax.saxutils.escape(name)),
        '<missionStatement />',
        '<description />',
        '<location>',
        xmlh.output_node("city", item, "City"),
        xmlh.output_node("region", item, "StateOrProvince"),
        xmlh.output_node("postalCode", item, "ZipOrPostalCode"),
        '</location>',
        '<organizationURL />',
        '<donateURL />',
        '<logoURL />',
        '<detailURL />',
        '</Organization>',
    ]

    ORGS[new_id] = "".join(pieces)
    ORGIDS[name] = new_id
    return new_id
def tag_by_date_range(self, rec, feedinfo):
    """Score a record by whether it starts or ends inside the date range.

    Args:
      rec: record object; its ``get_val`` is read for ``startDate`` and
        ``endDate``.
      feedinfo: feed info node; its ``providerID`` tag is checked against
        ``self.omitted_providers``.

    Returns:
      0.0 when the provider is omitted or neither date is in range; 1.0 when
      the start date or the end date lies within
      ``[self.date_range_start, self.date_range_end]`` (inclusive).
    """
    if xmlh.get_tag_val(feedinfo, "providerID") in self.omitted_providers:
        return 0.0

    def parse_date(text):
        """Parse *text* as m/d/Y, then as Y-m-d; None when neither fits."""
        for fmt in ("%m/%d/%Y", "%Y-%m-%d"):
            try:
                return datetime.strptime(text, fmt)
            except (ValueError, TypeError):
                continue
        return None

    # The start date is tried first; the end date only matters when the
    # start date did not already put the record in range.
    for field in ("startDate", "endDate"):
        text = rec.get_val(field)
        if not text:
            continue
        # The original end-date fallback parsed the not-yet-assigned
        # ``end_date`` instead of the string, so ISO-format end dates could
        # never match; both fields now share one parser.
        parsed = parse_date(text)
        if (parsed is not None
                and self.date_range_start <= parsed <= self.date_range_end):
            return 1.0

    return 0.0
def parse(s, maxrecs, progress):
    """Return FPXML given volunteermatch data.

    Args:
      s: raw feed text handed to ``xmlh.simple_parser``.
      maxrecs: maximum number of ``listing`` elements to convert.  Any
        negative value, or a value larger than the number of listings,
        means "all listings".
      progress: forwarded to ``xmlh.simple_parser``.

    Returns:
      A ``(fpxml, numorgs, numopps)`` tuple, or None when a listing does not
      have exactly one ``parent``, ``duration`` or ``location`` element.
    """
    # TODO: progress
    known_elnames = ['feed', 'title', 'subtitle', 'div', 'span', 'updated',
                     'id', 'link', 'icon', 'logo', 'author', 'name', 'uri',
                     'email', 'rights', 'entry', 'published',
                     'g:publish_date', 'g:expiration_date',
                     'g:event_date_range', 'g:start', 'g:end', 'updated',
                     'category', 'summary', 'content', 'awb:city',
                     'awb:country', 'awb:state', 'awb:postalcode',
                     'g:location', 'g:age_range', 'g:employer', 'g:job_type',
                     'g:job_industry', 'awb:paid', ]

    def log(message):
        """Print a timestamped message (same output on Python 2 and 3)."""
        print("%s %s" % (datetime.now(), message))

    xmldoc = xmlh.simple_parser(s, known_elnames, progress)

    pubdate = xmlh.get_tag_val(xmldoc, "created")
    pubdate = dateutil.parser.parse(pubdate).strftime("%Y-%m-%dT%H:%M:%S")

    # convert to footprint format
    out = []
    emit = out.append
    emit('<?xml version="1.0" ?>')
    emit('<FootprintFeed schemaVersion="0.1">')
    emit('<FeedInfo>')
    # TODO: assign provider IDs?
    emit('<providerID>104</providerID>')
    emit('<providerName>volunteermatch.org</providerName>')
    emit('<feedID>1</feedID>')
    emit('<providerURL>http://www.volunteermatch.org/</providerURL>')
    emit('<createdDateTime>%s</createdDateTime>' % (pubdate))
    emit('<description></description>')
    emit('</FeedInfo>')

    numorgs = numopps = 0

    items = xmldoc.getElementsByTagName("listing")
    # Only -1 used to mean "no limit"; other negative values fell through to
    # the slice below and silently dropped trailing listings.
    if maxrecs < 0 or maxrecs > items.length:
        maxrecs = items.length
    listings = items[0:maxrecs]

    # hardcoded: Organization
    emit('<Organizations>')
    for item in listings:
        orgs = item.getElementsByTagName("parent")
        if orgs.length != 1:
            log("parse_volunteermatch: listing does not have an organization")
            return None
        org = orgs[0]
        emit('<Organization>')
        emit('<organizationID>%s</organizationID>'
             % (xmlh.get_tag_val(org, "key")))
        emit('<nationalEIN></nationalEIN>')
        emit('<name>%s</name>' % (xmlh.get_tag_val(org, "name")))
        emit('<missionStatement></missionStatement>')
        emit('<description></description>')
        emit('<location><city></city><region></region>'
             '<postalCode></postalCode></location>')
        emit('<organizationURL>%s</organizationURL>'
             % (xmlh.get_tag_val(org, "URL")))
        emit('<donateURL></donateURL>')
        emit('<logoURL></logoURL>')
        emit('<detailURL>%s</detailURL>'
             % (xmlh.get_tag_val(org, "detailURL")))
        emit('</Organization>')
        numorgs += 1
    emit('</Organizations>')

    emit('<VolunteerOpportunities>')
    for item in listings:
        emit('<VolunteerOpportunity>')
        emit('<volunteerOpportunityID>%s</volunteerOpportunityID>'
             % (xmlh.get_tag_val(item, "key")))

        orgs = item.getElementsByTagName("parent")
        if orgs.length == 1:
            org = orgs[0]
            emit('<sponsoringOrganizationIDs><sponsoringOrganizationID>%s'
                 '</sponsoringOrganizationID></sponsoringOrganizationIDs>'
                 % (xmlh.get_tag_val(org, "key")))
        else:
            emit('<sponsoringOrganizationIDs><sponsoringOrganizationID>0'
                 '</sponsoringOrganizationID></sponsoringOrganizationIDs>')
            log("parse_volunteermatch: listing does not have an organization")

        emit('<title>%s</title>' % (xmlh.get_tag_val(item, "title")))
        emit('<volunteersNeeded>-8888</volunteersNeeded>')

        emit('<dateTimeDurations><dateTimeDuration>')
        durations = xmlh.get_children_by_tagname(item, "duration")
        if len(durations) != 1:
            log("parse_volunteermatch: number of durations in item != 1")
            return None
        duration = durations[0]
        if duration.getAttribute("ongoing") == 'true':
            emit('<openEnded>Yes</openEnded>')
        else:
            emit('<openEnded>No</openEnded>')
        listing_times = duration.getElementsByTagName("listingTime")
        if listing_times.length == 1:
            listing_time = listing_times[0]
            emit('<startTime>%s</startTime>'
                 % (xmlh.get_tag_val(listing_time, "startTime")))
            emit('<endTime>%s</endTime>'
                 % (xmlh.get_tag_val(listing_time, "endTime")))

        commitments = item.getElementsByTagName("commitment")
        if commitments.length == 1:
            commitment = commitments[0]
            l_num = xmlh.get_tag_val(commitment, "num")
            l_duration = xmlh.get_tag_val(commitment, "duration")
            l_period = xmlh.get_tag_val(commitment, "period")
            if l_duration == "hours" and l_period == "week":
                emit('<commitmentHoursPerWeek>' + l_num
                     + '</commitmentHoursPerWeek>')
            elif l_duration == "hours" and l_period == "day":
                # note: weekdays only.  Goes through float() like the
                # monthly branch so a fractional value does not abort the
                # whole feed.
                emit('<commitmentHoursPerWeek>' + str(int(float(l_num) * 5))
                     + '</commitmentHoursPerWeek>')
            elif l_duration == "hours" and l_period == "month":
                hrs = max(int(float(l_num) / 4.0), 1)
                emit('<commitmentHoursPerWeek>' + str(hrs)
                     + '</commitmentHoursPerWeek>')
            elif l_duration == "hours" and l_period == "event":
                # TODO: ignore for now, later compute the endTime if not
                # already provided
                pass
            else:
                log(" ".join([
                    "parse_volunteermatch: commitment given in units "
                    "!= hours/week: ",
                    l_duration, "per", l_period]))
        emit('</dateTimeDuration></dateTimeDurations>')

        dbaddresses = item.getElementsByTagName("location")
        if dbaddresses.length != 1:
            log("parse_volunteermatch: only 1 location supported.")
            return None
        dbaddress = dbaddresses[0]
        emit('<locations><location>')
        emit('<streetAddress1>%s</streetAddress1>'
             % (xmlh.get_tag_val(dbaddress, "street1")))
        emit('<city>%s</city>' % (xmlh.get_tag_val(dbaddress, "city")))
        emit('<region>%s</region>' % (xmlh.get_tag_val(dbaddress, "region")))
        emit('<postalCode>%s</postalCode>'
             % (xmlh.get_tag_val(dbaddress, "postalCode")))
        geolocs = item.getElementsByTagName("geolocation")
        if geolocs.length == 1:
            geoloc = geolocs[0]
            emit('<latitude>%s</latitude>'
                 % (xmlh.get_tag_val(geoloc, "latitude")))
            emit('<longitude>%s</longitude>'
                 % (xmlh.get_tag_val(geoloc, "longitude")))
        emit('</location></locations>')

        emit('<audienceTags>')
        for audience in item.getElementsByTagName("audience"):
            emit('<audienceTag>%s</audienceTag>' % (xmlh.node_data(audience)))
        emit('</audienceTags>')

        emit('<categoryTags>')
        for category in item.getElementsByTagName("category"):
            emit('<categoryTag>%s</categoryTag>' % (xmlh.node_data(category)))
        emit('</categoryTags>')

        emit('<skills>%s</skills>' % (xmlh.get_tag_val(item, "skill")))
        emit('<detailURL>%s</detailURL>'
             % (xmlh.get_tag_val(item, "detailURL")))
        emit('<description>%s</description>'
             % (xmlh.get_tag_val(item, "description")))

        expires = xmlh.get_tag_val(item, "expires")
        expires = dateutil.parser.parse(expires).strftime("%Y-%m-%dT%H:%M:%S")
        emit('<expires>%s</expires>' % (expires))
        emit('</VolunteerOpportunity>')
        numopps += 1

    emit('</VolunteerOpportunities>')
    emit('</FootprintFeed>')

    return "".join(out), numorgs, numopps
def parse(instr, maxrec, progress):
    """Return FPXML given 350.org data.

    Args:
      instr: feed text; it is UTF-8 encoded and handed to
        ``xmlh.parse_or_die``.
      maxrec: when positive, at most this many ``node`` elements are
        converted; zero or negative means no limit.
      progress: accepted for signature parity with the other parsers; not
        used by this function.

    Returns:
      A ``(fpxml, numorgs, numopps)`` tuple.
    """
    feed = xmlh.parse_or_die(instr.encode('utf-8'))

    org_id = str(139)
    mission_statement = "350.org is an international campaign that's building a movement to unite the world around solutions to the climate crisis--the solutions that science and justice demand."
    org_desc = "On October 10 we'll be helping host a Global Work Party, with thousands of communities setting up solar panels or digging community gardens or laying out bike paths."

    last_updated = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    numorgs = 1
    numopps = 0

    def split_stamp(stamp, all_day_time):
        """Split a 'dateTtime' stamp into (date, time-or-None)."""
        parts = stamp.replace(" (All day)", all_day_time).split("T")
        return parts[0][0:10], (parts[1] if len(parts) > 1 else None)

    xmlh.print_progress("loading 350.org custom XML...")

    # convert to footprint format
    out = []
    emit = out.append
    emit('<?xml version="1.0" ?>')
    emit('<FootprintFeed schemaVersion="0.1">')
    emit('<FeedInfo>')
    emit(xmlh.output_val('providerID', org_id))
    emit(xmlh.output_val('providerName', "350org"))
    emit(xmlh.output_val('feedID', "350org"))
    emit(xmlh.output_val('createdDateTime', xmlh.current_ts()))
    emit(xmlh.output_val('providerURL', "http://www.350.org/"))
    emit('</FeedInfo>')

    # 1 "organization" in 350.org postings
    emit('<Organizations><Organization>')
    emit(xmlh.output_val('organizationID', org_id))
    emit('<nationalEIN></nationalEIN>')
    emit('<name>350.org</name>')
    emit(xmlh.output_val('missionStatement', mission_statement))
    emit(xmlh.output_val('description', org_desc))
    emit('<location>')
    emit(xmlh.output_val("city", ""))
    emit(xmlh.output_val("region", ""))
    emit(xmlh.output_val("postalCode", ""))
    emit('</location>')
    # TODO: make these variables
    emit('<organizationURL>http://www.350.org/</organizationURL>')
    emit('<donateURL>http://www.350.org/donate</donateURL>')
    emit('<logoURL>http://www.350.org/sites/all/themes/threefifty/logo.gif</logoURL>')
    emit('<detailURL>http://www.350.org/about</detailURL>')
    emit('</Organization></Organizations>')

    emit('\n<VolunteerOpportunities>\n')
    for i, node in enumerate(feed.getElementsByTagName('node')):
        # ``i`` is zero-based, so ``>=`` caps the output at exactly maxrec
        # records (the former ``>`` let one extra record through).
        if maxrec > 0 and i >= maxrec:
            break

        title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>'
        desc = '<![CDATA[' + xmlh.get_tag_val(node, "Body") + ']]>'
        url = xmlh.get_tag_val(node, "Link")
        lat = xmlh.get_tag_val(node, "Latitude")
        lng = xmlh.get_tag_val(node, "Longitude")

        start_datetime = xmlh.get_tag_val(node, "Start_Date")
        if start_datetime:
            start_date, start_time = split_stamp(start_datetime, "T00:00:00")
        else:
            # No start date in the feed: fall back to the fixed event date.
            start_date, start_time = "2010-10-10", None

        end_datetime = xmlh.get_tag_val(node, "End_Date")
        open_ended = not end_datetime
        end_date = end_time = None
        if not open_ended:
            end_date, end_time = split_stamp(end_datetime, "T23:00:00")

        locstr = "%s, %s %s" % (xmlh.get_tag_val(node, "City"),
                                xmlh.get_tag_val(node, "Province"),
                                xmlh.get_tag_val(node, "Country"))

        emit('<VolunteerOpportunity>')
        emit('<volunteerOpportunityID>%s</volunteerOpportunityID>' % (str(i)))
        emit('<sponsoringOrganizationIDs><sponsoringOrganizationID>%s</sponsoringOrganizationID></sponsoringOrganizationIDs>' % (org_id))
        emit('<volunteerHubOrganizationIDs><volunteerHubOrganizationID>%s</volunteerHubOrganizationID></volunteerHubOrganizationIDs>' % (org_id))
        emit('<title>%s</title>' % (title))
        emit('<detailURL>%s</detailURL>' % (url))
        emit('<description>%s</description>' % (desc))
        emit('<abstract>%s</abstract>' % (desc))
        emit('<lastUpdated>%s</lastUpdated>' % (last_updated))
        emit('<locations><location>')
        emit('<location_string>%s</location_string>' % (locstr))
        emit('<latitude>%s</latitude>' % (lat))
        emit('<longitude>%s</longitude>' % (lng))
        emit('</location></locations>')
        emit('<dateTimeDurations><dateTimeDuration>')
        emit('<startDate>%s</startDate>' % (start_date))
        if start_time:
            emit('<startTime>%s</startTime>' % (start_time))
        if open_ended:
            emit('<openEnded>Yes</openEnded>')
        else:
            emit('<openEnded>No</openEnded>')
            emit('<endDate>%s</endDate>' % (end_date))
            if end_time:
                emit('<endTime>%s</endTime>' % (end_time))
        emit('</dateTimeDuration></dateTimeDurations>')
        emit('</VolunteerOpportunity>\n')
        numopps += 1

    emit('</VolunteerOpportunities>')
    emit('</FootprintFeed>')

    return "".join(out), numorgs, numopps
def parse(instr, maxrec, progress):
    """Return FPXML given 350.org data.

    Args:
      instr: feed text; it is UTF-8 encoded and handed to
        ``xmlh.parse_or_die``.
      maxrec: when positive, at most this many ``node`` elements are
        converted; zero or negative means no limit.
      progress: accepted for signature parity with the other parsers; not
        used by this function.

    Returns:
      A ``(fpxml, numorgs, numopps)`` tuple.
    """
    feed = xmlh.parse_or_die(instr.encode('utf-8'))

    org_id = str(139)
    mission_statement = "350.org is an international campaign that's building a movement to unite the world around solutions to the climate crisis--the solutions that science and justice demand."
    org_desc = "On October 10 we'll be helping host a Global Work Party, with thousands of communities setting up solar panels or digging community gardens or laying out bike paths."

    last_updated = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    numorgs = 1
    numopps = 0

    def date_and_time(raw, all_day_suffix):
        """Return (date, time-or-None) for a raw 'dateTtime' feed value."""
        pieces = raw.replace(" (All day)", all_day_suffix).split("T")
        clock = None
        if len(pieces) > 1:
            clock = pieces[1]
        return pieces[0][0:10], clock

    xmlh.print_progress("loading 350.org custom XML...")

    # convert to footprint format
    chunks = [
        '<?xml version="1.0" ?>',
        '<FootprintFeed schemaVersion="0.1">',
        '<FeedInfo>',
        xmlh.output_val('providerID', org_id),
        xmlh.output_val('providerName', "350org"),
        xmlh.output_val('feedID', "350org"),
        xmlh.output_val('createdDateTime', xmlh.current_ts()),
        xmlh.output_val('providerURL', "http://www.350.org/"),
        '</FeedInfo>',
        # 1 "organization" in 350.org postings
        '<Organizations><Organization>',
        xmlh.output_val('organizationID', org_id),
        '<nationalEIN></nationalEIN>',
        '<name>350.org</name>',
        xmlh.output_val('missionStatement', mission_statement),
        xmlh.output_val('description', org_desc),
        '<location>',
        xmlh.output_val("city", ""),
        xmlh.output_val("region", ""),
        xmlh.output_val("postalCode", ""),
        '</location>',
        # TODO: make these variables
        '<organizationURL>http://www.350.org/</organizationURL>',
        '<donateURL>http://www.350.org/donate</donateURL>',
        '<logoURL>http://www.350.org/sites/all/themes/threefifty/logo.gif</logoURL>',
        '<detailURL>http://www.350.org/about</detailURL>',
        '</Organization></Organizations>',
        '\n<VolunteerOpportunities>\n',
    ]

    for i, node in enumerate(feed.getElementsByTagName('node')):
        # Zero-based index: stop once maxrec records have been emitted.
        # The previous ``i > maxrec`` test produced maxrec + 1 records.
        if maxrec > 0 and i >= maxrec:
            break

        title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>'
        desc = '<![CDATA[' + xmlh.get_tag_val(node, "Body") + ']]>'
        url = xmlh.get_tag_val(node, "Link")
        lat = xmlh.get_tag_val(node, "Latitude")
        lng = xmlh.get_tag_val(node, "Longitude")

        # A missing start date falls back to the fixed event date.
        start_date, start_time = "2010-10-10", None
        start_raw = xmlh.get_tag_val(node, "Start_Date")
        if start_raw:
            start_date, start_time = date_and_time(start_raw, "T00:00:00")

        # A missing end date marks the opportunity as open ended.
        end_date = end_time = None
        end_raw = xmlh.get_tag_val(node, "End_Date")
        open_ended = not end_raw
        if end_raw:
            end_date, end_time = date_and_time(end_raw, "T23:00:00")

        locstr = "%s, %s %s" % (xmlh.get_tag_val(node, "City"),
                                xmlh.get_tag_val(node, "Province"),
                                xmlh.get_tag_val(node, "Country"))

        chunks.append('<VolunteerOpportunity>')
        chunks.append('<volunteerOpportunityID>%s</volunteerOpportunityID>' % (str(i)))
        chunks.append('<sponsoringOrganizationIDs><sponsoringOrganizationID>%s</sponsoringOrganizationID></sponsoringOrganizationIDs>' % (org_id))
        chunks.append('<volunteerHubOrganizationIDs><volunteerHubOrganizationID>%s</volunteerHubOrganizationID></volunteerHubOrganizationIDs>' % (org_id))
        chunks.append('<title>%s</title>' % (title))
        chunks.append('<detailURL>%s</detailURL>' % (url))
        chunks.append('<description>%s</description>' % (desc))
        chunks.append('<abstract>%s</abstract>' % (desc))
        chunks.append('<lastUpdated>%s</lastUpdated>' % (last_updated))
        chunks.append('<locations><location>')
        chunks.append('<location_string>%s</location_string>' % (locstr))
        chunks.append('<latitude>%s</latitude>' % (lat))
        chunks.append('<longitude>%s</longitude>' % (lng))
        chunks.append('</location></locations>')
        chunks.append('<dateTimeDurations><dateTimeDuration>')
        chunks.append('<startDate>%s</startDate>' % (start_date))
        if start_time:
            chunks.append('<startTime>%s</startTime>' % (start_time))
        if open_ended:
            chunks.append('<openEnded>Yes</openEnded>')
        else:
            chunks.append('<openEnded>No</openEnded>')
            chunks.append('<endDate>%s</endDate>' % (end_date))
            if end_time:
                chunks.append('<endTime>%s</endTime>' % (end_time))
        chunks.append('</dateTimeDuration></dateTimeDurations>')
        chunks.append('</VolunteerOpportunity>\n')
        numopps += 1

    chunks.append('</VolunteerOpportunities>')
    chunks.append('</FootprintFeed>')

    return "".join(chunks), numorgs, numopps
outstr += '<organizationURL>http://www.idealist.org/</organizationURL>' outstr += '<donateURL>http://www.idealist.org/</donateURL>' outstr += '<logoURL>http://www.idealist.org/css/skin02/images/logoBG.png</logoURL>' outstr += '<detailURL>http://www.idealist.org/</detailURL>' outstr += '</Organization></Organizations>' outstr += '\n<VolunteerOpportunities>\n' try: nodes = feed.getElementsByTagName('entry') except: nodes = [] for i, node in enumerate(nodes): if maxrec > 0 and i > maxrec: break title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>' desc = '<![CDATA[' + xmlh.get_tag_val(node, "summary") + ']]>' url = xmlh.get_tag_val(node, "id") start_date = last_updated open_ended = True outstr += '<VolunteerOpportunity>' outstr += '<volunteerOpportunityID>%s</volunteerOpportunityID>' % ( str(i)) outstr += '<sponsoringOrganizationIDs><sponsoringOrganizationID>' outstr += org_id outstr += '</sponsoringOrganizationID></sponsoringOrganizationIDs>' outstr += '<volunteerHubOrganizationIDs><volunteerHubOrganizationID>' outstr += org_id outstr += '</volunteerHubOrganizationID></volunteerHubOrganizationIDs>' outstr += '<title>%s</title>' % (title)
def parse(instr, maxrec, progress):
    """Return FPXML given sparked feed data.

    Args:
      instr: feed text; it is UTF-8 encoded and handed to
        ``xmlh.parse_or_die``.
      maxrec: when positive, at most this many ``challenge`` elements are
        converted; zero or negative means no limit.
      progress: accepted for signature parity with the other parsers; not
        used by this function.

    Returns:
      A ``(fpxml, numorgs, numopps)`` tuple.
    """
    feed = xmlh.parse_or_die(instr.encode('utf-8'))

    org_id = str(139)
    mission_statement = "Sparked makes it easy for people with busy lives to help nonprofits get valuable work done when it's convenient. We call it microvolunteering. Through the convenience of the Internet, and with the collaboration of others, micro-volunteers use their professional skills to help causes they care about."
    org_desc = "Sparked is the world's first Microvolunteering network"

    last_updated = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    # Every challenge is reported as starting at feed-generation time.
    start_date = last_updated
    numorgs = 1
    numopps = 0

    xmlh.print_progress("loading sparked.com custom XML...")

    # convert to footprint format
    out = []
    emit = out.append
    emit('<?xml version="1.0" ?>')
    emit('<FootprintFeed schemaVersion="0.1">')
    emit('<FeedInfo>')
    emit(xmlh.output_val('providerID', org_id))
    emit(xmlh.output_val('providerName', "sparked"))
    emit(xmlh.output_val('feedID', "sparked"))
    emit(xmlh.output_val('createdDateTime', xmlh.current_ts()))
    emit(xmlh.output_val('providerURL', "http://www.sparked.com/"))
    emit('</FeedInfo>')

    # 1 "organization" in sparked.com postings
    emit('<Organizations><Organization>')
    emit(xmlh.output_val('organizationID', org_id))
    emit('<nationalEIN></nationalEIN>')
    emit('<name>sparked.com</name>')
    emit(xmlh.output_val('missionStatement', mission_statement))
    emit(xmlh.output_val('description', org_desc))
    emit('<location>')
    emit(xmlh.output_val("city", "San Francisco"))
    emit(xmlh.output_val("region", "CA"))
    emit(xmlh.output_val("postalCode", "94105"))
    emit('</location>')
    emit('<organizationURL>http://www.sparked.com/</organizationURL>')
    emit('<donateURL>http://www.sparked.com/</donateURL>')
    emit('<logoURL>http://www.sparked.com/imgver4/logo_sparked.gif</logoURL>')
    emit('<detailURL>http://www.sparked.com/</detailURL>')
    emit('</Organization></Organizations>')

    emit('\n<VolunteerOpportunities>\n')
    for i, node in enumerate(feed.getElementsByTagName('challenge')):
        # ``i`` is zero-based, so ``>=`` caps the output at exactly maxrec
        # records (the former ``>`` let one extra record through).
        if maxrec > 0 and i >= maxrec:
            break

        title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>'
        desc = '<![CDATA[' + xmlh.get_tag_val(node, "description") + ']]>'
        url = xmlh.get_tag_val(node, "url")

        open_ended = True
        end_date = None
        # Deadline layout, by character position:
        #   01234567
        #   02/15/11
        mdy = xmlh.get_tag_val(node, "deadline")
        if mdy:
            try:
                end_date = (str(2000 + int(mdy[6:])) + "-" + mdy[0:2]
                            + "-" + mdy[3:5])
                open_ended = False
            except ValueError:
                # Year part is not a number: keep the challenge open ended.
                pass

        emit('<VolunteerOpportunity>')
        emit('<volunteerOpportunityID>%s</volunteerOpportunityID>' % (str(i)))
        emit('<sponsoringOrganizationIDs><sponsoringOrganizationID>%s</sponsoringOrganizationID></sponsoringOrganizationIDs>' % (org_id))
        emit('<volunteerHubOrganizationIDs><volunteerHubOrganizationID>%s</volunteerHubOrganizationID></volunteerHubOrganizationIDs>' % (org_id))
        emit('<micro>Yes</micro>')
        emit('<title>%s</title>' % (title))
        emit('<detailURL>%s</detailURL>' % (url))
        emit('<description>%s</description>' % (desc))
        emit('<abstract>%s</abstract>' % (desc))
        emit('<lastUpdated>%s</lastUpdated>' % (last_updated))
        emit('<dateTimeDurations><dateTimeDuration>')
        emit('<startDate>%s</startDate>' % (start_date))
        if open_ended:
            emit('<openEnded>Yes</openEnded>')
        else:
            emit('<openEnded>No</openEnded>')
            emit('<endDate>%s</endDate>' % (end_date))
        emit('</dateTimeDuration></dateTimeDurations>')
        emit('<locations><location><virtual>Yes</virtual></location></locations>')
        emit('</VolunteerOpportunity>\n')
        numopps += 1

    emit('</VolunteerOpportunities>')
    emit('</FootprintFeed>')

    return "".join(out), numorgs, numopps
outstr += '<organizationURL>http://www.idealist.org/</organizationURL>' outstr += '<donateURL>http://www.idealist.org/</donateURL>' outstr += '<logoURL>http://www.idealist.org/css/skin02/images/logoBG.png</logoURL>' outstr += '<detailURL>http://www.idealist.org/</detailURL>' outstr += '</Organization></Organizations>' outstr += '\n<VolunteerOpportunities>\n' try: nodes = feed.getElementsByTagName('entry') except: nodes = [] for i, node in enumerate(nodes): if maxrec > 0 and i > maxrec: break title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>' desc = '<![CDATA[' + xmlh.get_tag_val(node, "summary") + ']]>' url = xmlh.get_tag_val(node, "id") start_date = last_updated open_ended = True outstr += '<VolunteerOpportunity>' outstr += '<volunteerOpportunityID>%s</volunteerOpportunityID>' % (str(i)) outstr += '<sponsoringOrganizationIDs><sponsoringOrganizationID>' outstr += org_id outstr += '</sponsoringOrganizationID></sponsoringOrganizationIDs>' outstr += '<volunteerHubOrganizationIDs><volunteerHubOrganizationID>' outstr += org_id outstr += '</volunteerHubOrganizationID></volunteerHubOrganizationIDs>' outstr += '<title>%s</title>' % (title) outstr += '<detailURL>%s</detailURL>' % (url)
def get_val(self, field):
    """Look up *field* on this record's opportunity node.

    Delegates to ``xmlh.get_tag_val`` with ``self.opp`` as the node and
    returns whatever that helper returns.
    """
    opportunity = self.opp
    value = xmlh.get_tag_val(opportunity, field)
    return value
def tag_by_source_id(self, rec, feedinfo):
    """Score 1.0 when the feed's providerID is in ``self.id_list``, else 0.0.

    ``rec`` is accepted for signature parity with the other taggers and is
    not read here.
    """
    provider_id = xmlh.get_tag_val(feedinfo, "providerID")
    return 1.0 if provider_id in self.id_list else 0.0
def parse(instr, maxrecs, progress):
    """Convert a feed of ``<VolunteerOpportunity>`` elements to FPXML.

    Each opportunity is cut out of *instr* with a regular expression, parsed
    on its own with ``xmlh.simple_parser``, and its organization registered
    through ``register_org``.  Feed-level values come from the module-level
    names ``providerID``, ``providerName``, ``feedID``, ``providerURL`` and
    ``feedDescription``; organizations are read back from ``ORGS``.

    Args:
      instr: raw feed text.
      maxrecs: when positive, at most this many opportunities are converted;
        zero or negative means no limit.
      progress: forwarded to ``xmlh.print_rps_progress``.

    Returns:
      A ``(fpxml, numorgs, numopps)`` tuple.
    """
    numorgs = numopps = 0

    # Turn namespaced ``db:`` tags into plain ``db_`` names before parsing.
    instr = re.sub(r'<(/?db):', r'<\1_', instr)
    opps = re.findall(r'<VolunteerOpportunity>.+?</VolunteerOpportunity>',
                      instr, re.DOTALL)

    volopps = []
    emit = volopps.append
    for i, oppstr in enumerate(opps):
        # ``i`` is zero-based, so ``>=`` caps the output at exactly maxrecs
        # records (the former ``>`` let one extra record through).
        if maxrecs > 0 and i >= maxrecs:
            break
        xmlh.print_rps_progress("opps", progress, i, maxrecs)

        item = xmlh.simple_parser(oppstr, known_elnames, progress=False)
        orgid = register_org(item)

        # logoURL -- sigh, this is for the opportunity not the org
        emit('<VolunteerOpportunity>')
        emit(xmlh.output_val('volunteerOpportunityID', str(i)))
        emit(xmlh.output_val('sponsoringOrganizationID', str(orgid)))
        emit(xmlh.output_node('volunteerHubOrganizationID', item, "LocalID"))
        emit(xmlh.output_node('title', item, "Title"))
        emit(xmlh.output_node('abstract', item, "Description"))
        emit(xmlh.output_node('description', item, "Description"))
        emit(xmlh.output_node('detailURL', item, "DetailURL"))
        emit(xmlh.output_val('volunteersNeeded', "-8888"))

        try:
            oppdates = item.getElementsByTagName("OpportunityDate")
        except Exception:
            # Best effort: an item that cannot be searched has no dates.
            oppdates = []

        if len(oppdates) > 1:
            # Extra dates are reported and ignored; the first one is used.
            print("%s %s" % (
                datetime.now(),
                "parse_servenet.py: only 1 OpportunityDate supported."))
        oppdate = oppdates[0] if len(oppdates) > 0 else None

        emit('<dateTimeDurations><dateTimeDuration>')
        if oppdate:
            emit(xmlh.output_val('openEnded', 'No'))
            emit(xmlh.output_val(
                'duration',
                'P%s%s' % (xmlh.get_tag_val(oppdate, "DurationQuantity"),
                           xmlh.get_tag_val(oppdate, "DurationUnit"))))
            emit(xmlh.output_val('commitmentHoursPerWeek', '0'))
            emit(xmlh.output_node('startDate', oppdate, "StartDate"))
            emit(xmlh.output_node('endDate', oppdate, "EndDate"))
        else:
            emit(xmlh.output_val('openEnded', 'Yes'))
            emit(xmlh.output_val('commitmentHoursPerWeek', '0'))
        emit('</dateTimeDuration></dateTimeDurations>')

        emit('<locations>')
        try:
            opplocs = item.getElementsByTagName("Location")
        except Exception:
            # Best effort: an item that cannot be searched has no locations.
            opplocs = []
        for opploc in opplocs:
            emit('<location>')
            virtual_tag = opploc.getElementsByTagName("Virtual")
            if (virtual_tag
                    and xmlh.get_tag_val(opploc, "Virtual").lower() == "yes"):
                emit(xmlh.output_val('virtual', 'Yes'))
            else:
                emit(xmlh.output_node('region', opploc, "StateOrProvince"))
                emit(xmlh.output_node('country', opploc, "Country"))
                emit(xmlh.output_node('postalCode', opploc,
                                      "ZipOrPostalCode"))
            emit('</location>')
        emit('</locations>')
        emit('<categoryTags/>')
        emit('</VolunteerOpportunity>')
        numopps += 1

    # convert to footprint format
    outstr = '<?xml version="1.0" ?>'
    outstr += '<FootprintFeed schemaVersion="0.1">'
    outstr += '<FeedInfo>'
    outstr += xmlh.output_val('providerID', providerID)
    outstr += xmlh.output_val('providerName', providerName)
    outstr += xmlh.output_val('feedID', feedID)
    outstr += xmlh.output_val('createdDateTime', xmlh.current_ts())
    outstr += xmlh.output_val('providerURL', providerURL)
    outstr += xmlh.output_val('description', feedDescription)
    # TODO: capture ts -- use now?!
    outstr += '</FeedInfo>'

    # hardcoded: Organization
    outstr += '<Organizations>'
    for key in ORGS:
        outstr += ORGS[key]
        numorgs += 1
    outstr += '</Organizations>'

    outstr += '<VolunteerOpportunities>'
    outstr += "".join(volopps)
    outstr += '</VolunteerOpportunities>'
    outstr += '</FootprintFeed>'

    return outstr, numorgs, numopps
'clientID' : CLIENT_ID}) try: maps_fh = urllib2.urlopen("http://maps.google.com/maps/geo?%s" % params) res = maps_fh.read() maps_fh.close() except IOError, err: print_debug("geocode_call: Error contacting Maps API. Sleeping. " + str(err)) time.sleep(1) return geocode_call(query, retries - 1) #print_debug("response length: "+str(len(res))) if re.search(r'403 Forbidden', res): respcode = 403 else: node = xmlh.simple_parser(res, [], False) respcode = xmlh.get_tag_val(node, "code") if respcode == "": #print_debug("unparseable response: "+res) return False respcode = int(respcode) if respcode in (400, 601, 602, 603): # problem with the query return None if respcode in (403, 500, 620): # problem with the server print_debug("geocode_call: Connection problem or quota exceeded. Sleeping...") if retries == 4: xmlh.print_progress("geocoder: %d" % respcode, "", SHOW_PROGRESS) time.sleep(5) return geocode_call(query, retries - 1)
def parse(instr, maxrecs, progress):
    """Return FPXML given usaservice data.

    Each line of *instr* is parsed on its own as one item.

    Args:
      instr: raw feed text, one item per line.
      maxrecs: when positive, at most this many lines are converted; zero or
        negative means no limit.
      progress: forwarded to ``xmlh.print_rps_progress``.

    Returns:
      A ``(fpxml, numorgs, numopps)`` tuple, or None when an item does not
      have exactly one ``db_scheduledTime`` or ``db_address`` element.
    """
    # TODO: progress
    known_elnames = [
        'channel', 'db:abstract', 'db:address', 'db:attendee_count',
        'db:categories', 'db:city', 'db:country', 'db:county', 'db:dateTime',
        'db:event', 'db:eventType', 'db:guest_total', 'db:host',
        'db:latitude', 'db:length', 'db:longitude', 'db:rsvp',
        'db:scheduledTime', 'db:state', 'db:street', 'db:title',
        'db:venue_name', 'db:zipcode', 'description', 'docs', 'guid', 'item',
        'language', 'link', 'pubDate', 'rss', 'title',
    ]

    def log(message):
        """Print a timestamped message (same output on Python 2 and 3)."""
        print("%s %s" % (datetime.now(), message))

    # convert to footprint format
    out = []
    emit = out.append
    emit('<?xml version="1.0" ?>')
    emit('<FootprintFeed schemaVersion="0.1">')
    emit('<FeedInfo>')
    # TODO: assign provider IDs?
    emit('<providerID>101</providerID>')
    emit('<providerName>usaservice.org</providerName>')
    emit('<feedID>1</feedID>')
    emit('<createdDateTime>%s</createdDateTime>' % xmlh.current_ts())
    emit('<providerURL>http://www.usaservice.org/</providerURL>')
    emit('<description>Syndicated events</description>')
    # TODO: capture ts -- use now?!
    emit('</FeedInfo>')

    numorgs = numopps = 0

    # hardcoded: Organization
    emit('<Organizations>')
    emit('<Organization>')
    emit('<organizationID>0</organizationID>')
    emit('<nationalEIN></nationalEIN>')
    emit('<name></name>')
    emit('<missionStatement></missionStatement>')
    emit('<description></description>')
    emit('<location><city></city><region></region><postalCode></postalCode></location>')
    emit('<organizationURL></organizationURL>')
    emit('<donateURL></donateURL>')
    emit('<logoURL></logoURL>')
    emit('<detailURL></detailURL>')
    emit('</Organization>')
    numorgs += 1
    emit('</Organizations>')

    emit('<VolunteerOpportunities>')

    # Turn namespaced ``db:`` tags into plain ``db_`` names before parsing.
    instr = re.sub(r'<(/?db):', r'<\1_', instr)
    for i, line in enumerate(instr.splitlines()):
        # ``i`` is zero-based, so ``>=`` caps the output at exactly maxrecs
        # records (the former ``>`` let one extra record through).
        if maxrecs > 0 and i >= maxrecs:
            break
        xmlh.print_rps_progress("opps", progress, i, maxrecs)
        item = xmlh.simple_parser(line, known_elnames, progress=False)

        # unmapped: db_rsvp (seems to be same as link, but with #rsvp at end
        #   of url?)
        # unmapped: db_host (no equivalent?)
        # unmapped: db_county (seems to be empty)
        # unmapped: attendee_count
        # unmapped: guest_total
        # unmapped: db_title (dup of title, above)
        emit('<VolunteerOpportunity>')
        emit('<volunteerOpportunityID>%s</volunteerOpportunityID>'
             % (xmlh.get_tag_val(item, "guid")))
        # hardcoded: sponsoringOrganizationID
        emit('<sponsoringOrganizationIDs><sponsoringOrganizationID>0</sponsoringOrganizationID></sponsoringOrganizationIDs>')
        # hardcoded: volunteerHubOrganizationID
        emit('<volunteerHubOrganizationIDs><volunteerHubOrganizationID>0</volunteerHubOrganizationID></volunteerHubOrganizationIDs>')
        emit('<title>%s</title>' % (xmlh.get_tag_val(item, "title")))
        emit('<abstract>%s</abstract>' % (xmlh.get_tag_val(item, "abstract")))
        emit('<volunteersNeeded>-8888</volunteersNeeded>')

        dbscheduledTimes = item.getElementsByTagName("db_scheduledTime")
        if dbscheduledTimes.length != 1:
            log("parse_usaservice: only 1 db_scheduledTime supported.")
            return None
        dbscheduledTime = dbscheduledTimes[0]

        emit('<dateTimeDurations><dateTimeDuration>')
        length = xmlh.get_tag_val(dbscheduledTime, "db_length")
        if length == "" or length == "-1":
            emit('<openEnded>Yes</openEnded>')
        else:
            emit('<openEnded>No</openEnded>')
        # partition() instead of a two-target split(): a value without
        # exactly one space no longer raises, and the locals no longer
        # shadow the ``time`` module name.
        start_date, _, start_time = xmlh.get_tag_val(
            dbscheduledTime, "db_dateTime").partition(" ")
        emit('<startDate>%s</startDate>' % (start_date))
        # TODO: timezone???
        emit('<startTime>%s</startTime>' % (start_time))
        emit('</dateTimeDuration></dateTimeDurations>')

        dbaddresses = item.getElementsByTagName("db_address")
        if dbaddresses.length != 1:
            log("parse_usaservice: only 1 db_address supported.")
            return None
        dbaddress = dbaddresses[0]

        emit('<locations><location>')
        emit('<name>%s</name>' % (xmlh.get_tag_val(item, "db_venue_name")))
        emit('<streetAddress1>%s</streetAddress1>'
             % (xmlh.get_tag_val(dbaddress, "db_street")))
        emit('<city>%s</city>' % (xmlh.get_tag_val(dbaddress, "db_city")))
        emit('<region>%s</region>' % (xmlh.get_tag_val(dbaddress, "db_state")))
        emit('<country>%s</country>'
             % (xmlh.get_tag_val(dbaddress, "db_country")))
        emit('<postalCode>%s</postalCode>'
             % (xmlh.get_tag_val(dbaddress, "db_zipcode")))
        emit('<latitude>%s</latitude>'
             % (xmlh.get_tag_val(item, "db_latitude")))
        emit('<longitude>%s</longitude>'
             % (xmlh.get_tag_val(item, "db_longitude")))
        emit('</location></locations>')

        event_type = xmlh.get_tag_val(item, "db_eventType")
        emit('<categoryTags><categoryTag>%s</categoryTag></categoryTags>'
             % (event_type))

        emit('<contactName>%s</contactName>'
             % xmlh.get_tag_val(item, "db_host"))
        emit('<detailURL>%s</detailURL>' % (xmlh.get_tag_val(item, "link")))
        emit('<description>%s</description>'
             % (xmlh.get_tag_val(item, "description")))

        pubdate = xmlh.get_tag_val(item, "pubDate")
        if re.search("[0-9][0-9] [A-Z][a-z][a-z] [0-9][0-9][0-9][0-9]",
                     pubdate):
            # TODO: parse() is ignoring timezone...
            ts = dateutil.parser.parse(pubdate)
            pubdate = ts.strftime("%Y-%m-%dT%H:%M:%S")
        emit('<lastUpdated>%s</lastUpdated>' % (pubdate))
        emit('</VolunteerOpportunity>')
        numopps += 1

    emit('</VolunteerOpportunities>')
    emit('</FootprintFeed>')

    return "".join(out), numorgs, numopps
def parse(instr, maxrecs, progress):
    """Return FPXML (FootprintFeed) built from servenet feed text.

    Args:
      instr: raw feed text.  Every opportunity is picked out with a regex
        matching <VolunteerOpportunity>...</VolunteerOpportunity>.
      maxrecs: maximum number of opportunities to convert; a value <= 0
        disables the limit.
      progress: handed to xmlh.print_rps_progress() for progress output.

    Returns:
      Tuple (fpxml_string, numorgs, numopps).  numorgs counts every entry
      currently held in the module-level ORGS table, which register_org()
      fills in as opportunities are processed.
    """
    numorgs = numopps = 0
    # Rewrite the "<db:" / "</db:" tag prefix to "<db_" / "</db_".
    instr = re.sub(r'<(/?db):', r'<\1_', instr)
    opps = re.findall(r'<VolunteerOpportunity>.+?</VolunteerOpportunity>',
                      instr, re.DOTALL)

    volopps = []
    for i, oppstr in enumerate(opps):
        # i is zero-based: stopping at i == maxrecs converts exactly maxrecs
        # records (the previous "i > maxrecs" test let one extra through).
        if maxrecs > 0 and i >= maxrecs:
            break
        xmlh.print_rps_progress("opps", progress, i, maxrecs)
        item = xmlh.simple_parser(oppstr, known_elnames, progress=False)
        orgid = register_org(item)

        # logoURL -- sigh, this is for the opportunity not the org
        volopps.append('<VolunteerOpportunity>')
        volopps.append(xmlh.output_val('volunteerOpportunityID', str(i)))
        volopps.append(xmlh.output_val('sponsoringOrganizationID', str(orgid)))
        volopps.append(
            xmlh.output_node('volunteerHubOrganizationID', item, "LocalID"))
        volopps.append(xmlh.output_node('title', item, "Title"))
        # The feed's Description fills both abstract and description.
        volopps.append(xmlh.output_node('abstract', item, "Description"))
        volopps.append(xmlh.output_node('description', item, "Description"))
        volopps.append(xmlh.output_node('detailURL', item, "DetailURL"))
        volopps.append(xmlh.output_val('volunteersNeeded', "-8888"))

        # Best effort: an item without a usable DOM API simply has no dates.
        try:
            oppdates = item.getElementsByTagName("OpportunityDate")
        except Exception:
            oppdates = []
        if len(oppdates) > 1:
            # Warn, then carry on with the first date only.  A single
            # pre-formatted argument prints the same text under both the
            # print statement and the print function.
            print("%s %s" % (
                datetime.now(),
                "parse_servenet.py: only 1 OpportunityDate supported."))
        if len(oppdates) == 0:
            oppdate = None
        else:
            oppdate = oppdates[0]

        volopps.append('<dateTimeDurations><dateTimeDuration>')
        if oppdate:
            volopps.append(xmlh.output_val('openEnded', 'No'))
            volopps.append(xmlh.output_val(
                'duration',
                'P%s%s' % (xmlh.get_tag_val(oppdate, "DurationQuantity"),
                           xmlh.get_tag_val(oppdate, "DurationUnit"))))
            volopps.append(xmlh.output_val('commitmentHoursPerWeek', '0'))
            volopps.append(xmlh.output_node('startDate', oppdate, "StartDate"))
            volopps.append(xmlh.output_node('endDate', oppdate, "EndDate"))
        else:
            volopps.append(xmlh.output_val('openEnded', 'Yes'))
            volopps.append(xmlh.output_val('commitmentHoursPerWeek', '0'))
        volopps.append('</dateTimeDuration></dateTimeDurations>')

        volopps.append('<locations>')
        try:
            opplocs = item.getElementsByTagName("Location")
        except Exception:
            opplocs = []
        for opploc in opplocs:
            volopps.append('<location>')
            virtual_tag = opploc.getElementsByTagName("Virtual")
            if virtual_tag and xmlh.get_tag_val(
                    opploc, "Virtual").lower() == "yes":
                volopps.append(xmlh.output_val('virtual', 'Yes'))
            else:
                volopps.append(
                    xmlh.output_node('region', opploc, "StateOrProvince"))
                volopps.append(xmlh.output_node('country', opploc, "Country"))
                volopps.append(
                    xmlh.output_node('postalCode', opploc, "ZipOrPostalCode"))
            volopps.append('</location>')
        volopps.append('</locations>')
        volopps.append('<categoryTags/>')
        volopps.append('</VolunteerOpportunity>')
        numopps += 1

    # convert to footprint format
    outstr = ['<?xml version="1.0" ?>',
              '<FootprintFeed schemaVersion="0.1">',
              '<FeedInfo>']
    outstr.append(xmlh.output_val('providerID', providerID))
    outstr.append(xmlh.output_val('providerName', providerName))
    outstr.append(xmlh.output_val('feedID', feedID))
    # TODO: capture ts -- use now?!
    outstr.append(xmlh.output_val('createdDateTime', xmlh.current_ts()))
    outstr.append(xmlh.output_val('providerURL', providerURL))
    outstr.append(xmlh.output_val('description', feedDescription))
    outstr.append('</FeedInfo>')

    # Organizations come from the ORGS table populated by register_org().
    outstr.append('<Organizations>')
    for key in ORGS:
        outstr.append(ORGS[key])
        numorgs += 1
    outstr.append('</Organizations>')

    outstr.append('<VolunteerOpportunities>')
    outstr.extend(volopps)
    outstr.append('</VolunteerOpportunities>')
    outstr.append('</FootprintFeed>')
    return ''.join(outstr), numorgs, numopps
def parse(s, maxrecs, progress):
    """Convert volunteermatch feed data into an FPXML document.

    Args:
      s: volunteermatch feed text, handed to xmlh.simple_parser().
      maxrecs: how many <listing> elements to convert; -1, or a value larger
        than the number of listings, converts all of them.
      progress: passed through to xmlh.simple_parser().

    Returns:
      Tuple (fpxml_string, numorgs, numopps), or None when a listing does
      not have exactly one <parent>, exactly one duration, or exactly one
      <location>.
    """
    # TODO: progress
    known_elnames = [
        'feed', 'title', 'subtitle', 'div', 'span', 'updated', 'id', 'link',
        'icon', 'logo', 'author', 'name', 'uri', 'email', 'rights', 'entry',
        'published', 'g:publish_date', 'g:expiration_date',
        'g:event_date_range', 'g:start', 'g:end', 'updated', 'category',
        'summary', 'content', 'awb:city', 'awb:country', 'awb:state',
        'awb:postalcode', 'g:location', 'g:age_range', 'g:employer',
        'g:job_type', 'g:job_industry', 'awb:paid',
    ]
    no_org_msg = "parse_volunteermatch: listing does not have an organization"

    xmldoc = xmlh.simple_parser(s, known_elnames, progress)
    created = dateutil.parser.parse(xmlh.get_tag_val(xmldoc, "created"))
    pubdate = created.strftime("%Y-%m-%dT%H:%M:%S")

    # convert to footprint format
    out = [
        '<?xml version="1.0" ?>',
        '<FootprintFeed schemaVersion="0.1">',
        '<FeedInfo>',
        # TODO: assign provider IDs?
        '<providerID>104</providerID>',
        '<providerName>volunteermatch.org</providerName>',
        '<feedID>1</feedID>',
        '<providerURL>http://www.volunteermatch.org/</providerURL>',
        '<createdDateTime>%s</createdDateTime>' % (pubdate),
        '<description></description>',
        '</FeedInfo>',
    ]
    numorgs = numopps = 0

    # One <Organization> per listing, taken from the listing's <parent>.
    out.append('<Organizations>')
    items = xmldoc.getElementsByTagName("listing")
    if maxrecs == -1 or maxrecs > items.length:
        maxrecs = items.length
    for item in items[0:maxrecs]:
        orgs = item.getElementsByTagName("parent")
        if orgs.length != 1:
            # A single pre-formatted argument prints the same text under
            # both the print statement and the print function.
            print("%s %s" % (datetime.now(), no_org_msg))
            return None
        org = orgs[0]
        out.append('<Organization>')
        out.append('<organizationID>%s</organizationID>' %
                   (xmlh.get_tag_val(org, "key")))
        out.append('<nationalEIN></nationalEIN>')
        out.append('<name>%s</name>' % (xmlh.get_tag_val(org, "name")))
        out.append('<missionStatement></missionStatement>')
        out.append('<description></description>')
        out.append('<location><city></city><region></region>'
                   '<postalCode></postalCode></location>')
        out.append('<organizationURL>%s</organizationURL>' %
                   (xmlh.get_tag_val(org, "URL")))
        out.append('<donateURL></donateURL>')
        out.append('<logoURL></logoURL>')
        out.append('<detailURL>%s</detailURL>' %
                   (xmlh.get_tag_val(org, "detailURL")))
        out.append('</Organization>')
        numorgs += 1
    out.append('</Organizations>')

    out.append('<VolunteerOpportunities>')
    items = xmldoc.getElementsByTagName("listing")
    for item in items[0:maxrecs]:
        out.append('<VolunteerOpportunity>')
        out.append('<volunteerOpportunityID>%s</volunteerOpportunityID>' %
                   (xmlh.get_tag_val(item, "key")))

        orgs = item.getElementsByTagName("parent")
        if orgs.length == 1:
            sponsor_id = xmlh.get_tag_val(orgs[0], "key")
            out.append(
                '<sponsoringOrganizationIDs><sponsoringOrganizationID>%s'
                '</sponsoringOrganizationID></sponsoringOrganizationIDs>' %
                (sponsor_id))
        else:
            out.append(
                '<sponsoringOrganizationIDs><sponsoringOrganizationID>0'
                '</sponsoringOrganizationID></sponsoringOrganizationIDs>')
            print("%s %s" % (datetime.now(), no_org_msg))

        out.append('<title>%s</title>' % (xmlh.get_tag_val(item, "title")))
        out.append('<volunteersNeeded>-8888</volunteersNeeded>')

        out.append('<dateTimeDurations><dateTimeDuration>')
        durations = xmlh.get_children_by_tagname(item, "duration")
        if len(durations) != 1:
            print("%s %s" % (
                datetime.now(),
                "parse_volunteermatch: number of durations in item != 1"))
            return None
        duration = durations[0]
        if duration.getAttribute("ongoing") == 'true':
            out.append('<openEnded>Yes</openEnded>')
        else:
            out.append('<openEnded>No</openEnded>')
        # NOTE(review): the listingTime lookup is assumed to apply to both
        # ongoing and non-ongoing durations -- confirm against the original
        # indentation.
        listing_times = duration.getElementsByTagName("listingTime")
        if listing_times.length == 1:
            listing_time = listing_times[0]
            out.append('<startTime>%s</startTime>' %
                       (xmlh.get_tag_val(listing_time, "startTime")))
            out.append('<endTime>%s</endTime>' %
                       (xmlh.get_tag_val(listing_time, "endTime")))

        # NOTE(review): the unit handling below is assumed to run only when
        # exactly one <commitment> is present -- confirm against the
        # original indentation.
        commitments = item.getElementsByTagName("commitment")
        l_period = l_duration = ""
        if commitments.length == 1:
            commitment = commitments[0]
            l_num = xmlh.get_tag_val(commitment, "num")
            l_duration = xmlh.get_tag_val(commitment, "duration")
            l_period = xmlh.get_tag_val(commitment, "period")
            in_hours = (l_duration == "hours")
            if in_hours and l_period == "week":
                out.append('<commitmentHoursPerWeek>' + l_num +
                           '</commitmentHoursPerWeek>')
            elif in_hours and l_period == "day":
                # note: weekdays only
                out.append('<commitmentHoursPerWeek>' + str(int(l_num) * 5) +
                           '</commitmentHoursPerWeek>')
            elif in_hours and l_period == "month":
                # Roughly four weeks per month, never less than one hour.
                weekly_hours = max(1, int(float(l_num) / 4.0))
                out.append('<commitmentHoursPerWeek>' + str(weekly_hours) +
                           '</commitmentHoursPerWeek>')
            elif in_hours and l_period == "event":
                # TODO: ignore for now, later compute the endTime if not
                # already provided
                pass
            else:
                print("%s %s %s %s %s" % (
                    datetime.now(),
                    "parse_volunteermatch: commitment given in units != hours/week: ",
                    l_duration, "per", l_period))
        out.append('</dateTimeDuration></dateTimeDurations>')

        addresses = item.getElementsByTagName("location")
        if addresses.length != 1:
            print("%s %s" % (
                datetime.now(),
                "parse_volunteermatch: only 1 location supported."))
            return None
        address = addresses[0]
        out.append('<locations><location>')
        out.append('<streetAddress1>%s</streetAddress1>' %
                   (xmlh.get_tag_val(address, "street1")))
        out.append('<city>%s</city>' % (xmlh.get_tag_val(address, "city")))
        out.append('<region>%s</region>' %
                   (xmlh.get_tag_val(address, "region")))
        out.append('<postalCode>%s</postalCode>' %
                   (xmlh.get_tag_val(address, "postalCode")))
        geolocs = item.getElementsByTagName("geolocation")
        if geolocs.length == 1:
            geoloc = geolocs[0]
            out.append('<latitude>%s</latitude>' %
                       (xmlh.get_tag_val(geoloc, "latitude")))
            out.append('<longitude>%s</longitude>' %
                       (xmlh.get_tag_val(geoloc, "longitude")))
        out.append('</location></locations>')

        out.append('<audienceTags>')
        for audience in item.getElementsByTagName("audience"):
            out.append('<audienceTag>%s</audienceTag>' %
                       (xmlh.node_data(audience)))
        out.append('</audienceTags>')

        out.append('<categoryTags>')
        for category in item.getElementsByTagName("category"):
            out.append('<categoryTag>%s</categoryTag>' %
                       (xmlh.node_data(category)))
        out.append('</categoryTags>')

        out.append('<skills>%s</skills>' % (xmlh.get_tag_val(item, "skill")))
        out.append('<detailURL>%s</detailURL>' %
                   (xmlh.get_tag_val(item, "detailURL")))
        out.append('<description>%s</description>' %
                   (xmlh.get_tag_val(item, "description")))

        expiry = dateutil.parser.parse(xmlh.get_tag_val(item, "expires"))
        out.append('<expires>%s</expires>' %
                   (expiry.strftime("%Y-%m-%dT%H:%M:%S")))

        out.append('</VolunteerOpportunity>')
        numopps += 1

    out.append('</VolunteerOpportunities>')
    out.append('</FootprintFeed>')
    return ''.join(out), numorgs, numopps
def parse(instr, maxrec, progress):
    """Return FPXML given sparked feed data.

    Args:
      instr: sparked feed text; it is UTF-8 encoded and handed to
        xmlh.parse_or_die().
      maxrec: maximum number of <challenge> elements to convert; a value
        <= 0 disables the limit.
      progress: accepted for interface parity with the other parsers; this
        function does not read it.

    Returns:
      Tuple (fpxml_string, numorgs, numopps).  numorgs is always 1 because
      every posting is attributed to the single sparked.com organization.
    """
    feed = xmlh.parse_or_die(instr.encode('utf-8'))

    org_id = str(139)
    mission_statement = (
        "Sparked makes it easy for people with busy lives to help nonprofits "
        "get valuable work done when it's convenient. We call it "
        "microvolunteering. Through the convenience of the Internet, and "
        "with the collaboration of others, micro-volunteers use their "
        "professional skills to help causes they care about.")
    org_desc = "Sparked is the world's first Microvolunteering network"

    # The feed's own timestamps are not used: "now" serves as both
    # lastUpdated and startDate for every opportunity.
    last_updated = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    start_date = last_updated

    numorgs = 1
    numopps = 0

    xmlh.print_progress("loading sparked.com custom XML...")

    # convert to footprint format
    out = ['<?xml version="1.0" ?>',
           '<FootprintFeed schemaVersion="0.1">',
           '<FeedInfo>']
    out.append(xmlh.output_val('providerID', org_id))
    out.append(xmlh.output_val('providerName', "sparked"))
    out.append(xmlh.output_val('feedID', "sparked"))
    out.append(xmlh.output_val('createdDateTime', xmlh.current_ts()))
    out.append(xmlh.output_val('providerURL', "http://www.sparked.com/"))
    out.append('</FeedInfo>')

    # 1 "organization" in sparked.com postings
    out.append('<Organizations><Organization>')
    out.append(xmlh.output_val('organizationID', org_id))
    out.append('<nationalEIN></nationalEIN>')
    out.append('<name>sparked.com</name>')
    out.append(xmlh.output_val('missionStatement', mission_statement))
    out.append(xmlh.output_val('description', org_desc))
    out.append('<location>')
    out.append(xmlh.output_val("city", "San Francisco"))
    out.append(xmlh.output_val("region", "CA"))
    out.append(xmlh.output_val("postalCode", "94105"))
    out.append('</location>')
    out.append('<organizationURL>http://www.sparked.com/</organizationURL>')
    out.append('<donateURL>http://www.sparked.com/</donateURL>')
    out.append(
        '<logoURL>http://www.sparked.com/imgver4/logo_sparked.gif</logoURL>')
    out.append('<detailURL>http://www.sparked.com/</detailURL>')
    out.append('</Organization></Organizations>')

    out.append('\n<VolunteerOpportunities>\n')
    nodes = feed.getElementsByTagName('challenge')
    for i, node in enumerate(nodes):
        # i is zero-based: stopping at i == maxrec converts exactly maxrec
        # records (the previous "i > maxrec" test let one extra through).
        if maxrec > 0 and i >= maxrec:
            break

        title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>'
        desc = '<![CDATA[' + xmlh.get_tag_val(node, "description") + ']]>'
        url = xmlh.get_tag_val(node, "url")

        # Deadline layout is MM/DD/YY, e.g. "02/15/11":
        #   index 01234567
        # No usable deadline means the opportunity is open ended.  end_date
        # is reset per challenge so a previous value is never reused.
        end_date = None
        mdy = xmlh.get_tag_val(node, "deadline")
        if mdy:
            try:
                end_date = (str(2000 + int(mdy[6:])) + "-" + mdy[0:2] + "-" +
                            mdy[3:5])
            except (TypeError, ValueError):
                # Unparseable year: keep the opportunity open ended.
                end_date = None

        out.append('<VolunteerOpportunity>')
        out.append('<volunteerOpportunityID>%s</volunteerOpportunityID>' %
                   (str(i)))
        out.append(
            '<sponsoringOrganizationIDs><sponsoringOrganizationID>%s'
            '</sponsoringOrganizationID></sponsoringOrganizationIDs>' %
            (org_id))
        out.append(
            '<volunteerHubOrganizationIDs><volunteerHubOrganizationID>%s'
            '</volunteerHubOrganizationID></volunteerHubOrganizationIDs>' %
            (org_id))
        out.append('<micro>Yes</micro>')
        out.append('<title>%s</title>' % (title))
        out.append('<detailURL>%s</detailURL>' % (url))
        # The challenge description fills both description and abstract.
        out.append('<description>%s</description>' % (desc))
        out.append('<abstract>%s</abstract>' % (desc))
        out.append('<lastUpdated>%s</lastUpdated>' % (last_updated))
        out.append('<dateTimeDurations><dateTimeDuration>')
        out.append('<startDate>%s</startDate>' % (start_date))
        if end_date is None:
            out.append('<openEnded>Yes</openEnded>')
        else:
            out.append('<openEnded>No</openEnded>')
            out.append('<endDate>%s</endDate>' % (end_date))
        out.append('</dateTimeDuration></dateTimeDurations>')
        out.append('<locations><location><virtual>Yes</virtual>'
                   '</location></locations>')
        out.append('</VolunteerOpportunity>\n')
        numopps += 1

    out.append('</VolunteerOpportunities>')
    out.append('</FootprintFeed>')
    return ''.join(out), numorgs, numopps
maps_fh = urllib2.urlopen(url) res = maps_fh.read() maps_fh.close() except IOError, err: print_debug("geocode_call: Error calling Maps API" + str(err) + "\n" + url) return False try: node = xmlh.simple_parser(res, [], False) node = node.getElementsByTagName('GeocodeResponse')[0] except: print_debug("unparseable response: " + res) return False respcode = xmlh.get_tag_val(node, "status") if respcode != "OK": print_debug("Maps API reponded " + respcode) return None if respcode == "UNKNOWN_ERROR": # problem with the server print_debug("geocode_call: Connection problem. retrying...") if retries > 0: time.sleep(3) return geocode_call(query, retries - 1) result_node = node.getElementsByTagName('result')[0] addr = xmlh.get_tag_val(result_node, "formatted_address") # removes "USA" from all addresses. addr = re.sub(r', USA$', r'', addr)