コード例 #1
0
def register_org(item):
    """Return the ID for the organization named in ``item``, registering it if new.

    The organization name is read from the "Name" tag of ``item``.  A name
    already present in ORGIDS returns its stored ID.  Otherwise MAX_ORGID is
    bumped, an <Organization> XML fragment is stored in ORGS under the new ID,
    and the name -> ID mapping is recorded in ORGIDS.
    """
    global MAX_ORGID

    # SponsoringOrganization/Name is the only data available for the org,
    # so the name doubles as the lookup key.
    name = xmlh.get_tag_val(item, "Name")
    if name in ORGIDS:
        return ORGIDS[name]

    MAX_ORGID += 1
    new_id = MAX_ORGID
    pieces = [
        '<Organization>',
        '<organizationID>%d</organizationID>' % new_id,
        '<nationalEIN />',
        '<name>%s</name>' % xml.sax.saxutils.escape(name),
        '<missionStatement />',
        '<description />',
        '<location>',
        xmlh.output_node("city", item, "City"),
        xmlh.output_node("region", item, "StateOrProvince"),
        xmlh.output_node("postalCode", item, "ZipOrPostalCode"),
        '</location>',
        '<organizationURL />',
        '<donateURL />',
        '<logoURL />',
        '<detailURL />',
        '</Organization>',
    ]
    ORGS[new_id] = ''.join(pieces)
    ORGIDS[name] = new_id
    return new_id
コード例 #2
0
def register_org(item):
  """Look up (or create) the organization ID for the org named in ``item``.

  The "Name" tag of ``item`` is the key.  On first sight of a name,
  MAX_ORGID is incremented, an <Organization> fragment is saved in ORGS
  under that ID, and ORGIDS remembers the name.  The ID is returned either
  way.
  """
  global MAX_ORGID

  # Only the name is available for the sponsoring organization, and names
  # do not conflict, so the name is used as the registry key.
  org_name = xmlh.get_tag_val(item, "Name")
  if org_name not in ORGIDS:
    MAX_ORGID += 1
    head = ('<Organization>'
            '<organizationID>%d</organizationID>' % (MAX_ORGID))
    head += '<nationalEIN />'
    head += '<name>%s</name>' % (xml.sax.saxutils.escape(org_name))
    head += '<missionStatement /><description />'
    where = xmlh.output_node("city", item, "City")
    where += xmlh.output_node("region", item, "StateOrProvince")
    where += xmlh.output_node("postalCode", item, "ZipOrPostalCode")
    tail = ('<organizationURL />'
            '<donateURL />'
            '<logoURL />'
            '<detailURL />'
            '</Organization>')
    ORGS[MAX_ORGID] = head + '<location>' + where + '</location>' + tail
    ORGIDS[org_name] = MAX_ORGID
  return ORGIDS[org_name]
コード例 #3
0
  def tag_by_date_range(self, rec, feedinfo):
    """Score 1.0 when the record starts or ends inside the configured range.

    Args:
      rec: record object exposing get_val(field); its "startDate" and
        "endDate" values are read.
      feedinfo: feed info node; its "providerID" is checked against
        self.omitted_providers.

    Returns:
      0.0 when the provider is omitted, or when neither date parses to a
      value within [self.date_range_start, self.date_range_end];
      1.0 otherwise.
    """
    if xmlh.get_tag_val(feedinfo, "providerID") in self.omitted_providers:
      return 0.0

    def parse_date(text):
      # Accept MM/DD/YYYY first, then YYYY-MM-DD; anything else is ignored.
      for fmt in ("%m/%d/%Y", "%Y-%m-%d"):
        try:
          return datetime.strptime(text, fmt)
        except (ValueError, TypeError):
          continue
      return None

    # Both fields go through the same parser, so an ISO-formatted end date
    # is honoured exactly like an ISO-formatted start date.
    for field in ("startDate", "endDate"):
      text = rec.get_val(field)
      if len(text) > 0:
        when = parse_date(text)
        if (when is not None
            and when >= self.date_range_start
            and when <= self.date_range_end):
          return 1.0

    return 0.0
コード例 #4
0
def parse(s, maxrecs, progress):
  """Return FPXML given volunteermatch data.

  Args:
    s: feed contents, handed to xmlh.simple_parser.
    maxrecs: maximum number of <listing> elements to convert.  -1, or any
      value larger than the number of listings, converts all of them.
    progress: forwarded to xmlh.simple_parser.

  Returns:
    A tuple (fpxml_string, numorgs, numopps) on success.  Returns a bare
    None (not a tuple) when a listing does not have exactly one <parent>,
    one <duration> child, or one <location>.
  """
  # TODO: progress
  # NOTE(review): these names look like Atom/gbase tags, while the code
  # below reads "listing", "parent", "duration", ... -- confirm this list
  # is what simple_parser should be given for this feed.
  known_elnames = ['feed', 'title', 'subtitle', 'div', 'span', 'updated', 'id', 'link', 'icon', 'logo', 'author', 'name', 'uri', 'email', 'rights', 'entry', 'published', 'g:publish_date', 'g:expiration_date', 'g:event_date_range', 'g:start', 'g:end', 'updated', 'category', 'summary', 'content', 'awb:city', 'awb:country', 'awb:state', 'awb:postalcode', 'g:location', 'g:age_range', 'g:employer', 'g:job_type', 'g:job_industry', 'awb:paid', ]
  xmldoc = xmlh.simple_parser(s, known_elnames, progress)

  # Re-format the feed's "created" timestamp as YYYY-MM-DDTHH:MM:SS.
  pubdate = xmlh.get_tag_val(xmldoc, "created")
  ts = dateutil.parser.parse(pubdate)
  pubdate = ts.strftime("%Y-%m-%dT%H:%M:%S")

  # convert to footprint format
  # NOTE: from here on `s` is reused as the output buffer.
  s = '<?xml version="1.0" ?>'
  s += '<FootprintFeed schemaVersion="0.1">'
  s += '<FeedInfo>'
  # TODO: assign provider IDs?
  s += '<providerID>104</providerID>'
  s += '<providerName>volunteermatch.org</providerName>'
  s += '<feedID>1</feedID>'
  s += '<providerURL>http://www.volunteermatch.org/</providerURL>'
  s += '<createdDateTime>%s</createdDateTime>' % (pubdate)
  s += '<description></description>' 
  s += '</FeedInfo>'

  numorgs = numopps = 0

  # hardcoded: Organization
  # First pass: one <Organization> per listing, taken from its <parent>.
  # No de-duplication is done here, so a parent shared by several listings
  # is emitted once per listing.
  s += '<Organizations>'
  items = xmldoc.getElementsByTagName("listing")
  # Clamp maxrecs to the number of listings; -1 means "all".
  if (maxrecs > items.length or maxrecs == -1):
    maxrecs = items.length
    
  for item in items[0:maxrecs]:
    orgs = item.getElementsByTagName("parent")
    if (orgs.length == 1):
      org = orgs[0]
      # NOTE(review): values are interpolated without an escape call in this
      # function -- confirm xmlh.get_tag_val returns XML-safe text.
      s += '<Organization>'
      s += '<organizationID>%s</organizationID>' % (xmlh.get_tag_val(org, "key"))
      s += '<nationalEIN></nationalEIN>'
      s += '<name>%s</name>' % (xmlh.get_tag_val(org, "name"))
      s += '<missionStatement></missionStatement>'
      s += '<description></description>'
      s += '<location><city></city><region></region><postalCode></postalCode></location>'
      s += '<organizationURL>%s</organizationURL>' % (xmlh.get_tag_val(org, "URL"))
      s += '<donateURL></donateURL>'
      s += '<logoURL></logoURL>'
      s += '<detailURL>%s</detailURL>' % (xmlh.get_tag_val(org, "detailURL"))
      s += '</Organization>'
      numorgs += 1
    else:
      # Abort the whole conversion: callers get None instead of a tuple.
      print datetime.now(), "parse_volunteermatch: listing does not have an organization"
      return None

  s += '</Organizations>'
    
  # Second pass over the same listings: one <VolunteerOpportunity> each.
  s += '<VolunteerOpportunities>'
  items = xmldoc.getElementsByTagName("listing")
  for item in items[0:maxrecs]:
    s += '<VolunteerOpportunity>'
    s += '<volunteerOpportunityID>%s</volunteerOpportunityID>' % (xmlh.get_tag_val(item, "key"))

    orgs = item.getElementsByTagName("parent")
    if (orgs.length == 1):
      org = orgs[0]
      s += '<sponsoringOrganizationIDs><sponsoringOrganizationID>%s</sponsoringOrganizationID></sponsoringOrganizationIDs>' % (xmlh.get_tag_val(org, "key"))
    else:
      # The first pass already returns None for this case, so this branch
      # only guards against the two passes disagreeing.
      s += '<sponsoringOrganizationIDs><sponsoringOrganizationID>0</sponsoringOrganizationID></sponsoringOrganizationIDs>'
      print datetime.now(), "parse_volunteermatch: listing does not have an organization"
      
    s += '<title>%s</title>' % (xmlh.get_tag_val(item, "title"))

    # NOTE(review): -8888 is presumably a "not specified" sentinel -- confirm.
    s += '<volunteersNeeded>-8888</volunteersNeeded>'

    s += '<dateTimeDurations><dateTimeDuration>'
    durations = xmlh.get_children_by_tagname(item, "duration")
    if (len(durations) == 1):
      duration = durations[0]
      # The "ongoing" attribute decides openEnded; anything but 'true' is No.
      ongoing = duration.getAttribute("ongoing")
      if (ongoing == 'true'):
        s += '<openEnded>Yes</openEnded>'
      else:
        s += '<openEnded>No</openEnded>'
          
      # Start/end times are only emitted when exactly one listingTime exists.
      listingTimes = duration.getElementsByTagName("listingTime")
      if (listingTimes.length == 1):
        listingTime = listingTimes[0]
        s += '<startTime>%s</startTime>' % (xmlh.get_tag_val(listingTime, "startTime"))
        s += '<endTime>%s</endTime>' % (xmlh.get_tag_val(listingTime, "endTime"))
    else:
      print datetime.now(), "parse_volunteermatch: number of durations in item != 1"
      return None
        
    # Convert the commitment ("num" "duration" per "period") to hours/week.
    commitments = item.getElementsByTagName("commitment")
    l_period = l_duration = ""
    if (commitments.length == 1):
      commitment = commitments[0]
      l_num = xmlh.get_tag_val(commitment, "num")
      l_duration = xmlh.get_tag_val(commitment, "duration")
      l_period = xmlh.get_tag_val(commitment, "period")
      if ((l_duration == "hours") and (l_period == "week")):
        s += '<commitmentHoursPerWeek>' + l_num + '</commitmentHoursPerWeek>'
      elif ((l_duration == "hours") and (l_period == "day")):
        # note: weekdays only
        s += '<commitmentHoursPerWeek>' + str(int(l_num)*5) + '</commitmentHoursPerWeek>'
      elif ((l_duration == "hours") and (l_period == "month")):
        # A month is treated as four weeks; the result is floored but
        # never reported as less than one hour.
        hrs = int(float(l_num)/4.0)
        if hrs < 1: hrs = 1
        s += '<commitmentHoursPerWeek>' + str(hrs) + '</commitmentHoursPerWeek>'
      elif ((l_duration == "hours") and (l_period == "event")):
        # TODO: ignore for now, later compute the endTime if not already provided
        pass
      else:
        # Unsupported unit combination: logged, nothing emitted.
        print datetime.now(), "parse_volunteermatch: commitment given in units != hours/week: ", l_duration, "per", l_period
        
    s += '</dateTimeDuration></dateTimeDurations>'

    # Exactly one <location> is required; otherwise the conversion aborts.
    dbaddresses = item.getElementsByTagName("location")
    if (dbaddresses.length != 1):
      print datetime.now(), "parse_volunteermatch: only 1 location supported."
      return None
    dbaddress = dbaddresses[0]
    s += '<locations><location>'
    s += '<streetAddress1>%s</streetAddress1>' % (xmlh.get_tag_val(dbaddress, "street1"))
    s += '<city>%s</city>' % (xmlh.get_tag_val(dbaddress, "city"))
    s += '<region>%s</region>' % (xmlh.get_tag_val(dbaddress, "region"))
    s += '<postalCode>%s</postalCode>' % (xmlh.get_tag_val(dbaddress, "postalCode"))
    
    # Latitude/longitude are optional: emitted only for a single geolocation.
    geolocs = item.getElementsByTagName("geolocation")
    if (geolocs.length == 1):
      geoloc = geolocs[0]
      s += '<latitude>%s</latitude>' % (xmlh.get_tag_val(geoloc, "latitude"))
      s += '<longitude>%s</longitude>' % (xmlh.get_tag_val(geoloc, "longitude"))
    
    s += '</location></locations>'
    
    # NOTE: the loop variable `type` below shadows the builtin of that name.
    s += '<audienceTags>'
    audiences = item.getElementsByTagName("audience")
    for audience in audiences:
      type = xmlh.node_data(audience)
      s += '<audienceTag>%s</audienceTag>' % (type)
    s += '</audienceTags>'

    s += '<categoryTags>'
    categories = item.getElementsByTagName("category")
    for category in categories:
      type = xmlh.node_data(category)
      s += '<categoryTag>%s</categoryTag>' % (type)
    s += '</categoryTags>'

    s += '<skills>%s</skills>' % (xmlh.get_tag_val(item, "skill"))

    s += '<detailURL>%s</detailURL>' % (xmlh.get_tag_val(item, "detailURL"))
    s += '<description>%s</description>' % (xmlh.get_tag_val(item, "description"))

    # Re-format the "expires" timestamp the same way as the feed's pubdate.
    expires = xmlh.get_tag_val(item, "expires")
    ts = dateutil.parser.parse(expires)
    expires = ts.strftime("%Y-%m-%dT%H:%M:%S")
    s += '<expires>%s</expires>' % (expires)

    s += '</VolunteerOpportunity>'
    numopps += 1
    
  s += '</VolunteerOpportunities>'
  s += '</FootprintFeed>'

  #s = re.sub(r'><([^/])', r'>\n<\1', s)
  #print(s)
  return s, numorgs, numopps
コード例 #5
0
def parse(instr, maxrec, progress):
    """Return FPXML given 350.org data.

    Args:
      instr: feed contents; encoded to UTF-8 before xmlh.parse_or_die.
      maxrec: maximum number of <node> records to convert; a value <= 0
        disables the limit.
      progress: not used by this parser.

    Returns:
      A tuple (fpxml_string, numorgs, numopps).  numorgs is always 1 because
      every posting is attributed to the single hard-coded organization.
    """
    feed = xmlh.parse_or_die(instr.encode('utf-8'))

    org_id = str(139)
    mission_statement = "350.org is an international campaign that's building a movement to unite the world around solutions to the climate crisis--the solutions that science and justice demand."
    org_desc = "On October 10 we'll be helping host a Global Work Party, with thousands of communities setting up solar panels or digging community gardens or laying out bike paths."

    last_updated = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

    numorgs = 1
    numopps = 0
    xmlh.print_progress("loading 350.org custom XML...")

    # convert to footprint format
    outstr = '<?xml version="1.0" ?>'
    outstr += '<FootprintFeed schemaVersion="0.1">'
    outstr += '<FeedInfo>'
    outstr += xmlh.output_val('providerID', org_id)
    outstr += xmlh.output_val('providerName', "350org")
    outstr += xmlh.output_val('feedID', "350org")
    outstr += xmlh.output_val('createdDateTime', xmlh.current_ts())
    outstr += xmlh.output_val('providerURL', "http://www.350.org/")
    outstr += '</FeedInfo>'
    # 1 "organization" in 350.org postings
    outstr += '<Organizations><Organization>'
    outstr += xmlh.output_val('organizationID', org_id)
    outstr += '<nationalEIN></nationalEIN>'
    outstr += '<name>350.org</name>'
    outstr += xmlh.output_val('missionStatement', mission_statement)
    outstr += xmlh.output_val('description', org_desc)
    outstr += '<location>'
    outstr += xmlh.output_val("city", "")
    outstr += xmlh.output_val("region", "")
    outstr += xmlh.output_val("postalCode", "")
    outstr += '</location>'
    # TODO: make these variables
    outstr += '<organizationURL>http://www.350.org/</organizationURL>'
    outstr += '<donateURL>http://www.350.org/donate</donateURL>'
    outstr += '<logoURL>http://www.350.org/sites/all/themes/threefifty/logo.gif</logoURL>'
    outstr += '<detailURL>http://www.350.org/about</detailURL>'
    outstr += '</Organization></Organizations>'

    outstr += '\n<VolunteerOpportunities>\n'
    nodes = feed.getElementsByTagName('node')
    for i, node in enumerate(nodes):
        # i is zero-based, so stop once maxrec records have been emitted.
        if maxrec > 0 and i >= maxrec:
            break
        # NOTE(review): a literal "]]>" inside the title/body would end the
        # CDATA section early -- confirm the feed cannot contain it.
        title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>'
        desc = '<![CDATA[' + xmlh.get_tag_val(node, "Body") + ']]>'
        url = xmlh.get_tag_val(node, "Link")
        lat = xmlh.get_tag_val(node, "Latitude")
        lng = xmlh.get_tag_val(node, "Longitude")

        # Start: a missing value falls back to the fixed date 2010-10-10;
        # "(All day)" is rewritten to a midnight time so the "T" split works.
        start_datetime = xmlh.get_tag_val(node, "Start_Date")
        start_time = None
        if not start_datetime:
            start_date = "2010-10-10"
        else:
            start_datetime = start_datetime.replace(" (All day)", "T00:00:00")
            dt = start_datetime.split("T")
            start_date = dt[0][0:10]
            if len(dt) > 1:
                start_time = dt[1]

        # End: a missing value marks the opportunity as open-ended.
        end_datetime = xmlh.get_tag_val(node, "End_Date")
        end_time = None
        if not end_datetime:
            open_ended = True
        else:
            open_ended = False
            end_datetime = end_datetime.replace(" (All day)", "T23:00:00")
            dt = end_datetime.split("T")
            end_date = dt[0][0:10]
            if len(dt) > 1:
                end_time = dt[1]

        locstr = "%s, %s %s" % (xmlh.get_tag_val(node, "City"),
                                xmlh.get_tag_val(node, "Province"),
                                xmlh.get_tag_val(node, "Country"))

        outstr += '<VolunteerOpportunity>'
        outstr += '<volunteerOpportunityID>%s</volunteerOpportunityID>' % (
            str(i))
        outstr += '<sponsoringOrganizationIDs><sponsoringOrganizationID>%s</sponsoringOrganizationID></sponsoringOrganizationIDs>' % (
            org_id)
        outstr += '<volunteerHubOrganizationIDs><volunteerHubOrganizationID>%s</volunteerHubOrganizationID></volunteerHubOrganizationIDs>' % (
            org_id)
        outstr += '<title>%s</title>' % (title)
        outstr += '<detailURL>%s</detailURL>' % (url)
        outstr += '<description>%s</description>' % (desc)
        outstr += '<abstract>%s</abstract>' % (desc)
        outstr += '<lastUpdated>%s</lastUpdated>' % (last_updated)
        outstr += '<locations><location>'
        outstr += '<location_string>%s</location_string>' % (locstr)
        outstr += '<latitude>%s</latitude>' % (lat)
        outstr += '<longitude>%s</longitude>' % (lng)
        outstr += '</location></locations>'
        outstr += '<dateTimeDurations><dateTimeDuration>'
        outstr += '<startDate>%s</startDate>' % (start_date)
        if start_time:
            outstr += '<startTime>%s</startTime>' % (start_time)
        if open_ended:
            outstr += '<openEnded>Yes</openEnded>'
        else:
            outstr += '<openEnded>No</openEnded>'
            outstr += '<endDate>%s</endDate>' % (end_date)
            if end_time:
                outstr += '<endTime>%s</endTime>' % (end_time)
        outstr += '</dateTimeDuration></dateTimeDurations>'
        outstr += '</VolunteerOpportunity>\n'
        numopps += 1
    outstr += '</VolunteerOpportunities>'
    outstr += '</FootprintFeed>'

    return outstr, numorgs, numopps
コード例 #6
0
def parse(instr, maxrec, progress):
  """Return FPXML given 350.org data.

  Args:
    instr: feed contents; encoded to UTF-8 before xmlh.parse_or_die.
    maxrec: maximum number of <node> records to convert; a value <= 0
      disables the limit.
    progress: not used by this parser.

  Returns:
    A tuple (fpxml_string, numorgs, numopps).  numorgs is always 1 because
    every posting is attributed to the single hard-coded organization.
  """
  feed = xmlh.parse_or_die(instr.encode('utf-8'))

  org_id = str(139)
  mission_statement = "350.org is an international campaign that's building a movement to unite the world around solutions to the climate crisis--the solutions that science and justice demand."
  org_desc = "On October 10 we'll be helping host a Global Work Party, with thousands of communities setting up solar panels or digging community gardens or laying out bike paths."

  last_updated = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

  numorgs = 1
  numopps = 0
  xmlh.print_progress("loading 350.org custom XML...")

  # convert to footprint format
  outstr = '<?xml version="1.0" ?>'
  outstr += '<FootprintFeed schemaVersion="0.1">'
  outstr += '<FeedInfo>'
  outstr += xmlh.output_val('providerID', org_id)
  outstr += xmlh.output_val('providerName', "350org")
  outstr += xmlh.output_val('feedID', "350org")
  outstr += xmlh.output_val('createdDateTime', xmlh.current_ts())
  outstr += xmlh.output_val('providerURL', "http://www.350.org/")
  outstr += '</FeedInfo>'
  # 1 "organization" in 350.org postings
  outstr += '<Organizations><Organization>'
  outstr += xmlh.output_val('organizationID', org_id)
  outstr += '<nationalEIN></nationalEIN>'
  outstr += '<name>350.org</name>'
  outstr += xmlh.output_val('missionStatement', mission_statement)
  outstr += xmlh.output_val('description', org_desc)
  outstr += '<location>'
  outstr += xmlh.output_val("city", "")
  outstr += xmlh.output_val("region", "")
  outstr += xmlh.output_val("postalCode", "")
  outstr += '</location>'
  # TODO: make these variables
  outstr += '<organizationURL>http://www.350.org/</organizationURL>'
  outstr += '<donateURL>http://www.350.org/donate</donateURL>'
  outstr += '<logoURL>http://www.350.org/sites/all/themes/threefifty/logo.gif</logoURL>'
  outstr += '<detailURL>http://www.350.org/about</detailURL>'
  outstr += '</Organization></Organizations>'

  outstr += '\n<VolunteerOpportunities>\n'
  nodes = feed.getElementsByTagName('node')
  for i, node in enumerate(nodes):
    # i is zero-based, so stop once maxrec records have been emitted.
    if maxrec > 0 and i >= maxrec:
      break
    # NOTE(review): a literal "]]>" inside the title/body would end the
    # CDATA section early -- confirm the feed cannot contain it.
    title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>'
    desc = '<![CDATA[' + xmlh.get_tag_val(node, "Body") + ']]>'
    url = xmlh.get_tag_val(node, "Link")
    lat = xmlh.get_tag_val(node, "Latitude")
    lng = xmlh.get_tag_val(node, "Longitude")

    # Start: a missing value falls back to the fixed date 2010-10-10;
    # "(All day)" is rewritten to a midnight time so the "T" split works.
    start_datetime = xmlh.get_tag_val(node, "Start_Date")
    start_time = None
    if not start_datetime:
      start_date = "2010-10-10"
    else:
      start_datetime = start_datetime.replace(" (All day)",  "T00:00:00")
      dt = start_datetime.split("T")
      start_date = dt[0][0:10]
      if len(dt) > 1:
        start_time = dt[1]

    # End: a missing value marks the opportunity as open-ended.
    end_datetime = xmlh.get_tag_val(node, "End_Date")
    end_time = None
    if not end_datetime:
      open_ended = True
    else:
      open_ended = False
      end_datetime = end_datetime.replace(" (All day)",  "T23:00:00")
      dt = end_datetime.split("T")
      end_date = dt[0][0:10]
      if len(dt) > 1:
        end_time = dt[1]

    locstr = "%s, %s %s" % (xmlh.get_tag_val(node, "City"),
                            xmlh.get_tag_val(node, "Province"),
                            xmlh.get_tag_val(node, "Country"))

    outstr += '<VolunteerOpportunity>'
    outstr += '<volunteerOpportunityID>%s</volunteerOpportunityID>' % (str(i))
    outstr += '<sponsoringOrganizationIDs><sponsoringOrganizationID>%s</sponsoringOrganizationID></sponsoringOrganizationIDs>' % (org_id)
    outstr += '<volunteerHubOrganizationIDs><volunteerHubOrganizationID>%s</volunteerHubOrganizationID></volunteerHubOrganizationIDs>' % (org_id)
    outstr += '<title>%s</title>' % (title)
    outstr += '<detailURL>%s</detailURL>' % (url)
    outstr += '<description>%s</description>' % (desc)
    outstr += '<abstract>%s</abstract>' % (desc)
    outstr += '<lastUpdated>%s</lastUpdated>' % (last_updated)
    outstr += '<locations><location>'
    outstr += '<location_string>%s</location_string>' % (locstr)
    outstr += '<latitude>%s</latitude>' % (lat)
    outstr += '<longitude>%s</longitude>' % (lng)
    outstr += '</location></locations>'
    outstr += '<dateTimeDurations><dateTimeDuration>'
    outstr += '<startDate>%s</startDate>' % (start_date)
    if start_time:
      outstr += '<startTime>%s</startTime>' % (start_time)
    if open_ended:
      outstr += '<openEnded>Yes</openEnded>'
    else:
      outstr += '<openEnded>No</openEnded>'
      outstr += '<endDate>%s</endDate>' % (end_date)
      if end_time:
        outstr += '<endTime>%s</endTime>' % (end_time)
    outstr += '</dateTimeDuration></dateTimeDurations>'
    outstr += '</VolunteerOpportunity>\n'
    numopps += 1
  outstr += '</VolunteerOpportunities>'
  outstr += '</FootprintFeed>'

  return outstr, numorgs, numopps
コード例 #7
0
    outstr += '<organizationURL>http://www.idealist.org/</organizationURL>'
    outstr += '<donateURL>http://www.idealist.org/</donateURL>'
    outstr += '<logoURL>http://www.idealist.org/css/skin02/images/logoBG.png</logoURL>'
    outstr += '<detailURL>http://www.idealist.org/</detailURL>'
    outstr += '</Organization></Organizations>'

    outstr += '\n<VolunteerOpportunities>\n'
    try:
        nodes = feed.getElementsByTagName('entry')
    except:
        nodes = []

    for i, node in enumerate(nodes):
        if maxrec > 0 and i > maxrec:
            break
        title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>'
        desc = '<![CDATA[' + xmlh.get_tag_val(node, "summary") + ']]>'
        url = xmlh.get_tag_val(node, "id")

        start_date = last_updated
        open_ended = True
        outstr += '<VolunteerOpportunity>'
        outstr += '<volunteerOpportunityID>%s</volunteerOpportunityID>' % (
            str(i))
        outstr += '<sponsoringOrganizationIDs><sponsoringOrganizationID>'
        outstr += org_id
        outstr += '</sponsoringOrganizationID></sponsoringOrganizationIDs>'
        outstr += '<volunteerHubOrganizationIDs><volunteerHubOrganizationID>'
        outstr += org_id
        outstr += '</volunteerHubOrganizationID></volunteerHubOrganizationIDs>'
        outstr += '<title>%s</title>' % (title)
コード例 #8
0
def parse(instr, maxrec, progress):
    """Return FPXML given sparked feed data.

    Args:
      instr: feed contents; encoded to UTF-8 before xmlh.parse_or_die.
      maxrec: maximum number of <challenge> records to convert; a value
        <= 0 disables the limit.
      progress: not used by this parser.

    Returns:
      A tuple (fpxml_string, numorgs, numopps).  numorgs is always 1 because
      every challenge is attributed to the single hard-coded organization.
    """
    feed = xmlh.parse_or_die(instr.encode('utf-8'))

    # NOTE(review): 139 is also the ID used by the 350.org parser in this
    # code base -- confirm the two providers are meant to share it.
    org_id = str(139)
    mission_statement = "Sparked makes it easy for people with busy lives to help nonprofits get valuable work done when it's convenient. We call it microvolunteering. Through the convenience of the Internet, and with the collaboration of others, micro-volunteers use their professional skills to help causes they care about."
    org_desc = "Sparked is the world's first Microvolunteering network"

    last_updated = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

    numorgs = 1
    numopps = 0
    xmlh.print_progress("loading sparked.com custom XML...")

    # convert to footprint format
    outstr = '<?xml version="1.0" ?>'
    outstr += '<FootprintFeed schemaVersion="0.1">'
    outstr += '<FeedInfo>'
    outstr += xmlh.output_val('providerID', org_id)
    outstr += xmlh.output_val('providerName', "sparked")
    outstr += xmlh.output_val('feedID', "sparked")
    outstr += xmlh.output_val('createdDateTime', xmlh.current_ts())
    outstr += xmlh.output_val('providerURL', "http://www.sparked.com/")
    outstr += '</FeedInfo>'
    # 1 "organization" in sparked.com postings
    outstr += '<Organizations><Organization>'
    outstr += xmlh.output_val('organizationID', org_id)
    outstr += '<nationalEIN></nationalEIN>'
    outstr += '<name>sparked.com</name>'
    outstr += xmlh.output_val('missionStatement', mission_statement)
    outstr += xmlh.output_val('description', org_desc)
    outstr += '<location>'
    outstr += xmlh.output_val("city", "San Francisco")
    outstr += xmlh.output_val("region", "CA")
    outstr += xmlh.output_val("postalCode", "94105")
    outstr += '</location>'
    outstr += '<organizationURL>http://www.sparked.com/</organizationURL>'
    outstr += '<donateURL>http://www.sparked.com/</donateURL>'
    outstr += '<logoURL>http://www.sparked.com/imgver4/logo_sparked.gif</logoURL>'
    outstr += '<detailURL>http://www.sparked.com/</detailURL>'
    outstr += '</Organization></Organizations>'

    outstr += '\n<VolunteerOpportunities>\n'
    nodes = feed.getElementsByTagName('challenge')
    for i, node in enumerate(nodes):
        # i is zero-based, so stop once maxrec records have been emitted.
        if maxrec > 0 and i >= maxrec:
            break
        title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>'
        desc = '<![CDATA[' + xmlh.get_tag_val(node, "description") + ']]>'
        url = xmlh.get_tag_val(node, "url")

        # Challenges carry no start date, so the conversion time is used.
        start_date = last_updated
        open_ended = True
        # Deadline layout, by character index:
        #   01234567
        #   02/15/11   (MM/DD/YY, two-digit year taken as 20YY)
        mdy = xmlh.get_tag_val(node, "deadline")
        if mdy:
            try:
                end_date = (str(2000 + int(mdy[6:])) + "-" + mdy[0:2] +
                            "-" + mdy[3:5])
                open_ended = False
            except (ValueError, TypeError):
                # Unparseable deadline: leave the challenge open-ended.
                pass
        outstr += '<VolunteerOpportunity>'
        outstr += '<volunteerOpportunityID>%s</volunteerOpportunityID>' % (
            str(i))
        outstr += '<sponsoringOrganizationIDs><sponsoringOrganizationID>%s</sponsoringOrganizationID></sponsoringOrganizationIDs>' % (
            org_id)
        outstr += '<volunteerHubOrganizationIDs><volunteerHubOrganizationID>%s</volunteerHubOrganizationID></volunteerHubOrganizationIDs>' % (
            org_id)
        outstr += '<micro>Yes</micro>'
        outstr += '<title>%s</title>' % (title)
        outstr += '<detailURL>%s</detailURL>' % (url)
        outstr += '<description>%s</description>' % (desc)
        outstr += '<abstract>%s</abstract>' % (desc)
        outstr += '<lastUpdated>%s</lastUpdated>' % (last_updated)
        outstr += '<dateTimeDurations><dateTimeDuration>'
        outstr += '<startDate>%s</startDate>' % (start_date)
        if open_ended:
            outstr += '<openEnded>Yes</openEnded>'
        else:
            outstr += '<openEnded>No</openEnded>'
            outstr += '<endDate>%s</endDate>' % (end_date)
        outstr += '</dateTimeDuration></dateTimeDurations>'
        outstr += '<locations><location><virtual>Yes</virtual></location></locations>'
        outstr += '</VolunteerOpportunity>\n'
        numopps += 1
    outstr += '</VolunteerOpportunities>'
    outstr += '</FootprintFeed>'

    return outstr, numorgs, numopps
コード例 #9
0
  outstr += '<organizationURL>http://www.idealist.org/</organizationURL>'
  outstr += '<donateURL>http://www.idealist.org/</donateURL>'
  outstr += '<logoURL>http://www.idealist.org/css/skin02/images/logoBG.png</logoURL>'
  outstr += '<detailURL>http://www.idealist.org/</detailURL>'
  outstr += '</Organization></Organizations>'

  outstr += '\n<VolunteerOpportunities>\n'
  try:
    nodes = feed.getElementsByTagName('entry')
  except:
    nodes = []
    
  for i, node in enumerate(nodes):
    if maxrec > 0 and i > maxrec:
       break
    title = '<![CDATA[' + xmlh.get_tag_val(node, "title") + ']]>'
    desc = '<![CDATA[' + xmlh.get_tag_val(node, "summary") + ']]>'
    url = xmlh.get_tag_val(node, "id")

    start_date = last_updated
    open_ended = True
    outstr += '<VolunteerOpportunity>'
    outstr += '<volunteerOpportunityID>%s</volunteerOpportunityID>' % (str(i))
    outstr += '<sponsoringOrganizationIDs><sponsoringOrganizationID>'
    outstr += org_id
    outstr += '</sponsoringOrganizationID></sponsoringOrganizationIDs>'
    outstr += '<volunteerHubOrganizationIDs><volunteerHubOrganizationID>'
    outstr += org_id 
    outstr += '</volunteerHubOrganizationID></volunteerHubOrganizationIDs>'
    outstr += '<title>%s</title>' % (title)
    outstr += '<detailURL>%s</detailURL>' % (url)
コード例 #10
0
 def get_val(self, field):
   """Look up ``field`` on this record's opportunity node.

   The lookup is delegated to xmlh.get_tag_val with self.opp as the node,
   and its result is returned unchanged.
   """
   opp_node = self.opp
   value = xmlh.get_tag_val(opp_node, field)
   return value
コード例 #11
0
 def tag_by_source_id(self, rec, feedinfo):
   """Score 1.0 when the feed's providerID is in self.id_list, else 0.0.

   ``rec`` is accepted but not consulted; only the "providerID" tag of
   ``feedinfo`` matters.
   """
   provider_id = xmlh.get_tag_val(feedinfo, "providerID")
   score = 0.0
   if provider_id in self.id_list:
     score = 1.0
   return score
コード例 #12
0
 def parse(instr, maxrecs, progress):
   numorgs = numopps = 0
   instr = re.sub(r'<(/?db):', r'<\1_', instr)
   opps = re.findall(r'<VolunteerOpportunity>.+?</VolunteerOpportunity>',
                     instr, re.DOTALL)
   volopps = ""
   for i, oppstr in enumerate(opps):
     #if progress and i > 0 and i % 250 == 0:
     #  print str(datetime.now())+": ", i, " opportunities processed."
     if (maxrecs > 0 and i > maxrecs):
       break
     xmlh.print_rps_progress("opps", progress, i, maxrecs)
 
     item = xmlh.simple_parser(oppstr, known_elnames, progress=False)
 
     orgid = register_org(item)
 
     # logoURL -- sigh, this is for the opportunity not the org
     volopps += '<VolunteerOpportunity>'
     volopps += xmlh.output_val('volunteerOpportunityID', str(i))
     volopps += xmlh.output_val('sponsoringOrganizationID', str(orgid))
     volopps += xmlh.output_node('volunteerHubOrganizationID', item, "LocalID")
     volopps += xmlh.output_node('title', item, "Title")
     volopps += xmlh.output_node('abstract', item, "Description")
     volopps += xmlh.output_node('description', item, "Description")
     volopps += xmlh.output_node('detailURL', item, "DetailURL")
     volopps += xmlh.output_val('volunteersNeeded', "-8888")
 
     try:
       oppdates = item.getElementsByTagName("OpportunityDate")
     except:
       oppdates = []
     
     if len(oppdates) > 1:
       print datetime.now(), \
           "parse_servenet.py: only 1 OpportunityDate supported."
       #return None
       oppdate = oppdates[0]
     elif len(oppdates) == 0:
       oppdate = None
     else:
       oppdate = oppdates[0]
     volopps += '<dateTimeDurations><dateTimeDuration>'
 
     if oppdate:
       volopps += xmlh.output_val('openEnded', 'No')
       volopps += xmlh.output_val('duration', 'P%s%s' % 
                                 (xmlh.get_tag_val(oppdate, "DurationQuantity"),
                                  xmlh.get_tag_val(oppdate, "DurationUnit")))
       volopps += xmlh.output_val('commitmentHoursPerWeek', '0')
       volopps += xmlh.output_node('startDate', oppdate, "StartDate")
       volopps += xmlh.output_node('endDate', oppdate, "EndDate")
     else:
       volopps += xmlh.output_val('openEnded', 'Yes')
       volopps += xmlh.output_val('commitmentHoursPerWeek', '0')
     volopps += '</dateTimeDuration></dateTimeDurations>'
 
     volopps += '<locations>'
     try:
       opplocs = item.getElementsByTagName("Location")
     except:
       opplocs = []
     for opploc in opplocs:
       volopps += '<location>'
       virtual_tag = opploc.getElementsByTagName("Virtual")
       if virtual_tag and xmlh.get_tag_val(opploc, "Virtual").lower() == "yes":
         volopps += xmlh.output_val('virtual', 'Yes')
       else:
         volopps += xmlh.output_node('region', opploc, "StateOrProvince")
         volopps += xmlh.output_node('country', opploc, "Country")
         volopps += xmlh.output_node('postalCode', opploc, "ZipOrPostalCode")
       volopps += '</location>'
     volopps += '</locations>'
     volopps += '<categoryTags/>'
     volopps += '</VolunteerOpportunity>'
     numopps += 1
     
   # convert to footprint format
   outstr = '<?xml version="1.0" ?>'
   outstr += '<FootprintFeed schemaVersion="0.1">'
   outstr += '<FeedInfo>'
   outstr += xmlh.output_val('providerID', providerID)
   outstr += xmlh.output_val('providerName', providerName)
   outstr += xmlh.output_val('feedID', feedID)
   outstr += xmlh.output_val('createdDateTime', xmlh.current_ts())
   outstr += xmlh.output_val('providerURL', providerURL)
   outstr += xmlh.output_val('description', feedDescription)
   # TODO: capture ts -- use now?!
   outstr += '</FeedInfo>'
 
   # hardcoded: Organization
   outstr += '<Organizations>'
   for key in ORGS:
     outstr += ORGS[key]
     numorgs += 1
   outstr += '</Organizations>'
   outstr += '<VolunteerOpportunities>'
   outstr += volopps
   outstr += '</VolunteerOpportunities>'
   outstr += '</FootprintFeed>'
 
   #outstr = re.sub(r'><([^/])', r'>\n<\1', outstr)
   return outstr, numorgs, numopps
コード例 #13
0
                             'clientID' : CLIENT_ID})
  try:
    maps_fh = urllib2.urlopen("http://maps.google.com/maps/geo?%s" % params)
    res = maps_fh.read()
    maps_fh.close()
  except IOError, err:
    print_debug("geocode_call: Error contacting Maps API. Sleeping. " + str(err))
    time.sleep(1)
    return geocode_call(query, retries - 1)

  #print_debug("response length: "+str(len(res)))
  if re.search(r'403 Forbidden', res):
    respcode = 403
  else:
    node = xmlh.simple_parser(res, [], False)
    respcode = xmlh.get_tag_val(node, "code")
    if respcode == "":
      #print_debug("unparseable response: "+res)
      return False

  respcode = int(respcode)
  if respcode in (400, 601, 602, 603):  # problem with the query
    return None

  if respcode in (403, 500, 620):  # problem with the server
    print_debug("geocode_call: Connection problem or quota exceeded.  Sleeping...")
    if retries == 4:
      xmlh.print_progress("geocoder: %d" % respcode, "", SHOW_PROGRESS)
    time.sleep(5)
    return geocode_call(query, retries - 1)
コード例 #14
0
def parse(instr, maxrecs, progress):
    """Return FPXML given usaservice data.

    Every line of `instr` is parsed on its own as one feed item and converted
    into one <VolunteerOpportunity>.  All opportunities are attached to a
    single placeholder organization with ID 0.

    Args:
      instr: raw usaservice feed text, one item per line.
      maxrecs: maximum number of items to convert; values <= 0 mean no limit.
      progress: passed through to xmlh.print_rps_progress.

    Returns:
      A (fpxml_string, numorgs, numopps) tuple, or None as soon as an item
      does not contain exactly one db_scheduledTime or exactly one
      db_address element.
    """
    # Function-scope import so the block does not depend on what the rest of
    # the file imports.
    from xml.sax.saxutils import escape

    def text_of(node, tagname):
        """Return the text of `tagname` under `node`, escaped for XML."""
        return escape(xmlh.get_tag_val(node, tagname))

    # TODO: progress
    known_elnames = [
        'channel',
        'db:abstract',
        'db:address',
        'db:attendee_count',
        'db:categories',
        'db:city',
        'db:country',
        'db:county',
        'db:dateTime',
        'db:event',
        'db:eventType',
        'db:guest_total',
        'db:host',
        'db:latitude',
        'db:length',
        'db:longitude',
        'db:rsvp',
        'db:scheduledTime',
        'db:state',
        'db:street',
        'db:title',
        'db:venue_name',
        'db:zipcode',
        'description',
        'docs',
        'guid',
        'item',
        'language',
        'link',
        'pubDate',
        'rss',
        'title',
    ]

    # Fragments are collected in a list and joined once at the end instead of
    # repeatedly re-allocating one ever-growing string.
    out = [
        '<?xml version="1.0" ?>',
        '<FootprintFeed schemaVersion="0.1">',
        '<FeedInfo>',
        # TODO: assign provider IDs?
        '<providerID>101</providerID>',
        '<providerName>usaservice.org</providerName>',
        '<feedID>1</feedID>',
        '<createdDateTime>%s</createdDateTime>' % xmlh.current_ts(),
        '<providerURL>http://www.usaservice.org/</providerURL>',
        '<description>Syndicated events</description>',
        # TODO: capture ts -- use now?!
        '</FeedInfo>',
        # hardcoded: a single placeholder Organization with ID 0
        '<Organizations>',
        '<Organization>',
        '<organizationID>0</organizationID>',
        '<nationalEIN></nationalEIN>',
        '<name></name>',
        '<missionStatement></missionStatement>',
        '<description></description>',
        '<location><city></city><region></region><postalCode></postalCode></location>',
        '<organizationURL></organizationURL>',
        '<donateURL></donateURL>',
        '<logoURL></logoURL>',
        '<detailURL></detailURL>',
        '</Organization>',
        '</Organizations>',
        '<VolunteerOpportunities>',
    ]
    numorgs = 1
    numopps = 0

    # Rewrite the "db:" namespace prefix to "db_" so the elements can be
    # looked up by plain tag name below.
    instr = re.sub(r'<(/?db):', r'<\1_', instr)
    for i, line in enumerate(instr.splitlines()):
        # i is zero-based, so stop once maxrecs items have been converted.
        if maxrecs > 0 and i >= maxrecs:
            break
        xmlh.print_rps_progress("opps", progress, i, maxrecs)
        item = xmlh.simple_parser(line, known_elnames, progress=False)

        # unmapped: db_rsvp  (seems to be same as link, but with #rsvp at end of url?)
        # unmapped: db_county  (seems to be empty)
        # unmapped: attendee_count
        # unmapped: guest_total
        # unmapped: db_title   (dup of title)
        out.append('<VolunteerOpportunity>')
        out.append('<volunteerOpportunityID>%s</volunteerOpportunityID>' %
                   text_of(item, "guid"))
        # hardcoded: sponsoringOrganizationID
        out.append('<sponsoringOrganizationIDs><sponsoringOrganizationID>0</sponsoringOrganizationID></sponsoringOrganizationIDs>')
        # hardcoded: volunteerHubOrganizationID
        out.append('<volunteerHubOrganizationIDs><volunteerHubOrganizationID>0</volunteerHubOrganizationID></volunteerHubOrganizationIDs>')
        out.append('<title>%s</title>' % text_of(item, "title"))
        # The source element is db:abstract, which the substitution above
        # renamed to db_abstract; a lookup of plain "abstract" never matches.
        out.append('<abstract>%s</abstract>' % text_of(item, "db_abstract"))
        out.append('<volunteersNeeded>-8888</volunteersNeeded>')

        scheduled_times = item.getElementsByTagName("db_scheduledTime")
        if scheduled_times.length != 1:
            print("%s parse_usaservice: only 1 db_scheduledTime supported." %
                  datetime.now())
            return None
        scheduled = scheduled_times[0]
        out.append('<dateTimeDurations><dateTimeDuration>')
        length = xmlh.get_tag_val(scheduled, "db_length")
        if length == "" or length == "-1":
            out.append('<openEnded>Yes</openEnded>')
        else:
            out.append('<openEnded>No</openEnded>')
        # partition() tolerates a missing or oddly spaced timestamp, where
        # unpacking the result of split(" ") would raise ValueError.
        start_date, _, start_time = xmlh.get_tag_val(
            scheduled, "db_dateTime").partition(" ")
        out.append('<startDate>%s</startDate>' % escape(start_date))
        # TODO: timezone???
        out.append('<startTime>%s</startTime>' % escape(start_time))
        out.append('</dateTimeDuration></dateTimeDurations>')

        addresses = item.getElementsByTagName("db_address")
        if addresses.length != 1:
            print("%s parse_usaservice: only 1 db_address supported." %
                  datetime.now())
            return None
        address = addresses[0]
        out.append('<locations><location>')
        out.append('<name>%s</name>' % text_of(item, "db_venue_name"))
        out.append('<streetAddress1>%s</streetAddress1>' %
                   text_of(address, "db_street"))
        out.append('<city>%s</city>' % text_of(address, "db_city"))
        out.append('<region>%s</region>' % text_of(address, "db_state"))
        out.append('<country>%s</country>' % text_of(address, "db_country"))
        out.append('<postalCode>%s</postalCode>' %
                   text_of(address, "db_zipcode"))
        out.append('<latitude>%s</latitude>' % text_of(item, "db_latitude"))
        out.append('<longitude>%s</longitude>' % text_of(item, "db_longitude"))
        out.append('</location></locations>')

        out.append('<categoryTags><categoryTag>%s</categoryTag></categoryTags>'
                   % text_of(item, "db_eventType"))

        out.append('<contactName>%s</contactName>' % text_of(item, "db_host"))
        out.append('<detailURL>%s</detailURL>' % text_of(item, "link"))
        out.append('<description>%s</description>' %
                   text_of(item, "description"))

        pubdate = xmlh.get_tag_val(item, "pubDate")
        if re.search("[0-9][0-9] [A-Z][a-z][a-z] [0-9][0-9][0-9][0-9]",
                     pubdate):
            # TODO: parse() is ignoring timezone...
            parsed = dateutil.parser.parse(pubdate)
            pubdate = parsed.strftime("%Y-%m-%dT%H:%M:%S")
        else:
            # Unrecognized date text is passed through, but escaped.
            pubdate = escape(pubdate)
        out.append('<lastUpdated>%s</lastUpdated>' % pubdate)
        out.append('</VolunteerOpportunity>')
        numopps += 1

    out.append('</VolunteerOpportunities>')
    out.append('</FootprintFeed>')
    return "".join(out), numorgs, numopps
コード例 #15
0
    def parse(instr, maxrecs, progress):
        """Return FPXML for a feed made of <VolunteerOpportunity> records.

        Each <VolunteerOpportunity>...</VolunteerOpportunity> span found in
        `instr` is parsed separately.  Its organization is registered through
        register_org(), and every organization held in ORGS is written to the
        <Organizations> section of the output.

        Args:
          instr: raw feed text.
          maxrecs: maximum number of opportunities to convert; values <= 0
            mean no limit.
          progress: passed through to xmlh.print_rps_progress.

        Returns:
          A (fpxml_string, numorgs, numopps) tuple.
        """
        numopps = 0
        instr = re.sub(r'<(/?db):', r'<\1_', instr)
        opps = re.findall(r'<VolunteerOpportunity>.+?</VolunteerOpportunity>',
                          instr, re.DOTALL)
        # Fragments are collected in a list and joined once, instead of
        # repeatedly re-allocating one ever-growing string.
        volopps = []
        for i, oppstr in enumerate(opps):
            # i is zero-based, so stop once maxrecs records were converted.
            if maxrecs > 0 and i >= maxrecs:
                break
            xmlh.print_rps_progress("opps", progress, i, maxrecs)

            item = xmlh.simple_parser(oppstr, known_elnames, progress=False)

            orgid = register_org(item)

            # logoURL -- sigh, this is for the opportunity not the org
            volopps.append('<VolunteerOpportunity>')
            volopps.append(xmlh.output_val('volunteerOpportunityID', str(i)))
            volopps.append(
                xmlh.output_val('sponsoringOrganizationID', str(orgid)))
            volopps.append(
                xmlh.output_node('volunteerHubOrganizationID', item, "LocalID"))
            volopps.append(xmlh.output_node('title', item, "Title"))
            volopps.append(xmlh.output_node('abstract', item, "Description"))
            volopps.append(xmlh.output_node('description', item, "Description"))
            volopps.append(xmlh.output_node('detailURL', item, "DetailURL"))
            volopps.append(xmlh.output_val('volunteersNeeded', "-8888"))

            # Best effort: treat a failed lookup as "no dates", but do not
            # swallow KeyboardInterrupt/SystemExit as a bare except would.
            try:
                oppdates = item.getElementsByTagName("OpportunityDate")
            except Exception:
                oppdates = []

            if len(oppdates) > 1:
                # Only the first date is used; the others are dropped.
                print("%s parse_servenet.py: only 1 OpportunityDate supported."
                      % datetime.now())
            oppdate = oppdates[0] if len(oppdates) > 0 else None

            volopps.append('<dateTimeDurations><dateTimeDuration>')
            if oppdate:
                volopps.append(xmlh.output_val('openEnded', 'No'))
                volopps.append(xmlh.output_val(
                    'duration',
                    'P%s%s' % (xmlh.get_tag_val(oppdate, "DurationQuantity"),
                               xmlh.get_tag_val(oppdate, "DurationUnit"))))
                volopps.append(xmlh.output_val('commitmentHoursPerWeek', '0'))
                volopps.append(
                    xmlh.output_node('startDate', oppdate, "StartDate"))
                volopps.append(xmlh.output_node('endDate', oppdate, "EndDate"))
            else:
                volopps.append(xmlh.output_val('openEnded', 'Yes'))
                volopps.append(xmlh.output_val('commitmentHoursPerWeek', '0'))
            volopps.append('</dateTimeDuration></dateTimeDurations>')

            volopps.append('<locations>')
            try:
                opplocs = item.getElementsByTagName("Location")
            except Exception:
                opplocs = []
            for opploc in opplocs:
                volopps.append('<location>')
                virtual_tag = opploc.getElementsByTagName("Virtual")
                if virtual_tag and xmlh.get_tag_val(
                        opploc, "Virtual").lower() == "yes":
                    volopps.append(xmlh.output_val('virtual', 'Yes'))
                else:
                    volopps.append(
                        xmlh.output_node('region', opploc, "StateOrProvince"))
                    volopps.append(
                        xmlh.output_node('country', opploc, "Country"))
                    volopps.append(
                        xmlh.output_node('postalCode', opploc,
                                         "ZipOrPostalCode"))
                volopps.append('</location>')
            volopps.append('</locations>')
            volopps.append('<categoryTags/>')
            volopps.append('</VolunteerOpportunity>')
            numopps += 1

        # convert to footprint format
        outparts = [
            '<?xml version="1.0" ?>',
            '<FootprintFeed schemaVersion="0.1">',
            '<FeedInfo>',
            xmlh.output_val('providerID', providerID),
            xmlh.output_val('providerName', providerName),
            xmlh.output_val('feedID', feedID),
            xmlh.output_val('createdDateTime', xmlh.current_ts()),
            xmlh.output_val('providerURL', providerURL),
            xmlh.output_val('description', feedDescription),
            # TODO: capture ts -- use now?!
            '</FeedInfo>',
            '<Organizations>',
        ]

        # Emit every organization currently held in ORGS.
        numorgs = 0
        for key in ORGS:
            outparts.append(ORGS[key])
            numorgs += 1
        outparts.append('</Organizations>')
        outparts.append('<VolunteerOpportunities>')
        outparts.extend(volopps)
        outparts.append('</VolunteerOpportunities>')
        outparts.append('</FootprintFeed>')

        return "".join(outparts), numorgs, numopps
コード例 #16
0
def parse(s, maxrecs, progress):
    """Return FPXML given volunteermatch data.

    The feed's "listing" elements are walked twice: first to emit one
    <Organization> per listing (taken from its "parent" element), then to
    emit one <VolunteerOpportunity> per listing.

    Args:
      s: raw volunteermatch feed text.
      maxrecs: maximum number of listings to convert; values <= 0, or values
        larger than the number of listings, mean "all listings".
      progress: passed through to xmlh.simple_parser.

    Returns:
      A (fpxml_string, numorgs, numopps) tuple, or None as soon as a listing
      does not have exactly one parent organization, exactly one duration
      child, or exactly one location.
    """
    # Function-scope import so the block does not depend on what the rest of
    # the file imports.
    from xml.sax.saxutils import escape

    def text_of(node, tagname):
        """Return the text of `tagname` under `node`, escaped for XML."""
        return escape(xmlh.get_tag_val(node, tagname))

    # TODO: progress
    known_elnames = [
        'feed',
        'title',
        'subtitle',
        'div',
        'span',
        'updated',
        'id',
        'link',
        'icon',
        'logo',
        'author',
        'name',
        'uri',
        'email',
        'rights',
        'entry',
        'published',
        'g:publish_date',
        'g:expiration_date',
        'g:event_date_range',
        'g:start',
        'g:end',
        'updated',
        'category',
        'summary',
        'content',
        'awb:city',
        'awb:country',
        'awb:state',
        'awb:postalcode',
        'g:location',
        'g:age_range',
        'g:employer',
        'g:job_type',
        'g:job_industry',
        'awb:paid',
    ]
    xmldoc = xmlh.simple_parser(s, known_elnames, progress)

    pubdate = xmlh.get_tag_val(xmldoc, "created")
    created = dateutil.parser.parse(pubdate)
    pubdate = created.strftime("%Y-%m-%dT%H:%M:%S")

    # Fragments are collected in a list and joined once at the end instead of
    # repeatedly re-allocating one ever-growing string.
    out = [
        '<?xml version="1.0" ?>',
        '<FootprintFeed schemaVersion="0.1">',
        '<FeedInfo>',
        # TODO: assign provider IDs?
        '<providerID>104</providerID>',
        '<providerName>volunteermatch.org</providerName>',
        '<feedID>1</feedID>',
        '<providerURL>http://www.volunteermatch.org/</providerURL>',
        '<createdDateTime>%s</createdDateTime>' % pubdate,
        '<description></description>',
        '</FeedInfo>',
        '<Organizations>',
    ]

    numorgs = numopps = 0

    items = xmldoc.getElementsByTagName("listing")
    # Any non-positive limit means "everything".  Used directly as a slice
    # bound, 0 would select nothing and a negative value would silently drop
    # listings from the end.
    if maxrecs <= 0 or maxrecs > items.length:
        maxrecs = items.length

    # NOTE(review): a parent shared by several listings is emitted once per
    # listing, so duplicate <Organization> entries are possible -- confirm
    # whether consumers of the feed rely on that.
    for item in items[0:maxrecs]:
        orgs = item.getElementsByTagName("parent")
        if orgs.length != 1:
            print("%s parse_volunteermatch: listing does not have an organization"
                  % datetime.now())
            return None
        org = orgs[0]
        out.append('<Organization>')
        out.append('<organizationID>%s</organizationID>' % text_of(org, "key"))
        out.append('<nationalEIN></nationalEIN>')
        out.append('<name>%s</name>' % text_of(org, "name"))
        out.append('<missionStatement></missionStatement>')
        out.append('<description></description>')
        out.append('<location><city></city><region></region><postalCode></postalCode></location>')
        out.append('<organizationURL>%s</organizationURL>' %
                   text_of(org, "URL"))
        out.append('<donateURL></donateURL>')
        out.append('<logoURL></logoURL>')
        out.append('<detailURL>%s</detailURL>' % text_of(org, "detailURL"))
        out.append('</Organization>')
        numorgs += 1

    out.append('</Organizations>')

    out.append('<VolunteerOpportunities>')
    for item in items[0:maxrecs]:
        out.append('<VolunteerOpportunity>')
        out.append('<volunteerOpportunityID>%s</volunteerOpportunityID>' %
                   text_of(item, "key"))

        orgs = item.getElementsByTagName("parent")
        if orgs.length == 1:
            out.append(
                '<sponsoringOrganizationIDs><sponsoringOrganizationID>%s</sponsoringOrganizationID></sponsoringOrganizationIDs>'
                % text_of(orgs[0], "key"))
        else:
            # Defensive only: the organization pass above has already
            # returned None for any listing without exactly one parent.
            out.append('<sponsoringOrganizationIDs><sponsoringOrganizationID>0</sponsoringOrganizationID></sponsoringOrganizationIDs>')
            print("%s parse_volunteermatch: listing does not have an organization"
                  % datetime.now())

        out.append('<title>%s</title>' % text_of(item, "title"))

        out.append('<volunteersNeeded>-8888</volunteersNeeded>')

        out.append('<dateTimeDurations><dateTimeDuration>')
        durations = xmlh.get_children_by_tagname(item, "duration")
        if len(durations) != 1:
            print("%s parse_volunteermatch: number of durations in item != 1"
                  % datetime.now())
            return None
        duration = durations[0]
        if duration.getAttribute("ongoing") == 'true':
            out.append('<openEnded>Yes</openEnded>')
        else:
            out.append('<openEnded>No</openEnded>')

        listing_times = duration.getElementsByTagName("listingTime")
        if listing_times.length == 1:
            listing_time = listing_times[0]
            out.append('<startTime>%s</startTime>' %
                       text_of(listing_time, "startTime"))
            out.append('<endTime>%s</endTime>' %
                       text_of(listing_time, "endTime"))

        commitments = item.getElementsByTagName("commitment")
        if commitments.length == 1:
            commitment = commitments[0]
            l_num = xmlh.get_tag_val(commitment, "num")
            l_duration = xmlh.get_tag_val(commitment, "duration")
            l_period = xmlh.get_tag_val(commitment, "period")
            if l_duration == "hours" and l_period == "week":
                out.append('<commitmentHoursPerWeek>' + escape(l_num) +
                           '</commitmentHoursPerWeek>')
            elif l_duration == "hours" and l_period == "day":
                # note: weekdays only
                out.append('<commitmentHoursPerWeek>' + str(int(l_num) * 5) +
                           '</commitmentHoursPerWeek>')
            elif l_duration == "hours" and l_period == "month":
                # Roughly four weeks per month, never less than one hour.
                hrs = max(int(float(l_num) / 4.0), 1)
                out.append('<commitmentHoursPerWeek>' + str(hrs) +
                           '</commitmentHoursPerWeek>')
            elif l_duration == "hours" and l_period == "event":
                # TODO: ignore for now, later compute the endTime if not already provided
                pass
            else:
                print("%s parse_volunteermatch: commitment given in units != hours/week:  %s per %s"
                      % (datetime.now(), l_duration, l_period))

        out.append('</dateTimeDuration></dateTimeDurations>')

        addresses = item.getElementsByTagName("location")
        if addresses.length != 1:
            print("%s parse_volunteermatch: only 1 location supported." %
                  datetime.now())
            return None
        address = addresses[0]
        out.append('<locations><location>')
        out.append('<streetAddress1>%s</streetAddress1>' %
                   text_of(address, "street1"))
        out.append('<city>%s</city>' % text_of(address, "city"))
        out.append('<region>%s</region>' % text_of(address, "region"))
        out.append('<postalCode>%s</postalCode>' %
                   text_of(address, "postalCode"))

        geolocs = item.getElementsByTagName("geolocation")
        if geolocs.length == 1:
            geoloc = geolocs[0]
            out.append('<latitude>%s</latitude>' % text_of(geoloc, "latitude"))
            out.append('<longitude>%s</longitude>' %
                       text_of(geoloc, "longitude"))

        out.append('</location></locations>')

        out.append('<audienceTags>')
        for audience in item.getElementsByTagName("audience"):
            out.append('<audienceTag>%s</audienceTag>' %
                       escape(xmlh.node_data(audience)))
        out.append('</audienceTags>')

        out.append('<categoryTags>')
        for category in item.getElementsByTagName("category"):
            out.append('<categoryTag>%s</categoryTag>' %
                       escape(xmlh.node_data(category)))
        out.append('</categoryTags>')

        out.append('<skills>%s</skills>' % text_of(item, "skill"))

        out.append('<detailURL>%s</detailURL>' % text_of(item, "detailURL"))
        out.append('<description>%s</description>' %
                   text_of(item, "description"))

        expires = xmlh.get_tag_val(item, "expires")
        expires_ts = dateutil.parser.parse(expires)
        out.append('<expires>%s</expires>' %
                   expires_ts.strftime("%Y-%m-%dT%H:%M:%S"))

        out.append('</VolunteerOpportunity>')
        numopps += 1

    out.append('</VolunteerOpportunities>')
    out.append('</FootprintFeed>')

    return "".join(out), numorgs, numopps
コード例 #17
0
def parse(instr, maxrec, progress):
  """Return FPXML given sparked feed data.

  Every "challenge" element of the feed becomes one virtual
  <VolunteerOpportunity>, all sponsored by the single hard-coded sparked.com
  organization.

  Args:
    instr: sparked feed text; it is encoded to UTF-8 before parsing.
    maxrec: maximum number of challenges to convert; values <= 0 mean no
      limit.
    progress: not used by this parser.

  Returns:
    A (fpxml_string, numorgs, numopps) tuple; numorgs is always 1.
  """
  # Function-scope import so the block does not depend on what the rest of
  # the file imports.
  from xml.sax.saxutils import escape

  def cdata(text):
    """Wrap text in a CDATA section that survives an embedded "]]>"."""
    # A literal "]]>" would end the section early, so it is split across two
    # adjacent CDATA sections.
    return '<![CDATA[' + text.replace(']]>', ']]]]><![CDATA[>') + ']]>'

  feed = xmlh.parse_or_die(instr.encode('utf-8'))

  org_id = str(139)
  mission_statement = "Sparked makes it easy for people with busy lives to help nonprofits get valuable work done when it's convenient. We call it microvolunteering. Through the convenience of the Internet, and with the collaboration of others, micro-volunteers use their professional skills to help causes they care about."
  org_desc = "Sparked is the world's first Microvolunteering network"

  # Every opportunity is stamped with the time of this run, both as its
  # lastUpdated value and as its startDate.
  last_updated = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
  start_date = last_updated

  numorgs = 1
  numopps = 0
  xmlh.print_progress("loading sparked.com custom XML...")

  # Fragments are collected in a list and joined once at the end instead of
  # repeatedly re-allocating one ever-growing string.
  out = [
    # convert to footprint format
    '<?xml version="1.0" ?>',
    '<FootprintFeed schemaVersion="0.1">',
    '<FeedInfo>',
    xmlh.output_val('providerID', org_id),
    xmlh.output_val('providerName', "sparked"),
    xmlh.output_val('feedID', "sparked"),
    xmlh.output_val('createdDateTime', xmlh.current_ts()),
    xmlh.output_val('providerURL', "http://www.sparked.com/"),
    '</FeedInfo>',
    # 1 "organization" in sparked.com postings
    '<Organizations><Organization>',
    xmlh.output_val('organizationID', org_id),
    '<nationalEIN></nationalEIN>',
    '<name>sparked.com</name>',
    xmlh.output_val('missionStatement', mission_statement),
    xmlh.output_val('description', org_desc),
    '<location>',
    xmlh.output_val("city", "San Francisco"),
    xmlh.output_val("region", "CA"),
    xmlh.output_val("postalCode", "94105"),
    '</location>',
    '<organizationURL>http://www.sparked.com/</organizationURL>',
    '<donateURL>http://www.sparked.com/</donateURL>',
    '<logoURL>http://www.sparked.com/imgver4/logo_sparked.gif</logoURL>',
    '<detailURL>http://www.sparked.com/</detailURL>',
    '</Organization></Organizations>',
    '\n<VolunteerOpportunities>\n',
  ]

  nodes = feed.getElementsByTagName('challenge')
  for i, node in enumerate(nodes):
    # i is zero-based, so stop once maxrec challenges have been converted.
    if maxrec > 0 and i >= maxrec:
      break
    title = cdata(xmlh.get_tag_val(node, "title"))
    desc = cdata(xmlh.get_tag_val(node, "description"))
    # The URL is emitted as plain element text, so "&" and "<" are escaped.
    url = escape(xmlh.get_tag_val(node, "url"))

    # The deadline arrives as MM/DD/YY (e.g. 02/15/11).  A missing or
    # non-numeric year leaves the opportunity open-ended.
    end_date = None
    mdy = xmlh.get_tag_val(node, "deadline")
    if mdy:
      try:
        end_date = str(2000 + int(mdy[6:])) + "-" + mdy[0:2] + "-" + mdy[3:5]
      except ValueError:
        end_date = None
    open_ended = end_date is None

    out.append('<VolunteerOpportunity>')
    out.append('<volunteerOpportunityID>%s</volunteerOpportunityID>' % (str(i)))
    out.append('<sponsoringOrganizationIDs><sponsoringOrganizationID>%s</sponsoringOrganizationID></sponsoringOrganizationIDs>' % (org_id))
    out.append('<volunteerHubOrganizationIDs><volunteerHubOrganizationID>%s</volunteerHubOrganizationID></volunteerHubOrganizationIDs>' % (org_id))
    out.append('<micro>Yes</micro>')
    out.append('<title>%s</title>' % (title))
    out.append('<detailURL>%s</detailURL>' % (url))
    out.append('<description>%s</description>' % (desc))
    out.append('<abstract>%s</abstract>' % (desc))
    out.append('<lastUpdated>%s</lastUpdated>' % (last_updated))
    out.append('<dateTimeDurations><dateTimeDuration>')
    out.append('<startDate>%s</startDate>' % (start_date))
    if open_ended:
      out.append('<openEnded>Yes</openEnded>')
    else:
      out.append('<openEnded>No</openEnded>')
      out.append('<endDate>%s</endDate>' % (end_date))
    out.append('</dateTimeDuration></dateTimeDurations>')
    out.append('<locations><location><virtual>Yes</virtual></location></locations>')
    out.append('</VolunteerOpportunity>\n')
    numopps += 1
  out.append('</VolunteerOpportunities>')
  out.append('</FootprintFeed>')

  return "".join(out), numorgs, numopps
コード例 #18
0
        maps_fh = urllib2.urlopen(url)
        res = maps_fh.read()
        maps_fh.close()
    except IOError, err:
        print_debug("geocode_call: Error calling Maps API" + str(err) + "\n" +
                    url)
        return False

    try:
        node = xmlh.simple_parser(res, [], False)
        node = node.getElementsByTagName('GeocodeResponse')[0]
    except:
        print_debug("unparseable response: " + res)
        return False

    respcode = xmlh.get_tag_val(node, "status")
    if respcode != "OK":
        print_debug("Maps API reponded " + respcode)
        return None

    if respcode == "UNKNOWN_ERROR":  # problem with the server
        print_debug("geocode_call: Connection problem.  retrying...")
        if retries > 0:
            time.sleep(3)
        return geocode_call(query, retries - 1)

    result_node = node.getElementsByTagName('result')[0]
    addr = xmlh.get_tag_val(result_node, "formatted_address")
    # removes "USA" from all addresses.
    addr = re.sub(r', USA$', r'', addr)