def __call__(self):
    # Get the user's most recent Diggs
    user_url = 'http://services.digg.com/user/%s/diggs/?appkey=%s&count=%s' % (
        self.username,
        self.api_key,
        self.count
    )
    user_xml = utils.getxml(user_url)
    # Parse out the story id and datetime of each digg
    diggs = [(i.get('story'), i.get('date')) for i in user_xml.getchildren()]
    # A list we'll ultimately pass out
    stories = []
    # Now loop through the diggs
    for story, date in diggs:
        # And pull information about the stories
        story_url = 'http://services.digg.com/story/%s/?appkey=%s' % (
            str(story),
            self.api_key
        )
        story_xml = utils.getxml(story_url)
        # A dict to stuff all the good stuff in
        story_dict = {
            # Since the digg date is expressed in epoch seconds,
            # we can start like so...
            'date': dateutil.parser.parse(time.ctime(float(date))),
        }
        # Loop through the story node
        for ele in story_xml.getiterator('story'):
            # Get the link
            link = smart_unicode(ele.get('link'))
            story_dict['url'] = link
            # Get the title
            title_node = ele.find('title')
            story_dict['title'] = smart_unicode(title_node.text)
            # Get the description
            description_node = ele.find('description')
            story_dict['description'] = smart_unicode(description_node.text)
            # Get the topic
            topic_node = ele.find('topic')
            story_dict['topic'] = smart_unicode(topic_node.get('name'))
        # Pass the dict out to our list
        stories.append(story_dict)
    return stories
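# utils.getxml is used throughout these snippets but isn't defined here. A
# minimal sketch of the kind of helper it's assumed to be -- fetch a URL
# (optionally with HTTP basic auth, matching the username/password keyword
# arguments used by the del.icio.us client below) and parse the body into an
# ElementTree element. The HttpLib2Error handling in one of the tag helpers
# below suggests the real implementation is built on httplib2; this sketch
# uses urllib2 only to stay self-contained, so treat it as an assumption,
# not the original utility.
from xml.etree import ElementTree
import urllib2

def getxml(url, username=None, password=None):
    # Attach optional HTTP basic auth credentials
    if username and password:
        password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(password_mgr))
    else:
        opener = urllib2.build_opener()
    # Fetch the response body and parse it into an Element
    body = opener.open(url).read()
    return ElementTree.fromstring(body)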
def get_latest_data(self):
    self.xml = utils.getxml(self.feed_url)
    self.commit_list = []
    self.entry_list = self.xml.getiterator('{http://www.w3.org/2005/Atom}entry')
    for entry in self.entry_list:
        title = entry.find('{http://www.w3.org/2005/Atom}title').text
        match = self.GITHUB_TITLE_REGEX.search(title)
        # If it doesn't match, it's one of the less important entries,
        # like when you start watching somebody's repo.
        if not match:
            # And we can just skip those
            continue
        pub_date = self._extract_entry_pubdate(entry)
        html = entry.find('{http://www.w3.org/2005/Atom}content').text
        soup = BeautifulSoup(html)
        commits_html = soup.find('div', attrs={'class': 'commits'}).findAll('li')
        for commit_html in commits_html:
            # Create a dict to stuff the goodies in
            entry_dict = {
                'pub_date': pub_date,
                'branch': smart_unicode(match.group('branch')),
                'repository': smart_unicode(match.group('repository')),
                'message': self._extract_commit_message(commit_html),
                'url': self._extract_commit_url(commit_html),
            }
            # Add the dict to the entry list
            self.commit_list.append(entry_dict)
    # Pass out the commit list
    return self.commit_list
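# The helpers referenced above (GITHUB_TITLE_REGEX, _extract_entry_pubdate,
# _extract_commit_message, _extract_commit_url) aren't defined in this
# snippet. A minimal sketch of what they might look like, modeled on the
# inline logic of the standalone GitHub callable further down; the regex
# pattern, method names and return types here are assumptions, not the
# original implementation. smart_unicode is Django's
# django.utils.encoding.smart_unicode.
GITHUB_TITLE_REGEX = re.compile(r"pushed to (?P<branch>.*) at (?P<repository>.*)")

def _extract_entry_pubdate(self, entry):
    # Parse the Atom <published> timestamp into a datetime
    text = entry.find('{http://www.w3.org/2005/Atom}published').text
    return dateutil.parser.parse(text)

def _extract_commit_url(self, commit_html):
    # The commit link is the first anchor inside the <li>
    return smart_unicode(commit_html.find('a')['href'])

def _extract_commit_message(self, commit_html):
    # The commit message sits in a <blockquote> inside the <li>
    return smart_unicode(commit_html.find('blockquote').string.strip())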
def get_latest_data(self):
    # Get the user's most recent Diggs
    self.base_url = 'http://services.digg.com/1.0/endpoint?method=user.getDiggs&username=%s&count=%s'
    self.url = self.base_url % (self.username, self.count)
    self.xml = utils.getxml(self.url)
    # Parse out the story id and datetime of each digg
    self.diggs = [(i.get('story'), i.get('date')) for i in self.xml.getchildren()]
    # A list we'll ultimately pass out
    self.link_list = []
    # Now loop through the diggs
    for story, date in self.diggs:
        # And pull information about the stories
        story_url = 'http://services.digg.com/2.0/story.getInfo?story_ids=%s' % str(story)
        story_json = utils.getjson(story_url)
        story_obj = story_json['stories'][0]
        # A dict to stuff all the good stuff in
        story_dict = {
            # Since the digg date is expressed in epoch seconds,
            # we can start like so...
            'date': utils.parsedate(time.ctime(float(date))),
        }
        # Get the link
        story_dict['url'] = smart_unicode(story_obj.get('url'))
        # Get the title
        story_dict['title'] = smart_unicode(story_obj.get('title'))
        # Get the description
        story_dict['description'] = smart_unicode(story_obj.get('description'))
        # Get the topic
        story_dict['topic'] = smart_unicode(story_obj.get('topic').get('name'))
        # Pass the dict out to our list
        self.link_list.append(story_dict)
    return self.link_list
def get_latest_data(self):
    # Fetch the XML via web request
    self.url = 'http://www.flixster.com/api/v1/users/%s/ratings.rss' % self.username
    self.xml = utils.getxml(self.url)
    # Parse the XML down to the item entries
    self.channel = self.xml.find('channel')
    self.items = self.channel.findall('item')
    # Make a list to stuff all the cleaned data into
    self.movies = []
    # Loop through all the entries
    for item in self.items:
        # Dictionary where we'll stuff all the goodies
        movie_dict = {}
        # Get the name of the movie
        title = item.find('title').text
        movie_dict['title'] = smart_unicode(title)
        # Get the URL to the review
        url = item.find('link').text
        movie_dict['url'] = smart_unicode(url)
        # Get the star rating and translate it to a float
        rating = item.find('rating').text
        movie_dict['rating'] = self._prep_rating(rating)
        # Get the pubdate
        pub_date = item.find('pubDate').text
        movie_dict['pub_date'] = utils.parsedate(pub_date)
        # Add it to the list
        self.movies.append(movie_dict)
    return self.movies
def _tags_for_url(self, url):
    tags = set()
    xml = utils.getxml(url)
    for t in xml.getiterator("tag"):
        count = utils.safeint(t.find("count").text)
        if count >= self.tag_usage_threshold:
            tag = slugify(smart_unicode(t.find("name").text))
            tags.add(tag[:50])
    return tags
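# utils.safeint isn't shown in these snippets. A minimal sketch of the sort
# of helper the call above assumes -- coerce a value to int, falling back to
# a default when the text is missing or malformed (an assumption for
# illustration, not the original code):
def safeint(value, default=0):
    try:
        return int(value)
    except (TypeError, ValueError):
        return default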
def _tags_for_url(self, url):
    tags = set()
    try:
        xml = utils.getxml(url)
    except HttpLib2Error, e:
        # A request timeout just means no tags this time around;
        # anything else is a real error.
        if e.code == 408:
            return tags
        else:
            raise
    for t in xml.getiterator("tag"):
        count = utils.safeint(t.find("count").text)
        if count >= self.tag_usage_threshold:
            tag = slugify(smart_unicode(t.find("name").text))
            tags.add(tag[:50])
    return tags
def __call__(self, **params):
    # Enforce Yahoo's "no calls quicker than every 1 second" rule,
    # leaving a 2-second gap between requests to stay well clear of it.
    delta = time.time() - DeliciousClient.lastcall
    if delta < 2:
        time.sleep(2 - delta)
    DeliciousClient.lastcall = time.time()
    url = ("https://api.del.icio.us/%s?" % self.method) + urllib.urlencode(params)
    xml = utils.getxml(url, username=self.username, password=self.password)
    return xml
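# A minimal usage sketch of the throttled client above. How a DeliciousClient
# instance gets its username, password and method is outside this snippet, so
# the constructor call below is a hypothetical stand-in; the point is only
# that repeated calls to the instance are spaced at least two seconds apart.
client = DeliciousClient('someuser', 'secret', method='posts/recent')  # hypothetical signature
recent = client(count=15)   # first call goes straight through
more = client(count=15)     # second call sleeps until 2 seconds have passed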
def __call__(self):
    # Fetch the XML via web request
    url = "http://github.com/%s.atom" % self.username
    xml = utils.getxml(url)
    commits = []
    GITHUB_TITLE_REGEX = re.compile(
        r"Schwanksta pushed to (?P<branch>(.*)) at (?P<repository>(.*))"
    )
    # Loop through all the entries
    entries = list(xml.getiterator("{http://www.w3.org/2005/Atom}entry"))
    for entry in entries:
        # Grab the date
        pub_date = entry.find("{http://www.w3.org/2005/Atom}published").text
        # Grab the title
        title = entry.find("{http://www.w3.org/2005/Atom}title").text
        # Test it against our regex
        match = GITHUB_TITLE_REGEX.search(title)
        # If it doesn't match, it's one of the less important entries,
        # like when you start watching somebody's repo.
        if not match:
            # And we can just skip those
            continue
        # Grab the HTML with the commits
        html = entry.find("{http://www.w3.org/2005/Atom}content").text
        soup = BeautifulSoup(html)
        commits_html = soup.find("div", attrs={"class": "commits"}).findAll("li")
        # Loop through the one-to-many commits
        for commit_html in commits_html:
            # Create a dict to stuff the goodies in
            entry_dict = {}
            entry_dict["pub_date"] = dateutil.parser.parse(pub_date).strftime("%Y-%m-%d %H:%M:%S")
            # Add the matches to our dictionary
            entry_dict["branch"] = smart_unicode(match.group("branch"))
            entry_dict["repository"] = smart_unicode(match.group("repository"))
            # Add the others
            entry_dict["url"] = smart_unicode(commit_html.find("a")["href"])
            entry_dict["message"] = smart_unicode(commit_html.find("blockquote").string.strip())
            # Add the dict to the entry list
            commits.append(entry_dict)
    return commits
def sync(self):
    last_update_date = Shout.sync.get_last_update()
    logger.debug("Last update date: %s", last_update_date)
    xml = utils.getxml(RECENT_STATUSES_URL % self.username)
    for status in xml.getiterator("item"):
        message = status.find('title')
        message_text = smart_unicode(message.text)
        url = smart_unicode(status.find('link').text)
        # pubDate delivered as UTC
        timestamp = utils.parsedate(str(status.find('pubDate').text))
        if not self._status_exists(url):
            self._handle_status(message_text, url, timestamp)
def get_latest_data(self):
    self.xml = utils.getxml(self.feed_url)
    commit_list = []
    for link in self.xml.getiterator("{http://www.w3.org/2005/Atom}entry"):
        entry_dict = dict(
            pub_date=utils.parsedate(link.find('{http://www.w3.org/2005/Atom}published').text),
            message=self.prep_message(link.find('{http://www.w3.org/2005/Atom}title').text),
            branch='',
            repository='openstreetmap',
            url=smart_unicode(link.find('{http://www.w3.org/2005/Atom}id').text),
        )
        commit_list.append(entry_dict)
    return commit_list
def sync(self):
    last_update_date = Track.sync.get_last_update()
    xml = utils.getxml(RECENT_TRACKS_URL % self.username)
    for track in xml.getiterator("track"):
        artist = track.find('artist')
        artist_name = smart_unicode(artist.text)
        artist_mbid = artist.get('mbid')
        track_name = smart_unicode(track.find('name').text)
        track_mbid = smart_unicode(track.find('mbid').text)
        url = smart_unicode(track.find('url').text)
        timestamp = datetime.datetime.fromtimestamp(int(track.find('date').get('uts')))
        if timestamp > last_update_date:
            tags = self._tags_for_track(artist_name, track_name)
            self._handle_track(artist_name, artist_mbid, track_name, track_mbid,
                               url, timestamp, tags)
def get_latest_data(self):
    self.link_list = []
    self.xml = utils.getxml(self.url)
    for link in self.xml.getiterator("{http://www.w3.org/2005/Atom}entry"):
        title = smart_unicode(link.find('{http://www.w3.org/2005/Atom}title').text)
        url = smart_unicode(link.find('{http://www.w3.org/2005/Atom}link').get('href'))
        date = link.find('{http://www.w3.org/2005/Atom}updated').text
        date = utils.parsedate(date)
        d = dict(
            title=title,
            date=date,
            url=url,
        )
        self.link_list.append(d)
    return self.link_list
def update():
    last_update_date = Shout.sync.get_last_update()
    log.debug("Last update date: %s", last_update_date)
    xml = utils.getxml(RECENT_STATUSES_URL % settings.TWITTER_USER)
    for status in xml.getiterator("item"):
        message = status.find('title')
        message_text = smart_unicode(message.text)
        url = smart_unicode(status.find('link').text)
        # pubDate delivered as UTC
        timestamp = utils.parsedate(str(status.find('pubDate').text))
        if not _status_exists(message_text, url, timestamp):
            _handle_status(message_text, url, timestamp)
def update(): last_update_date = Track.sync.get_last_update() log.debug("Last update date: %s", last_update_date) xml = utils.getxml(RECENT_TRACKS_URL % settings.LASTFM_USER) for track in xml.getiterator("track"): artist = track.find('artist') artist_name = smart_unicode(artist.text) artist_mbid = artist.get('mbid') track_name = smart_unicode(track.find('name').text) track_mbid = smart_unicode(track.find('mbid').text) url = smart_unicode(track.find('url').text) timestamp = datetime.datetime.fromtimestamp(int(track.find('date').get('uts'))) if timestamp > last_update_date: log.debug("Handling track: %r - %r", artist_name, track_name) tags = _tags_for_track(artist_name, track_name) _handle_track(artist_name, artist_mbid, track_name, track_mbid, url, timestamp, tags)
def get_latest_data(self):
    self.location_list = []
    self.xml = utils.getxml(self.url)
    for checkin in self.xml.getiterator("item"):
        title = smart_unicode(checkin.find('title').text)
        description = smart_unicode(checkin.find('description').text)
        url = smart_unicode(checkin.find('link').text)
        date = checkin.find('pubDate').text
        date = utils.parsedate(date)
        d = dict(
            title=title,
            description=description,
            date=date,
            url=url,
        )
        self.location_list.append(d)
    return self.location_list
def get_latest_data(self):
    self.link_list = []
    self.xml = utils.getxml("http://delicious.com/v2/rss/palewire")
    for link in self.xml.getiterator("item"):
        title = smart_unicode(link.find('title').text)
        description = smart_unicode(link.find('description').text)
        url = smart_unicode(link.find('link').text)
        date = link.find('pubDate').text
        date = utils.parsedate(date)
        d = dict(
            title=title,
            description=description,
            date=date,
            url=url,
        )
        self.link_list.append(d)
    return self.link_list
def __call__(self):
    # Fetch the XML via web request
    url = 'http://readernaut.com/api/v1/xml/%s/books/' % self.username
    xml = utils.getxml(url)
    books = []
    for book in xml.getchildren():
        # Dictionary where we'll stuff all the goodies
        book_dict = {}
        # Get the date
        date = book.find('created').text
        book_dict['date'] = dateutil.parser.parse(date)
        # Step down the XML
        edition = book.find('book_edition')
        # Get the title
        title = edition.find('title').text
        book_dict['title'] = smart_unicode(title)
        # Get the ISBN
        isbn = edition.find('isbn').text
        book_dict['isbn'] = smart_unicode(isbn)
        # Get the authors as a text list
        authors = []
        for author in edition.getiterator('authors'):
            name = getattr(author.find('author'), 'text', None)
            if name:
                authors.append(smart_unicode(name))
        book_dict['authors'] = get_text_list(authors, 'and')
        # Get the link
        url = edition.find('permalink').text
        book_dict['url'] = smart_unicode(url)
        books.append(book_dict)
    return books
def update(): last_update_date = Track.sync.get_last_update() log.debug("Last update date: %s", last_update_date) xml = utils.getxml(BOOKMARKED_TRACKS_URL % settings.PANDORA_USER) for track in xml.getiterator("item"): artist = track.find("{http://musicbrainz.org/mm/mm-2.1#}Artist").find("{http://purl.org/dc/elements/1.1/}title") artist_name = smart_unicode(artist.text) track_name = smart_unicode( track.find("{http://musicbrainz.org/mm/mm-2.1#}Track").find("{http://purl.org/dc/elements/1.1/}title").text ) album_name = smart_unicode( track.find("{http://musicbrainz.org/mm/mm-2.1#}Album").find("{http://purl.org/dc/elements/1.1/}title").text ) url = smart_unicode(track.find("link").text) pdate = track.find("pubDate").text.split("-")[0].strip() timestamp = datetime.datetime.strptime(pdate, "%a, %d %b %Y %H:%M:%S") if timestamp > last_update_date: log.debug("Handling track: %r - %r", artist_name, track_name) _handle_track(artist_name, track_name, album_name, url, timestamp)
def get_latest_data(self):
    # Fetch the XML via web request
    url = 'https://www.goodreads.com/review/list_rss/%s?key=%s&shelf=read' % (
        self.user_id,
        self.api_key
    )
    xml = utils.getxml(url)
    books = []
    for book in xml.getiterator("item"):
        # Dictionary where we'll stuff all the goodies
        book_dict = {}
        # Get the date
        date = book.find('pubDate').text
        book_dict['date'] = dateutil.parser.parse(date)
        # Get the title
        title = book.find('title').text
        book_dict['title'] = smart_unicode(title)
        # Get the ISBN
        isbn = book.find('isbn').text
        # If no ISBN, substitute the GoodReads id
        if not isbn:
            isbn = 'goodreads:%s' % book.find('book_id').text
        book_dict['isbn'] = smart_unicode(isbn)
        # Get the authors
        author = book.find('author_name').text
        book_dict['authors'] = smart_unicode(author)
        # Get the link
        url = book.find('guid').text
        book_dict['url'] = smart_unicode(url)
        books.append(book_dict)
    return books