Python DOM Exemples, pkg.DOM.DOM Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : PandoraStations.py Projet : masnth/pandora_extractor

 def scan(self):
     """ Scan the stations page and append every station found to __stations

     Two requests are issued in order: the stations view URL, then the
     stations request URL. Only the second response is parsed; the first
     response body is discarded.
     NOTE(review): the first request presumably primes session state
     (e.g. cookies) needed by the second one -- confirm against get_request.

     Each "infobox-body" div contributes at most one station: the first
     anchor whose href matches "/station/[0-9]+". Stations are stored as
     {"name": <decoded anchor text>, "id": <id taken from the href>}.

     @return bool: True if this scan found at least one station
         False if something went wrong, e.g. incorrect profileUsername,
         or no station was found
     """
     username = str(self.profileUsername)
     preURL = self.STATIONS_VIEW_BASE_URL.replace("[username]", username)
     url = self.STATIONS_REQUEST_BASE_URL.replace("[username]", username)
     
     try:
         # the view page is requested first and its body is intentionally unused
         self.get_request(preURL)
         html = self.get_request(url)
     except Exception:
         # there must be something wrong with the url, i.e. incorrect profile username
         # (Exception, not a bare except, so Ctrl-C / SystemExit still propagate)
         return False
     
     found = 0
     for e in DOM.get_elements("div", {"class":"infobox-body"}, html):
         stationNodes = DOM.get_elements("a", {"href":"/station/[0-9]+"}, e.nodeValue)
         if not stationNodes:
             continue
         stationNode = stationNodes[0]
         stationName = String.decode_html_entities(stationNode.nodeValue)
         # assumes the href has the form "/station/<id>", so the id is the
         # third piece after splitting on '/' -- TODO confirm for absolute URLs
         stationId = stationNode.get_attr("href").split('/')[2].strip()
         self.__stations.append({"name":stationName, "id":stationId})
         found += 1
     # honour the documented contract: no station found means False
     return found > 0

Exemple #2

0

Afficher le fichier

Fichier : PandoraStationExtractor.py Projet : masnth/pandora_extractor

 def next_list(self):
     """ Load the next pagination of tracks for the station

     Each call advances the start index by the number of track items seen
     in the previous pagination, fetches that page and rebuilds the list of
     current tracks. Then self.get_cur_tracks should be called to retrieve
     the current list of tracks.

     A track record is the string "<song> <artist>". Records already present
     in __thumbUpTracks are not stored again, but they still count as items
     of this pagination (so the next start index moves past them).

     @return bool: True if this new pagination still contains tracks,
                   False if no more tracks, i.e. last pagination
                   None if something wrong with the station, e.g. incorrect
                   stationId, or the page could not be fetched/read
     """
     self.__curStartIdx += self.__prevItems
     self.__prevItems = 0
     self.__curThumbUpTracks = []
     
     url = self.STATION_TRACKS_BASE_URL
     url = url.replace("[stationId]", self.__stationId)
     url = url.replace("[startIdx]", str(self.__curStartIdx))
     
     try:
         response = urllib2.urlopen(url)
         try:
             html = response.read()
         finally:
             # always release the connection, even if the read fails
             response.close()
     except Exception:
         # there must be something wrong with the url, i.e. incorrect url
         # (Exception, not a bare except, so Ctrl-C / SystemExit still propagate)
         return None
     
     elements = DOM.get_elements("li", {"data-date": "[0-9]+", "data-artist": "[^>]+"}, html)
     for e in elements:
         trackNodes = DOM.get_elements("h3", {}, e.nodeValue)
         if not trackNodes:
             continue
         # the first <h3> is expected to hold two anchors: song, then artist
         songNodes = DOM.get_elements("a", {}, trackNodes[0].nodeValue)
         if len(songNodes) < 2:
             continue
         song = String.decode_html_entities(songNodes[0].nodeValue)
         song = String.symbols_to_words(song)
         song = self.__remove_redundant_words(song)
         artist = String.decode_html_entities(songNodes[1].nodeValue)
         artist = String.symbols_to_words(artist)
         record = song+' '+artist
         if record not in self.__thumbUpTracks:
             self.__thumbUpTracks.append(record)
             self.__curThumbUpTracks.append(record)
         # counted even for duplicates: this drives the next start index
         self.__prevItems += 1
     
     # an empty pagination means the last page has been passed
     return self.__prevItems > 0

Exemple #3

0

Afficher le fichier

 def search(self):
     """ Execute the search

     Fetches the URL built by __create_search_url and appends one
     {"title": ..., "url": ...} entry to __results for every "song_item" div
     that has both a "song_title" span and a "play_link" div containing an
     anchor whose href matches an .mp3 link. Items missing any of these
     parts are skipped.

     Errors raised while opening or reading the URL are not handled here
     and propagate to the caller.

     @return int: length of __results after this search; this method only
         appends, so entries already in __results are included in the count
     """
     searchUrl = self.__create_search_url()
     response = urllib2.urlopen(searchUrl)
     try:
         html = response.read()
     finally:
         # always release the connection, even if the read fails
         response.close()
     
     elements = DOM.get_elements("div", {"id": "song_[0-9]+", "class": "song_item"}, html)
     for e in elements:
         titleNodes = DOM.get_elements("span", {"id": "song_title"}, e.nodeValue)
         if not titleNodes:
             continue
         title = titleNodes[0].nodeValue
         playLinkNodes = DOM.get_elements("div", {"class": "play_link"}, e.nodeValue)
         if not playLinkNodes:
             continue
         playLinkNode = playLinkNodes[0]
         # raw string: same pattern value, without the invalid "\." escape
         urlNodes = DOM.get_elements("a", {"href": r"[^>]+\.mp3"}, playLinkNode.nodeValue)
         if not urlNodes:
             continue
         url = urlNodes[0].get_attr("href")
         self.__results.append({"title": title, "url": url})
     return len(self.__results)