def scan(self):
    """
    Scan the stations page and collect every station into self.__stations.

    Two requests are issued in order: first the stations view page
    (STATIONS_VIEW_BASE_URL), whose response is discarded, then the stations
    request URL (STATIONS_REQUEST_BASE_URL), whose HTML is parsed.
    NOTE(review): the first request presumably primes session state needed
    by the second one -- confirm against get_request.

    Every station found is appended to self.__stations as a
    {"name": ..., "id": ...} dict. The list is not cleared here, so repeated
    calls accumulate entries.

    @return bool: True if this scan found at least one station
                  False if something went wrong (e.g. incorrect
                  profileUsername) or no station was found
    """
    username = str(self.profileUsername)
    preURL = self.STATIONS_VIEW_BASE_URL.replace("[username]", username)
    url = self.STATIONS_REQUEST_BASE_URL.replace("[username]", username)

    try:
        # The view page is requested only for its side effects; its body is unused.
        self.get_request(preURL)
        html = self.get_request(url)
    except Exception:
        # There must be something wrong with the url, i.e. incorrect profile
        # username. `Exception` (not a bare except) so that KeyboardInterrupt
        # and SystemExit still propagate.
        return False

    found = False
    for e in DOM.get_elements("div", {"class": "infobox-body"}, html):
        stationNodes = DOM.get_elements("a", {"href": "/station/[0-9]+"}, e.nodeValue)
        if not stationNodes:
            continue

        # Only the first matching link inside an infobox is used.
        stationNode = stationNodes[0]
        stationName = String.decode_html_entities(stationNode.nodeValue)
        # href looks like "/station/<id>": split gives ['', 'station', '<id>'].
        stationId = stationNode.get_attr("href").split('/')[2].strip()
        self.__stations.append({"name": stationName, "id": stationId})
        found = True

    # Honour the documented contract: an account without stations is a failure.
    return found
def next_list(self):
    """
    Load the next pagination of thumbed-up tracks for the station.

    Each call advances the start index by the number of tracks collected by
    the previous call, fetches that page, and rebuilds the current track
    list. self.get_cur_tracks should then be called to retrieve the current
    list of tracks.

    Every track is stored as "<song> <artist>". A record already present in
    self.__thumbUpTracks is skipped and does not count towards the page, so
    the start index advances only by the number of new records.
    NOTE(review): a page made up solely of already-seen records therefore
    yields False even if later pages exist -- confirm this is intended.

    @return bool: True if this new pagination still contains tracks,
                  False if no more tracks, i.e. last pagination
                  None if something is wrong with the station, e.g.
                  incorrect stationId, or the page could not be read
    """
    self.__curStartIdx += self.__prevItems
    self.__prevItems = 0
    self.__curThumbUpTracks = []

    url = self.STATION_TRACKS_BASE_URL
    # str() so that a numeric station id does not make str.replace raise.
    url = url.replace("[stationId]", str(self.__stationId))
    url = url.replace("[startIdx]", str(self.__curStartIdx))

    try:
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            # Always release the connection, even when read() fails.
            response.close()
    except Exception:
        # There must be something wrong with the url, i.e. incorrect url.
        # `Exception` (not a bare except) so that KeyboardInterrupt and
        # SystemExit still propagate.
        return None

    elements = DOM.get_elements("li", {"data-date": "[0-9]+", "data-artist": "[^>]+"}, html)
    for e in elements:
        trackNodes = DOM.get_elements("h3", {}, e.nodeValue)
        if not trackNodes:
            continue

        # The first <h3> holds two links: the song, then the artist.
        songNodes = DOM.get_elements("a", {}, trackNodes[0].nodeValue)
        if len(songNodes) < 2:
            continue

        song = String.decode_html_entities(songNodes[0].nodeValue)
        song = String.symbols_to_words(song)
        song = self.__remove_redundant_words(song)

        artist = String.decode_html_entities(songNodes[1].nodeValue)
        artist = String.symbols_to_words(artist)

        record = song + ' ' + artist
        if record not in self.__thumbUpTracks:
            self.__thumbUpTracks.append(record)
            self.__curThumbUpTracks.append(record)
            self.__prevItems += 1

    # No new record on this page means there is nothing more to load.
    return self.__prevItems != 0
def search(self):
    """
    Execute the search and collect the playable results.

    Fetches the URL built by self.__create_search_url() and, for every song
    item that has both a title and an .mp3 play link, appends a
    {"title": ..., "url": ...} dict to self.__results. Items missing either
    part are skipped.

    self.__results is not cleared here, so the returned count includes any
    entries that were already in the list before this call.

    Errors raised while opening or reading the search URL are not caught and
    propagate to the caller.

    @return int: number of records in self.__results after this search
    """
    searchUrl = self.__create_search_url()
    response = urllib2.urlopen(searchUrl)
    try:
        html = response.read()
    finally:
        # Always release the connection, even when read() fails.
        response.close()

    elements = DOM.get_elements("div", {"id": "song_[0-9]+", "class": "song_item"}, html)
    for e in elements:
        titleNodes = DOM.get_elements("span", {"id": "song_title"}, e.nodeValue)
        if not titleNodes:
            continue
        title = titleNodes[0].nodeValue

        playLinkNodes = DOM.get_elements("div", {"class": "play_link"}, e.nodeValue)
        if not playLinkNodes:
            continue

        # Raw string keeps the pattern identical while avoiding an invalid
        # "\." escape in a normal string literal.
        urlNodes = DOM.get_elements("a", {"href": r"[^>]+\.mp3"}, playLinkNodes[0].nodeValue)
        if not urlNodes:
            continue

        url = urlNodes[0].get_attr("href")
        self.__results.append({"title": title, "url": url})

    return len(self.__results)