コード例 #1
0
 def search(self):
     """ Fetch the search page and collect the videos it lists into __results
     The search url is requested up to __MAX_TRIALS times, stopping as soon as
     a page yields at least __MIN_ACCEPTED_RESULTS result items; the items of
     the last page fetched are used either way.
     Items for which String.count_match_words(__IGNORE_WORDS, title) is
     positive, or whose title/id attribute cannot be extracted, are skipped.
     @return int: number of entries held in __results after this call
     """
     def attr_value(pattern, text):
         # Unquoted, whitespace-normalised value of the first attribute
         # matched by pattern, or None when no usable attribute is found.
         found = re.findall(pattern, text)
         if not found:
             return None
         # Split on the first '=' only, so a value containing '=' is kept whole.
         _, sep, raw = found[0].partition('=')
         if not sep:
             return None
         # Drop the surrounding quote characters, then collapse whitespace
         # runs. str.replace treats "\s+" as literal text, hence re.sub.
         return re.sub(r"\s+", ' ', raw[1:-1].strip())

     # Bound up front so the loop below is safe even when no request is made.
     elements = []
     countTrial = 0
     while countTrial < self.__MAX_TRIALS:
         countTrial += 1
         response = urllib2.urlopen(self.VIDEO_SEARCH_BASE_URL +
                                    self.__query)
         try:
             html = response.read()
         finally:
             response.close()
         elements = re.findall(self.__REGEX_SEARCH_RESULT_ITEM, html)
         if len(elements) >= self.__MIN_ACCEPTED_RESULTS:
             break

     for e in elements:
         title = attr_value(self.__REGEX_TITLE_ATTR, e)
         if title is None:
             continue
         if String.count_match_words(self.__IGNORE_WORDS, title) > 0:
             continue
         vidId = attr_value(self.__REGEX_ID_ATTR, e)
         if vidId is None:
             continue
         self.__results.append({
             self.__KEY_TITLE: title,
             self.__KEY_URL: self.VIDEO_VIEW_BASE_URL + vidId,
         })
     return len(self.__results)
コード例 #2
0
 def scan(self):
     """ Scan the stations page and append every station found to __stations
     Each station is stored as a dict with the keys "name" and "id".
     @return bool: True if this scan found at least one station
         False if a request failed, e.g. incorrect profileUsername, or no station found
     """
     username = str(self.profileUsername)
     preURL = self.STATIONS_VIEW_BASE_URL.replace("[username]", username)
     url = self.STATIONS_REQUEST_BASE_URL.replace("[username]", username)

     try:
         # The view page is requested first and its body is discarded;
         # presumably this primes session state for the real request -- TODO confirm.
         self.get_request(preURL)
         html = self.get_request(url)
     except Exception:
         # there must be something wrong with the url, i.e. incorrect profile username
         # (Exception rather than a bare except, so Ctrl-C and SystemExit still propagate)
         return False

     found = False
     for e in DOM.get_elements("div", {"class": "infobox-body"}, html):
         stationNodes = DOM.get_elements("a", {"href": "/station/[0-9]+"}, e.nodeValue)
         if len(stationNodes) == 0:
             continue
         stationNode = stationNodes[0]
         stationName = String.decode_html_entities(stationNode.nodeValue)
         # href looks like "/station/<digits>", so index 2 is the id part
         stationId = stationNode.get_attr("href").split('/')[2].strip()
         self.__stations.append({"name": stationName, "id": stationId})
         found = True
     # Report what the docstring promises: whether any station was found.
     return found
コード例 #3
0
 def next_list(self):
     """ Each time this func is invoked, the next pagination of tracks is loaded
     Then self.get_cur_tracks should be called to retrieve the current list of tracks
     The start index advances by the number of items seen on the previous page.
     @return bool: True if this new pagination still contains tracks,
                   False if no more tracks, i.e. last pagination
                   None if the page could not be opened, e.g. incorrect stationId
     """
     self.__curStartIdx += self.__prevItems
     self.__prevItems = 0
     self.__curThumbUpTracks = []

     url = self.STATION_TRACKS_BASE_URL
     # str() on both values so a numeric id works the same as a string id
     url = url.replace("[stationId]", str(self.__stationId))
     url = url.replace("[startIdx]", str(self.__curStartIdx))

     try:
         response = urllib2.urlopen(url)
     except Exception:
         # there must be something wrong with the url, i.e. incorrect url
         # (Exception rather than a bare except, so Ctrl-C and SystemExit still propagate)
         return None
     try:
         html = response.read()
     finally:
         # release the connection even if reading fails
         response.close()

     elements = DOM.get_elements("li", {"data-date": "[0-9]+", "data-artist": "[^>]+"}, html)
     for e in elements:
         trackNodes = DOM.get_elements("h3", {}, e.nodeValue)
         if len(trackNodes) == 0:
             continue
         songNodes = DOM.get_elements("a", {}, trackNodes[0].nodeValue)
         # the first link is used as the song and the second as the artist
         if len(songNodes) < 2:
             continue
         song = String.decode_html_entities(songNodes[0].nodeValue)
         song = String.symbols_to_words(song)
         song = self.__remove_redundant_words(song)
         artist = String.decode_html_entities(songNodes[1].nodeValue)
         artist = String.symbols_to_words(artist)
         record = song + ' ' + artist
         if record not in self.__thumbUpTracks:
             self.__thumbUpTracks.append(record)
             self.__curThumbUpTracks.append(record)
         # duplicates still count, so the next start index skips them too
         self.__prevItems += 1

     return self.__prevItems != 0
コード例 #4
0
 def __get_video_streams_info(self):
     """ Fetch the video info and return the stream entries it contains
     Also stores the video title in __videoTitle when it is still empty
     ('+' and '/' in the decoded title are replaced by spaces).
     @return list: String.url_decode("type" + chunk) for every chunk that
                   follows a "type" marker in the stream parameter;
                   empty list if no stream parameter was found
     """
     # prepare to get the streams info of the video
     resp = urllib2.urlopen(self.GET_VIDEO_BASE_URL + self.__videoId)
     try:
         content = resp.read()
     finally:
         # release the connection even if reading fails
         resp.close()

     # also, prepare to get the video title
     streams = ""
     for item in content.split('&'):
         if item.startswith(self.GET_VIDEO_TITLE_PARAM):
             if self.__videoTitle == '':
                 title = String.url_decode(item.split('=')[1])
                 self.__videoTitle = title.replace('+', ' ').replace('/', ' ')
         elif item.startswith(self.GET_VIDEO_STREAM_PARAM):
             streams = item.strip()
             # stop early only once the title is known as well
             if self.__videoTitle != '':
                 break

     # The chunk before the first "type" marker is not a stream entry, so it
     # is dropped before decoding instead of being decoded and thrown away.
     return [String.url_decode("type" + chunk)
             for chunk in streams.split("type")[1:]]
コード例 #5
0
 def __get_video_streams_info(self):
     """ Download the video info and extract its stream entries
     As a side effect, __videoTitle is filled in from the info when it is
     still empty; '+' and '/' in the decoded title become spaces.
     @return list: one String.url_decode("type" + chunk) result per chunk
                   found after a "type" marker in the stream parameter,
                   or an empty list when there is no stream parameter
     """
     infoUrl = self.GET_VIDEO_BASE_URL + self.__videoId
     resp = urllib2.urlopen(infoUrl)
     try:
         content = resp.read()
     finally:
         # always close the response, also when read() raises
         resp.close()

     streams = ""
     for item in content.split('&'):
         if item.startswith(self.GET_VIDEO_TITLE_PARAM):
             # keep a title that was already set
             if self.__videoTitle == '':
                 decoded = String.url_decode(item.split('=')[1])
                 decoded = decoded.replace('+', ' ')
                 self.__videoTitle = decoded.replace('/', ' ')
         elif item.startswith(self.GET_VIDEO_STREAM_PARAM):
             streams = item.strip()
             # keep scanning for the title if it has not been seen yet
             if self.__videoTitle != '':
                 break

     # Text before the first "type" marker is not a stream entry; slice it
     # off first so only real entries are decoded.
     chunks = streams.split("type")[1:]
     return [String.url_decode("type" + chunk) for chunk in chunks]
コード例 #6
0
 def __get_next_video_url(self):
     """ Find the next acceptable result after __currentItemIdx
     A result is accepted when String.count_match_words(keyword, title)
     divided by the number of space-separated keyword tokens is greater
     than __MIN_WORDS_ACCEPTED; __currentItemIdx is then moved to it.
     @return str: url of the accepted result, "" if none is accepted
     """
     tokenCount = float(len(self.__keyword.split(' ')))
     start = self.__currentItemIdx + 1
     for idx in range(start, len(self.__results)):
         entry = self.__results[idx]
         matched = String.count_match_words(self.__keyword,
                                            entry[self.__KEY_TITLE])
         if matched / tokenCount > self.__MIN_WORDS_ACCEPTED:
             url = entry[self.__KEY_URL]
             self.__currentItemIdx = idx
             return url
     return ""
コード例 #7
0
 def __get_next_video_url(self):
     """ Advance to the next result whose title matches the keyword well enough
     Results after __currentItemIdx are checked in order; the first one whose
     String.count_match_words(keyword, title) value, divided by the number
     of space-separated keyword tokens, exceeds __MIN_WORDS_ACCEPTED wins
     and becomes the new __currentItemIdx.
     @return str: url of that result, or "" when no result qualifies
     """
     denominator = float(len(self.__keyword.split(' ')))
     for pos in range(self.__currentItemIdx + 1, len(self.__results)):
         item = self.__results[pos]
         ratio = String.count_match_words(self.__keyword, item[self.__KEY_TITLE]) / denominator
         if not (ratio > self.__MIN_WORDS_ACCEPTED):
             continue
         found = item[self.__KEY_URL]
         self.__currentItemIdx = pos
         return found
     return ""
コード例 #8
0
 def __get_first_match_video_url(self):
     """ Pick the first matching result from the start of __results
     Each title is scored as String.count_match_words(keyword, title)
     divided by the number of space-separated keyword tokens. Results are
     walked in order while the score keeps rising; once a positive score
     has been seen, the walk stops at the first result that does not beat
     it. __currentItemIdx is set to the chosen result.
     @return str: url of the chosen result, "" if no result scored above 0
     """
     bestScore = .0
     bestUrl = ""
     tokenCount = float(len(self.__keyword.split(' ')))
     for idx, entry in enumerate(self.__results):
         score = String.count_match_words(self.__keyword,
                                          entry[self.__KEY_TITLE]) / tokenCount
         if score > bestScore:
             bestScore = score
             bestUrl = entry[self.__KEY_URL]
             self.__currentItemIdx = idx
             continue
         if bestScore > 0.:
             # a match exists already and this result does not improve on it
             break
     return bestUrl
コード例 #9
0
 def __get_first_match_video_url(self):
     """ Select the leading run of improving matches and return its best url
     The score of a result is String.count_match_words(keyword, title)
     divided by the number of space-separated keyword tokens. Scanning
     starts at index 0 and ends at the first result that fails to exceed
     the best score so far, provided that best score is already positive.
     The index of the selected result is stored in __currentItemIdx.
     @return str: url of the selected result, or "" when nothing matched
     """
     results = self.__results
     denominator = float(len(self.__keyword.split(' ')))
     topPoints = .0
     chosen = ""
     for pos in range(len(results)):
         item = results[pos]
         ratio = String.count_match_words(self.__keyword, item[self.__KEY_TITLE]) / denominator
         if ratio > topPoints:
             topPoints = ratio
             chosen = item[self.__KEY_URL]
             self.__currentItemIdx = pos
         elif topPoints > 0.:
             break
     return chosen
コード例 #10
0
 def search(self):
     """ Request the search page and store the videos it lists in __results
     Up to __MAX_TRIALS requests are made; fetching stops at the first page
     with at least __MIN_ACCEPTED_RESULTS result items, and the items of the
     last page fetched are processed regardless.
     An item is skipped when String.count_match_words(__IGNORE_WORDS, title)
     is positive or when its title/id attribute cannot be extracted.
     @return int: number of entries held in __results after this call
     """
     whitespace = re.compile(r"\s+")

     def attr_value(pattern, text):
         # Value of the first attribute matched by pattern with its quotes
         # removed and whitespace normalised; None if it cannot be read.
         matches = re.findall(pattern, text)
         if not matches:
             return None
         # Only the first '=' separates name and value; later ones belong
         # to the value itself.
         _, sep, quoted = matches[0].partition('=')
         if not sep:
             return None
         # str.replace("\s+", ' ') would look for the literal text "\s+",
         # so a real regex substitution is used to collapse whitespace.
         return whitespace.sub(' ', quoted[1:-1].strip())

     # Defined before the loop so it exists even if no request is made.
     elements = []
     countTrial = 0
     while countTrial < self.__MAX_TRIALS:
         countTrial += 1
         response = urllib2.urlopen(self.VIDEO_SEARCH_BASE_URL + self.__query)
         try:
             html = response.read()
         finally:
             response.close()
         elements = re.findall(self.__REGEX_SEARCH_RESULT_ITEM, html)
         if len(elements) >= self.__MIN_ACCEPTED_RESULTS:
             break

     for e in elements:
         title = attr_value(self.__REGEX_TITLE_ATTR, e)
         if title is None:
             continue
         if String.count_match_words(self.__IGNORE_WORDS, title) > 0:
             continue
         vidId = attr_value(self.__REGEX_ID_ATTR, e)
         if vidId is None:
             continue
         self.__results.append({
             self.__KEY_TITLE: title,
             self.__KEY_URL: self.VIDEO_VIEW_BASE_URL + vidId,
         })
     return len(self.__results)