def search(self):
    """Query the video search page and collect usable results.

    The search URL (``VIDEO_SEARCH_BASE_URL`` + query) is requested up to
    ``__MAX_TRIALS`` times, stopping as soon as a page yields at least
    ``__MIN_ACCEPTED_RESULTS`` result items. The items of the last page
    fetched are then parsed exactly once, so retries never add duplicate
    entries and a page below the threshold still contributes what it has.

    For every item the title and video id attributes are extracted. Items
    whose title contains any of the ignore words, or that lack either
    attribute, are skipped. Each remaining item is appended to
    ``self.__results`` as a dict holding the title and the full view URL.

    Returns:
        The number of entries in ``self.__results`` after this call.

    Raises:
        Whatever ``urllib2.urlopen`` / ``read`` raise; network errors are
        not caught here.
    """

    def attr_value(pattern, fragment):
        """Return the cleaned value of the first ``name="value"`` match, or None."""
        matches = re.findall(pattern, fragment)
        if not matches:
            return None
        # Split on the first '=' only, so values that themselves contain
        # '=' are kept whole instead of being cut short.
        _, separator, raw = matches[0].partition('=')
        if not separator:
            return None
        # Drop the surrounding quote characters, trim, then collapse runs
        # of whitespace. str.replace() only replaces literal text, so a
        # real regex substitution is required here.
        return re.sub(r"\s+", ' ', raw[1:-1].strip())

    elements = []
    attempts = 0
    accepted = False
    while not accepted and attempts < self.__MAX_TRIALS:
        attempts += 1
        response = urllib2.urlopen(self.VIDEO_SEARCH_BASE_URL + self.__query)
        try:
            html = response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()
        elements = re.findall(self.__REGEX_SEARCH_RESULT_ITEM, html)
        accepted = len(elements) >= self.__MIN_ACCEPTED_RESULTS

    for element in elements:
        title = attr_value(self.__REGEX_TITLE_ATTR, element)
        if title is None:
            continue
        # Skip results whose title contains any of the ignore words.
        if String.count_match_words(self.__IGNORE_WORDS, title) > 0:
            continue
        video_id = attr_value(self.__REGEX_ID_ATTR, element)
        if video_id is None:
            continue
        self.__results.append({
            self.__KEY_TITLE: title,
            self.__KEY_URL: self.VIDEO_VIEW_BASE_URL + video_id,
        })
    return len(self.__results)
def __get_next_video_url(self):
    """Return the URL of the next result that matches the keyword well enough.

    Results after the current item index are examined in order. A result's
    score is ``String.count_match_words(keyword, title)`` divided by the
    number of space-separated pieces in the keyword. The first result whose
    score is strictly greater than ``__MIN_WORDS_ACCEPTED`` becomes the
    current item and its URL is returned.

    Returns:
        The URL of the selected result, or an empty string when no later
        result qualifies (the current item index is then left untouched).
    """
    word_total = float(len(self.__keyword.split(' ')))
    first_candidate = self.__currentItemIdx + 1
    for idx in range(first_candidate, len(self.__results)):
        entry = self.__results[idx]
        # Presumably counts keyword words found in the title -- confirm
        # against String.count_match_words.
        matched = String.count_match_words(self.__keyword, entry[self.__KEY_TITLE])
        score = matched / word_total
        if score > self.__MIN_WORDS_ACCEPTED:
            url = entry[self.__KEY_URL]
            self.__currentItemIdx = idx
            return url
    return ""
def __get_next_video_url(self):
    """Pick the next acceptable search result and return its URL.

    Scanning starts right after ``self.__currentItemIdx``. Each title is
    scored as the keyword match count (from ``String.count_match_words``)
    over the number of space-separated keyword pieces; the scan stops at the
    first score above ``__MIN_WORDS_ACCEPTED`` and records that position as
    the current item.

    Returns:
        The matching result's URL, or ``""`` if the scan reaches the end of
        the results without finding one.
    """
    keyword_size = float(len(self.__keyword.split(' ')))
    found_url = ""
    position = self.__currentItemIdx + 1
    end = len(self.__results)
    while position < end:
        candidate = self.__results[position]
        candidate_title = candidate[self.__KEY_TITLE]
        ratio = String.count_match_words(self.__keyword, candidate_title) / keyword_size
        if ratio > self.__MIN_WORDS_ACCEPTED:
            found_url = candidate[self.__KEY_URL]
            # Remember where we stopped so the next call resumes after it.
            self.__currentItemIdx = position
            break
        position += 1
    return found_url
def __get_first_match_video_url(self):
    """Return the URL of the first locally best keyword match in the results.

    Results are scanned from the beginning. A result's score is
    ``String.count_match_words(keyword, title)`` divided by the number of
    space-separated pieces in the keyword. Whenever a result scores strictly
    higher than the best score so far, it becomes the selection and
    ``self.__currentItemIdx`` is updated. Once a positive best score exists,
    the scan stops at the first result that does not improve on it.

    Returns:
        The URL of the selected result, or an empty string if no result
        scored above zero.
    """
    best_score = 0.0
    best_url = ""
    word_total = float(len(self.__keyword.split(' ')))
    for idx, entry in enumerate(self.__results):
        matched = String.count_match_words(self.__keyword, entry[self.__KEY_TITLE])
        score = matched / word_total
        if score > best_score:
            best_score = score
            best_url = entry[self.__KEY_URL]
            self.__currentItemIdx = idx
            continue
        # No improvement: stop as soon as we already hold a real match.
        if best_score > 0.:
            break
    return best_url
def __get_first_match_video_url(self):
    """Select the first peak-scoring result and return its URL.

    Walks ``self.__results`` from index 0, scoring each title as the keyword
    match count (``String.count_match_words``) over the number of
    space-separated keyword pieces. Every strictly better score replaces the
    current selection and updates ``self.__currentItemIdx``. The walk ends
    at the first non-improving result once some positive score has been
    seen.

    Returns:
        The selected result's URL, or ``""`` when nothing scored above zero.
    """
    keyword_size = float(len(self.__keyword.split(' ')))
    top_ratio = .0
    chosen_url = ""
    position = 0
    total = len(self.__results)
    while position < total:
        candidate = self.__results[position]
        candidate_title = candidate[self.__KEY_TITLE]
        ratio = String.count_match_words(self.__keyword, candidate_title) / keyword_size
        if not ratio > top_ratio:
            if top_ratio > 0.:
                # A match was already found and this one is no better.
                break
        else:
            top_ratio = ratio
            chosen_url = candidate[self.__KEY_URL]
            self.__currentItemIdx = position
        position += 1
    return chosen_url
def search(self):
    """Query the video search page and collect usable results.

    The search URL (``VIDEO_SEARCH_BASE_URL`` + query) is requested up to
    ``__MAX_TRIALS`` times, stopping as soon as a page yields at least
    ``__MIN_ACCEPTED_RESULTS`` result items. The items of the last page
    fetched are then parsed exactly once, so retries never add duplicate
    entries and a page below the threshold still contributes what it has.

    For every item the title and video id attributes are extracted. Items
    whose title contains any of the ignore words, or that lack either
    attribute, are skipped. Each remaining item is appended to
    ``self.__results`` as a dict holding the title and the full view URL.

    Returns:
        The number of entries in ``self.__results`` after this call.

    Raises:
        Whatever ``urllib2.urlopen`` / ``read`` raise; network errors are
        not caught here.
    """

    def attr_value(pattern, fragment):
        """Return the cleaned value of the first ``name="value"`` match, or None."""
        matches = re.findall(pattern, fragment)
        if not matches:
            return None
        # Split on the first '=' only, so values that themselves contain
        # '=' are kept whole instead of being cut short.
        _, separator, raw = matches[0].partition('=')
        if not separator:
            return None
        # Drop the surrounding quote characters, trim, then collapse runs
        # of whitespace. str.replace() only replaces literal text, so a
        # real regex substitution is required here.
        return re.sub(r"\s+", ' ', raw[1:-1].strip())

    elements = []
    attempts = 0
    accepted = False
    while not accepted and attempts < self.__MAX_TRIALS:
        attempts += 1
        response = urllib2.urlopen(self.VIDEO_SEARCH_BASE_URL + self.__query)
        try:
            html = response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()
        elements = re.findall(self.__REGEX_SEARCH_RESULT_ITEM, html)
        accepted = len(elements) >= self.__MIN_ACCEPTED_RESULTS

    for element in elements:
        title = attr_value(self.__REGEX_TITLE_ATTR, element)
        if title is None:
            continue
        # Skip results whose title contains any of the ignore words.
        if String.count_match_words(self.__IGNORE_WORDS, title) > 0:
            continue
        video_id = attr_value(self.__REGEX_ID_ATTR, element)
        if video_id is None:
            continue
        self.__results.append({
            self.__KEY_TITLE: title,
            self.__KEY_URL: self.VIDEO_VIEW_BASE_URL + video_id,
        })
    return len(self.__results)