def fetch_page_count(self, cat_id):
    """Return the last page number of a category's video list.

    Downloads page 1 of the listing (``self.video_list_url`` formatted with
    ``(cat_id, 1)``), locates the ``<table id="bbsnum">`` pager and reads the
    page number out of its "Last >>" link.

    Args:
        cat_id: Category identifier substituted into ``self.video_list_url``.

    Returns:
        The page number captured from the "Last >>" link, as the string of
        digits matched in the markup (unchanged from the previous
        behaviour), or ``0`` when the pager table or the link is absent.
        Callers that need a number can pass either result through ``int()``.
    """
    response = urllib2.urlopen(self.video_list_url % (cat_id, 1))
    try:
        contents = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()

    page = BeautifulSoup(contents)
    link_table = page.find('table', {'id': 'bbsnum'})
    if link_table is None:
        # No pager on the page: fall back to the default count instead of
        # failing with an AttributeError on None.
        return 0

    last_link = re.search(
        r"<a href=\"\./\?page=(\d+)&[^>]*>Last >></a>",
        link_table.renderContents())
    if last_link is None:
        # Pager present but without a "Last >>" link.
        return 0
    return last_link.group(1)
def fetch_video(self, cat_id, vid_id):
    """Fetch a video page and describe where its FLV file lives.

    Downloads ``self.video_url`` formatted with ``(cat_id, vid_id)`` and
    searches the rendered page for a ``.swf?link=<digits>`` reference. When
    one is found, the category directory under ``self.base_data_path`` is
    created if needed so the caller can save the file there.

    Args:
        cat_id: Category identifier; used in the URL and as the name of the
            local sub-directory (so it must be usable as a path component).
        vid_id: Video identifier; used in the URL and, via ``str()``, as the
            local file name.

    Returns:
        ``None`` when the page contains no ``.swf?link=`` reference,
        otherwise a dict with the keys ``'title'`` (text of the ``<h3>``
        inside ``<div id="bbsDetail">``), ``'file_url'`` (remote FLV URL)
        and ``'local_vid_path'`` (destination path for the download).

    Raises:
        AttributeError: presumably when the ``bbsDetail`` div or its
            ``<h3>`` is missing from a page that does have a player link
            (same as before; not handled here).
        OSError: if the category directory cannot be created.
    """
    response = urllib2.urlopen(self.video_url % (cat_id, vid_id))
    try:
        contents = response.read()
    finally:
        # Always release the connection, even if the read fails.
        response.close()

    page = BeautifulSoup(contents)
    re_match = re.search(r'\.swf\?link=(\d+)', page.renderContents())
    if not re_match:
        return None

    file_id = re_match.group(1)
    vid = {
        'title': page.find('div', {'id': 'bbsDetail'}).h3.string,
        'file_url': 'http://flvdn.gomtv.net/viewer/%s.flv' % file_id,
    }

    cat_dir = os.path.join(self.base_data_path, cat_id)
    try:
        os.makedirs(cat_dir)
    except OSError:
        # Creating first and tolerating "already there" avoids the race
        # between an existence check and makedirs; any other failure is
        # still reported to the caller.
        if not os.path.exists(cat_dir):
            raise

    vid['local_vid_path'] = os.path.join(cat_dir, str(vid_id) + '.flv')
    return vid