def get_title_and_episode_info(self):
    """Fetch the title list page and return (title_info, episode_infos).

    Builds the list URL from self.title_info, downloads the page, and
    parses out the title name and the per-episode info entries.
    """
    list_url = wc_nate.LIST_URL.format(title_id=self.title_info)
    page_text = wc_util.get_text_from_url(list_url)
    soup = bs4.BeautifulSoup(page_text)
    info = {
        'title_id': self.title_info,
        'title_name': self.get_title_name(soup),
    }
    return info, self.get_episode_infos(soup)
def populate_episode_info(self):
    """Download the episode viewer page and record its metadata.

    Fetches the page at self.headers['episode_url'], collects every
    image URL inside the 'wt_viewer' div, and writes the episode's
    title, thumbnail URL, and image URLs through a logger.InfoWriter.
    """
    content_text = wc_util.get_text_from_url(self.headers['episode_url'])
    content = bs4.BeautifulSoup(content_text)
    image_soups = content.find('div', class_='wt_viewer').find_all('img')
    # Idiomatic comprehension instead of a manual append loop.
    image_urls = [image['src'] for image in image_soups]
    info_writer = logger.InfoWriter(self.directory)
    try:
        info_writer.write_webtoon_title(self.headers['title_name'])
        info_writer.write_episode_title(self.headers['episode_name'])
        info_writer.write_episode_thumbnail_url(self.headers['thumbnail_url'])
        for image_url in image_urls:
            info_writer.write_episode_image_url(image_url)
        info_writer.write_complete()
    finally:
        # Fix: the writer was never closed if any write raised; ensure
        # cleanup happens on every path.
        info_writer.close()
def populate_episode_info(self):
    """Fetch the episode viewer page and log the episode metadata.

    Downloads self.headers['episode_url'], extracts the image sources
    from the 'wt_viewer' container, and emits webtoon title, episode
    title, thumbnail URL, and image URLs via logger.InfoWriter.
    """
    viewer_text = wc_util.get_text_from_url(self.headers['episode_url'])
    viewer_soup = bs4.BeautifulSoup(viewer_text)
    images = viewer_soup.find('div', class_='wt_viewer').find_all('img')
    # Comprehension replaces the original append loop.
    image_urls = [img['src'] for img in images]
    info_writer = logger.InfoWriter(self.directory)
    try:
        info_writer.write_webtoon_title(self.headers['title_name'])
        info_writer.write_episode_title(self.headers['episode_name'])
        info_writer.write_episode_thumbnail_url(self.headers['thumbnail_url'])
        for image_url in image_urls:
            info_writer.write_episode_image_url(image_url)
        info_writer.write_complete()
    finally:
        # Fix: close the writer even when a write fails mid-way,
        # instead of leaking the open resource.
        info_writer.close()
def populate_episode_info(self):
    """Download the episode viewer page and record its metadata.

    Builds the viewer URL from self.headers, extracts the thumbnail and
    image URLs from the parsed page, and writes the episode metadata
    through a logger.InfoWriter.
    """
    url = wc_nate.VIEWER_URL.format(**self.headers)
    content = wc_util.get_text_from_url(url)
    content_soup = bs4.BeautifulSoup(content)
    thumbnail_url = self.get_thumbnail_url(content_soup)
    image_url = self.get_image_url(content_soup)
    info_writer = logger.InfoWriter(self.directory)
    try:
        info_writer.write_webtoon_title(self.title_info['title_name'])
        info_writer.write_episode_title(self.episode_info['episode_name'])
        info_writer.write_episode_thumbnail_url(thumbnail_url)
        info_writer.write_episode_image_url(image_url)
        info_writer.write_complete()
    finally:
        # Fix: the writer was only closed on the success path; guarantee
        # close() runs even if a write raises.
        info_writer.close()
def get_title_and_episode_info(self):
    """Parse the title's RSS feed and return (title_info, episode_infos).

    Episode entries arrive newest-first in the feed, so the returned
    episode sequence is reversed into chronological order.
    """
    category = self.title_info[0]
    title_id = self.title_info[1]
    feed_url = LIST_URL.format(category=category, title_id=title_id)
    feed_soup = bs4.BeautifulSoup(wc_util.get_text_from_url(feed_url))
    channel = feed_soup.find('rss').find('channel')
    title_info = {
        'category': category,
        'title_id': title_id,
        'title_name': channel.find('title').string.strip(),
    }
    episode_infos = [self.build_episode_info(item)
                     for item in channel.find_all('item')]
    # In RSS, the episodes are sorted in reverse chronological order.
    return title_info, reversed(episode_infos)
def get_title_and_episode_info(self):
    """Read the RSS feed for this title and return (title_info, episode_infos).

    The feed lists episodes newest-first; the episode sequence is
    returned reversed so callers see chronological order.
    """
    rss_url = LIST_URL.format(
        category=self.title_info[0], title_id=self.title_info[1])
    rss_text = wc_util.get_text_from_url(rss_url)
    channel = bs4.BeautifulSoup(rss_text).find('rss').find('channel')
    title_info = {
        'category': self.title_info[0],
        'title_id': self.title_info[1],
        'title_name': channel.find('title').string.strip(),
    }
    items = channel.find_all('item')
    episode_infos = [self.build_episode_info(entry) for entry in items]
    # In RSS, the episodes are sorted in reverse chronological order.
    return title_info, reversed(episode_infos)
def get_title_and_episode_info(self):
    """Walk every list page and return (title_info, episode_infos).

    Pages are fetched in order until is_last_page() reports the end;
    title_info is parsed once from the first page. Episodes are listed
    newest-first, so the accumulated list is returned reversed.
    """
    episode_infos = []
    title_info = None
    page = 1
    while True:
        page_text = wc_util.get_text_from_url(self.build_list_url(page))
        soup = bs4.BeautifulSoup(page_text)
        if title_info is None:
            # Only the first page is needed for the title metadata.
            title_info = {
                'category': self.title_info[0],
                'title_id': self.title_info[1],
                'title_name': self.get_title_name(soup),
            }
        episode_infos.extend(self.get_episode_infos(soup))
        if self.is_last_page(soup):
            break
        page += 1
    return title_info, reversed(episode_infos)
def get_title_and_episode_info(self):
    """Collect title and episode info by paging through the list view.

    Fetches successive list pages until is_last_page() is true,
    extracting the title metadata from the first page only. The
    episode list is reversed before returning so it is chronological.
    """
    collected = []
    title_info = None
    page_no = 1
    while True:
        url = self.build_list_url(page_no)
        soup = bs4.BeautifulSoup(wc_util.get_text_from_url(url))
        if not title_info:
            # Title metadata only needs to be read once.
            title_info = {
                'category': self.title_info[0],
                'title_id': self.title_info[1],
                'title_name': self.get_title_name(soup),
            }
        collected.extend(self.get_episode_infos(soup))
        if self.is_last_page(soup):
            break
        page_no += 1
    return title_info, reversed(collected)