def get_data(self):
    """Fetch the page at ``self.url`` and collect its episode list.

    The page is downloaded, parsed with BeautifulSoup, handed to
    ``self._scrape_metadata`` and then to ``self._scarpe_episodes``.
    The number of episodes found is stored in ``self._len``.

    Returns:
        The list stored in ``self._episode_urls``: ``(number, episode)``
        tuples where ``number`` starts at 1 and ``episode`` is whatever
        ``self._scarpe_episodes`` yielded, in the same order.
    """
    headers = get_random_header()
    if hasattr(self, '_referer'):
        headers['referer'] = self._referer

    # Send the headers built above. Previously a fresh random header dict
    # was passed here, which silently discarded the referer.
    response = scraper.get(self.url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    self._scrape_metadata(soup)

    self._episode_urls = self._scarpe_episodes(soup)
    self._len = len(self._episode_urls)

    logging.debug('EPISODE IDS: length: %s, ids: %s',
                  self._len, self._episode_urls)

    # Pair every episode with its 1-based position.
    self._episode_urls = list(enumerate(self._episode_urls, 1))

    return self._episode_urls
def search(cls, query):
    """Search kissanime for ``query``.

    Posts the query to the site's search endpoint and converts the
    response into a list of ``SearchResult`` objects (``poster`` is
    always the empty string).

    Returns:
        A list of ``SearchResult``; it holds exactly one entry when the
        response is not the "find anime" listing page.
    """
    site_root = 'https://kissanime.ru'

    request_headers = get_random_header()
    request_headers['referer'] = 'http://kissanime.ru/'

    response = scraper.post(
        'http://kissanime.ru/Search/Anime',
        data={'type': 'Anime', 'keyword': query},
        headers=request_headers,
    )
    soup = BeautifulSoup(response.text, 'html.parser')

    # If only one anime is found, kissanime redirects to the anime page
    # instead of showing a listing, so build the single result from it.
    page_title = soup.title.text.strip().lower()
    if page_title != "find anime":
        anchor = soup.find('a', 'bigChar')
        single = SearchResult(
            title=anchor.text,
            url=site_root + anchor.get('href'),
            poster='',
        )
        return [single]

    # Only every other <td> (even index) is turned into a result.
    results = []
    for cell in soup.find_all('td')[::2]:
        entry = SearchResult(
            title=cell.text.strip(),
            url=site_root + cell.find('a').get('href'),
            poster='',
        )
        logging.debug(entry)
        results.append(entry)

    return results
def search(cls, query):
    """Search kisscartoon for ``query``.

    Issues a GET request to the site's search endpoint and builds one
    ``SearchResult`` per link found inside the ``.listing`` element
    (``poster`` is always the empty string).

    Returns:
        A list of ``SearchResult``; empty when the response contains no
        ``.listing`` element.
    """
    prefix = 'Watch '

    headers = get_random_header()
    headers['referer'] = 'http://kisscartoon.ac/'

    response = scraper.get(
        'http://kisscartoon.ac/Search/',
        params={
            's': query,
        },
        headers=headers,
    )
    logging.debug('Result url: {}'.format(response.url))

    soup = BeautifulSoup(response.text, 'html.parser')

    listing = soup.select_one('.listing')
    if listing is None:
        # No result container in the page: report "nothing found" instead
        # of failing with AttributeError on None.
        return []

    results = []
    for link in listing.find_all('a'):
        title = link.text.strip()
        # Drop the literal "Watch " prefix only. str.strip('Watch ') treats
        # its argument as a set of characters and would also eat matching
        # letters from both ends of the real title.
        if title.startswith(prefix):
            title = title[len(prefix):].strip()

        result = SearchResult(
            title=title,
            url=link.get('href'),
            poster='',
        )
        logging.debug(result)
        results.append(result)

    return results
import os import requests from anime_downloader import session from anime_downloader.const import get_random_header __all__ = [ 'get', 'post', 'soupify', ] logger = logging.getLogger(__name__) cf_session = cfscrape.create_scraper() default_headers = get_random_header() temp_dir = tempfile.mkdtemp(prefix='animedl') logger.debug(f"HTML file temp_dir: {temp_dir}") def setup(func): """ setup is a decorator which takes a function and converts it into a request method """ def setup_func(url: str, cf: bool = False, sel: bool = False, referer: str = None, cache: bool = True, headers=None,
def driver_select():
    """Configure and return the selenium driver for the configured browser.

    The browser name, data directory, browser executable and driver
    binary are read from ``get_browser_config``, ``get_data_dir``,
    ``get_browser_executable`` and ``get_driver_binary``.

    Returns:
        A ``webdriver.Firefox`` or ``webdriver.Chrome`` instance.

    Raises:
        ValueError: if the configured browser is neither ``'firefox'``
            nor ``'chrome'``.
    """
    browser = get_browser_config()
    data_dir = get_data_dir()
    executable = get_browser_executable()
    binary = get_driver_binary()

    if browser == 'firefox':
        firefox_options = webdriver.FirefoxOptions()
        # NOTE(review): "--log fatal" is passed as one argument containing
        # a space - confirm the browser actually accepts it in this form.
        for option in ("--width=1920", "--height=1080",
                       "-headless", "--log fatal"):
            firefox_options.add_argument(option)

        firefox_profile = webdriver.FirefoxProfile()
        firefox_profile.set_preference(
            "general.useragent.override",
            get_random_header()['user-agent']
        )

        driver = webdriver.Firefox(
            # sets user-agent
            firefox_profile=firefox_profile,
            # sets various firefox settings
            options=firefox_options,
            # None by default; a binary location from the config is used
            # when one is set
            firefox_binary=executable or None,
            # "geckodriver" by default; a geckodriver location from the
            # config is used when one is set
            executable_path=binary or "geckodriver",
            # an attempt at stopping selenium from printing a pile of
            # garbage to the console.
            service_log_path=os.path.devnull
        )

    elif browser == 'chrome':
        from selenium.webdriver.chrome.options import Options

        profile_path = os.path.join(data_dir, 'Selenium_chromium')
        chrome_options = Options()
        chrome_arguments = (
            "--headless",
            "--disable-gpu",
            '--log-level=OFF',
            f"--user-data-dir={profile_path}",
            "--no-sandbox",
            "--window-size=1920,1080",
            f"user-agent={get_random_header()['user-agent']}",  # noqa
        )
        for option in chrome_arguments:
            chrome_options.add_argument(option)

        capabilities = None
        if executable:
            from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
            # Work on a copy: DesiredCapabilities.CHROME is a shared
            # class-level dict, and writing into it directly would leak
            # this setting into every later user of the constant.
            capabilities = DesiredCapabilities.CHROME.copy()
            capabilities['binary_location'] = executable

        driver = webdriver.Chrome(
            # sets user-agent, and various chrome settings
            options=chrome_options,
            # "chromedriver" by default; a chromedriver location from the
            # config is used when one is set
            executable_path=binary or "chromedriver",
            # None by default; carries the binary location from the config
            # when one is set
            desired_capabilities=capabilities,
            # an attempt at stopping selenium from printing a pile of
            # garbage to the console.
            service_log_path=os.path.devnull
        )

    else:
        # Previously this fell through to `return driver` and failed with
        # an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported selenium browser {!r}; expected 'firefox' or "
            "'chrome'.".format(browser)
        )

    return driver