def _get_data(self):
        # Extract the important bits from the embed page. Thanks to GitHub user
        # py7hon, whose mp4upload-direct program inspired this approach; here it
        # is done with a regex only.
        source_parts_re = re.compile(
            r'.*?(www\d+).*?\|video\|(.*?)\|(\d+)\|.*?', re.DOTALL)

        mp4u_embed = session.get(self.url).text
        domain, video_id, protocol = source_parts_re.match(mp4u_embed).groups()

        logging.debug('Domain: %s, Video ID: %s, Protocol: %s' %
                      (domain, video_id, protocol))

        url = self.url.replace('embed-', '')
        # Return to non-embed page to collect title
        mp4u_page = BeautifulSoup(session.get(url).text, 'html.parser')

        title = mp4u_page.find('span', {'class': 'dfilename'}).text
        title = title[:title.rfind('_')][:title.rfind('.')].replace(' ', '_')

        logging.debug('Title is %s' % title)

        # Create the stream url (the numeric group is used as the port)
        stream_url = 'https://{}.mp4upload.com:{}/d/{}/{}.mp4'
        stream_url = stream_url.format(domain, protocol, video_id, title)

        logging.debug('Stream URL: %s' % stream_url)

        return {
            'stream_url': stream_url,
            'meta': {
                'title': title,
                'thumbnail': ''
            }
        }
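For reference, here is what the regex above extracts. The sample string below is a hypothetical fragment of a packed-JS token list, not a real embed page; it only illustrates which pieces end up in each group.

import re

# Hypothetical packed-JS fragment; real mp4upload embed pages differ.
sample = "eval(function(p,a,c,k,e,d){}|www05|mp4upload|video|a1b2c3d4e5f6|282|src|"

source_parts_re = re.compile(
    r'.*?(www\d+).*?\|video\|(.*?)\|(\d+)\|.*?', re.DOTALL)
print(source_parts_re.match(sample).groups())
# -> ('www05', 'a1b2c3d4e5f6', '282')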
    def _ranged_download(self):
        http_chunksize = self.options['range_size']

        range_start = 0
        range_end = http_chunksize

        # Create (or truncate) the output file before appending ranged chunks to it
        with open(self.path, 'w'):
            pass

        # Initial request; its response is replaced by the ranged requests below
        r = session.get(self.url,
                        headers={'referer': self.referer},
                        stream=True)
        while self.downloaded < self.total_size:
            r = session.get(self.url,
                            headers=set_range(range_start, range_end,
                                              self.referer),
                            stream=True)
            if r.status_code == 206:
                with open(self.path, 'ab') as f:
                    for chunk in r.iter_content(chunk_size=self.chunksize):
                        if chunk:
                            f.write(chunk)
                            self.report_chunk_downloaded()

            if range_end == '':
                break
            range_start = os.stat(self.path).st_size
            range_end += http_chunksize
            if range_end > self.total_size:
                range_end = ''
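The loop above depends on a set_range helper that is not part of this snippet. Below is a minimal sketch of what such a helper could look like, assuming it only builds the HTTP Range header for the requested byte window; the project's real helper may differ (the later downloader example passes it a whole headers dict instead of a referer string).

def set_range(start, end, extra=None):
    # An empty `end` yields 'bytes=<start>-', i.e. everything up to EOF,
    # which matches how the loop above marks its final request.
    headers = {'Range': 'bytes={}-{}'.format(start, end)}
    if isinstance(extra, dict):
        # Later example: merge into an existing headers dict.
        headers = {**extra, **headers}
    elif extra:
        # This example: `extra` is just a referer string.
        headers['referer'] = extra
    return headers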
Example #3
    def search(cls, query):
        r = session.get('https://www4.9anime.is/search?',
                        params={'keyword': query},
                        headers=desktop_headers)

        logging.debug(r.url)

        soup = BeautifulSoup(r.text, 'html.parser')

        search_results = soup.find('div', {
            'class': 'film-list'
        }).find_all('div', {'class': 'item'})

        ret = []

        logging.debug('Search results')

        for item in search_results:
            s = SearchResult(
                title=item.find('a', {'class': 'name'}).contents[0],
                url=item.find('a')['href'],
                poster=item.find('img')['src'])
            meta = dict()
            # Collect metadata from the item's status block
            status = item.find('div', {'class': 'status'})
            for status_div in status.find_all('div'):
                meta[status_div.attrs['class'][0]] = status_div.text.strip()
            s.meta = meta
            logging.debug(s)
            ret.append(s)

        return ret
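The SearchResult objects built above are plain containers. A minimal stand-in is sketched below purely for context; the project's actual class is not shown here and presumably carries more behaviour.

class SearchResult:
    """Hypothetical minimal container matching how the search code uses it."""

    def __init__(self, title='', url='', poster=''):
        self.title = title
        self.url = url
        self.poster = poster
        self.meta = {}   # filled in afterwards from the 'status' block

    def __repr__(self):
        return '<SearchResult {!r} ({})>'.format(self.title, self.url)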
Example #4
    def _scarpe_episodes(self, soup):
        ts = soup.find('html')['data-ts']
        self._episodeClass.ts = ts
        logging.debug('data-ts: {}'.format(ts))

        # TODO: !HACK!
        # The below code should be refactored whenever I'm not lazy.
        # This was done as a fix for 9anime's switch to lazy loading of
        # episodes. I'm busy and lazy now, so I'm writing bad code.
        # Gomen'nasai
        api_url = "https://www8.9anime.is/ajax/film/servers/{}"
        api_url = api_url.format(
            self.url.rsplit('watch/', 1)[1].rsplit('.', 1)[1].split('/')[0])
        params = {}
        params['_'] = int(generate_(params))
        # The generated value is immediately overridden with a fixed token
        params['_'] = 648
        soup = BeautifulSoup(
            session.get(api_url, params=params).json()['html'], 'html.parser')
        episodes = soup.find('div', {'class': 'server', 'data-name': 33})
        episodes = episodes.find_all('li')

        if not episodes:
            err = 'No episodes found in url "{}"'.format(self.url)
            args = [self.url]
            raise NotFoundError(err, *args)

        episode_ids = []

        for x in episodes:
            for a in x.find_all('a'):
                ep_id = a.get('data-id')
                episode_ids.append(ep_id)

        return episode_ids
Example #5
    def _get_sources(self):
        params = {
            'v': '1.1',
            'episode_id': self.url.split('id=')[-1],
        }
        headers = desktop_headers.copy()  # copy so the shared headers dict is not mutated
        headers['referer'] = self.url
        res = session.get(self._episode_list_url,
                          params=params,
                          headers=headers)
        url = res.json()['value']

        headers = desktop_headers.copy()  # copy so the shared headers dict is not mutated
        headers['referer'] = self.url
        res = session.get('https:' + url, headers=headers)

        return [('no_extractor', res.json()['playlist'][0]['file'])]
Example #6
    def _get_sources(self):
        soup = BeautifulSoup(session.get(self.url).text, 'html.parser')
        url = 'https:' + soup.select_one('li.anime a').get('data-video')

        res = requests.get(url)
        ep_re = re.compile(r"file:.*?'(.*?)'")

        stream_urls = ep_re.findall(res.text)
        return [('no_extractor', url) for url in stream_urls]
    def _non_range_download(self):
        r = session.get(self.url, stream=True)

        if r.status_code == 200:
            with open(self.path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=self.chunksize):
                    if chunk:
                        f.write(chunk)
                        self.report_chunk_downloaded()
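Both download paths call report_chunk_downloaded() and compare self.downloaded against a total size, but that bookkeeping lives in the surrounding downloader class and is not shown. The sketch below is only an assumption about what such a base class might provide.

class _DownloadProgress:
    """Hypothetical progress bookkeeping; not the project's actual base class."""

    chunksize = 16384    # bytes passed to iter_content per chunk

    def __init__(self, path, total_size):
        self.path = path
        self.total_size = total_size
        self.downloaded = 0

    def report_chunk_downloaded(self):
        # Called once per chunk written; the final chunk may be smaller, so
        # treat the counter as an approximation used for progress display
        # and loop termination.
        self.downloaded += self.chunksize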
Example #8
    def search(self, query):
        r = session.get('https://twist.moe')
        soup = BeautifulSoup(r.text, 'html.parser')
        all_anime = soup.select_one('nav.series').select('li')
        animes = []
        for anime in all_anime:
            animes.append(SearchResult(
                title=anime.find('span').contents[0].strip(),
                url='https://twist.moe' + anime.find('a')['href'],
                poster='',
            ))
        animes = [ani[0] for ani in process.extract(query, animes)]
        return animes
    def _ranged_download(self):
        http_chunksize = self.range_size

        range_start = 0
        range_end = http_chunksize

        url = self.source.stream_url
        headers = self.source.headers
        if 'user-agent' not in headers:
            headers['user-agent'] = \
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101 Firefox/56.0"

        if self.source.referer:
            headers['Referer'] = self.source.referer

        # Create (or truncate) the output file before appending ranged chunks to it
        with open(self.path, 'w'):
            pass

        r = session.get(url, headers=headers, stream=True)
        while self.downloaded < self._total_size:
            r = session.get(url,
                            headers=set_range(range_start, range_end, headers),
                            stream=True)
            if r.status_code == 206:
                with open(self.path, 'ab') as f:
                    for chunk in r.iter_content(chunk_size=self.chunksize):
                        if chunk:
                            f.write(chunk)
                            self.report_chunk_downloaded()

            if range_end == '':
                break
            range_start = os.stat(self.path).st_size
            range_end += http_chunksize
            if range_end > self._total_size:
                range_end = ''
Example #10
    def _get_data(self):
        url = self.url + '&q=' + self.quality
        logging.debug('Calling Rapid url: {}'.format(url))
        headers = self.headers
        headers['referer'] = url
        try:
            r = session.get(url, headers=headers)
            # Fix for the new rapidvideo logic: the site returns 200 OK for a
            # GET request even when a confirmation button is shown, so make
            # sure a source link is actually present before using the response.
            soup = BeautifulSoup(r.text, 'html.parser')
            get_source(soup, self.quality)
        except Exception:
            r = session.post(url, {
                'confirm.x': 12,
                'confirm.y': 12,
                'block': 1,
            },
                             headers=headers)
        soup = BeautifulSoup(r.text, 'html.parser')

        # TODO: Make these a different function. Can be reused in other classes
        #       too
        title_re = re.compile(r'"og:title" content="(.*)"')
        image_re = re.compile(r'"og:image" content="(.*)"')

        try:
            stream_url = get_source(soup, self.quality)
        except IndexError:
            stream_url = None

        try:
            title = str(title_re.findall(r.text)[0])
            thumbnail = str(image_re.findall(r.text)[0])
        except Exception as e:
            title = ''
            thumbnail = ''
            logging.debug(e)

        return {
            'stream_url': stream_url,
            'meta': {
                'title': title,
                'thumbnail': thumbnail,
            },
        }
    def _non_range_download(self):
        url = self.source.stream_url
        headers = {
            'user-agent':
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko/20100101Firefox/56.0"
        }
        if self.source.referer:
            headers['Referer'] = self.source.referer
        r = session.get(url, headers=headers, stream=True)

        if r.status_code == 200:
            with open(self.path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=self.chunksize):
                    if chunk:
                        f.write(chunk)
                        self.report_chunk_downloaded()
Example #12
    def _get_sources(self):
        soup = BeautifulSoup(session.get(self.url).text, 'html.parser')
        extractors_url = []

        for element in soup.select('.anime_muti_link > ul > li'):
            extractor_class = element.get('class')[0]
            source_url = element.a.get('data-video')

            # only use mp4upload and rapidvideo as sources
            if extractor_class == 'mp4':
                extractor_class = 'mp4upload'
            elif extractor_class != 'rapidvideo':
                continue
            logging.debug('%s: %s' % (extractor_class, source_url))
            extractors_url.append((extractor_class, source_url,))
        return extractors_url
Example #13
    def _get_data(self):
        url = self.url + '&q=' + self.quality
        logging.debug('Calling Rapid url: {}'.format(url))
        headers = self.headers
        headers['referer'] = url
        try:
            r = session.get(url, headers=headers)
        except Exception:
            r = session.post(url, {
                'cursor.x': 12,
                'cursor.y': 12,
                'block': 1,
            },
                             headers=headers)
        soup = BeautifulSoup(r.text, 'html.parser')

        # TODO: Make these a different function. Can be reused in other classes
        #       too
        src_re = re.compile(r'src: "(.*)"')
        title_re = re.compile(r'"og:title" content="(.*)"')
        image_re = re.compile(r'"og:image" content="(.*)"')

        try:
            stream_url = soup.find_all('source')[0].get('src')
        except IndexError:
            try:
                stream_url = str(src_re.findall(r.text)[0])
            except IndexError:
                stream_url = None

        try:
            title = str(title_re.findall(r.text)[0])
            thumbnail = str(image_re.findall(r.text)[0])
        except Exception as e:
            title = ''
            thumbnail = ''
            logging.debug(e)

        return {
            'stream_url': stream_url,
            'meta': {
                'title': title,
                'thumbnail': thumbnail,
            },
        }
Example #14
    def _scarpe_episodes(self, soup):
        anime_id = soup.select_one('input#movie_id').attrs['value']
        params = {
            'default_ep': 0,
            'ep_start': 0,
            'ep_end': 999999,  # Using a very big number works :)
            'id': anime_id,
        }

        res = session.get(self._episode_list_url, params=params)
        soup = BeautifulSoup(res.text, 'html.parser')

        epurls = list(
            reversed(['https://www2.gogoanime.se' + a.get('href').strip()
                      for a in soup.select('li a')])
        )

        return epurls
Example #15
    def get_data(self):
        anime_name = self.url.split('/a/')[-1].split('/')[0]
        url = self._api_url.format(anime_name)
        episodes = session.get(
            url,
            headers={
                'x-access-token': '1rj2vRtegS8Y60B3w3qNZm5T2Q0TN2NR'
            }
        )
        episodes = episodes.json()
        self.title = anime_name
        episode_urls = ['https://eu1.twist.moe' +
                        decrypt(episode['source'].encode('utf-8'), KEY).decode('utf-8')
                        for episode in episodes]

        self._episode_urls = [(i+1, episode_url) for i, episode_url in enumerate(episode_urls)]
        self._len = len(self._episode_urls)

        return self._episode_urls
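The decrypt(source, KEY) helper used above is not included in this example. twist.moe served its sources as CryptoJS-style AES blobs, so a compatible helper would look roughly like the sketch below (requires pycryptodome; KEY is whatever passphrase the site used and is not reproduced here). This is an assumption about the implementation, not necessarily the project's exact code.

import base64
from hashlib import md5

from Crypto.Cipher import AES  # pycryptodome


def _evp_bytes_to_key(password, salt, key_len=32, iv_len=16):
    # OpenSSL/CryptoJS-style key derivation (EVP_BytesToKey with MD5).
    derived = b''
    block = b''
    while len(derived) < key_len + iv_len:
        block = md5(block + password + salt).digest()
        derived += block
    return derived[:key_len], derived[key_len:key_len + iv_len]


def decrypt(source, key):
    # Assumed layout: base64 of b"Salted__" + 8-byte salt + AES-256-CBC ciphertext.
    raw = base64.b64decode(source)
    salt, ciphertext = raw[8:16], raw[16:]
    aes_key, iv = _evp_bytes_to_key(key, salt)
    plaintext = AES.new(aes_key, AES.MODE_CBC, iv).decrypt(ciphertext)
    return plaintext[:-plaintext[-1]]   # strip PKCS#7 padding; returns bytes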
Example #16
    def _get_data(self):

        # A JavaScript deobfuscation API for Python is needed here, so someone
        # smarter than me can work on that; for now I match the pattern I observed.

        # Alternatively, you can pattern match on `src` for the stream_url part.
        source_parts_re = re.compile(
            r'action=\"([^"]+)\".*value=\"([^"]+)\".*Click Here to Download',
            re.DOTALL)

        # Kwik servers don't allow direct link access; you need to be referred
        # from somewhere, so I will just use the url itself.

        download_url = self.url.replace('kwik.cx/e/', 'kwik.cx/f/')

        kwik_text = session.get(download_url,
                                headers={
                                    'referer': download_url
                                }).text
        post_url, token = source_parts_re.search(kwik_text).group(1, 2)

        stream_url = session.post(post_url,
                                  headers={
                                      'referer': download_url
                                  },
                                  data={
                                      '_token': token
                                  },
                                  allow_redirects=False).headers['Location']

        title = stream_url.rsplit('/', 1)[-1].rsplit('.', 1)[0]

        logging.debug('Stream URL: %s' % stream_url)
        return {
            'stream_url': stream_url,
            'meta': {
                'title': title,
                'thumbnail': ''
            },
            'referer': None
        }
Example #17
def bypass_hcaptcha(url):
    """
    :param url: url to page which gives hcaptcha
    :return: Returns Response object (cookies stored for future use)
    """
    host = urlparse(url).netloc
    bypassed = False
    session = requests.session()

    headers = {
        'User-Agent':
        choice((
            'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/605.1.15 (KHTML, like Gecko)',
            'Mozilla/5.0 (iPad; CPU OS 9_3_5 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Mobile/13G36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
        ))
    }

    logger.info("Bypassing captcha...")

    # Retry until success
    while not bypassed:
        site_key = str(uuid4())
        response = session.post('https://hcaptcha.com/getcaptcha',
                                headers=headers,
                                data={
                                    'sitekey': site_key,
                                    'host': host
                                }).json()

        try:
            key = response['key']
            tasks = [row['task_key'] for row in response['tasklist']]
            job = response['request_type']
            timestamp = round(time()) + choice(range(30, 120))
            answers = dict(
                zip(tasks,
                    [choice(['true', 'false'])
                     for index in range(len(tasks))]))

            mouse_movements = []
            last_movement = timestamp

            for index in range(choice(range(1000, 10000))):
                last_movement += choice(range(10))
                mouse_movements.append(
                    [choice(range(500)),
                     choice(range(500)), last_movement])

            json = {
                'job_mode': job,
                'answers': answers,
                'serverdomain': host,
                'sitekey': site_key,
                'motionData': {
                    'st': timestamp,
                    'dct': timestamp,
                    'mm': mouse_movements
                }
            }

            response = session.post(f'https://hcaptcha.com/checkcaptcha/{key}',
                                    json=json)

            response = response.json()
            bypassed = response['pass']
        except (TypeError, KeyError):
            pass

        if bypassed:
            token = response['generated_pass_UUID']

            resp = helpers.soupify(session.get(url))
            bypass_url = f'https://{host}{resp.form.get("action")}'

            data = dict((x.get('name'), x.get('value'))
                        for x in resp.select('form > input'))
            data.update({
                'id': resp.strong.text,
                'g-recaptcha-response': token,
                'h-captcha-response': token
            })

            resp = session.post(bypass_url, data=data)

            if resp.status_code == 200:
                pickle.dump(resp.cookies,
                            open(f'{tempfile.gettempdir()}/{host}', 'wb'))
                logger.info("Succesfully bypassed captcha!")

                return resp
            else:
                bypassed = False
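Since a successful bypass pickles the session cookies into the temp directory, a later run can reload them instead of solving the captcha again. A small loader along these lines would pair with the function above (a sketch; the project's own loader, if any, is not shown here).

import os
import pickle
import tempfile
from urllib.parse import urlparse

import requests


def load_bypassed_cookies(url, session=None):
    """Reload cookies saved by bypass_hcaptcha() for this host, if present."""
    session = session or requests.session()
    path = os.path.join(tempfile.gettempdir(), urlparse(url).netloc)
    if os.path.exists(path):
        with open(path, 'rb') as f:
            session.cookies.update(pickle.load(f))
    return session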