def _get_data(self):
    """Fetch the RapidVideo page and extract the stream URL and metadata.

    The page is requested with GET first. If that request, or the probe
    for a source link, raises, the confirmation form is submitted with
    POST and that response is parsed instead.

    Returns:
        A dict with ``stream_url`` (``None`` when ``get_source`` raises
        ``IndexError``) and ``meta`` holding ``title`` and ``thumbnail``
        (both ``''`` when either cannot be parsed from the page).
    """
    url = self.url + '&q=' + self.quality
    logging.debug('Calling Rapid url: {}'.format(url))

    # NOTE: this aliases self.headers, so the referer set here persists on
    # the instance after the call.
    headers = self.headers
    headers['referer'] = url

    try:
        r = session.get(url, headers=headers)
        # RapidVideo answers a GET with OK even when the page only shows a
        # click-through button. Probing for a source link here makes sure
        # one is present (presumably get_source raises when it is not --
        # TODO confirm), which triggers the POST fallback below.
        soup = BeautifulSoup(r.text, 'html.parser')
        get_source(soup, self.quality)
    except Exception:
        # Deliberately broad best-effort fallback, but no longer a bare
        # ``except`` so KeyboardInterrupt/SystemExit are not swallowed.
        r = session.post(url, {
            'confirm.x': 12,
            'confirm.y': 12,
            'block': 1,
        }, headers=headers)
        soup = BeautifulSoup(r.text, 'html.parser')

    # TODO: Make these a different function. Can be reused in other classes
    #       too
    title_re = re.compile(r'"og:title" content="(.*)"')
    image_re = re.compile(r'"og:image" content="(.*)"')

    try:
        stream_url = get_source(soup, self.quality)
    except IndexError:
        stream_url = None

    try:
        title = str(title_re.findall(r.text)[0])
        thumbnail = str(image_re.findall(r.text)[0])
    except Exception as e:
        # Metadata is optional: fall back to empty strings for both.
        title = ''
        thumbnail = ''
        logging.debug(e)

    return {
        'stream_url': stream_url,
        'meta': {
            'title': title,
            'thumbnail': thumbnail,
        },
    }
def _get_data(self):
    """Fetch the RapidVideo page and extract the stream URL and metadata.

    The page is requested with GET; if that request raises, the form is
    submitted with POST instead. The stream URL is taken from the first
    ``<source>`` tag, falling back to a ``src: "..."`` pattern in the raw
    page text.

    Returns:
        A dict with ``stream_url`` (``None`` when neither lookup finds a
        source) and ``meta`` holding ``title`` and ``thumbnail`` (both
        ``''`` when either cannot be parsed from the page).
    """
    url = self.url + '&q=' + self.quality
    logging.debug('Calling Rapid url: {}'.format(url))

    # NOTE: this aliases self.headers, so the referer set here persists on
    # the instance after the call.
    headers = self.headers
    headers['referer'] = url

    try:
        r = session.get(url, headers=headers)
    except Exception:
        # Deliberately broad best-effort fallback, but no longer a bare
        # ``except`` so KeyboardInterrupt/SystemExit are not swallowed.
        r = session.post(url, {
            'cursor.x': 12,
            'cursor.y': 12,
            'block': 1,
        }, headers=headers)
    soup = BeautifulSoup(r.text, 'html.parser')

    # TODO: Make these a different function. Can be reused in other classes
    #       too
    src_re = re.compile(r'src: "(.*)"')
    title_re = re.compile(r'"og:title" content="(.*)"')
    image_re = re.compile(r'"og:image" content="(.*)"')

    try:
        stream_url = soup.find_all('source')[0].get('src')
    except IndexError:
        # No <source> tag in the parsed page: look for an inline
        # ``src: "..."`` assignment in the raw text instead.
        try:
            stream_url = str(src_re.findall(r.text)[0])
        except IndexError:
            stream_url = None

    try:
        title = str(title_re.findall(r.text)[0])
        thumbnail = str(image_re.findall(r.text)[0])
    except Exception as e:
        # Metadata is optional: fall back to empty strings for both.
        title = ''
        thumbnail = ''
        logging.debug(e)

    return {
        'stream_url': stream_url,
        'meta': {
            'title': title,
            'thumbnail': thumbnail,
        },
    }
def _get_data(self):
    """Resolve the stream URL behind a Kwik embed page.

    The embed URL is rewritten to its download-page form, the download
    form's action and token are scraped from that page, and the form is
    posted without following redirects. The ``Location`` header of the
    reply is used as the stream URL.

    Returns:
        A dict with ``stream_url``, ``meta`` (``title`` derived from the
        file name in the stream URL, empty ``thumbnail``) and
        ``referer`` set to ``None``.
    """
    # Kwik servers do not allow direct link access; a referer is required,
    # so the download page URL is sent as its own referer.
    page_url = self.url.replace('kwik.cx/e/', 'kwik.cx/f/')

    # A JavaScript deobfuscation step would be the proper approach; for
    # now this matches the form pattern observed on the download page.
    # Alternatively one could pattern match on `src` for the stream URL.
    form_pattern = re.compile(
        r'action=\"([^"]+)\".*value=\"([^"]+)\".*Click Here to Download',
        re.DOTALL)

    page_response = session.get(page_url, headers={'referer': page_url})
    form_match = form_pattern.search(page_response.text)
    post_url = form_match.group(1)
    token = form_match.group(2)

    redirect = session.post(
        post_url,
        headers={'referer': page_url},
        data={'_token': token},
        allow_redirects=False)
    stream_url = redirect.headers['Location']

    # Title is the last path segment of the stream URL minus its extension.
    file_name = stream_url.rsplit('/', 1)[-1]
    title = file_name.rsplit('.', 1)[0]

    logging.debug('Stream URL: %s' % stream_url)

    meta = {'title': title, 'thumbnail': ''}
    return {'stream_url': stream_url, 'meta': meta, 'referer': None}
def bypass_hcaptcha(url):
    """
    Get past the hCaptcha page served at ``url``.

    A captcha is requested from hCaptcha with a random site key, answered
    with random 'true'/'false' guesses plus generated mouse movement data,
    and this is repeated until an attempt is accepted. The resulting token
    is then submitted through the form found on ``url``. When that reply
    has status 200 its cookies are pickled to ``<tempdir>/<host>``.

    :param url: url to page which gives hcaptcha
    :return: Returns Response object (cookies stored for future use)
    """
    host = urlparse(url).netloc
    bypassed = False
    session = requests.session()

    headers = {
        'User-Agent': choice((
            'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/605.1.15 (KHTML, like Gecko)',
            'Mozilla/5.0 (iPad; CPU OS 9_3_5 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Mobile/13G36',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
        ))
    }

    logger.info("Bypassing captcha...")

    # Retry until success
    while not bypassed:
        site_key = str(uuid4())
        response = session.post('https://hcaptcha.com/getcaptcha',
                                headers=headers,
                                data={
                                    'sitekey': site_key,
                                    'host': host
                                }).json()

        try:
            key = response['key']
            tasks = [row['task_key'] for row in response['tasklist']]
            job = response['request_type']
            timestamp = round(time()) + choice(range(30, 120))
            # One random guess per task.
            answers = {task: choice(['true', 'false']) for task in tasks}

            # Generated mouse trail: random coordinates with timestamps
            # that never decrease.
            mouse_movements = []
            last_movement = timestamp
            for _ in range(choice(range(1000, 10000))):
                last_movement += choice(range(10))
                mouse_movements.append(
                    [choice(range(500)), choice(range(500)), last_movement])

            payload = {
                'job_mode': job,
                'answers': answers,
                'serverdomain': host,
                'sitekey': site_key,
                'motionData': {
                    'st': timestamp,
                    'dct': timestamp,
                    'mm': mouse_movements
                }
            }

            response = session.post(
                f'https://hcaptcha.com/checkcaptcha/{key}', json=payload)
            response = response.json()
            bypassed = response['pass']
        except (TypeError, KeyError):
            # Reply did not have the expected shape; request a new captcha.
            continue

        if not bypassed:
            continue

        token = response['generated_pass_UUID']

        resp = helpers.soupify(session.get(url))
        bypass_url = f'https://{host}{resp.form.get("action")}'

        data = {
            x.get('name'): x.get('value')
            for x in resp.select('form > input')
        }
        data.update({
            'id': resp.strong.text,
            'g-recaptcha-response': token,
            'h-captcha-response': token
        })

        resp = session.post(bypass_url, data=data)

        if resp.status_code == 200:
            # Close the cookie file deterministically instead of leaking
            # the handle returned by open().
            with open(f'{tempfile.gettempdir()}/{host}', 'wb') as cookie_file:
                pickle.dump(resp.cookies, cookie_file)
            logger.info("Succesfully bypassed captcha!")

            return resp

        # Form submission was rejected; start over with a new captcha.
        bypassed = False