def getPage(self, baseUrl, addParams=None, post_data=None):
    """Fetch ``baseUrl`` and retry through the Cloudflare work-arounds if needed.

    The page is first requested with ``self.cm.getPage``.  When the body
    contains the ``'!![]+!![]'`` marker, the request is replayed through
    ``cRequestHandler`` and the cookies stored under ``'www_dpstream_top'``
    are copied into ``self.COOKIE_FILE``.  If that replay raises, the request
    falls back to ``self.cm.getPageCFProtection``.

    Args:
        baseUrl: URL to fetch.
        addParams: optional dict of request parameters.  It is copied, so
            neither the caller's dict nor a shared default is modified.
        post_data: optional indexable pair.  Element 0 is handed to
            ``cRequestHandler.addParametersLine`` and element 1 to
            ``self.cm.getPage`` / ``self.cm.getPageCFProtection``.

    Returns:
        The ``(sts, data)`` pair produced by the last request that ran.
    """
    printDBG('etap1')

    # Private copy: 'cloudflare_params' is added below and must not leak
    # into the caller's dict.
    addParams = dict(addParams) if addParams else {}

    # Remember whether the caller really supplied POST data *before* the
    # placeholder is installed; the placeholder tuple itself is truthy.
    has_post = bool(post_data)
    if not has_post:
        post_data = (None, None)

    sts, data = self.cm.getPage(baseUrl, addParams, post_data[1])
    if not data:
        data = ''
    if '!![]+!![]' not in data:
        return sts, data

    try:
        printDBG('etap2')
        oRequestHandler = cRequestHandler(baseUrl)
        if has_post:
            oRequestHandler.setRequestType(cRequestHandler.REQUEST_TYPE_POST)
            oRequestHandler.addParametersLine(post_data[0])
        data = oRequestHandler.request()
        sts = True

        cook = GestionCookie().Readcookie('www_dpstream_top')
        self.cookieHeader = str(cook)

        cj = self.cm.getCookie(self.COOKIE_FILE)
        # Always split: a lone "name=value" must stay one item instead of
        # being walked character by character.
        for item in cook.split(';'):
            if '=' not in item:
                continue
            # Split on the first '=' only; cookie values may contain '='.
            cookieKey, cookieValue = item.split('=', 1)
            cookieItem = cookielib.Cookie(
                version=0,
                name=cookieKey.strip(),
                value=cookieValue.strip(),
                port=None,
                port_specified=False,
                domain='.' + self.cm.getBaseUrl(baseUrl, True),
                domain_specified=True,
                domain_initial_dot=True,
                path='/',
                path_specified=True,
                secure=False,
                expires=time.time() + 3600 * 48,  # 48 hours from now
                discard=True,
                comment=None,
                comment_url=None,
                rest={'HttpOnly': None},
                rfc2109=False,
            )
            cj.set_cookie(cookieItem)
        cj.save(self.COOKIE_FILE, ignore_discard=True)
        printDBG('ffffff' + self.cookieHeader)
    except Exception as e:
        # Deliberately broad: any failure of the replay above triggers the
        # getPageCFProtection fallback.
        printDBG('ERREUR:' + str(e))
        addParams['cloudflare_params'] = {
            'domain': self.up.getDomain(baseUrl),
            'cookie_file': self.COOKIE_FILE,
            'User-Agent': self.USER_AGENT,
        }
        sts, data = self.cm.getPageCFProtection(
            baseUrl, addParams, post_data[1])

    return sts, data
def GetHeadercookie(self, url):
    """Build the ``|``-prefixed header suffix carrying the stored cookie.

    The host part of ``url`` (dots replaced by underscores) is the name used
    to look the cookie up through ``GestionCookie().Readcookie``.

    Returns:
        ``''`` when the stored cookie is the empty string, otherwise ``'|'``
        followed by the URL-encoded ``User-Agent`` and ``Cookie`` fields.
    """
    host = re.sub(r'https*:\/\/([^/]+)(\/*.*)', '\\1', url)
    cookie_name = host.replace('.', '_')
    stored_cookie = GestionCookie().Readcookie(cookie_name)

    if stored_cookie == '':
        return ''

    header_fields = {'User-Agent': UA, 'Cookie': stored_cookie}
    return '|' + urllib.urlencode(header_fields)
def getPage1(self, baseUrl, addParams=None, post_data=None):
    """Fetch ``baseUrl`` and retry through the Cloudflare work-arounds if needed.

    The page is first requested with ``self.cm.getPage``.  When the body
    contains the ``'!![]+!![]'`` marker or the response meta reports status
    503, ``self.COOKIE_FILE`` is removed, the request is replayed through
    ``cRequestHandler`` and the cookies stored for the URL's domain are
    copied into ``self.COOKIE_FILE``.  If that replay raises, the request
    falls back to ``self.cm.getPageCFProtection``.

    Args:
        baseUrl: URL to fetch.
        addParams: optional dict of request parameters; when empty, a copy of
            ``self.defaultParams`` is used.  The dict is copied, so the
            caller's object is never modified.
        post_data: optional dict of POST fields (string keys and values);
            it is joined into a ``key=value&...`` line for the replay.

    Returns:
        The ``(sts, data)`` pair produced by the last request that ran.
    """
    # Private copy: 'cloudflare_params' is added below and must not leak
    # into the caller's dict or into self.defaultParams.
    if addParams:
        addParams = dict(addParams)
    else:
        addParams = dict(self.defaultParams)

    sts, data = self.cm.getPage(baseUrl, addParams, post_data)
    if not data:
        data = strwithmeta('', {})
    printDBG('ddddaaattttaaaa' + str(data.meta))

    challenged = ('!![]+!![]' in data) or (data.meta.get('status_code', 0) == 503)
    if not challenged:
        return sts, data

    try:
        if os.path.exists(self.COOKIE_FILE):
            os.remove(self.COOKIE_FILE)
            printDBG('cookie removed')
        printDBG('Start CLoudflare Vstream methode')

        oRequestHandler = cRequestHandler(baseUrl)
        if post_data:
            # Values are joined as-is (no URL-encoding), as before.
            post_data_vstream = '&'.join(
                key + '=' + post_data[key] for key in post_data)
            oRequestHandler.setRequestType(cRequestHandler.REQUEST_TYPE_POST)
            oRequestHandler.addParametersLine(post_data_vstream)
        data = oRequestHandler.request()
        sts = True

        cookie_name = self.up.getDomain(baseUrl).replace('.', '_')
        printDBG('cook_vstream_file=' + cookie_name)
        cook = GestionCookie().Readcookie(cookie_name)
        printDBG('cook_vstream=' + cook)

        cj = self.cm.getCookie(self.COOKIE_FILE)
        # Always split: a lone "name=value" must stay one item instead of
        # being walked character by character.
        for item in cook.split(';'):
            if '=' not in item:
                continue
            printDBG('item=' + item)
            # Split on the first '=' only; cookie values may contain '='.
            cookieKey, cookieValue = item.split('=', 1)
            cookieItem = cookielib.Cookie(
                version=0,
                name=cookieKey.strip(),
                value=cookieValue.strip(),
                port=None,
                port_specified=False,
                domain='.' + self.cm.getBaseUrl(baseUrl, True),
                domain_specified=True,
                domain_initial_dot=True,
                path='/',
                path_specified=True,
                secure=False,
                expires=time.time() + 3600 * 48,  # 48 hours from now
                discard=True,
                comment=None,
                comment_url=None,
                rest={'HttpOnly': None},
                rfc2109=False,
            )
            cj.set_cookie(cookieItem)
        cj.save(self.COOKIE_FILE, ignore_discard=True)
    except Exception as e:
        # Deliberately broad: any failure of the replay above triggers the
        # getPageCFProtection fallback.
        printDBG('ERREUR:' + str(e))
        printDBG('Start CLoudflare E2iplayer methode')
        addParams['cloudflare_params'] = {
            'domain': self.up.getDomain(baseUrl),
            'cookie_file': self.COOKIE_FILE,
            'User-Agent': self.USER_AGENT,
        }
        sts, data = self.cm.getPageCFProtection(baseUrl, addParams, post_data)

    return sts, data
def GetHtml(self, url, htmlcontent='', cookies='', postdata=None, Gived_headers=''):
    """Fetch ``url`` through a cloudscraper session and persist its cookies.

    Cookies previously saved for the host (via ``GestionCookie``) are sent
    together with the supplied ones, and the cookies of the response are
    saved back under the same host-derived name.

    Args:
        url: address to request.
        htmlcontent: unused by this method.
        cookies: cookie string to send.
        postdata: optional ``key=value&key=value`` string; when given, the
            request is a POST carrying those fields.
        Gived_headers: optional dict of extra headers; its ``Cookie`` entry,
            if any, is merged into ``self.Memorised_Cookies``.

    Returns:
        The UTF-8 encoded response text, or ``''`` when the response object
        evaluates false (in which case the stored cookie is deleted).
    """
    # Remember the request inputs on the instance.
    self.Memorised_Headers = Gived_headers
    self.Memorised_PostData = postdata
    self.Memorised_Cookies = cookies

    # A cookie passed through the headers is appended to the explicit ones.
    # The truthiness test also tolerates None, not only ''.
    if Gived_headers:
        header_cookie = Gived_headers.get('Cookie', None)
        if header_cookie:
            if cookies:
                self.Memorised_Cookies = cookies + '; ' + header_cookie
            else:
                self.Memorised_Cookies = header_cookie

    self.hostComplet = re.sub(r'(https*:\/\/[^/]+)(\/*.*)', '\\1', url)
    self.host = re.sub(r'https*:\/\/', '', self.hostComplet)
    self.url = url
    cookie_name = self.host.replace('.', '_')

    # Add the cookie saved by an earlier call for this host.
    cookieMem = GestionCookie().Readcookie(cookie_name)
    if not (cookieMem == ''):
        if not self.Memorised_Cookies:
            cookies = cookieMem
        else:
            cookies = self.Memorised_Cookies + '; ' + cookieMem

    # Convert the post-data line into a dict for the session.
    data = {}
    if postdata:
        method = 'POST'
        for pair in postdata.split('&'):
            if not pair:
                continue  # tolerate a leading/trailing/double '&'
            # partition keeps any '=' inside the value and yields an empty
            # value for a bare key.
            key, _, value = pair.partition('=')
            data[key] = value
    else:
        method = 'GET'

    s = cloudscraper.create_scraper(browser={'custom': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0'})
    r = s.request(method, url, headers=self.SetHeader(),
                  cookies=self.ParseCookies(cookies), data=data)

    if r:
        MemCookie = r.cookies.get_dict()
        sContent = r.text.encode('utf-8')
        self.RedirectionUrl = r.url
        self.Header = r.headers
    else:
        # Failed request: forget the cookie stored for this host.
        sContent = ''
        MemCookie = {}
        GestionCookie().DeleteCookie(cookie_name)

    # Store the cookies returned by the server.
    if MemCookie:
        c = ''.join(name + '=' + MemCookie[name] + ';' for name in MemCookie)
        GestionCookie().SaveCookie(cookie_name, c)

    return sContent
def GetHtml(self, url, htmlcontent='', cookies='', postdata=None, Gived_headers=''):
    """Fetch ``url`` through a ``CloudflareScraper`` and persist its cookies.

    Cookies previously saved for the host (via ``GestionCookie``) are sent
    together with the supplied ones, and the scraper's ``MemCookie`` mapping
    is saved back under the same host-derived name.

    Args:
        url: address to request.
        htmlcontent: unused by this method.
        cookies: cookie string to send.
        postdata: optional ``key=value&key=value`` string; when given, the
            request is a POST carrying those fields.
        Gived_headers: optional dict of extra headers; its ``Cookie`` entry,
            if any, is merged into ``self.Memorised_Cookies``.

    Returns:
        The UTF-8 encoded response text, or ``''`` when the response object
        evaluates false (in which case the stored cookie is deleted).
    """
    # Remember the request inputs on the instance.
    self.Memorised_Headers = Gived_headers
    self.Memorised_PostData = postdata
    self.Memorised_Cookies = cookies

    # A cookie passed through the headers is appended to the explicit ones.
    # The truthiness test also tolerates None, not only ''.
    if Gived_headers:
        header_cookie = Gived_headers.get('Cookie', None)
        if header_cookie:
            if cookies:
                self.Memorised_Cookies = cookies + '; ' + header_cookie
            else:
                self.Memorised_Cookies = header_cookie

    self.hostComplet = re.sub(r'(https*:\/\/[^/]+)(\/*.*)', '\\1', url)
    self.host = re.sub(r'https*:\/\/', '', self.hostComplet)
    self.url = url
    cookie_name = self.host.replace('.', '_')

    # Add the cookie saved by an earlier call for this host.
    cookieMem = GestionCookie().Readcookie(cookie_name)
    if not (cookieMem == ''):
        if not self.Memorised_Cookies:
            cookies = cookieMem
        else:
            cookies = self.Memorised_Cookies + '; ' + cookieMem

    # Convert the post-data line into a dict for the scraper.
    data = {}
    if postdata:
        method = 'POST'
        for pair in postdata.split('&'):
            if not pair:
                continue  # tolerate a leading/trailing/double '&'
            # partition keeps any '=' inside the value and yields an empty
            # value for a bare key.
            key, _, value = pair.partition('=')
            data[key] = value
    else:
        method = 'GET'

    s = CloudflareScraper()
    r = s.request(method, url, headers=self.SetHeader(),
                  cookies=self.ParseCookies(cookies), data=data)

    if r:
        sContent = r.text.encode("utf-8")
    else:
        # Failed request: drop the scraper's cookies and the stored ones.
        sContent = ''
        s.MemCookie = ''
        GestionCookie().DeleteCookie(cookie_name)

    # Store the cookies collected by the scraper.
    cookie = s.MemCookie
    if cookie:
        c = ''.join(name + '=' + cookie[name] + ';' for name in cookie)
        GestionCookie().SaveCookie(cookie_name, c)

    return sContent