def cf_get_tokens(self):
    """
    Gets login token.
    :return:
    """
    # Placeholder logic: only the fallback branch touches cfscrape, and the
    # return value of get_cookie_string() is discarded.
    dblist = 'dblist_sql'
    if dblist:
        sql = 'sql'
    else:
        cfscrape.get_cookie_string()
    pass

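# The stub above never captures the return value of get_cookie_string(). For
# reference, a minimal sketch of the usual call pattern, mirroring the other
# examples in this collection (the URL below is only a placeholder):
import cfscrape

cookie_string, user_agent = cfscrape.get_cookie_string("https://example.com")
headers = {"Cookie": cookie_string, "User-Agent": user_agent}
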
def get_cookies_jp(url):
    for retry in range(10):
        try:
            if ifproxy == 'true':
                cookie_value, user_agent = get_cookie_string(url, proxies=proxies, timeout=15)
            else:
                cookie_value, user_agent = get_cookie_string(url, timeout=15)
            # print('Passed the 5-second challenge!\n')
            return (cookie_value, user_agent)
        except:
            # print(format_exc())
            # print('Failed, retrying...')
            continue

def steal_library_header(url, proxy):
    print('\nAttempting to pass the 5-second challenge for', url, '... If it hangs for more than 20 seconds, restart the program...')
    for retry in range(10):
        try:
            if proxy:
                cookie_value, user_agent = get_cookie_string(url, proxies=proxy, timeout=15)
            else:
                cookie_value, user_agent = get_cookie_string(url, timeout=15)
            print('Passed the 5-second challenge!\n')
            return {'User-Agent': user_agent, 'Cookie': cookie_value}
        except:
            # print(format_exc())
            print('Failed, retrying...')
            continue
    print('>> Failed to pass javlibrary\'s 5-second challenge:', url)
    system('pause')

def pull_blocked_images(self, url):
    with web_data[1]:
        cookie_arg, user_agnt = cfscrape.get_cookie_string(url)
        # requests has no user_agent keyword; send the clearance cookie and
        # matching user agent as plain headers instead.
        return self.scraper.get(url, headers={'Cookie': cookie_arg, 'User-Agent': user_agnt}).content

def steal_library_header(url):
    print('\nAttempting to pass the 5-second challenge for', url, '...')
    for retry in range(10):
        try:
            cookie_value, user_agent = get_cookie_string(url, timeout=15)
            print('Passed the 5-second challenge!\n')
            return {'User-Agent': user_agent, 'Cookie': cookie_value}
        except:
            print('Failed, retrying...')
            continue
    print('>> Unable to pass javlibrary\'s 5-second challenge:', url)

def generate_cf_token(i):
    proxy = proxy_list[i].strip().split(":")
    proxies = {"http": "http://" + proxy[0] + ":" + proxy[1]}
    # proxies = {"http": "http://" + proxy[0] + ":" + proxy[1], "https": "https://" + proxy[0] + ":" + proxy[1]}
    try:
        cookie_value, user_agent = cfscrape.get_cookie_string(url, proxies=proxies)
        tokens_string = "Cookie: " + cookie_value + "\r\n"
        user_agent_string = "User-Agent: " + user_agent + "\r\n"
        cf_token.append(proxy[0] + "#" + proxy[1] + "#" + tokens_string + user_agent_string)
    except:
        pass

def get_download_list(crawl_page, parent):
    """
    Get Download List from Hiyobi
    :param crawl_page: range of pages to crawl
    :param parent: widget that displays progress
    :return: collected Gallery list
    """
    Logger.LOGGER.info("Load exception list from Firebase")
    parent.current_state.emit("Load exception list from Firebase")
    exception_list = FirebaseClient.fbclient.get_document_list()
    # TODO Remove next line on build
    # exception_list = []
    parent.notifyProgress.emit(100 * 1 / (crawl_page + 2))
    try:
        gallery_list = []
        Logger.LOGGER.info("Get cookie data for Cloudflare..")
        parent.current_state.emit("Get cookie data for Cloudflare..")
        cookie_value, user_agent = cfscrape.get_cookie_string(URL_HIYOBI)
        headers = {'User-Agent': user_agent}
        cookies = {'session_id': cookie_value}
        parent.notifyProgress.emit(100 * 2 / (crawl_page + 2))
    except Exception:
        Logger.LOGGER.error("Unexpected Exception Error..")
        Logger.LOGGER.error(traceback.format_exc())
        Dialog.ErrorDialog().open_dialog("Unexpected Exception Error", "Unexpected Exception Request Error!")
    try:
        for i in tqdm(range(1, crawl_page + 1)):
            # print("[SYSTEM]: Load From Page %d.." % i)
            parent.current_state.emit("Load From Page %d.." % i)
            soup = BeautifulSoup(requests.get(URL_HIYOBI + 'list/' + str(i), cookies=cookies, headers=headers).content, 'html.parser')
            galleries = soup.find_all('div', class_="gallery-content")
            for data in galleries:
                gallery = Gallery.Gallery()
                gallery.initialize(data)
                if gallery.code not in exception_list:
                    gallery_list.append(gallery)
            parent.notifyProgress.emit(100 * (i + 2) / (crawl_page + 2))
        return gallery_list
    except requests.exceptions.RequestException:
        Logger.LOGGER.error("Hiyobi Requests Error..")
        Dialog.ErrorDialog().open_dialog("Hiyobi Error", "Hiyobi Request Error!")
    except Exception:
        Logger.LOGGER.error("Unexpected Error in Hiyobi Request")
        Logger.LOGGER.error(traceback.format_exc())
        Dialog.ErrorDialog().open_dialog("Unexpected Exception Error", "Unexpected Exception Request Error!")

def cookie(url):
    sess = cfscrape.create_scraper()
    try:
        print(bcolors.green + "[+] Cloudflare cookie scraper ")
        print("[+] Target: ", bcolors.red, url, bcolors.lightcyan)
        request = "GET / HTTP/1.1\r\n"
        cookie_value, user_agent = cfscrape.get_cookie_string(url)
        request += "Cookie: %s\r\nUser_Agent: %s\r\n" % (cookie_value, user_agent)
        data = sess.get(url)
        out = BeautifulSoup(data.content, 'html.parser')
        print("[+] Print Cookie\n")
        print(request)
        os.system('tput setaf 10')
        print("\n[+] Scraper ")
        print(out)
    except KeyError:
        pass

def update_cloudfare():
    if not cloudfareSupport:
        return "", "Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0"
    filename = os.path.join(xbmc.translatePath(addon.getAddonInfo("profile")), "cloudfare.txt")
    if os.path.isfile(filename) and (time.time() - os.path.getmtime(filename) < 3600):
        f = open(filename, "r")
        content = [x.strip("\n") for x in f.readlines()]
        f.close()
        cookie_value = content[0]
        user_agent = content[1]
        return cookie_value, user_agent
    else:
        notify("Okay", "Get Cloudflare token")
        cookie_value, user_agent = cfscrape.get_cookie_string("http://serienstream.to")
        f = open(filename, "w")
        f.write(cookie_value + "\n")
        f.write(user_agent + "\n")
        f.close()
        return cookie_value, user_agent

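# The cookie/user-agent pair cached by update_cloudfare() is normally sent back
# as plain HTTP headers on later requests. A minimal sketch of that usage,
# assuming the requests library is available in the add-on environment; the
# helper name and target URL are illustrative only.
def fetch_with_cached_clearance(url="http://serienstream.to"):
    import requests
    cookie_value, user_agent = update_cloudfare()
    return requests.get(url, headers={"Cookie": cookie_value, "User-Agent": user_agent})
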
def _curl(self, url, referer):
    user_agent = self._choieUa()
    if len(referer) == 0:
        referer = self.referer
    cookie_arg, a = cfscrape.get_cookie_string(url)
    cmd = "curl --referer '{referer}' --cookie '{cookie_arg}' -A '{user_agent}' '{url}'"
    loginContent = None
    try:
        loginContent = subprocess.check_output(cmd.format(
            referer=referer, cookie_arg=cookie_arg, user_agent=user_agent, url=url), shell=True)
    except subprocess.CalledProcessError as e:
        loginContent = None
    return loginContent, cookie_arg

def pass_cf():
    global user, cookie, soso, scraper, error_cf
    if "https" in url:
        cfscrape.DEFAULT_CIPHERS = "TLS_AES_256_GCM_SHA384:ECDHE-ECDSA-AES256-SHA384"
    try:
        if method_pass_cf == "1":
            cookie, user = cfscrape.get_cookie_string(url)
        else:
            scraper = cfscrape.create_scraper()
            soso = scraper.get(url, timeout=15)
        print("[!] Bypass Has Been Completed!")
        numthreads()
    except:
        error_cf += 1
        print("[!] Bypassing Again... [" + str(error_cf) + "]")
        if error_cf > 5:
            os.system("cls")
            print("[!] ERROR BYPASS\n[!] Please Select Another Attack Or Ignore Method[!]")
            start_mode()
        else:
            pass_cf()

def SetupCache(ForceUpdate=True):
    """Cookie Cache handler"""
    current_timestamp = Datetime.TimestampFromDatetime(Datetime.Now())
    if not Dict['cookies']:
        Dict['cookies'] = {'expire': '1466626689', 'current_cookies': 'na'}
    if ForceUpdate or current_timestamp >= int(Dict['cookies']['expire']):
        HTTP.ClearCookies()
        Log('Updating Cookies')
        cookies, ua = cfscrape.get_cookie_string(BASE_URL + '/', HTTP.Headers['User-Agent'])
        Dict['cookies']['current_cookies'] = cookies
        Log(cookies)
        r_cf_clearance = Regex(r'cf_clearance\=.*\-(\d+)\-(\d+)').search(cookies)
        if r_cf_clearance:
            date = int(r_cf_clearance.group(1))
            expire = date + int(r_cf_clearance.group(2))
        else:
            Log.Warn('SetupCache Warning: cookies have no "cf_clearance" cookie')
            expire = Datetime.TimestampFromDatetime(Datetime.Now())
        HTTP.Headers['Cookie'] = cookies
        Dict['cookies']['expire'] = '%i' % expire
        Dict.Save()
    else:
        Log('Loading Saved Cookies into Global HTTP Headers')
        HTTP.Headers['Cookie'] = Dict['cookies']['current_cookies']
        current_datetime = Datetime.FromTimestamp(int(current_timestamp))
        expire_datetime = Datetime.FromTimestamp(int(Dict['cookies']['expire']))
        Log('Time left until Cookies need to be updated = %s' % str(expire_datetime - current_datetime))
    # Log("Current system time: " + str(current_timestamp))
    # Log("Current cookie time: " + (Dict['cookies']['expire']))
    return

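# The expiry computed above relies on cf_clearance embedding its issue time and
# lifetime as the last two dash-separated numbers. A small standalone sketch of
# the same parsing outside the Plex framework (the sample cookie value is made up):
import re

sample_cookies = "cf_clearance=abc123-1466626689-3600; __cfduid=deadbeef"
match = re.search(r'cf_clearance\=.*\-(\d+)\-(\d+)', sample_cookies)
if match:
    expire = int(match.group(1)) + int(match.group(2))  # issue timestamp + lifetime in seconds
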
def icurl(url):
    user_agent = ichoieUa()
    startUrl = "https://www.yifile.com"
    referer = startUrl
    cookie_arg, a = cfscrape.get_cookie_string(startUrl)
    # print(cookie_arg, a)
    cmd = "curl --referer '{referer}' --cookie '{cookie_arg}' -A '{user_agent}' '{url}'"
    loginContent = None
    try:
        loginContent = subprocess.check_output(cmd.format(
            referer=referer, cookie_arg=cookie_arg, user_agent=user_agent, url=url), shell=True)
    except subprocess.CalledProcessError as e:
        loginContent = None
    print(loginContent, cookie_arg)
    return loginContent, cookie_arg

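# Note on the two curl helpers above: Cloudflare's clearance cookie is only
# honoured together with the User-Agent string that solved the challenge, so
# pairing the scraped cookie with a separately chosen UA can get rejected.
# A minimal sketch that reuses the UA returned by get_cookie_string() instead
# (the helper name is illustrative, not from the original code):
def curl_with_clearance(url):
    import subprocess
    import cfscrape

    cookie_arg, user_agent = cfscrape.get_cookie_string(url)
    # Reuse the same UA that obtained the clearance cookie.
    return subprocess.check_output(["curl", "--cookie", cookie_arg, "-A", user_agent, url])
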
#######################################################
# Imports
import sys
import os
import re
import subprocess
import mechanize
import cfscrape

sys.argv.remove(sys.argv[0])

######################################################
# Get Cloudflare cookies
######################################################
print "Fetching Cloudflare cookies ..."
cookies = cfscrape.get_cookie_string("http://broadcasthe.net/login.php",
                                     "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36")

######################################################
# Log into BTN
######################################################
print "Logging into BTN..."
br = mechanize.Browser()
cj = mechanize.LWPCookieJar()
br.set_cookiejar(cj)

# Set some headers
br.set_handle_robots(False)
br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'),
                 ('cookie', cookies[0])]

# Open the login page
br.open("https://broadcasthe.net/login.php")

import cfscrape
import sys
import json

if __name__ == "__main__":
    cookie_arg, user_agent = cfscrape.get_cookie_string(sys.argv[1])
    print(json.dumps({'cookies': cookie_arg, 'agent': user_agent}))

import cfscrape

request = "GET / HTTP/1.1\r\n"
cookie_value, user_agent = cfscrape.get_cookie_string(
    "http://ab4.cdn.vizplay.org/v/9a6e0341fc4455d427998f64c7302a8c.mp4?st=D8XtHFTHAawXjBCQVuD39g&hash=r7eBYMQox1UUBcic_XS_nA")
request += "Cookie: %s\r\nUser-Agent: %s\r\n" % (cookie_value, user_agent)
print request

import subprocess
import cfscrape
import requests
import sys

# With get_tokens() cookie dict:
# tokens, user_agent = cfscrape.get_tokens("http://somesite.com")
# cookie_arg = "cf_clearance=%s; __cfduid=%s" % (tokens["cf_clearance"], tokens["__cfduid"])

# With get_cookie_string() cookie header; recommended for curl and similar external applications:
ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"
cookie_arg, user_agent = cfscrape.get_cookie_string("http://" + sys.argv[1], user_agent=ua)

file = open(sys.argv[1] + '-cookies.txt', 'w')
file.write(cookie_arg)
file.close()
print(cookie_arg)

# result = subprocess.check_output(["curl", "--cookie", cookie_arg, "-A", user_agent, "http://animeflv.net"])

if any([site == 'TPSE', site == 'DEM', site == 'WWT']) and any([str(r.status_code) == '403', str(r.status_code) == '404', str(r.status_code) == '503']):
    if str(r.status_code) != '503':
        logger.warn('Unable to download from ' + site + ' [' + str(r.status_code) + ']')
        # retry with the alternate torrent link.
        url = helpers.torrent_create(site, linkit, True)
        logger.fdebug('Trying alternate url: ' + str(url))
        try:
            r = requests.get(url, params=payload, verify=verify, stream=True, headers=headers)
        except Exception, e:
            return "fail"
    else:
        logger.warn('Cloudflare protection online for ' + site + '. Attempting to bypass...')
        try:
            scraper = cfscrape.create_scraper()
            cf_cookievalue, cf_user_agent = cfscrape.get_cookie_string(url)
            headers = {'Accept-encoding': 'gzip', 'User-Agent': cf_user_agent}
            r = scraper.get(url, verify=verify, cookies=cf_cookievalue, stream=True, headers=headers)
        except Exception, e:
            return "fail"

if str(r.status_code) != '200':
    logger.warn('Unable to download torrent from ' + site + ' [Status Code returned: ' + str(r.status_code) + ']')
    return "fail"

if any([site == 'TPSE', site == 'DEM', site == 'WWT']):
    if r.headers.get('Content-Encoding') == 'gzip':
        buf = StringIO(r.content)
        f = gzip.GzipFile(fileobj=buf)

def fun(): choice = ("1") banner() while choice != ("12"): menu() choice = input("\033[1;34m[+]\033[1;m \033[1;91mEnter your choice:\033[1;m ") if choice == ("3"): try: target = input("\033[1;91m[+] Enter Domain or IP Address: \033[1;m").lower() os.system("reset") print("\033[34m[~] Searching for Whois Lookup: \033[0m".format(target) + target) time.sleep(1.5) command = ("whois " + target) proces = os.popen(command) results = str(proces.read()) print(results + command) except Exception: pass elif choice == ("2"): try: target = input("\033[1;91m[+] Enter Domain or IP Address: \033[1;m").lower() os.system("reset") print("\033[34m[~] Searching for DNS Lookup: \033[0m".format(target) + target) time.sleep(1.5) command = ("dig " + target + " +trace ANY") proces = os.popen(command) results = str(proces.read()) print(results + command) except Exception: pass elif choice == ("1"): try: os.system("reset") os.system("gnome-terminal -e 'bash -c \"sudo etherape; exec bash\"'") except Exception: pass elif choice == ("4"): try: target = input("\033[1;91m[+] Enter Domain or IP Address: \033[1;m").lower() os.system("reset") print("\033[34m[~] Scanning Nmap Port Scan: \033[0m" + target) print("This will take a moment... Get some coffee 😃 )\n") time.sleep(1.5) scanner = nmap.PortScanner() command = ("nmap -Pn " + target) process = os.popen(command) results = str(process.read()) logPath = "logs/nmap-" + strftime("%Y-%m-%d_%H:%M:%S", gmtime()) print(results + command + logPath) print("\033[34mNmap Version: \033[0m", scanner.nmap_version()) except KeyboardInterrupt: print("\n") print("[-] User Interruption Detected..!") time.sleep(1) elif choice == ("5"): try: target = input("\033[1;91m[+] Enter Domain or IP Address: \033[1;m").lower() os.system("reset") print("\033[34m[~] Scanning HTTP Header Grabber: \033[0m\n" + target) time.sleep(1.5) command = ("http -v " + target) proces = os.popen(command) results = str(proces.read()) print(results + command) except Exception: pass elif choice == ("6"): target = input("\033[1;91m[+] Enter the Domain to test: \033[1;m").lower() os.system("reset") if not (target.startswith("http://") or target.startswith("https://")): target = "http://" + target print("\033[1;34m[~] Testing Clickjacking Test: \033[1;m" + target) time.sleep(2) try: resp = requests.get(target) headers = resp.headers print("\nHeader set are: \n") for item, xfr in headers.items(): print("\033[1;34m" + item + ":" + xfr + "\033[1;m") if "X-Frame-Options" in headers.keys(): print("\n[+] \033[1;34mClick Jacking Header is present\033[1;m") print("[+] \033[1;34mYou can't clickjack this site !\033[1;m\n") else: print("\n[*] \033[1;34mX-Frame-Options-Header is missing ! \033[1;m") print("[!] 
\033[1;34mClickjacking is possible,this site is vulnerable to Clickjacking\033[1;m\n") except Exception as ex: print("\033[1;34mException caught: " + str(ex)) elif choice == ("7"): try: target = input("\033[1;91m[+] Enter Domain: \033[1;m").lower() os.system("reset") print("\033[34m[~] Scanning Robots.txt Scanner: \033[0m\n" + target) time.sleep(1.5) if not (target.startswith("http://") or target.startswith("https://")): target = "http://" + target robot = target + "/robots.txt" try: bots = urlopen(robot).read().decode("utf-8") print("\033[34m" + (bots) + "\033[1;m") except URLError: print("\033[1;31m[-] Can\'t access to {page}!\033[1;m".format(page=robot)) except Exception as ex: print("\033[1;34mException caught: " + str(ex)) elif choice == ("8"): target = input("\033[1;91m[+] Enter Domain: \033[1;m").lower() if not (target.startswith("http://") or target.startswith("https://")): target = "http://" + target os.system("reset") print("[+] Cloudflare cookie scraper ") time.sleep(1.5) sess = cfscrape.create_scraper() try: print("[+] Target: " + target) request = "GET / HTTP/1.1\r\n" cookie_value, user_agent = cfscrape.get_cookie_string(target) request += "Cookie: %s\r\nUser_Agent: %s\r\n" % (cookie_value, user_agent) data = sess.get(target) out = BeautifulSoup(data.content,'html.parser') print("[+] Print Cookie\n") print(request) os.system('tput setaf 10') print("\n[+] Scraper ") print(out) except ValueError: print('[X] Unable to find Cloudflare cookies. This website does not have Cloudflare IUAM enabled.') elif choice == ("9"): try: target = input("\033[1;91m[+] Enter Domain: \033[1;m").lower() os.system("reset") print("\033[34m[~] Scanning Link Grabber: \033[0m\n" + target) time.sleep(2) if not (target.startswith("http://") or target.startswith("https://")): target = "http://" + target deq = deque([target]) pro = set() try: while len(deq): url = deq.popleft() pro.add(url) parts = urlsplit(url) base = "{0.scheme}://{0.netloc}".format(parts) print("[+] Crawling URL " + "\033[34m" + url + "\033[0m") try: response = requests.get(url) except (requests.exceptions.MissingSchema, requests.exceptions.ConnectionError): continue soup = BeautifulSoup(response.text, "lxml") for anchor in soup.find_all("a"): link = anchor.attrs["href"] if "href" in anchor.attrs else '' if link.startswith("/"): link = base + link if not link in deq and not link in pro: deq.append(link) continue except KeyboardInterrupt: print("\n") print("[-] User Interruption Detected..!") time.sleep(1) print("\n \t\033[34m[!] 
I like to See Ya, Hacking Anywhere ..!\033[0m\n") except Exception: pass elif choice == ("10"): try: target = input("\033[1;91m[+] Enter Domain or IP Address: \033[1;m").lower() url = ("http://ip-api.com/json/") response = urllib.request.urlopen(url + target) data = response.read() jso = json.loads(data) os.system("reset") print("\033[34m[~] Searching IP Location Finder: \033[0m".format(url) + target) time.sleep(1.5) print("\n [+] \033[34mUrl: " + target + "\033[0m") print(" [+] " + "\033[34m" + "IP: " + jso["query"] + "\033[0m") print(" [+] " + "\033[34m" + "Status: " + jso["status"] + "\033[0m") print(" [+] " + "\033[34m" + "Region: " + jso["regionName"] + "\033[0m") print(" [+] " + "\033[34m" + "Country: " + jso["country"] + "\033[0m") print(" [+] " + "\033[34m" + "City: " + jso["city"] + "\033[0m") print(" [+] " + "\033[34m" + "ISP: " + jso["isp"] + "\033[0m") print(" [+] " + "\033[34m" + "Lat & Lon: " + str(jso['lat']) + " & " + str(jso['lon']) + "\033[0m") print(" [+] " + "\033[34m" + "Zipcode: " + jso["zip"] + "\033[0m") print(" [+] " + "\033[34m" + "TimeZone: " + jso["timezone"] + "\033[0m") print(" [+] " + "\033[34m" + "AS: " + jso["as"] + "\033[0m" + "\n") except URLError: print("\033[1;31m[-] Please provide a valid IP address!\033[1;m") elif choice == ("11"): try: target = input("\033[1;91m[+] Enter Domain: \033[1;m").lower() if not (target.startswith("http://") or target.startswith("https://")): target = "https://" + target os.system("reset") print("\033[34m[~] Detecting CMS with Identified Technologies and Custom Headers from target url: \033[0m") time.sleep(5) command = ("mtr " + "-4 -rwc 1 " + target) obj = webtech.WebTech() results = obj.start_from_url(target, timeout=1) sys.stdout.write(results) except Exception: pass elif choice == ("12"): try: target = input("\033[1;91m[+] Enter Domain or IP Address: \033[1;m").lower() os.system("reset") print("\033[34m[~] Searching for Traceroute \033[0m".format(target) + target) print(">> This will take a moment... Get some coffee << )\n") time.sleep(5) command = ("mtr " + "-4 -rwc 1 " + target) proces = os.popen(command) results = str(proces.read()) print("\033[1;34m" + results + command + "\033[1;m") fun() except KeyError: pass elif choice == ("13"): target = input("\033[1;91m[+] Enter Domain: \033[1;m").lower() os.system("reset") print("\033[34m[~] Start crawler... 
\033[0m") time.sleep(5) print("[+] Target: " + target) if not (target.startswith("http://") or target.startswith("https://")): target = "http://" + target try: content = get(target).text regex_t = re.compile(r"<title>(.*?)<\/title>") tit = re.findall(regex_t, content) regex_l = re.compile(r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+") link = re.findall(regex_l, content) robots = get(target + "/robots.txt").text print("[+] Title: "+ ''.join(tit) + "\n") print("[+] Extract links: \n" + '\n'.join(link) + "\n") print("[+] Robots.txt: \n" + robots) except KeyError: pass elif choice == ("14"): target = input("\033[1;91m[+] Enter Domain: \033[0m") os.system("reset") print("\033[34m[~] Scanning Certificate Transparency log monitor: \033[0m\n" + target) time.sleep(1.5) print("[+] Target: " + target) try: headers = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36', } results = requests.get('https://api.certspotter.com/v1/issuances?domain='+target+'&expand=dns_names&expand=issuer&expand=cert | jq ".[].dns_names[]" | sed "s/\"//g" | sed "s/\*\.//g" | sort -u | grep '+target,headers=headers) results = results.text.split('\n') print(*results, sep = "\n") except KeyError: pass elif choice == ("15"): time.sleep(1) print("\n\t\033[34mBlue Eye\033[0m DONE... Exiting... \033[34mLike to See Ya Hacking Anywhere ..!\033[0m\n") sys.exit() else: os.system("reset") print("\033[1;31m[-] Invalid option..! \033[1;m")
global outfile
outfile = cfurl.split('/')[-1]
print("output file: %s \n" % outfile)
print("creating new session..\n")
scraper = cfscrape.create_scraper()  # returns a requests.Session object
try:
    if usecurl == 1 and writeout == 1:
        r = scraper.get(cfurl, stream=True)
        print("status: ")
        print(r.status_code)
        print("\nheaders: ")
        print(r.headers)
        print("\nfetching cookies for %s.. \n" % cfurl)
        # get_cookie_string() returns (cookie_string, user_agent); only the
        # cookie string belongs in the curl command line.
        cookie_arg, cf_ua = cfscrape.get_cookie_string(cfurl)
        print("trying to download using cURL to %s.. \n" % outfile)
        command_text = 'curl -# --cookie ' + cookie_arg + ' -O ' + cfurl
        output = Popen(command_text, shell=True, stdout=PIPE, stderr=PIPE, stdin=PIPE)
        response, errors = output.communicate()
        print("response: " + str(response))
        print("errors: " + str(errors))
    elif usecurl == 1 and writeout == 0:
        print("status: ")
        print(scraper.get(cfurl).status_code)
        print("\nheaders: ")
        print(scraper.get(cfurl).headers)
        print("getting cookies for url: %s \n" % cfurl)
        cookie_arg, cf_ua = cfscrape.get_cookie_string(cfurl)
        print(cookie_arg)

import subprocess
import cfscrape
import sys

try:
    dlLink = sys.argv[1]
    spigotmcLogin = sys.argv[2]
    spigotmcPass = sys.argv[3]
except IndexError:
    print(
        'Something went wrong with link parsing or SpigotMC login details. Please check your configuration in spigotmgr.'
    )
    sys.exit(1)

cookie_arg, user_agent = cfscrape.get_cookie_string(dlLink)
result = subprocess.check_output([
    "curl", "--cookie", cookie_arg, "-A", user_agent,
    "--data", "login="******"&password="******"-O", "-J", "-L", dlLink
])

def getCF(cfurl, links): checkcurl = '' checklinks = '' if links == 1: checklinks = 'yes' global followdirs else: checklinks = 'no' if usecurl == 1: checkcurl = 'yes' else: checkcurl = 'no' if debug == 1: print("\n\033[32;1mlocals: \n\033[0m") for name, val in locals().iteritems(): print("\033[35;1m%s:\033[32;21m %s \033[0m" % (str(name), str(val))) print("\n\033[32;1mglobals: \n\033[0m") for name, val in globals().iteritems(): print("\n\033[35;1m%s:\033[36;21m %s \033[0m" % (str(name), str(val))) print('\033[0m\r\n') print("\n\033[31;1musing curl:\033[31;21m\033[33m %s \033[0m\n" % checkcurl) print("\n\033[34;1mharvesting links:\033[34;21m\033[33m %s \033[0m\n" % checklinks) p = urlparse(cfurl) part = p.path.split('/')[-1] path = p.path.strip(part) if '/' not in path[:1]: path = '/' + path urlfqdn = p.scheme + '://' + p.netloc parent = urlfqdn + path childdir = path.strip('/') domaindir = os.path.join('download', p.netloc) parentdir = os.path.join(domaindir, childdir) if firsturl in finished and cfurl in firsturl: print('\nABORTING: already retrieved %s!\n') % firsturl sys.exit(1) if writeout == 1: global outfile global existing global checkresume p = urlparse(cfurl) if not os.path.exists('download'): os.makedirs('download') if not os.path.exists(domaindir): os.makedirs(domaindir) outfile = cfurl.split('?')[0] outfile = outfile.split('/')[-1] filename = cfurl.lstrip('https:').strip('/') filename = filename.rstrip(outfile) dirs = filename.split('/') a = 'download' i = 1 for dir in dirs: while i < len(dirs): if not re.search(r'^(.*)\.[.]+$', dir): a = os.path.join(a, dir) if not os.path.exists(a): os.makedirs(a) i += 1 else: break if len(outfile) < 1 or outfile in p.netloc: outfile = 'index.html' outdir = filename.strip() else: part = outfile outdir = filename.rstrip(part) fulloutdir = os.path.join('download', outdir) outfile = outfile.strip('/') if not os.path.exists(fulloutdir): os.makedirs(fulloutdir) print("output file: %s \n" % outfile) global savefile savefile = os.path.join(fulloutdir, outfile) cwd = os.getcwd() fullsavefile = os.path.join(cwd, savefile) print("full path to output file: %s \n" % fullsavefile) scraper = cfscrape.create_scraper() ualist = [ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36', 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko', 'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko', 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', 'Mozilla/5.0 (compatible; MSIE 9.0; AOL 9.7; AOLBuild 4343.19; Windows NT 6.1; WOW64; Trident/5.0; FunWebProducts)', 'Mozilla/5.0 (Windows NT 6.3; rv:38.0) Gecko/20100101 Firefox/38.0', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:38.0) Gecko/20100101 Firefox/38.0', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', 'Mozilla/5.0 (Windows NT 6.1 WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A', 'Mozilla/5.0 (X11; SunOS i86pc; rv:38.0) Gecko/20100101 Firefox/38.0', 'Mozilla/5.0 (X11; FreeBSD amd64; rv:38.0) Gecko/20100101 Firefox/38.0', 
'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0', 'Mozilla/5.0 (X11; FreeBSD i386; rv:38.0) Gecko/20100101 Firefox/38.0', 'Mozilla/5.0 (X11; Linux i586; rv:38.0) Gecko/20100101 Firefox/38.0', 'Mozilla/5.0 (X11; OpenBSD amd64; rv:38.0) Gecko/20100101 Firefox/38.0', 'Mozilla/5.0 (X11; OpenBSD alpha; rv:38.0) Gecko/20100101 Firefox/38.0', 'Mozilla/5.0 (X11; OpenBSD sparc64; rv:38.0) Gecko/20100101 Firefox/38.0', 'Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20121202 Firefox/17.0 Iceweasel/17.0.1', 'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16', 'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14 Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14' ] n = random.randint(0,len(ualist)) - 1 ua = ualist[n].strip() def cfcookie(cfurl): sess = requests.session() p = urlparse(cfurl) mnt = p.scheme + '://' sess.mount(mnt, cfscrape.CloudflareAdapter()) sess.get(cfurl) cookies = "\"cf_clearance\"=\"%s\";\"__cfduid\"=\"%s\"" % (sess.cookies["cf_clearance"] , sess.cookies["__cfduid"]) return cookies def getpage(cfurl): r = scraper.get(cfurl, stream=True, verify=False, proxies=proxystring, allow_redirects=True) if 'text' in r.headers.get('Content-Type'): rt = UnicodeDammit.detwingle(r.text) html = BeautifulSoup(rt.decode('utf-8'), "html.parser") print('\r\n--------------------------------------------------------\r\n') if debug == 1: orenc = str(html.original_encoding) print('\n\033[40m\033[35;1mORIGINAL ENCODING: %s \033[0m\n' % orenc) bs = html.prettify(formatter=None) print(bs) print('\r\n--------------------------------------------------------\r\n') else: found = -1 if debug == 1: print('\n\033[34mDEBUG: finished list length: \033[37;1m%d \033[0m\n' % len(finished)) # cURL request - using cURL for cloudflare URLs doesn't seem to work if usecurl == 1: r = scraper.get(cfurl, stream=True, verify=False, allow_redirects=True, proxies=proxystring) print("status: ") print(r.status_code) print("\ngetting cookies for %s.. \n" % cfurl) cookie_arg = cfcookie(cfurl) if cookie_arg: req = "GET / HTTP/1.1\r\n" req += "Cookie: %s\r\nUser-Agent: %s\r\n" % (cookie_arg, ua) houtput = check_output(["curl", "--cookie", cookie_arg, "-A", ua]) curlstring = '--cookie \'' + cookie_arg + '\' -A \'' + ua + '\' -L -k ' if 'curlopts' in locals(): curlstring = '--cookie \'' + cookie_arg + '\' ' + curlopts + ' -A \'' + ua + '\' -k ' else: cookie_arg = cfscrape.get_cookie_string(cfurl) req = "GET / HTTP/1.1\r\n" req += "User-Agent: %s\r\n" % ua houtput = check_output(["curl", "-A", ua]) curlstring = '-A \'' + ua + '\' -L -k ' if 'curlopts' in locals(): curlstring = '-# ' + curlopts + ' -A \'' + ua + '\' -k ' if proxy: curlstring += '-x %s ' % proxy print(reqd) print("\nHEADERS: \n%s \n" % str(houtput)) msg = "\nfetching %s using cURL.. \n" % cfurl if writeout == 1: if os.path.exists(savefile): resumesize = os.path.getsize(savefile) print("\n%s already exists! \n" % outfile) print("\nlocal file size: %s bytes \n" % str(resumesize)) if 'existing' not in globals(): existing = 0 if existing == 0: checkresume = raw_input('choose an option [1-3]: 1) resume download, 2) start new download, 3) skip. --> ') while not re.match(r'^[1-3]$', checkresume): checkresume = raw_input('invalid input. enter 1 to resume, 2 to start new, or 3 to skip --> ') checkexist = raw_input('\ndo this for all downloads? Y/N --> ') while not re.match(r'^[YyNn]$', checkexist): checkexist = raw_input('invalid entry. 
enter Y to use same action on existing files or N to always ask --> ') if checkexist.lower() == 'y': existing = 1 else: existing = 0 if checkresume == '1': curlstring = curlstring + '-C - -o \'' + savefile + '\' ' msg = "\ntrying to resume download using cURL to %s.. \n" % savefile elif checkresume == '2': curlstring = curlstring + '-O ' msg = "\nstarting new download to %s.. \n" % savefile else: msg = "\nskipping download for %s \n" % outfile else: curlstring = curlstring + '-O ' msg = "\ntrying to download using cURL to %s.. \n" % savefile #command_text = 'cd download && { curl ' + curlstring + cfurl + ' ; cd -; }' else: msg = "\nfetching %s using cURL.. \n" % cfurl command_text = 'curl ' + curlstring + cfurl print(msg) print("\nsubmitting cURL command string: \n%s \n" % command_text) output = Popen(command_text, shell=True, stdout=PIPE, stderr=PIPE, stdin=PIPE) result, errors = output.communicate() if result is not None: ht = BeautifulSoup(str(result)) cpass = ht.find('input', {'name': 'pass'}) if cpass: cloudpass = cpass.get('value') cloudsch = ht.find('input', {'name': 'jschl_vc'}).get('value') reurl = ht.find('form').get('action') if reurl: print("form action: %s \n" % reurl) if '/' in path[:1]: path = path[1:] parent = p.scheme + '://' + p.netloc + path submitstr = 'pass='******'&jschl_vc=' + cloudsch + '&challenge-form=submit' #locstr = 'Location: http://' + p.netloc + path + '?' + submitstr #header = '-H \'' + locstr + '\' -L ' + cfurl go = parent + reurl cs = '-e ' + p.netloc + ' --data-urlencode \'' + submitstr + '\' ' + curlstring if writeout == 0: cs = cs + '-v ' else: cs = '--ignore-content-length ' + curlstring #command = 'cd download && { curl ' + cs + cfurl + ' ; cd -; }' else: cs = curlstring + '-e ' + p.netloc + ' ' if writeout == 0: cs += '-v ' #command = 'cd download && { curl ' + cs + cfurl + ' ; cd -; }' if re.search(r'(\.(htm)l?|\.php|\.txt|\.xml|\.[aj](sp)x?|\.cfm|\.do|\.md|\.json)$',cfurl) or re.search(r'(\.(htm)l?|\.php|\.txt|\.xml|\.[aj](sp)x?|\.cfm|\.do|\.md|\.json)$', outfile): print(ht.prettify(formatter=None)) else: if errors: print("\nerror: %s\n" % str(errors)) cs = curlstring + ' -i ' if writeout == 0: cs += '-v --no-keepalive ' else: cs = '--ignore-content-length ' + cs #command = 'cd download && { curl ' + cs + cfurl + ' ; cd -; }' command = 'curl ' + cs + cfurl print("submitting cURL request:\n%s \n" % command) output = Popen(command, shell=True, stdout=PIPE, stderr=PIPE, stdin=PIPE) response, errors = output.communicate() res = BeautifulSoup(str(response)) res = res.prettify() if response: print("\nresponse: \n %s \n" % str(res)) if errors: print("\nerrors: \n %s \n" % str(errors)) finished.append(cfurl) elif usecurl == 0 and writeout == 1: getkb = lambda a: round(float(float(a)/1024),2) getmb = lambda b: round(float(float(b)/1048576),2) print("\ngetting %s... \n" % cfurl) if os.path.exists(savefile): # FOUND SAVED FILE # GET SIZE OF EXISTING LOCAL FILE resumesize = os.path.getsize(savefile) ksize = getkb(resumesize) msize = getmb(resumesize) sizeqt = 'kb' fsize = ksize if msize > 1: sizeqt = 'mb' fsize = msize existsize = str(fsize) + ' ' + sizeqt print("\n%s already exists! \n" % outfile) print("\nlocal file size: %s \n" % existsize) if 'existing' not in globals(): existing = 0 if existing == 0: checkresume = raw_input('choose an option [1-3]: 1) resume download, 2) start new download, 3) skip. --> ') while not re.match(r'^[1-3]$', checkresume): checkresume = raw_input('invalid input. 
enter 1 to resume, 2 to start new, or 3 to skip --> ') checkexist = raw_input('\ndo this for all downloads? Y/N --> ') while not re.match(r'^[YyNn]$', checkexist): checkexist = raw_input('invalid entry. enter Y to use same action on existing files or N to always ask --> ') if checkexist.lower() == 'y': existing = 1 else: existing = 0 if checkresume == '1': # RESUME DOWNLOAD AT LAST LOCAL BYTE dld = int(resumesize) resumeheader = {'Range': 'bytes=%s-' % str(dld)} dlmsg = "\nattempting to resume download for %s. this may take awhile depending on file size... \n" % outfile df = open(savefile, 'a+b') elif checkresume == '2': # DISREGARD SAVED FILE, START DOWNLOAD FROM TOP resumeheader = None dlmsg = "\nwriting content to \'download\' directory as file %s. this may take awhile depending on file size... \n" % outfile dld = 0 df = open(savefile, 'wb+') else: # SKIPPING DOWNLOAD resumeheader = None df = open(savefile, 'r+') dlmsg = "\nskipping download for %s\n" % outfile else: # NEW DOWNLOAD REQUEST checkresume = '2' dld = 0 df = open(savefile, 'wb+') resumeheader = None dlmsg = "\nwriting content to \'download\' directory as file %s. this may take awhile depending on file size... \n" % outfile print(dlmsg) if not checkresume == '3': # IF NOT SKIPPING r = scraper.get(cfurl, stream=True, headers=resumeheader, verify=False, allow_redirects=True, proxies=proxystring) filesize = r.headers.get('Content-Length') filetype = r.headers.get('Content-Type') start = time.clock() #today = datetime.now() #startdate = date.strftime(today,"%m-%d-%Y %H:%M:%S ") #print("start time: %s \n" % startdate) with df as dlfile: if filesize is not None and 'text' not in filetype: bytesize = int(filesize) kbsize = getkb(bytesize) mbsize = getmb(bytesize) qt = 'bytes' size = bytesize if kbsize > 10: qt = 'kb' size = kbsize if mbsize > 1 : qt = 'mb' size = mbsize print('\nfile size: ' + str(size) + ' %s \n' % qt) for chunk in r.iter_content(chunk_size=2048): if chunk: dld += len(chunk) dlfile.write(chunk) done = int((50 * int(dld)) / int(filesize)) dldkb = getkb(dld) dldmb = getmb(dld) unit = 'b' prog = str(round(dld,2)) if dldkb > 1: unit = 'kb ' prog = str(round(dldkb,2)) if dldmb > 1: unit = 'mb ' prog = str(round(dldmb,2)) sys.stdout.write("\rdownloaded: %s %s [%s%s] %d kb/s" % (prog, unit, '#' * done, ' ' * (50 - done), 0.128 * (dldkb / (time.clock() - start)))) dlfile.flush() os.fsync(dlfile.fileno()) else: break elif filesize and 'text' in filetype: dlfile.write(r.content) dlfile.flush() os.fsync(dlfile.fileno()) else: for chunk in r.iter_content(chunk_size=1024): if chunk: dld += len(chunk) dlfile.write(chunk) dlfile.flush() os.fsync(dlfile.fileno()) else: break print("\r\nfile %s saved! 
\n" % outfile) fin = time.clock() - start totalsecs = fin * 360 elapsed = "%s seconds " % str(totalsecs) if totalsecs > 60: totalmins = float(totalsecs / 60) mins = int(totalmins) if mins == 1: unitmin = "minute" else: unitmin = "minutes" strmin = str(mins) + " " + str(unitmin) secs = round((totalsecs % 60), 3) elapsed = str(strmin) + " " + str(secs) if totalmins > 60: totalhours = float(totalmins / 60 ) hours = int(totalmins / 60) if hours == 1: unithr = "hour" else: unithr = "hours" strhr = str(hours) + " " + str(unithr) mins = round((totalmins % 60),3) elapsed = "%s, %s mins, %s secs" % (strhr, mins, secs) else: hours = 0 else: hours = 0 mins = 0 secs = round(totalsecs,3) elapsed = "%s seconds" % str(secs) #ended = datetime.now() #enddate = date.strftime(ended,"%m-%d-%Y %H:%M:%S ") #print("end time: %s \n" % enddate) print("\ndownload time elapsed: %s \n" % str(elapsed)) time.sleep(4) print('\r\n--------------------------------------------------------\r\n') else: print("\nskipped download from %s.\r\nfile has not been modified.\n" % cfurl) getpage(cfurl) finished.append(cfurl) else: getpage(cfurl) finished.append(cfurl) def getlinks(cfurl): r = scraper.get(cfurl, stream=True, verify=False, proxies=proxystring, allow_redirects=True) rt = UnicodeDammit.detwingle(r.text) html = BeautifulSoup(rt.decode('utf-8'), "html.parser") if debug == 1: orenc = str(html.original_encoding) print('\n\033[40m\033[35;1mORIGINAL ENCODING: %s \033[0m\n' % orenc) bs = html.prettify(formatter=None) linkresult = html.findAll('a') if len(linkresult) > 0: foundlinks = len(linkresult) print('\nFOUND %s LINKS AT %s:\n' % (str(foundlinks), cfurl)) for link in linkresult: b = link.get('href') b = str(b) if b not in cfurl and not re.match(r'^(\.\.)?\/$', b): print(b) print('') else: print('\nNO LINKS FOUND.\n') foundlinks = 0 time.sleep(4) return foundlinks def selectdir(geturl): r = scraper.get(geturl, stream=True, verify=False, proxies=proxystring, allow_redirects=True) rt = UnicodeDammit.detwingle(r.text) html = BeautifulSoup(rt.decode('utf-8'), "html.parser") if debug == 1: orenc = str(html.original_encoding) print('\n\033[40m\033[35;1mORIGINAL ENCODING: %s \033[0m\n' % orenc) findlinks = html.findAll('a') dirlist = [] for link in findlinks: b = link.get('href') if not re.match(r'^((\.\.)?\/)$', str(b)): if re.search(r'^(.*)(\/)$', str(b)): dirlist.append(b) p = urlparse(geturl) part = p.path.split('/')[-1] path = p.path.rstrip(part) if '/' not in path[:1]: path = '/' + path urlfqdn = p.scheme + '://' + p.netloc parent = urlfqdn + path i = 0 dirtotal = len(dirlist) if dirtotal > 0: print('\nFOUND %d DIRECTORIES: \n' % dirtotal) while i < dirtotal: sel = i + 1 print(str(sel) + ' - ' + str(dirlist[i])) i += 1 print('') lim = dirtotal + 1 matchtop = r'^(%s)(\/)?$' % urlfqdn if not re.match(matchtop,geturl): print('0 - BACK TO PARENT DIRECTORY \n') startsel = '0-%d' % dirtotal else: startsel = '1-%d' % dirtotal selectdir = raw_input('make a selection [%s] --> ' % startsel) if not int(selectdir) in range(0, lim): selectdir = raw_input('invalid entry. please enter a selection %s --> ' % startsel) if selectdir == '0': geturl = parent subcont = 0 else: n = int(selectdir) - 1 usedir = dirlist[n] geturl = parent + usedir subcont = 1 else: print('\nNO DIRECTORIES FOUND. using current directory.. 
\n') subcont = 0 geturl = parent + part return geturl, subcont, parent def getparent(cfurl): cff = re.match(r'^http:\/\/(.*)(\/\/)(.*)', cfurl) if cff: cf = 'http://' + str(cff.group(1)) + '/' + str(cff.group(3)) else: cf = str(cfurl) p = urlparse(cf) if '/' not in p.path[-1:]: part = p.path.split('/')[-1] path = p.path.rstrip(part) else: path = p.path if '/' not in path[:1]: path = '/' + path urlfqdn = p.scheme + '://' + p.netloc parent = urlfqdn + path + '/' return parent def followlinks(bx): p = urlparse(bx) if '/' not in p.path[-1:]: part = p.path.split('/')[-1] path = p.path.rstrip(part) else: path = p.path if '/' not in path[:1]: path = '/' + path urlfqdn = p.scheme + '://' + p.netloc parent = urlfqdn + path + '/' s = scraper.get(bx, stream=True, verify=False, proxies=proxystring, allow_redirects=True) print('\n----------------------------------------------------------- \n') print(s) print('\n') scr = UnicodeDammit.detwingle(s.text) shtml = BeautifulSoup(scr, "html.parser") if debug == 1: orenc = str(shtml.original_encoding) print('\n\033[40m\033[35;1mORIGINAL ENCODING: %s \033[0m\n' % orenc) print('\n----------------------------------------------------------- \n') sfindlinks = shtml.findAll('a') slen = len(sfindlinks) sdirs = [] si = 0 while si < slen: for slink in sfindlinks: if debug == 1: print('\n\033[34;1mSLINK LOOP\r\n\033[32;21m* si = %d, si < %d\033[0m\n' % (si, slen)) sl = slink.get('href') si += 1 if sl: if not re.search(r'^((\.\.)?\/)$', str(sl)): if '/' in bx[-1:]: if 'http' not in sl[:4]: sl = sl.lstrip('/') sx = bx + sl else: sx = sl print(sx) getCF(sx, 0) ss = scraper.get(sx, stream=True, verify=False, proxies=proxystring, allow_redirects=True) bs = BeautifulSoup(ss.text, "html.parser") if bs is not None: if debug == 1: orenc = str(bs.original_encoding) print('\n\033[40m\033[35;1mORIGINAL ENCODING: %s \033[0m\n' % orenc) pagehead = bs.html.head.contents pagehead = str(pagehead) if pagehead: pagetitle = re.search(r'<title>(.*)<\/title>', pagehead) pagetitle = str(pagetitle.group(1)) bigtitle = pagetitle.upper() titlestars = lambda a: '*' * (len(str(a)) + 4) pagestars = titlestars(pagetitle) print('\n\033[40m\033[33m%s\n\033[34;1m* %s * \n\033[40m\033[33;21m%s\n\033[0m' % (pagestars, bigtitle, pagestars)) sb = bs.find_all('a', href = re.compile(r'.+$')) #sb = bs.findAll('a') sblen = len(sb) if sblen > 0: n = 0 while n < sblen: for sbl in sb: if debug == 1: print('\n\033[35;1mSBL LOOP\r\n\033[37;21m* n = %d, n < %d \033[0m\n' % (n, sblen)) if sbl is not None: sr = sbl.get('href').strip() sr = str(sr) print('\n* %s \n') % sr if not re.search('http', sr[:4]): parent = getparent(sx) srs = sr.lstrip('/') sr = parent + srs if re.match(r'([^.]+\/)$', str(sr)): followlinks(sr) sdirs.append(sr) else: if '/' not in sr[-1:]: getCF(sr, 0) sdirs.append(sr) n += 1 else: n += 1 continue elif 'Error-222' in bx: print('\nuh-oh. might have triggered a flag with cloudflare.\n') for i in xrange(10,0,-1): time.sleep(1) print('delaying request for %d seconds.. 
\r' % i) sys.stdout.flush() break else: if not re.search('http', str(sl[:4])): parent = getparent(bx) sl = sl.lstrip('/') sx = parent + sl else: sx = str(sl) sx = str(sx) sdirs.append(sx) print(sx) print('\n----------------------------------------------------------- \n') getCF(sx, 0) si += 1 #if re.search(r'^(.*)(\/)$', str(bx)): else: print('\nno links found at %s \n' % str(slink)) si += 1 continue for sd in sdirs: if '/' in sd[-1:]: print('\nfollowing directory: %s \n' % sd) followlinks(sd) getCF(sd, 1) else: print('\nrequesting link: %s \n' % sd) getCF(sd, 0) return sdirs if links == 1: if 'found' not in locals(): found = getlinks(cfurl) keep = 1 depth = 0 while found > 0 and keep is not 0: follow = raw_input('fetch harvested links? enter Y/N --> ') while not re.search(r'^[yYnN]$', follow): follow = raw_input('invalid entry. enter Y to follow harvested links or N to quit --> ') if follow.lower() == 'n': break elif follow.lower() == 'y': r = scraper.get(cfurl, stream=True, verify=False, proxies=proxystring, allow_redirects=True) html = BeautifulSoup(r.text, "html.parser", from_encoding='utf-8') findlinks = html.findAll('a') s = [] checkfordirs = 0 if len(findlinks) > 0: for d in findlinks: dd = d.get('href') if re.search(r'^(.*)(\/)$', str(dd)): if not re.match(r'^((\.\.)?\/)$', str(dd)) and dd not in cfurl: if 'http' not in dd[:4]: dd = parent + dd s.append(str(dd)) checkfordirs = 1 if len(s) > 0 and checkfordirs == 1: if 'followdirs' not in locals(): followdirs = raw_input('follow directories? enter Y/N --> ') while not re.search(r'^[yYnN]$', followdirs): followdirs = raw_input('invalid entry. enter Y to follow directories or N to only retrieve files --> ') if followdirs.lower() == 'y': depth = 1 else: depth = 0 else: if followdirs.lower() == 'y': depth += 1 else: followdirs = 'n' if debug == 1: print("\n\033[35;1mdepth:\033[37;21m %d \033[0m\n" % depth) if findlinks: total = len(findlinks) else: total = 0 if writeout == 1: if not os.path.exists(parentdir): os.makedirs(parentdir) if total > 0: if followdirs.lower() == 'n': for link in findlinks: b = link.get('href') if b: if not re.search(r'^(.*)(\/)$', str(b)): b = parent + b print("\nrequesting harvested URL: %s \r\n(press CTRL + C to skip)\n" % b) try: getCF(b, links) except KeyboardInterrupt: try: print("\r\nskipping %s... press CTRL + C again to quit.\n" % b) continue except KeyboardInterrupt: print("\nrequest cancelled.\n") break except (KeyboardInterrupt, SystemExit): print("\r\nrequest cancelled by user\n") keep = 0 break except Exception, e: print("\r\nan exception has occurred: %s \n" % str(e)) raise else: continue else: break total = total - 1 links = 1 elif followdirs.lower() == 'y' and depth > 0: choosedir = raw_input("choose subdirectory? Y/N --> ") while not re.match(r'^[YyNn]$', choosedir): choosedir = raw_input("invalid entry. enter Y to pick subdirectory or N to download everything --> ") if choosedir.lower() == 'n': links = 0 for link in findlinks: b = link.get('href') if b: bx = parent + b if not re.match(r'^((\.\.)?\/)$', str(b)): getdirs = followlinks(bx) while len(getdirs) > 0: for sd in getdirs: getdirs = followlinks(sd) print("\nrequesting harvested URL: %s \r\n(press CTRL + C to skip)\n" % bx) try: getCF(bx, links) if debug == 1: print("\nfound: %d \n" % found) except KeyboardInterrupt: try: print("\r\nskipping %s... 
press CTRL + C again to quit.\n" % bx) continue except KeyboardInterrupt: print("\nrequest cancelled.\n") sys.exit() except (KeyboardInterrupt, SystemExit): print("\r\nrequest cancelled by user\n") break except Exception, e: print("\r\nan exception has occurred: %s \n" % str(e)) raise sys.exit(1) links = 1 found = found - 1 else: subcont = 1 geturl = cfurl while subcont is not 0: depth += 1 if subcont < 1: break geturl, subcont, parent = selectdir(geturl) if debug == 1: print("\ndepth: %d \n" % depth) checksubdir = raw_input("enter 1 to select this directory, 2 to choose a subdirectory, or 3 to go back to parent directory --> ") while not re.match(r'^[1-3]$', checksubdir): checksubdir = raw_input("invalid input. enter a value 1-3 --> ") if checksubdir is not 2: if checksubdir == '3': p = urlparse(geturl) droppath = p.path.split('/')[-1] geturl = geturl.rstrip(droppath) break print('\nrequesting harvested URL: %s \r\n(press CTRL + C to skip) \n' % geturl) try: getCF(geturl, links) found = found - 1 except KeyboardInterrupt: try: print("\r\nskipping %s... press CTRL + C again to quit.\n" % geturl) continue except KeyboardInterrupt: print("\nrequest cancelled.\n") break except (KeyboardInterrupt, SystemExit): print("\r\nrequest cancelled by user\n") keep = 0 break except Exception, e: print("\r\nan exception has occurred: %s \n" % str(e)) raise sys.exit(1) finally: depth -= 1 if debug == 1: print("\ndepth: %d \n" % depth)
#!/bin/python3
import subprocess
import cfscrape
import json

# With get_tokens() cookie dict:
# tokens, user_agent = cfscrape.get_tokens("http://somesite.com")
# cookie_arg = "cf_clearance=%s; __cfduid=%s" % (tokens["cf_clearance"], tokens["__cfduid"])

# With get_cookie_string() cookie header; recommended for curl and similar external applications:
cookie_arg, user_agent = cfscrape.get_cookie_string("https://bittrex.com")

# With a custom user-agent string you can optionally provide:
# ua = "Scraping Bot"
# cookie_arg, user_agent = cfscrape.get_cookie_string("http://somesite.com", user_agent=ua)

print("Cookie: " + cookie_arg)
print("User Agent: " + user_agent)

c = {"cookies": cookie_arg, "userAgent": user_agent}
f = open('cf.json', 'w')
json.dump(c, f)
f.close()

# result = subprocess.check_output(["curl", "--cookie", cookie_arg, "-A", user_agent, "http://somesite.com"])

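# A minimal sketch of how the cf.json file written above might be consumed
# later, assuming the requests library; the clearance is only honoured together
# with the saved user agent, and the target URL is the same one scraped above.
import json
import requests

with open('cf.json') as f:
    cf = json.load(f)
resp = requests.get("https://bittrex.com",
                    headers={"Cookie": cf["cookies"], "User-Agent": cf["userAgent"]})
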
def get_cf_cookies(cls):
    try:
        cls.cf_cookies = cfscrape.get_cookie_string(cls.base_url, user_agent=cls.ua)[0]
    except Exception as err:
        logging.error(f"Couldn't get cloudflare cookies / {err}")

from imgToPdf import *
from time import sleep

LOGGING_FILE_NAME = "error.log"
LOGGING_MESSAGE_FORMAT = "%(asctime)s %(levelname)-8s %(message)s"
LOGGING_DATE_FORMAT = "%m/%d/%Y %I:%M %p"
logging.basicConfig(filename=LOGGING_FILE_NAME,
                    format=LOGGING_MESSAGE_FORMAT,
                    datefmt=LOGGING_DATE_FORMAT,
                    level=logging.DEBUG)

mainDir = "/Users/gyanesh/Documents/Anime Manga/Unread"
url = "https://www.funmanga.com"
cookie_arg, user_agent = cfscrape.get_cookie_string(url)


def argumentParser():
    parser = argparse.ArgumentParser(description='Fetch Url')
    parser.add_argument('-u', action="store", default=False, dest='url', help="url")
    parser.add_argument('-p', action="store", default=False, dest='pdf', help="convert to pdf")
    parser.add_argument('-c',

from __future__ import print_function
import sys, json
import cfscrape

# Load the data that PHP sent us
try:
    data = json.loads(sys.argv[1])
    cookies, user_agent = cfscrape.get_cookie_string(data['url'], user_agent=data['user_agent'])
    print(json.dumps({'cookies': cookies, 'agent': user_agent}))
except:
    print("No JSON sent?")
    sys.exit(1)

#!/usr/bin/env python
# CodeName BY Ari
# @WongNdesoCok
import cfscrape
import sys
import requests

request = "GET / HTTP/1.1\r\n"
sess = cfscrape.create_scraper()

# ASCII-art "Cookie Scraper" banner (original line breaks lost in extraction)
print """ ____ _ _ ____ / ___|___ ___ | | _(_) ___ / ___| ___ _ __ __ _ _ __ ___ _ __ | | / _ \ / _ \| |/ / |/ _ \ \___ \ / __| '__/ _` | '_ \ / _ \ '__| | |__| (_) | (_) | <| | __/ ___) | (__| | | (_| | |_) | __/ | \____\___/ \___/|_|\_\_|\___| |____/ \___|_| \__,_| .__/ \___|_| Code: By WongNdesoCok |_| """

jancok = raw_input("Enter Target{ex:http/https://Target.com}~# ")
cookie_value, user_agent = cfscrape.get_cookie_string(jancok)
request += "Cookie: %s\r\nUser_Agent: %s\r\n" % (cookie_value, user_agent)
print
print sess.get(jancok).content
print
print request

import subprocess
import cfscrape
import requests

requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = 'ALL'

cookie_arg, user_agent = cfscrape.get_cookie_string("https://aduanku.my")
print("Cookie: ", cookie_arg, " Agent:", user_agent)

result = subprocess.check_output(["curl", "--cookie", cookie_arg, "-A", user_agent,
                                  "https://aduanku.my/open311/v2/requests.json",
                                  "-o", "./DATA/all-aduanku-requests.json"])

# -*- coding: UTF-8 -*-
import cfscrape

proxies = {
    'http': 'http://127.0.0.1:1087',
    'https': 'http://127.0.0.1:1087'
}

tokens, user_agent = cfscrape.get_cookie_string('http://www.javlibrary.com/', proxies=proxies)  # noqa

with open('./session.txt', 'w') as file:
    file.write('{}+{}\r\n'.format(tokens, user_agent))

print '\033[94m' + 'Session file generated successfully! 💊' + '\033[0m'

def __init__(self):
    print "Connecting...\n"
    self.cookie_arg, self.user_agent = cfscrape.get_cookie_string(
        "https://torrentproject.se")

async def _listen(self):
    """
    Uses signalr protocol:
    https://github.com/TargetProcess/signalr-client-py
    https://github.com/slazarov/python-bittrex-websocket/blob/master/bittrex_websocket/websocket_client.py
    """
    if self.connected:
        return
    conn_data = json.dumps([{'name': self.SOCKET_HUB}])
    url = self.SOCKET_URL + 'negotiate' + '?' + urlencode({
        'clientProtocol': '1.5',
        'connectionData': conn_data,
        '_': round(time.time() * 1000)
    })
    cookie_str, user_agent = cfscrape.get_cookie_string(url)
    async with aiohttp.ClientSession(headers={
        'User-Agent': user_agent,
        'Cookie': cookie_str
    }) as session:
        async with session.get(url) as r:
            socket_conf = await r.json()
        socket_url = self.SOCKET_URL.replace('https', 'wss') + 'connect' + '?' + urlencode({
            'transport': 'webSockets',
            'clientProtocol': socket_conf['ProtocolVersion'],
            'connectionToken': socket_conf['ConnectionToken'],
            'connectionData': conn_data,
            'tid': 3
        })
        async with session.ws_connect(socket_url) as ws:
            self._ws = ws
            self.connected = True
            for n, ticker in enumerate(self.tickers, start=1):
                message = {
                    'H': self.SOCKET_HUB,
                    'M': 'SubscribeToExchangeDeltas',
                    'A': [ticker],
                    'I': n
                }
                await ws.send_str(json.dumps(message))
            async for msg in ws:
                if msg.type == aiohttp.WSMsgType.TEXT:
                    self._last_message = time.time()
                    try:
                        data = json.loads(msg.data)
                        if 'M' in data:
                            for block in data['M']:
                                if block['M'] == 'updateExchangeState':
                                    for change in block['A']:
                                        trades = change['Fills']
                                        if trades:
                                            await self.on_trades(market=change['MarketName'], trades=trades)
                    except Exception as e:
                        logger.error("Error while handling message: {}".format(e))
                elif msg.tp == aiohttp.WSMsgType.closed:
                    break
                elif msg.tp == aiohttp.WSMsgType.error:
                    break
                else:
                    logger.warning("Message: {}".format(msg.tp))
    self._ws = None