def __init__(self, protocol='https', anonymity=True, autorefresh=True, max_refresh_tries=5,
             check_url='https://www.google.com', check_timeout=2, limit=-1, verbose=0):
    self.protocol = protocol
    self.anonymity = anonymity
    self._collector = create_collector('default', protocol)
    self._collector.apply_filter(dict(anonymous=anonymity))
    self.autorefresh = autorefresh
    self._max_refresh_tries = max_refresh_tries
    self._auto_refresh_counter = 0
    self.check_timeout = check_timeout
    self._check_url = check_url
    self.limit = limit
    self.verbose = verbose
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/87.0.4280.66 Safari/537.36'
    }
    self.proxies = []
    self._proxies = []
    self.refresh_proxies()
def autoscrape():
    collector = create_collector('my-collector', 'https')
    proxies = collector.get_proxies({'anonymous': True})
    print(len(proxies))
    selection = random.choice(proxies)
    # Each proxy is a namedtuple whose first two fields are host and port.
    proxy = str(selection[0] + ":" + selection[1])
    return proxy
def __init__(self, *args, **kwargs):
    headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "Accept": "text/plain",
        "User-agent": "Mozilla/5.0 (Linux NT 10.0; rv:68.0) Gecko/20100101 Firefox/68.0",
    }
    cookies = {'enwiki_session': '17ab96bd8ffbe8ca58a78657a918558'}
    self.proxy_app = []
    self.proxy_dict = {}
    protocol = 'https'
    self.collect_proxy = proxyscrape.create_collector('proxy', [protocol])
    self.getproxy = self.collect_proxy.get_proxy({
        'code': ('us', 'uk'),
        'anonymous': True
    })
    # get_proxy() returns a namedtuple; collect its fields, then map the
    # proxy type (second-to-last field) to a "host:port" string.
    for i in self.getproxy:
        self.proxy_app.append(i)
    self.proxy_dict[self.proxy_app[len(self.proxy_app) - 2]] = \
        self.proxy_app[0] + ':' + self.proxy_app[1]
    print(self.proxy_dict)
def get_proxy_list(proxy_number):
    proxy_list = []
    collector = proxyscrape.create_collector("default", "http")
    for i in range(proxy_number):
        proxygrabber = collector.get_proxy({"anonymous": True})
        if proxygrabber is None:
            # No anonymous proxy available right now; skip this slot.
            continue
        proxy = proxygrabber.host + ":" + proxygrabber.port
        proxy_list.append(proxy)
    return proxy_list
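# A minimal, hedged usage sketch of get_proxy_list() above; the test URL,
# the proxy count and the error handling are illustrative assumptions,
# not part of the original snippet.
import requests

for proxy in get_proxy_list(3):
    try:
        # Route a test request through one of the scraped anonymous proxies.
        response = requests.get("http://httpbin.org/ip",
                                proxies={"http": "http://" + proxy},
                                timeout=5)
        print(proxy, "->", response.status_code)
    except requests.RequestException:
        print(proxy, "-> failed")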
def make_collector():
    random_string = get_random_string(10)
    http_collector = create_collector(random_string, 'https')
    print("https")
    #resource_name = get_proxyscrape_resource(proxytype='http', timeout=5000, ssl='yes', anonymity='all', country='us')
    #add_resource_type("resource", resource_name)
    print("end https")
    return http_collector
def generate_proxy(protocol):
    """Retrieves a proxy according to protocol and returns its address"""
    if protocol not in ['https', 'http']:
        return None
    collector = create_collector(protocol + '-collector', protocol)
    collected = collector.get_proxy()
    if collected is None:
        # The collector may have no proxies available yet.
        return None
    proxy = protocol + '://' + collected.host + ':' + collected.port
    return proxy
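# A short usage sketch for generate_proxy(); the target URL, timeout and
# fallback message are illustrative assumptions. Requires the requests package.
import requests

proxy = generate_proxy('https')
if proxy:
    # The returned address already carries its scheme, e.g. "https://host:port".
    response = requests.get('https://httpbin.org/ip',
                            proxies={'https': proxy},
                            timeout=10)
    print(response.text)
else:
    print('No proxy available for the requested protocol.')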
def downloadproxies():
    collector = proxyscrape.create_collector('my-collector', ['http', 'socks4', 'socks5'])
    proxies = collector.get_proxies()
    with open("proxies.txt", "w") as file:
        file.write(str(proxies))
    print(" \33[38;5;214mSuccessfully saved proxies list!\n\033[0m")
def autoscrape():
    collector = create_collector('my-collector', 'https')
    full_proxies = collector.get_proxies({'anonymous': True})
    print(len(full_proxies))
    with open("config/proxies.txt", "w") as f:
        for proxy in full_proxies:
            f.write(str(proxy[0]) + ":" + str(proxy[1]) + "\n")
    # Rewrite the file with duplicate lines removed.
    uniqlines = set(open('config/proxies.txt').readlines())
    with open('config/proxies.txt', 'w') as finish:
        finish.writelines(uniqlines)
    return
def get_ips():
    global proxy_count, proxies
    collector = proxyscrape.create_collector('my-collector', 'http')
    proxies = collector.get_proxies({'country': 'united states'})
    proxy_count = len(proxies)
    print(f"Getting {proxy_count} proxies")
    '''
    for i in proxies:
        print(f"{i.host}:{i.port}")
        time.sleep(2)
    '''
    return proxies
def _get_free_proxies_collector():
    """Retrieve or create a Collector of free proxies.

    :return: Collector object
    """
    try:
        collector = get_collector('scraping-proxies')
    except CollectorNotFoundError:
        collector = create_collector('scraping-proxies', ['socks4', 'socks5'])
    return collector
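# A sketch of how the get-or-create helper above might be consumed; the
# anonymous filter and the printout are illustrative assumptions.
collector = _get_free_proxies_collector()

# Optionally narrow the pool before drawing from it (illustrative filter).
collector.apply_filter({'anonymous': True})

proxy = collector.get_proxy()
if proxy is not None:
    # Proxy is a namedtuple with host, port and type fields.
    print(f'{proxy.type}://{proxy.host}:{proxy.port}')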
def __init__(self, filters, refresh_interval, logstats_interval, stop_if_no_proxies,
             max_proxies_to_try, force_refresh_if_no_proxies, try_with_host):
    self.collector = create_collector('proxy-pool', ['http', 'https'], refresh_interval)
    self.collector.apply_filter(filters)
    self.refresh_interval = refresh_interval
    self.logstats_interval = logstats_interval
    self.stop_if_no_proxies = stop_if_no_proxies
    self.max_proxies_to_try = max_proxies_to_try
    self.force_refresh_if_no_proxies = force_refresh_if_no_proxies
    self.try_with_host = try_with_host
def scrapeproxies(proxytype):
    collector = create_collector('generatedfromcollector', [proxytype])
    proxycount = int(input("\nHOW MANY PROXIES WOULD YOU LIKE TO GENERATE?\n> "))
    print("\nScraping Proxies...\n")
    time.sleep(1)
    proxyprinted = 0
    proxyloop = True
    while proxyloop:
        proxy = collector.get_proxy()
        print("\u001b[32mSuccessfully\u001b[0m Scraped : " + str(proxy.host) + ":" + str(proxy.port))
        proxyprinted = proxyprinted + 1
        if proxyprinted == proxycount:
            proxyloop = False
    print("\nScraping Finished!")
    eee = input("Press ENTER To Go Back.")
def __init__(self):
    self.result_dir = os.path.join(os.getcwd(), "Result")
    if not os.path.exists(self.result_dir):
        os.makedirs(self.result_dir)
    self.collector = create_collector('my-collector', 'http')
    self.DB_Name = "proxies.db"
    self.tbl_name = "http"
    self.db_proxy = DB(dst_path=self.result_dir, dbname=self.DB_Name)
    self.fields = [
        "anonymous", "code", "country", "host", "port", "type", "date", "status"
    ]
    self.db_proxy.createTable(tbl_name=self.tbl_name, fields=self.fields)
def get_valid_proxies(iso_codes: tuple = ('ru',), anonymous: bool = True, type_proxy: str = 'https',
                      all: bool = False, number_of_checks: int = 100, check_timeout: int = 10) -> list:
    collector = create_collector(
        name='default_collector',
        resource_types=type_proxy,
        refresh_interval=3600,
    )
    filter_dict = {'anonymous': anonymous, 'type': type_proxy}
    if iso_codes != ():
        filter_dict['code'] = iso_codes
    collector.apply_filter(filter_dict)
    proxies, valid_proxies, i = [], [], 0
    while True:
        p = collector.get_proxy()
        print(p)
        if p and i < number_of_checks:
            p_str = f'{p.host}:{p.port}'
            if p_str not in proxies:
                print('-', p_str)
                proxies.append(p_str)
                try:
                    # A proxy is valid if api.ipify.org reports the proxy's own address.
                    if type_proxy == 'http':
                        req = r.get('http://api.ipify.org', proxies={'http': p_str}, timeout=check_timeout)
                    else:
                        req = r.get('https://api.ipify.org', proxies={'https': p_str}, timeout=check_timeout)
                    if req.text.strip() == p.host:
                        print('--', p_str)
                        if not all:
                            return [p_str]
                        valid_proxies.append(p_str)
                    collector.remove_proxy(p)
                except Exception:
                    collector.remove_proxy(p)
                    continue
            else:
                collector.remove_proxy(p)
        else:
            break
        i += 1
    return valid_proxies
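# A hedged call sketch for get_valid_proxies(); the country codes, check count
# and timeout are illustrative values, not taken from the original snippet.
valid = get_valid_proxies(iso_codes=('us', 'de'),
                          anonymous=True,
                          type_proxy='https',
                          all=False,
                          number_of_checks=20,
                          check_timeout=5)
print(valid)  # e.g. ['203.0.113.10:8080'], or [] if nothing validated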
def create_proxies(self):
    if len(self.proxies) == 0:
        resource_name = get_proxyscrape_resource(proxytype='all', timeout=10000, ssl='yes',
                                                 anonymity='elite', country='DE')
        collector = create_collector('my-collector', resources=resource_name)
        generatedProxies = 0
        while generatedProxies <= 3:
            proxy = collector.get_proxy()
            if proxy:
                proxyPath = proxy.host + ":" + proxy.port
                if proxyPath not in self.proxies:
                    print("Proxy added :", proxyPath)
                    generatedProxies += 1
                    self.proxies.append(proxyPath)
def proxyscrape_lib(self) -> Set[str]:
    """Parse proxies from the proxyscrape library."""
    free_proxies = scrapers.get_free_proxy_list_proxies()
    ssl_proxies = scrapers.get_ssl_proxies()
    try:
        collector = create_collector("default", "http")
    except CollectorAlreadyDefinedError:
        collector = get_collector("default")
    collector_proxies = set(collector.get_proxies())
    proxies = free_proxies | ssl_proxies | collector_proxies
    for proxy in proxies:
        prepare_proxy = f"{proxy.host}:{proxy.port}"
        if prepare_proxy not in self.proxy_set:
            self.proxy_set.add(prepare_proxy)
    logger.info(f"Parsed {len(self.proxy_set)} proxies from proxyscrape_lib")
    return self.proxy_set
def download_proxies(self, location=False):
    collector = proxyscrape.create_collector('collector1', ['https', 'http'])
    proxies_raw = ""
    if location:
        collector.apply_filter({"country": location})
    print("[DOWNLOAD] Downloading proxies...", end="")
    for i in range(len(self.combos)):
        a = collector.get_proxy()
        proxies_raw += str(a[0]) + ":" + str(a[1]) + "\n"
    print("done")
    if self.verbose:
        print(f"[WRITING] Writing proxies to {getcwd()}{self.slash}proxies.txt...", end="")
    with open("proxies.txt", "w") as pfile:
        pfile.write(proxies_raw)
    self.proxy_file = "proxies.txt"
    if self.verbose:
        print("done")
def __init__(self, filters, refresh_interval, logstats_interval, stop_if_no_proxies,
             max_proxies_to_try, force_refresh_if_no_proxies, try_with_host, elite, external_url):
    self.collector = create_collector('proxy-pool', ['http', 'https'], refresh_interval,
                                      None, elite, external_url)
    self.collector.apply_filter(filters)
    if self.collector.get_proxies():
        logger.info("Proxies: " + str(len(self.collector.get_proxies())))
    else:
        logger.info("Proxies: 0")
    self.refresh_interval = refresh_interval
    self.logstats_interval = logstats_interval
    self.stop_if_no_proxies = stop_if_no_proxies
    self.max_proxies_to_try = max_proxies_to_try
    self.force_refresh_if_no_proxies = force_refresh_if_no_proxies
    self.try_with_host = try_with_host
def update_proxy():
    db.taskid = '{date:%Y%m%d-%H%M%S}'.format(date=datetime.now())
    try:
        # Create a collector for https resources
        collector = proxyscrape.create_collector(
            'MyProxy' + str(random.randint(100, 10000)), 'https')
        # Get proxies
        proxies = []
        for i in range(50):
            print('getting proxy:', i)
            proxy = collector.get_proxy({
                'code': ('us', 'uk'),
                'anonymous': True
            })
            proxies.append(proxy.host + ':' + proxy.port)
        # Add proxies
        db.add_proxy(proxies)
        # Write to log
        db.add_sys_log('get proxy', 'success', '')
    except Exception as e:
        print(str(e))
        db.add_sys_log('get proxy', 'fail', str(e))
import proxyscrape

collector = proxyscrape.create_collector('default', 'http')
collector.refresh_proxies()
proxies = collector.get_proxies()
print(proxies)
import aiohttp
import json
import random
from proxyscrape import create_collector
import asyncio
from time import sleep
from aiohttp_proxy import ProxyConnector
from proxy import proxies
from proxy2 import proxies as proxies2
from proxy3 import proxies as proxies3
from proxy4 import proxies as proxies4
from proxy5 import proxies as proxies5
import sys

collector = create_collector('my-collector', 'http')

def getNewProxy(*args):
    new_proxy = getProxy()
    if len(args) != 0:
        while new_proxy == args[0]:
            new_proxy = getProxy()
    return new_proxy

def getProxy():
    toss = random.randint(0, 1)
    if toss == 0:
        proxyObj = collector.get_proxy()
        proxy = f'{proxyObj.type}://{proxyObj.host}:{proxyObj.port}'
import re
import requests
from bs4 import BeautifulSoup
from requests.compat import urljoin
import time
import random
from proxyscrape import create_collector

# the header could be replaced using fake_useragent module
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"
}

# initializing proxy list collector
collector = create_collector("my-collector", "http")
raw_data = collector.get_proxies()

home_page = "https://www.kwestiasmaku.com"

# storing a queue of internal links for processing
internal_links = set()

def parse_html(url):
    # listing proxy ip, port to be fed into requests
    proxies = []
    for i in raw_data:
        proxies.append(str(i[0]) + ":" + str(i[1]))

    # selecting random proxy ip, port
    def random_proxy():
    err_msg,
    sms_success,
)
from telegram import ForceReply
from telegram.ext import ConversationHandler, CommandHandler, MessageHandler, Filters
from github import Github
import threading
import string
import requests
import os
import proxyscrape
import random
from misc.invalid_msg import wrong_option

sessions = {}
sockets = proxyscrape.create_collector("default", "http")

def ask_num(update, context):
    try:
        adminlist = (Github(os.getenv("API")).get_repo(repo_path).get_contents(
            file_name).decoded_content.decode().strip().split("\n"))
        if str(update.message.from_user.id) in adminlist or (
                update.message.from_user.username
                and update.message.from_user.username.lower() in [i.lower() for i in adminlist]):
            update.message.reply_text(ask_no, reply_markup=ForceReply())
            return 0
        else:
            update.message.reply_text(not_admin)
            return ConversationHandler.END
def get_proxies(desired_amount: int = 1, proxy_timeout=0.5):
    proxies = []
    # https://stackoverflow.com/a/59531141
    try:
        collector_1 = proxyscrape.get_collector('collector-http')
    except proxyscrape.errors.CollectorNotFoundError:
        collector_1 = proxyscrape.create_collector('collector-http', 'http')
    full_list = list(collector_1.get_proxies())
    for item in full_list:
        proxies.append(item.host + ':' + item.port)
    print(bs.warning_o(bsl["PROXY"]["FOUND"]), bs.red_o(str(len(proxies))),
          bs.warning_o(bsl["PROXY"]["HTTP_PROXIES"]))
    print(bs.warning_o(bsl["PROXY"]["PERFORMANCE_CHECK"]))
    print(bs.warning_o(bsl["GENERAL"]["CTRL_Z_EXIT"]))
    time.sleep(1)
    bs.clear()
    start_time = time.time()
    cnt = 0
    print(bs.warning_o(bsl["PROXY"]["CHECKED"]) + bs.red_o(' 0 ') + bs.warning_o(bsl["PROXY"]["OUT_OF"]),
          bs.red_o(str(len(proxies))), bs.warning_o(bsl["PROXY"]["PROXIES_WITH_TIMEOUT"]),
          bs.red_o(str(proxy_timeout)), bs.warning_o(bsl["PROXY"]["SECONDS_3"]))
    print(bs.warning_o(bsl["PROXY"]["CHOSEN"]), bs.red_o(str(cnt)),
          bs.warning_o(bsl["PROXY"]["OUT_OF"]), bs.red_o(str(desired_amount)),
          bs.warning_o(bsl["PROXY"]["PROXIES_WITH_TIMEOUT"]), bs.red_o(str(proxy_timeout)),
          bs.warning_o(bsl["PROXY"]["SECONDS_3"]))
    print(bs.warning_o(bsl["GENERAL"]["CTRL_Z_EXIT"]))
    checked_proxy = []
    for ind, item in enumerate(proxies, start=1):
        if cnt < desired_amount:
            if bs.is_bad_proxy(item, proxy_timeout):
                print('[BAD PROXY]')
            else:
                checked_proxy.append(item)
                cnt += 1
        else:
            break
        bs.clear()
        print(bs.warning_o(bsl["PROXY"]["CHECKED"]), bs.red_o(str(ind)),
              bs.warning_o(bsl["PROXY"]["OUT_OF"]), bs.red_o(str(len(proxies))),
              bs.warning_o(bsl["PROXY"]["PROXIES_WITH_TIMEOUT"]), bs.red_o(str(proxy_timeout)),
              bs.warning_o(bsl["PROXY"]["SECONDS_3"]))
        print(bs.warning_o(bsl["PROXY"]["CHOSEN"]), bs.okgreen_o(str(cnt)),
              bs.warning_o(bsl["PROXY"]["OUT_OF"]), bs.red_o(str(desired_amount)),
              bs.warning_o(bsl["PROXY"]["PROXIES_WITH_TIMEOUT"]), bs.red_o(str(proxy_timeout)),
              bs.warning_o(bsl["PROXY"]["SECONDS_3"]))
        print(bs.warning_o(bsl["PROXY"]["EXIT_WARN"]))
        print(bs.warning_o(bsl["GENERAL"]["CTRL_Z_EXIT"]))
    end_time = time.time()
    extra_message = (bsl["PROXY"]["APPENDED"], str(cnt), bsl["PROXY"]["PROXIES_WITH_TIMEOUT"],
                     str(proxy_timeout), bsl["PROXY"]["SECONDS_1"], bsl["PROXY"]["TO_THE_PROXY_LIST"],
                     bsl["PROXY"]["IN"], str(round(end_time - start_time, 2)),
                     bsl["PROXY"]["SECONDS_1"] + ']')
    extra_message = bs.success_o(' '.join(x for x in extra_message))
    with open('proxy_list.txt', 'a') as infl:
        for item in checked_proxy:
            infl.write(''.join(item) + '\n')
    bs.print_full_main_screen(extra_message)
import pymysql.cursors
from datetime import datetime
from multiprocessing import Pool, current_process
import numpy as np
import random
import time
from fake_useragent import UserAgent
import params
import proxyscrape

HOST = 'https://www.rusprofile.ru/'
URL1 = 'https://www.rusprofile.ru/codes/89220'
URL2 = 'https://www.rusprofile.ru/codes/429110'

collector = proxyscrape.create_collector('my-collector', 'https')

# get the contents of a page
def get_html(url, my_proxy=None):
    process_name = current_process().name
    connection_is_ok = False
    if my_proxy is None:
        proxy = collector.get_proxy()
    else:
        proxy = my_proxy
    try:
        proxy_str = proxy.host + ":" + proxy.port
    except AttributeError:
browser_list = []
browser_one = webdriver.Chrome(r"C:\Users\Julien MAUCLAIR\Desktop\Bot Youtube\webdriver\chromedriver")
browser_two = webdriver.Chrome(r"C:\Users\Julien MAUCLAIR\Desktop\Bot Youtube\webdriver\chromedriver")
browser_three = webdriver.Chrome(r"C:\Users\Julien MAUCLAIR\Desktop\Bot Youtube\webdriver\chromedriver")
browser_four = webdriver.Chrome(r"C:\Users\Julien MAUCLAIR\Desktop\Bot Youtube\webdriver\chromedriver")
browser_list.append(browser_one)
browser_list.append(browser_two)
browser_list.append(browser_three)
browser_list.append(browser_four)

for browser in browser_list:
    browser.get(video_url)

collector = create_collector('proxy-collector', 'https')

a = 0
while a < vviews:
    while True:
        proxy = collector.get_proxy()
        full_proxy = proxy.host + ":" + proxy.port
        print(full_proxy)
        PROXY = full_proxy
        webdriver.DesiredCapabilities.CHROME['proxy'] = {
            "httpProxy": PROXY,
            "ftpProxy": PROXY,
from datetime import datetime

import requests
from urllib3.exceptions import ReadTimeoutError
from requests.exceptions import (
    ConnectTimeout, ProxyError, ReadTimeout, ConnectionError
)
from bs4 import BeautifulSoup
from proxyscrape import create_collector

from app.core.database import DB
from app.core.machine_learning import get_prediction

COLLECTOR = create_collector('collector', 'https')

def manage_proxies(func):
    proxies = []

    def wrapper(*args, **kwargs):
        nonlocal proxies
        response, new_proxies = func(*args, proxies, **kwargs)
        proxies = new_proxies
        return response
    return wrapper

@manage_proxies
def get_response_use_proxy(url, proxies):
import requests
from bs4 import BeautifulSoup
import proxyscrape
import time
import datetime
import json

collector = proxyscrape.create_collector("supreme", "http")
collector.apply_filter({'type': 'http'})

def read_file():
    with open('config.json') as config:
        data = json.load(config)
        discord_webhook = data["discordWebhook"]
        slack_webhook = data["slackWebhook"]
        region = data["region"]
        delay = data["delay"]
        use_proxies = data["useProxies"]
        proxies = data["proxies"]
    return discord_webhook, slack_webhook, region, delay, use_proxies, proxies

def parse_html(html):
    timestamp = datetime.datetime.now().isoformat()
    tohru = None
    soup = BeautifulSoup(html, "html.parser")
    if "us" in soup.body["class"]:
        region = "US"
    elif "eu" in soup.body["class"]:
        region = "EU"
def _get_proxy_collector(self, conf):
    name, proxy_types = conf["proxies"]["name"], conf["proxies"]["type"]
    collector = create_collector(name, proxy_types)
    collector.refresh_proxies(force=True)
    return collector
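# The conf mapping is only read, never shown, in the snippet above; a shape
# consistent with the keys it accesses might look like this (values are
# illustrative assumptions, not taken from the original code).
conf = {
    "proxies": {
        "name": "scraper-pool",        # collector name passed to create_collector()
        "type": ["http", "https"],     # resource types the collector will scrape
    }
}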
import re  # needed for the IP regex below
import proxyscrape
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.firefox.options import Options

req_proxy = RequestProxy()  # you may get a different number of proxies each time you run this
proxies = req_proxy.get_proxy_list()  # this will create the proxy list
USA = [proxy for proxy in proxies if proxy.country == 'United States']

# ============================== proxyscrape START ============================== #
open("proxies.txt", "w").close()
collector = proxyscrape.create_collector(
    'default', 'http')  # Create a collector for http resources
proxy = collector.get_proxies({'country': 'united states'})
for x in range(len(proxy)):
    # Pull the port and IP back out of the namedtuple's repr string
    # (proxy[x].host and proxy[x].port would give the same values directly).
    portBefore = (str(proxy[x]).split("port='", 1)[-1])
    portAfter = (str(portBefore).split("', code=")[0])
    ip = re.findall(r'[0-9]+(?:\.[0-9]+){3}', str(proxy[x]))
    ip = str(ip)
    rS1B = str((ip.split("['", 1)[1]))
    rS2B = str((rS1B.split("']")[0]))
    ipAndPort = (rS2B + ":" + portAfter)
    with open("proxies.txt", "a") as myfile:
        myfile.write(ipAndPort + "\n")
# ============================== proxyscrape END ============================== #