def get_proxy_ips(self, limit=1):
    # https://proxybroker.readthedocs.io/en/latest/examples.html
    # NEED TO COMMENT OUT THE REMOVAL OF IP_CHECKERS IN LINE 90 OF
    # resolver.py within the ProxyBroker code! Otherwise, it will
    # fail after doing the refresh.
    host, port = '127.0.0.1', 8888  # by default
    types = [('HTTP', 'High'), 'HTTPS', 'CONNECT:80']
    codes = [200, 301, 302]
    proxies = asyncio.Queue()
    broker = Broker(proxies, max_tries=2)
    new_list = []
    try:
        logging.info("Gathering proxies using the ProxyBroker API")
        tasks = asyncio.gather(
            broker.find(types=['HTTP', 'HTTPS'], limit=limit),
            self.show(proxies, new_list))
        logging.debug("Proxy gathering tasks created.")
        loop = asyncio.get_event_loop()
        logging.debug("Got event loop.")
        loop.run_until_complete(tasks)
        logging.info("Ran until complete.")
        broker.stop()
        logging.info("Broker stopped successfully.")
    except Exception as e:
        logging.error(
            "Error encountered while collecting proxies in get_proxy_ips().")
        logging.error(e, exc_info=True)
    return new_list
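
# A minimal sketch of the `self.show` coroutine referenced above (it would
# live on the same class; the body is an assumption modeled on the
# ProxyBroker docs example): drain the result queue into new_list until the
# broker signals completion by pushing None.
async def show(self, proxies, new_list):
    while True:
        proxy = await proxies.get()
        if proxy is None:
            break
        new_list.append('%s:%d' % (proxy.host, proxy.port))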
def get_proxies(timeout=20, broker_timeout=7, max_conn=150, max_tries=3,
                limit=40):
    print('Loading proxy list')
    broker = None
    tasks = None
    try:
        proxy_list.clear()
        setup_proxy(reset=True)
        proxies = asyncio.Queue()
        broker = Broker(proxies, timeout=broker_timeout,
                        max_conn=max_conn, max_tries=max_tries)
        tasks = asyncio.gather(broker.find(types=['SOCKS5'], limit=limit),
                               save_proxy(proxies))
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.wait_for(tasks, timeout))
        print('Loaded proxies:', colored(len(proxy_list), 'cyan'))
    except Exception as e:
        print(colored('Error while loading proxies.', 'red'), e)
        time.sleep(5)
    finally:
        # Guard against NameError: broker/tasks may never have been
        # assigned if an exception was raised early in the try block.
        if broker is not None:
            broker.stop()
        if tasks is not None:
            tasks.cancel()
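
# A minimal sketch of the `save_proxy` coroutine gathered above; the body is
# an assumption (collect found SOCKS5 proxies into the module-level
# proxy_list until the broker pushes None).
async def save_proxy(proxies):
    while True:
        proxy = await proxies.get()
        if proxy is None:
            break
        proxy_list.append('socks5://{}:{}'.format(proxy.host, proxy.port))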
def get_proxy_ips():
    host, port = '127.0.0.1', 8888  # by default
    types = [('HTTP', 'High'), 'HTTPS', 'CONNECT:80']
    codes = [200, 301, 302]
    proxies = asyncio.Queue()
    broker = Broker(proxies, max_tries=1)
    new_list = []
    tasks = asyncio.gather(broker.find(types=['HTTP', 'HTTPS'], limit=10),
                           show(proxies, new_list))

    # Broker.serve() also supports all arguments that are accepted by the
    # Broker.find() method: data, countries, post, strict, dnsbl.
    # broker.serve(host=host, port=port, types=types, limit=100, max_tries=3,
    #              prefer_connect=True, min_req_proxy=5, max_error_rate=0.5,
    #              max_resp_time=8, http_allowed_codes=codes, backlog=100)
    # print(broker.find())
    # urls = ['http://httpbin.org/get', 'https://httpbin.org/get',
    #         'http://httpbin.org/redirect/1', 'http://httpbin.org/status/404']
    # proxy_url = 'http://%s:%d' % (host, port)
    # loop.run_until_complete(get_pages(urls, proxy_url))

    loop = asyncio.get_event_loop()
    loop.run_until_complete(tasks)
    broker.stop()
    return new_list
def main():
    host, port = '127.0.0.1', 30003  # by default
    loop = asyncio.get_event_loop()
    types = ['HTTP', 'HTTPS']
    codes = [200]
    broker = Broker(max_tries=1, loop=loop)

    # Broker.serve() also supports all arguments that are accepted by the
    # Broker.find() method: data, countries, post, strict, dnsbl.
    broker.serve(host=host, port=port, types=types, limit=1000, max_tries=2,
                 prefer_connect=True, min_req_proxy=4, max_error_rate=0.25,
                 max_resp_time=4, http_allowed_codes=codes, backlog=100)

    # urls = ['https://translate.google.com/', 'https://google.com']
    # proxy_url = 'http://%s:%d' % (host, port)
    #
    # source, from_lang='auto', to_lang='en'
    # objs = [('i love my father', 'en', 'es'),
    #         ('so does my mother', 'en', 'es')]
    # res = translate(proxy_url=proxy_url, source=objs[0][0],
    #                 from_lang='en', to_lang='es')
    # res = normal_google(proxy_url)
    # print(res)
    # sth = loop.run_until_complete(get_translates(proxy_url, objs))
    # sth = loop.run_until_complete(get_pages(urls, proxy_url))
    # for fut in sth:
    #     print("return value is {}".format(fut.result()))

    # NOTE: with the request code above commented out, nothing drives the
    # event loop, so the proxy server is torn down again immediately.
    broker.stop()
def main():
    host, port = '127.0.0.1', 8888  # by default
    loop = asyncio.get_event_loop()
    types = [('HTTP', 'High'), 'HTTPS', 'CONNECT:80']
    codes = [200, 301, 302]
    broker = Broker(max_tries=1, loop=loop)

    # Broker.serve() also supports all arguments that are accepted by the
    # Broker.find() method: data, countries, post, strict, dnsbl.
    broker.serve(host=host, port=port, types=types, limit=10, max_tries=3,
                 prefer_connect=True, min_req_proxy=5, max_error_rate=0.5,
                 max_resp_time=8, http_allowed_codes=codes, backlog=100)

    urls = ['http://httpbin.org/get', 'https://httpbin.org/get',
            'http://httpbin.org/redirect/1', 'http://httpbin.org/status/404']
    proxy_url = 'http://%s:%d' % (host, port)
    loop.run_until_complete(get_pages(urls, proxy_url))
    broker.stop()
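
# A minimal sketch of the `get_pages` helper driven above, modeled on the
# ProxyBroker docs proxy-server example; the exact body is an assumption
# (fetch each URL with aiohttp through the local proxy server).
import aiohttp

async def get_pages(urls, proxy_url):
    async with aiohttp.ClientSession() as session:
        for url in urls:
            try:
                async with session.get(url, proxy=proxy_url) as resp:
                    body = await resp.read()
                    print('url: %s; status: %d; content: %.60r'
                          % (url, resp.status, body))
            except aiohttp.ClientError as e:
                print('url: %s; error: %r' % (url, e))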
def check_proxies(cls, proxy_list):
    """Class method for checking a given proxy list using ProxyBroker."""

    async def fetch_proxy(proxies):
        new_proxy_list = []
        while True:
            proxy = await proxies.get()
            if proxy is None:
                break
            proto = "https" if "HTTPS" in proxy.types else "http"
            row = f"{proto}://{proxy.host}:{proxy.port}"
            if row not in new_proxy_list:
                new_proxy_list.append(row)
        return new_proxy_list

    is_main_thread = threading.current_thread() is threading.main_thread()
    logger.info(f'[Thread: {"Main" if is_main_thread else "Not main"}] '
                f"Proxy checking is requested.")
    with cls.check_lock:
        logger.info(f'[Thread: {"Main" if is_main_thread else "Not main"}] '
                    f"Proxy checking is started.")
        s = get_project_settings()
        proxy_q = asyncio.Queue()
        if is_main_thread:
            broker = Broker(proxy_q)
        else:
            # SIGINT handlers can only be installed from the main thread.
            broker = Broker(proxy_q, stop_broker_on_sigint=False)
        try:
            tasks = asyncio.gather(
                broker.find(
                    data=proxy_list,
                    types=s.get("PROXY_TYPES"),
                    countries=s.get("PROXY_COUNTRIES"),
                    strict=True,
                    dnsbl=s.get("PROXY_DNSBL"),
                ),
                fetch_proxy(proxy_q),
            )
            loop = asyncio.get_event_loop()
            _, proxy_list = loop.run_until_complete(tasks)
        except RuntimeError as e:
            logger.error(f"Error happened on proxy checking, cancelled: {e}")
            broker.stop()
        else:
            logger.info(
                f'[Thread: {"Main" if is_main_thread else "Not main"}]: '
                f"Proxy checking has ended. "
                f"Number of collected proxies: {len(proxy_list)}")
    return proxy_list
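
# Hypothetical usage sketch (the enclosing class name is an assumption; only
# check_proxies and check_lock are visible in the snippet above):
#
#   alive = ProxyStore.check_proxies(["http://1.2.3.4:8080",
#                                     "https://5.6.7.8:3128"])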
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--addr", help="specify host",
                        default='127.0.0.1')
    parser.add_argument("-p", "--port", help="specify port", type=int,
                        default=8808)
    args = parser.parse_args()

    loop = asyncio.get_event_loop()
    types = ['HTTPS', 'HTTP']
    broker = Broker(max_tries=1, loop=loop)
    broker.serve(host=args.addr, port=args.port, types=types, limit=10,
                 max_tries=3)
    try:
        loop.run_forever()
    finally:
        # run_forever() only returns once the loop is stopped; make sure
        # the broker is shut down even on Ctrl+C.
        broker.stop()
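
# Example invocation (assuming the snippet above is saved as, e.g.,
# proxy_server.py):
#
#   python proxy_server.py --addr 127.0.0.1 --port 8808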
def serve():
    loop = asyncio.get_event_loop()
    input_broker = {k: v for k, v in config["broker"].items()
                    if v != "Default"}
    broker = Broker(**input_broker, loop=loop)
    input_serve = {k: v for k, v in config["serve"].items()
                   if v != "Default"}
    types = [("HTTP", "High"), "HTTPS", "CONNECT:80"]
    broker.serve(**input_serve, types=types)
    try:
        loop.run_forever()
    except KeyboardInterrupt:
        broker.stop()
    finally:
        loop.stop()
        loop.close()
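
# A hypothetical shape for the `config` mapping consumed above; entries set
# to the string "Default" are filtered out so ProxyBroker falls back to its
# own defaults (the concrete values here are illustrative assumptions):
config = {
    "broker": {"max_conn": 200, "max_tries": 3, "timeout": "Default"},
    "serve": {"host": "127.0.0.1", "port": 8888, "limit": "Default"},
}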
class VeryScrape:
    """
    Many API, much data, VeryScrape!

    :param q: Queue to output data gathered from scraping
    :param loop: Event loop to run the scraping
    """

    def __init__(self, q, loop=None):
        # Declaring items in __init__ allows the items to
        # be cancelled from the close method of this class
        self.items = None
        self.loop = loop or asyncio.get_event_loop()
        self.queue = q
        self.using_proxies = False
        proxy_queue = asyncio.Queue(loop=self.loop)
        self.proxies = ProxyPool(proxy_queue)
        self.proxy_broker = Broker(queue=proxy_queue, loop=self.loop)
        self.kill_event = asyncio.Event(loop=self.loop)
        self.loop.add_signal_handler(signal.SIGINT, self.close)

    async def scrape(self, config, *, n_cores=1, max_items=0, max_age=None):
        """
        Scrape, process and organize data on the web based on a scrape config

        :param config: dict: scrape configuration
            This is a map of scrape sources to data gathering information.
            The basic scheme is as follows (see examples for a real example):
            {
                "source1": {
                    "first_authentication|split|by|pipe": {
                        "topic1": ["query1", "query2"],
                        "topic2": ["query3", "query4"]
                    },
                    "second_authentication|split|by|pipe": {
                        "topic3": ["query5", "query6"]
                    }
                },
                "source2": ...
            }
        :param n_cores: number of cores to use for processing data
            Set to 0 to use all available cores.
            Set to -1 to disable processing.
        :param max_items: maximum number of items kept by the ItemSorter
        :param max_age: maximum age of items kept by the ItemSorter
        """
        if isinstance(config, str):
            with open(config) as f:
                config = json.load(f)
        assert isinstance(config, dict), \
            'Configuration must be a dict or a path to a json config file'
        try:
            scrapers, streams, topics = \
                self.create_all_scrapers_and_streams(config)
        except Exception as e:
            raise ValueError(
                'Invalid scrape configuration').with_traceback(e.__traceback__)

        self.items = ItemMerger(*[stream() for stream in streams])
        if n_cores > -1:
            self.items = ItemProcessor(
                self.items,
                # one core is needed to run the event loop
                n_cores=n_cores or cpu_count() - 1,
                loop=self.loop)
            # Update topics of ItemProcessor for classifying
            self.items.update_topics(**topics)
        if max_items > 0 or max_age is not None:
            self.items = ItemSorter(self.items, max_items=max_items,
                                    max_age=max_age)
        # Start finding proxies if any scrapers use proxies
        if self.using_proxies:
            asyncio.ensure_future(self._update_proxies())

        async for item in self.items:
            await self.queue.put(item)

        await asyncio.gather(*[s.close() for s in scrapers])

    def close(self):
        self.kill_event.set()
        if self.items is not None:
            self.items.cancel()
        self.proxy_broker.stop()

    def create_all_scrapers_and_streams(self, config):
        """
        Creates all scrapers and stream functions associated with a config

        A scraper is a class inheriting from scrape.Scraper
        A stream function is a function returning an items.ItemGenerator

        :param config: scrape configuration
        :return: list of scrapers, list of stream functions,
            topics keyed by source
        """
        scrapers = []
        streams = []
        topics_by_source = defaultdict(dict)
        for source, auth_topics in config.copy().items():
            for auth, metadata in auth_topics.items():
                args, kwargs = self._create_args_kwargs(auth, metadata)
                scraper, _streams = self._create_single_scraper_and_streams(
                    metadata, _scrapers[source], args, kwargs)
                topics_by_source[source].update(metadata)
                scrapers.append(scraper)
                streams.extend(_streams)
        return scrapers, streams, topics_by_source

    def _create_args_kwargs(self, auth, metadata):
        args = []
        if auth:
            args.extend(auth.split('|'))
        kwargs = {'proxy_pool': None}
        kwargs.update(metadata.pop('kwargs', {}))
        use_proxies = metadata.pop('use_proxies', False)
        if use_proxies:
            self.using_proxies = True
            kwargs.update(proxy_pool=self.proxies)
        return args, kwargs

    @staticmethod
    def _create_single_scraper_and_streams(topics, klass, args, kwargs):
        streams = []
        scraper = klass(*args, **kwargs)
        for topic, queries in topics.items():
            if klass in _classifying_scrapers.values():
                topic = '__classify__'
            for q in queries:
                streams.append(partial(scraper.stream, q, topic=topic))
        return scraper, streams

    async def _update_proxies(self):
        while not self.kill_event.is_set():
            await self.proxy_broker.find(
                strict=True, types=['HTTP', 'HTTPS'],
                judges=['http://httpbin.org/get?show_env',
                        'https://httpbin.org/get?show_env'])
            # default proxy-broker sleep cycle for continuous find
            await asyncio.sleep(180)  # pragma: nocover
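
# Hypothetical usage sketch (the source name and pipe-separated auth string
# are illustrative assumptions; see the `scrape` docstring above for the
# expected config scheme):
async def run_scrape():
    q = asyncio.Queue()
    vs = VeryScrape(q)
    await vs.scrape({
        'twitter': {
            'api_key|api_secret|token|token_secret': {
                'tech': ['python', 'asyncio']
            }
        }
    }, n_cores=1)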
def get_proxies_programmatically(cls):
    """Class method for collecting free proxies at runtime using ProxyBroker.

    cls.is_initial_collection controls whether the initial or the periodic
    collection limit is used."""

    async def fetch_proxy(proxies):
        while True:
            proxy = await proxies.get()
            if proxy is None:
                break
            proto = "https" if "HTTPS" in proxy.types else "http"
            row = f"{proto}://{proxy.host}:{proxy.port}"
            if row not in proxy_list:
                proxy_list.append(row)
        return proxy_list

    is_main_thread = threading.current_thread() is threading.main_thread()
    logger.info(f'[Thread: {"Main" if is_main_thread else "Not main"}] '
                f"Proxy collection is requested programmatically.")
    with cls.gather_lock:
        proxy_list = []
        logger.info(f'[Thread: {"Main" if is_main_thread else "Not main"}] '
                    f"Programmatic proxy collection is starting.")
        s = get_project_settings()
        limit = s.getint("PROXY_PERIODIC_COUNT", 10)
        if cls.is_initial_collection:
            limit = s.getint("PROXY_INITIAL_COUNT", 100)
        proxy_q = asyncio.Queue()
        if is_main_thread:
            broker = Broker(proxy_q)
        else:
            # SIGINT handlers can only be installed from the main thread.
            broker = Broker(proxy_q, stop_broker_on_sigint=False)
        try:
            tasks = asyncio.gather(
                broker.find(
                    types=s.get("PROXY_TYPES"),
                    countries=s.get("PROXY_COUNTRIES"),
                    strict=True,
                    dnsbl=s.get("PROXY_DNSBL"),
                    limit=limit,
                ),
                fetch_proxy(proxy_q),
            )
            loop = asyncio.get_event_loop()
            _, proxy_list = loop.run_until_complete(tasks)
        except Exception as e:
            logger.error(
                f'[Thread: {"Main" if is_main_thread else "Not main"}] '
                f"Error happened on programmatic proxy collection, "
                f"cancelled: {e}")
            broker.stop()
        else:
            logger.info(
                f'[Thread: {"Main" if is_main_thread else "Not main"}]: '
                f"Programmatic proxy collection has ended. "
                f'Type of collection: '
                f'{"initial" if cls.is_initial_collection else "periodic"}. '
                f"Number of collected proxies: {len(proxy_list)}")
        # The initial collection is done, so later runs use the smaller
        # periodic batch size.
        cls.is_initial_collection = False
    return proxy_list
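
# Hypothetical Scrapy settings consumed above (the setting names come from
# the code; the values are illustrative assumptions):
PROXY_TYPES = ["HTTP", "HTTPS"]
PROXY_COUNTRIES = None      # no country restriction
PROXY_DNSBL = []            # DNS blacklists to screen proxies against
PROXY_INITIAL_COUNT = 100   # first collection gathers a large batch
PROXY_PERIODIC_COUNT = 10   # later collections top up in small batches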
class tournament:
    """tournament!"""

    def __init__(self, bot):
        self.bot = bot
        self.proxypath = 'data/tourney/proxyfile.txt'
        self.goodproxy = list(proxies_list)
        with open(self.proxypath, 'r') as f:
            for line in f:
                asplit = line.strip().split(':')
                self.goodproxy.append(Proxy(host=asplit[0],
                                            port=int(asplit[1])))
        print("{} proxies were loaded".format(len(self.goodproxy)))
        self.path = 'data/tourney/settings.json'
        self.settings = dataIO.load_json(self.path)
        self.auth = dataIO.load_json('cogs/auth.json')
        self.queue = asyncio.Queue()
        self.broker = Broker(self.queue)
        self.proxylist = deque(self.goodproxy)
        # Proxies get saved once, then have to earn the right to stay again
        self.goodproxy = []
        self.lastTag = '0'

    def __unload(self):
        self.broker.stop()

    def save_data(self):
        """Saves the json"""
        dataIO.save_json(self.path, self.settings)

    def getAuth(self):
        return {"auth": self.auth['token']}

    async def _is_allowed(self, member):
        server = member.server
        botcommander_roles = [
            discord.utils.get(server.roles, name=r) for r in [
                "Member", "Family Representative", "Clan Manager",
                "Clan Deputy", "Co-Leader", "Hub Officer", "admin", "Guest"
            ]
        ]
        botcommander_roles = set(botcommander_roles)
        author_roles = set(member.roles)
        return bool(author_roles.intersection(botcommander_roles))

    async def _fetchTourney(self):
        for _ in range(100):
            ua = UserAgent()
            headers = {"User-Agent": ua.random}
            str_proxy, proxy = self._get_proxy()
            if not str_proxy:
                return None
            proxies = {'http': str_proxy}
            try:
                tourneydata = requests.get(
                    'http://statsroyale.com/tournaments?appjson=1',
                    timeout=5, headers=headers, proxies=proxies).json()
            except (requests.exceptions.Timeout,
                    json.decoder.JSONDecodeError):
                continue
            else:
                # Reward working proxies by reusing them
                self._add_proxy(proxy)
                return tourneydata
        return None

    # Returns a dict describing a suitable tournament, or None
    async def getTopTourneyNew(self):
        try:
            tourneydata = await self._fetchTourney()
        except (requests.exceptions.Timeout, json.decoder.JSONDecodeError):
            return None
        except requests.exceptions.RequestException:
            return None
        if not tourneydata:
            return None

        tourney = {}
        for x in range(len(tourneydata['tournaments'])):
            hashtag = tourneydata['tournaments'][x]['hashtag']
            title = tourneydata['tournaments'][x]['title']
            totalPlayers = tourneydata['tournaments'][x]['totalPlayers']
            full = tourneydata['tournaments'][x]['full']
            maxPlayers = tourneydata['tournaments'][x]['maxPlayers']
            timeLeft = tourneydata['tournaments'][x]['timeLeft']
            cards = getCards(maxPlayers)
            coins = getCoins(maxPlayers)
            time = sec2tme(timeLeft)
            players = str(totalPlayers) + "/" + str(maxPlayers)
            if (maxPlayers > 50 and not full and timeLeft > 600
                    and (totalPlayers + 4) < maxPlayers
                    and hashtag != self.lastTag):
                try:
                    tourneydataAPI = requests.get(
                        'http://api.cr-api.com/tournaments/{}'.format(hashtag),
                        headers=self.getAuth(), timeout=10).json()
                    totalPlayers = tourneydataAPI['capacity']
                except Exception:
                    # Without the API data the checks below cannot run
                    continue
                full = (tourneydataAPI['capacity'] ==
                        tourneydataAPI['maxCapacity'])
                is_open = tourneydataAPI['type'] == 'open'
                if full or (totalPlayers + 4) > maxPlayers or not is_open:
                    continue
                self.lastTag = hashtag
                tourney['tag'] = hashtag
                tourney['title'] = title
                tourney['players'] = players
                tourney['time'] = time
                tourney['gold'] = coins
                tourney['cards'] = cards
                return tourney
        return None

    # Checks for a tournament every two minutes
    # (and pauses 15 minutes after posting one)
    async def checkTourney(self):
        while self is self.bot.get_cog("tournament"):
            data = await self.getTopTourneyNew()
            if data is not None:
                embed = discord.Embed(
                    title="New Tournament",
                    description="We found an open tournament. You can type "
                    "!tourney to search for more.",
                    color=0x00FFFF)
                embed.set_thumbnail(
                    url='https://statsroyale.com/images/tournament.png')
                embed.add_field(name="Title", value=data['title'],
                                inline=True)
                embed.add_field(name="Tag", value=data['tag'], inline=True)
                embed.add_field(name="Players", value=data['players'],
                                inline=True)
                embed.add_field(name="Ends In", value=data['time'],
                                inline=True)
                embed.add_field(name="Top prize",
                                value="<:coin:380832316932489268> "
                                + str(data['gold'])
                                + " <:tournamentcards:380832770454192140> "
                                + str(data['cards']),
                                inline=True)
                embed.set_footer(text=credits, icon_url=creditIcon)
                for serverid in self.settings.keys():
                    if self.settings[serverid]:
                        await self.bot.send_message(
                            discord.Object(id=self.settings[serverid]),
                            embed=embed)  # Family
                # await self.bot.send_message(
                #     discord.Object(id='363728974821457923'),
                #     embed=embed)  # testing
                await asyncio.sleep(900)
            await asyncio.sleep(120)

    @commands.command(pass_context=True, no_pm=True)
    async def tourney(self, ctx, minPlayers: int = 0):
        """Check an open tournament in clash royale instantly"""
        author = ctx.message.author
        await self.bot.type()
        allowed = await self._is_allowed(author)
        if not allowed:
            await self.bot.say(
                "Error, this command is only available for Legend Members "
                "and Guests.")
            return
        try:
            tourneydata = await self._fetchTourney()
        except (requests.exceptions.Timeout, json.decoder.JSONDecodeError):
            await self.bot.say(
                "Error: Cannot reach Clash Royale Servers. "
                "Please try again later.")
            return
        except requests.exceptions.RequestException:
            await self.bot.say(
                "Unexpected error while attempting to get tournaments.\n"
                "Please wait a bit then try again")
            return
        if not tourneydata:
            await self.bot.say(
                "Error: Cog hasn't fully loaded yet. "
                "Please wait a bit then try again")
            return

        numTourney = list(range(len(tourneydata['tournaments'])))
        random.shuffle(numTourney)
        for x in numTourney:
            title = tourneydata['tournaments'][x]['title']
            length = tourneydata['tournaments'][x]['length']
            totalPlayers = tourneydata['tournaments'][x]['totalPlayers']
            maxPlayers = tourneydata['tournaments'][x]['maxPlayers']
            full = tourneydata['tournaments'][x]['full']
            timeLeft = tourneydata['tournaments'][x]['timeLeft']
            startTime = tourneydata['tournaments'][x]['startTime']
            warmup = tourneydata['tournaments'][x]['warmup']
            hashtag = tourneydata['tournaments'][x]['hashtag']
            cards = getCards(maxPlayers)
            coins = getCoins(maxPlayers)
            if (not full and timeLeft > 600 and maxPlayers >= minPlayers
                    and hashtag != self.lastTag):
                self.lastTag = hashtag
                embed = discord.Embed(
                    title="Open Tournament",
                    description="Here is a good one I found. You can search "
                    "again if this is not what you are looking for.",
                    color=0x00FFFF)
                embed.set_thumbnail(
                    url='https://statsroyale.com/images/tournament.png')
                embed.add_field(name="Title", value=title, inline=True)
                embed.add_field(name="Tag", value="#" + hashtag, inline=True)
                embed.add_field(name="Players",
                                value=str(totalPlayers) + "/"
                                + str(maxPlayers),
                                inline=True)
                embed.add_field(name="Ends In", value=sec2tme(timeLeft),
                                inline=True)
                embed.add_field(name="Top prize",
                                value="<:coin:380832316932489268> "
                                + str(coins)
                                + " <:tournamentcards:380832770454192140> "
                                + str(cards),
                                inline=True)
                embed.set_footer(text=credits, icon_url=creditIcon)
                await self.bot.say(embed=embed)
                return
        await self.bot.say("No tournament found")

    @commands.command(pass_context=True, no_pm=True)
    @checks.admin_or_permissions(administrator=True)
    async def tourneychannel(self, ctx, channel: discord.Channel = None):
        """Choose the channel for posting top tournaments

        Pass no channel to disable"""
        serverid = ctx.message.server.id
        if not channel:
            self.settings[serverid] = None
            await self.bot.say("Tournament channel for this server cleared")
        else:
            self.settings[serverid] = channel.id
            await self.bot.say("Tournament channel for this server set to "
                               + channel.mention)
        self.save_data()

    def _get_proxy(self):
        """Grab and pop the oldest proxy"""
        if not self.proxylist:
            return None, None
        proxy = self.proxylist.popleft()
        proxystr = '{}:{}'.format(proxy.host, proxy.port)
        return proxystr, proxy

    def _add_proxy(self, proxy):
        """If a proxy worked, reward it by adding it back to the deque"""
        self.proxylist.append(proxy)
        # Append so previously saved proxies are not truncated
        with open(self.proxypath, 'a') as f:
            f.write('{}:{}\n'.format(proxy.host, proxy.port))

    async def _proxyBroker(self):
        while self is self.bot.get_cog("tournament"):
            types = ['HTTP']
            countries = ['US', 'DE', 'FR']  # currently unused by find() below
            self.broker.stop()
            await self.broker.find(types=types, limit=15)
            await asyncio.sleep(120)

    async def _brokerResult(self):
        while self is self.bot.get_cog("tournament"):
            anyfound = False
            await asyncio.sleep(8)
            print("Waiting on results from Proxy-Broker")
            while True:
                proxy = await self.queue.get()
                if proxy is None:
                    break
                if not isinstance(proxy, Proxy):
                    continue
                self.proxylist.append(proxy)
                if not anyfound:
                    print("Proxies are being found: {}".format(proxy))
                    anyfound = True
            if anyfound:
                print("No more proxies to be found")
            else:
                print("No proxies were found, trying in two minutes")
            await asyncio.sleep(120)
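
# A hypothetical setup hook for this cog (assumed: the background proxy and
# tournament-watch loops are scheduled on load, following the usual
# Red-DiscordBot v2 convention; nothing in the snippet above starts them):
def setup(bot):
    cog = tournament(bot)
    bot.add_cog(cog)
    bot.loop.create_task(cog.checkTourney())
    bot.loop.create_task(cog._proxyBroker())
    bot.loop.create_task(cog._brokerResult())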