Example #1
    def __init__(self,
                 protocol='https',
                 anonymity=True,
                 autorefresh=True,
                 max_refresh_tries=5,
                 check_url='https://www.google.com',
                 check_timeout=2,
                 limit=-1,
                 verbose=0):
        self.protocol = protocol
        self.anonymity = anonymity
        self._collector = create_collector('default', protocol)
        self._collector.apply_filter(dict(anonymous=anonymity))
        self.autorefresh = autorefresh
        self._max_refresh_tries = max_refresh_tries
        self._auto_refresh_counter = 0
        self.check_timeout = check_timeout
        self._check_url = check_url
        self.limit = limit
        self.verbose = verbose

        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/87.0.4280.66 Safari/537.36'
        }
        self.proxies = []
        self._proxies = []

        self.refresh_proxies()
Example #2
def autoscrape():
    collector = create_collector('my-collector', 'https')
    proxies = collector.get_proxies({'anonymous': True})
    print(len(proxies))
    selection = random.choice(proxies)
    proxy = str(selection[0] + ":" + selection[1])
    return proxy
Example #3
    def __init__(self, *args, **kwargs):

        headers = {
            "Content-type":
            "application/x-www-form-urlencoded",
            "Accept":
            "text/plain",
            "User-agent":
            "Mozilla/5.0 (Linux NT 10.0; rv:68.0) Gecko/20100101 Firefox/68.0",
        }
        cookies = {'enwiki_session': '17ab96bd8ffbe8ca58a78657a918558'}

        self.proxy_app = []
        self.proxy_dict = {}
        protocol = 'https'
        self.collect_proxy = proxyscrape.create_collector('proxy', [protocol])

        self.getproxy = self.collect_proxy.get_proxy({
            'code': ('us', 'uk'),
            'anonymous': True
        })

        for i in self.getproxy:
            self.proxy_app.append(i)

        self.proxy_dict[self.proxy_app[-2]] = (
            self.proxy_app[0] + ':' + self.proxy_app[1])

        print(self.proxy_dict)
Example #4
def get_proxy_list(proxy_number):
    proxy_list = []
    collector = proxyscrape.create_collector("default", "http")
    for i in range(proxy_number):
        proxygrabber = collector.get_proxy({"anonymous": True})
        proxy = proxygrabber.host + ":" + proxygrabber.port
        proxy_list.append(proxy)
    return proxy_list
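Note: collector.get_proxy() appears to return None once no proxy matches the filter (several of the later examples guard for this with an if-check), so the loop in Example #4 can fail on an exhausted pool. Below is a minimal defensive sketch, not part of the original; the collector name and retry cap are illustrative assumptions.

import proxyscrape

def get_proxy_list_safe(proxy_number, max_attempts=100):
    # Illustrative variant of get_proxy_list() above: skips the None
    # that get_proxy() yields when no matching proxy is available.
    proxy_list = []
    collector = proxyscrape.create_collector("default-safe", "http")
    attempts = 0
    while len(proxy_list) < proxy_number and attempts < max_attempts:
        attempts += 1
        grabbed = collector.get_proxy({"anonymous": True})
        if grabbed is None:
            continue
        proxy_list.append(grabbed.host + ":" + grabbed.port)
    return proxy_list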
Example #5
def make_collector():
    random_string = get_random_string(10)
    http_collector = create_collector(random_string, 'https')
    print("https")
    #resource_name = get_proxyscrape_resource(proxytype='http', timeout=5000, ssl='yes', anonymity='all', country='us')
    #add_resource_type("resource", resource_name)
    print("end https")
    return http_collector
Example #6
def generate_proxy(protocol):
    """Retrieves a proxy according to protocol and returns its address"""
    if protocol not in ['https', 'http']:
        return None

    collector = create_collector(protocol + '-collector', protocol)
    collected = collector.get_proxy()
    proxy = protocol + '://' + collected.host + ':' + collected.port
    return proxy
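A short usage sketch (not part of the original) showing how the "protocol://host:port" string returned by generate_proxy() would typically be handed to the requests library; the check URL and timeout are only placeholders.

import requests

proxy_url = generate_proxy('https')  # e.g. 'https://1.2.3.4:8080'
if proxy_url is not None:
    # requests expects a mapping of URL scheme to proxy URL
    response = requests.get('https://api.ipify.org',
                            proxies={'http': proxy_url, 'https': proxy_url},
                            timeout=10)
    print(response.text)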
Example #7
def downloadproxies():
    collector = proxyscrape.create_collector('my-collector',
                                             ['http', 'socks4', 'socks5'])
    proxies = collector.get_proxies()
    file = open("proxies.txt", "w")
    file.write(str(proxies))
    file.close()
    print(
        "                                 \33[38;5;214mSuccessfully saved proxies list!\n\033[0m"
    )
Example #8
def autoscrape():
    collector = create_collector('my-collector', 'https')
    full_proxies = collector.get_proxies({'anonymous': True})
    print(len(full_proxies))
    with open("config/proxies.txt", "w") as f:
        for proxy in full_proxies:
            f.write(str(proxy[0]) + ":" + str(proxy[1]) + "\n")
    uniqlines = set(open('config/proxies.txt').readlines())
    with open('config/proxies.txt', 'w') as finish:
        finish.writelines(set(uniqlines))
    return
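The same deduplication can be done in memory before the file is written, avoiding the second read/write pass. A hedged sketch under that assumption; the collector name is changed so it does not clash with the one created above.

from proxyscrape import create_collector

def autoscrape_dedup():
    # Same idea as autoscrape() above, but deduplicates host:port pairs
    # in a set before writing, so the file is only opened once.
    collector = create_collector('my-collector-dedup', 'https')
    full_proxies = collector.get_proxies({'anonymous': True})
    unique = {p.host + ":" + p.port for p in full_proxies}
    with open("config/proxies.txt", "w") as f:
        f.write("\n".join(unique) + "\n")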
Example #9
def get_ips():
    global proxy_count, proxies
    collector = proxyscrape.create_collector('my-collector', 'http')
    proxies = collector.get_proxies({'country': 'united states'})
    proxy_count = len(proxies)
    print(f"Getting {proxy_count} proxies")
    '''
    for i in proxies:
        print(f"{i.host}:{i.port}")
        time.sleep(2)
    '''
    return proxies
Example #10
    def _get_free_proxies_collector():
        """Retrieve or create a Collector of free proxies.

        :return: Collector object
        """
        try:
            collector = get_collector('scraping-proxies')
        except CollectorNotFoundError:
            collector = create_collector('scraping-proxies',
                                         ['socks4', 'socks5'])

        return collector
Example #11
    def __init__(self, filters, refresh_interval, logstats_interval,
                 stop_if_no_proxies, max_proxies_to_try,
                 force_refresh_if_no_proxies, try_with_host):
        self.collector = create_collector('proxy-pool', ['http', 'https'],
                                          refresh_interval)
        self.collector.apply_filter(filters)

        self.refresh_interval = refresh_interval
        self.logstats_interval = logstats_interval
        self.stop_if_no_proxies = stop_if_no_proxies
        self.max_proxies_to_try = max_proxies_to_try
        self.force_refresh_if_no_proxies = force_refresh_if_no_proxies
        self.try_with_host = try_with_host
Example #12
def scrapeproxies(proxytype):
    collector = create_collector('generatedfromcollector', [proxytype])
    proxycount = int(input("\nHOW MANY PROXIES WOULD YOU LIKE TO GENERATE?\n> "))
    print("\nScraping Proxies...\n")
    time.sleep(1)
    proxyprinted = 0
    proxyloop = True
    while proxyloop == True:
        proxy = collector.get_proxy()
        print("\u001b[32mSuccesfully\u001b[0m Scraped : " + str(proxy.host) + ":" + str(proxy.port))
        proxyprinted = proxyprinted + 1
        if proxyprinted == proxycount:
            proxyloop = False
    print("\nScraping Finished!")
    eee = input("Press ENTER To Go Back.")
Example #13
    def __init__(self):
        self.result_dir = os.path.join(os.getcwd(), "Result")
        if not os.path.exists(self.result_dir):
            os.makedirs(self.result_dir)

        self.collector = create_collector('my-collector', 'http')

        self.DB_Name = "proxies.db"
        self.tbl_name = "http"
        self.db_proxy = DB(dst_path=self.result_dir, dbname=self.DB_Name)

        self.fields = [
            "anonymous", "code", "country", "host", "port", "type", "date", "status"
        ]
        self.db_proxy.createTable(tbl_name=self.tbl_name, fields=self.fields)
Example #14
def get_valid_proxies(iso_codes: tuple = ('ru',),
                      anonymous: bool = True,
                      type_proxy: str = 'https',
                      all: bool = False,
                      number_of_checks: int = 100,
                      check_timeout: int = 10) -> list:
    collector = create_collector(
        name='default_collector',
        resource_types=type_proxy,
        refresh_interval=3600,
    )
    filter_dict = {'anonymous': anonymous, 'type': type_proxy}
    if iso_codes != ():
        filter_dict['code'] = iso_codes
    collector.apply_filter(filter_dict)
    proxies, valid_proxies, i = [], [], 0
    while True:
        p = collector.get_proxy()
        print(p)
        if p and i < number_of_checks:
            p_str = f'{p.host}:{p.port}'
            if p_str not in proxies:
                print('-', p_str)
                proxies.append(p_str)
                try:
                    if type_proxy == 'http':
                        req = r.get('http://api.ipify.org',
                                    proxies={'http': p_str},
                                    timeout=check_timeout)
                    else:
                        req = r.get('https://api.ipify.org',
                                    proxies={'https': p_str},
                                    timeout=check_timeout)
                    if req.text == p.host:
                        print('--', p_str)
                        if not all:
                            return [p_str]
                        valid_proxies.append(p_str)
                        collector.remove_proxy(p)
                except Exception:
                    collector.remove_proxy(p)
                    continue
            else:
                collector.remove_proxy(p)
        else:
            break
        i += 1
    return valid_proxies
Example #15
 def create_proxies(self):
     if len(self.proxies) == 0:
         resource_name = get_proxyscrape_resource(proxytype='all',
                                                  timeout=10000,
                                                  ssl='yes',
                                                  anonymity='elite',
                                                  country='DE')
         collector = create_collector('my-collector',
                                      resources=resource_name)
         generatedProxies = 0
         while generatedProxies <= 3:
             proxy = collector.get_proxy()
             if proxy:
                 proxyPath = proxy.host + ":" + proxy.port
                 if proxyPath not in self.proxies:
                     print("Proxy added :", proxyPath)
                     generatedProxies += 1
                     self.proxies.append(proxyPath)
Example #16
    def proxyscrape_lib(self) -> Set[str]:
        """Parsing proxies from proxyscrape py library"""
        free_proxies = scrapers.get_free_proxy_list_proxies()
        ssl_proxies = scrapers.get_ssl_proxies()
        try:
            collector = create_collector("default", "http")
        except CollectorAlreadyDefinedError:
            collector = get_collector("default")
        collector_proxies = set(collector.get_proxies())
        proxies = free_proxies | ssl_proxies | collector_proxies

        for proxy in proxies:
            prepare_proxy = f"{proxy.host}:{proxy.port}"
            if prepare_proxy not in self.proxy_set:
                self.proxy_set.add(prepare_proxy)
        logger.info(
            f"From proxyscrape_lib were parsed {len(self.proxy_set)} proxies")
        return self.proxy_set
Example #17
 def download_proxies(self, location=False):
     collector = proxyscrape.create_collector('collector1', ['https', 'http']) 
     proxies_raw = ""
     if location:
         collector.apply_filter({"country": location})
     print("[DOWNLOAD] Downloading proxies...", end="")
     for i in range(len(self.combos)):
         a = collector.get_proxy() 
         proxies_raw += str(a[0]) + ":" + str(a[1]) + "\n" 
     print("done")
     if self.verbose:
         print(f"[WRITING] Writing proxies to {getcwd()}{self.slash}proxies.txt...", end="")
     with open("proxies.txt", "w") as pfile:
         pfile.write(proxies_raw)
         pfile.close()
     self.proxy_file = "proxies.txt"
     if self.verbose:
         print("done")
Example #18
    def __init__(self, filters, refresh_interval, logstats_interval,
                 stop_if_no_proxies, max_proxies_to_try,
                 force_refresh_if_no_proxies, try_with_host, elite,
                 external_url):
        self.collector = create_collector('proxy-pool', ['http', 'https'],
                                          refresh_interval, None, elite,
                                          external_url)
        self.collector.apply_filter(filters)
        if self.collector.get_proxies():
            logger.info("Proxies: " + str(len(self.collector.get_proxies())))
        else:
            logger.info("Proxies: 0")

        self.refresh_interval = refresh_interval
        self.logstats_interval = logstats_interval
        self.stop_if_no_proxies = stop_if_no_proxies
        self.max_proxies_to_try = max_proxies_to_try
        self.force_refresh_if_no_proxies = force_refresh_if_no_proxies
        self.try_with_host = try_with_host
Example #19
def update_proxy():
    db.taskid = '{date:%Y%m%d-%H%M%S}'.format(date=datetime.now())

    try:
        # Create a collector for https resources
        collector = proxyscrape.create_collector(
            'MyProxy' + str(random.randint(100, 10000)), 'https')
        # Get proxy
        proxies = []
        for i in range(50):
            print('getting proxy:', i)
            proxy = collector.get_proxy({
                'code': ('us', 'uk'),
                'anonymous': True
            })
            proxies.append(proxy.host + ':' + proxy.port)
        # Add proxies
        db.add_proxy(proxies)
        # Write to Log
        db.add_sys_log('get proxy', 'success', '')
    except Exception as e:
        print(str(e))
        db.add_sys_log('get proxy', 'fail', str(e))
Example #20
import proxyscrape

collector = proxyscrape.create_collector('default', 'http')

collector.refresh_proxies()

proxies = collector.get_proxies()

print(proxies)
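Since get_proxies() yields Proxy namedtuples rather than plain strings, printing the list shows the full tuple repr. A small follow-up sketch (assuming it runs right after the snippet above, so the 'default' collector already exists) that formats them as host:port lines:

import proxyscrape

collector = proxyscrape.get_collector('default')
formatted = [p.host + ":" + p.port for p in collector.get_proxies()]
print("\n".join(formatted))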
Example #21
import aiohttp
import json
import random
from proxyscrape import create_collector
import asyncio
from time import sleep
from aiohttp_proxy import ProxyConnector
from proxy import proxies
from proxy2 import proxies as proxies2
from proxy3 import proxies as proxies3
from proxy4 import proxies as proxies4
from proxy5 import proxies as proxies5
import sys
collector = create_collector('my-collector', 'http')


def getNewProxy(*args):
    new_proxy = getProxy()
    if(len(args) != 0):
        while (new_proxy == args[0]):
            new_proxy = getProxy()

    return new_proxy


def getProxy():
    toss = random.randint(0, 1)
    if(toss==0):
        proxyObj = collector.get_proxy()
        proxy = f'{proxyObj.type}://{proxyObj.host}:{proxyObj.port}'
Example #22
import re
import requests
from bs4 import BeautifulSoup
from requests.compat import urljoin
import time
import random
from proxyscrape import create_collector

# the header could be replaced using fake_useragent module
HEADERS = {
    "User-Agent":
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"
}

# initializing proxy list collector
collector = create_collector("my-collector", "http")
raw_data = collector.get_proxies()
home_page = "https://www.kwestiasmaku.com"

# storing a queue of internal links for processing
internal_links = set()


def parse_html(url):
    # listing proxy ip, port to be fed into requests
    proxies = []
    for i in raw_data:
        proxies.append(str(i[0]) + ":" + str(i[1]))

    # selecting random proxy ip, port
    def random_proxy():
Example #23
    err_msg,
    sms_success,
)
from telegram import ForceReply
from telegram.ext import ConversationHandler, CommandHandler, MessageHandler, Filters
from github import Github
import threading
import string
import requests
import os
import proxyscrape
import random
from misc.invalid_msg import wrong_option

sessions = {}
sockets = proxyscrape.create_collector("default", "http")


def ask_num(update, context):
    try:
        adminlist = (Github(os.getenv("API")).get_repo(repo_path).get_contents(
            file_name).decoded_content.decode().strip().split("\n"))
        if str(update.message.from_user.id) in adminlist or (
                update.message.from_user.username
                and update.message.from_user.username.lower()
                in [i.lower() for i in adminlist]):
            update.message.reply_text(ask_no, reply_markup=ForceReply())
            return 0
        else:
            update.message.reply_text(not_admin)
            return ConversationHandler.END
Example #24
    def get_proxies(desired_amount: int = 1, proxy_timeout=0.5):

        proxies = []

        # https://stackoverflow.com/a/59531141
        try:
            collector_1 = proxyscrape.get_collector('collector-http')

        except proxyscrape.errors.CollectorNotFoundError:
            collector_1 = proxyscrape.create_collector('collector-http',
                                                       'http')

        full_list = list(collector_1.get_proxies())

        for item in full_list:
            proxies.append(item.host + ':' + item.port)

        print(bs.warning_o(bsl["PROXY"]["FOUND"]), bs.red_o(str(len(proxies))),
              bs.warning_o(bsl["PROXY"]["HTTP_PROXIES"]))

        print(bs.warning_o(bsl["PROXY"]["PERFORMANCE_CHECK"]))
        print(bs.warning_o(bsl["GENERAL"]["CTRL_Z_EXIT"]))

        time.sleep(1)
        bs.clear()
        start_time = time.time()

        cnt = 0

        print(
            bs.warning_o(bsl["PROXY"]["CHECKED"]) + bs.red_o(' 0 ') +
            bs.warning_o(bsl["PROXY"]["OUT_OF"]), bs.red_o(str(len(proxies))),
            bs.warning_o(bsl["PROXY"]["PROXIES_WITH_TIMEOUT"]),
            bs.red_o(str(proxy_timeout)),
            bs.warning_o(bsl["PROXY"]["SECONDS_3"]))

        print(bs.warning_o(bsl["PROXY"]["CHOSEN"]), bs.red_o(str(cnt)),
              bs.warning_o(bsl["PROXY"]["OUT_OF"]),
              bs.red_o(str(desired_amount)),
              bs.warning_o(bsl["PROXY"]["PROXIES_WITH_TIMEOUT"]),
              bs.red_o(str(proxy_timeout)),
              bs.warning_o(bsl["PROXY"]["SECONDS_3"]))

        print(bs.warning_o(bsl["GENERAL"]["CTRL_Z_EXIT"]))

        checked_proxy = []

        for ind, item in enumerate(proxies, start=1):

            if cnt < desired_amount:

                if bs.is_bad_proxy(item, proxy_timeout):
                    print('[BAD PROXY]')
                else:
                    checked_proxy.append(item)
                    cnt += 1
            else:
                break

            bs.clear()

            print(bs.warning_o(bsl["PROXY"]["CHECKED"]), bs.red_o(str(ind)),
                  bs.warning_o(bsl["PROXY"]["OUT_OF"]),
                  bs.red_o(str(len(proxies))),
                  bs.warning_o(bsl["PROXY"]["PROXIES_WITH_TIMEOUT"]),
                  bs.red_o(str(proxy_timeout)),
                  bs.warning_o(bsl["PROXY"]["SECONDS_3"]))

            print(bs.warning_o(bsl["PROXY"]["CHOSEN"]), bs.okgreen_o(str(cnt)),
                  bs.warning_o(bsl["PROXY"]["OUT_OF"]),
                  bs.red_o(str(desired_amount)),
                  bs.warning_o(bsl["PROXY"]["PROXIES_WITH_TIMEOUT"]),
                  bs.red_o(str(proxy_timeout)),
                  bs.warning_o(bsl["PROXY"]["SECONDS_3"]))

            print(bs.warning_o(bsl["PROXY"]["EXIT_WARN"]))
            print(bs.warning_o(bsl["GENERAL"]["CTRL_Z_EXIT"]))

        end_time = time.time()

        extra_message = (bsl["PROXY"]["APPENDED"], str(cnt),
                         bsl["PROXY"]["PROXIES_WITH_TIMEOUT"],
                         str(proxy_timeout), bsl["PROXY"]["SECONDS_1"],
                         bsl["PROXY"]["TO_THE_PROXY_LIST"], bsl["PROXY"]["IN"],
                         str(round(end_time - start_time,
                                   2)), bsl["PROXY"]["SECONDS_1"] + ']')

        extra_message = bs.success_o(' '.join(x for x in extra_message))

        with open('proxy_list.txt', 'a') as infl:
            for item in checked_proxy:
                infl.write(''.join(item) + '\n')

        bs.print_full_main_screen(extra_message)
Example #25
import pymysql.cursors
from datetime import datetime
from multiprocessing import Pool, current_process
import numpy as np
import random
import time
from fake_useragent import UserAgent
import params
import proxyscrape

HOST = 'https://www.rusprofile.ru/'

URL1 = 'https://www.rusprofile.ru/codes/89220'
URL2 = 'https://www.rusprofile.ru/codes/429110'

collector = proxyscrape.create_collector('my-collector', 'https')


# get the page content
def get_html(url, my_proxy=None):
    process_name = current_process().name
    connection_is_ok = False

    if my_proxy is None:
        proxy = collector.get_proxy()
    else:
        proxy = my_proxy

    try:
        proxy_str = proxy.host + ":" + proxy.port
    except AttributeError:
Example #26
browser_list = []
browser_one = webdriver.Chrome(r"C:\Users\Julien MAUCLAIR\Desktop\Bot Youtube\webdriver\chromedriver")
browser_two = webdriver.Chrome(r"C:\Users\Julien MAUCLAIR\Desktop\Bot Youtube\webdriver\chromedriver")
browser_three = webdriver.Chrome(r"C:\Users\Julien MAUCLAIR\Desktop\Bot Youtube\webdriver\chromedriver")
browser_four = webdriver.Chrome(r"C:\Users\Julien MAUCLAIR\Desktop\Bot Youtube\webdriver\chromedriver")

browser_list.append(browser_one)
browser_list.append(browser_two)
browser_list.append(browser_three)
browser_list.append(browser_four)

for browser in browser_list:
	browser.get(video_url)

collector = create_collector('proxy-collector', 'https')

a=0

while a < vviews :

	while(True):

		
		proxy = collector.get_proxy()
		full_proxy=proxy.host+":"+proxy.port
		print(full_proxy)
		PROXY = full_proxy
		webdriver.DesiredCapabilities.CHROME['proxy']={
			"httpProxy":PROXY,
			"ftpProxy":PROXY,
Example #27
from datetime import datetime
import requests
from urllib3.exceptions import ReadTimeoutError
from requests.exceptions import (
    ConnectTimeout,
    ProxyError,
    ReadTimeout,
    ConnectionError
)
from bs4 import BeautifulSoup
from proxyscrape import create_collector
from app.core.database import DB
from app.core.machine_learning import get_prediction


COLLECTOR = create_collector('collector', 'https')


def manage_proxies(func):
    proxies = []

    def wrapper(*args, **kwargs):
        nonlocal proxies
        response, new_proxies = func(*args, proxies, **kwargs)
        proxies = new_proxies
        return response
    return wrapper


@manage_proxies
def get_response_use_proxy(url, proxies):
Example #28
import requests
from bs4 import BeautifulSoup
import proxyscrape
import time
import datetime
import json

collector = proxyscrape.create_collector("supreme", "http")
collector.apply_filter({'type': 'http'})


def read_file():
    with open('config.json') as config:
        data = json.load(config)
        discord_webhook = data["discordWebhook"]
        slack_webhook = data["slackWebhook"]
        region = data["region"]
        delay = data["delay"]
        use_proxies = data["useProxies"]
        proxies = data["proxies"]
    return discord_webhook, slack_webhook, region, delay, use_proxies, proxies


def parse_html(html):
    timestamp = datetime.datetime.now().isoformat()
    tohru = None
    soup = BeautifulSoup(html, "html.parser")
    if "us" in soup.body["class"]:
        region = "US"
    elif "eu" in soup.body["class"]:
        region = "EU"
Example #29
 def _get_proxy_collector(self, conf):
     name, proxy_types = conf["proxies"]["name"], conf["proxies"]["type"]
     collector = create_collector(name, proxy_types)
     collector.refresh_proxies(force=True)
     return collector
Example #30
import re
import proxyscrape
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.firefox.options import Options

req_proxy = RequestProxy(
)  # you may get different number of proxy when  you run this at each time
proxies = req_proxy.get_proxy_list()  # this will create proxy list
USA = [proxy for proxy in proxies if proxy.country == 'United States']

# ============================== proxyscrape START ============================== #
open("proxies.txt", "w").close()
collector = proxyscrape.create_collector(
    'default', 'http')  # Create a collector for http resources
proxy = collector.get_proxies({'country': 'united states'})

for x in range(len(proxy)):
    portBefore = (str(proxy[x]).split("port='", 1)[-1])
    portAfter = (str(portBefore).split("', code=")[0])
    ip = re.findall(r'[0-9]+(?:\.[0-9]+){3}', str(proxy[x]))
    ip = str(ip)
    rS1B = str((ip.split("['", 1)[1]))
    rS2B = str((rS1B.split("']")[0]))
    ipAndPort = (rS2B + ":" + portAfter)
    with open("proxies.txt", "a") as myfile:
        myfile.write(ipAndPort + "\n")
        myfile.close()

# ============================== proxyscrape END ============================== #
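The string-splitting on the Proxy repr in the loop above can normally be replaced by reading the namedtuple fields directly. An equivalent sketch, assuming proxy is the list returned by collector.get_proxies() in this example and that host and port are strings:

with open("proxies.txt", "a") as myfile:
    for p in proxy:
        # same output format as the repr-parsing version: "ip:port" per line
        myfile.write(p.host + ":" + p.port + "\n")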