Beispiel #1
0
    def __init__(self, language='en', browser_lang="en-US", timezone="Asia/Kolkata", region='IN', cookie=None):
        """Prepare the defaults used for requests against the TikTok web API.

        Args:
            language: Sent as the ``language`` query parameter.
            browser_lang: Sent as the ``browser_language`` query parameter.
            timezone: Sent as the ``timezone_name`` query parameter.
            region: Sent as ``region``, ``priority_region`` and
                ``tt-web-region``.
            cookie: Value to use for ``verifyFp``. When ``None`` a value is
                generated with ``random_key(16)``.
        """
        # Randomize the user agent (Chrome on Windows or Linux).
        # The filter keyword is ``operating_systems`` (plural); the library
        # silently ignores unknown keywords, so the singular spelling used
        # previously disabled the operating-system filter altogether.
        user_agent_rotator = UserAgent(
            software_names=[SoftwareName.CHROME.value],
            operating_systems=[OperatingSystem.WINDOWS.value,
                               OperatingSystem.LINUX.value],
            limit=100)

        self.base_url = "https://t.tiktok.com/api"
        self.user_agent = user_agent_rotator.get_random_user_agent()

        self.headers = {
            "User-Agent": self.user_agent,
        }
        self.language = language
        self.browser_lang = browser_lang
        self.timezone = timezone
        self.region = region

        self.verifyFp = random_key(16) if cookie is None else cookie

        # Query parameters attached to every API call.
        self.default_params = {
            "aid": "1988",
            "app_name": "tiktok_web",
            "device_platform": "web",
            "referer": "",
            "user_agent": urllib.parse.quote_plus(self.user_agent),
            "cookie_enabled": "true",
            "screen_width": "1920",
            "screen_height": "1080",
            "browser_language": self.browser_lang,
            "browser_platform": "Linux+x86_64",
            "browser_name": "Mozilla",
            "browser_version": "5.0+(X11)",
            "browser_online": "true",
            "timezone_name": self.timezone,
            # "page_referer": "https://www.tiktok.com/foryou?lang=en",
            "priority_region": self.region,

            "appId": "1180",
            "region": self.region,
            "appType": "t",

            "isAndroid": "false",
            "isMobile": "false",
            "isIOS": "false",
            "OS": "linux",
            "tt-web-region": self.region,

            "language": self.language,
            "verifyFp": self.verifyFp
        }
        self.signature_key = "_signature"
        self.did_key = "did"
        self.tiktok_browser = TikTokBrowser(self.user_agent)
Beispiel #2
0
def create_random_headers():
    """Build request headers for trendyol.com with a random User-Agent.

    The User-Agent is drawn from Chrome/Firefox agents on Windows or Linux.

    Returns:
        dict: Header names mapped to values, ordered alphabetically by
        header name.
    """
    software_names = [SoftwareName.CHROME.value, SoftwareName.FIREFOX.value]
    operating_systems = [
        OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
    ]

    user_agent_rotator = UserAgent(software_names=software_names,
                                   operating_systems=operating_systems,
                                   limit=100)
    # Draw once and use that value (the previous code drew an agent, ignored
    # it, and drew a second one for the header).
    agent = user_agent_rotator.get_random_user_agent()
    headers = {
        "User-Agent": agent,
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
        "Cache-Control": "no-cache",
        "Origin": "https://www.trendyol.com",
        "Pragma": "no-cache",
        "Referer": "https://www.trendyol.com/",
        # Was misspelled "emtpy", which is not a valid Sec-Fetch-Dest value.
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "Upgrade-Insecure-Requests": "1"
    }
    # Keys are unique, so sorting the items sorts by header name.
    return dict(sorted(headers.items()))
Beispiel #3
0
 def __init__(self, bot):
     """Store the bot and create the API clients and lookup tables.

     Args:
         bot: Object kept on ``self.bot``.
     """
     self.bot = bot
     self.icon = "🌐"

     # API clients, built from module-level credentials.
     twitter_auth = OAuthHandler(TWITTER_CKEY, TWITTER_CSECRET)
     self.twitter_api = tweepy.API(twitter_auth)
     self.google_client = async_cse.Search(GCS_DEVELOPER_KEY)

     # Colour values as integers (presumably an Instagram-style palette,
     # going by the attribute name).
     self.ig_colors = [
         0x405DE6,
         0x5851DB,
         0x833AB4,
         0xC13584,
         0xE1306C,
         0xFD1D1D,
         0xF56040,
         0xF77737,
         0xFCAF45,
     ]

     # Region names/aliases mapped to short region codes; note that both
     # Korean aliases map to "www".
     self.regions = dict(
         kr="www",
         korea="www",
         eune="eune",
         euw="euw",
         jp="jp",
         japan="jp",
         na="na",
         oceania="oce",
         oce="oce",
         brazil="br",
         las="las",
         russia="ru",
         ru="ru",
         turkey="tr",
         tr="tr",
     )
     self.user_agents = UserAgent()
Beispiel #4
0
def randomUserAgent():
    """Return a random user-agent string for a Chrome OS computer."""
    rotator = UserAgent(
        operating_systems=[OperatingSystem.CHROMEOS.value],
        hardware_types=[HardwareType.COMPUTER.value],
    )
    return rotator.get_random_user_agent()
Beispiel #5
0
    def open_browser(self):
        """Open ``self.url`` in ``self.browser`` and check the page title.

        A random Chrome (Windows/Linux) user-agent string is generated first;
        the original intent was to hide the automation tool from the site.

        Raises:
            AssertionError: If 'Alamo' is not part of the tab title after
                the page has loaded.
        """
        software_names = [SoftwareName.CHROME.value]
        operating_systems = [
            OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
        ]

        user_agent_rotator = UserAgent(software_names=software_names,
                                       operating_systems=operating_systems,
                                       limit=100)

        # NOTE(review): this value is generated but never handed to
        # ``self.browser``, which is created elsewhere. It must be applied
        # where the driver is constructed to have any effect -- confirm.
        user_agent = user_agent_rotator.get_random_user_agent()

        # Open the browser to the designated URL.
        self.browser.get(self.url)

        # Explicit check instead of ``assert`` so the verification still runs
        # when Python is started with -O (which strips assert statements).
        if 'Alamo' not in self.browser.title:
            raise AssertionError(
                "'Alamo' not found in page title: {!r}".format(
                    self.browser.title))
Beispiel #6
0
class RandomUserAgentMiddleware(object):
    """Request hook that sets a random Firefox-on-Windows User-Agent.

    Requests carrying a ``cookiejar`` value in ``request.meta`` keep the same
    User-Agent for as long as that value does not change; requests without
    one get a fresh random User-Agent every time. The
    ``process_request(request, spider)`` signature is presumably the Scrapy
    downloader-middleware hook -- confirm against the project settings.
    """

    def __init__(self, *args, **kwargs):
        """Create the rotator and pick the initial User-Agent."""
        # The library reads only specific keyword names and silently ignores
        # others, so the keywords must be spelled ``popularity`` and
        # ``operating_systems``, and every filter entry must be the enum's
        # string ``.value`` rather than the enum member itself.
        self.user_agent_rotator = UserAgent(
            software_types=[SoftwareType.WEB_BROWSER.value],
            hardware_types=[HardwareType.COMPUTER.value],
            popularity=[Popularity.POPULAR.value],
            software_names=[SoftwareName.FIREFOX.value],
            operating_systems=[OperatingSystem.WINDOWS.value])
        self.session = 1
        self.user_agent = self.user_agent_rotator.get_random_user_agent()

    def process_request(self, request, spider):
        """Set the ``User-Agent`` header on ``request``; always returns None."""
        session = request.meta.get("cookiejar")
        if not session:
            # No session: use a fresh agent for every request.
            request.headers['User-Agent'] = (
                self.user_agent_rotator.get_random_user_agent())
            return None

        if session != self.session:
            # New session: remember it and rotate the agent once.
            self.session = session
            self.user_agent = self.user_agent_rotator.get_random_user_agent()
        request.headers['User-Agent'] = self.user_agent
        return None
    def __init__(self, url):
        """Start a headless Chrome driver and optionally open ``url``.

        Images are disabled through the Chrome preferences and a random
        Chrome (Windows/Linux) user agent is applied. Any failure is reported
        on stdout and swallowed, in which case ``self.driver`` may still be
        ``None``.

        Args:
            url: Page to open after start-up; skipped when ``None``.
        """
        self.url = url
        self.driver = None
        try:
            software_names = [SoftwareName.CHROME.value]
            operating_systems = [
                OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
            ]
            user_agent_rotator = UserAgent(software_names=software_names,
                                           operating_systems=operating_systems,
                                           limit=100)
            user_agent = user_agent_rotator.get_random_user_agent()
            prefs = {'profile.managed_default_content_settings.images': 2}
            options = webdriver.ChromeOptions()

            # The f-prefix was missing, so Chrome received the literal text
            # "{user_agent}" as its user agent.
            options.add_argument(f"user-agent={user_agent}")
            options.add_argument("--headless")
            options.add_argument("--no-sandbox")
            options.add_experimental_option("prefs", prefs)

            # Ted's path: /home/devadmin/Desktop/chromedriver
            self.driver = webdriver.Chrome(
                executable_path='/home/devadmin/Desktop/chromedriver',
                chrome_options=options)
            if self.url is not None:
                self.driver.get(self.url)
        except Exception as exc:
            # Best-effort start-up: report and continue. ``Exception`` rather
            # than a bare except so Ctrl-C / SystemExit are not swallowed.
            print("ERROR: CHROME DRIVER")
            print(repr(exc))
0
def build_driver():
    """Start a Firefox driver with a random user agent and record its session.

    The driver's command-executor URL and session id are written, one per
    line, to ``SELENIUM_SESSION_FILE``.

    Returns:
        The started Firefox WebDriver.
    """
    software_names = [SoftwareName.FIREFOX.value]
    operating_systems = [
        OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
    ]
    user_agent_rotator = UserAgent(software_names=software_names,
                                   operating_systems=operating_systems,
                                   limit=100)
    user_agent = user_agent_rotator.get_random_user_agent()
    options = Options()
    # options.add_argument("--headless")
    # options.add_argument("--no-sandbox")
    # options.add_argument("--window-size=1420,1080")
    # options.add_argument("--disable-gpu")

    # Firefox does not understand the Chrome-style "user-agent=..." command
    # line argument; the user agent is overridden through this preference.
    options.set_preference("general.useragent.override", user_agent)
    driver = webdriver.Firefox(options=options)

    # ``with`` guarantees the file is closed even if writing fails.
    with open(SELENIUM_SESSION_FILE, 'w') as session_file:
        session_file.writelines([
            driver.command_executor._url,
            "\n",
            driver.session_id,
            "\n",
        ])

    return driver
Beispiel #9
0
def random_agents(num: int):
    """Return a list of ``num`` random Chrome user-agent strings.

    Agents are drawn (with possible repeats) from a pool of 300 Chrome agents
    for Windows and Linux.
    """
    rotator = UserAgent(
        software_names=[SoftwareName.CHROME.value],
        operating_systems=[OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value],
        limit=300,
    )
    return [rotator.get_random_user_agent() for _ in range(num)]
Beispiel #10
0
def generate_random_useragent() -> str:
    """Generate random mobile user agent.

    The filters come from the module-level ``software_names``,
    ``operating_systems``, ``hardware_type`` and ``software_types`` values.
    """
    # The library's keyword is ``hardware_types`` (plural); the singular
    # spelling is silently ignored, which left the hardware filter disabled.
    # NOTE(review): assumes the module-level ``hardware_type`` is a list of
    # hardware type strings -- confirm at its definition.
    user_agent_rotator = UserAgent(software_names=software_names,
                                   operating_systems=operating_systems,
                                   hardware_types=hardware_type,
                                   software_types=software_types)

    return user_agent_rotator.get_random_user_agent()
def main():
    """Collect film ids from the kinopoisk.ru popular listing and store each film.

    Steps:
        1. Create a ``requests`` session with a random Chrome user agent and,
           if present, cookies restored from ``assets/cookie``.
        2. Walk the listing pages until a page yields no film links.
        3. For every film id call ``get_film_data`` and ``insert_film``;
           a failure for one film is logged and the loop continues.
        4. Save the session cookies back to ``assets/cookie``.

    ``delay`` seconds are slept between requests.
    """
    delay = 15
    logger.init_logger(f'logs/{settings.NAME}.log')
    s = requests.Session()
    software_names = [SoftwareName.CHROME.value]
    operating_systems = [
        OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
    ]

    user_agent_rotator = UserAgent(software_names=software_names,
                                   operating_systems=operating_systems,
                                   limit=100)
    user_agent = user_agent_rotator.get_random_user_agent()
    s.headers.update({'User-Agent': user_agent})
    proxy = settings.PROXIES if settings.USE_PROXY else None
    try:
        # NOTE(security): pickle executes arbitrary code on load; this is
        # only safe while assets/cookie is written exclusively by this script.
        with open('assets/cookie', 'rb') as f:
            s.cookies.update(pickle.load(f))
    except Exception as e:
        # Missing or unreadable cookie file: continue with a fresh session.
        logging.warning(str(e))

    films = []
    # Raw string: '\d' in a normal string literal is an invalid escape.
    r = re.compile(r'\d+')
    page_num = 1
    while True:

        page = s.get(
            f'https://www.kinopoisk.ru/popular/films/2018/?page={page_num}&quick_filters=films&tab=all',
            proxies=proxy)
        tree = html.fromstring(page.text)
        # The first run of digits in each link is taken as the film id.
        films_buf = [
            r.findall(film.attrib['href'])[0]
            for film in tree.find_class('selection-film-item-meta__link')
        ]
        logging.debug(f'Добавлено {len(films_buf)} фильмов')
        if not films_buf:
            break
        films.extend(films_buf)
        page_num += 1
        time.sleep(delay)

    for i, film in enumerate(films):
        try:
            data = get_film_data(s, film, delay, proxy)
            insert_film(conn.conn, data)
            logging.debug(
                f'Фильм {data["title"]} обработан  {i}/{len(films)} ')
        except Exception:
            # ``Exception`` (not a bare except) so Ctrl-C can still stop the
            # run; the traceback is attached to make failures diagnosable.
            logging.debug(f'Фильм не обработан, ошибка! {i}/{len(films)} ',
                          exc_info=True)

        time.sleep(delay)
    with open('assets/cookie', 'wb') as f:
        pickle.dump(s.cookies, f)
    def get_random_user_agent():
        """Return a random Chrome user-agent string for Windows or Linux."""
        rotator = UserAgent(
            software_names=[SoftwareName.CHROME.value],
            operating_systems=[
                OperatingSystem.WINDOWS.value,
                OperatingSystem.LINUX.value,
            ],
            limit=100,
        )
        return rotator.get_random_user_agent()
    def __generate_random_agent(self) -> None:
        """Create ``self.r``, a requests session with a random Chrome User-Agent."""
        rotator = UserAgent(
            software_names=[SoftwareName.CHROME.value],
            operating_systems=[OperatingSystem.WINDOWS.value,
                               OperatingSystem.LINUX.value],
            limit=100,
        )
        random_agent = rotator.get_random_user_agent()

        self.r = requests.Session()
        self.r.headers.update({"User-Agent": random_agent})
Beispiel #14
0
    def get_user_agents():
        """Return a two-entry header dict containing a random Chrome/Linux agent.

        Returns:
            dict: ``'userAgent'`` is the fixed string ``'python 3.7.5'`` and
            ``'platform'`` is the random user-agent string.
        """
        software_names = [SoftwareName.CHROME.value]
        operating_systems = [OperatingSystem.LINUX.value]

        user_agent_rotator = UserAgent(software_names=software_names,
                                       operating_systems=operating_systems,
                                       limit=100)

        user_agent = user_agent_rotator.get_random_user_agent()
        # NOTE(review): the random agent is stored under 'platform' while
        # 'userAgent' holds a fixed string -- confirm this is what the
        # consumer of these headers expects.
        return {'userAgent': 'python 3.7.5', 'platform': user_agent}
def build_driver():
    """Start a headless, proxied Firefox driver and record its session.

    The driver uses a Firefox profile with a user-agent override and two
    ad-block extensions, and is routed through the proxy configured in
    ``seleniumwire_options``. The command-executor URL and session id are
    written, one per line, to ``SELENIUM_SESSION_FILE``. After a short wait,
    ``ulties.closeOtherTabs`` is called on the driver.

    Returns:
        The started Firefox WebDriver.
    """
    software_names = [SoftwareName.FIREFOX.value]
    operating_systems = [
        OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
    ]
    user_agent_rotator = UserAgent(software_names=software_names,
                                   operating_systems=operating_systems,
                                   limit=100)
    user_agent = user_agent_rotator.get_random_user_agent()
    # test for prevent block: a fixed agent deliberately replaces the random
    # one. The value must be the bare UA string -- it previously carried a
    # "user-agent=" prefix that ended up inside the override preference.
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0"
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    # options.add_argument("--window-size=1420,1080")
    options.add_argument("--disable-gpu")

    profile = webdriver.FirefoxProfile()
    # Firefox takes its user agent from this preference; the Chrome-style
    # "user-agent=..." command line argument has been dropped.
    profile.set_preference("general.useragent.override", user_agent)
    profile.add_extension(extension='./extensions/adblock_plus-3.11-an+fx.xpi')
    profile.add_extension(
        extension='./extensions/adblock_for_firefox-4.33.0-fx.xpi')

    # NOTE(security): hard-coded credential, and unused in the visible code.
    # It should be loaded from configuration/environment instead.
    API_KEY = '7f3282dc1e35451c7037fa93818b0cef'
    proxy_options = {
        'proxy': {
            'http':
            f'http://*****:*****@proxy-server.scraperapi.com:8001',
            'https':
            f'http://*****:*****@proxy-server.scraperapi.com:8001',
            'no_proxy': 'localhost,127.0.0.1'
        }
    }

    driver = webdriver.Firefox(firefox_profile=profile,
                               firefox_binary=None,
                               options=options,
                               seleniumwire_options=proxy_options)

    print("Agent: {}".format(user_agent))
    # ``with`` guarantees the file is closed even if writing fails.
    with open(SELENIUM_SESSION_FILE, 'w') as session_file:
        session_file.writelines([
            driver.command_executor._url,
            "\n",
            driver.session_id,
            "\n",
        ])
    time.sleep(5)
    # close other tabs
    ulties.closeOtherTabs(driver)
    return driver
Beispiel #16
0
def get_user_agent_header(limit: int = 300) -> dict:
    """Return a one-entry dict with a random Chrome/Firefox-on-Linux agent.

    The rotator is created on first use and kept in the module-level
    ``_user_agents``, so ``limit`` only has an effect while that global is
    still falsy.
    """
    global _user_agents
    if not _user_agents:
        _user_agents = UserAgent(
            software_names=[SoftwareName.CHROME.value,
                            SoftwareName.FIREFOX.value],
            operating_systems=[OperatingSystem.LINUX.value],
            limit=limit,
        )

    return {'UserAgent': _user_agents.get_random_user_agent()}
Beispiel #17
0
def get_random_user_agent():
    """Return a random Chrome user-agent string for Windows or Linux.

    The string is drawn from a pool of 100 matching agents.
    """
    # SoftwareEngine, HardwareType, SoftwareType and Popularity from
    # random_user_agent.params can be used as additional filters.
    rotator = UserAgent(
        software_names=[SoftwareName.CHROME.value],
        operating_systems=[
            OperatingSystem.WINDOWS.value,
            OperatingSystem.LINUX.value,
        ],
        limit=100,
    )
    return rotator.get_random_user_agent()
Beispiel #18
0
    def create_session(self):
        """Create the user-agent rotator and a new requests session.

        Sets ``self.user_agent_rotator`` (Chrome agents for Windows/Linux,
        pool of 100) and ``self.session``.
        """
        chrome_only = [SoftwareName.CHROME.value]
        desktop_systems = [
            OperatingSystem.WINDOWS.value,
            OperatingSystem.LINUX.value,
        ]
        self.user_agent_rotator = UserAgent(software_names=chrome_only,
                                            operating_systems=desktop_systems,
                                            limit=100)
        self.session = requests.Session()
Beispiel #19
0
class Headers:
    """Default HTTP request headers with a rotatable User-Agent.

    The header dict and the rotator are class attributes, so they are shared
    by every instance; ``rotate_user_agent`` changes the shared dict in place.
    """

    __log__ = logging.getLogger('flathunt')

    # Pool of common user agents for computers.
    _user_agent_rotator = UserAgent(
        popularity=[Popularity.COMMON.value],
        hardware_types=[HardwareType.COMPUTER.value],
    )

    _headers = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': _user_agent_rotator.get_random_user_agent(),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,'
                  'image/webp,image/apng,*/*;q=0.8,'
                  'application/signed-exchange;v=b3;q=0.9',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-User': '******',
        'Sec-Fetch-Dest': 'document',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    @property
    def headers(self):
        """The shared header dict (not a copy)."""
        return self._headers

    def rotate_user_agent(self):
        """Choose a new random user agent"""
        fresh_agent = self._user_agent_rotator.get_random_user_agent()
        self._headers['User-Agent'] = fresh_agent
Beispiel #20
0
    def getRandomUserAgent(self):
        """Return a random user-agent string and log it at debug level.

        The pool holds 100 agents for Chrome, Firefox, Edge, Internet
        Explorer and Android software on Windows or Linux.
        """
        software_names = [
            SoftwareName.CHROME.value, SoftwareName.FIREFOX.value,
            SoftwareName.EDGE.value, SoftwareName.INTERNET_EXPLORER.value,
            SoftwareName.ANDROID.value
        ]
        operating_systems = [
            OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
        ]
        user_agent_rotator = UserAgent(software_names=software_names,
                                       operating_systems=operating_systems,
                                       limit=100)
        user_agent = user_agent_rotator.get_random_user_agent()
        # Lazy %-formatting: the message is only built when debug is enabled.
        self.__log__.debug('using user agent: %s', user_agent)

        return user_agent
Beispiel #21
0
 def __init__(self, *args, **kwargs):
     """Create the Firefox-on-Windows rotator and pick the initial agent.

     Sets ``self.user_agent_rotator``, ``self.session`` (initially ``1``)
     and ``self.user_agent``.
     """
     popularity = [
         Popularity.POPULAR.value,
         # Popularity.COMMON.value
     ]
     hardware_types = [
         HardwareType.COMPUTER.value,
     ]
     software_types = [SoftwareType.WEB_BROWSER.value]
     software_names = [SoftwareName.FIREFOX.value]
     # Filters compare against plain strings, hence ``.value``.
     operating_systems = [OperatingSystem.WINDOWS.value]
     # The library silently ignores unknown keywords, so these must be
     # spelled ``popularity`` and ``operating_systems`` to take effect.
     self.user_agent_rotator = UserAgent(software_types=software_types,
                                         hardware_types=hardware_types,
                                         popularity=popularity,
                                         software_names=software_names,
                                         operating_systems=operating_systems)
     self.session = 1
     self.user_agent = self.user_agent_rotator.get_random_user_agent()
Beispiel #22
0
 def __init__(self, user: str, user_file: str=USER_DATA_JSON) -> None:
     """Set up the user-agent rotator, browser options, driver and user data.

     Args:
         user: User name; stored on ``self.user_name`` and passed to the
             private driver set-up helper.
         user_file: Path stored on ``self.user_file``.
     """
     self.user_file = user_file
     # Filters must be lists passed under the library's plural keyword
     # names: ``operating_system=`` was silently ignored, and a bare string
     # for ``software_names`` turns the membership test into substring
     # matching.
     self.user_agent_rotator = UserAgent(software_names=["chrome"], operating_systems=["linux"], limit=100)
     self.browser_options = Options()
     self.driver = self.__setBotOptions(user)
     self.user_name = user
     self.operational_data = self.__getUserdata()
     self.__on_class = False
     self.current_class = "N/A"
    def __test_proxy_list(self, num: int= 20, timeout: float = 10) -> None:
        """Probe proxies from ``self.listed_proxy`` and keep the working ones.

        Only proxies ending in port 8080 are tried (for simplicity). A proxy
        counts as good when a GET to https://www.azlyrics.com/ through it
        completes without raising; it is then appended to ``self.good_prox``.
        Probing stops once ``self.good_prox`` holds ``num`` entries.

        Skip this step when start-up time matters.

        Args:
            num: Number of good proxies to collect before stopping.
            timeout: Seconds to wait for each probe request, so a dead proxy
                cannot hang the run indefinitely.
        """
        pat = re.compile('.:8080$')

        # Single pass instead of re-filtering the whole list per element.
        proxies_list = [p for p in self.listed_proxy if pat.search(p)]

        # The rotator is loop-invariant; only the drawn agent changes.
        software_names = [SoftwareName.CHROME.value]
        operating_systems = [OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value]
        user_agent_rotator = UserAgent(software_names=software_names,
                                       operating_systems=operating_systems, limit=100)

        for prox in proxies_list:
            # Stop selection if we get {num} good proxies
            # (the previous ``<=`` test collected one more than requested).
            if len(self.good_prox) >= num:
                break
            try:
                print(prox)

                proxies = {"http": self.__transform_proxy_http(prox),
                           "https": self.__transform_proxy_http(prox)}

                # A random user agent is used for every proxy.
                user_agent = user_agent_rotator.get_random_user_agent()
                headers = {"User-Agent": user_agent}

                # ``with`` releases the session's connections after the probe.
                with requests.Session() as r:
                    r.headers.update(headers)
                    r.proxies.update(proxies)

                    # Connect to the page (I found azlyris in a past project,
                    # which is in my opinion a good site for testing)
                    r.get("https://www.azlyrics.com/", proxies=proxies,
                          headers=headers, timeout=timeout)

                # The connection succeeded, so the proxy is kept.
                self.good_prox.append(prox)
            except Exception:
                # Could not connect through this proxy; try the next one.
                # ``Exception`` (not a bare except) keeps Ctrl-C working.
                print("Not Good")
                continue
        print("End")
Beispiel #24
0
    def update(self):
        """Get the latest data from Yahoo earnings.

        Requests the quoteSummary endpoint for ``self._ticker`` with a random
        Chrome user agent and stores the selected fields as a dict on
        ``self.data``, setting ``self.available`` to ``True``. On any request
        failure or unexpected response layout, ``self.data`` is set to
        ``None`` and ``self.available`` to ``False``.
        """
        software_names = [SoftwareName.CHROME.value]
        operating_systems = [OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value]

        user_agent_rotator = UserAgent(software_names=software_names, operating_systems=operating_systems, limit=100)
        headers = {'User-Agent': user_agent_rotator.get_random_user_agent()}

        # Certificate verification is disabled for this request, so the
        # corresponding urllib3 warning is silenced.
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        other_details_json_link = "https://query2.finance.yahoo.com/v10/finance/quoteSummary/{0}?formatted=true&lang=en-US&region=US&modules=summaryProfile%2CfinancialData%2CrecommendationTrend%2CupgradeDowngradeHistory%2Cearnings%2CdefaultKeyStatistics%2CcalendarEvents&corsDomain=finance.yahoo.com".format(self._ticker)
        try:
            summary_json_response = requests.get(other_details_json_link, verify=False, headers=headers, timeout=10)
            _LOGGER.debug("Yahoo earnings updated")
            json_loaded_summary = json.loads(summary_json_response.text)
            quotes_json = json_loaded_summary["quoteSummary"]["result"][0]
            financial_data = quotes_json.get("financialData", {})
            key_statistics = quotes_json.get("defaultKeyStatistics", {})

            # Numeric fields are wrapped in objects; the 'raw' entry is read.
            results_json = {
                'Ticker': self._ticker,
                'Institutional Ownership': key_statistics.get("heldPercentInstitutions", {}).get('raw'),
                'Insider Ownership': key_statistics.get("heldPercentInsiders", {}).get('raw'),
                'Number of Analysts': financial_data.get("numberOfAnalystOpinions", {}).get('raw'),
                'Recommendation': financial_data.get("recommendationKey"),
                'Median Target': financial_data.get("targetMedianPrice", {}).get('raw'),
                'Mean Target': financial_data.get("targetMeanPrice", {}).get('raw'),
                'Recommendation Mean': financial_data.get("recommendationMean", {}).get('raw'),
                'History': quotes_json.get("upgradeDowngradeHistory", {}).get("history"),
            }
            self.data = results_json
            self.available = True
        except requests.exceptions.ConnectionError:
            _LOGGER.error("Connection error")
            self.data = None
            self.available = False
        except requests.exceptions.RequestException as err:
            # Timeouts and other request failures are not ConnectionError
            # subclasses and previously escaped with stale data in place.
            _LOGGER.error("Request failed: %s", err)
            self.data = None
            self.available = False
        except (ValueError, KeyError, IndexError, TypeError, AttributeError) as err:
            # Non-JSON body, or a payload without the expected structure
            # (for example an error response with no result entry).
            _LOGGER.error("Unexpected response format: %s", err)
            self.data = None
            self.available = False
Beispiel #25
0
async def monitor_webpage():
    """Poll the news page forever and post new items to a Discord channel.

    Each round fetches ``url`` with a random popular user agent, collects the
    ``news-list-item`` entries that appear before the last one already seen
    (``curLast``), sends one message per new entry to the channel and then
    waits ``sleepTime`` seconds.
    """
    import asyncio  # local import: only needed for the non-blocking sleep

    global curLast

    # Generate fake user agent so we dont get banned.
    # Filters must be passed by keyword; a positional popularity value is
    # silently ignored by the library.
    userAgent = UserAgent(limit=100, popularity=[Popularity.POPULAR.value])

    await client.wait_until_ready()
    channel = client.get_channel(632490309443387392)

    # Check every hour or so
    while True:
        header = userAgent.get_random_user_agent()

        print(f'>>> Time to check for updates with user agent: {str(header)}')

        # Get the html from the website. The agent has to go in ``headers``;
        # the second positional argument of requests.get is ``params``, so
        # the agent string was being appended to the URL as a query string.
        # NOTE(review): requests.get is still a blocking call inside a
        # coroutine; consider running it in an executor.
        response = requests.get(url, headers={'User-Agent': header})

        # Parse the html so we can easily search it
        soup = bs.BeautifulSoup(response.text, 'lxml')

        # Collect every entry listed before the last one already seen.
        newsList = []
        for c in soup.find_all('div', class_='news-list-item'):
            newLast = (c.find('a').get('title'), c.find('a').get('href'))

            if curLast == 'init':
                curLast = newLast  # We got the latest news title
                print(f'Initialized with {curLast}')
                break
            if curLast == newLast:
                print('This has been shown before')
                break
            newsList.append(newLast)

        if newsList:
            for new in newsList:
                await channel.send(
                    f'{newsMessage}*{new[0]}*\nhttp://www.phys.uoa.gr/{new[1]}'
                )
            # The first collected entry becomes the new "last seen" marker.
            curLast = newsList[0]
        response.close()
        # asyncio.sleep yields to the event loop; time.sleep would freeze
        # the whole client for the duration of the wait.
        await asyncio.sleep(sleepTime)
 def get_selenium_res(self):
     """Load ``self.url`` in a browser and save the page's video to ``self.path``.

     Waits up to 15 seconds for a ``<video>`` element, downloads its ``src``
     with ``requests`` and writes the bytes to ``self.path``. If the element
     does not appear in time, ``self.selenium_retries`` is incremented, the
     driver type is switched between 'firefox' and 'chrome', and the method
     calls itself again.

     Raises:
         ValueError: If ``self.driver_type`` is neither 'firefox' nor
             'chrome'.
     """
     software_names = [SoftwareName.FIREFOX.value]
     operating_systems = [
         OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
     ]
     user_agent_rotator = UserAgent(software_names=software_names,
                                    operating_systems=operating_systems,
                                    limit=100)
     user_agent = user_agent_rotator.get_random_user_agent()
     print(user_agent)
     options = self.setup_driver_options(user_agent)

     # Browser is started without a proxy (proxies are easily blocked after
     # multiple uses).
     if self.driver_type == 'firefox':
         browser = webdriver.Firefox(options=options)
     elif self.driver_type == 'chrome':
         browser = webdriver.Chrome(chrome_options=options)
     else:
         # Previously fell through and failed later on an unbound ``browser``.
         raise ValueError(
             "unsupported driver type: {!r}".format(self.driver_type))

     time_to_wait = 15
     tag_name = 'video'
     blocked = False
     try:
         browser.get(self.url)
         try:
             # ``until`` returns the located element, so no second lookup
             # is needed.
             video = WebDriverWait(browser, time_to_wait).until(
                 EC.presence_of_element_located((By.TAG_NAME, tag_name)))
         except TimeoutException:
             print("You were likely blocked")
             blocked = True
         else:
             video_link = video.get_attribute('src')
             resp = requests.get(video_link)
             with open(self.path, 'wb') as f:
                 f.write(resp.content)
             print("video written")
     finally:
         # Always release the browser and its driver process, including
         # when the download or file write fails.
         browser.quit()

     if not blocked:
         return None

     self.selenium_retries += 1
     # Alternate the browser for the next attempt.
     if self.driver_type == 'firefox':
         self.driver_type = 'chrome'
     else:
         self.driver_type = 'firefox'
     # NOTE(review): retries are counted but never capped here, so repeated
     # timeouts recurse without bound -- confirm whether a limit is intended.
     return self.get_selenium_res()
Beispiel #27
0
def user_agent_string():
    """Return a random Chrome user-agent string for Windows or Linux.

    The library is imported inside the function; the string is drawn from a
    pool of 100 matching agents.
    """
    from random_user_agent.user_agent import UserAgent
    from random_user_agent.params import SoftwareName, OperatingSystem

    # SoftwareEngine, HardwareType, SoftwareType and Popularity are also
    # available from random_user_agent.params as additional filters.
    rotator = UserAgent(
        software_names=[SoftwareName.CHROME.value],
        operating_systems=[OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value],
        limit=100,
    )
    return rotator.get_random_user_agent()
Beispiel #28
0
def get_user_agent_driver():
    """Start a headless Chrome driver that uses a random Chrome user agent.

    Returns:
        The Chrome WebDriver, started with the chromedriver binary at
        ``../../data/chromedriver.exe``.
    """
    software_names = [SoftwareName.CHROME.value]
    operating_systems = [
        OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
    ]
    user_agent_rotator = UserAgent(software_names=software_names,
                                   operating_systems=operating_systems,
                                   limit=100)

    user_agent1 = user_agent_rotator.get_random_user_agent()
    options = Options()
    # Switches must start with two ASCII hyphens. The previous strings began
    # with an em dash ("—-"), which Chrome does not recognise as a switch.
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-gpu')
    options.add_argument('--window-size=1420,1080')
    options.add_argument(f'user-agent={user_agent1}')
    driver = webdriver.Chrome('../../data/chromedriver.exe', options=options)
    return driver
def get_random_user_agent():
    """Return a random user-agent string for a browsing session.

    Intended to help avoid automation detection. The string is drawn from a
    pool of 1000 Chrome/Firefox agents for Windows, Linux, macOS and FreeBSD.
    Any failure is logged and then re-raised.
    """
    try:
        rotator = UserAgent(
            software_names=[
                SoftwareName.CHROME.value,
                SoftwareName.FIREFOX.value,
            ],
            operating_systems=[
                OperatingSystem.WINDOWS.value,
                OperatingSystem.LINUX.value,
                OperatingSystem.MACOS.value,
                OperatingSystem.FREEBSD.value,
            ],
            limit=1000,
        )
        return str(rotator.get_random_user_agent())
    except Exception as e:
        logging.error('Ocorreu um erro ao gerar um User Agent para navegação.')
        logging.exception(e)
        raise e
Beispiel #30
0
class Bot:
  logging.basicConfig(filename='SNKRSlog.log', filemode='a', format='%(asctime)s - %(name)s - %(message)s',
                    level=logging.DEBUG)

  software_names = [SoftwareName.CHROME.value]
  hardware_type = [HardwareType.MOBILE__PHONE]
  user_agent_rotator = UserAgent(software_names=software_names, hardware_type=hardware_type)
  CONFIG = dotenv.dotenv_values("e.env")

  proxyObject = FreeProxy(country_id=['FR'], rand=True)

  INSTOCK = []