Example #1
 def test_anonym_filter(self):
     test1 = FreeProxy()
     cnt1 = len(test1.get_proxy_list())
     test2 = FreeProxy(anonym=True)
     cnt2 = len(test2.get_proxy_list())
     self.assertTrue(cnt2 < cnt1)
Example #2
 def test_empty_proxy_list(self):
     test = FreeProxy()
     test.get_proxy_list = MagicMock(return_value=[])
     with self.assertRaises(RuntimeError):
         test.get()
Example #3
from datetime import datetime
import time

import json
import logging
import traceback

import requests as rq
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import SoftwareName, HardwareType
from fp.fp import FreeProxy

import config

logging.basicConfig(filename='SNKRS.log', filemode='a', format='%(asctime)s - %(name)s - %(message)s', level=logging.DEBUG)

software_names = [SoftwareName.CHROME.value]
hardware_type = [HardwareType.MOBILE__PHONE]
user_agent_rotator = UserAgent(software_names=software_names, hardware_type=hardware_type)

proxy_obj = FreeProxy(country_id=config.FREE_PROXY_LOCATION, rand=True)

INSTOCK = []


def scrape_site(headers, proxy):
    """
    Scrapes SNKRS site and adds items to array
    """
    items = []

    # Makes request to site
    anchor = 0
    while anchor < 160:
        url = f'https://api.nike.com/product_feed/threads/v3/?anchor={anchor}&count=50&filter=marketplace%28{config.LOCATION}%29&filter=language%28{config.LANGUAGE}%29&filter=channelId%28010794e5-35fe-4e32-aaff-cd2c74f89d61%29&filter=exclusiveAccess%28true%2Cfalse%29'
        html = rq.get(url=url, timeout=20, verify=False, headers=headers, proxies=proxy)
Example #4
 def update_proxy(self):
     self.proxy = FreeProxy(country_id=["RU"]).get()
     if debug == "true":
         print("[DEBUG] New proxy:", self.proxy)
Example #5
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import pyfiglet
from os import system
from time import sleep
import sys
from fp.fp import FreeProxy

proxy = FreeProxy(timeout=1, rand=True).get()
print(proxy[7:])
proxy = proxy[7:]  # strip the leading 'http://' so only host:port is passed to Chrome

chrome_options = webdriver.ChromeOptions()
#chrome_options.add_argument('--headless')
mobile_emulation = {"deviceName": "Nexus 5"}
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
chrome_options.add_argument('--proxy-server=%s' % proxy)
driver = webdriver.Chrome(
    'C:/chromedriver.exe',
    chrome_options=chrome_options)  # on Windows the driver path must include the .exe extension
#driver.set_window_size(480, 590)

i = 0


def loop1():
    global i
    sleep(10)
    try:
Example #6
import logging
import dotenv
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import SoftwareName, HardwareType
from fp.fp import FreeProxy

logging.basicConfig(filename='Shopifylog.log',
                    filemode='a',
                    format='%(asctime)s - %(name)s - %(message)s',
                    level=logging.DEBUG)

software_names = [SoftwareName.CHROME.value]
hardware_type = [HardwareType.MOBILE__PHONE]
user_agent_rotator = UserAgent(software_names=software_names,
                               hardware_type=hardware_type)
CONFIG = dotenv.dotenv_values()

proxyObject = FreeProxy(country_id=[CONFIG['LOCATION']], rand=True)

INSTOCK = []


def check_url(url):
    """
    Checks whether the supplied URL is valid
    :return: Boolean - True if valid
    """
    return 'products.json' in url
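
# check_url is just a substring test; for example (hypothetical URLs):
#   check_url('https://shop.example.com/products.json')    -> True
#   check_url('https://shop.example.com/collections/all')  -> False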


def scrape_site(url, headers, proxy):
    """
    Scrapes the specified Shopify site and adds items to array
Example #7
from fp.fp import FreeProxy
from requests.exceptions import ProxyError


total_sleeps = 0
latest_query_id = 0
webstyles = {}
sleep_time = 1800
scraped_queries = []
# the data structure for storing queries: {retailer: {30: [queryList], 1: [queryList2], 2: [queryList3]}}
queries = {}

default_header = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', }

current_header = {"User-Agent": "Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0",}
first_proxy = FreeProxy(country_id=['US']).get()
current_proxy = {"http": first_proxy, }


class CustomFormatting:
    @staticmethod
    def format(listing):
        return listing


class CustomFilter:
    @staticmethod
    def filter(filtered_listing, unfiltered_listing):
        return filtered_listing

Example #8
 def get_proxy():
     while 1:
         proxy = FreeProxy(country_id=['US'], rand=True).get()
         if proxy is not None:
             break
     return proxy
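The value returned by FreeProxy(...).get() is a full proxy URL such as 'http://1.2.3.4:8080' (other snippets here strip the scheme with proxy[7:]), so it can be passed straight into a requests proxies mapping. A minimal sketch along those lines; the target URL is only a placeholder:

import requests
from fp.fp import FreeProxy

proxy = FreeProxy(country_id=['US'], rand=True).get()  # e.g. 'http://1.2.3.4:8080'
response = requests.get('https://httpbin.org/ip',
                        proxies={'http': proxy, 'https': proxy},
                        timeout=10)
print(response.status_code, response.text)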
Example #9
 def test_invalid_proxy(self):
     test = FreeProxy()
     test.get_proxy_list = MagicMock(return_value=['111.111.11:2222'])
     self.assertEqual(
         "There are no working proxies at this time.", test.get())
Example #10
 def test_empty_proxy_list(self):
     test = FreeProxy()
     test.get_proxy_list = MagicMock(return_value=[])
     self.assertEqual(
         "There are no working proxies at this time.", test.get())
Example #11
 def get_proxies():
     print('getting proxies...')
     proxy = FreeProxy(country_id='US').get()
     proxies = {'http': proxy}
     print(f'obtained proxies: {proxies}')
     return proxies
Example #12
def scrape_scholar(query, pages=0, max_proxy_tries=5, log_path=''):
    '''
    Name: scrape_scholar
    Description: Searches Google Scholar for the query and returns data for the results.
    Input:
    @query: search term
    @pages: number of pages (10 articles per page) to request; 0 means no limit
    @max_proxy_tries: how many times to retry with a new proxy before giving up
    @log_path: suffix appended to the query name to build the log file name
    Output: A pandas DataFrame with one paper per row
    '''

    generator = FreeProxy(rand=True)

    page_size = 10

    # create log file to write errors to
    log = open(f'{query}' + log_path + '.txt', 'w+')

    # initialize list which will contain all article data and be used for DataFrame
    rows = []

    # the number of the current result being pulled from google scholar
    index = 0

    results = str(1)
    
    num_tries = 0
    while num_tries<max_proxy_tries:
        # try-catch block that allows errors to be written in log file if they occur
        try:
            # proxy = generator.get()
            # print(proxy)

            # pg = ProxyGenerator()
            # pg.SingleProxy(http = "http://157.245.203.17:3128")
            scholarly.use_proxy(None)

            # creates a generator object for results for the query
            results = scholarly.search_pubs(query) #, start=0)

            # detects whether the limit has been passed, if there is one
            while not pages or index<page_size*pages:

                result = next(results)

                # retrieves current results object
                curr_result = result.bib

                #instantiates current row container
                row = dict()

                # passes link to article
                row['Link'] = curr_result['url'] if 'url' in curr_result else np.nan

                # title of paper, removes quotes at the start and end if there
                row['Title'] = curr_result['title'] if 'title' in curr_result else np.nan

                # True if pdf is available, False otherwise
                # row['Accessible'] = bool(paper['repositoryDocument']['pdfStatus'])

                # page number paper would be on on the website assuming 10 papers per page
                row['Page number'] = index//page_size + 1

                # list of [initials last-name]
                row['Authors'] = curr_result['author'] if 'author' in curr_result else np.nan
                
                # checks published year
                row['Publish year'] = int(curr_result['year']) if 'year' in curr_result else np.nan

                # number of citations
                row['Citations'] = curr_result['cites'] if 'cites' in curr_result else np.nan

                # links to related articles
                row['Related articles'] = 'https://scholar.google.com/scholar?q=related:' + result.url_scholarbib.split(':')[1] + ':scholar.google.com/&scioq=' + query + '&hl=en&as_sdt=0,14'

                # checks if publisher is available
                row['Publisher'] = curr_result['venue'] if 'venue' in curr_result else np.nan

                rows.append(row)
                index += 1
            # returns pandas DataFrame where each row is 1 paper
            return pd.DataFrame(rows)

        # write any errors to log file
        except Exception as e:
            # log.write(str(e))
            # print(str(e))
            # traceback.print_exc(file=sys.stdout)
            # log.write('\n')
            if rows:
                return pd.DataFrame(rows)
            if str(e) == "Cannot fetch the page from Google Scholar.":
                num_tries += 1
                continue
            else:
                return pd.DataFrame(rows)
    # returns partially filled DataFrame if failed
    return pd.DataFrame(rows)
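A short usage sketch for scrape_scholar above; the query is a placeholder, and pandas (pd) and scholarly are assumed to be imported as the snippet itself expects:

df = scrape_scholar('residual networks', pages=1)
if not df.empty:
    print(df[['Title', 'Publish year', 'Citations']].head())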
Example #13
from fp.fp import FreeProxy
import urllib.request, urllib.error, socket

proxy = FreeProxy(country_id=['GB', 'US']).get()
prox = proxy[7:]  # strip the 'http://' scheme prefix


def is_bad_proxy(pip):
    try:
        proxy_handler = urllib.request.ProxyHandler({'http': pip})
        opener = urllib.request.build_opener(proxy_handler)
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        urllib.request.install_opener(opener)
        sock = urllib.request.urlopen('http://www.google.com')
    except urllib.error.HTTPError as e:
        print('Error code: ', e.code)
        return e.code
    except Exception as detail:

        print("ERROR:", detail)
        return 1
    return 0


def check_proxy():
    prox = [proxy[7:]]
    for item in prox:
        if is_bad_proxy(item):
            print("Bad Proxy")
        else:
            print(item)
Example #14
def get_proxy():
    curl = FreeProxy(country_id=['US', 'RU'], timeout=1).get()
    return {"http": curl, "https": curl}
Example #15
# Parse the author names
file_in = sys.argv[1]
authornames = []
with open(file_in, 'r') as f:
    for line in f:
        line = line.split('\n')[0]
        authornames.append(line)

# Indicate what data to get (see Author class in https://pypi.org/project/scholarly/)
sections = ['basics', 'indices']
max_homonyms = 5

#pip install free-proxy
from fp.fp import FreeProxy
proxy = FreeProxy(rand=True, timeout=1, country_id=['NO']).get()
scholarly.use_proxy(http=proxy, https=proxy)

# Loop through the authors
t0 = time.time()
data = list({})
for i, authname in enumerate(authornames):
    hindices = []
    emails, names, affiliations, citedbys = [], [], [], []
    try:
        search_query = scholarly.search_author(authname)
        for _ in range(max_homonyms):
            try:
                author = next(search_query)
                tmp_data = author.fill(sections=sections)
                hindices.append(tmp_data.hindex)
Example #16
 def get_torrent(self, url):
     print(
         colored('\n[-] CONNECTING TO PROXY SERVER',
                 'green',
                 attrs=['bold']))
     l = []
     USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"
     headers = {"user-agent": USER_AGENT}
     while 1:
         a = FreeProxy(rand=True, timeout=0.3).get().split('://')[1]
         if a in l:
             continue
         l.append(a)
         try:
             response = get(url,
                            proxies={
                                "http": a,
                                "https": a
                            },
                            headers=headers,
                            timeout=7)
         except:
             print(
                 colored('\n[-] ERROR CONNECTING TO PROXY ' + a +
                         '. TRYING WITH NEXT PROXY',
                         'white',
                         'on_red',
                         attrs=['bold']))
             continue
         if response.content:
             try:
                 s = BeautifulSoup(response.content, 'html.parser')
                 torrent = s.find(
                     'a', {'title': 'Download attachment'})['href']
             except:
                 print(
                     colored('\n[-] ERROR CONNECTING TO PROXY ' + a +
                             '. TRYING WITH NEXT PROXY',
                             'white',
                             'on_red',
                             attrs=['bold']))
                 continue
             print(
                 colored('\n[-] DOWNLOADING TORRENT FILE',
                         'green',
                         attrs=['bold']))
             try:
                 r = get(torrent,
                         allow_redirects=True,
                         proxies={
                             "http": a,
                             "https": a
                         },
                         headers=headers)
             except:
                 continue
             open(self.temp_file, 'wb').write(r.content)
             print(
                 colored('\n[-] DOWNLOADED TORRENT FILE',
                         'green',
                         attrs=['bold']))
             return self.temp_file
         else:
             continue
     return 0
Example #17
 def add_free_proxies(self):
     for proxy in FreeProxy().get_proxy_list():
         self.add(proxy)
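For reference, get_proxy_list() returns unverified candidates as plain 'ip:port' strings (that is what the mocked tests above rely on); a stand-alone sketch of the same collection idea, with the pool replaced by a set:

from fp.fp import FreeProxy

pool = set()
for proxy in FreeProxy().get_proxy_list():
    pool.add(proxy)  # entries look like '111.111.11.11:2222'
print(f'{len(pool)} candidate proxies collected')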
Example #18
                                  proxies={
                                      'http': проксиадрес,
                                      'https': проксиадрес
                                  })  # make the request to the site through the proxy
            запрос.encoding = 'utf-8'  # re-encode the response so the Cyrillic text displays correctly
            суп = BeautifulSoup(
                запрос.text, 'html.parser'
            )  # create the BeautifulSoup object (суп); the second argument selects the parser ('lxml' is an alternative)
            break  # leave the loop
        except:  # an error occurred (no access through the current proxy)
            print("Could not connect through proxy:", проксиадрес,
                  "Continuing.")  # report it
            проксиадрес = FreeProxy(rand=True).get()  # fetch a different proxy


проксиадрес = FreeProxy(rand=True).get()  # fetch a working proxy
# PARSE THE ENTRIES
итерация = 1  # page number
while True:  # loop over the pages
    if итерация == 1:
        адрес = "https://funkysouls.org/music/index.html"  # first page
    else:
        адрес = ("https://funkysouls.org/music/page/" + str(итерация) + ".html"
                 )  # subsequent pages
    база = json.loads(open('музыка.json', "r",
                           encoding='utf-8').read())  # open the database
    альбомы = [i['а'] for i in база
               ]  # list the records (albums) already in the database
    предбаза = []  # create an empty staging list
    парсинг(адрес)  # run the parsing function on the URL to obtain the soup
    массив = суп.find_all(
Example #19
from bs4 import BeautifulSoup as scrap
import requests
import time
import schedule
from fp.fp import FreeProxy
proxy = FreeProxy().get()
list = []
counter = 1


def ScrapTorrent():
    global counter
    print(counter)
    counter += 1
    url = 'https://www.oxtorrent.cc/'
    response = requests.get(url, proxies={'http': proxy, 'https': proxy})
    soup = scrap(response.text, 'lxml')
    table = soup.findAll('table', {'class': 'table table-hover'})
    for i in range(0, 7):
        for item in table[i].findAll('a'):
            list.append(item.text)
        if i == 0:
            print('Films : ')
        elif i == 1:
            print('Series : ')
        elif i == 2:
            print('Musiques : ')
        elif i == 3:
            print('Jeux pc  : ')
        elif i == 4:
            print('Jeux consoles : ')
Example #20
import json
import logging
import traceback

from random_user_agent.user_agent import UserAgent
from random_user_agent.params import SoftwareName, HardwareType
from fp.fp import FreeProxy

import config

logging.basicConfig(filename='footlocker.log',
                    filemode='a',
                    format='%(asctime)s - %(name)s - %(message)s',
                    level=logging.DEBUG)

software_names = [SoftwareName.CHROME.value]
hardware_type = [HardwareType.MOBILE__PHONE]
user_agent_rotator = UserAgent(software_names=software_names,
                               hardware_type=hardware_type)

proxy_obj = FreeProxy(country_id=[config.FREE_PROXY_LOCATION])

INSTOCK = []


def test_webhook():
    """
    Sends a test Discord webhook notification
    """
    data = {
        "username":
        config.USERNAME,
        "avatar_url":
        config.AVATAR_URL,
        "embeds": [{
            "title": "Testing Webhook",
Example #21
def getVideos(proxyStrategy=ProxyStrategy.NONE.value):
    proxy = None

    if proxyStrategy == ProxyStrategy.FREE_PROXY.value:
        try:
            proxy = FreeProxy(https=True, rand=True).get()
        except Exception as e:
            if len(triedFreeProxyProxies) < numberOfFreeProxyProxiesToTry:
                triedFreeProxyProxies.append(proxy)
                getVideos(ProxyStrategy.FREE_PROXY.value)
                return
            getVideos(ProxyStrategy.PROXYSCRAPE.value)
            return

    if proxyStrategy == ProxyStrategy.PROXYSCRAPE.value:
        try:
            proxy = get_proxy(excluded_proxies=triedProxyScrapeProxies)
        except Exception as e:
            print(
                json.dumps(
                    {'message': 'Couldn\'t find a working Proxy : ' + str(e)}))
            sys.exit()

    videos = []
    try:
        with TikTokApi(proxy=proxy) as api:
            videosGenerator = api.user(username=username).videos(
                count=numberOfVideos)
            for video in videosGenerator:
                videos.append(video)
    except exceptions.CaptchaException:
        if (proxyStrategy == ProxyStrategy.FREE_PROXY.value):
            if len(triedFreeProxyProxies) < numberOfFreeProxyProxiesToTry:
                triedFreeProxyProxies.append(proxy)
                getVideos(ProxyStrategy.FREE_PROXY.value)
                return

        if (proxyStrategy == ProxyStrategy.PROXYSCRAPE.value):
            if len(triedProxyScrapeProxies) < numberOfProxyScrapeProxiesToTry:
                triedProxyScrapeProxies.append(proxy)
                getVideos(ProxyStrategy.PROXYSCRAPE.value)
                return

        if proxyStrategy == lastProxyStrategyIndex:
            print(
                json.dumps(
                    {'message': 'TikTok blocked the request using a Captcha'}))
            sys.exit()

        getVideos(proxyStrategy + 1)
        return
    except exceptions.NotFoundException:
        print(json.dumps({'message': 'User not found'}))
        sys.exit()
    except Exception as e:
        if (proxyStrategy == ProxyStrategy.FREE_PROXY.value):
            if len(triedFreeProxyProxies) < numberOfFreeProxyProxiesToTry:
                triedFreeProxyProxies.append(proxy)
                getVideos(ProxyStrategy.FREE_PROXY.value)
                return

        if (proxyStrategy == ProxyStrategy.PROXYSCRAPE.value):
            if len(triedProxyScrapeProxies) < numberOfProxyScrapeProxiesToTry:
                triedProxyScrapeProxies.append(proxy)
                getVideos(ProxyStrategy.PROXYSCRAPE.value)
                return

        if proxyStrategy == lastProxyStrategyIndex:
            print(json.dumps({'message': str(e)}))
            sys.exit()

        getVideos(proxyStrategy + 1)
        return

    print(json.dumps(videos))
Example #22
    async def __call__(self):
        UserCancel = KeyboardInterrupt
        
        # region various embed types creation
        def publication_embeds(result) -> discord.Embed:
            embed = discord.Embed(
                title=result["bib"]["title"],
                description=result["bib"]["abstract"],
                url=result["eprint_url"]
                if "eprint_url" in result.keys()
                else result["pub_url"],
            )
            embed.add_field(
                name="Authors",
                value=", ".join(result["bib"]["author"]).strip(),
                inline=True,
            )

            embed.add_field(name="Publisher", value=result["bib"]["venue"], inline=True)
            embed.add_field(
                name="Publication Year", value=result["bib"]["pub_year"], inline=True
            )
            embed.add_field(
                name="Cited By",
                value=result["num_citations"]
                if "num_citations" in result.keys()
                else "0",
                inline=True,
            )

            embed.add_field(
                name="Related Articles",
                value=f'https://scholar.google.com{result["url_related_articles"]}',
                inline=True,
            )

            embed.set_footer(text=f"Requested by {self.ctx.author}")
            return embed

        def author_embeds(result) -> discord.Embed:
            embed = discord.Embed(title=result["name"])
            embed.add_field(
                name="Cited By", value=f"{result['citedby']} articles", inline=True
            )
            embed.add_field(name="Scholar ID", value=result["scholar_id"], inline=True)
            embed.add_field(
                name="Affiliation",
                value=result["affiliation"]
                if "affiliation" in result.keys()
                else "None",
                inline=True,
            )
            embed.add_field(
                name="Interests",
                value=f"{', '.join(result['interests']) if 'interests' in result.keys() else 'None'}",
                inline=True,
            )
            embed.set_image(url=result["url_picture"])
            embed.set_footer(text=f"Requested by {self.ctx.author}")
            return embed

        def citation_embeds(result) -> discord.Embed:
            embed = discord.Embed(
                title=result["bib"]["title"],
                description=f"```{scholarly.bibtex(result)}```",
                url=result["eprint_url"]
                if "eprint_url" in result.keys()
                else result["pub_url"],
            )
            embed.set_footer(text=f"Requested by {self.ctx.author}")
            return embed

        # endregion

        try:
            # region user flags processing

            pg = ProxyGenerator()
            proxy = FreeProxy(rand=True, timeout=1, country_id=["BR"]).get()
            pg.SingleProxy(http=proxy, https=proxy)
            scholarly.use_proxy(pg)

            # self.args processing
            if self.args is None:
                results = [next(scholarly.search_pubs(self.query)) for _ in range(5)]
                embeds = list(map(publication_embeds, results))
            elif "author" in self.args:
                results = [
                    next(scholarly.search_author(self.query)) for _ in range(5)
                ]
                embeds = list(map(author_embeds, results))
            elif "cite" in self.args:
                results = scholarly.search_pubs(self.query)
                results = [next(results) for _ in range(5)]
                embeds = list(map(citation_embeds, results))
            else:
                await self.message.edit(content="Invalid flag")
                return
            # endregion

            # sets the reactions for the search result
            if len(embeds) > 1:
                buttons = [[
                    {Button(style=ButtonStyle.grey, label="◀️", custom_id="◀️"): None},
                    {Button(style=ButtonStyle.red, label="🗑️", custom_id="🗑️"): None},
                    {Button(style=ButtonStyle.grey, label="▶️", custom_id="▶️"): None}
                ]]
            else:
                buttons = [[
                    Button(style=ButtonStyle.red, label="🗑️", custom_id="🗑️")
                ]]

            await Sudo.multi_page_system(self.bot, self.ctx, self.message, tuple(embeds), buttons)
            return

        except asyncio.TimeoutError:
            raise
        except (asyncio.CancelledError, discord.errors.NotFound):
            pass
        except scholarly_exceptions._navigator.MaxTriesExceededException:
            await self.message.edit(
                content="Google Scholar is currently blocking our requests. Please try again later"
            )
            Log.append_to_log(self.ctx, f"{self.ctx.command} error", "MaxTriesExceededException")
            return

        except Exception as e:
            await error_handler(self.bot, self.ctx, e, self.query)
        finally:
            return
Example #23
 def test_invalid_proxy(self):
     test = FreeProxy()
     test.get_proxy_list = MagicMock(return_value=['111.111.11:2222'])
     with self.assertRaises(RuntimeError):
         test.get()
Example #24
import time
import logging
import dotenv
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import SoftwareName, HardwareType
from fp.fp import FreeProxy

logging.basicConfig(filename='Footlockerlog.log', filemode='a', format='%(asctime)s - %(name)s - %(message)s',
                    level=logging.DEBUG)

software_names = [SoftwareName.CHROME.value]
hardware_type = [HardwareType.MOBILE__PHONE]
user_agent_rotator = UserAgent(software_names=software_names, hardware_type=hardware_type)
CONFIG = dotenv.dotenv_values()

proxyObject = FreeProxy(country_id=['GB'], rand=True)

INSTOCK = []


def discord_webhook(product_item):
    """
    Sends a Discord webhook notification to the specified webhook URL
    :param product_item: An array of the product's details
    :return: None
    """
    data = {}
    data["username"] = CONFIG['USERNAME']
    data["avatar_url"] = CONFIG['AVATAR_URL']
    data["embeds"] = []
    embed = {}
Example #25
import requests
import time
from fp.fp import FreeProxy
from requests import ConnectionError
from itertools import cycle
import os


proxy_list = cycle(FreeProxy().get_proxy_list())


def get_html_by_url(url, headers=None):
    for proxy in proxy_list:
        try:
            response = requests.get(url, headers=headers, proxies={'http': proxy})
            if response.ok:
                return response.text
        except ConnectionError:
            time.sleep(.5)
            return get_html_by_url(url, headers)


def write_links_to_file(filename, header, links):
    path_to_file = f'{os.getcwd()}/links/{filename}'
    mode = 'a' if os.path.exists(path_to_file) else 'w'
    with open(path_to_file, mode) as file:
        file.write(header + '\n')
        file.write('\n'.join(links) + '\n')
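A minimal usage sketch for the two helpers above; the URL, header, file name and link list are placeholders, and write_links_to_file expects a ./links directory:

if __name__ == '__main__':
    os.makedirs('links', exist_ok=True)  # write_links_to_file saves into ./links/
    html = get_html_by_url('https://example.com', headers={'User-Agent': 'Mozilla/5.0'})
    if html:
        write_links_to_file('example_links.txt', 'example.com', ['https://example.com/page1'])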