def getRSS(curso):
    """
    Downloads an XML RSS feed from https://side.utad.pt
    and stores it in feeds/<curso>.xml.

    :param curso: string
    :return: True
    """
    if debug:
        print("getRSS", curso)
    feedRSS = "https://side.utad.pt/rss.pl?" + curso
    feedFile = "feeds/" + curso + ".xml"
    if path.exists(feedFile):
        remove(feedFile)
    try:
        r = ProxyRequests(feedRSS)
        r.get()
        with open(feedFile, 'wb') as f:
            f.write(r.get_raw())
        # Retry if the downloaded file is suspiciously small (likely a proxy error page).
        if path.getsize(feedFile) < 700:
            getRSS(curso)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.ReadTimeout,
            requests.exceptions.ProxyError,
            urllib3.exceptions.MaxRetryError):
        getRSS(curso)
def test_get():
    r = ProxyRequests('https://api.ipify.org')
    r.get()
    assert r.get_status_code() == 200
    try:
        inet_aton(str(r))
    except Exception:
        pytest.fail('Invalid IP address in response')
    print(r.get_proxy_used())
def parse_person():
    data = {}
    for person in models.Person.query.all():  # .filter_by(name_original=None)
        while True:
            try:
                r = ProxyRequests(f'{URL}{person.links}')
            except Exception:
                break
            r.get()
            r.encoding = 'utf-8'
            text = r.request
            soup = BeautifulSoup(text, 'html.parser')
            if not soup.find('h1', {'itemprop': 'name'}):
                continue
            alternateName = soup.find('span', {'itemprop': 'alternateName'})
            if alternateName:
                person.name_original = alternateName.text
            else:
                person.name_original = person.name
            db.session.add(person)
            db.session.commit()
            # The director and actor blocks are identical apart from the anchor,
            # so handle both careers in one loop (create-or-reuse by name).
            list_career = []
            for anchor in ('#director', '#actor'):
                link = soup.find('a', {'href': anchor})
                if not link:
                    continue
                egge = link.text.replace(' ', '')
                career = models.Career.query.filter_by(name=egge).first()
                if not career:
                    career = models.Career(name=egge)
                    db.session.add(career)
                    db.session.commit()
                list_career.append(career)
            person.career.clear()
            for i in list_career:
                person.career.append(i)
            db.session.add(person)
            db.session.commit()
            break
def fetch_with_proxy(url, headers):
    r = ProxyRequests(url)
    if headers:
        r.set_headers(headers)
        r.get_with_headers()
    else:
        r.get()
    status_code = r.get_status_code()
    if status_code != 200:
        print(f"{status_code}: {url}")
    return r.get_raw()
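# Usage sketch for fetch_with_proxy: the URL and the User-Agent header below are
# illustrative assumptions, not values taken from the original project.
example_headers = {"User-Agent": "Mozilla/5.0"}
body = fetch_with_proxy("https://api.ipify.org", example_headers)
print(body)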
def get_current():
    url = "https://acrnm.com"
    site = ProxyRequests(url)
    failures = 0
    while True:
        print("Checking if new products are on ACRNM on proxy: {}".format(site.proxy_used))
        if not site.get().ok:
            print("Proxy or website is unresponsive. Trying again...")
            failures += 1
            site.proxy_used = site.sockets.pop(0)
            continue
        else:
            failures = 0
        tree = html.fromstring(str(site))
        tree.make_links_absolute(url)
        prod_names = tree.xpath("//div[@class='name']/text()")
        prod_urls = tree.xpath("//a[contains(concat(' ', normalize-space(@class), ' '), ' tile ')]/@href")
        new, restock = db.new_items(prod_names, prod_urls)
        if new:
            new = list(zip(*new))
            notify(new[1], restock)
            db.insert_products(new[0])
        else:
            notify(new, restock)
        db.insert_current(prod_names, prod_urls)
def cricket(mid):
    global stop_threads
    while True:
        try:
            r = ProxyRequests('http://mapps.cricbuzz.com/cbzios/match/' + mid + '/leanback.json')
            r.get()
            data = json.loads(str(r))
            bat = data['bat_team']['name']
            bow = data['bow_team']['name']
            score = int(data["comm_lines"][0]["score"])
            wicket = int(data["comm_lines"][0]["wkts"])
            over = float(data['bat_team']['innings'][0]['overs'])
            detailed_score = (data["comm_lines"][0]["score"] + "/" +
                              data["comm_lines"][0]["wkts"] + " " +
                              data['bat_team']['innings'][0]['overs'])
            try:
                bowler = data['bowler'][0]['name']
                batname0 = data['batsman'][0]['name']
                batname1 = data['batsman'][1]['name']
                bat0score = data['batsman'][0]['r']
                bat1score = data['batsman'][1]['r']
                bat0ball = data['batsman'][0]['b']
                bat1ball = data['batsman'][1]['b']
                bowler = bow + ":" + data['bowler'][0]['name']
                batters = (batname0 + "(" + bat0score + "-" + bat0ball + ")" +
                           batname1 + "(" + bat1score + "-" + bat1ball + ")")
                detailed_score = (bat + ":" + data["comm_lines"][0]["score"] + "/" +
                                  data["comm_lines"][0]["wkts"] + " " +
                                  data['bat_team']['innings'][0]['overs'])
            except Exception:
                print("An exception occurred fetching either batters or bowler")
            try:
                txt = bowler + " " + batters
                print(detailed_score + " " + txt)
                aio.send('message', detailed_score + " " + txt)
            except Exception:
                print("An exception occurred sending")
        except Exception:
            print("An exception occurred at start")
        time.sleep(10)
        if stop_threads:
            print('Stopped Cricket')
            break
def all_team_names(url_root):
    url = os.path.join(url_root, "teams") + "/"
    r = ProxyRequests(url)
    r.get()
    # print proxy/IP used
    print(r.get_proxy_used())
    soup = BeautifulSoup(r.get_raw(), "html.parser")
    tabs = soup.find_all("table")
    # Active franchises are in tabs[0] because the page has two tables; pd.read_html returns a list.
    df_active = pd.read_html(tabs[0].prettify())[0]
    # filter to max years, which is the main franchise. Do you need this?
    # Extract all the hrefs for the active teams:
    team_a_links = tabs[0].find_all("a", href=True)
    team_names = {
        t["href"].replace("teams", "").replace("/", ""): t.text
        for t in team_a_links
        if "/teams/" in t["href"]
    }
    return team_names
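# Usage sketch for all_team_names: the root URL below is only an illustrative
# assumption (any sports-reference-style site with a /teams/ index page), not a
# value taken from the original project.
teams = all_team_names("https://www.basketball-reference.com")
print(teams)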
def listofMatches():
    try:
        url = 'http://mapps.cricbuzz.com/cbzios/match/livematches'
        r = ProxyRequests(url)
        r.get()
        data = json.loads(str(r))
        matches = []
        match_id = []
        for i in data['matches']:
            matches.append(i)
        for i in matches:
            t = time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(int(i['header']['start_time'])))
            match_id.append(i['match_id'])
            print(i['match_id'] + ' ' + t + ' ' + i['series_name'])
        return match_id[0]
    except Exception:
        print("An exception occurred auto-updating mid")
        time.sleep(2)
        return listofMatches()
def prefetch():
    try:
        print("Pre-fetching")
        print(config.ur)
        r = ProxyRequests(config.ur)
        r.get()
        data = json.loads(str(r))
        config.series_name = data["series_name"]
        config.bat_team_name = data['bat_team']['name']
        config.twicket = int(data["comm_lines"][0]["wkts"]) + 1
        config.tover = int(float(data['bat_team']['innings'][0]['overs'])) + 1
        config.series_name = "--" + config.series_name + "--"
        print(config.series_name + '\n' + config.bat_team_name)
    except Exception:
        print("An exception occurred while pre-fetching")
        time.sleep(5)
        prefetch()
def crawl_img(image_row):
    asin = image_row["asin"]
    url_image_hq = image_row["url_image_hq"]
    print(asin)
    r = ProxyRequests(url_image_hq)
    r.get()
    print("Proxy used: " + str(r.get_proxy_used()))
    if r.get_status_code() == 200:
        print(r.get_status_code())
        # save image locally
        with open("data/shirts/shirt.jpg", 'wb') as f:
            f.write(r.get_raw())
        # df_img = pd.DataFrame(data={"asin": [asin], "url": ["https://storage.cloud.google.com/5c0ae2727a254b608a4ee55a15a05fb7/mba-shirts/" + marketplace + "/" + asin + ".jpg"], "url_gs": ["gs://5c0ae2727a254b608a4ee55a15a05fb7/mba-shirts/" + marketplace + "/" + asin + ".jpg"], "url_mba_lowq": [url_image_lowq], "url_mba_hq": [url_image_hq], "timestamp": [datetime.datetime.now()]}, dtype=np.object)
        # df_imgs = df_imgs.append(df_img)
        # utils.upload_blob("5c0ae2727a254b608a4ee55a15a05fb7", "data/shirts/shirt.jpg", "mba-shirts/" + marketplace + "/" + asin + ".jpg")
        print("Successfully crawled image: %s" % (asin))
    else:
        print("Could not crawl image: %s" % (asin))
def codechecker(code):
    try:
        r = ProxyRequests(
            "https://discordapp.com/api/v6/entitlements/gift-codes/%s"
            "?with_application=false&with_subscription_plan=true" % (code))
        r.get()
        JsonResponse = r.get_json()
        Response = JsonResponse["message"]
        if Response == "Unknown Gift Code":
            print(f"\x1b[31;1mInvalid Code {code}\n")
            return
        if Response == "You are being rate limited.":
            print("\x1b[31;1mYou Are Being Rate Limited.")
            return
        else:
            print(f"\x1b[31;1mFound Working Code {code} Site Response:{Response}\n")
            # Redeem attempt (sketch: proxy_requests takes no json/headers kwargs in its
            # constructor, so set the auth header and POST the payload explicitly;
            # `message` and `token` are expected to be defined elsewhere).
            redeem = ProxyRequests(
                f"https://discordapp.com/api/v6/entitlements/gift-codes/{code}/redeem")
            redeem.set_headers({'authorization': token})
            redeem.post_with_headers({"channel_id": str(message.channel.id)})
            redeemedcode = str(redeem)
            return
    except Exception as e:
        print(e)
        return
def parse_links():
    page = 1
    last_page = 1
    data = {}
    while page <= last_page:
        r = ProxyRequests(f'{URL}/top/navigator/m_act[rating]/1%3A/order/rating/page/{page}/#results')
        r.get()
        r.encoding = 'utf-8'
        text = r.request
        soup = BeautifulSoup(text, 'html.parser')
        # Determine the last page once, from the pagination links.
        if last_page == 1:
            try:
                last_link = soup.find_all('li', {'class': 'arr'})[-1].find('a').get('href')
                last_page = int(re.findall(r'\d{2,}', last_link)[0])
            except Exception:
                continue
        movie_link = soup.find_all('div', {'class': '_NO_HIGHLIGHT_'})
        if not movie_link:
            continue
        for i in movie_link:
            i_soup = BeautifulSoup(f'b{i}', 'html.parser').find('div', {'class': 'name'}).find('a')
            i_text = i_soup.text
            i_link = i_soup.get('href')
            id_film = int(re.findall(r'\d+', i_link)[1])
            if models.Film.query.filter_by(id_film=id_film).first() is None:
                film = models.Film(id_film=id_film, links=i_link, name=i_text)
                db.session.add(film)
                try:
                    db.session.commit()
                except Exception:
                    db.session.rollback()
                data[i_text] = {page: i_link}
                continue
        page += 1
    with open('data.txt', 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False)
def main(argv):
    parser = argparse.ArgumentParser(description='')
    parser.add_argument(
        'marketplace',
        help='Shortcut of mba marketplace. I.e "com" or "de", "uk"',
        type=str)
    parser.add_argument(
        '--number_images',
        default=10,
        type=int,
        help='Number of images that should be crawled. If 0, every image that is not already crawled will be crawled.')

    # if python file path is in argv remove it
    if ".py" in argv[0]:
        argv = argv[1:len(argv)]

    # get all arguments
    args = parser.parse_args(argv)
    marketplace = args.marketplace
    number_images = args.number_images

    # get already crawled asin list
    # asin_crawled_list = get_asin_images_crawled("mba_de.products_images")

    df_images = get_images_urls_not_crawled(marketplace)

    # if number_images is equal to 0, every image should be crawled
    if number_images == 0:
        number_images = len(df_images)

    for j, image_row in df_images.iloc[0:number_images].iterrows():
        asin = image_row["asin"]
        url_image_hq = image_row["url_image_hq"]
        url_image_lowq = image_row["url_image_lowq"]
        # headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}
        # proxy_list = get_proxies("de", True)
        # proxy = next(iter(proxy_list))
        # proxies = {"http": proxy, "https": proxy}
        r = ProxyRequests(url_image_hq)
        r.get()
        print("Proxy used: " + str(r.get_proxy_used()))
        if r.get_status_code() == 200:
            print(r.get_status_code())
            # save image locally
            with open("data/shirts/shirt.jpg", 'wb') as f:
                f.write(r.get_raw())
            utils.upload_blob(
                "5c0ae2727a254b608a4ee55a15a05fb7", "data/shirts/shirt.jpg",
                "mba-shirts/" + marketplace + "/" + asin + ".jpg")
            df_img = pd.DataFrame(data={
                "asin": [asin],
                "url": ["https://storage.cloud.google.com/5c0ae2727a254b608a4ee55a15a05fb7/mba-shirts/" + marketplace + "/" + asin + ".jpg"],
                "url_gs": ["gs://5c0ae2727a254b608a4ee55a15a05fb7/mba-shirts/" + marketplace + "/" + asin + ".jpg"],
                "url_mba_lowq": [url_image_lowq],
                "url_mba_hq": [url_image_hq],
                "timestamp": [datetime.datetime.now()]
            }, dtype=np.object)
            df_img['timestamp'] = df_img['timestamp'].astype('datetime64')
            df_img.to_gbq("mba_" + marketplace + ".products_images",
                          project_id="mba-pipeline", if_exists="append")
            print("Successfully crawled image: %s | %s of %s" %
                  (asin, j + 1, number_images))
        else:
            print("Could not crawl image: %s | %s of %s" %
                  (asin, j + 1, number_images))
        # response = requests.get(quote_plus(url_image_hq), proxies=proxies, headers=headers, stream=True)

    test = 0
    bucket_name = "5c0ae2727a254b608a4ee55a15a05fb7"
    folder_name = "mba-shirts"
    file_path = "mba-pipeline/crawler/mba/data/test.jpg"
    # upload_blob("5c0ae2727a254b608a4ee55a15a05fb7", file_path, "mba-shirts/test.jpg")

    test = 0
def parse_films():
    engine = create_engine('sqlite:///:memory:', echo=True)
    data = {}
    for film in models.Film.query.filter_by(rating_kp=None).all():
        # while True:
        try:
            r = ProxyRequests(f'{URL}{film.links}')
        except Exception:
            break
        r.get()
        r.encoding = 'utf-8'
        text = r.request
        soup = BeautifulSoup(text, 'html.parser')
        genres = soup.find('span', {'itemprop': 'genre'})
        if genres:
            genres = genres.find_all('a')
        countrys = soup.find_all(
            'div', {'style': 'position: relative'})[1].find_all('a')
        persons = soup.find_all('li', {'itemprop': 'actors'})
        for director in soup.find_all('td', {'itemprop': 'director'}):
            persons.append(director)
            break

        # Genres: create any missing Genre rows, retrying the commit on failure.
        list_genres = []
        for genre in genres:
            if not models.Genre.query.filter_by(name=genre.text).first():
                while True:
                    new_genre = models.Genre(name=genre.text)
                    db.session.add(new_genre)
                    try:
                        db.session.commit()
                        list_genres.append(new_genre)
                        break
                    except Exception:
                        db.session.rollback()
            else:
                list_genres.append(
                    models.Genre.query.filter_by(name=genre.text).first())

        # Countries: same create-or-reuse pattern as genres.
        list_countrys = []
        for country in countrys:
            if not models.Country.query.filter_by(name=country.text).first():
                while True:
                    new_country = models.Country(name=country.text)
                    db.session.add(new_country)
                    try:
                        db.session.commit()
                        list_countrys.append(new_country)
                        break
                    except Exception:
                        db.session.rollback()
            else:
                list_countrys.append(
                    models.Country.query.filter_by(name=country.text).first())

        # People (actors plus the first director): create-or-reuse by id_person_kp.
        list_person = []
        for person in persons:
            if person.find('a').text.replace(' ', '') == '...':
                break
            person_link = person.find('a').get('href')
            id_person_kp = int(re.findall(r'\d+', person_link)[0])
            if not models.Person.query.filter_by(id_person_kp=id_person_kp).first():
                while True:
                    # person_link = person.find('a').get('href')
                    if models.Person.query.filter_by(id_person_kp=id_person_kp).first():
                        break
                    new_person = models.Person(name=person.text,
                                               links=person_link,
                                               id_person_kp=id_person_kp)
                    db.session.add(new_person)
                    try:
                        db.session.commit()
                        list_person.append(new_person)
                        break
                    except Exception:
                        db.session.rollback()
            else:
                existing = models.Person.query.filter_by(id_person_kp=id_person_kp).first()
                if existing not in list_person:
                    list_person.append(existing)

        # Film details: keep retrying until the parsed fields commit successfully.
        # if not film.description:
        while True:
            try:
                film.name = soup.find('span', {'class': 'moviename-title-wrapper'}).text
                alternative = soup.find('span', {'class': 'alternativeHeadline'}).text
                film.name_original = alternative if alternative else film.name
                film.description = soup.find('div', {'itemprop': 'description'}).text.replace(chr(151), '-')
                film.rating_kp = float(soup.find('span', {'class': 'rating_ball'}).text)
                film.rating_imdb = float(re.findall(
                    r'[\d][^ ]+',
                    soup.find('div', {'style': 'color:#999;font:100 11px tahoma, verdana'}).text)[0])
                film.date_released = int(soup.find('div', {'style': 'position: relative'}).find('a').text)
                try:
                    db.session.commit()
                except Exception:
                    db.session.rollback()
                    continue
                film.genre.clear()
                film.country.clear()
                film.person.clear()
                while True:
                    for i in list_genres:
                        film.genre.append(i)
                    for i in list_countrys:
                        film.country.append(i)
                    for i in list_person:
                        film.person.append(i)
                    db.session.add(film)
                    try:
                        db.session.commit()
                        break
                    except Exception:
                        db.session.rollback()
                break
            except Exception:
                db.session.rollback()
def score():
    global bow
    try:
        r = ProxyRequests(config.ur)
        r.get()
        data = json.loads(str(r))
        score = int(data["comm_lines"][0]["score"])
        wicket = int(data["comm_lines"][0]["wkts"])
        over = float(data['bat_team']['innings'][0]['overs'])
        detailed_score = (config.bat_team_name + " " + data["comm_lines"][0]["score"] + "/" +
                          data["comm_lines"][0]["wkts"] + " " +
                          data['bat_team']['innings'][0]['overs'])
        print(detailed_score, end=" ")
        try:
            bowler = data['bowler'][0]['name']
            print("B:" + bowler)
            batname0 = data['batsman'][0]['name']
            batname1 = data['batsman'][1]['name']
            bat0score = data['batsman'][0]['r']
            bat1score = data['batsman'][1]['r']
            bat0ball = data['batsman'][0]['b']
            bat1ball = data['batsman'][1]['b']
            bowler = data['bowler'][0]['name']
            batters = (batname0 + "*(" + bat0score + "-" + bat0ball + ") " +
                       batname1 + "(" + bat1score + "-" + bat1ball + ")")
            print(batters)
            fputOnRdb(detailed_score + " B: " + bowler + "\n" + batters +
                      "\nRecent:\n" + data['prev_overs'])
        except Exception:
            print("An exception occurred fetching either batters or bowler")
        try:
            # Remember the bowler at the half-over mark so the end-of-over message
            # reports who bowled it.
            if over == (config.tover - 1.0 + 0.5):
                bow = bowler
            if over == config.tover:
                prev_overs = data['prev_overs']
                prev_over = prev_overs.split('|')
                msg = detailed_score + " B:" + bow + "\n" + batters + "\n" + prev_over[-1]
                print(msg)
                notify(msg)
                config.tover = config.tover + 1
                fbpush(msg)
                updateRegIds()
                time.sleep(10)
            if wicket == config.twicket:
                msg = ("wicket " + str(config.twicket) + " " + data['last_wkt_name'] + " " +
                       data['last_wkt_score'] + " B: " + bowler + "\n" + detailed_score)
                fbpush(msg)
                notify(msg)
                config.twicket = config.twicket + 1
                time.sleep(15)
            if int(over + 1) != config.tover:
                updateRegIds()
                prefetch()
            if (wicket + 1) != config.twicket:
                updateRegIds()
                prefetch()
        except Exception:
            print("An exception occurred while trying to notify")
    except Exception:
        print("An exception occurred fetching score")
def rotate_proxy(test_url=BASE_URL + '/version'):
    rotator = ProxyRequests(test_url)
    rotator.get()
    proxy = rotator.get_proxy_used()
    proxies = {'http': 'http://%s' % proxy, 'https': 'https://%s' % proxy}
    return proxies
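# Usage sketch, assuming rotate_proxy() returns the proxies mapping as above and
# that `requests` is imported; the timeout value is an illustrative choice.
proxies = rotate_proxy()
resp = requests.get(BASE_URL + '/version', proxies=proxies, timeout=10)
print(resp.status_code)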
from proxy_requests import ProxyRequests
from proxyValidator import ProxyValidator

# proxyInstance = ProxyValidator(['207.154.231.217:3128'])
# print(proxyInstance.validated_proxies)

r = ProxyRequests("https://api.ipify.org")
r.get()  # get() populates the response; print the object itself to see the body
print(r)
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10',
    'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
    'Opera/9.80 (Macintosh; Intel Mac OS X 10.14.1) Presto/2.12.388 Version/12.16',
    'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14',
]
user_agent = random.choice(user_agent_list)
# proxy_list = get_proxies()
# proxy = random.choice(proxy_list)
# request = urllib.request.Request(url, headers={'User-Agent': user_agent})
# response = urllib.request.urlopen(request)
# response = requests.get(url, proxies={"http": proxy, "https": proxy}, headers={'User-Agent': user_agent})
r = ProxyRequests(url)
r.get()
html = str(r)  # response.content
soup = BeautifulSoup(html, 'html.parser')
# Walk the sibling chain down to the target <b> cell; the XPath below documents the location:
# /html/body/table[3]/tbody/tr[1]/td/table/tbody/tr[7]/td/table/tbody/tr[11]/td[6]/b
print(
    soup.contents[36].table.tr
    .next_sibling.next_sibling.next_sibling.next_sibling.next_sibling
    .next_sibling.next_sibling.next_sibling.next_sibling.next_sibling
    .next_sibling.next_sibling.next_sibling.next_sibling.next_sibling
    .next_sibling.next_sibling.next_sibling.next_sibling.next_sibling
    .td
    .next_sibling.next_sibling.next_sibling.next_sibling.next_sibling
    .next_sibling.next_sibling
    .b.string
)