def run():
    """Hit the two blog URLs once through Tor, report the exit IP, then rotate.

    Relies on module-level globals: ``site``, ``site1``, ``headers`` (a list of
    header dicts) and a counter ``i`` -- TODO(review): confirm these exist at
    call time.
    """
    tr = TorRequest(password='******')
    # One view per URL; TLS certificate verification stays enabled.
    response = tr.get(site, headers=headers[0], verify=True)
    response = tr.get(site1, headers=headers[0], verify=True)
    # Bug fix: original used a Python 2 `print` statement, and concatenated
    # str with `.content` (bytes) -- both fail on Python 3. Use print() and
    # the decoded `.text`.
    print('[' + str(i) + ']' + ' Blog View Added With IP:'
          + tr.get('http://ipecho.net/plain').text)
    tr.reset_identity()
def tor_request(url):
    """Fetch *url* anonymously through the local Tor proxy.

    Args:
        url (str): address to fetch.

    Returns:
        Response: the response object from the GET request.
    """
    session = TorRequest(password="******")
    exit_ip = session.get("http://ipecho.net/plain").text
    print(f"Scrapped {url} with Ip Address", exit_ip)
    result = session.get(url)
    session.reset_identity()
    return result
def tor():
    """Return "Country - City" for the current Tor exit node."""
    tor_req = TorRequest()
    ip_resp = tor_req.get("http://httpbin.org/ip")
    ip_address = json.loads(ip_resp.content)["origin"]
    # Geolocate the exit IP with the (non-Tor) ip-api service.
    geo = json.loads(
        requests.get(f"http://ip-api.com/json/{ip_address}").content)
    return f"{geo['country']} - {geo['city']}"
def get_last_page_num(stock):
    """Return the last page number of Naver daily-price listings for *stock*.

    NOTE: the actual pagination parsing is commented out upstream, so this
    currently always returns 1 after priming a Tor request for page 1.
    """
    tor_req = TorRequest(proxy_port=9050, ctrl_port=9051, password=None)
    agent = {'User-Agent': random.choice(browsers)}
    first_page = ("https://finance.naver.com/item/sise_day.nhn?code=%s&page=1"
                  % stock.code)
    tor_req.get(first_page, headers=agent)
    # Disabled pagination parsing kept for reference:
    # page_re = re.compile(r'page=(\d+)')
    # s = BeautifulSoup(r.text, 'lxml')
    # rr = s.find('td', {"class": "pgRR"})
    # rr_href = rr.a['href']
    # m = re.search(r.text, rr_href)
    # return int(m[1])
    return 1
def tor_identity():
    """Print the real IP, rotate the Tor circuit, and store the new exit IP
    in the module-level global ``ip``."""
    from torrequest import TorRequest
    global ip
    print("Loading new Tor identity ...")
    tor_req = TorRequest()
    # Baseline address fetched without Tor for comparison.
    print("My Original IP Address:",
          requests.get('http://ipecho.net/plain').text)
    tor_req.reset_identity()  # Reset Tor identity
    fresh = tor_req.get('http://ipecho.net/plain')
    ip = fresh.text
    print("New Ip Address", fresh.text)
class Ecosia:
    """Ecosia search client that can route requests through Tor exit nodes.

    Tor is the standard mode but needs local configuration to work; see:
    > https://www.scrapehero.com/make-anonymous-requests-using-tor-python/
    > https://stackoverflow.com/questions/49470261/tor-failing-to-run-with-failed-to-bind-one-of-the-listener-ports
    """

    def __init__(self, isTor=True):
        self.searchURL = "https://www.ecosia.org/search?q="
        # Helper generators for request headers and search terms.
        self.rhGen = RequestHeaderGenerator()
        self.stGen = SearchTermGenerator()
        self.isTor = isTor
        self.searches = 0
        if self.isTor:
            # TOR_PASS is the control password chosen while configuring tor;
            # it must live in a .env file placed next to the .config file.
            self.tr = TorRequest(password=TOR_PASS)

    def _buildUrl(self):
        """Join the URL-quoted search terms into a full search URL."""
        quoted = [urllib.parse.quote(t) for t in self.stGen.getSearchTerm()]
        return self.searchURL + "+".join(quoted)

    def search(self):
        """Request an Ecosia results page and return its HTTP status code.

        Two modes: anonymous requests via Tor (fresh identity per call), or
        plain requests with a newly generated header to avoid blocking.
        """
        url = self._buildUrl()
        if self.isTor:
            self.tr.reset_identity()
            response = self.tr.get(url)
        else:
            print("Tor Option disabled")
            response = requests.get(
                url, headers=self.rhGen.getRandomRequestHeader())
        if int(response.status_code) == 200:
            self.searches += 1
        print(
            f"Performed request to url: {url}, \nGot status code: {response.status_code}"
        )
        return response.status_code
def tor_reset():
    """Rotate the global Tor identity, rebuilding the session on failure.

    Mutates the module-level globals ``tor`` (a TorRequest) and ``header``.
    """
    global tor
    global header
    try:
        tor = TorRequest(password='******')
        tor.reset_identity()
    except Exception:
        # Bug fix: the original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; catch Exception instead. Close the stale
        # session and rebuild from scratch.
        tor.close()
        tor = TorRequest(password='******')
        tor.reset_identity()
    response = tor.get('http://ipecho.net/plain')
    print("Ip Address has changed: ", response.text)
def check_tor(password):
    '''Print to stderr whether the current connection goes through Tor.'''
    import sys
    from torrequest import TorRequest

    # `password` corresponds to HashedControlPassword in the torrc file.
    tor_req = TorRequest(password=password)
    page = tor_req.get('https://check.torproject.org').text
    # The verdict sits on the third line of the page's <title> element.
    title = page[page.find('<title>') + 7:page.find('</title>')]
    status = title.split('\n')[2].lstrip()
    print(status, file=sys.stderr)
def get_financial(tor: TorRequest, company: str) -> pd.DataFrame:
    """Scrape the accounting pages for *company* into one DataFrame.

    One sub-page is fetched per entry of the module-level ``accountant``
    list (base address in the module-level ``url``). On any non-200
    response the whole scrape is retried recursively after a 10 s pause.
    NOTE(review): the retry recursion has no depth limit -- a permanently
    failing page would recurse forever.
    """
    financial_df = pd.DataFrame()
    # writer = pd.ExcelWriter('XLS/{}.xlsx'.format(company))
    for elements_financier in accountant:
        r = tor.get(url + "{}/{}?p={}".format(company, elements_financier, company))
        if r.status_code != 200:
            print(r.status_code, ":", r.reason)
            time.sleep(10)
            # Retry from scratch and return whatever the retry produced.
            financial_df = get_financial(tor, company)
            return financial_df
        soup = BeautifulSoup(r.text, "lxml")
        tables = soup.find_all('table')
        df = pd.DataFrame()
        raw = []
        for table in tables:
            # NOTE: local `tr` is the list of table rows -- easy to confuse
            # with the `tor` session parameter.
            tr = table.find_all('tr')
            for row in tr:
                td = row.find_all('td')
                # Catch if this is a title (single-cell row)
                if len(td) == 1:
                    data = str(td[0].find(text=True))
                    raw.append(data)
                    df = df.append([raw])
                    raw = []
                    continue
                # Add a line built from the temporary `raw` accumulator
                for element in td:
                    data = str(element.find(text=True))
                    raw.append(data)
                df = df.append([raw])
                del raw[:]
        # First column holds the row labels.
        df.set_index([0], inplace=True)
        # df.to_excel(writer, elements_financier)
        financial_df = pd.concat([financial_df, df])
    # writer.save()
    return financial_df
def assign_new_ip(text=False):
    """Reset the identity using TorRequest.

    Parameters
    ----------
    text : bool, optional
        When True, return the IP address tuple (old, morphed) instead of a
        plain success flag.

    Returns
    -------
    tuple(str, str) when ``text`` is True and the IP changed; otherwise a
    bool indicating whether the IP actually changed.
    """
    try:
        # Pass the hashed control password.
        req = TorRequest(password='******')
        # Current public IP, fetched without Tor.
        normal_identity = requests.get('http://ipecho.net/plain')
        # Request a fresh Tor circuit.
        req.reset_identity()
        # Public IP as seen through Tor.
        morphed_identity = req.get('http://ipecho.net/plain')
        # Bug fix: the original compared the two Response *objects*, which
        # are always distinct, so the branch was unconditionally taken.
        # Compare the IP strings instead.
        if morphed_identity.text != normal_identity.text:
            if text:
                # Return the ip address pair as a tuple.
                return (normal_identity.text, morphed_identity.text)
            return True
        return False
    except Exception:
        # Bug fix: bare `except:` also caught KeyboardInterrupt/SystemExit.
        return False
def randomize_ip(password, quiet=False):
    '''Request a new Tor circuit and return the resulting exit IP.

    ``password`` is the tor hashed control password; HashedControlPassword
    must already be set in the tor configuration file. Unless ``quiet`` is
    true, the new IP is also printed to stderr.
    '''
    import sys
    from torrequest import TorRequest

    tor_req = TorRequest(password=password)
    tor_req.reset_identity()
    new_ip = tor_req.get('http://ipecho.net/plain').text
    if not quiet:
        print("IP address is set to: {}".format(new_ip), file=sys.stderr)
    return (new_ip)
def tor_session(password):
    '''Return the underlying requests session of a TorRequest, after
    printing to stderr whether the connection really goes through Tor.

    ``password`` is the HashedControlPassword set in the torrc file.
    '''
    import sys
    from torrequest import TorRequest

    tr = TorRequest(password=password)
    session = tr.session
    url = 'https://check.torproject.org'
    response = tr.get(url)
    txt = response.text
    # The verdict sits on the third line of the page's <title>.
    status = txt[txt.find('<title>') + 7:txt.find('</title>')].split('\n')[2].lstrip()
    print(status, file=sys.stderr)
    # Bug fix: the original used `is` to compare with a string literal,
    # which tests object identity and essentially never matches; use `==`.
    if status == "Sorry. You are not using Tor.":
        print("Continue only at your own risk.", file=sys.stderr)
    return (session)
def get_connection(links_site, torR):
    """GET *links_site* through Tor, retrying up to 15 times on timeout.

    After each timeout a NEWNYM signal is sent through the Tor control port
    and a fresh TorRequest session is built for the next attempt. Returns
    the response, or None when every attempt timed out.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
    for i in range(15):
        try:
            return torR.get(links_site, headers=headers, timeout=5)
        except requests.exceptions.Timeout:
            print("Reconnect" + str(i+1))
            with Controller.from_port(port=9051) as controller:
                controller.authenticate(password='******')
                print("Success!")
                controller.signal(Signal.NEWNYM)
                print("New Tor connection processed")
            # NOTE(review): empty password kept from the original -- confirm
            # it matches the controller password above.
            torR = TorRequest(password='')
            torR.reset_identity()  # Reset Tor
            response = torR.get('http://ipecho.net/plain')
            print("New Ip Address", response.text)
    # Bug fix: the original fell through to `return resp` with `resp`
    # unbound after exhausting all retries, raising NameError; signal
    # failure explicitly instead.
    return None
def pageScan(link):
    """Scrape ASINs from an Amazon search-results page fetched via Tor.

    Returns the list of `data-asin` attribute values found on the page, or
    an empty list when the expected result markup is absent.
    """
    li = []
    ua = UserAgent()
    # Bug fix: the original embedded the literal text "ua.random" inside a
    # hard-coded UA string and never used the UserAgent instance at all.
    headers = {'User-agent': ua.random}
    # NOTE(review): TorRequest's first positional parameter is proxy_port,
    # so `1234` sets the SOCKS port -- it may have been meant as the
    # control password. Kept as-is; confirm against the local tor config.
    tr = TorRequest(1234)
    tr.reset_identity()
    # Bug fix: the headers dict was built but never sent with the request.
    url = tr.get(link, headers=headers).text
    soup = BeautifulSoup(url, 'lxml')
    try:
        links = soup.find('div', id='atfResults').find(
            'ul', id='s-results-list-atf').find_all('li', class_='s-result-item')
        for l in links:
            li.append(l['data-asin'])
    except (AttributeError, TypeError):
        # Result container missing (blocked page / layout change): no ASINs.
        pass
    return li
def get_summary(tor: TorRequest, company: str) -> pd.DataFrame:
    """Get additional summary information on *company*.

    Scrapes every <span> out of the summary tables (module-level ``url`` is
    the base address), pairing them up as (label, value) rows. Returns a
    DataFrame indexed by the label column; empty when nothing was found.
    """
    rows = []
    raw = []
    r = tor.get(url + "{}?p={}".format(company, company))
    if r.status_code != 200:
        print(r.status_code, ":", r.reason)
    soup = BeautifulSoup(r.text, "lxml")
    for table in soup.find_all('table'):
        for element in table.find_all('span'):
            raw.append(element.get_text())
            # Every two spans form one (label, value) row.
            if len(raw) == 2:
                rows.append(list(raw))
                del raw[:]
    # Bug fix: DataFrame.append was deprecated and removed in pandas 2.x;
    # build the frame in one shot from the collected rows instead.
    summary_info = pd.DataFrame(rows)
    # Robustness: the original crashed on iloc[:, 0] when no rows were
    # scraped; return the empty frame instead.
    if not summary_info.empty:
        summary_info.set_index(summary_info.iloc[:, 0], inplace=True)
    return summary_info
def make_request(url, headers, error_type, social_network, verbose=False, tor=False, unique_tor=False):
    """GET *url* either through Tor (optionally one identity per call) or
    with plain requests.

    Returns (response, error_type) on success and (None, "") on any
    request failure; failures are reported through print_error().
    """
    requester = TorRequest() if (tor or unique_tor) else requests
    try:
        rsp = requester.get(url, headers=headers)
        if unique_tor:
            # Burn this identity so the next call uses a fresh circuit.
            requester.reset_identity()
        if rsp.status_code:
            return rsp, error_type
    except requests.exceptions.HTTPError as errh:
        print_error(errh, "HTTP Error:", social_network, verbose)
    except requests.exceptions.ConnectionError as errc:
        print_error(errc, "Error Connecting:", social_network, verbose)
    except requests.exceptions.Timeout as errt:
        print_error(errt, "Timeout Error:", social_network, verbose)
    except requests.exceptions.RequestException as err:
        print_error(err, "Unknown error:", social_network, verbose)
    return None, ""
from torrequest import TorRequest
import random
from bs4 import BeautifulSoup
from Word import Word
from constants import BLANK, browsers

# One shared Tor session and one randomly chosen User-Agent for the run.
tr = TorRequest(proxy_port=9050, ctrl_port=9051, password=None)
headers = {'User-Agent': random.choice(browsers)}

# Load the GRE word list: one Word object per line of gre.csv.
words = list()
with open('gre.csv') as f:
    for l in f:
        words.append(Word(l.strip()))

for w in words:
    print(w.word)
    # Look the word up on Naver's English dictionary; spaces in multi-word
    # entries are replaced by the BLANK constant for the query string.
    re_ko = tr.get(
        "https://endic.naver.com/search.nhn?sLn=en&searchOption=all&query=%s"
        % w.word.replace(' ', BLANK),
        headers=headers)
    s_ko = BeautifulSoup(re_ko.text, 'lxml')
    # Definition blocks live in <dl class="list_e2"> elements.
    dl = s_ko.findAll('dl', {'class': 'list_e2'})
    print(dl)
    # NOTE(review): deliberately stops after the first word (debug run?).
    break
for data in words: writer.writerow(data) failed = list() for i, r in enumerate(words): syns = r['syn'].split(',') syns = [s.strip() for s in syns] print(i / len(words) * 100, '%') # print(syns) # break before = len(syns) try: result = tr.get( 'https://en.wiktionary.org/wiki/Thesaurus:%s' % r['word'].replace(' ', '_'), headers=headers) #% r['word'].replace(' ', '_'), headers=headers) s = BeautifulSoup(result.text, 'lxml') syn = s.find('span', id='Synonyms') ls = syn.findParent().fetchNextSiblings()[0].findAll('li') for l in ls: syns.append(l.text) syns = list(set(syns)) after = len(syns) print(after - before, 'words added.') r['syn'] = ', '.join(syns) save_words(words) # break
print('filednames:', fieldnames) words = list() with open('gre2020.csv') as f: reader = csv.DictReader(f) for row in reader: words.append(row) new_words = list() for w in words: print(w['word']) try: re = tr.get( "https://endic.naver.com/search.nhn?sLn=en&query=%s&searchOption=all&isOnlyViewEE=Y" % w['word'].replace(' ', BLANK), headers=headers) s = BeautifulSoup(re.text, 'lxml') content_div = s.find('div', {'id': 'content'}) dl_e2 = content_div.find('dl', {'class': 'list_e2'}) dd = dl_e2.find('dd') k09 = dd.find('span', {'class': 'fnt_k09'}) if k09 != None: pof = k09.text else: pof = '' # k05 = k09.find_next_sibling() k05 = dd.find('span', {'class': 'fnt_k05'})
lnght = len(ListOfData) x = 0 delays = [random() for _ in range(10)] * 5 with open("news_2.py", "w") as fo: fo.writelines("BetterData = [\n") while True: if x >= lnght: break if x % 8 == 0: tr = TorRequest(password='******') tr.reset_identity() link = ListOfData[x][0] response = tr.get(ANOTHER_URL + link) soup = bs(response.content, "lxml") body = soup.body error_count = 0 if error_count >= 5: print("Restarting from %d" % (x - 5)) x = x - 5 else: try: description = body.find("div", class_="Ap5OSd").get_text() error_count = 0 ListOfData[x].extend(description) fo.writelines("%s,\n" % ListOfData[x]) except ConnectionRefusedError as CE:
class ProxyRot():
    """Rotate outbound identity via Tor and scrape public proxy lists.

    Combines a TorRequest session (anonymous GETs) with a headless Chrome
    webdriver (for the JavaScript-rendered gatherproxy.com pages).
    """

    # Class-level defaults; __init__ shadows tr/url/password per instance.
    # NOTE(review): `proxies` is a mutable class attribute shared by all
    # instances -- it is never written in this class, but confirm nothing
    # external relies on it before removing.
    tr = None
    url = None
    password = ""
    proxies = []
    chromedriver = "/usr/local/bin/chromedriver"

    def __init__(self, password=None, url=None):
        # `password` is the Tor control password; without one, TorRequest()
        # is built with its defaults (no auth).
        self.password = password
        if (password is not None):
            self.tr = TorRequest(password=self.password)
        else:
            self.tr = TorRequest()
        self.url = url
        # Headless Chrome options used by scrape_proxies_country().
        self.options = Options()
        self.options.add_argument('--headless')
        self.options.add_argument('--disable-gpu')  # Last I checked this was necessary

    def get_ip(self):
        """Return the machine's real public IP (no Tor), or None on error."""
        try:
            response = requests.get('http://ipinfo.io/ip')
        except Exception as e:
            print(str(e))
            return None
        return response.text

    def reset_tor_identity(self):
        """Request a new Tor circuit and rebuild the TorRequest session."""
        self.tr.reset_identity()
        self.tr = TorRequest(password=self.password)

    def scrape_proxies_country(self):
        """Scrape gatherproxy.com per-country proxy tables.

        Returns {country: [<tr> attribute dicts]} or None on any error.
        NOTE(review): this collects `proxy.attrs` (tag attributes), not the
        row text -- confirm that is the intended payload.
        """
        countries = ["Russia", "China", "India", "Ukraine", "Indonesia", "Brazil", "Canada", "Pakistan", "United Kingdom", "Iran", "Thailand"]
        # The full list above is immediately overridden for a shorter run.
        countries = ["Russia", "China"]
        url = "http://www.gatherproxy.com/proxylist/country/?c="
        proxies = {}
        driver = webdriver.Chrome(self.chromedriver, options=self.options)
        try:
            for country in countries:
                driver.get(url + country)
                time.sleep(2)  # give the JS-rendered table time to load
                # print(driver.page_source)
                page_content = BeautifulSoup(''.join(driver.page_source), "html.parser")
                proxy_table = page_content.find("div", attrs={"class": "proxy-list"})
                proxies_pre = proxy_table.findAll("tr")
                proxies[country] = []
                for proxy in proxies_pre:
                    proxies[country].append(proxy.attrs)
            return proxies
        except Exception as e:
            print(e)
            return None

    def get_tor_ip(self):
        """Return the current Tor exit IP, or None on error."""
        try:
            response = self.tr.get('http://ipinfo.io/ip')
        except Exception as e:
            print(str(e))
            return None
        return response.text

    def get(self, url):
        """GET *url* through the Tor session; returns the response or None."""
        try:
            response = self.tr.get(url)
        except Exception as e:
            print(str(e))
            return None
        return response
# Compare the plain-session IP with the Tor-routed IP, then verify against
# check.torproject.org.

# Add HashedControlPass.
password = get_pass("torpass")
tr = TorRequest(password=password)

# Reset Tor.
tr.reset_identity()

# Check initial ip (plain requests session, no Tor).
session = requests.session()
response = session.get('http://ipecho.net/plain')
ip = response.text
print("IP address is set to: {}".format(ip))

# Check new ip with tor.
response = tr.get('http://ipecho.net/plain')
ip = response.text
print("IP address is set to: {}".format(ip))

# Check if tor is active.
response = tr.get('https://check.torproject.org')
response.text  # Sorry, you are not using Tor.

# --------------------------------------------------------------------
## Test 3.
# --------------------------------------------------------------------
# Bug fix: the torify invocation below is a shell command, not Python --
# left bare it was a SyntaxError. Kept here as a comment for reference:
#   torify wget -O - 'https://check.torproject.org'
# Congratulations. This browser is configured to use Tor.
from stem import Signal
from stem.control import Controller
import requests
from torrequest import TorRequest

# Rotate the circuit via torrequest, then show the new exit IP.
tr = TorRequest(password='******')
tr.reset_identity()  # Reset Tor
response = tr.get('http://ipecho.net/plain')
print("New Ip Address", response.text)
# ------------------------------------------------------------
# For comparison: the machine's real address, requested without Tor.
response = requests.get('http://ipecho.net/plain')
print("My Original IP Address:", response.text)
# ------------------------------------------------------------
# Same rotation, done by signalling the Tor control port directly.
with Controller.from_port(port=9051) as controller:
    controller.authenticate(password='******')
    print("Success!")
    controller.signal(Signal.NEWNYM)
    print("New Tor connection processed")

# NOTE(review): this GET uses plain requests (no Tor SOCKS proxy), so it
# will show the real IP despite the NEWNYM above -- confirm intent.
response = requests.get('http://ipecho.net/plain')
print("IP Address after success s:", response.text)
# Snapshot the current word list, then fill in missing pronunciations from
# lexico.com; words that fail are written to pron_failed2.csv.
with open('gre2020-temp.csv', 'w') as f:
    writer = csv.DictWriter(f, fieldnames)
    writer.writeheader()
    for data in words:
        writer.writerow(data)

failed = list()
for i, r in enumerate(words):
    print(i / len(words) * 100, '%')
    if 'pron' in r and r['pron'] != '':
        print(r['pron'], 'is already')
    if 'pron' in r and r['pron'] == '':
        try:
            result = tr.get('https://www.lexico.com/en/definition/%s'
                            % r['word'].replace(' ', BLANK),
                            headers=headers)
            time.sleep(1)  # be polite between requests
            s = BeautifulSoup(result.text, 'lxml')
            pron = s.find('span', {'class': 'phoneticspelling'})
            print(pron.text)
            r['pron'] = pron.text
            save_words(words)
        except Exception:
            # Bug fix: the original bare `except:` also swallowed
            # KeyboardInterrupt, making the loop hard to abort.
            failed.append(r['word'])

with open('pron_failed2.csv', 'w') as f:
    f.write('\n'.join(failed))
# Interactive script: POST the vote form for a chosen id `i` times, rotating
# the Tor identity before every request.
ID = int(input("who's the lucky one? : "))
i = int(input("how many times mister?: "))

cookies = {
    'HoldTheDoor': 'f113024b10de77d8031c15bdcf2f830d67773813',
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36 OPR/66.0.3515.27',
    'Referer': 'http://158.69.76.135/level4.php',
}
data = {
    'id': ID,
    'holdthedoor': 'Submit',
    'key': 'f113024b10de77d8031c15bdcf2f830d67773813'
}

x = 0
while x < i:
    # Fresh TorRequest + identity per iteration so each POST (likely) exits
    # from a different IP.
    tr = TorRequest(password='******')
    tr.reset_identity()  # Reset Tor
    # verify=False: the target is plain HTTP / untrusted TLS.
    response = tr.post('http://158.69.76.135/level4.php', headers=headers,
                       cookies=cookies, data=data, verify=False)
    ip = tr.get('http://ipecho.net/plain')
    print("*********** Identity {} ^_^ ********* \n {} ".format(
        x + 1, ip.text))
    x += 1
ip_check_url = 'http://ipecho.net/plain' original_ip = requests.get(ip_check_url).text print("My original Ip: %s" % original_ip) tr = TorRequest(password=os.environ.get('TOR_PASS')) target_url = 'http://newtaxicsv.taxiromaniaonline.ro/aplicatie/make_request' addresses = json.load(open('addresses1.json')) userAgents = json.load(open('userAgents.json')) while True: try: # tr.reset_identity() tor_ip = tr.get(ip_check_url).text print("Using ip: %s" % tor_ip) if tor_ip == original_ip: raise ValueError("%s == %s" % (tor_ip, original_ip)) headers = { 'User-agent': random.choice(userAgents) } print("Headers: %s" % headers) address = str(random.choice(addresses)) nr = random.choice(range(1,6)) randomPrefix = random.choice(["str", "strada", "str.", "Str.", "St"])
url.get('href')) else: with open( 'logs/getlinks_log_{timestamp}.txt'.format( timestamp=timestamp), 'a') as f: f.write("Skipping " + url.get('href') + "\n") # Scraping Links from wg-gesucht.de ua = UserAgent() header = {'User-Agent': str(ua.random)} tr = TorRequest(password='******') tr.reset_identity() # Reset Tor source = tr.get( 'https://www.wg-gesucht.de/wohnungen-in-Hamburg.55.2.1.0.html?category=2&city_id=55&rent_type=2&noDeact=1&img=1', headers=header).text soup = bs.BeautifulSoup(source, 'lxml') # Scraping the number of pages for option in soup.find_all("a", {"class": "a-pagination"}): pages = option.get_text() pages = int(pages) #scraping links from the pages for i in range(0, pages + 1): with open('logs/getlinks_log_{timestamp}.txt'.format(timestamp=timestamp), 'a') as f: f.write("Scraping Page {page} from wg-gesucht.de\n".format(page=i))
def ParseReviews(asin):
    """Fetch the Amazon product page for *asin* via Tor and parse its reviews.

    Returns a dict with keys ratings/reviews/url/name/price on success, or a
    dict with an "error" key when the page is missing or could not be
    processed after 5 attempts.
    """
    # This script has only been tested with Amazon.com: and only works with
    # amazon.com because it involves the product requirements gathered from
    # amazon.com
    amazon_url = 'http://www.amazon.com/dp/' + asin

    # Add some recent user agent to prevent amazon from blocking the request.
    # Find some chrome user agent strings here
    # https://udger.com/resources/ua-list/browser-detail?browser=Chrome
    def randomizer():
        # Pick a random UA from user_agents.json and a random proxy address.
        # NOTE(review): neither value is actually used below -- the request
        # goes through TorRequest instead; confirm before removing.
        header_index = random.randint(1, 37)
        headers_user_agents = {}
        with open("user_agents.json", "r") as outfile:
            headers_user_agents = json.load(outfile)
        print(headers_user_agents)
        headers = {'User-Agent': headers_user_agents["headers"][header_index]}
        print(headers)
        # headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'}
        # Changed the following range(5) to range(20)
        proxy_number = random.randint(0, 14)
        print(proxy_number)
        proxies_list = [
            "105.185.176.102", "223.25.97.62", "110.44.122.198", "5.148.128.44",
            "177.21.103.63", "196.61.16.247", "109.224.57.14", "110.49.11.50",
            "181.10.129.85", "91.137.140.89", "103.9.134.241", "91.147.180.1",
            "213.57.125.158", "117.239.30.251"
        ]
        proxy = proxies_list[proxy_number]
        return headers, proxy

    # Up to 5 attempts; each attempt rotates the Tor identity.
    for i in range(5):
        headers, proxy_lnk = randomizer()
        print(proxy_lnk)
        proxy = {'http': proxy_lnk}
        # response = get(amazon_url, headers = headers, verify=False, timeout=30, proxies=proxy)
        tr = TorRequest(password="******")
        tr.reset_identity()
        response = tr.get(amazon_url)
        if response.status_code == 404:
            return {"url": amazon_url, "error": "page not found"}
        if response.status_code != 200:
            # Blocked/throttled: retry with a fresh identity.
            continue

        # Removing the null bytes from the response.
        cleaned_response = response.text.replace('\x00', '')
        parser = html.fromstring(cleaned_response)
        print(parser)
        XPATH_AGGREGATE = '//span[@id="acrCustomerReviewText"]'
        XPATH_REVIEW_SECTION_1 = '//div[contains(@id,"reviews-summary")]'
        XPATH_REVIEW_SECTION_2 = '//div[@data-hook="review"]'
        XPATH_AGGREGATE_RATING = '//table[@id="histogramTable"]//tr'
        XPATH_PRODUCT_NAME = '//h1//span[@id="productTitle"]//text()'
        XPATH_PRODUCT_PRICE = '//span[@id="priceblock_ourprice"]/text()'

        raw_product_price = parser.xpath(XPATH_PRODUCT_PRICE)
        raw_product_name = parser.xpath(XPATH_PRODUCT_NAME)
        total_ratings = parser.xpath(XPATH_AGGREGATE_RATING)
        reviews = parser.xpath(XPATH_REVIEW_SECTION_1)
        product_price = ''.join(raw_product_price).replace(',', '')
        product_name = ''.join(raw_product_name).strip()
        if not reviews:
            # Fall back to the newer review markup.
            reviews = parser.xpath(XPATH_REVIEW_SECTION_2)
        ratings_dict = {}
        reviews_list = []

        # Grabbing the rating histogram section of the product page.
        for ratings in total_ratings:
            extracted_rating = ratings.xpath('./td//a//text()')
            if extracted_rating:
                rating_key = extracted_rating[0]
                raw_raing_value = extracted_rating[1]
                rating_value = raw_raing_value
                if rating_key:
                    ratings_dict.update({rating_key: rating_value})

        # Parsing individual reviews.
        for review in reviews:
            XPATH_RATING = './/i[@data-hook="review-star-rating"]//text()'
            XPATH_REVIEW_HEADER = './/a[@data-hook="review-title"]//text()'
            XPATH_REVIEW_POSTED_DATE = './/span[@data-hook="review-date"]//text()'
            XPATH_REVIEW_TEXT_1 = './/div[@data-hook="review-collapsed"]//text()'
            XPATH_REVIEW_TEXT_2 = './/div//span[@data-action="columnbalancing-showfullreview"]/@data-columnbalancing-showfullreview'
            XPATH_REVIEW_COMMENTS = './/span[@data-hook="review-comment"]//text()'
            XPATH_AUTHOR = './/span[contains(@class,"profile-name")]//text()'
            XPATH_REVIEW_TEXT_3 = './/div[contains(@id,"dpReviews")]/div/text()'
            raw_review_author = review.xpath(XPATH_AUTHOR)
            raw_review_rating = review.xpath(XPATH_RATING)
            raw_review_header = review.xpath(XPATH_REVIEW_HEADER)
            raw_review_posted_date = review.xpath(XPATH_REVIEW_POSTED_DATE)
            raw_review_text1 = review.xpath(XPATH_REVIEW_TEXT_1)
            raw_review_text2 = review.xpath(XPATH_REVIEW_TEXT_2)
            raw_review_text3 = review.xpath(XPATH_REVIEW_TEXT_3)

            # Cleaning data: collapse whitespace, strip rating suffix.
            author = ' '.join(' '.join(raw_review_author).split())
            review_rating = ''.join(raw_review_rating).replace(
                'out of 5 stars', '')
            review_header = ' '.join(' '.join(raw_review_header).split())
            try:
                review_posted_date = dateparser.parse(
                    ''.join(raw_review_posted_date)).strftime('%d %b %Y')
            except:
                # Unparseable or missing date.
                review_posted_date = None
            review_text = ' '.join(' '.join(raw_review_text1).split())

            # Grabbing hidden comments if present (JSON blob with HTML tags
            # stripped out).
            if raw_review_text2:
                json_loaded_review_data = loads(raw_review_text2[0])
                json_loaded_review_data_text = json_loaded_review_data['rest']
                cleaned_json_loaded_review_data_text = re.sub(
                    '<.*?>', '', json_loaded_review_data_text)
                full_review_text = review_text + cleaned_json_loaded_review_data_text
            else:
                full_review_text = review_text
            if not raw_review_text1:
                # Oldest markup variant: review body lives elsewhere.
                full_review_text = ' '.join(' '.join(raw_review_text3).split())
            raw_review_comments = review.xpath(XPATH_REVIEW_COMMENTS)
            review_comments = ''.join(raw_review_comments)
            # Keep only the numeric part of the comment count.
            review_comments = sub('[A-Za-z]', '', review_comments).strip()
            review_dict = {
                'review_comment_count': review_comments,
                'review_text': full_review_text,
                'review_posted_date': review_posted_date,
                'review_header': review_header,
                'review_rating': review_rating,
                'review_author': author
            }
            reviews_list.append(review_dict)
        data = {
            'ratings': ratings_dict,
            'reviews': reviews_list,
            'url': amazon_url,
            'name': product_name,
            'price': product_price
        }
        return data
    return {"error": "failed to process the page", "url": amazon_url}
timestamp=timestamp), 'a') as f: f.write("Done scraping " + link + "\n") # scraping flat data on wg-gesucht.de if "wg-gesucht.de" in link: flat_info = [] ua = UserAgent() header = {'User-Agent': str(ua.random)} # using tor to scrape anonymously tr = TorRequest(password='******') tr.reset_identity() # Reset Tor source = tr.get(link, headers=header).text soup = bs.BeautifulSoup(source, 'lxml') #check if the flat is deactivated try: if "Diese Anzeige ist momentan deaktiviert" in soup.find_all( "h4", {"class": "headline alert-primary-headline"})[2].get_text(): DEACTIVATED_FLATS += 1 with open( 'logs/immoscrapy_log_{timestamp}.txt'.format( timestamp=timestamp), 'a') as f: f.write("The flat is deactivated: " + link + "\n") continue except: pass
def reset_ip():
    """Request a fresh Tor circuit and print the resulting exit IP."""
    session = TorRequest(password='******')
    session.reset_identity()  # Reset Tor
    new_ip = session.get('http://ipecho.net/plain').text
    print("New Ip Address", new_ip)