def monitor(self):
    """Poll the product page until ``self.check_stock()`` reports stock.

    On the first successfully parsed page the SKU id is stored on
    ``self.sku_id`` and the primary image URL is emitted through
    ``self.image_signal``; neither is looked up again afterwards.

    Returns:
        The product image URL scraped from the page.
    """
    request_headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
        "cache-control": "max-age=0",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.69 Safari/537.36"
    }
    have_image = False
    image_url = ""
    while True:
        self.status_signal.emit({"msg":"Loading Product Page","status":"normal"})
        try:
            response = self.session.get(self.product, headers=request_headers)
            if response.status_code != 200:
                self.status_signal.emit({"msg":"Product Not Found","status":"normal"})
            else:
                # The page is parsed on every pass, so malformed HTML still
                # lands in the error handler below.
                page = lxml.html.fromstring(response.text)
                if not have_image:
                    sku_nodes = page.xpath('//span[@class="product-data-value body-copy"]/text()')
                    self.sku_id = sku_nodes[1].strip()
                    image_url = page.xpath('//img[@class="primary-image"]/@src')[0]
                    self.image_signal.emit(image_url)
                    have_image = True
                if self.check_stock():
                    return image_url
                self.status_signal.emit({"msg":"Waiting For Restock","status":"normal"})
            # Both the "not found" and the "out of stock" paths back off the
            # same way before the next poll.
            time.sleep(random_delay(self.monitor_delay, settings.rand_delay_start, settings.rand_delay_stop))
        except Exception as e:
            self.status_signal.emit({
                "msg": "Error Loading Product Page (line {} {} {})".format(
                    sys.exc_info()[-1].tb_lineno, type(e).__name__, e),
                "status": "error"
            })
            time.sleep(self.error_delay)
def traverse_proj_structure(proj_url, base_proj_structure, is_parent):
    """Crawl a project tree breadth-first in headless Firefox and fingerprint files.

    Every visited page that has a ``raw-url`` element is treated as a file:
    its text is fingerprinted with ``winnowing_algorithm`` and, when
    ``is_parent`` is false, compared against ``base_proj_structure``. Any
    other page is treated as a folder and its child links are queued.

    Args:
        proj_url: URL at which the crawl starts.
        base_proj_structure: Mapping of relative path to the base project's
            fingerprint; only read when ``is_parent`` is false.
        is_parent: When true, return the fingerprints; otherwise return the
            similarities against ``base_proj_structure``.

    Returns:
        dict: ``leaf_hashes`` (path -> fingerprint) if ``is_parent``, else
        ``leaf_similarities`` (path -> similarity, or ``None`` when the path
        is absent from the base).
    """
    firefox_options = webdriver.FirefoxOptions()
    firefox_options.set_headless()
    driver = webdriver.Firefox(firefox_options=firefox_options)
    print(proj_url)
    sub_url_queue = [proj_url]
    leaf_hashes = {}
    leaf_similarities = {}
    ignored = ['.github', '.DS_Store']
    # try/finally so the browser is released even when a page load or a
    # lookup raises; previously the driver leaked on any uncaught error.
    try:
        # perform bfs on tree
        while sub_url_queue:
            popped_url = sub_url_queue.pop(0)
            driver.get(popped_url)
            print(popped_url, len(sub_url_queue))
            time.sleep(random_delay(1, 2))
            # check if raw button exists = file
            try:
                raw_button = driver.find_element_by_id('raw-url')
                raw_button.click()
                relative_path = proj_pattern.search(popped_url).group(2)
                file_code = driver.find_elements_by_tag_name('pre')[0].text
                leaf_hashes[relative_path] = winnowing_algorithm(
                    file_code, window_size, n)
                if not is_parent:
                    if relative_path in base_proj_structure:
                        leaf_similarities[relative_path] = calculate_similarity(
                            base_proj_structure[relative_path],
                            leaf_hashes[relative_path])
                    else:
                        leaf_similarities[relative_path] = None
                        print(relative_path, "not found in base")
            except Exception:
                # Any failure above is taken to mean "this is a folder".
                # `Exception` (not a bare except) keeps Ctrl-C / SystemExit
                # able to stop the crawl.
                file_folder_elems = driver.find_elements_by_xpath(
                    '//a[contains(@class, "js-navigation-open link-gray-dark")]')
                child_urls = [
                    f.get_attribute('href') for f in file_folder_elems
                    if f.text not in ignored and '.csv' not in f.text
                    and '.sln' not in f.text
                ]
                sub_url_queue.extend(child_urls)
            # NOTE(review): the collapsed source does not show whether this
            # pause belonged to the folder branch only; it is applied per page.
            time.sleep(random_delay(1, 2))
    finally:
        driver.close()
    if is_parent:
        return leaf_hashes
    return leaf_similarities
def monitor(self):
    """Poll the product page until it can be added to the cart.

    CAPTCHA pages are passed to ``self.handle_captcha`` and the page is
    fetched again straight away. The image URL is emitted once. When
    ``self.max_price`` is not the empty string, a price above it is treated
    like an out-of-stock page.

    Returns:
        tuple: ``(product_image, offer_id)`` where ``offer_id`` comes from the
        JSON embedded in the page's ``<script id="item">`` element.
    """
    request_headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
        "cache-control": "max-age=0",
        "upgrade-insecure-requests": "1",
        "user-agent": settings.userAgent
    }
    have_image = False
    image_url = ""
    while True:
        self.status_signal.emit({"msg": "Loading Product Page", "status": "normal"})
        try:
            response = self.session.get(self.product, headers=request_headers)
            if response.status_code != 200:
                self.status_signal.emit({"msg": "Product Not Found", "status": "normal"})
                time.sleep(random_delay(self.monitor_delay, settings.random_delay_start,
                                        settings.random_delay_stop))
                continue

            # check for captcha page
            if self.is_captcha(response.text):
                self.status_signal.emit({"msg": "CAPTCHA - Opening Product Page", "status": "error"})
                self.handle_captcha(self.product)
                continue

            page = lxml.html.fromstring(response.text)
            if not have_image:
                image_url = page.xpath('//meta[@property="og:image"]/@content')[0]
                self.image_signal.emit(image_url)
                have_image = True
            price = float(page.xpath('//span[@itemprop="price"]/@content')[0])

            if "add to cart" in response.text.lower():
                # An empty max_price means "no limit"; it is only parsed as
                # a float when set.
                too_expensive = self.max_price != "" and float(self.max_price) < price
                if not too_expensive:
                    item_json = json.loads(page.xpath('//script[@id="item"]/text()')[0])
                    offer_id = item_json["item"]["product"]["buyBox"]["products"][0]["offerId"]
                    return image_url, offer_id
                self.status_signal.emit({"msg": "Waiting For Price Restock", "status": "normal"})
            else:
                self.status_signal.emit({"msg": "Waiting For Restock", "status": "normal"})

            # Shared back-off for both waiting states.
            self.session.cookies.clear()
            time.sleep(random_delay(self.monitor_delay, settings.random_delay_start,
                                    settings.random_delay_stop))
        except Exception as e:
            self.status_signal.emit({
                "msg": "Error Loading Product Page (line {} {} {})".format(
                    sys.exc_info()[-1].tb_lineno, type(e).__name__, e),
                "status": "error"
            })
            time.sleep(self.error_delay)
def login(self):
    """Sign in on target.com and report whether a user name is displayed.

    After ``self.fill_and_authenticate()`` the account-name element is read.
    If it still contains "sign in", a failure message is emitted with status
    ``"stopnow"`` when ``settings.run_headless`` is set and ``"normal"``
    otherwise. Otherwise a success message is emitted.
    """
    self.browser.get("https://www.target.com")

    account_button = wait(self.browser, self.TIMEOUT_LONG).until(
        EC.presence_of_element_located((By.ID, "account")))
    account_button.click()

    sign_in_link = wait(self.browser, 5).until(
        EC.presence_of_element_located(
            (By.XPATH, "//li[@id='accountNav-signIn']/a")))
    sign_in_link.click()

    self.fill_and_authenticate()

    user_label = self.browser.find_element_by_xpath(
        '//span[@data-test="accountUserName"]')
    time.sleep(1)

    if "sign in" in user_label.text.lower():
        # Only the status differs between headless and interactive runs.
        status = "stopnow" if settings.run_headless else "normal"
        self.status_signal.emit(
            create_msg(
                "Did not detect username on target page. Got \"{}\"".format(
                    user_label.text),
                status))
    else:
        self.status_signal.emit(
            create_msg("Succesfully signed in as {}".format(user_label.text),
                       "normal"))

    # Gives it time for the login to complete
    time.sleep(
        random_delay(self.monitor_delay, settings.random_delay_start,
                     settings.random_delay_stop))
def monitor(self):
    """Reload the product page until the ship-it button can be clicked.

    The product image URL is emitted and stored on ``self.product_image``
    first; the stock check does not start until that lookup has succeeded.
    """
    image_done = False
    added = False
    self.browser.get(self.product)
    while not added:
        # Step 1: product image. Any failure restarts the loop immediately
        # (no refresh, no delay), exactly as before.
        try:
            if not image_done:
                slide = self.browser.find_elements_by_class_name(
                    'slideDeckPicture')[0]
                image_elem = slide.find_element_by_tag_name("img")
                self.image_signal.emit(image_elem.get_attribute("src"))
                self.product_image = image_elem.get_attribute("src")
                image_done = True
        except Exception:
            continue

        # Step 2: try to add to cart. A missing or unclickable button is
        # treated as "out of stock".
        try:
            ship_button = self.browser.find_element_by_xpath(
                '//button[@data-test= "shipItButton"]')
            self.browser.execute_script(
                "return arguments[0].scrollIntoView(true);", ship_button)
            ship_button.click()
            added = True
            self.status_signal.emit(create_msg("Added to cart", "normal"))
        except Exception:
            self.status_signal.emit(
                create_msg("Waiting on Restock", "normal"))
            time.sleep(
                random_delay(self.monitor_delay,
                             settings.random_delay_start,
                             settings.random_delay_stop))
            self.browser.refresh()
def monitor(self):
    """Open the product page, capture its image, then wait for stock.

    Reads ``self.img_found`` (not initialised here) and sets
    ``self.in_stock``, ``self.product_image`` and ``self.img_found``.
    Returns once ``self.check_stock()`` yields a truthy value.
    """
    self.in_stock = False
    self.browser.get(self.product)
    print(self.product)
    wait(self.browser, self.TIMEOUT_LONG).until(
        lambda _: self.browser.current_url == self.product)

    # Retry the image lookup until it succeeds; failures are retried
    # immediately with no delay.
    while not self.img_found:
        try:
            slide = self.browser.find_elements_by_class_name(
                'slideDeckPicture')[0]
            image_elem = slide.find_element_by_tag_name("img")
            self.image_signal.emit(image_elem.get_attribute("src"))
            self.product_image = image_elem.get_attribute("src")
            self.img_found = True
        except Exception:
            continue

    while not self.in_stock:
        self.in_stock = self.check_stock()
        if not self.in_stock:
            self.status_signal.emit(
                create_msg("Waiting on Restock", "normal"))
            time.sleep(
                random_delay(self.monitor_delay,
                             settings.random_delay_start,
                             settings.random_delay_stop))
            self.browser.refresh()
def login(self):
    """Sign in on target.com, retrying until the account nav element appears.

    Each attempt waits for the ``username`` field, calls
    ``self.fill_and_authenticate()``, pauses, then waits for
    ``accountNav-account``. Any exception during an attempt emits a retry
    message, refreshes the page and starts over.
    """
    self.browser.get("https://www.target.com")
    self.browser.find_element_by_id("account").click()
    sign_in_entry = wait(self.browser, self.TIMEOUT_LONG).until(
        EC.element_to_be_clickable((By.ID, "accountNav-signIn")))
    sign_in_entry.click()

    while True:
        try:
            wait(self.browser, self.TIMEOUT_LONG).until(
                EC.presence_of_element_located((By.ID, "username")))
            self.fill_and_authenticate()
            # Gives it time for the login to complete
            time.sleep(
                random_delay(self.monitor_delay,
                             settings.random_delay_start,
                             settings.random_delay_stop))
            wait(self.browser, self.TIMEOUT_LONG).until(
                EC.presence_of_element_located(
                    (By.ID, "accountNav-account")))
        except Exception:
            self.status_signal.emit(
                create_msg("Log In Failed.. Retrying", "normal"))
            self.browser.refresh()
        else:
            # Account element is present: the attempt succeeded.
            break
def checkout(self):
    """Click the checkout button, retrying until the click succeeds.

    Emits a "Checking out" status first. After a successful click the method
    pauses for a randomised delay before returning. Failed attempts are
    retried immediately with no delay.
    """
    did_checkout = False
    self.status_signal.emit(create_msg("Checking out", "normal"))
    while not did_checkout:
        try:
            self.browser.find_element_by_xpath(
                '//button[@data-test= "checkout-button"]').click()
            did_checkout = True
            time.sleep(
                random_delay(self.monitor_delay,
                             settings.random_delay_start,
                             settings.random_delay_stop))
        except Exception:
            # Was a bare `except:`, which also trapped KeyboardInterrupt and
            # SystemExit and made this retry loop impossible to stop.
            continue
def login(self):
    """Sign in on target.com with the credentials stored in ``settings``.

    Opens the account menu, fills the ``username`` and ``password`` fields
    from ``settings.target_user`` / ``settings.target_pass``, clicks the
    ``login`` button and then pauses for a randomised delay.
    """
    browser = self.browser
    browser.get("https://www.target.com")
    browser.find_element_by_id("account").click()

    sign_in_entry = wait(browser, 10).until(
        EC.element_to_be_clickable((By.ID, "accountNav-signIn")))
    sign_in_entry.click()

    username_field = wait(browser, 10).until(
        EC.presence_of_element_located((By.ID, "username")))
    username_field.send_keys(settings.target_user)

    password_field = browser.find_element_by_id("password")
    password_field.send_keys(settings.target_pass)

    browser.find_element_by_id("login").click()

    # Gives it time for the login to complete
    pause = random_delay(self.monitor_delay, settings.random_delay_start,
                         settings.random_delay_stop)
    time.sleep(pause)
def monitor(self):
    """Report login state, then reload the product page until it is carted.

    Login is judged by whether ``"user-message-initial"`` occurs in the
    current page source. A failure is only reported (status ``"stopnow"``);
    this method itself carries on regardless. After a successful add-to-cart
    the window is maximised and the cart page is opened.
    """
    ## verify we have signed successfully else we should abort the task or attempt sign-in again
    # (TODO: add max attempts to sign-in before exiting task)
    if "user-message-initial" in self.browser.page_source:
        self.status_signal.emit(
            create_msg("Gamestop Successfully logged in...", "normal"))
    else:
        self.status_signal.emit(
            create_msg("Error logging in... please restart task", "stopnow"))
        # TODO: Exit task if we are not signed in

    self.status_signal.emit(create_msg("Checking Stock..", "normal"))
    # self.browser.set_window_size(900, 900)
    self.browser.get(self.product)
    wait(self.browser, self.LONG_TIMEOUT).until(
        lambda _: self.browser.current_url == self.product)

    in_stock = False
    while not in_stock:
        try:
            # The randomised monitor delay doubles as the wait timeout, so a
            # missing button paces the refresh loop.
            wait(
                self.browser,
                random_delay(
                    self.monitor_delay, settings.random_delay_start,
                    settings.random_delay_stop)).until(
                        EC.element_to_be_clickable(
                            (By.XPATH,
                             '//button[@data-buttontext="Add to Cart"]')))
            add_to_cart_btn = self.browser.find_element_by_xpath(
                '//button[@data-buttontext="Add to Cart"]')
            add_to_cart_btn.click()
            time.sleep(1)
            if not add_to_cart_btn.is_enabled():
                self.status_signal.emit(
                    create_msg("Waiting For Restock", "normal"))
                self.browser.refresh()
                continue
            in_stock = True
            self.status_signal.emit(create_msg("Added to cart", "normal"))
            self.browser.maximize_window()
            # remove stop temporarily to see if gamestop captcha is an issue
            # self.status_signal.emit(create_msg("Added to cart, check for captcha","stopnow"))
            self.browser.get("https://www.gamestop.com/cart/")
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # can still end the loop.
            self.status_signal.emit(
                create_msg("Waiting For Restock", "normal"))
            self.browser.refresh()
def login(self):
    """Open target.com's sign-in form and authenticate.

    Waits for the ``username`` field, delegates the form filling to
    ``self.fill_and_authenticate()``, then pauses for a randomised delay
    built on a base of 3.
    """
    browser = self.browser
    browser.get("https://www.target.com")
    browser.find_element_by_id("account").click()

    sign_in_entry = wait(browser, self.TIMEOUT_LONG).until(
        EC.element_to_be_clickable((By.ID, "accountNav-signIn")))
    sign_in_entry.click()

    wait(browser, self.TIMEOUT_LONG).until(
        EC.presence_of_element_located((By.ID, "username")))
    self.fill_and_authenticate()

    # Gives it time for the login to complete
    pause = random_delay(3, settings.random_delay_start,
                         settings.random_delay_stop)
    time.sleep(pause)
def check_cart_items(self):
    """Request the checkout contract until the first cart item can be read.

    Posts the shipping location from ``self.profile`` to the checkout
    contract endpoint. A 200/201 response yields the first entry of
    ``"items"``. Other responses wait for restock or report an error, then
    retry.

    Returns:
        tuple: ``(item_id, fulfillment_option, ship_method)`` of the first
        cart item.
    """
    request_headers = {
        "accept": "application/json, text/javascript, */*; q=0.01",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "en-US,en;q=0.9",
        "content-type": "application/json",
        "origin": "https://www.walmart.com",
        "referer": "https://www.walmart.com/checkout/",
        "user-agent": settings.userAgent,
        "wm_vertical_id": "0",
        "wm_cvv_in_session": "true",
    }
    profile = self.profile
    payload = {
        "postalCode": profile["shipping_zipcode"],
        "city": profile["shipping_city"],
        "state": profile["shipping_state"],
        "isZipLocated": True,
        "crt:CRT": "",
        "customerId:CID": "",
        "customerType:type": "",
        "affiliateInfo:com.wm.reflector": "",
        "storeList": [],
    }
    contract_url = "https://www.walmart.com/api/checkout/v3/contract?page=CHECKOUT_VIEW"

    while True:
        self.status_signal.emit({"msg": "Loading Cart Items", "status": "normal"})
        try:
            response = self.session.post(contract_url, json=payload, headers=request_headers)
            # this sometimes returns json data related to loading a captcha.js file
            # so that could be intercepted when requests fail
            print(response.text)

            if response.status_code in (201, 200):
                first_item = json.loads(response.text)["items"][0]
                item_id = first_item["id"]
                fulfillment_option = first_item["fulfillmentSelection"]["fulfillmentOption"]
                ship_method = first_item["fulfillmentSelection"]["shipMethod"]
                self.status_signal.emit({"msg": "Loaded Cart Items", "status": "normal"})
                return item_id, fulfillment_option, ship_method

            if json.loads(response.text)["message"] == "Item is no longer in stock.":
                self.status_signal.emit({"msg": "Waiting For Restock", "status": "normal"})
                time.sleep(
                    random_delay(self.monitor_delay, settings.random_delay_start,
                                 settings.random_delay_stop))
            else:
                if self.is_captcha(response.text):
                    self.handle_captcha("https://www.walmart.com/checkout")
                self.status_signal.emit(
                    {"msg": "Error Loading Cart Items, Got Response: " + str(response.text),
                     "status": "error"})
                time.sleep(self.error_delay)
        except Exception as e:
            self.status_signal.emit({
                "msg": "Error Loading Cart Items (line {} {} {})".format(
                    sys.exc_info()[-1].tb_lineno, type(e).__name__, e),
                "status": "error"
            })
            time.sleep(self.error_delay)
def monitor(self):
    """Capture the product image, then click the first available buy button.

    The order-pickup button is preferred over the ship-it button. When
    neither is on the page the method emits "Waiting on Restock", sleeps for
    a randomised delay, refreshes and looks again.
    """
    img_found = False
    in_stock = False
    self.browser.get(self.product)
    wait(self.browser, self.TIMEOUT_LONG).until(
        lambda _: self.browser.current_url == self.product)

    # Retry the image lookup until it succeeds; failures are retried
    # immediately with no delay.
    while not img_found:
        try:
            product_img = self.browser.find_elements_by_class_name(
                'swiper-zoom-container')[0].find_element_by_tag_name("img")
            print(product_img)
            self.image_signal.emit(product_img.get_attribute("src"))
            self.product_image = product_img.get_attribute("src")
            img_found = True
        except Exception:
            continue

    while not in_stock:
        # One lookup per button, reusing its result. The earlier
        # find_elements-then-find_element pair could raise an uncaught
        # NoSuchElementException if the button vanished between the calls.
        # The ship-it lookup still only runs when no pickup button exists.
        buttons = self.browser.find_elements_by_xpath(
            '//button[@data-test= "orderPickupButton"]')
        if not buttons:
            buttons = self.browser.find_elements_by_xpath(
                '//button[@data-test= "shipItButton"]')
        if not buttons:
            self.status_signal.emit(
                create_msg("Waiting on Restock", "normal"))
            time.sleep(
                random_delay(self.monitor_delay,
                             settings.random_delay_start,
                             settings.random_delay_stop))
            self.browser.refresh()
            continue

        add_to_cart_btn = buttons[0]
        self.browser.execute_script(
            "return arguments[0].scrollIntoView(true);", add_to_cart_btn)
        add_to_cart_btn.click()
        in_stock = True
        self.status_signal.emit(create_msg("Added to cart", "normal"))
def monitor(self):
    """Wait for login (unless monitor-only), then poll the product for stock.

    The item counts as in stock when both the home-delivery option and the
    add-to-cart button are enabled. A screenshot is then saved under
    ``screenshots/``. In monitor-only mode ``self.notify()`` is called;
    otherwise the item is added to the cart and the cart page is opened.
    """
    if not self.MONITOR_ONLY:
        logged_in = False
        while not logged_in:
            try:
                wait(self.browser, self.LONG_TIMEOUT).until(
                    lambda _: self.browser.current_url == "https://www.gamestop.com/account/")
                self.status_signal.emit(create_msg("Successfully Logged In", "normal"))
                logged_in = True
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit are not swallowed by the retry loop.
                self.status_signal.emit(create_msg("Log in failed. Retrying.", "normal"))
                self.login()
    if self.MONITOR_ONLY:
        time.sleep(random.randint(1, 4))

    self.status_signal.emit(create_msg("Checking Stock..", "normal"))
    self.browser.get(self.product)
    wait(self.browser, self.LONG_TIMEOUT).until(
        lambda _: self.browser.current_url == self.product)

    in_stock = False
    while not in_stock:
        try:
            wait(
                self.browser,
                random_delay(self.monitor_delay, settings.random_delay_start,
                             settings.random_delay_stop)).until(
                    EC.element_to_be_clickable(
                        (By.XPATH, '//button[@data-buttontext="Add to Cart"]')))
            add_to_cart_btn = self.browser.find_element_by_xpath(
                '//button[@data-buttontext="Add to Cart"]')
            home_delivery_option = self.browser.find_element_by_xpath(
                '//input[@value="home"]')
            if not self.MONITOR_ONLY:
                add_to_cart_btn.click()
                time.sleep(1)
            # The original `not a & b` already parsed as `not (a & b)`; it is
            # spelled with a logical `and` and explicit parentheses here.
            if not (home_delivery_option.is_enabled()
                    and add_to_cart_btn.is_enabled()):
                self.status_signal.emit(
                    create_msg("Out of stock. Rechecking soon.", "normal"))
                time.sleep(self.monitor_delay + random.randint(1, 4))
                self.browser.refresh()
                continue
            in_stock = True
            # strftime('%s') is a platform-specific extension and fails on
            # some platforms; timestamp() gives the same epoch seconds.
            self.browser.save_screenshot(
                "screenshots/gamestop_"
                + str(int(datetime.now().timestamp())) + ".png")
            if not self.MONITOR_ONLY:
                self.status_signal.emit(create_msg("Added to cart", "normal"))
                self.browser.get("https://www.gamestop.com/cart/")
            else:
                self.status_signal.emit(
                    create_msg("Item in stock. Sending notification", "normal"))
                self.notify()
        except Exception:
            self.status_signal.emit(create_msg("Waiting For Restock", "normal"))
            self.browser.refresh()
def monitor(self):
    """Capture the product image, then poll ``self.check_stock()``.

    When stock is found a screenshot is saved under ``screenshots/``. In
    monitor-only mode ``self.notify()`` is called, the method sleeps 15
    seconds and resumes polling; otherwise it returns with ``self.in_stock``
    set. Reads ``self.img_found``, which is not initialised here.
    """
    self.in_stock = False
    self.browser.get(self.product)
    wait(self.browser, self.TIMEOUT_LONG).until(
        lambda _: self.browser.current_url == self.product)

    # Retry the image lookup until it succeeds; failures are retried
    # immediately with no delay.
    while not self.img_found:
        try:
            if not self.img_found:
                product_img = self.browser.find_elements_by_class_name(
                    'slideDeckPicture')[0].find_element_by_tag_name("img")
                self.image_signal.emit(product_img.get_attribute("src"))
                self.product_image = product_img.get_attribute("src")
                self.img_found = True
        except Exception:
            continue

    while not self.in_stock:
        try:
            self.in_stock = self.check_stock()
            if self.in_stock:
                self.status_signal.emit(
                    create_msg("Item in stock...", "normal"))
                # strftime('%s') is a platform-specific extension. Where it
                # fails, the handler below swallowed the error and the loop
                # exited without a screenshot or notification.
                self.browser.save_screenshot(
                    "screenshots/target_"
                    + str(int(datetime.now().timestamp())) + ".png")
                if self.MONITOR_ONLY:
                    self.notify()
                    time.sleep(15)
                    # Keep polling: monitor-only mode never buys.
                    self.in_stock = False
                continue
            else:
                self.status_signal.emit(
                    create_msg("Waiting on Restock", "normal"))
                time.sleep(
                    random_delay(self.monitor_delay,
                                 settings.random_delay_start,
                                 settings.random_delay_stop))
                self.browser.refresh()
        except Exception:
            continue
def monitor(self):
    """Wait for the account page, then reload the product until it is carted.

    Blocks until the browser URL equals the GameStop account URL, resizes the
    window to 900x900 and opens ``self.product``. Once the add-to-cart button
    has been clicked and is still enabled, the cart page is opened.
    """
    wait(self.browser, self.LONG_TIMEOUT).until(
        lambda _: self.browser.current_url == "https://www.gamestop.com/account/")
    self.status_signal.emit(create_msg("Checking Stock..", "normal"))
    self.browser.set_window_size(900, 900)
    self.browser.get(self.product)
    wait(self.browser, self.LONG_TIMEOUT).until(
        lambda _: self.browser.current_url == self.product)

    in_stock = False
    while not in_stock:
        try:
            # The randomised monitor delay doubles as the wait timeout.
            wait(
                self.browser,
                random_delay(
                    self.monitor_delay, settings.random_delay_start,
                    settings.random_delay_stop)).until(
                        EC.element_to_be_clickable(
                            (By.XPATH,
                             '//button[@data-buttontext="Add to Cart"]')))
            add_to_cart_btn = self.browser.find_element_by_xpath(
                '//button[@data-buttontext="Add to Cart"]')
            add_to_cart_btn.click()
            time.sleep(1)
            if not add_to_cart_btn.is_enabled():
                self.status_signal.emit(
                    create_msg("Waiting For Restock", "normal"))
                self.browser.refresh()
                continue
            in_stock = True
            self.status_signal.emit(create_msg("Added to cart", "normal"))
            self.browser.get("https://www.gamestop.com/cart/")
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # can still end the loop.
            self.status_signal.emit(
                create_msg("Waiting For Restock", "normal"))
            self.browser.refresh()
def login(self):
    """Sign in on newegg.com through its two-step e-mail / password form.

    Each step waits for its element to become clickable, with a timeout of
    ``self.TIMEOUT_LONG``. The method ends with a randomised pause.
    """
    def clickable(locator):
        # Block until the element at `locator` is clickable and return it.
        return wait(self.browser, self.TIMEOUT_LONG).until(
            EC.element_to_be_clickable(locator))

    self.browser.get("https://newegg.com")
    clickable((By.CSS_SELECTOR, ".nav-complex-title")).click()

    # NOTE(review): the credentials come from the bestbuy_* settings even
    # though this is the Newegg flow. Looks like a copy/paste; confirm.
    clickable((By.ID, "labeled-input-signEmail")).send_keys(settings.bestbuy_user)
    clickable((By.CSS_SELECTOR, ".btn-orange")).click()

    clickable((By.ID, "labeled-input-password")).send_keys(settings.bestbuy_pass)
    clickable((By.CSS_SELECTOR, ".btn-orange")).click()

    time.sleep(
        random_delay(self.monitor_delay, settings.random_delay_start,
                     settings.random_delay_stop))
def request_www_westernunion_com(prepared_requests, emails, proxies=None):
    """Send each e-mail through a prepared request and record the outcome.

    ``emails`` is split into consecutive slices, one per prepared request.
    The response's error message decides whether the address is appended to
    ``data/not_registered_emails.txt`` or ``data/registered_emails.txt``;
    responses without a body go to ``data/failed_emails.txt``.

    Args:
        prepared_requests: Sequence of ``(request, session_id)`` pairs. An
            empty sequence makes the call a no-op.
        emails: Sequence of e-mail addresses to check.
        proxies: Optional sequence of proxy hosts. They are reused round-robin
            when a slice holds more e-mails than there are proxies.

    Raises:
        Exception: Anything other than ``urllib.error.URLError`` propagates;
            ``URLError`` is logged and the next e-mail is tried.
    """
    # Nothing to send with; this previously raised ZeroDivisionError.
    if not prepared_requests:
        return

    worker_name = current_thread().name

    # Ceiling division, so every e-mail lands in some slice.
    full, remainder = divmod(len(emails), len(prepared_requests))
    emails_per_request = full + bool(remainder)

    for idx, prepared_request in enumerate(prepared_requests):
        request, session_id = prepared_request
        subset_start = idx * emails_per_request
        subset_end = subset_start + emails_per_request

        for k, email in enumerate(emails[subset_start:subset_end]):
            try:
                body = json.dumps({
                    'email': email,
                    'security': {
                        'session': {
                            'id': session_id
                        },
                        'version': '2'
                    },
                    'bashPath': '/us/en'
                }).encode()

                # NOTE(review): the return value is discarded; presumably
                # this helper sleeps internally - confirm.
                random_delay(1, 2)

                # Add the proxy and send the request
                if proxies:
                    # Wrap around instead of raising IndexError when there
                    # are fewer proxies than e-mails in the slice.
                    request.set_proxy(proxies[k % len(proxies)], 'http')

                # The context manager closes the connection on every path.
                with urllib.request.urlopen(request, body) as response:
                    # Parse the message
                    data = response.read()

                    if data:
                        # Get the message from the response
                        msg = json.loads(data.decode('utf-8'))['error']['message']

                        # Email IS NOT registered
                        if "We can't find that email address" in msg:
                            log.info('{}: Not registered: {}'.format(worker_name, email))
                            with open('data/not_registered_emails.txt', 'a') as fp:
                                fp.write(email)
                        # Email IS registered
                        elif "There's already an account with this email address" in msg:
                            log.info('{}: Registered: {}'.format(worker_name, email))
                            with open('data/registered_emails.txt', 'a') as fp:
                                fp.write(email)
                        # Other message
                        else:
                            log.info('{}: {}'.format(worker_name, msg))
                    # No data received in response
                    else:
                        log.info('{}: Status: {}, Message: {}, Data: {} ({})'.format(
                            worker_name, response.status, response.msg, data, email)
                        )
                        with open('data/failed_emails.txt', 'a') as fp:
                            fp.write(email)
            except urllib.error.URLError as e:
                logging.exception(e)
# read open source project names open_source_projs = pd.read_csv('awesome_open_source_projs.csv') # proj_names = list(open_source_projs['Project Name']) proj_names = ['SirixDB', 'PublicLab.org'] # proj_urls = list(open_source_projs['Project Link']) proj_urls = [ str(open_source_projs.loc[open_source_projs['Project Name'] == i].iloc[0] ['Project Link']) for i in proj_names ] for os_proj_url in proj_urls[company_lower_lim:company_upper_lim + 1]: print(os_proj_url) try: # driver.get(os_proj_url) time.sleep(random_delay(1, 3)) # # grab relevant data: NUMBER of watchers, stars, forks, branches, tags # watch_elements = driver.find_elements_by_xpath('//a[contains(@aria-label, "users are watching this repository")]') # star_elements = driver.find_elements_by_xpath('//a[contains(@aria-label, "users starred this repository")]') # fork_elements = driver.find_elements_by_xpath('//a[contains(@aria-label, "users forked this repository")]') # # maintaining initialized value for later click # fork_element = None # if len(watch_elements) > 0: # try: # watch_element = watch_elements[0] # num_watchers = int(numbers.search(watch_element.get_attribute('aria-label')).group(1)) # num_watchers_data.append(num_watchers) # except:
from selenium import webdriver from utils import random_delay import pandas as pd import time import regex as re main_proj_url_format = re.compile('(https:\/\/github\.com\/[^\/]+\/[^\/]+\/)') driver = webdriver.Firefox() driver.get('https://github.com/MunGell/awesome-for-beginners') assert 'GitHub' in driver.title time.sleep(random_delay(1, 3)) proj_link_elements = driver.find_elements_by_xpath( '//a[contains(@href, "/labels/")]') proj_names = [i.text for i in proj_link_elements] proj_links = [ str(main_proj_url_format.search(i.get_attribute('href')).group(1)) for i in proj_link_elements ] print(proj_names) print(proj_links) assert len(proj_names) == len(proj_links) print(len(proj_names)) proj_df = pd.DataFrame({
fork_compare_text = [] base_urls_curr = [] os_proj_data = pd.read_csv('awesome_proj_data_github.csv') for index, row in list(os_proj_data.iterrows())[start_lim:end_lim + 1]: base_url = row['Url'] forked_urls = ast.literal_eval(row['Fork urls'])[fork_start:fork_end + 1] jobs = [] fork_num = 0 for f in forked_urls: curr_forked_url = f print(curr_forked_url) driver.get(curr_forked_url) time.sleep(random_delay(2, 5)) try: # compare the fork to the base compare_elem = driver.find_element_by_xpath( "//*[contains(text(), 'This branch is')]") curr_fork_text = compare_elem.text print(curr_fork_text) fork_compare_text.append(curr_fork_text) except: fork_compare_text.append(None) base_urls_curr.append(curr_forked_url) # try: # # compare the fork to the base