Exemple #1
0
 def monitor(self):
     """Poll the product page until ``self.check_stock()`` reports stock.

     On the first successful page load the SKU id (second matching
     ``product-data-value`` span) is stored on ``self.sku_id`` and the
     primary image URL is emitted through ``self.image_signal``.

     Returns:
         The product image URL captured on the first successful load.
     """
     request_headers = {
         "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
         "accept-encoding": "gzip, deflate, br",
         "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
         "cache-control": "max-age=0",
         "upgrade-insecure-requests": "1",
         "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.69 Safari/537.36"
     }
     have_image = False
     product_image = ""
     while True:
         self.status_signal.emit({"msg":"Loading Product Page","status":"normal"})
         try:
             response = self.session.get(self.product,headers=request_headers)

             # Anything other than a 200 is reported as a missing product.
             if response.status_code != 200:
                 self.status_signal.emit({"msg":"Product Not Found","status":"normal"})
                 time.sleep(random_delay(self.monitor_delay, settings.rand_delay_start, settings.rand_delay_stop))
                 continue

             page = lxml.html.fromstring(response.text)
             if not have_image:
                 # Only scrape the SKU and image once; later polls reuse them.
                 sku_nodes = page.xpath('//span[@class="product-data-value body-copy"]/text()')
                 self.sku_id = sku_nodes[1].strip()
                 image_sources = page.xpath('//img[@class="primary-image"]/@src')
                 product_image = image_sources[0]
                 self.image_signal.emit(product_image)
                 have_image = True

             if self.check_stock():
                 return product_image

             self.status_signal.emit({"msg":"Waiting For Restock","status":"normal"})
             time.sleep(random_delay(self.monitor_delay, settings.rand_delay_start, settings.rand_delay_stop))
         except Exception as err:
             # Report the failing line and exception, then back off and retry.
             self.status_signal.emit({"msg":"Error Loading Product Page (line {} {} {})".format(sys.exc_info()[-1].tb_lineno, type(err).__name__, err),"status":"error"})
             time.sleep(self.error_delay)
Exemple #2
0
def traverse_proj_structure(proj_url, base_proj_structure, is_parent):
    """Walk a project's file tree breadth-first and fingerprint every file.

    Each URL is opened in a headless Firefox. If the page has a ``raw-url``
    element it is treated as a file: its text is fingerprinted with
    ``winnowing_algorithm`` and, for non-parent projects, compared against
    the matching entry of ``base_proj_structure``. Otherwise the page is
    treated as a folder and its child links are queued.

    Args:
        proj_url: URL of the project root to start from.
        base_proj_structure: Mapping of relative path -> fingerprint for the
            base project; only consulted when ``is_parent`` is false.
        is_parent: True when traversing the base project itself.

    Returns:
        ``leaf_hashes`` (relative path -> fingerprint) when ``is_parent`` is
        true, otherwise ``leaf_similarities`` (relative path -> similarity,
        or ``None`` when the path is missing from the base).
    """
    from collections import deque

    firefox_options = webdriver.FirefoxOptions()
    firefox_options.set_headless()
    driver = webdriver.Firefox(firefox_options=firefox_options)

    print(proj_url)

    # deque gives O(1) pops from the left for the BFS frontier.
    sub_url_queue = deque([proj_url])
    leaf_hashes = {}
    leaf_similarities = {}

    ignored = ['.github', '.DS_Store']

    try:
        # perform bfs on tree
        while sub_url_queue:
            popped_url = sub_url_queue.popleft()
            driver.get(popped_url)
            print(popped_url, len(sub_url_queue))
            time.sleep(random_delay(1, 2))
            # check if raw button exists = file
            try:
                raw_button = driver.find_element_by_id('raw-url')
                raw_button.click()
                relative_path = proj_pattern.search(popped_url).group(2)
                file_code = driver.find_elements_by_tag_name('pre')[0].text
                leaf_hashes[relative_path] = winnowing_algorithm(
                    file_code, window_size, n)
                if not is_parent:
                    if relative_path in base_proj_structure:
                        leaf_similarities[relative_path] = calculate_similarity(
                            base_proj_structure[relative_path],
                            leaf_hashes[relative_path])
                    else:
                        leaf_similarities[relative_path] = None
                        print(relative_path, "not found in base")
            except Exception:
                # Any failure on the file path means "treat as a folder";
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                file_folder_elems = driver.find_elements_by_xpath(
                    '//a[contains(@class, "js-navigation-open link-gray-dark")]')
                child_urls = [
                    f.get_attribute('href') for f in file_folder_elems
                    if f.text not in ignored and '.csv' not in f.text
                    and '.sln' not in f.text
                ]
                sub_url_queue.extend(child_urls)

            time.sleep(random_delay(1, 2))
    finally:
        # Always end the browser session, even if the traversal raised.
        driver.quit()

    if is_parent:
        return leaf_hashes
    return leaf_similarities
Exemple #3
0
    def monitor(self):
        """Poll the product page until it is purchasable within ``max_price``.

        Each pass loads ``self.product``, hands captcha pages to
        ``self.handle_captcha``, emits the ``og:image`` URL once, and checks
        for the text "add to cart". Session cookies are cleared before each
        restock wait.

        Returns:
            Tuple ``(product_image, offer_id)`` where ``offer_id`` comes from
            the first buy-box product in the page's ``item`` script JSON.
        """
        request_headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "accept-encoding": "gzip, deflate, br",
            "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
            "cache-control": "max-age=0",
            "upgrade-insecure-requests": "1",
            "user-agent": settings.userAgent
        }
        have_image = False
        product_image = ""
        while True:
            self.status_signal.emit({"msg": "Loading Product Page", "status": "normal"})
            try:
                response = self.session.get(self.product, headers=request_headers)

                if response.status_code != 200:
                    self.status_signal.emit({"msg": "Product Not Found", "status": "normal"})
                    time.sleep(random_delay(self.monitor_delay, settings.random_delay_start, settings.random_delay_stop))
                    continue

                # check for captcha page
                if self.is_captcha(response.text):
                    self.status_signal.emit({"msg": "CAPTCHA - Opening Product Page", "status": "error"})
                    self.handle_captcha(self.product)
                    continue

                page = lxml.html.fromstring(response.text)
                if not have_image:
                    product_image = page.xpath('//meta[@property="og:image"]/@content')[0]
                    self.image_signal.emit(product_image)
                    have_image = True

                price_nodes = page.xpath('//span[@itemprop="price"]/@content')
                price = float(price_nodes[0])

                if "add to cart" not in response.text.lower():
                    self.status_signal.emit({"msg": "Waiting For Restock", "status": "normal"})
                    self.session.cookies.clear()
                    time.sleep(random_delay(self.monitor_delay, settings.random_delay_start, settings.random_delay_stop))
                    continue

                # An empty max_price means "no price limit".
                if self.max_price != "" and float(self.max_price) < price:
                    self.status_signal.emit({"msg": "Waiting For Price Restock", "status": "normal"})
                    self.session.cookies.clear()
                    time.sleep(random_delay(self.monitor_delay, settings.random_delay_start,
                                            settings.random_delay_stop))
                    continue

                item_payload = json.loads(page.xpath('//script[@id="item"]/text()')[0])
                buy_box = item_payload["item"]["product"]["buyBox"]
                offer_id = buy_box["products"][0]["offerId"]
                return product_image, offer_id
            except Exception as err:
                self.status_signal.emit({"msg": "Error Loading Product Page (line {} {} {})".format(
                    sys.exc_info()[-1].tb_lineno, type(err).__name__, err), "status": "error"})
                time.sleep(self.error_delay)
Exemple #4
0
    def login(self):
        """Open target.com, sign in, and report whether a user name shows.

        After ``fill_and_authenticate()`` the account label is inspected: if
        it still contains "sign in" a failure message is emitted (with status
        "stopnow" when running headless, "normal" otherwise); any other text
        is reported as the signed-in user.
        """
        self.browser.get("https://www.target.com")
        account_locator = (By.ID, "account")
        account_button = wait(self.browser, self.TIMEOUT_LONG).until(
            EC.presence_of_element_located(account_locator))
        account_button.click()

        sign_in_locator = (By.XPATH, "//li[@id='accountNav-signIn']/a")
        sign_in_link = wait(self.browser, 5).until(
            EC.presence_of_element_located(sign_in_locator))
        sign_in_link.click()
        self.fill_and_authenticate()

        user_label = self.browser.find_element_by_xpath(
            '//span[@data-test="accountUserName"]')
        time.sleep(1)
        if "sign in" in user_label.text.lower():
            # Headless runs cannot be fixed by hand, so they are told to stop.
            status = "stopnow" if settings.run_headless else "normal"
            self.status_signal.emit(
                create_msg(
                    "Did not detect username on target page. Got \"{}\"".
                    format(user_label.text), status))
        else:
            self.status_signal.emit(
                create_msg("Succesfully signed in as {}".format(user_label.text),
                           "normal"))

        # Gives it time for the login to complete
        settle_delay = random_delay(self.monitor_delay,
                                    settings.random_delay_start,
                                    settings.random_delay_stop)
        time.sleep(settle_delay)
Exemple #5
0
    def monitor(self):
        """Load the product page and click "ship it" once it is available.

        The product image is located first; its ``src`` is emitted on
        ``self.image_signal`` and stored on ``self.product_image``. The stock
        check does not start until the image has been found. When the ship
        button cannot be clicked, the method waits a randomized delay,
        refreshes the page, and tries again.
        """
        # Pause between image lookups so a missing image does not turn into
        # a hot loop of WebDriver calls.
        image_retry_delay = 0.5
        img_found = False
        in_stock = False

        self.browser.get(self.product)

        while not in_stock:
            if not img_found:
                try:
                    product_img = self.browser.find_elements_by_class_name(
                        'slideDeckPicture')[0].find_element_by_tag_name("img")
                    img_src = product_img.get_attribute("src")
                    self.image_signal.emit(img_src)
                    self.product_image = img_src
                    img_found = True
                except Exception:
                    # Image not rendered yet: back off briefly, then retry.
                    time.sleep(image_retry_delay)
                    continue

            try:
                ship_btn = self.browser.find_element_by_xpath(
                    '//button[@data-test= "shipItButton"]')
                self.browser.execute_script(
                    "return arguments[0].scrollIntoView(true);", ship_btn)
                ship_btn.click()
                in_stock = True
                self.status_signal.emit(create_msg("Added to cart", "normal"))
            except Exception:
                # Missing or unclickable button is treated as out of stock.
                self.status_signal.emit(
                    create_msg("Waiting on Restock", "normal"))
                time.sleep(
                    random_delay(self.monitor_delay,
                                 settings.random_delay_start,
                                 settings.random_delay_stop))
                self.browser.refresh()
Exemple #6
0
    def monitor(self):
        """Open the product page, capture its image, then wait for stock.

        Blocks until the browser URL equals ``self.product``, then until the
        product image is found (emitted on ``self.image_signal`` and stored
        on ``self.product_image``), and finally until ``self.check_stock()``
        returns a truthy value. Between stock checks the page is refreshed
        after a randomized delay.
        """
        # Pause between image lookups so a missing image does not turn into
        # a hot loop of WebDriver calls.
        image_retry_delay = 0.5

        self.in_stock = False
        self.browser.get(self.product)
        print(self.product)
        wait(self.browser, self.TIMEOUT_LONG).until(
            lambda _: self.browser.current_url == self.product)

        while not self.img_found:
            try:
                product_img = self.browser.find_elements_by_class_name(
                    'slideDeckPicture')[0].find_element_by_tag_name("img")
                img_src = product_img.get_attribute("src")
                self.image_signal.emit(img_src)
                self.product_image = img_src
                self.img_found = True
            except Exception:
                # Image not rendered yet: back off briefly, then retry.
                time.sleep(image_retry_delay)

        while not self.in_stock:
            self.in_stock = self.check_stock()
            if self.in_stock:
                break
            self.status_signal.emit(
                create_msg("Waiting on Restock", "normal"))
            time.sleep(
                random_delay(self.monitor_delay,
                             settings.random_delay_start,
                             settings.random_delay_stop))
            self.browser.refresh()
Exemple #7
0
    def login(self):
        """Sign in to target.com, retrying until the account nav appears.

        Every failed attempt emits "Log In Failed.. Retrying" and refreshes
        the page before the next try. There is no retry limit.
        """
        self.browser.get("https://www.target.com")
        self.browser.find_element_by_id("account").click()
        sign_in_entry = wait(self.browser, self.TIMEOUT_LONG).until(
            EC.element_to_be_clickable((By.ID, "accountNav-signIn")))
        sign_in_entry.click()

        while True:
            try:
                username_present = EC.presence_of_element_located(
                    (By.ID, "username"))
                wait(self.browser, self.TIMEOUT_LONG).until(username_present)
                self.fill_and_authenticate()

                # Gives it time for the login to complete
                settle_delay = random_delay(self.monitor_delay,
                                            settings.random_delay_start,
                                            settings.random_delay_stop)
                time.sleep(settle_delay)

                # The account nav entry is taken as proof of a live session.
                account_present = EC.presence_of_element_located(
                    (By.ID, "accountNav-account"))
                wait(self.browser, self.TIMEOUT_LONG).until(account_present)
            except Exception:
                self.status_signal.emit(
                    create_msg("Log In Failed.. Retrying", "normal"))
                self.browser.refresh()
            else:
                break
Exemple #8
0
    def checkout(self):
        """Click the checkout button, retrying until the click succeeds.

        Emits "Checking out" once up front. After a successful click the
        method sleeps for a randomized delay before returning.
        """
        # Pause between attempts so a missing button does not turn into a
        # hot loop of WebDriver calls.
        click_retry_delay = 0.5
        did_checkout = False
        self.status_signal.emit(create_msg("Checking out", "normal"))

        while not did_checkout:
            try:
                self.browser.find_element_by_xpath('//button[@data-test= "checkout-button"]').click()
            except Exception:
                # Button not present/clickable yet. Catching Exception (not a
                # bare except) keeps KeyboardInterrupt/SystemExit working.
                time.sleep(click_retry_delay)
                continue
            did_checkout = True
            time.sleep(random_delay(self.monitor_delay, settings.random_delay_start, settings.random_delay_stop))
Exemple #9
0
    def login(self):
        """Sign in to target.com with ``settings.target_user`` / ``target_pass``.

        No success check is performed; the method only sleeps for a
        randomized delay after submitting the form.
        """
        self.browser.get("https://www.target.com")
        self.browser.find_element_by_id("account").click()

        sign_in_entry = wait(self.browser, 10).until(
            EC.element_to_be_clickable((By.ID, "accountNav-signIn")))
        sign_in_entry.click()

        username_field = wait(self.browser, 10).until(
            EC.presence_of_element_located((By.ID, "username")))
        username_field.send_keys(settings.target_user)

        self.browser.find_element_by_id("password").send_keys(settings.target_pass)
        self.browser.find_element_by_id("login").click()

        # Gives it time for the login to complete
        settle_delay = random_delay(self.monitor_delay, settings.random_delay_start, settings.random_delay_stop)
        time.sleep(settle_delay)
Exemple #10
0
    def monitor(self):
        """Report the login state, then poll the product page and add to cart.

        The login check looks for "user-message-initial" in the current page
        source and only emits a status message; the method carries on either
        way (see the TODOs below). Once the "Add to Cart" button is clickable
        and still enabled one second after the click, the item is considered
        added and the browser navigates to the cart page.
        """
        ## verify we have signed successfully else we should abort the task or attempt sign-in again
        # (TODO: add max attempts to sign-in before exiting task)
        if "user-message-initial" in self.browser.page_source:
            self.status_signal.emit(
                create_msg("Gamestop Successfully logged in...", "normal"))
        else:
            self.status_signal.emit(
                create_msg("Error logging in... please restart task",
                           "stopnow"))

        # TODO: Exit task if we are not signed in
        self.status_signal.emit(create_msg("Checking Stock..", "normal"))

        # self.browser.set_window_size(900, 900)

        self.browser.get(self.product)
        wait(self.browser, self.LONG_TIMEOUT).until(
            lambda _: self.browser.current_url == self.product)

        in_stock = False

        while not in_stock:
            try:
                # The randomized monitor delay doubles as the wait timeout, so
                # a timeout here paces the refresh loop.
                wait(
                    self.browser,
                    random_delay(
                        self.monitor_delay, settings.random_delay_start,
                        settings.random_delay_stop)).until(
                            EC.element_to_be_clickable(
                                (By.XPATH,
                                 '//button[@data-buttontext="Add to Cart"]')))
                add_to_cart_btn = self.browser.find_element_by_xpath(
                    '//button[@data-buttontext="Add to Cart"]')
                add_to_cart_btn.click()
                time.sleep(1)
                # A button that is disabled after the click is treated as
                # "not in stock".
                if not add_to_cart_btn.is_enabled():
                    self.status_signal.emit(
                        create_msg("Waiting For Restock", "normal"))
                    self.browser.refresh()
                    continue
                in_stock = True
                self.status_signal.emit(create_msg("Added to cart", "normal"))
                self.browser.maximize_window()
                # remove stop temporarily to see if gamestop captcha is an issue
                # self.status_signal.emit(create_msg("Added to cart, check for captcha","stopnow"))
                self.browser.get("https://www.gamestop.com/cart/")
            except Exception:
                # Catching Exception (not a bare except) keeps
                # KeyboardInterrupt/SystemExit able to stop the task.
                self.status_signal.emit(
                    create_msg("Waiting For Restock", "normal"))
                self.browser.refresh()
Exemple #11
0
    def login(self):
        """Open target.com's sign-in form and submit the stored credentials.

        Credential entry is delegated to ``self.fill_and_authenticate()``;
        afterwards the method sleeps for a randomized delay based on 3.
        """
        self.browser.get("https://www.target.com")
        self.browser.find_element_by_id("account").click()

        sign_in_entry = wait(self.browser, self.TIMEOUT_LONG).until(
            EC.element_to_be_clickable((By.ID, "accountNav-signIn")))
        sign_in_entry.click()

        # Block until the username field exists before filling the form.
        username_present = EC.presence_of_element_located((By.ID, "username"))
        wait(self.browser, self.TIMEOUT_LONG).until(username_present)
        self.fill_and_authenticate()

        # Gives it time for the login to complete
        settle_delay = random_delay(3, settings.random_delay_start,
                                    settings.random_delay_stop)
        time.sleep(settle_delay)
Exemple #12
0
    def check_cart_items(self):
        """Request the checkout contract until the first cart item is returned.

        Posts the shipping location from ``self.profile`` to the checkout
        contract endpoint and retries forever. A 200/201 response ends the
        loop; an "Item is no longer in stock." message waits for restock;
        any other response is checked for a captcha and reported as an error.

        Returns:
            Tuple ``(item_id, fulfillment_option, ship_method)`` taken from
            the first entry of the response's ``items`` list.
        """
        headers = {
            "accept": "application/json, text/javascript, */*; q=0.01",
            "accept-encoding": "gzip, deflate, br",
            "accept-language": "en-US,en;q=0.9",
            "content-type": "application/json",
            "origin": "https://www.walmart.com",
            "referer": "https://www.walmart.com/checkout/",
            "user-agent": settings.userAgent,
            "wm_vertical_id": "0",
            "wm_cvv_in_session": "true",
        }

        profile = self.profile
        body = {"postalCode": profile["shipping_zipcode"], "city": profile["shipping_city"],
                "state": profile["shipping_state"], "isZipLocated": True, "crt:CRT": "", "customerId:CID": "",
                "customerType:type": "", "affiliateInfo:com.wm.reflector": "", "storeList": []}

        while True:
            self.status_signal.emit({"msg": "Loading Cart Items", "status": "normal"})
            try:
                r = self.session.post("https://www.walmart.com/api/checkout/v3/contract?page=CHECKOUT_VIEW", json=body,
                                      headers=headers)
                print(
                    r.text)  # this sometimes returns json data related to loading a captcha.js file so that could be intercepted when requests fail

                if r.status_code in (200, 201):
                    first_item = json.loads(r.text)["items"][0]
                    item_id = first_item["id"]
                    fulfillment_option = first_item["fulfillmentSelection"]["fulfillmentOption"]
                    ship_method = first_item["fulfillmentSelection"]["shipMethod"]
                    self.status_signal.emit({"msg": "Loaded Cart Items", "status": "normal"})
                    return item_id, fulfillment_option, ship_method

                # Error bodies are not guaranteed to be JSON objects with a
                # "message" key (captcha pages, for example). Parse
                # defensively so the captcha branch below stays reachable.
                try:
                    message = json.loads(r.text).get("message")
                except (ValueError, AttributeError):
                    message = None

                if message == "Item is no longer in stock.":
                    self.status_signal.emit({"msg": "Waiting For Restock", "status": "normal"})
                    time.sleep(
                        random_delay(self.monitor_delay, settings.random_delay_start, settings.random_delay_stop))
                else:
                    if self.is_captcha(r.text):
                        self.handle_captcha("https://www.walmart.com/checkout")
                    self.status_signal.emit(
                        {"msg": "Error Loading Cart Items, Got Response: " + str(r.text), "status": "error"})
                    time.sleep(self.error_delay)
            except Exception as e:
                self.status_signal.emit({"msg": "Error Loading Cart Items (line {} {} {})".format(
                    sys.exc_info()[-1].tb_lineno, type(e).__name__, e), "status": "error"})
                time.sleep(self.error_delay)
Exemple #13
0
    def monitor(self):
        """Open the product page, capture its image, and add it to the cart.

        Blocks until the browser URL equals ``self.product`` and the product
        image is found (emitted on ``self.image_signal`` and stored on
        ``self.product_image``). It then polls for an order-pickup button,
        falling back to a ship-it button, and clicks the first one found.
        While neither exists the page is refreshed after a randomized delay.
        """
        # Pause between image lookups so a missing image does not turn into
        # a hot loop of WebDriver calls.
        image_retry_delay = 0.5
        img_found = False
        in_stock = False

        self.browser.get(self.product)
        wait(self.browser, self.TIMEOUT_LONG).until(
            lambda _: self.browser.current_url == self.product)

        while not img_found:
            try:
                product_img = self.browser.find_elements_by_class_name(
                    'swiper-zoom-container')[0].find_element_by_tag_name(
                        "img")
                print(product_img)
                img_src = product_img.get_attribute("src")
                self.image_signal.emit(img_src)
                self.product_image = img_src
                img_found = True
            except Exception:
                # Image not rendered yet: back off briefly, then retry.
                time.sleep(image_retry_delay)

        while not in_stock:
            # Prefer order pickup; only look for ship-it when pickup is
            # absent. One lookup per selector instead of two.
            candidates = (
                self.browser.find_elements_by_xpath(
                    '//button[@data-test= "orderPickupButton"]')
                or self.browser.find_elements_by_xpath(
                    '//button[@data-test= "shipItButton"]'))
            if not candidates:
                self.status_signal.emit(
                    create_msg("Waiting on Restock", "normal"))
                time.sleep(
                    random_delay(self.monitor_delay,
                                 settings.random_delay_start,
                                 settings.random_delay_stop))
                self.browser.refresh()
                continue

            add_to_cart_btn = candidates[0]
            self.browser.execute_script(
                "return arguments[0].scrollIntoView(true);", add_to_cart_btn)
            add_to_cart_btn.click()
            in_stock = True
            self.status_signal.emit(create_msg("Added to cart", "normal"))
Exemple #14
0
    def monitor(self):
        """Wait for login (unless monitor-only), then poll the product page.

        In normal mode the method blocks until the browser reaches the
        account page, calling ``self.login()`` again after each failed wait.
        In monitor-only mode it just sleeps 1-4 seconds. It then polls the
        product page until both the home-delivery option and the
        "Add to Cart" button are enabled, saves a screenshot, and either
        opens the cart (normal mode) or calls ``self.notify()``.
        """
        if self.MONITOR_ONLY:
            time.sleep(random.randint(1, 4))
        else:
            logged_in = False
            while not logged_in:
                try:
                    wait(self.browser, self.LONG_TIMEOUT).until(lambda _: self.browser.current_url == "https://www.gamestop.com/account/")
                    self.status_signal.emit(create_msg("Successfully Logged In", "normal"))
                    logged_in = True
                except Exception:
                    self.status_signal.emit(create_msg("Log in failed. Retrying.", "normal"))
                    self.login()

        self.status_signal.emit(create_msg("Checking Stock..", "normal"))

        self.browser.get(self.product)
        wait(self.browser, self.LONG_TIMEOUT).until(lambda _: self.browser.current_url == self.product)

        in_stock = False

        while not in_stock:
            try:
                # The randomized monitor delay doubles as the wait timeout.
                wait(self.browser, random_delay(self.monitor_delay, settings.random_delay_start, settings.random_delay_stop)).until(EC.element_to_be_clickable((By.XPATH, '//button[@data-buttontext="Add to Cart"]')))
                add_to_cart_btn = self.browser.find_element_by_xpath('//button[@data-buttontext="Add to Cart"]')
                home_delivery_option = self.browser.find_element_by_xpath('//input[@value="home"]')
                if not self.MONITOR_ONLY:
                    add_to_cart_btn.click()
                time.sleep(1)
                # Both controls must be enabled for the item to count as in
                # stock (logical `and` replaces the original bitwise `&`).
                if not (home_delivery_option.is_enabled() and add_to_cart_btn.is_enabled()):
                    self.status_signal.emit(create_msg("Out of stock. Rechecking soon.", "normal"))
                    time.sleep(self.monitor_delay + random.randint(1, 4))
                    self.browser.refresh()
                    continue
                in_stock = True
                # strftime('%s') is a platform-specific extension; use
                # timestamp() so the file name is built the same way everywhere.
                epoch_seconds = str(int(datetime.now().timestamp()))
                self.browser.save_screenshot("screenshots/gamestop_" + epoch_seconds + ".png")
                if not self.MONITOR_ONLY:
                    self.status_signal.emit(create_msg("Added to cart", "normal"))
                    self.browser.get("https://www.gamestop.com/cart/")
                else:
                    self.status_signal.emit(create_msg("Item in stock. Sending notification", "normal"))
                    self.notify()
            except Exception:
                # Catching Exception (not a bare except) keeps
                # KeyboardInterrupt/SystemExit able to stop the task.
                self.status_signal.emit(create_msg("Waiting For Restock", "normal"))
                self.browser.refresh()
Exemple #15
0
    def monitor(self):
        """Open the product page, capture its image, and watch stock forever.

        After the image is found, ``self.check_stock()`` is polled. Each time
        it reports stock the method emits "Item in stock...", saves a
        screenshot, calls ``self.notify()`` when ``self.MONITOR_ONLY`` is set,
        sleeps 15 seconds, and resets ``self.in_stock`` to False. The loop
        therefore never exits on its own.
        """
        # Pause after a failed attempt so persistent errors do not turn into
        # a hot loop of WebDriver calls.
        retry_delay = 0.5

        self.in_stock = False
        self.browser.get(self.product)
        wait(self.browser, self.TIMEOUT_LONG).until(
            lambda _: self.browser.current_url == self.product)

        while not self.img_found:
            try:
                product_img = self.browser.find_elements_by_class_name(
                    'slideDeckPicture')[0].find_element_by_tag_name("img")
                img_src = product_img.get_attribute("src")
                self.image_signal.emit(img_src)
                self.product_image = img_src
                self.img_found = True
            except Exception:
                # Image not rendered yet: back off briefly, then retry.
                time.sleep(retry_delay)

        while not self.in_stock:
            try:
                self.in_stock = self.check_stock()
                if self.in_stock:
                    self.status_signal.emit(
                        create_msg("Item in stock...", "normal"))
                    # strftime('%s') is a platform-specific extension; use
                    # timestamp() so the file name is built the same way
                    # everywhere.
                    epoch_seconds = str(int(datetime.now().timestamp()))
                    self.browser.save_screenshot(
                        "screenshots/target_" + epoch_seconds + ".png")

                    if self.MONITOR_ONLY:
                        self.notify()

                    time.sleep(15)
                    # Reset so the loop keeps monitoring after a hit.
                    self.in_stock = False
                else:
                    self.status_signal.emit(
                        create_msg("Waiting on Restock", "normal"))
                    time.sleep(
                        random_delay(self.monitor_delay,
                                     settings.random_delay_start,
                                     settings.random_delay_stop))
                    self.browser.refresh()
            except Exception:
                # Best-effort loop: ignore the failure but do not spin.
                time.sleep(retry_delay)
Exemple #16
0
    def monitor(self):
        """Wait for the account page, then poll the product and add to cart.

        Blocks until the browser URL is the GameStop account page, resizes
        the window to 900x900, and opens ``self.product``. Once the
        "Add to Cart" button is clickable and still enabled one second after
        the click, the browser navigates to the cart page.
        """
        wait(self.browser,
             self.LONG_TIMEOUT).until(lambda _: self.browser.current_url ==
                                      "https://www.gamestop.com/account/")

        self.status_signal.emit(create_msg("Checking Stock..", "normal"))

        self.browser.set_window_size(900, 900)

        self.browser.get(self.product)
        wait(self.browser, self.LONG_TIMEOUT).until(
            lambda _: self.browser.current_url == self.product)

        in_stock = False

        while not in_stock:
            try:
                # The randomized monitor delay doubles as the wait timeout, so
                # a timeout here paces the refresh loop.
                wait(
                    self.browser,
                    random_delay(
                        self.monitor_delay, settings.random_delay_start,
                        settings.random_delay_stop)).until(
                            EC.element_to_be_clickable(
                                (By.XPATH,
                                 '//button[@data-buttontext="Add to Cart"]')))
                add_to_cart_btn = self.browser.find_element_by_xpath(
                    '//button[@data-buttontext="Add to Cart"]')
                add_to_cart_btn.click()
                time.sleep(1)
                # A button that is disabled after the click is treated as
                # "not in stock".
                if not add_to_cart_btn.is_enabled():
                    self.status_signal.emit(
                        create_msg("Waiting For Restock", "normal"))
                    self.browser.refresh()
                    continue
                in_stock = True
                self.status_signal.emit(create_msg("Added to cart", "normal"))
                self.browser.get("https://www.gamestop.com/cart/")
            except Exception:
                # Catching Exception (not a bare except) keeps
                # KeyboardInterrupt/SystemExit able to stop the task.
                self.status_signal.emit(
                    create_msg("Waiting For Restock", "normal"))
                self.browser.refresh()
Exemple #17
0
 def login(self):
     """Walk through the two-step sign-in form on newegg.com.

     Enters the e-mail, submits, enters the password, submits again, then
     sleeps for a randomized delay. No success check is performed.
     """
     # NOTE(review): the credentials come from settings.bestbuy_user /
     # settings.bestbuy_pass although the site is Newegg - confirm intended.

     def when_clickable(locator):
         # Block until the element for `locator` is clickable and return it.
         return wait(self.browser, self.TIMEOUT_LONG).until(
             EC.element_to_be_clickable(locator))

     orange_button = (By.CSS_SELECTOR, ".btn-orange")

     self.browser.get("https://newegg.com")
     when_clickable((By.CSS_SELECTOR, ".nav-complex-title")).click()

     email_field = when_clickable((By.ID, "labeled-input-signEmail"))
     email_field.send_keys(settings.bestbuy_user)
     when_clickable(orange_button).click()

     password_field = when_clickable((By.ID, "labeled-input-password"))
     password_field.send_keys(settings.bestbuy_pass)
     when_clickable(orange_button).click()

     settle_delay = random_delay(self.monitor_delay,
                                 settings.random_delay_start,
                                 settings.random_delay_stop)
     time.sleep(settle_delay)
def request_www_westernunion_com(prepared_requests, emails, proxies=None):
    """Send each e-mail through a prepared request and sort it by response.

    ``emails`` is split into consecutive, equally sized slices, one per
    entry of ``prepared_requests``. For each e-mail a JSON body is posted and
    the ``error.message`` field of the response decides which file under
    ``data/`` the address is appended to.

    Args:
        prepared_requests: Sequence of ``(request, session_id)`` pairs; the
            request is passed to ``urllib.request.urlopen``.
        emails: Sequence of e-mail address strings.
        proxies: Optional sequence of proxy hosts. When shorter than an
            e-mail slice, the proxies are reused cyclically.

    Returns:
        None. Does nothing when ``prepared_requests`` is empty.

    Raises:
        Any exception other than ``urllib.error.URLError`` (which is logged
        and skipped) propagates to the caller.
    """
    worker_name = current_thread().name

    # Without requests there is nothing to send with (and nothing to divide
    # the e-mails by).
    if not prepared_requests:
        return

    # None replaces the mutable [] default; normalise to a list.
    proxies = proxies or []

    # Ceiling division: every e-mail lands in exactly one slice.
    emails_per_request = -(-len(emails) // len(prepared_requests))

    for idx, prepared_request in enumerate(prepared_requests):
        subset_start = idx * emails_per_request
        subset_end = subset_start + emails_per_request

        for k, email in enumerate(emails[subset_start:subset_end]):
            try:
                request, session_id = prepared_request

                body = json.dumps({
                    'email': email,
                    'security': {
                        'session': {
                            'id': session_id
                        },
                        'version': '2'
                    },
                    'bashPath': '/us/en'
                }).encode()

                # NOTE(review): the return value is discarded; if
                # random_delay only computes a duration (it is wrapped in
                # time.sleep elsewhere), no pause happens here - confirm.
                random_delay(1, 2)

                # Add the proxy and send the request
                if proxies:
                    request.set_proxy(proxies[k % len(proxies)], 'http')

                # The context manager closes the connection; read() decodes
                # the transfer encoding, unlike the raw response.fp.
                with urllib.request.urlopen(request, body) as response:
                    data = response.read()
                    status = response.status
                    reason = response.msg

                # Parse the message
                if data:
                    # Get the message from the response
                    msg = json.loads(data.decode('utf-8'))['error']['message']

                    # NOTE(review): addresses are written without a
                    # separator; presumably each email already ends with a
                    # newline - verify against the caller.

                    # Email IS NOT registered
                    if "We can't find that email address" in msg:
                        log.info('{}: Not registered: {}'.format(worker_name, email))
                        with open('data/not_registered_emails.txt', 'a') as fp:
                            fp.write(email)

                    # Email IS registered
                    elif "There's already an account with this email address" in msg:
                        log.info('{}: Registered: {}'.format(worker_name, email))
                        with open('data/registered_emails.txt', 'a') as fp:
                            fp.write(email)

                    # Other message
                    else:
                        log.info('{}: {}'.format(worker_name, msg))

                # No data received in response
                else:
                    log.info('{}: Status: {}, Message: {}, Data: {} ({})'.format(
                        worker_name, status, reason, data, email)
                    )
                    with open('data/failed_emails.txt', 'a') as fp:
                        fp.write(email)

            except urllib.error.URLError as e:
                # Network/HTTP failures are logged and the e-mail is skipped.
                logging.exception(e)
# Read the open-source project table; the lookups below use its
# 'Project Name' and 'Project Link' columns.
open_source_projs = pd.read_csv('awesome_open_source_projs.csv')
# proj_names = list(open_source_projs['Project Name'])
# Only this hand-picked pair of projects is processed instead of the full
# column above.
proj_names = ['SirixDB', 'PublicLab.org']
# proj_urls = list(open_source_projs['Project Link'])
# For each selected name take the 'Project Link' of the first matching row;
# .iloc[0] raises IndexError when a name is missing from the CSV.
proj_urls = [
    str(open_source_projs.loc[open_source_projs['Project Name'] == i].iloc[0]
        ['Project Link']) for i in proj_names
]

for os_proj_url in proj_urls[company_lower_lim:company_upper_lim + 1]:
    print(os_proj_url)
    try:
        # driver.get(os_proj_url)

        time.sleep(random_delay(1, 3))

        # # grab relevant data: NUMBER of watchers, stars, forks, branches, tags
        # watch_elements = driver.find_elements_by_xpath('//a[contains(@aria-label, "users are watching this repository")]')
        # star_elements = driver.find_elements_by_xpath('//a[contains(@aria-label, "users starred this repository")]')
        # fork_elements = driver.find_elements_by_xpath('//a[contains(@aria-label, "users forked this repository")]')

        # # maintaining initialized value for later click
        # fork_element = None

        # if len(watch_elements) > 0:
        # 	try:
        # 		watch_element = watch_elements[0]
        # 		num_watchers = int(numbers.search(watch_element.get_attribute('aria-label')).group(1))
        # 		num_watchers_data.append(num_watchers)
        # 	except:
Exemple #20
0
from selenium import webdriver
from utils import random_delay
import pandas as pd
import time
import regex as re

# Captures the "https://github.com/<owner>/<repo>/" prefix of a GitHub URL.
# Raw string: the previous non-raw literal relied on invalid escape sequences
# ("\/", "\."), which newer Python versions warn about. "/" needs no escaping
# in a regex, so this pattern matches exactly the same text.
main_proj_url_format = re.compile(r'(https://github\.com/[^/]+/[^/]+/)')

driver = webdriver.Firefox()
driver.get('https://github.com/MunGell/awesome-for-beginners')

# Sanity check that a GitHub page actually loaded.
assert 'GitHub' in driver.title

time.sleep(random_delay(1, 3))

# Every link whose href contains "/labels/" is taken as one listed project.
proj_link_elements = driver.find_elements_by_xpath(
    '//a[contains(@href, "/labels/")]')
proj_names = [i.text for i in proj_link_elements]
# Reduce each label link to its repository root URL; a link that does not
# match the pattern makes .search() return None and raises AttributeError.
proj_links = [
    str(main_proj_url_format.search(i.get_attribute('href')).group(1))
    for i in proj_link_elements
]

print(proj_names)
print(proj_links)

assert len(proj_names) == len(proj_links)

print(len(proj_names))

proj_df = pd.DataFrame({
Exemple #21
0
fork_compare_text = []
base_urls_curr = []

os_proj_data = pd.read_csv('awesome_proj_data_github.csv')
for index, row in list(os_proj_data.iterrows())[start_lim:end_lim + 1]:
    base_url = row['Url']
    forked_urls = ast.literal_eval(row['Fork urls'])[fork_start:fork_end + 1]
    jobs = []
    fork_num = 0
    for f in forked_urls:
        curr_forked_url = f
        print(curr_forked_url)

        driver.get(curr_forked_url)
        time.sleep(random_delay(2, 5))

        try:
            # compare the fork to the base
            compare_elem = driver.find_element_by_xpath(
                "//*[contains(text(), 'This branch is')]")
            curr_fork_text = compare_elem.text
            print(curr_fork_text)
            fork_compare_text.append(curr_fork_text)
        except:
            fork_compare_text.append(None)

        base_urls_curr.append(curr_forked_url)

        # try:
        # 	# compare the fork to the base