Ejemplo n.º 1
0
# http://propertydata.orangecountygov.com/imate/viewlist.aspx?sort=printkey&swis=3311

# get this cookie:
cookie_url = 'http://www.co.orange.ny.us/content/124/1368/4136.aspx'

towncode = str(swis)[0:4]
search = '/'.join([BASE_URL, 'viewlist..aspx?sort=printkey&swis={towncode}'])

# SWS = 332489; first four are town code
# 02200000090030000000

printkey = str(pin).strip(str(swis))
SEARCH_URL = ''.join([BASE_URL, '/propdetail.aspx?'])
prop_search = '&'.join(['swis={}'.format(swis),
                        'printkey={}'.format(printkey)])
full_url = SEARCH_URL + prop_search

# create driver
driver = Firefox()
driver.get(cookie_url)
link = driver.find_element_by_class_name('ApplyClass')
link.click()
response = driver.get(full_url)
response = driver.request('GET', full_url)
print(response)

# driver.save_screenshot(''.join([pin,'.png']))
# property_info = driver.find_element_by_id('pnlRTaxID')
# property_info.screenshot(''.join([pin,'.png']))
Ejemplo n.º 2
0
class RedBubble:
    def __init__(self):
        self.options = Options()
        self.options.set_preference("intl.accept_languages", 'en-us')
        self.options.set_preference('useAutomationExtension', False)
        self.options.set_preference('dom.webdriver.enabled', False)

        self.driver = Firefox(options=self.options)
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        )
        self.driver.get("https://amiunique.org/fp")
        time.sleep(5)
        self.driver.get("https://antcpt.com/score_detector/")
        time.sleep(5)

    def get_to(self, url):
        if self.driver.current_url != url:
            self.driver.get(url)

    def download_image(self, file_name, image_url):
        if "x1000" in image_url:
            big_image_url = image_url.replace("1000x1000", "2000x2000")
            big_image_url = big_image_url.replace("750x1000", "1500x2000")
            big_image_url = big_image_url.replace("1000", "2000")
            big_image_url = big_image_url.replace("750", "1500")
            if self.download_image(file_name, big_image_url):
                print(f"Gotten bigger image for {file_name} @ {big_image_url}")
                return True
        try:
            res = self.driver.request('GET', image_url)
        except requests.exceptions.ConnectionError:
            time.sleep(60)
            res = self.driver.request('GET', image_url)

        if 200 <= res.status_code < 300:
            im = Image.open(io.BytesIO(res.content))

            width, height = im.size

            # Add watermark
            if max(width, height) >= 2000:
                font = ImageFont.truetype('Arial.ttf', 72)
            else:
                font = ImageFont.truetype('Arial.ttf', 36)

            draw = ImageDraw.Draw(im)
            text = "DuckHunt.me"

            fill_color = (128, 0, 0)

            textwidth, textheight = draw.textsize(text, font)

            margin = 30
            x = width - textwidth - margin
            y = height - textheight - margin
            try:
                draw.text((x, y), text, font=font, fill=fill_color)
            except TypeError:
                draw.text((x, y), text, font=font, fill=128)

            # Write to file
            im.save(file_name)

            return True
        else:
            print(f"Download of {file_name} (@{image_url}) FAILED.")
            return False

    def clean_html(self, html_string):
        tree = html.fromstring(html_string)

        cleaner = html.clean.Cleaner()
        cleaner.safe_attrs_only = True
        cleaner.safe_attrs = frozenset(['id'])
        cleaned = cleaner.clean_html(tree)
        return html.tostring(cleaned, encoding='unicode')

    def dismiss_cookie_popup(self):
        self.dismiss_member_prompt()
        try:
            self.driver.find_element_by_class_name(
                "Toastify__toast-body").find_element_by_tag_name(
                    "button").click()
            return True
        except (NoSuchElementException, ElementClickInterceptedException):
            return False

    def dismiss_member_prompt(self):
        try:
            self.driver.find_element_by_class_name(
                "sailthru-overlay-close").click()
            return True
        except NoSuchElementException:
            return False

    def login(self):
        self.driver.get("https://www.redbubble.com/en/auth/login")

        username = self.driver.find_element_by_xpath(
            '//*[@id="ReduxFormInput1"]')
        username.send_keys(USERNAME)

        password = self.driver.find_element_by_xpath(
            '//*[@id="ReduxFormInput2"]')
        password.send_keys(PASSWORD)

        connect = self.driver.find_element_by_xpath(
            '/html/body/div[1]/div[7]/div[2]/div[2]/div/form/span/button')

        prev_url = self.driver.current_url
        connect.click()
        time.sleep(5)

        while prev_url == self.driver.current_url:
            print("Please solve captcha")
            time.sleep(1)

    def change_locale(self):
        self.get_to("https://www.redbubble.com/settings/show")

        locale_dropdown = Select(
            self.driver.find_element_by_xpath('//*[@id="settings_locale"]'))
        locale_dropdown.select_by_visible_text("English")

        country_code_dropdown = Select(
            self.driver.find_element_by_xpath(
                '//*[@id="settings_country_code"]'))
        country_code_dropdown.select_by_value("US")

        currency_dropdown = Select(
            self.driver.find_element_by_xpath(
                '//*[@id="settings_currency_iso"]'))
        currency_dropdown.select_by_value("USD")

        try:
            button = self.driver.find_element_by_xpath(
                '/html/body/div/form/div[4]/input')
        except NoSuchElementException:
            button = self.driver.find_element_by_xpath(
                '/html/body/div[1]/div[6]/div[2]/form/div[4]/button')

        button.click()

    def get_works_urls(self):
        self.get_to("https://www.redbubble.com/en/portfolio/manage_works")
        self.dismiss_cookie_popup()
        works = []

        links = self.driver.find_elements_by_class_name(
            'works_work-menu-option')
        for a in links:
            if a.tag_name != 'a':
                continue

            href = a.get_attribute('href')
            if href.startswith(
                    "https://www.redbubble.com/people/duckhuntdiscord/works"):
                works.append(href)

        random.shuffle(works)
        return works

    def get_work_products_urls(self, work_url):
        self.get_to(work_url)
        products_urls = []

        links = self.driver.find_elements_by_class_name('carousel_item-link')
        for a in links:
            if a.tag_name != 'a':
                continue

            href = a.get_attribute('href')
            if href.startswith("https://www.redbubble.com/i/"):
                products_urls.append(href)

        random.shuffle(products_urls)
        return products_urls

    def get_work_info(self, work_url):
        self.get_to(work_url)

        return {
            "name":
            self.driver.find_element_by_class_name(
                'work-information_title').text,
            "url":
            urljoin(work_url,
                    urlparse(work_url).path)
        }

    def _get_colorswatch(self):
        try:
            colors_button = self.driver.find_element_by_css_selector(
                "[class^='ColorPickerActivator__colorPickerActivator--']")
        except NoSuchElementException:
            colors_button = None

        if colors_button:
            colors_button.click()

        try:
            colors_swatch = self.driver.find_elements_by_css_selector(
                "[class^='DesktopColorControls__swatch--']")
        except NoSuchElementException:
            colors_swatch = [None]

        if len(colors_swatch) == 0:
            colors_swatch = [None]

        return colors_swatch

    def _get_print_locations(self):
        print_locations = self.driver.find_elements_by_name("printLocation")

        if not print_locations:
            print_locations = [None]

        return print_locations

    def _get_sizes(self):
        sizes = self.driver.find_elements_by_name("size")

        if not sizes:
            sizes = [None]

        return sizes

    def download_product_information(self, product_url, work_info):
        self.get_to(product_url)
        self.dismiss_cookie_popup()

        varients = []

        product_name = self.driver.find_element_by_tag_name('h1').text
        print(f"Downloading {product_name}...")

        download_to = DOWNLOAD_DIR / work_info["name"] / product_name
        download_to.mkdir(exist_ok=True, parents=True)

        if (download_to / "download.json").exists():
            print("Skipping : already downloaded")
            return

        colors = len(self._get_colorswatch())
        print(f"Found {colors} colors_swatch.")
        for color_n in range(colors):
            color_element = self._get_colorswatch()[color_n]

            if color_element is None:
                color_product_name = None
            else:
                color_product_name = color_element.get_attribute('title')
                color_element.click()
                self.dismiss_cookie_popup()

            print_locations_count = len(self._get_print_locations())
            print(f"Found {print_locations_count} print locations.")

            for print_location_n in range(print_locations_count):
                print_location_element = self._get_print_locations(
                )[print_location_n]
                if print_location_element is None:
                    print_location_text = None
                else:
                    label = print_location_element.find_element_by_xpath('..')
                    print_location_text = label.text
                    label.click()
                    self.dismiss_cookie_popup()

                sizes = self._get_sizes()
                sizes_names = []
                for size_element in sizes:
                    if size_element is None:
                        continue
                    else:
                        label = size_element.find_element_by_xpath('..')
                        size_name = label.text
                        sizes_names.append(size_name)

                time.sleep(1)  # Time for images to load
                images = self.driver.find_elements_by_tag_name('img')

                images_downloaded = 0
                images_files = []

                for img in images:
                    src = img.get_attribute('src')
                    klass = img.get_attribute('class')
                    if src.startswith("https://ih1.redbubble.net/image"
                                      ) and "GalleryImage__img--" in klass:
                        main = "PreviewGallery__rightColumn--" in img.find_element_by_xpath(
                            '..').find_element_by_xpath(
                                '..').find_element_by_xpath(
                                    '..').get_attribute('class')

                        img_folder = download_to / f"{color_product_name} - {print_location_text}"
                        img_folder.mkdir(parents=True, exist_ok=True)
                        image_name = img_folder / f"{images_downloaded}.jpg"
                        dld = self.download_image(file_name=image_name,
                                                  image_url=src)
                        if dld:
                            images_downloaded += 1
                            images_files.append({
                                "link": str(image_name),
                                "main": main
                            })

                price = ''
                config_div = self.driver.find_element_by_tag_name(
                    'h1').find_element_by_xpath('..')
                prices_maybe = config_div.find_elements_by_tag_name('span')

                for price_maybe in prices_maybe:
                    try:
                        price = price_maybe.find_element_by_tag_name(
                            'span').text
                        break
                    except NoSuchElementException:
                        continue

                varient = {
                    "color_product_name": color_product_name,
                    "print_location_text": print_location_text,
                    "sizes_names": sizes_names,
                    "images_count": images_downloaded,
                    "images": images_files,
                    "price": price,
                    "url": self.driver.current_url,
                }

                print(varient)

                varients.append(varient)
        try:
            features = self.driver.find_element_by_xpath("//*[contains(text(), 'Features')]")\
                                  .find_element_by_xpath('..')\
                                  .find_element_by_tag_name('ul')\
                                  .get_attribute('outerHTML')
        except NoSuchElementException:
            features = self.driver.find_element_by_xpath("//*[contains(text(), 'Features')]") \
                .find_element_by_xpath('..') \
                .find_element_by_xpath('..') \
                .find_element_by_tag_name('ul') \
                .get_attribute('outerHTML')

        with open(download_to / "download.json", "w") as f:
            json.dump(
                {
                    "varients": varients,
                    "features_html": self.clean_html(features),
                    "work": work_info,
                },
                f,
                sort_keys=True)

    def main(self):
        self.login()
        self.change_locale()
        works_urls = self.get_works_urls()
        print(f"Discovered {len(works_urls)} works")
        for work_url in works_urls:
            work_info = self.get_work_info(work_url)
            # if (DOWNLOAD_DIR / work_info["name"]).exists():
            #     print(f"Skipping {work_info['name']} because it was already downloaded.")
            #     continue
            product_urls = self.get_work_products_urls(work_url)
            for product_url in product_urls:
                ok = False
                while not ok:
                    try:
                        self.download_product_information(
                            product_url, work_info)
                        ok = True
                    except (StaleElementReferenceException,
                            ElementNotInteractableException,
                            NoSuchElementException) as e:
                        print(f"That failed {e}, retrying...")

        self.driver.quit()
Ejemplo n.º 3
0
    return results


#%% Solve an individual game currently on page
def solve_game(window):
    cards = organize_cards(window.find_elements_by_class_name('set-board-card'))
    sets = evaluate_cards(cards)

    for s in sets:
        for c in s:
            c[0].click()
    sleep(1)


#%% Initialize Browser
browser = Firefox()

browser.get('https://www.nytimes.com/puzzles/set')
#Scroll away from banner ad
browser.execute_script("window.scrollTo(0, 400)")
sleep(1)


#%% Select each tab and solve the game.
for i in range(4):
    solve_game(browser)
    try:
        browser.find_element_by_class_name('pzm-modal__button').click()
    except:
        pass