def __get_ornament_links_by_year(url, links={}):
    driver.get(url)
    ornament_blocks = driver.find_elements_by_class_name("card")

    for block in ornament_blocks:
        link_block = block.find_element_by_class_name("card-figure")
        element = link_block.find_element_by_tag_name("a")
        ornament_link = element.get_attribute("href")

        name_block = block.find_element_by_class_name("card-title")
        element = name_block.find_element_by_tag_name("a")
        ornament_name = element.get_attribute("text")

        # if ornament_name in links.keys():
        #     print('duplicate ornament name found: {}'.format(ornament_name))

        links[ornament_name] = ornament_link

    try:
        next_page_block = driver.find_element_by_class_name(
            'pagination-item--next')
        next_page_link = next_page_block.find_element_by_tag_name(
            'a').get_attribute('href')

        # print('next page link found: {}'.format(next_page_link))
        __get_ornament_links_by_year(next_page_link, links)
    except Exception as err:
        print('no next page found from url: {} err {}'.format(url, err))

    return links
def get_year_links():
    url = 'https://www.ornament-shop.com/hallmark-ornaments-by-year.html'
    driver.get(url)

    content_blocks = driver.find_elements_by_class_name("navList-item")
    year_links = {}

    for block in content_blocks:
        element = block.find_element_by_tag_name("a")
        year_link = element.get_attribute("href")

        segments = year_link.split('/')
        year = segments[3].split('-')[0]

        year_links[year] = year_link

    return year_links
def __get_ornament_by_url(link):
    # navigate to the ornament details page
    driver.get(link)

    # define and grab all elements from the ornament details page
    brand_element = 'hallmark'
    sku_element = driver.find_element_by_xpath('//*[@id="product_id"]')
    price_element = driver.find_element_by_xpath('//*[@id="price"]')

    availability_element = driver.find_element_by_xpath(
        '//*[@id="availability"]')
    availability = availability_element.text
    if 'In Stock - Ships Next Business Day'.lower() in str(
            availability).lower():
        availability = 'available'
    else:
        availability = 'unavailable'

    # id_element = driver.find_element_by_xpath('//*[@id="add"]/input[1]')
    name_element = driver.find_element_by_xpath(
        '//*[@id="add"]/div[2]/div[2]/div[1]/h1')

    # make sure that there is a column for everything in the schema
    ornament_details = dict.fromkeys(COLUMNS, None)
    try:
        ornament_details["sku"] = sku_element.text
        ornament_details["price"] = price_element.text
        ornament_details["brand"] = brand_element
        ornament_details["availability"] = availability
        ornament_details["name"] = name_element.text
        # ornament_details["Product Id"] = id_element.get_attribute('value')
        ornament_details["vendor"] = integration_name
        ornament_details["link"] = link
    except Exception as err:
        print(
            'unable to sync integration {} using link {} ornament_details {} err {}'
            .format(integration_name, link, ornament_details, err))
    return ornament_details
def __get_ornament_by_url(link):
    # navigate to the ornament details page
    driver.get(link)

    # define and grab all elements from the ornament details page
    brand_element = driver.find_element_by_xpath(
        '/html/body/div[3]/div[1]/div[2]/div/div[1]/section[2]/div/dl/dd[4]/a/span'
    )
    sku_element = driver.find_element_by_xpath(
        '/html/body/div[3]/div[1]/div[2]/div/div[1]/section[2]/div/dl/dd[5]')
    name_element = driver.find_element_by_xpath(
        '/html/body/div[3]/div[1]/div[2]/div/div[1]/section[2]/div/h1')
    price_element = driver.find_element_by_xpath(
        '/html/body/div[3]/div[1]/div[2]/div/div[1]/section[2]/div/dl/dd[3]/div/span'
    )
    availability_element = driver.find_element_by_xpath(
        '/html/body/div[3]/div[1]/div[2]/div/div[1]/section[2]/div/dl/dd[6]')
    # id_element = driver.find_element_by_xpath('/html/body/div[3]/div[1]/div[2]/div/div[1]/section[3]/div[1]/form[1]/input[2]')

    availability = availability_element.text
    if 'new' in str(availability).lower() or 'in stock' in str(
            availability).lower():
        availability = 'available'
    else:
        availability = 'unavailable'

    # make sure that there is a column for everything in the schema
    ornament_details = dict.fromkeys(COLUMNS, None)
    ornament_details["sku"] = sku_element.text
    ornament_details["name"] = name_element.text
    ornament_details["price"] = price_element.text
    ornament_details["brand"] = brand_element.text
    ornament_details["availability"] = availability
    # ornament_details["vendor_id"] = id_element.get_attribute('value')
    ornament_details["vendor"] = integration_name
    ornament_details["link"] = link
    return ornament_details
def get_year_links():
    url = 'https://www.hookedonhallmark.com/keepsake-hallmark-ornaments-by-year.html'
    driver.get(url)

    content_blocks = driver.find_elements_by_class_name("columns-5")
    year_links = {}

    for content in content_blocks:
        for block in content.find_elements_by_tag_name("li"):
            element = block.find_element_by_tag_name("a")
            year_link = element.get_attribute("href")
            name_span = block.find_element_by_tag_name("span")

            try:
                year_img = name_span.find_element_by_tag_name("img")
                img_alt = year_img.get_attribute("alt")
                segments = img_alt.split(' ')
            except:
                segments = name_span.text.split(' ')

            year = segments[0]
            year_links[year] = year_link

    return year_links
def __get_ornament_links_by_year(url, links={}):
    driver.get(url)

    try:
        # if a view all button exists
        view_all_button = driver.find_element_by_xpath(
            '//*[@id="category"]/div/div/div/section[3]/div/ul[1]/li[1]/a')
        view_all_link = view_all_button.get_attribute('href')
        print('navigating to view all page at link {}'.format(view_all_link))
        driver.get(view_all_link)
    except:
        print('no view all button found on base year page. continuing')

    ornament_blocks = driver.find_elements_by_class_name("product-item")

    # try to find the category button that links to the "view all" page
    if len(ornament_blocks) == 0:
        category_blocks = driver.find_elements_by_class_name('sub-categories')
        reg = re.compile(r'view-all', re.IGNORECASE)

        for block in category_blocks:
            view_all_a = block.find_element_by_tag_name('a')
            view_all_link = view_all_a.get_attribute('href')
            is_valid_link = len(reg.findall(view_all_link)) > 0
            if is_valid_link:
                print('using view all page to get products {}'.format(
                    view_all_link))
                driver.get(view_all_link)
                ornament_blocks = driver.find_elements_by_class_name(
                    "product-item")
                break

    for block in ornament_blocks:
        name_block = block.find_element_by_class_name("name")
        ornament_name = name_block.text

        a = name_block.find_element_by_tag_name('a')
        ornament_link = a.get_attribute("href")

        # if ornament_name in links.keys():
        # print('duplicate ornament name found: {}'.format(ornament_name))

        links[ornament_name] = ornament_link

    try:
        pagination_block = driver.find_element_by_class_name('paging')
        paging_elements = pagination_block.find_elements_by_tag_name('a')

        next_page_link = None
        for a in paging_elements:
            if 'next' in a.text.lower():
                next_page_link = a.get_attribute('href')
                break

        if next_page_link is None:
            return links

        # print('next page link found: {}'.format(next_page_link))
        __get_ornament_links_by_year(next_page_link, links)
    except Exception as err:
        print('no next page found from url: {} err {}'.format(url, err))

    return links