Example #1
0
def parse_table(rows, window_arrow):
    """Yield a ``ProductInfo`` for every row that contains a product link.

    Each row is searched with ``row.find`` for an optional sell-price span,
    an optional ``dateTime`` cell and an optional ``goods`` cell.  The time
    text is carried over from the most recent row that provided one, so rows
    without their own time cell reuse the previous value.

    Args:
        rows: Iterable of row elements exposing a BeautifulSoup-style
            ``find(name, attrs)`` method.
        window_arrow: Not used by this function; kept so existing callers
            keep working.

    Yields:
        ProductInfo: One record per row whose ``goods`` cell holds a
        ``div.text`` element with an ``<a>`` link.  Rows without such a link
        are skipped.  When the time text has no ``' ~ '`` separator the
        record's ``end_time`` is an empty string.
    """
    the_time = ''
    # Never populated in this parser; every record gets an empty category.
    category = ''
    for row in rows:
        price = ''
        price_item = row.find('span', {'class': 'sell'})
        if price_item:
            price = get_text_from_child(price_item)

        td_time_item = row.find('td', {'class': 'dateTime'})
        if td_time_item:
            span_time_item = td_time_item.find('span', {'class': 'time'})
            # Keep the previously seen time when this row yields no text.
            the_time = get_text_from_child(span_time_item) or the_time

        td_goods_item = row.find('td', {'class': 'goods'})
        if not td_goods_item:
            continue

        # Either the wrapper div or the link may be absent; skip the row
        # instead of failing on ``None.find`` / ``None['href']``.
        text_item = td_goods_item.find('div', {'class': 'text'})
        product_item = text_item.find('a') if text_item else None
        if not product_item:
            continue

        product_url = product_item['href']
        prod_id = extract_value_from_url_key(product_url, 'goods_code')
        product_name = get_text_from_child(product_item)
        image = get_image_url_by_prod_id(prod_id)

        # The time text is expected to look like "<start> ~ <end>"; fall back
        # to an empty end time when the separator is missing.
        time_parts = the_time.split(' ~ ')
        start_time = time_parts[0]
        end_time = time_parts[1] if len(time_parts) > 1 else ''

        yield ProductInfo(
            name=product_name,
            start_time=start_time,
            end_time=end_time,
            category=category,
            price=price,
            image=image,
            product_id=prod_id,
            detail_product_url=product_url,
        )
Example #2
0
def parse_table(rows):
    """Yield a ``ProductInfo`` for every row that has a ``<td>`` cell.

    A row's ``<th>`` (when present) supplies the time text and, through its
    ``<span>``, the category; both are carried over to following rows that
    have no ``<th>`` of their own.  The product name, link, id, image and
    price are read from the row's ``<td>``, descending into the optional
    ``div.layerUp`` / ``dl.pdtTxts`` / ``dd.txt`` wrappers when they exist.

    Args:
        rows: Iterable of row elements exposing a BeautifulSoup-style
            ``find(name, attrs)`` method.

    Yields:
        ProductInfo: One record per row with a ``<td>``.  Fields that cannot
        be located are left as empty strings.  Rows without a ``<td>`` are
        skipped.  When the time text has no ``' ~ '`` separator the record's
        ``end_time`` is an empty string.
    """
    the_time = ''
    category = ''
    for row in rows:
        product_link = ''
        image_url = ''
        product_name = ''
        product_id = ''
        price = ''

        time_item = row.find('th')
        if time_item:
            the_time = get_text_from_child(time_item)
            category = get_text_from_child(time_item.find('span'))

        cell_item = row.find('td')
        if not cell_item:
            # Nothing to describe in this row; the time/category read above
            # still applies to the rows that follow.
            continue

        # Each wrapper is optional: fall back to the enclosing element when
        # a level is missing.
        container_item = cell_item.find('div', {'class': 'layerUp'}) or cell_item
        price_item = container_item.find('dl', {'class': 'price'})
        name_item = container_item.find('dl', {'class': 'pdtTxts'}) or container_item
        name_item = name_item.find('dd', {'class': 'txt'}) or name_item

        product_link_item = name_item.find('a')
        if product_link_item:
            product_raw_link = product_link_item['href']
            product_name = get_text_from_child(product_link_item)
            product_id = extract_value_from_url_key(product_raw_link,
                                                    'slitmCd')
            if product_id:
                product_link = get_detail_prod_url_by_prod_id(product_id)
                # NOTE(review): helper name is spelled "bu"; confirm it
                # matches the helper's actual definition.
                image_url = get_image_url_bu_prod_id(product_id)

        if price_item:
            price_item = price_item.find('span', {'class': 'txtStrong'})
            if price_item:
                price = get_text_from_child(price_item)

        # The time text is expected to look like "<start> ~ <end>"; fall back
        # to an empty end time when the separator is missing.
        time_parts = the_time.split(' ~ ')
        start_time = time_parts[0]
        end_time = time_parts[1] if len(time_parts) > 1 else ''

        yield ProductInfo(
            name=product_name,
            start_time=start_time,
            end_time=end_time,
            category=category,
            shop_code='7',
            ch_no='10',
            shop_prod_id=product_id,
            product_id='001811' + product_id,
            detail_product_url=product_link,
            image=image_url,
            price=price,
        )
Example #3
0
def parsing_td_desc(items):
    """Return ``(category, product_name)`` extracted from table cells.

    For every cell, the category is the text of the ``a.prod_link`` found
    inside ``span.category``, and the product name is the text of the
    ``a.prod_link`` that is a direct child of ``div.tdWrap``.  When several
    cells provide a value the last one wins; a value that is never found
    stays an empty string.
    """
    found_category = ''
    found_name = ''
    for cell in items:
        category_span = cell.find('span', {'class': 'category'})
        category_link = (
            category_span.find('a', {'class': 'prod_link'})
            if category_span else None
        )
        if category_link:
            found_category = get_text_from_child(category_link)

        wrapper = cell.find('div', {'class': 'tdWrap'})
        if not wrapper:
            continue
        # Only a link that is an immediate child of the wrapper counts.
        name_link = wrapper.find('a', {'class': 'prod_link'}, recursive=False)
        if name_link:
            found_name = get_text_from_child(name_link)
    return found_category, found_name
Example #4
0
def parsing_td_price(items):
    """Return the price text found in the given table cells.

    The price is the text of the ``<b>`` nested in the ``<ins>`` inside a
    cell's ``div.tdWrap``.  When several cells carry a price the last one
    wins; an empty string is returned when none does.
    """
    latest_price = ''
    for cell in items:
        wrapper = cell.find('div', {'class': 'tdWrap'})
        if not wrapper:
            continue
        ins_tag = wrapper.find('ins')
        if not ins_tag:
            continue
        bold_tag = ins_tag.find('b')
        if bold_tag:
            latest_price = get_text_from_child(bold_tag)
    return latest_price
def _fetch_lotte_schedule_html(window_arrow):
    """Load the Lotte schedule page in Firefox and return the day's HTML.

    Clicks the ``span.rn_day`` tab whose text equals
    ``window_arrow.format('MM.DD')`` and, when the "이전 방송상품 보기"
    ("view previous broadcast products") link shows up and is displayed,
    clicks it too before taking the page's HTML.

    Returns:
        The inner HTML of the ``<html>`` element, or ``None`` when no day tab
        matches.  The browser is closed before returning in either case.
    """
    url = "http://www.lotteimall.com/main/viewMain.lotte?dpml_no=6&tab=3&tlog=19000_2"
    driver = webdriver.Firefox()
    try:
        driver.get(url)

        date_format = window_arrow.format('MM.DD')
        wait_for_condition(driver, By.XPATH, "//span[@class='rn_day']", 5)
        the_day_element = None
        for element in driver.find_elements_by_xpath("//span[@class='rn_day']"):
            if element.text == date_format:
                the_day_element = element
                break
        if not the_day_element:
            return None
        the_day_element.click()

        if wait_for_condition(driver, By.LINK_TEXT, "이전 방송상품 보기", 2):
            prev_see_item = driver.find_element_by_link_text("이전 방송상품 보기")
            try:
                if prev_see_item.is_displayed():
                    driver.find_element_by_link_text("이전 방송상품 보기").click()
            except StaleElementReferenceException:
                # Best effort: the link was re-rendered while being checked;
                # continue with whatever the page currently shows.
                pass

        return driver.execute_script(
            "return document.getElementsByTagName('html')[0].innerHTML")
    finally:
        # Always release the browser, including on the early return above
        # and when any step raises.
        driver.quit()


def lotte_home_shopping(window_arrow):
    """Yield a ``ProductInfo`` for each broadcast item on the chosen day.

    Args:
        window_arrow: Object whose ``format('MM.DD')`` result is matched
            against the day tabs on the page (presumably an ``arrow`` date;
            confirm with callers).

    Yields:
        ProductInfo: One record per ``div.rn_tsitem_box`` inside
        ``div.rn_tsitem_list``.  The product id is taken from the file name
        of the item's own ``div.rn_tsitem_view`` image (the text before the
        first ``_``).  Fields that cannot be located are empty strings, and
        ``end_time`` is empty when the time text has no ``' ~ '`` separator.
        Nothing is yielded when the day tab or the item list is missing.
    """
    print('LOTTE')
    html = _fetch_lotte_schedule_html(window_arrow)
    if html is None:
        return
    soup = build_soup_from_page(html)

    list_item = soup.find('div', {'class': 'rn_tsitem_list'})
    if not list_item:
        return

    for item in list_item.find_all('div', {'class': 'rn_tsitem_box'}):
        prod_id = ''
        the_time = ''
        title = ''
        price = ''

        caption_item = item.find('div', {'class': 'rn_tsitem_caption'})
        time_item = caption_item.find('span') if caption_item else None
        if time_item:
            the_time = get_text_from_child(time_item)

        # Look only inside this item: searching the whole page would give
        # every item the id of the last image on the page.
        for view_item in item.find_all('div', {'class': 'rn_tsitem_view'}):
            image = view_item.find('img')
            image_src = image.get('src') if image else None
            if image_src:
                prod_id = image_src.split('/')[-1].split('_')[0]

        info_item = item.find('div', {'class': 'rn_tsitem_info'})
        if info_item:
            title_item = info_item.find('a')
            if title_item:
                title = get_text_from_child(title_item)
            price_info_item = info_item.find('div', {'class': 'rn_tsitem_priceinfo'})
            if price_info_item:
                price_item = price_info_item.find('span', {'class': 'rn_tsitem_price'})
                if price_item:
                    price = get_text_from_child(price_item)

        prod_detail_url = get_prod_url_by_prod_id(prod_id)
        image_url = get_image_url_by_prod_id(prod_id)

        # The time text is expected to look like "<start> ~ <end>"; fall back
        # to an empty end time when the separator is missing.
        time_parts = the_time.split(' ~ ')
        start_time = time_parts[0]
        end_time = time_parts[1] if len(time_parts) > 1 else ''

        yield ProductInfo(
            name=title,
            category='',
            start_time=start_time,
            end_time=end_time,
            detail_product_url=prod_detail_url,
            price=price,
            product_id=prod_id,
            image=image_url,
        )