def parse_table(rows, window_arrow):
    """Yield one ``ProductInfo`` per row that contains a product link.

    The broadcast time is remembered across rows: a row whose time cell is
    missing or empty reuses the most recent non-empty time text.

    Args:
        rows: iterable of row elements exposing ``find(name, attrs)``
            (presumably BeautifulSoup ``<tr>`` tags -- confirm with caller).
        window_arrow: not used by this parser; kept so existing callers that
            pass it keep working.

    Yields:
        ProductInfo built from the row's price, time range, product link,
        product id and image URL. ``category`` is always the empty string.
    """
    the_time = ''
    for row in rows:
        price = ''
        price_item = row.find('span', {'class': 'sell'})
        if price_item:
            price = get_text_from_child(price_item)

        td_time_item = row.find('td', {'class': 'dateTime'})
        if td_time_item:
            span_time_item = td_time_item.find('span', {'class': 'time'})
            # Keep the previous time when this row yields no time text.
            the_time = get_text_from_child(span_time_item) or the_time

        td_goods_item = row.find('td', {'class': 'goods'})
        if not td_goods_item:
            continue

        # A goods cell without the expected <div class="text"><a> structure
        # is skipped instead of aborting the whole generator.
        text_item = td_goods_item.find('div', {'class': 'text'})
        product_item = text_item.find('a') if text_item else None
        if not product_item:
            continue

        product_url = product_item['href']
        prod_id = extract_value_from_url_key(product_url, 'goods_code')
        product_name = get_text_from_child(product_item)
        image = get_image_url_by_prod_id(prod_id)

        # Split once; a time text without ' ~ ' gives an empty end time
        # rather than raising IndexError.
        time_parts = the_time.split(' ~ ')
        start_time = time_parts[0]
        end_time = time_parts[1] if len(time_parts) > 1 else ''

        yield ProductInfo(
            name=product_name,
            start_time=start_time,
            end_time=end_time,
            category='',
            price=price,
            image=image,
            product_id=prod_id,
            detail_product_url=product_url,
        )
def parse_table(rows):
    """Yield one ``ProductInfo`` per row that has a ``<td>`` cell.

    The time text and category come from the row's ``<th>`` and are carried
    over to following rows that have no ``<th>`` of their own.

    Args:
        rows: iterable of row elements exposing ``find(name, attrs)``
            (presumably BeautifulSoup ``<tr>`` tags -- confirm with caller).

    Yields:
        ProductInfo with the product name, time range, category, price,
        detail URL and image URL. Fields that cannot be located in the row
        are left as empty strings.
    """
    the_time = ''
    category = ''
    for row in rows:
        product_link = ''
        image_url = ''
        product_name = ''
        product_id = ''
        price = ''

        time_item = row.find('th')
        if time_item:
            the_time = get_text_from_child(time_item)
            category = get_text_from_child(time_item.find('span'))
        # NOTE(review): if ``row`` is a BeautifulSoup tag, ``row.get`` reads
        # an HTML attribute named 'th', so this normally leaves ``category``
        # unchanged -- confirm whether it is still needed.
        category = row.get('th', {}).get('span', '') or category

        product_item = row.find('td')
        if not product_item:
            # No data cell in this row: nothing to emit (previously this
            # raised AttributeError on the next lookup).
            continue

        # Narrow down step by step, falling back to the wider element
        # whenever an inner wrapper is absent.
        product_item = product_item.find('div', {'class': 'layerUp'}) or product_item
        price_item = product_item.find('dl', {'class': 'price'})
        product_item = product_item.find('dl', {'class': 'pdtTxts'}) or product_item
        product_item = product_item.find('dd', {'class': 'txt'}) or product_item

        product_link_item = product_item.find('a')
        if product_link_item:
            product_raw_link = product_link_item['href']
            product_name = get_text_from_child(product_link_item)
            product_id = extract_value_from_url_key(product_raw_link, 'slitmCd')
            if product_id:
                product_link = get_detail_prod_url_by_prod_id(product_id)
                # NOTE(review): helper name is spelled "bu" here -- verify it
                # matches the helper's actual definition.
                image_url = get_image_url_bu_prod_id(product_id)

        if price_item:
            price_item = price_item.find('span', {'class': 'txtStrong'})
            if price_item:
                price = get_text_from_child(price_item)

        # Split once; a time text without ' ~ ' gives an empty end time
        # rather than raising IndexError.
        time_parts = the_time.split(' ~ ')
        start_time = time_parts[0]
        end_time = time_parts[1] if len(time_parts) > 1 else ''

        yield ProductInfo(
            name=product_name,
            start_time=start_time,
            end_time=end_time,
            category=category,
            shop_code='7',
            ch_no='10',
            shop_prod_id=product_id,
            product_id='001811' + product_id,
            detail_product_url=product_link,
            image=image_url,
            price=price,
        )
def parsing_td_desc(items):
    """Extract the category and product name from description cells.

    Every cell in ``items`` is inspected; when several cells provide a value,
    the one from the last such cell is returned.

    Args:
        items: iterable of elements exposing ``find(name, attrs, ...)``.

    Returns:
        A ``(category, product_name)`` tuple; each part is ``''`` when no
        matching link was found.
    """
    found_category = ''
    found_name = ''
    for cell in items:
        # Category: <span class="category"> -> <a class="prod_link">
        category_span = cell.find('span', {'class': 'category'})
        category_link = (
            category_span.find('a', {'class': 'prod_link'})
            if category_span else None
        )
        if category_link:
            found_category = get_text_from_child(category_link)

        # Product name: only a direct <a class="prod_link"> child of the
        # <div class="tdWrap"> wrapper counts (recursive=False).
        wrapper = cell.find('div', {'class': 'tdWrap'})
        name_link = (
            wrapper.find('a', {'class': 'prod_link'}, recursive=False)
            if wrapper else None
        )
        if name_link:
            found_name = get_text_from_child(name_link)
    return found_category, found_name
def parsing_td_price(items):
    """Return the price text found in the given price cells.

    The price is read from ``<div class="tdWrap"> -> <ins> -> <b>``. When
    several cells contain that structure, the last one wins.

    Args:
        items: iterable of elements exposing ``find(name, attrs)``.

    Returns:
        The price text, or ``''`` when no cell contains the structure.
    """
    latest_price = ''
    for cell in items:
        wrapper = cell.find('div', {'class': 'tdWrap'})
        if not wrapper:
            continue
        ins_tag = wrapper.find('ins')
        if not ins_tag:
            continue
        bold_tag = ins_tag.find('b')
        if not bold_tag:
            continue
        latest_price = get_text_from_child(bold_tag)
    return latest_price
def lotte_home_shopping(window_arrow):
    """Yield ``ProductInfo`` items scraped from the lotteimall.com page.

    A Firefox WebDriver loads the page, clicks the day tab whose text equals
    ``window_arrow.format('MM.DD')``, optionally expands the "previous
    broadcast products" link, and then the rendered HTML is parsed.

    Args:
        window_arrow: object providing ``format('MM.DD')`` (presumably an
            Arrow datetime -- confirm with caller).

    Yields:
        ProductInfo for each ``rn_tsitem_box`` element on the page, with an
        empty ``category``. Nothing is yielded when the requested day tab or
        the item list container is not present.
    """
    print ('LOTTE')
    url = "http://www.lotteimall.com/main/viewMain.lotte?dpml_no=6&tab=3&tlog=19000_2"
    day_xpath = "//span[@class='rn_day']"
    # Link text means "View previous broadcast products".
    prev_link_text = "이전 방송상품 보기"

    driver = webdriver.Firefox()
    try:
        driver.get(url)
        date_format = window_arrow.format('MM.DD')
        wait_for_condition(driver, By.XPATH, day_xpath, 5)

        the_day_element = None
        for element in driver.find_elements_by_xpath(day_xpath):
            if element.text == date_format:
                the_day_element = element
                break
        if the_day_element is None:
            return
        the_day_element.click()

        if wait_for_condition(driver, By.LINK_TEXT, prev_link_text, 2):
            prev_see_item = driver.find_element_by_link_text(prev_link_text)
            try:
                if prev_see_item.is_displayed():
                    driver.find_element_by_link_text(prev_link_text).click()
            except StaleElementReferenceException:
                # Best effort: the link was replaced while we looked at it,
                # so carry on with whatever the page currently shows.
                pass

        html = driver.execute_script(
            "return document.getElementsByTagName('html')[0].innerHTML")
    finally:
        # All needed markup is captured in ``html``; always shut the browser
        # down so no Firefox process is left behind (including on the early
        # return above or on an exception).
        driver.quit()

    soup = build_soup_from_page(html)
    list_item = soup.find('div', {'class': 'rn_tsitem_list'})
    if not list_item:
        return

    for item in list_item.find_all('div', {'class': 'rn_tsitem_box'}):
        prod_id = ''
        title = ''
        price = ''

        time_item = item.find('div', {'class': 'rn_tsitem_caption'}).find('span')
        the_time = get_text_from_child(time_item)

        # Search inside this item only. Searching the whole page here gave
        # every product the id of the last image on the page.
        for view_item in item.find_all('div', {'class': 'rn_tsitem_view'}):
            image = view_item.find('img')
            if image:
                # The id is the file-name part before the first underscore.
                prod_id = image['src'].split('/')[-1].split('_')[0]

        info_item = item.find('div', {'class': 'rn_tsitem_info'})
        title_item = info_item.find('a')
        if title_item:
            title = get_text_from_child(title_item)

        price_info_item = info_item.find('div', {'class': 'rn_tsitem_priceinfo'})
        if price_info_item:
            price_item = price_info_item.find('span', {'class': 'rn_tsitem_price'})
            if price_item:
                price = get_text_from_child(price_item)

        prod_detail_url = get_prod_url_by_prod_id(prod_id)
        image_url = get_image_url_by_prod_id(prod_id)

        # Split once; a time text without ' ~ ' gives an empty end time
        # rather than raising IndexError.
        time_parts = the_time.split(' ~ ')
        start_time = time_parts[0]
        end_time = time_parts[1] if len(time_parts) > 1 else ''

        yield ProductInfo(
            name=title,
            category='',
            start_time=start_time,
            end_time=end_time,
            detail_product_url=prod_detail_url,
            price=price,
            product_id=prod_id,
            image=image_url,
        )