Code Example #1
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from save_images import make_directory
from scrap_images import scrap_image_url
import pandas as pd

# Path to the ChromeDriver executable
DRIVER_PATH = '/home/sk-ji/Desktop/chromedriver_linux64/chromedriver'

# Run Chrome in headful mode (a regular, visible browser window)
driver = webdriver.Chrome(executable_path=DRIVER_PATH)
# Navigate to the jeans search results; driver.get() returns None,
# so there is nothing useful to assign from it
driver.get(
    "https://www.flipkart.com/search?q=jeANS&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
)
#current_page_url = driver.get("https://www.flipkart.com/clothing-and-accessories/topwear/shirt/men-shirt/casual-shirt/pr?sid=clo,ash,axc,mmk,kp7&otracker=categorytree&otracker=nmenu_sub_Men_0_Casual%20Shirts")
DIRNAME = "jeans"
#DIRNAME = "Men_Shirt"
make_directory(DIRNAME)

start_page = 2
total_pages = 6

# Scrape the result pages
for page in range(start_page, total_pages + 1):
    try:
        product_details = scrap_image_url(driver=driver)
        print("Scraping page {0} of {1} pages".format(page, total_pages))

        # Read the active page number from Flipkart's pagination bar
        page_value = driver.find_element_by_xpath(
            "//a[@class='_2Xp0TH fyt9Eu']").text
        print("The current page scraped is {}".format(page_value))
    except StaleElementReferenceException:
        # Minimal assumed handler: the original snippet is truncated before
        # its except clause, but this exception is what the import suggests
        # it guards against (the DOM re-rendering mid-read)
        continue
Code Example #2
from selenium import webdriver
from save_images import make_directory, save_images, save_data_to_csv
from scrap_images import scrap_image_url
from selenium.common.exceptions import StaleElementReferenceException

# Path to the ChromeDriver executable
DRIVER_PATH = r'C:\Users\Fahad Ehtesham\Desktop\data Trained\FLIP ROBO\Web Scrapping\chromedriver.exe'

# Run Chrome in headful mode (a regular, visible browser window)
driver = webdriver.Chrome(executable_path=DRIVER_PATH)
# Navigate to the formal-shirts category listing; driver.get() returns None,
# so there is nothing useful to assign from it
driver.get(
    'https://www.flipkart.com/clothing-and-accessories/topwear/shirt/men-shirt/formal-shirt/pr?sid=clo,ash,axc,mmk,bk1&otracker=categorytree&otracker=nmenu_sub_Men_0_Formal%20Shirts'
)

dir_name = 'men_shirt'
make_directory(dir_name)

start_page = 1
total_pages = 3

# Scrape the result pages
for page in range(start_page, total_pages + 1):
    try:
        product_details = scrap_image_url(driver=driver)
        print('Scraping page {0} of {1} pages'.format(page, total_pages))

        # Read the active page number from Flipkart's pagination bar
        page_value = driver.find_element_by_xpath(
            "//a[@class='_2Xp0TH fyt9Eu']").text
        print('The current page scraped is {}'.format(page_value))

        # Downloading the images
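        # Hypothetical continuation (the original snippet cuts off here):
        # save_images and save_data_to_csv come from the imports above, but
        # their signatures are assumptions based on their names, and the
        # next-page XPath below is a guess, not Flipkart's actual markup
        save_images(product_details, dir_name)
        save_data_to_csv(product_details, dir_name)

        # Advance to the next results page
        driver.find_element_by_xpath("//span[text()='Next']").click()
    except StaleElementReferenceException:
        # The DOM was re-rendered mid-read; skip this iteration and continue
        continue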