-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawler.py
89 lines (73 loc) · 3.84 KB
/
crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import csv
from collections import namedtuple
from time import sleep, ctime

from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
# Module-level accumulator for scraped rows; the first row is the CSV header.
# NOTE(review): getProductsDetail() appends to this global while __init__ also
# creates an (unused) self.listDetailsProducts — the two should be unified.
listDetailsProducts = [['title', 'price', 'rating', 'numberOfSales', 'seller']]
class crawlerAliexpress():
    """Scrape AliExpress search results with Selenium/Firefox.

    Usage: construct with a search query, call ``getProductsDetail(n)`` to
    crawl up to ``n`` result pages (it follows pagination recursively and
    closes the browser when done), then ``saveInCsv(path)`` to dump the
    collected rows (header included) to a CSV file.
    """

    def __init__(self, searchName, numberPage=1):
        """Start a Firefox session and open the first search-results page.

        :param searchName: query string interpolated into the search URL.
        :param numberPage: 1-based page number to start from.
        """
        opts = Options()
        # opts.set_headless()  # uncomment to run without a visible window
        self.browser = Firefox(options=opts)
        self.searchName = searchName
        self.numberPage = numberPage
        # Accumulates scraped rows on the instance (first row = CSV header),
        # replacing the previous mix of instance attribute + module global.
        self.listDetailsProducts = [['title', 'price', 'rating', 'numberOfSales', 'seller']]
        currentUrl = f'https://aliexpress.ru/wholesale?SearchText={self.searchName}&page={self.numberPage}'
        self.browser.get(currentUrl)
        # FIX: the original cookie value contained the mojibake '®ion=RU'
        # (the '&reg' of '&region' was decoded as the ® entity); restore the
        # intended '&region=RU' so the RU-region/RUB-currency cookie is valid.
        cookie = {'name': 'aep_usuc_f',
                  'value': 'isfm=y&site=rus&c_tp=RUB&isb=y&region=RU&b_locale=ru_RU',
                  'domain': '.aliexpress.ru'}
        self.browser.add_cookie(cookie)
        # Reload so the page is rendered with the cookie applied.
        self.browser.get(currentUrl)
        sleep(1)

    def scroll_down_page(self, speed=8):
        """Scroll to the bottom of the page in increments of ``speed`` px.

        Re-reads document.body.scrollHeight each step so content that is
        lazy-loaded while scrolling extends the loop until the true bottom.
        """
        current_scroll_position, new_height = 0, 1
        while current_scroll_position <= new_height:
            current_scroll_position += speed
            self.browser.execute_script("window.scrollTo(0, {});".format(current_scroll_position))
            new_height = self.browser.execute_script("return document.body.scrollHeight")

    def getProductsDetail(self, countPage):
        """Scrape all product cards on the current page, then paginate.

        :param countPage: number of pages still to crawl (including this one).
        """
        self.countPage = countPage
        # Items are lazy-loaded; scroll to the bottom so all cards render.
        self.scroll_down_page()
        # NOTE: find_elements_by_xpath was removed in Selenium 4.3; use the
        # find_elements(By.XPATH, ...) form instead.
        item = "//li[@class='list-item']"
        title = self.browser.find_elements(By.XPATH, item + "//div[@class='item-title-wrap']//a[@class='item-title']")
        price = self.browser.find_elements(By.XPATH, item + "//div[@class='hover-help']//div[@class='item-price-row']")
        rating = self.browser.find_elements(By.XPATH, item + "//div[@class='hover-help']//span[@class='rating-value']")
        numberOfSales = self.browser.find_elements(By.XPATH, item + "//div[@class='hover-help']//a[@class='sale-value-link']")
        seller = self.browser.find_elements(By.XPATH, item + "//div[@class='hover-help']//a[@class='store-name']")
        # zip() stops at the shortest list, matching the original
        # min(len(...)) bound: cards missing a field are simply skipped.
        for t, p, r, n, s in zip(title, price, rating, numberOfSales, seller):
            self.listDetailsProducts.append([t.text, p.text, r.text, n.text, s.text])
        self.paginator(self.countPage)

    def saveInCsv(self, nameFile):
        """Write the collected rows (header first) to ``nameFile`` as CSV."""
        self.nameFile = nameFile
        # Explicit UTF-8: product titles are Cyrillic and would crash on
        # platforms whose default locale encoding cannot represent them.
        with open(self.nameFile, "w", newline='', encoding='utf-8') as out_file:
            writer = csv.writer(out_file)
            writer.writerows(self.listDetailsProducts)

    def check_exists_by_xpath(self, xpath):
        """Return True if at least one element matches ``xpath``."""
        try:
            self.browser.find_element(By.XPATH, xpath)
        except NoSuchElementException:
            return False
        return True

    def paginator(self, countPage):
        """Advance to the next results page, or shut the browser down.

        Recurses into getProductsDetail with a decremented page budget while
        an enabled "next" button exists and pages remain.
        """
        sleep(2)
        isPaginator = self.check_exists_by_xpath("//button[@class='next-btn next-medium next-btn-normal next-pagination-item next-next' and not(@disabled)]")
        self.numberPage += 1
        currentUrl = f'https://aliexpress.ru/wholesale?SearchText={self.searchName}&page={self.numberPage}'
        if isPaginator and (countPage > 1):
            self.browser.get(currentUrl)
            self.getProductsDetail(countPage - 1)
        else:
            print('\nВсе страницы обработали')
            # quit() (not close()) ends the WebDriver session so the
            # geckodriver process is not leaked.
            self.browser.quit()
if __name__ == '__main__':
    # Crawl up to four result pages for the query and dump them to CSV.
    crawler = crawlerAliexpress('собачка', 1)
    crawler.getProductsDetail(4)
    crawler.saveInCsv('out.csv')