def scrape_name(request):
    """Scrape one search-result cell per portal id and store it as a City.

    For every id from 1080 to 1086 inclusive a new Chrome session is
    started (with a visible window; headless mode is not enabled), the
    portal page for that id is opened, the search filter is filled with
    ``'an'`` and submitted, and the second-to-last space-separated token of
    the last cell of a result row is saved as ``City(url_id=..., city_name=...)``.

    Any failure for a single id is reported on stdout and the loop moves on
    to the next id; the browser session is always closed.

    Args:
        request: The incoming request object; it is not read.

    Returns:
        A ``JsonResponse`` carrying a fixed placeholder payload.
    """
    for portal_id in range(1080, 1087):
        # Reset per iteration so ``finally`` never touches a stale or
        # non-driver object when Chrome fails to start.
        driver = None
        try:
            option = webdriver.ChromeOptions()
            option.add_argument("window-size=1280,800")
            driver = webdriver.Chrome(ChromeDriverManager().install(), options=option)
            driver.get('https://sfera.sferabit.com/servizi/alboonlineBoot/index.php?id=' + str(portal_id))

            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if len(driver.window_handles) != 1:
                raise RuntimeError('expected exactly one browser window')
            driver.switch_to.window(driver.window_handles[0])

            name_input = driver.find_element(By.ID, 'filtroRagioneSociale')
            name_input.send_keys('an')
            send_button = driver.find_element(By.CSS_SELECTOR, 'button.btn-primary')
            ActionChains(driver).move_to_element(send_button).click(send_button).perform()

            # Let the lookup below poll for up to 10 seconds while the
            # results are rendered.
            driver.implicitly_wait(10)
            cell_html = driver.find_element(
                By.CSS_SELECTOR,
                '#risultatoRicerca>table.table tr>td:last-child',
            ).get_attribute('innerHTML')
            city_name = cell_html.split(" ")[-2].strip()

            City(url_id=portal_id, city_name=city_name).save()
        except Exception:
            # Best-effort scrape: ids whose page lacks the expected form or
            # results are skipped. Ctrl-C / SystemExit are no longer swallowed.
            print(str(portal_id) + " does not support")
        finally:
            if driver is not None:
                driver.quit()

    data = {
        'is_taken': 'sdfsdfsdfsdfsdf'
    }
    return JsonResponse(data)
def parse(self, **kwargs):
    """Import countries, regions and cities from the Google Sheets worksheet.

    Rows are read starting at row 2 until the first row whose first cell is
    empty. Column usage, as indexed by this method:
    ``[0]`` city name, ``[1]`` region name, ``[2]`` latitude,
    ``[3]`` longitude, ``[4]`` country name.

    Objects are looked up by name only; missing ones are created and the
    matching ``self.*_count`` counter is incremented (the counters are
    expected to be initialised by the caller/owner of ``self``).

    Args:
        **kwargs: Accepted for interface compatibility; not read here.

    Returns:
        A list of three human-readable summary strings with the number of
        newly created countries, regions and cities.
    """
    worksheet = GoogleSheetsParser.get_worksheet(
        self, '1Mp9r7CNxVnKip-tLAFpbGp4K_MY2iUrbrBOQBcsKLVE'
    )

    row_number = 2
    while True:
        values_list = worksheet.row_values(row_number)
        row_number += 1
        # A blank row may come back as an empty list rather than a list of
        # empty strings, so check for that before indexing.
        if not values_list or not values_list[0]:
            break

        country, created = Country.objects.get_or_create(name=values_list[4])
        if created:
            self.country_count += 1

        # ``defaults`` is only applied on creation; the lookup is by name.
        region, created = Region.objects.get_or_create(
            name=values_list[1],
            defaults={'country': country},
        )
        if created:
            self.region_count += 1

        _, created = City.objects.get_or_create(
            name=values_list[0],
            defaults={
                'lat': values_list[2],
                'lon': values_list[3],
                'region': region,
            },
        )
        if created:
            self.city_count += 1

    return [
        'New Countries: ' + str(self.country_count),
        'New Regions: ' + str(self.region_count),
        'New Cities: ' + str(self.city_count),
    ]
def add_city():
    """Add the Tula region and its districts to the database.

    Run this only once: each call saves a new ``Region`` object and one new
    ``City`` object per district name listed below.
    """
    district_names = (
        'Алексинский район',
        'Арсеньевский район',
        'Белёвский район',
        'Богородицкий район',
        'Венёвский район',
        'Воловский район',
        'Дубенский район',
        'Ефремовский район',
        'Заокский район',
        'Каменский район',
        'Кимовский район',
        'Киреевский район',
        'Куркинский район',
        'Ленинский район',
        'Новомосковский район',
        'Одоевский район',
        'Плавский район',
        'Суворовский район',
        'Тёпло-Огарёвский район',
        'Узловский район',
        'Чернский район',
        'Щёкинский район',
        'Ясногорский район',
    )

    # The region must be saved first so the cities can reference it.
    tula_region = Region(name='Тульская область')
    tula_region.save()

    for district_name in district_names:
        City(region=tula_region, name=district_name).save()
def parse(self, **kwargs):
    """Import the universities of one wave from the Google Sheets worksheet.

    Rows are read starting at row 2 until the first row whose first cell is
    empty. Column usage, as indexed by this method:
    ``[0]`` university name, ``[1]`` abbreviation, ``[2]`` country name,
    ``[3]`` city name, ``[4]`` site, ``[5]`` wave.

    Only rows whose wave cell equals ``kwargs['options']['wave']`` are
    processed. A university that already exists (by name) is left alone.
    When a new university's city is missing, the city is created with
    ``lat=0, lon=0`` under a placeholder region named ``'unknown'``, which is
    itself created (together with its country, if needed) on first use.

    NOTE(review): the ``'unknown'`` region is looked up by name only, so it
    is shared across countries and keeps the country of the row that first
    created it -- confirm this is intended.
    NOTE(review): the wave comparison is a plain ``==`` between the sheet
    cell and the option value; confirm both have the same type.

    Args:
        **kwargs: Must contain ``options`` with a ``wave`` entry.

    Returns:
        A list of human-readable summary strings. Warning lines are added
        when any city had to be created, and a further line when the
        ``'unknown'`` region was created.

    Raises:
        KeyError: If ``kwargs['options']['wave']`` is missing.
    """
    worksheet = GoogleSheetsParser.get_worksheet(
        self, '15Q8sDyG_eBUHMcriIAHTmwDcdSdJSSLNAo34iBZKyJk'
    )
    wave = kwargs['options']['wave']

    def placeholder_region(country_name):
        """Return the 'unknown' region, creating it and its country if absent."""
        try:
            return Region.objects.get(name='unknown')
        except ObjectDoesNotExist:
            pass
        # The country is only looked up/created when the region is missing.
        country, _ = Country.objects.get_or_create(name=country_name)
        region = Region(name='unknown', country=country)
        self.unknown_region = True
        region.save()
        return region

    def resolve_city(city_name, country_name):
        """Return the city with this name, creating it at (0, 0) if absent."""
        try:
            return City.objects.get(name=city_name)
        except ObjectDoesNotExist:
            pass
        city = City(
            name=city_name,
            region=placeholder_region(country_name),
            lat=0,
            lon=0,
        )
        self.city_count += 1
        city.save()
        return city

    row_number = 2
    while True:
        values_list = worksheet.row_values(row_number)
        row_number += 1
        # A blank row may come back as an empty list rather than a list of
        # empty strings, so check for that before indexing.
        if not values_list or not values_list[0]:
            break
        if values_list[5] != wave:
            continue

        try:
            University.objects.get(name=values_list[0])
        except ObjectDoesNotExist:
            university = University(
                name=values_list[0],
                abbr=values_list[1],
                city=resolve_city(values_list[3], values_list[2]),
                site=values_list[4],
            )
            self.university_count += 1
            university.save()

    result = [
        'Wave ' + str(wave),
        'New University: ' + str(self.university_count),
    ]
    if self.city_count > 0:
        # New cities are unexpected here, so make them stand out in the report.
        result.append('------')
        result.append('Check this situation! It is not normal!')
        result.append('------')
        result.append('New City: ' + str(self.city_count))
        if self.unknown_region:
            result.append('Added Region "unknown"')
    return result