def get_data_from_link(self, links):
    """Scrape the given animal pages and save one ``Animal`` per page.

    For every entry of *links* the page at ``self.BASE_URL + link`` is
    downloaded with ``requests.get`` and parsed with BeautifulSoup.  Name,
    description and photo are extracted on a best-effort basis: a step that
    fails is reported with ``print`` and skipped, the remaining steps still
    run.  Every animal is persisted with ``new_animal.save()``.

    Args:
        links: Iterable of link strings that are appended to
            ``self.BASE_URL``.

    Returns:
        list: One ``record`` dict per processed link (empty list for empty
        input).
    """
    data = []
    for link in links:
        new_animal = Animal()
        # presumably type 1 means "dog" (the identifier below is prefixed
        # with "dog") — TODO confirm against the Animal model
        new_animal.type = 1
        new_animal.status = 1
        new_animal.city = "Kraków"
        new_animal.shelter = "Krakowskie Towarzystwo Opieki Nad Zwierzętami"
        new_animal.shelter_group = 2
        # Other data is not available on this page: fixed placeholder values.
        new_animal.sex = 3
        new_animal.age_years = 0

        # Raw record returned to the caller; only some keys are filled here.
        record = {
            'name': None,
            'sex': None,
            'age': None,
            'color': None,
            'description': None,
            'vaccination': None,
            'debug': None,
            'ster': None,
            'city': None,
            'address': None,
            'type': None,
            'shelter': None,
            'shelter_group': None,
            'image': None,
            'phone': None,
            'mail': None,
            'virtual_adoption': None,
            'status': None,
        }

        url = self.BASE_URL + link
        resp = requests.get(url)
        soup = BeautifulSoup(resp.text, 'html.parser')
        new_animal.link = url

        # Name: first paragraph of the description box.
        try:
            name = soup.select('div.default_description > p')[0].getText().strip()
            record['name'] = name
            new_animal.name = name
        except Exception as e:
            print('Błąd z ustalaniem imienia!', e)

        # The name may still be None here; never concatenate None into the
        # error messages below, otherwise the handler itself raises.
        label = record['name'] or ''

        # Description: the seventh paragraph inside the description box.
        try:
            paragraphs = soup.select('div.default_description')[0].select('p')
            new_animal.description = paragraphs[6].getText().strip()
        except Exception as e:
            print('Błąd z ustalaniem opisu!' + label, e)

        # Photo
        try:
            img_url = soup.select('#photo0')[0]['href']
            image_url = 'http://www.schronisko.krakow.pl' + img_url
            new_animal.image = image_url
            # NOTE(review): stored under 'img' although the record template
            # declares 'image'; key kept unchanged for existing callers.
            record['img'] = requests.get(image_url).content
            # Build the identifier from the sanitised name (spaces and
            # slashes replaced), not from the raw one.
            name_filename = record['name'].replace(" ", "_").replace("/", "_")
            new_animal.identifier = "dog" + "_" + name_filename + "_krakow"
        except Exception as e:
            print('Błąd z pobraniem zdjecia!' + label, e)

        data.append(record)
        new_animal.save()
    return data
def get_data_from_link(self, links):
    """Scrape the given animal pages and save one ``Animal`` per page.

    Every entry of *links* is used as a complete URL, downloaded with
    ``requests.get`` and parsed with BeautifulSoup.  Name, description and
    photo are extracted on a best-effort basis: a step that fails is
    reported with ``print`` and skipped.  Every animal is persisted with
    ``new_animal.save()``.

    Args:
        links: Iterable of absolute page URLs.

    Returns:
        list: One ``record`` dict per processed link (empty list for empty
        input).
    """
    data = []
    for link in links:
        new_animal = Animal()
        # presumably type 2 means "cat" (the identifier below is prefixed
        # with "cat") — TODO confirm against the Animal model
        new_animal.type = 2
        new_animal.status = 1
        new_animal.city = "Gdynia"
        new_animal.shelter = "Schronisko Ciapkowo w Gdyni"
        new_animal.shelter_group = 4
        # Sex and age are not parsed for this shelter: fixed values.
        new_animal.sex = 1
        new_animal.age_years = 0

        # Raw record returned to the caller; only some keys are filled here.
        record = {
            'name': None,
            'sex': None,
            'age': None,
            'color': None,
            'description': None,
            'vaccination': None,
            'debug': None,
            'ster': None,
            'city': None,
            'address': None,
            'type': None,
            'shelter': None,
            'shelter_group': None,
            'image': None,
            'phone': None,
            'mail': None,
            'virtual_adoption': None,
            'status': None,
        }

        url = link
        resp = requests.get(url)
        soup = BeautifulSoup(resp.text, 'html.parser')
        new_animal.link = url

        # Name
        try:
            name = soup.select('h1.entry-title')[0].getText().strip()
            record['name'] = name
            new_animal.name = name
        except Exception as e:
            print('Błąd z ustalaniem imienia!', e)

        # The name may still be None here; never concatenate None into the
        # error messages below, otherwise the handler itself raises.
        label = record['name'] or ''

        # Description
        try:
            content = soup.select('div.entry-content')[0]

            # Default layout: every paragraph after the first one, cut at
            # the trailing hashtag when it is present.
            text = "\n".join(
                node.getText() for node in content.select('p')[1:]
            ).strip()
            marker = text.lower().find(' #niekupujadoptuj')
            if marker != -1:  # find() == -1 must not chop the last character
                text = text[:marker]

            # Alternative layout: "text_exposed" blocks after the first one.
            # Only prefer it when it actually yields text, so an absent
            # block cannot wipe out the paragraph-based description.
            exposed = "\n".join(
                node.getText()
                for node in content.select('div.text_exposed')[1:]
            ).strip()
            if exposed:
                text = exposed

            new_animal.description = text
        except Exception:
            print('Błąd z pobraniem opisu!' + label)

        # Photo: first thumbnail link of the gallery.
        try:
            imgs = [
                node['href'] for node in soup.select(
                    'div.ngg-galleryoverview div.ngg-gallery-thumbnail-box '
                    'div.ngg-gallery-thumbnail a'
                )
            ]
            image_url = imgs[0]
            new_animal.image = image_url
            # NOTE(review): stored under 'img' although the record template
            # declares 'image'; key kept unchanged for existing callers.
            record['img'] = requests.get(image_url).content
            # Build the identifier from the sanitised name (spaces and
            # slashes replaced), not from the raw one.
            name_filename = record['name'].replace(" ", "_").replace("/", "_")
            new_animal.identifier = "cat" + "_" + name_filename + "_ciapkowo"
        except Exception as e:
            print('Błąd z pobraniem zdjecia!' + label, e)

        data.append(record)
        new_animal.save()
    return data
def get_data_from_link(self, links):
    """Scrape the given animal pages and save one ``Animal`` per page.

    For every entry of *links* the page at ``self.BASE_URL + link`` is
    downloaded with ``requests.get`` and parsed with BeautifulSoup.  Name,
    description, the ``label: value`` detail fields and the photo are
    extracted on a best-effort basis: a step that fails is reported with
    ``print`` and skipped.  Every animal is persisted with
    ``new_animal.save()``.

    Args:
        links: Iterable of link strings that are appended to
            ``self.BASE_URL``.

    Returns:
        list: One ``record`` dict per processed link (empty list for empty
        input).
    """
    data = []
    for link in links:
        new_animal = Animal()
        # presumably type 2 means "cat" (the identifier below is prefixed
        # with "cat") — TODO confirm against the Animal model
        new_animal.type = 2
        new_animal.status = 1
        # Set unconditionally so that a page without a "miasto" field does
        # not produce an animal without a shelter.
        new_animal.shelter = "Schronisko w Milanówku"
        new_animal.shelter_group = 1

        # Raw record returned to the caller; only some keys are filled here.
        record = {
            'name': None,
            'sex': None,
            'age': None,
            'description': None,
            'vaccination': None,
            'debug': None,
            'ster': None,
            'city': None,
            'address': None,
            'type': None,
            'shelter': None,
            'shelter_group': None,
            'image': None,
            'phone': None,
            'mail': None,
        }

        url = self.BASE_URL + link
        resp = requests.get(url)
        soup = BeautifulSoup(resp.text, 'html.parser')
        new_animal.link = url

        # Name
        try:
            name = soup.select('h1.SPTitle')[0].getText().strip()
            record['name'] = name
            new_animal.name = name
        except Exception as e:
            print('Błąd z ustalaniem imienia!', e)

        # Description: the text between "opis:" and "kontakt:".
        try:
            details = soup.select('.SPDetails')[0].getText().strip()
            # A missing marker (find() == -1) must not shift the slice
            # bounds, so each marker is applied only when it was found.
            start = details.lower().find('opis:')
            if start != -1:
                details = details[start + 5:]
            end = details.lower().find('kontakt:')
            if end != -1:
                details = details[:end]
            desc = details.strip()
            record['description'] = desc
            new_animal.description = desc
        except Exception as e:
            print('Błąd z ustalaniem opisu!', e)

        # Other data: sex, age, breed, city, sterilisation, deworming,
        # vaccination — each rendered as a "label: value" field.
        for field in soup.select('div.spField'):
            # Split on the first colon only, so values that themselves
            # contain a colon are still parsed.
            key, sep, value = field.getText().strip().partition(':')
            if not sep:
                continue  # not a "label: value" field
            key = key.lower().strip()
            value = value.strip()

            if key == 'płeć':
                record['sex'] = 1 if value.lower() == 'kot' else 2
                new_animal.sex = record['sex']
            elif key == 'wiek':
                try:
                    age = int(value.lower().split(' ')[0])
                except ValueError as e:
                    print('Błąd z ustalaniem wieku!', e)
                    continue
                record['age'] = age
                new_animal.age = age
                new_animal.age_years = age
            elif key == 'rasa':
                record['type'] = value
                new_animal.breed = value
            elif key == 'miasto':
                record['city'] = value
                new_animal.city = value
            elif key == 'sterylizacja':
                record['ster'] = (value.lower() == 'tak')
                new_animal.sterilization = record['ster']
            elif key == 'szczepienie p. wściekliźnie':
                record['vaccination'] = (value.lower() == 'tak')
                new_animal.vaccination = record['vaccination']
            elif key == 'odrobaczenie':
                record['debug'] = (value.lower() == 'tak')
                new_animal.deworming = record['debug']

        # Photo
        try:
            img_url = soup.select(
                'div.spField > img.spFieldsData.field_photo1'
            )[0]['src']
            new_animal.image = img_url
            record['image'] = requests.get(img_url).content
            # Sanitise spaces and slashes, like the other shelter scrapers.
            name_filename = record['name'].replace(" ", "_").replace("/", "_")
            new_animal.identifier = "cat" + "_" + name_filename + "_milanowek"
        except Exception as e:
            print('Błąd z pobraniem zdjecia!', e)

        data.append(record)
        new_animal.save()
    return data
def get_data_from_link(self, links):
    """Scrape the given animal pages and save one ``Animal`` per page.

    For every entry of *links* the page at ``self.BASE_URL + link`` is
    downloaded with ``requests.get`` and parsed with BeautifulSoup.  Name,
    description, the ``label: value`` info fields and the photo are
    extracted on a best-effort basis: a step that fails is reported with
    ``print`` and skipped.  Every animal is persisted with
    ``new_animal.save()``.

    Args:
        links: Iterable of link strings that are appended to
            ``self.BASE_URL``.

    Returns:
        list: One ``record`` dict per processed link (empty list for empty
        input).
    """
    data = []
    for link in links:
        new_animal = Animal()
        # presumably type 2 means "cat" (the identifier below is prefixed
        # with "cat") — TODO confirm against the Animal model
        new_animal.type = 2
        new_animal.status = 1
        new_animal.city = "Warszawa"
        new_animal.shelter = "Schronisko Na Paluchu"
        new_animal.shelter_group = 1

        # Raw record returned to the caller; only some keys are filled here.
        record = {
            'name': None,
            'sex': None,
            'age': None,
            'color': None,
            'description': None,
            'vaccination': None,
            'debug': None,
            'ster': None,
            'city': None,
            'address': None,
            'type': None,
            'shelter': None,
            'shelter_group': None,
            'image': None,
            'phone': None,
            'mail': None,
            'virtual_adoption': None,
            'status': None,
        }

        url = self.BASE_URL + link
        resp = requests.get(url)
        soup = BeautifulSoup(resp.text, 'html.parser')
        new_animal.link = url

        # Name
        try:
            name = soup.select('h5')[0].getText().strip()
            record['name'] = name
            new_animal.name = name
            print(name)
        except Exception as e:
            print('Błąd z ustalaniem imienia!', e)

        # The name may still be None here; never concatenate None into the
        # error messages below, otherwise the handler itself raises.
        label = record['name'] or ''

        # Description: empty string when the description box is missing.
        try:
            full_text = soup.select(
                'div.main_content div.description'
            )[0].getText().strip()
        except Exception:
            full_text = ''
        record['description'] = full_text
        new_animal.description = full_text

        # Other data: sex, age, breed — each rendered as "label: value".
        for field in soup.select('div.info span'):
            # Split on the first colon only, so values that themselves
            # contain a colon are still parsed.
            key, sep, value = field.getText().strip().partition(':')
            if not sep:
                continue  # not a "label: value" field
            key = key.lower().strip()
            value = value.strip()

            # NOTE(review): 'påeä' looks like a wrongly decoded 'płeć'
            # (presumably the response charset is mis-detected — confirm);
            # both spellings are accepted so the field is recognised either
            # way.
            if key in ('płeć', 'påeä'):
                record['sex'] = 1 if value.lower() == 'samiec' else 2
                new_animal.sex = record['sex']
            elif key == 'wiek':
                parts = value.lower().split(' ')
                try:
                    # Only year units carry the number over; any other unit
                    # is stored as 1.
                    if parts[1] in ("rok", "lat", "lata"):
                        age = int(parts[0])
                    else:
                        age = 1
                except (IndexError, ValueError) as e:
                    print('Błąd z ustalaniem wieku!', e)
                    continue
                record['age'] = age
                new_animal.age = age
                new_animal.age_years = age
            elif key == 'rasa':
                record['type'] = value
                new_animal.breed = value

        # Photo
        try:
            # (selector, attribute, how many matches to keep) in priority
            # order; a source whose node lacks the attribute is skipped.
            sources = (
                ('div#main_image_cont > a > img.main_img_one', 'src', 1),
                ('div#main_image_cont > a', 'href', 1),
                ('div.ani_images > div.ani_image_bottom > a', 'href', None),
            )
            imgs = []
            for selector, attr, limit in sources:
                try:
                    imgs += [node[attr] for node in soup.select(selector)][:limit]
                except Exception:
                    pass
            if len(imgs) > 1:
                imgs = imgs[1:]  # the first one is usually a thumbnail
            imgs = [
                i if i.startswith('http')
                else 'http://napaluchu.waw.pl/{}'.format(i)
                for i in imgs
            ]
            img_url = imgs[0]
            new_animal.image = img_url
            # NOTE(review): stored under 'img' although the record template
            # declares 'image'; key kept unchanged for existing callers.
            record['img'] = requests.get(img_url).content
            name_filename = record['name'].replace(" ", "_").replace("/", "_")
            new_animal.identifier = "cat" + "_" + name_filename + "_paluch"
        except Exception as e:
            print('Błąd z pobraniem zdjecia!' + label, e)

        data.append(record)
        new_animal.save()
    return data