Example #1
        def get_data_from_link(self, links):
            data = []
            for link in links:
                new_animal = Animal()
                new_animal.type = 1
                new_animal.status = 1
                new_animal.city = "Kraków"
                new_animal.shelter = "Krakowskie Towarzystwo Opieki Nad Zwierzętami"
                new_animal.shelter_group = 2
                record = {
                    'name': None,
                    'sex': None,
                    'age': None,
                    'color': None,
                    'description': None,
                    'vaccination': None,
                    'debug': None,
                    'ster': None,
                    'city': None,
                    'address': None,
                    'type': None,
                    'shelter': None,
                    'shelter_group': None,
                    'image': None,
                    'phone': None,
                    'mail': None,
                    'virtual_adoption': None,
                    'status': None
                }
                url = self.BASE_URL + link
                resp = requests.get(url)
                soup = BeautifulSoup(resp.text,'html.parser')
                new_animal.link = url

                # Name
                try:
                    record['name'] = soup.select('div.default_description > p')[0].getText().strip()
                    name = record['name']
                    new_animal.name = name
                except Exception as e:
                    print('Error determining the name!', e)

                # Other data - not available
                new_animal.sex = 3
                new_animal.age_years = 0

                # Description
                try:
                    full_text = soup.select('div.default_description')[0]
                    opis = full_text.select('p')
                    opis = opis[6].getText().strip()
                    #text = "\n".join([node.getText() for node in opis])
                    #text = opis.strip()
                    new_animal.description = opis
                    #print(text[:30])
                except Exception as e:
                    print('Error determining the description!', record['name'], e)

                # Photo
                try:
                    img_url = soup.select('#photo0')[0]['href']
                    image_url = 'http://www.schronisko.krakow.pl' + img_url
                    new_animal.image = image_url
                    img_data = requests.get(image_url).content
                    record['image'] = img_data
                    name_filename = record['name'].replace(" ", "_")
                    name_filename = name_filename.replace("/", "_")
                    filename = "dog" + "_" + name + "_krakow"
                    new_animal.identifier = filename
                    #f = open('psy/'+filename,'wb')
                    #f.write(img_data)
                    #f.close()
                except Exception as e:
                    print('Error downloading the photo!', record['name'], e)
                data.append(record)
                new_animal.save()
            return data
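All four get_data_from_link examples in this listing rely on module-level imports of requests and BeautifulSoup and on an Animal ORM model with a save() method. Below is a minimal sketch of those shared dependencies, assuming a Django-style model; the field list is inferred from the attribute assignments in the methods and is not the project's actual model definition:

import requests
from bs4 import BeautifulSoup
from django.db import models


class Animal(models.Model):
    # Field names mirror the attributes set by the scrapers; the types are assumptions.
    name = models.CharField(max_length=200, blank=True)
    type = models.IntegerField(default=0)              # 1 = dog, 2 = cat, as used in the examples
    status = models.IntegerField(default=0)
    sex = models.IntegerField(default=0)                # 1 = male, 2 = female, 3 = unknown
    age = models.IntegerField(default=0)
    age_years = models.IntegerField(default=0)
    breed = models.CharField(max_length=200, blank=True)
    city = models.CharField(max_length=100, blank=True)
    shelter = models.CharField(max_length=200, blank=True)
    shelter_group = models.IntegerField(default=0)
    description = models.TextField(blank=True)
    sterilization = models.BooleanField(default=False)
    vaccination = models.BooleanField(default=False)
    deworming = models.BooleanField(default=False)
    image = models.URLField(max_length=500, blank=True)
    identifier = models.CharField(max_length=200, blank=True)
    link = models.URLField(max_length=500, blank=True)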
Example #2
        def get_data_from_link(self, links):
            data = []
            for link in links:
                new_animal = Animal()
                new_animal.type = 2
                new_animal.status = 1
                new_animal.city = "Gdynia"
                new_animal.shelter = "Schronisko Ciapkowo w Gdyni"
                new_animal.shelter_group = 4
                new_animal.sex = 1
                new_animal.age_years = 0
                record = {
                    'name': None,
                    'sex': None,
                    'age': None,
                    'color': None,
                    'description': None,
                    'vaccination': None,
                    'debug': None,
                    'ster': None,
                    'city': None,
                    'address': None,
                    'type': None,
                    'shelter': None,
                    'shelter_group': None,
                    'image': None,
                    'phone': None,
                    'mail': None,
                    'virtual_adoption': None,
                    'status': None
                }
                url = link
                resp = requests.get(url)
                soup = BeautifulSoup(resp.text, 'html.parser')
                new_animal.link = url

                # Name
                try:
                    record['name'] = soup.select(
                        'h1.entry-title')[0].getText().strip()
                    name = record['name']
                    new_animal.name = name
                except Exception as e:
                    print('Error determining the name!', e)

                # Description:
                try:
                    text = ''  # reset so a failed extraction does not reuse the previous link's text
                    try:
                        full_text = soup.select('div.entry-content')[0]
                        opis = full_text.select('p')
                        opis = opis[1:]
                        text = "\n".join([node.getText() for node in opis])
                        text = text.strip()
                        idx = text.lower().find(' #niekupujadoptuj')
                        if idx != -1:  # drop the hashtag footer only when it is present
                            text = text[:idx]
                    except Exception:
                        pass
                    try:
                        full_text = soup.select('div.entry-content')[0]
                        opis = full_text.select('div.text_exposed')
                        opis = opis[1:]
                        if opis:  # only override the <p>-based text when this layout exists
                            text = "\n".join([node.getText() for node in opis])
                            text = text.strip()
                    except Exception:
                        pass
                    new_animal.description = text
                except Exception:
                    print('Error retrieving the description!', record['name'])

                # Other data - age, neutering (base_info is collected but not parsed further)
                try:
                    full_text = soup.select('div.entry-content')[0]
                    base_info = full_text.select('div.text_exposed')
                    base_info = base_info[0]  #.getText().strip()

                except Exception:
                    pass
                try:
                    full_text = soup.select('div.entry-content')[0]
                    base_info = full_text.select('p')
                    base_info = base_info[0]  #.getText().strip()
                except Exception:
                    pass

                # Photo
                try:
                    imgs = [
                        node['href'] for node in soup.select(
                            'div.ngg-galleryoverview div.ngg-gallery-thumbnail-box div.ngg-gallery-thumbnail a'
                        )
                    ]
                    image_url = imgs[0]
                    new_animal.image = image_url
                    img_data = requests.get(image_url).content
                    record['image'] = img_data
                    name_filename = record['name'].replace(" ", "_")
                    name_filename = name_filename.replace("/", "_")
                    filename = "cat" + "_" + name + "_ciapkowo"
                    new_animal.identifier = filename
                    #f = open('koty/'+filename,'wb')
                    #f.write(img_data)
                    #f.close()
                except Exception as e:
                    print('Error downloading the photo!', record['name'], e)
                data.append(record)
                new_animal.save()
            return data
Example #3
        def get_data_from_link(self, links):
            data = []
            for link in links:
                new_animal = Animal()
                new_animal.type = 2
                new_animal.status = 1
                record = {
                    'name': None,
                    'sex': None,
                    'age': None,
                    'description': None,
                    'vaccination': None,
                    'debug': None,
                    'ster': None,
                    'city': None,
                    'address': None,
                    'type': None,
                    'shelter': None,
                    'shelter_group': None,
                    'image': None,
                    'phone': None,
                    'mail': None
                }
                url = self.BASE_URL + link
                resp = requests.get(url)
                soup = BeautifulSoup(resp.text, 'html.parser')
                new_animal.link = url

                # Name
                try:
                    record['name'] = soup.select(
                        'h1.SPTitle')[0].getText().strip()
                    name = record['name']
                    new_animal.name = name
                except Exception as e:
                    print('Error determining the name!', e)

                # Description
                try:
                    full_text = soup.select('.SPDetails')[0].getText().strip()
                    full_text = full_text[full_text.lower().find('opis:') + 5:]
                    desc = full_text[:full_text.lower().find('kontakt:')]
                    desc = desc.strip()
                    record['description'] = desc
                    new_animal.description = desc
                except Exception as e:
                    print('Error determining the description!', e)

                # Contact - attempt unsuccessful :(
                try:
                    full_text = soup.select('.SPDetails')[0].getText().strip()
                    full_text = full_text[full_text.lower().find('kontakt:') + 8:]
                except Exception as e:
                    print('Error determining the contact details!', e)

                # Other data - sex, age, breed, city, sterilization, deworming, vaccinations
                for field in soup.select('div.spField'):
                    try:
                        text = field.getText()
                        (k, v) = text.strip().split(':', 1)  # maxsplit=1 so values containing ':' still unpack
                        k = k.lower().strip()
                        v = v.strip()
                        if k == 'płeć':
                            v = v.lower().strip()
                            if v == 'kot': record['sex'] = 1  # 'kot' is treated as male, anything else as female
                            else: record['sex'] = 2
                            new_animal.sex = record['sex']
                        elif k == 'wiek':
                            v = int(v.lower().strip().split(' ')[0])
                            record['age'] = v
                            new_animal.age = v
                            new_animal.age_years = v
                        elif k == 'rasa':
                            record['type'] = v
                            new_animal.breed = v
                        elif k == 'miasto':
                            record['city'] = v
                            new_animal.city = v
                            new_animal.shelter = "Schronisko w Milanówku"
                            new_animal.shelter_group = 1
                        elif k == 'sterylizacja':
                            record['ster'] = (v.lower() == 'tak')
                            new_animal.sterilization = record['ster']
                        elif k == 'szczepienie p. wściekliźnie':
                            record['vaccination'] = (v.lower() == 'tak')
                            new_animal.vaccination = record['vaccination']
                        elif k == 'odrobaczenie':
                            record['debug'] = (v.lower() == 'tak')
                            new_animal.deworming = record['debug']
                    except Exception:
                        pass

                # Photo
                try:
                    img_url = soup.select(
                        'div.spField > img.spFieldsData.field_photo1'
                    )[0]['src']
                    new_animal.image = img_url
                    img_data = requests.get(img_url).content
                    record['image'] = img_data
                    name_filename = record['name'].replace(" ", "_")
                    filename = "cat" + "_" + name_filename + "_milanowek"
                    new_animal.identifier = filename
                    #f = open('static/koty/'+filename,'wb')
                    #f.write(img_data)
                    #f.close()
                except Exception as e:
                    print('Error downloading the photo!', e)
                data.append(record)
                new_animal.save()
            return data
Example #4
        def get_data_from_link(self, links):
            data = []
            for link in links:
                new_animal = Animal()
                new_animal.type = 2
                new_animal.status = 1
                new_animal.city = "Warszawa"
                new_animal.shelter = "Schronisko Na Paluchu"
                new_animal.shelter_group = 1
                record = {
                    'name': None,
                    'sex': None,
                    'age': None,
                    'color': None,
                    'description': None,
                    'vaccination': None,
                    'debug': None,
                    'ster': None,
                    'city': None,
                    'address': None,
                    'type': None,
                    'shelter': None,
                    'shelter_group': None,
                    'image': None,
                    'phone': None,
                    'mail': None,
                    'virtual_adoption': None,
                    'status': None
                }
                url = self.BASE_URL + link
                resp = requests.get(url)
                soup = BeautifulSoup(resp.text,'html.parser')
                new_animal.link = url

                # Name
                try:
                    record['name'] = soup.select('h5')[0].getText().strip()
                    name = record['name']
                    #name.encode('windows-1250').decode('utf-8')  <- DOES NOT WORK
                    new_animal.name = name
                    print(name)
                except Exception as e:
                    print('Error determining the name!', e)
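                # The garbled name and the 'på‚eä‡' key below come from the page being
                # decoded with the wrong charset. Assuming the server really sends UTF-8,
                # one common fix would be to force the encoding before reading resp.text:
                #     resp = requests.get(url)
                #     resp.encoding = 'utf-8'
                #     soup = BeautifulSoup(resp.text, 'html.parser')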

                # Description
                try:
                    try: full_text = soup.select('div.main_content div.description')[0].getText().strip()
                    except Exception: full_text = ''
                    record['description'] = full_text
                    new_animal.description = full_text
                except Exception as e:
                    print('Error determining the description!', record['name'], e)

                # Other data - sex, age, breed
                for field in soup.select('div.info span'):
                    try:
                        text = field.getText()
                        (k,v) = text.strip().split(':',1)  # maxsplit=1 so values containing ':' still unpack
                        k = k.lower().strip()
                        v = v.strip()
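                        # 'på‚eä‡' is the lower-cased mojibake of 'płeć' (sex), matched
                        # here as it actually appears in the mis-decoded page text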
                        if k == 'på‚eä‡':
                            v = v.lower().strip()
                            if v == 'samiec': record['sex'] = 1
                            else: record['sex'] = 2
                            new_animal.sex = record['sex']

                        elif k == 'wiek':
                            v = (v.lower().strip().split(' '))
                            if (v[1] == "rok") or (v[1] == "lat") or (v[1] == "lata"):
                                v = int(v[0])
                            else:
                                v = 1
                            record['age'] = v
                            new_animal.age = v
                            new_animal.age_years = v

                        elif k == 'rasa':
                            record['type'] = v
                            new_animal.breed = v
                    except Exception:
                        pass
                        
                # Photo
                try:
                    imgs = []
                    try: imgs += [ node['src'] for node in soup.select('div#main_image_cont > a > img.main_img_one')][:1]
                    except Exception: pass

                    try: imgs += [ node['href'] for node in soup.select('div#main_image_cont > a')][:1]
                    except Exception: pass

                    try: imgs += [ node['href'] for node in soup.select('div.ani_images > div.ani_image_bottom > a')]
                    except Exception: pass

                    if len(imgs)>1: imgs = imgs[1:] # the first one is usually a thumbnail

                    imgs = [ i if i.startswith('http') else 'http://napaluchu.waw.pl/{}'.format(i) for i in imgs ]

                    img_url = imgs[0]
                    new_animal.image = img_url
                    img_data = requests.get(img_url).content
                    record['image'] = img_data
                    name_filename = record['name'].replace(" ", "_")
                    name_filename = name_filename.replace("/", "_")
                    filename = "cat" + "_" + name_filename + "_paluch"
                    new_animal.identifier = filename
                    #f = open('psy/'+filename,'wb')
                    #f.write(img_data)
                    #f.close()
                except Exception as e:
                    print('Error downloading the photo!', record['name'], e)
                data.append(record)
                new_animal.save()
            return data
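Each example ends with the same commented-out open/write/close block that would save the downloaded photo under a filename with no extension. A small helper along the lines of the sketch below would cover that step; the save_image name, the default directory and the .jpg fallback extension are assumptions, not part of the original code:

import os
import requests


def save_image(image_url, filename, directory='static/img'):
    # Download the photo and write it next to the other scraped images.
    os.makedirs(directory, exist_ok=True)            # create the target directory if missing
    img_data = requests.get(image_url).content
    ext = os.path.splitext(image_url)[1] or '.jpg'   # keep the URL's extension, fall back to .jpg
    path = os.path.join(directory, filename + ext)
    with open(path, 'wb') as f:
        f.write(img_data)
    return path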