# NOTE(review): mangled paste — this chunk is the tail of a crawler method
# (the enclosing def/class precede this view; `self` and `soup` come from
# that scope), followed by the module-level driver for the "teuimovel" domain.
try:
    # The second "col-lg-6" div appears to hold the listing summary —
    # TODO confirm against the actual teuimovel page markup.
    infos = soup.find_all("div", {"class": "col-lg-6"})
    infos = infos[1]
    # "titulo faixa" text looks like "<Tipo> - <Valor>"; split on the dash.
    valor = infos.find("div", {"class": "titulo faixa"})
    valor = valor.text.split('-')
    self.data["Tipo"] = valor[0].strip()
    self.data["Valor"] = valor[1].strip()
    # Location text sits three nodes before the first <strong>; its last
    # whitespace-separated token is taken to be the state (estado).
    local = infos.find("strong")
    local = local.previous.previous.previous.strip()
    local = local.split(' ')
    estado = local[-1].strip()
    self.data["Estado"] = estado
    # Each <strong> holds a field label; the text node right before it
    # holds that field's value.
    for info in infos.find_all("strong"):
        self.data[info.text.strip()] = info.previous.strip()
except:  # bare except: best-effort scrape — any parse failure is swallowed
    pass
# Dump every scraped field (Python 2 print statement; values are unicode,
# hence the explicit UTF-8 encode).
for key in self.data:
    print key.encode("utf-8") + ": " + self.data[key].encode("utf-8")

# --- module-level driver: crawl every stored URL for the "teuimovel" domain ---
db = ExtractorDB()
for item in db.get_domain("teuimovel"):
    print item
    teuimovel = Teuimovel_crawler(item)
    teuimovel.crawl()
    print "\n\n"
# NOTE(review): mangled paste — tail of an OLX crawler method (`self`, `soup`
# come from the unseen enclosing scope), followed by the module-level driver.
try:
    # Sale price is rendered in the "actual-price" span.
    preco = soup.find("span", {"class": "actual-price"})
    self.data["Valor de Venda"] = preco.get_text()
except:  # bare except: best-effort scrape — any parse failure is swallowed
    pass
try:
    # State abbreviation is read from fixed positions 7-8 of the start URL —
    # presumably the subdomain, e.g. "http://pe.olx..."; TODO confirm.
    estado = self.start_url[7] + self.start_url[8]
    self.data["Estado:"] = estado.upper()
    # Detail lists: inside each <p>, text node 1 looks like the label and
    # text node 3 the value (nodes 0/2 presumably whitespace/markup).
    address = soup.find_all("ul", {"class": "list square-gray"})
    for info in address:
        ps = info.find_all("p")
        for p in ps:
            self.data[p(text=True)[1]] = p(text=True)[3].strip()
except:
    pass
try:
    # Use the scraped "Tipo:" field as the page title, when present.
    self.title = self.data["Tipo:"]
except:
    pass
# Dump every scraped field (Python 2 print; note separator is a space here,
# not ": " as in the other crawlers).
for key in self.data:
    print key.encode("utf-8") + " " + self.data[key].encode("utf-8")

# --- module-level driver: crawl every stored URL for the "olx" domain ---
db = ExtractorDB()
for item in db.get_domain("olx"):
    print item
    olx = Olx_crawler(item)
    olx.crawl()
    print "\n\n"
# NOTE(review): mangled paste — tail of a Directimoveis crawler method; the
# leading "except" closes a try-block that precedes this view.
except:
    pass
try:
    # "boxValue" <li>: text node 0 is the label, node 1 the value.
    valor = soup.find("li", {"class": "boxValue"})
    self.data[valor(text=True)[0]] = valor(text=True)[1].strip()
except:  # bare except: best-effort scrape — any parse failure is swallowed
    pass
try:
    # Second "boxTextList" <li>: its <p> text looks like
    # "<something> - <Area Total> - <Label>: <Value> - ..." — split on dashes,
    # then each remaining segment on ":" into label/value pairs.
    infos = soup.find_all("li", {"class": "boxTextList"})
    info = infos[1].find("p")
    info = info.text.split('-')
    self.data["Area Total"] = info[1].strip()
    for i in range(2, len(info)):
        val = info[i].split(':')
        self.data[val[0].strip()] = val[1].strip()
except:
    pass
# Dump every scraped field (Python 2 print; values are unicode).
for key in self.data:
    print key.encode("utf-8") + ": " + self.data[key].encode("utf-8")

# --- module-level driver: crawl every stored URL for "directimoveis" ---
db = ExtractorDB()
for item in db.get_domain("directimoveis"):
    print item
    directimoveis = Directimoveis_crawler(item)
    directimoveis.crawl()
    print "\n\n"
# NOTE(review): mangled paste — tail of a Redeimoveispe crawler method; the
# opening "try:" for the first statements precedes this view.
    # "importantes" divs hold the key listing attributes.
    infos = soup.find_all("div", {"class": "importantes"})
    # Only store the first div's label/value pair when its text contains a
    # digit (re.search raises AttributeError otherwise, caught below).
    if re.search(r"\d+", infos[0].text).group():
        self.data[infos[0](text=True)[1].strip()] = infos[0](text=True)[2].strip()
    # Second div: first <span> is the area; the rest look like
    # "<value> <label>" — last token is the label, second-to-last the value.
    infos = infos[1].find_all("span")
    self.data["Area"] = infos[0].text.strip()
    for i in range(1, len(infos)):
        x = infos[i].text.split(' ')
        self.data[x[-1].strip()] = x[-2].strip()
except:  # bare except: best-effort scrape — any parse failure is swallowed
    pass
try:
    # Address is the text of the "mapaCond" map container.
    address = soup.find("div", {"id": "mapaCond"})
    self.data["Endereco"] = address.text.strip()
except:
    pass
# Dump every scraped field (Python 2 print; values are unicode).
for key in self.data:
    print key.encode("utf-8") + ": " + self.data[key].encode("utf-8")

# Ad-hoc single-URL test run, kept disabled:
# ri = Redeimoveispe_crawler('http://www.redeimoveispe.com.br/empreendimento-detalhes.aspx?id_empreendimento=7759469')
# ri.crawl()

# --- module-level driver: crawl every stored URL for "redeimoveispe" ---
db = ExtractorDB()
for item in db.get_domain("redeimoveispe"):
    print item
    redeimoveispe = Redeimoveispe_crawler(item)
    redeimoveispe.crawl()
    print "\n\n"
# NOTE(review): mangled paste — tail of an Imoveisavenda crawler method; the
# leading "except" closes a try-block that precedes this view. This chunk uses
# print-function syntax (Python 3 or print_function import — not visible here).
except:
    pass
try:
    # "basicInfo" div: each <b> is a field label ("Label:"); the value is the
    # text two nodes after it.
    infos = soup.find("div", {"id": "basicInfo"})
    for b in infos.find_all("b"):
        campo = b.text.replace(':', '')
        valor = b.next_element.next_element.text
        # "Cidade" values look like "<Cidade> - <Estado>"; split the state out.
        if campo == "Cidade":
            valor = valor.split('-')
            self.data["Estado"] = valor[1].strip()
            valor = valor[0]
        self.data[campo] = valor
except:  # bare except: best-effort scrape — any parse failure is swallowed
    pass
# Dump every scraped field; each print is wrapped so one bad value
# (e.g. an encoding error) doesn't abort the dump.
for key in self.data:
    try:
        print(key, end='')
        print(": ", end='')
        print(self.data[key])
    except:
        pass

# --- module-level driver: crawl every stored URL for "imovelavenda" ---
db = ExtractorDB()
for item in db.get_domain("imovelavenda"):
    print(item)
    imoveisavenda = Imoveisavenda_crawler(item)
    imoveisavenda.crawl()
    print("\n\n")
# NOTE(review): mangled paste — a crawler method (its enclosing class is
# outside this view) followed by the module-level driver for "zapi".
def extract_data_mb(self, soup):
    # Scrape a listing page: property type, sale price, address parts and
    # the attribute list, storing everything into self.data.
    # NOTE(review): no try/except here, unlike the sibling crawlers — any
    # missing element raises; confirm whether that is intentional.
    prop_addr = soup.find_all("span", {"class": "info-imovel"})
    # First "info-imovel" span: property description, appended to the title.
    prop = prop_addr[0].text
    self.title = self.title + " - " + prop
    print self.title
    # Sale price lives in the hidden "dados-ficha no-show" span.
    valor = soup.find("span", {"class": "dados-ficha no-show"})
    self.data["Valor de Venda"] = valor.text
    # Second span: address shaped like "<Bairro>, <Cidade> - <Estado>".
    address = prop_addr[1].text
    address = address.split(',')
    self.data["Bairro"] = address[0]
    address = address[1].split('-')
    self.data["Cidade"] = address[0]
    self.data["Estado"] = address[1]
    # Attribute list: in each <h3>, the last text node is the field label and
    # the concatenation of all preceding text nodes is the value.
    ul = soup.find("ul", {"class": "unstyled container"})
    h3 = ul.find_all("h3")
    for item in h3:
        val = ""
        for i in range(0, len(item(text=True)) -1 ):
            val = val + str(item(text=True)[i])
        self.data[ item(text=True)[-1]] = val

# --- module-level driver: crawl every stored URL for the "zapi" domain ---
db = ExtractorDB()
for item in db.get_domain("zapi"):
    print item
    zapi = Zapi_crawler(item)
    zapi.crawl()
    print "\n\n"
# NOTE(review): mangled paste — tail of an Imovelweb crawler method; the
# opening "try:" and the definitions of `valores` and `lis` precede this view.
    # First entry of `valores` looks like "<verb> <Modalidade>" — keep token 1.
    modalidade = valores[0].text.split(' ')
    self.data["Modalidade"] = modalidade[1].strip()
    # Walk the remaining <li> items (last one skipped — presumably not a
    # data row; TODO confirm).
    for i in range(1, len(lis) - 1):
        span = lis[i].find_all("span")
        if len(span) == 1:
            # Single span: text is "<value> <label>".
            span = span[0].text.split(' ')
            self.data[span[1].strip()] = span[0].strip()
        else:
            # Two spans: first is the label, second the value.
            self.data[span[0].text.strip()] = span[1].text.strip()
except:  # bare except: best-effort scrape — any parse failure is swallowed
    pass
try:
    # The map anchor's text ends with ", <Bairro>, <Cidade>".
    local = soup.find("a", {"href": "#map"})
    local = local.text.split(',')
    self.data["Bairro"] = local[-2].strip()
    self.data["Cidade"] = local[-1].strip()
except:
    pass
# Dump every scraped field (Python 2 print; values are unicode).
for key in self.data:
    print key.encode("utf-8") + ": " + self.data[key].encode("utf-8")

# --- module-level driver: crawl every stored URL for "imovelweb" ---
db = ExtractorDB()
for item in db.get_domain("imovelweb"):
    print item
    imovelweb = Imovelweb_crawler(item)
    imovelweb.crawl()
    print "\n\n"
# NOTE(review): mangled paste — a crawler method (its enclosing class is
# outside this view) followed by the module-level driver for "vivareal".
def extract_data2(self, soup):
    # Scrape the VivaReal detail panels into self.data, then dump the fields.
    try:
        # "pZ" div: each child's text node 0 is a label, node 1 its value.
        infos = soup.find("div", {"class": "pZ"})
        for item in infos:
            self.data[item(text=True)[0].strip()] = item( text=True)[1].strip()
        # "pZ pY" div: same label/value layout, but per <dl> entry.
        infos = soup.find("div", {"class": "pZ pY"})
        infos = infos.find_all("dl")
        for item in infos:
            self.data[item(text=True)[0].strip()] = item( text=True)[1].strip()
    except:  # bare except: best-effort scrape — any parse failure is swallowed
        pass
    try:
        # Address is shown in the sticky touch-nav header.
        address = soup.find("span", {"class": "touch-nav__address"})
        self.data["ENDERECO"] = address.text.strip()
    except:
        pass
    # Dump every scraped field (Python 2 print; values are unicode).
    for key in self.data:
        print key.encode("utf-8") + ": " + self.data[key].encode("utf-8")

# --- module-level driver: crawl every stored URL for "vivareal" ---
db = ExtractorDB()
for item in db.get_domain("vivareal"):
    print item
    vivareal = Vivareal_crawler(item)
    vivareal.crawl()
    print "\n\n"
# NOTE(review): mangled paste — tail of an Expoimovel crawler method; the
# leading "pass" closes an except-clause that precedes this view.
    pass
try:
    # Condo fee is the text of the "noxSubValCond" div.
    condominio = soup.find("div", {"id": "noxSubValCond"})
    self.data["Condominio"] = condominio.text.strip()
except:  # bare except: best-effort scrape — any parse failure is swallowed
    pass
# Header text looks like "..., <Bairro> - <Cidade>/<Estado>".
local = soup.find("div", {"class": "prentesaoTopDet"})
try:
    local = local.text.split('-')
    cidade = local[-1].split('/')[0].strip()
    estado = local[-1].split('/')[1].strip()
    self.data["Estado"] = estado
    self.data["Cidade"] = cidade
    bairro = local[0].split(',')[-1].strip()
    self.data["Bairro"] = bairro
except:
    pass
# no street address available on this site ("sem endereco")
# Dump every scraped field (Python 2 print; values are unicode).
for key in self.data:
    print key.encode("utf-8") + ": " + self.data[key].encode("utf-8")

# --- module-level driver: crawl every stored URL for "expoimovel" ---
db = ExtractorDB()
for item in db.get_domain("expoimovel"):
    print item
    expo = Expoimovel_crawler(item)
    expo.crawl()
    print "\n\n"