def retrieve(mls, state="md"):
    """Fetch the Redfin listing page for an MLS number and return its body.

    Two requests are made. The first goes to the URL built from
    ``REDFINRE``; its body is searched for a ``"URL":"<path>",`` entry.
    The second goes to ``REDFINURL`` + ``<path>``, and the body of that
    response is returned.

    Args:
        mls: MLS identifier substituted into ``REDFINRE``.
        state: Accepted for interface compatibility; not used by this
            function.

    Returns:
        The third element of the tuple returned by ``dl.retrieve_url`` for
        the listing page.

    Raises:
        NotFoundException: The first request (or building its URL) failed.
        RetrievalError: No listing path was found in the first response, or
            the second request failed.
    """
    # Get the first page for the redirection to the list of matches.
    # ``except Exception`` (rather than a bare ``except``) lets
    # KeyboardInterrupt and SystemExit propagate instead of being reported
    # as a missing listing.
    try:
        search_url = REDFINRE.format(mls=mls)
        # Per the original author: the body contains metadata, but not all
        # of it, hence the second request below.
        _, _, search_body = dl.retrieve_url(search_url)
    except Exception:
        raise NotFoundException("Error retrieving {0}".format(mls))

    try:
        # Raw string: same pattern as before, without relying on Python
        # passing the invalid string escape ``\/`` through unchanged.
        match = re.search(r'"URL":"([a-zA-Z0-9\/\-_]+)",', search_body)
        # When nothing matched, ``match`` is None and ``.group`` raises
        # AttributeError, which is reported as RetrievalError below.
        listing_url = REDFINURL + match.group(1)
        _, _, listing_body = dl.retrieve_url(listing_url)
        return listing_body
    except Exception:
        raise RetrievalError("Failed!")
def retrieve(mls, state="md"): url = None # Get the first page for the redirection to the list of matches try: url = REMAXRE.format(mls=mls, state=state) (a, b, c) = dl.retrieve_url(url) b = bs.BeautifulSoup(c) scripts = b.findAll('script') for s in scripts: m = re.search(r"window.location = '(.*)'", str(s)) if m is not None: url = m.group(1) del b del c except: raise NotFoundException("Error retrieving {0}".format(mls)) if url is None: raise NotFoundException("Error retrieving {0}".format(mls)) # Get the second page for the list of matches, and grab the first match try: (a, b, c) = dl.retrieve_url(url) url = None b = bs.BeautifulSoup(c) links = b.findAll('a') for l in links: a = l.get('href') if a is not None: m = re.search(r"/property/(.*)", str(a)) if m is not None: #print("Matched on {url}".format(url=m.group(1))) url = "http://www.remax.com/property/{rest}".format( rest=m.group(1)) break del b del c except KeyboardInterrupt: raise except Exception, e: raise NotFoundException("Retrieval error: {0}".format(e))
def retrieve(mls, state="md"): url = None # Get the first page for the redirection to the list of matches try: url = REMAXRE.format(mls=mls, state=state) (a,b,c) = dl.retrieve_url(url) b = bs.BeautifulSoup(c) scripts = b.findAll('script') for s in scripts: m = re.search(r"window.location = '(.*)'", str(s)) if m is not None: url = m.group(1) del b del c except: raise NotFoundException("Error retrieving {0}".format(mls)) if url is None: raise NotFoundException("Error retrieving {0}".format(mls)) # Get the second page for the list of matches, and grab the first match try: (a,b,c) = dl.retrieve_url(url) url = None b = bs.BeautifulSoup(c) links = b.findAll('a') for l in links: a = l.get('href') if a is not None: m = re.search(r"/property/(.*)", str(a)) if m is not None: #print("Matched on {url}".format(url=m.group(1))) url = "http://www.remax.com/property/{rest}".format(rest=m.group(1)) break del b del c except KeyboardInterrupt: raise except Exception, e: raise NotFoundException("Retrieval error: {0}".format(e))
def calculate_distance(x, y):
    """Return the distance text reported for two locations.

    Spaces in both arguments are replaced with ``+``, the results are
    substituted positionally into ``URLRE``, and the XML response is
    searched for the first ``<distance>`` element.

    Args:
        x: First location string (fills the first ``URLRE`` placeholder).
        y: Second location string (fills the second ``URLRE`` placeholder).

    Returns:
        ``str`` of the text of the first ``<text>`` element found under the
        first ``<distance>`` element of the response.

    Raises:
        Exception: Any failure (download, XML parsing, missing elements) is
            printed and then re-raised unchanged.
    """
    try:
        # The previous ``re.sub(r' ', r'\+', ...)`` inserted a literal
        # backslash ("a b" -> "a\+b"), because unknown escapes in a
        # replacement template are kept as-is. A plain replace yields the
        # intended "a+b".
        x_query = x.replace(" ", "+")
        y_query = y.replace(" ", "+")
        request_url = URLRE.format(x_query, y_query)
        _, _, data = dl.retrieve_url(request_url)
        document = xmlparse.parseString(data)
        distance = document.getElementsByTagName('distance')[0]
        value = remax.getnodetext(distance.getElementsByTagName('text')[0])
        return str(value)
    except Exception as e:
        # Report the problem, then let the caller see the original error.
        print("Error: {0}".format(e))
        raise
def calculate_distance(x, y):
    """Return the distance text reported for two locations.

    Both arguments have their spaces turned into ``+`` and are then placed,
    in order, into the ``URLRE`` template. The XML document fetched from
    that URL is inspected for its first ``<distance>`` element.

    Args:
        x: First location string (first positional field of ``URLRE``).
        y: Second location string (second positional field of ``URLRE``).

    Returns:
        The text of the first ``<text>`` child found under the first
        ``<distance>`` element, converted with ``str``.

    Raises:
        Exception: Whatever error occurred; it is printed first and then
            re-raised as-is.
    """
    try:
        # ``re.sub(r" ", r"\+", ...)`` used to emit "\+" (with the
        # backslash) for every space, since the replacement template keeps
        # unknown escapes verbatim. ``str.replace`` gives the intended "+".
        first = x.replace(" ", "+")
        second = y.replace(" ", "+")
        request_url = URLRE.format(first, second)
        _, _, data = dl.retrieve_url(request_url)
        document = xmlparse.parseString(data)
        distance = document.getElementsByTagName("distance")[0]
        text_node = distance.getElementsByTagName("text")[0]
        return str(remax.getnodetext(text_node))
    except Exception as e:
        # Surface the failure on stdout but keep the original exception.
        print("Error: {0}".format(e))
        raise
#print("Matched on {url}".format(url=m.group(1))) url = "http://www.remax.com/property/{rest}".format(rest=m.group(1)) break del b del c except KeyboardInterrupt: raise except Exception, e: raise NotFoundException("Retrieval error: {0}".format(e)) if url is None: raise NotFoundException("Error retrieving {0}: Could not determine url".format(mls)) try: # Get the house details page itself (a,b,c) = dl.retrieve_url(url) xml = '' m = re.search("g_strXML = '(.*)'", str(c)) if m is not None: xml = str(m.group(1)) # Works better (read: at all) if we format it utf-8 instead of utf-16 xml = re.sub("utf-16", "utf-8", xml) return xml except KeyboardInterrupt: raise except Exception, e: raise RetrievalError("Could not get details for {0}: {1}".format(mls, e)) def parse_house(xmlstr): x = xmlparse.parseString(xmlstr) try:
rest=m.group(1)) break del b del c except KeyboardInterrupt: raise except Exception, e: raise NotFoundException("Retrieval error: {0}".format(e)) if url is None: raise NotFoundException( "Error retrieving {0}: Could not determine url".format(mls)) try: # Get the house details page itself (a, b, c) = dl.retrieve_url(url) xml = '' m = re.search("g_strXML = '(.*)'", str(c)) if m is not None: xml = str(m.group(1)) # Works better (read: at all) if we format it utf-8 instead of utf-16 xml = re.sub("utf-16", "utf-8", xml) return xml except KeyboardInterrupt: raise except Exception, e: raise RetrievalError("Could not get details for {0}: {1}".format( mls, e)) def parse_house(xmlstr):