Python retrieve_url Beispiele, downloader.retrieve_url Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: redfin.py Projekt: leishen/MlsTools

def retrieve(mls, state="md"):
    """Fetch the Redfin page that the search for ``mls`` points at.

    Two requests are made through ``dl.retrieve_url``. The first goes to the
    URL built from ``REDFINRE`` and ``mls``; its response is searched for a
    ``"URL":"<path>",`` entry. The second goes to ``REDFINURL + <path>``.

    Args:
        mls: Value substituted into ``REDFINRE`` as ``mls``.
        state: Not used by this function.

    Returns:
        The third element of the tuple returned by the second
        ``dl.retrieve_url`` call (presumably the page content -- confirm
        against the downloader module).

    Raises:
        NotFoundException: The first request failed.
        RetrievalError: No ``"URL"`` entry was found in the first response,
            or the second request failed.
    """
    # Get the first page for the redirection to the list of matches.
    # ``except Exception`` (rather than a bare ``except``) lets
    # KeyboardInterrupt / SystemExit propagate instead of being reported as
    # a lookup failure.
    try:
        # The third element contains meta data, but not all of it.
        (_, _, landing) = dl.retrieve_url(REDFINRE.format(mls=mls))
    except Exception:
        raise NotFoundException("Error retrieving {0}".format(mls))

    try:
        # Raw string: the pattern is unchanged, but no longer relies on
        # invalid string escapes being passed through.
        match = re.search(r'"URL":"([a-zA-Z0-9/\-_]+)",', landing)
        # When nothing matched, ``match`` is None and ``.group`` raises
        # AttributeError, which is reported as RetrievalError below.
        (_, _, details) = dl.retrieve_url(REDFINURL + match.group(1))
        return details
    except Exception:
        raise RetrievalError("Failed!")

Beispiel #2

0

Datei anzeigen

def retrieve(mls, state="md"):
    """Resolve the RE/MAX property URL for an MLS number.

    The search URL built from ``REMAXRE`` is fetched and its ``<script>``
    tags are scanned for a ``window.location = '...'`` redirect. The
    resulting URL is then fetched and its ``<a>`` tags are scanned for the
    first ``href`` containing ``/property/``.

    As excerpted here the function stops once the property URL has been
    resolved; no ``return`` statement is part of this excerpt.

    Args:
        mls: Value substituted into ``REMAXRE`` as ``mls``.
        state: Value substituted into ``REMAXRE`` as ``state``.

    Raises:
        NotFoundException: Either page could not be fetched or parsed.
    """
    url = None
    # Get the first page for the redirection to the list of matches.
    # ``except Exception`` (not a bare ``except``) so that KeyboardInterrupt
    # propagates here exactly as it does in the second stage below.
    try:
        url = REMAXRE.format(mls=mls, state=state)
        (_, _, page) = dl.retrieve_url(url)
        for script in bs.BeautifulSoup(page).findAll('script'):
            m = re.search(r"window.location = '(.*)'", str(script))
            if m is not None:
                # No break: when several scripts match, the last one wins.
                url = m.group(1)
    except Exception:
        raise NotFoundException("Error retrieving {0}".format(mls))

    # NOTE(review): when no redirect was found, ``url`` still holds the
    # search URL at this point, so this guard does not fire and the search
    # page is simply fetched again below. Confirm whether that fallback is
    # intended before tightening it.
    if url is None:
        raise NotFoundException("Error retrieving {0}".format(mls))

    # Get the second page for the list of matches, and grab the first match
    try:
        (_, _, page) = dl.retrieve_url(url)
        url = None
        for link in bs.BeautifulSoup(page).findAll('a'):
            href = link.get('href')
            if href is None:
                continue
            m = re.search(r"/property/(.*)", str(href))
            if m is not None:
                url = "http://www.remax.com/property/{rest}".format(
                    rest=m.group(1))
                break
    except KeyboardInterrupt:
        raise
    except Exception as e:
        raise NotFoundException("Retrieval error: {0}".format(e))

Beispiel #3

0

Datei anzeigen

Datei: remax.py Projekt: leishen/MlsTools

def retrieve(mls, state="md"):
    """Resolve the RE/MAX property URL for an MLS number.

    Stage one fetches the search URL built from ``REMAXRE`` and looks through
    the page's ``<script>`` tags for a ``window.location = '...'`` redirect.
    Stage two fetches the resulting URL and picks the first ``<a href>`` that
    contains ``/property/``.

    As excerpted here the function stops once the property URL has been
    resolved; no ``return`` statement is part of this excerpt.

    Args:
        mls: Value substituted into ``REMAXRE`` as ``mls``.
        state: Value substituted into ``REMAXRE`` as ``state``.

    Raises:
        NotFoundException: Either page could not be fetched or parsed.
    """
    url = None
    # Get the first page for the redirection to the list of matches.
    # Catching ``Exception`` instead of everything keeps Ctrl-C working,
    # matching the explicit KeyboardInterrupt handling further down.
    try:
        url = REMAXRE.format(mls=mls, state=state)
        (_, _, html) = dl.retrieve_url(url)
        soup = bs.BeautifulSoup(html)
        for tag in soup.findAll('script'):
            redirect = re.search(r"window.location = '(.*)'", str(tag))
            if redirect is not None:
                # Deliberately no break: a later matching script overrides
                # an earlier one.
                url = redirect.group(1)
        del soup
        del html
    except Exception:
        raise NotFoundException("Error retrieving {0}".format(mls))

    # NOTE(review): ``url`` was already set to the search URL above, so it is
    # not None here even when no redirect was found; in that case the search
    # page is fetched a second time below. Confirm whether that is intended.
    if url is None:
        raise NotFoundException("Error retrieving {0}".format(mls))

    # Get the second page for the list of matches, and grab the first match
    try:
        (_, _, html) = dl.retrieve_url(url)
        url = None
        soup = bs.BeautifulSoup(html)
        for anchor in soup.findAll('a'):
            href = anchor.get('href')
            if href is None:
                continue
            found = re.search(r"/property/(.*)", str(href))
            if found is not None:
                url = "http://www.remax.com/property/{rest}".format(rest=found.group(1))
                break
        del soup
        del html
    except KeyboardInterrupt:
        raise
    except Exception as e:
        raise NotFoundException("Retrieval error: {0}".format(e))

Beispiel #4

0

Datei anzeigen

def calculate_distance(x,y):
    """Return the distance text reported for the pair ``x``, ``y``.

    Both arguments have their spaces replaced by ``+`` and are substituted
    positionally into ``URLRE``. The response is parsed as XML and the text
    of the first ``<text>`` element inside the first ``<distance>`` element
    is returned.

    Args:
        x: First string placed into ``URLRE``.
        y: Second string placed into ``URLRE``.

    Returns:
        ``str`` of the value extracted by ``remax.getnodetext``.

    Raises:
        Exception: Any failure is printed as ``Error: ...`` and re-raised
            unchanged.
    """
    try:
        # Plain "+" for each space. The earlier re.sub(..., r'\+', ...) also
        # emitted a backslash, because unknown escapes in a replacement
        # template are kept verbatim.
        origin = x.replace(' ', '+')
        destination = y.replace(' ', '+')
        (_, _, data) = dl.retrieve_url(URLRE.format(origin, destination))
        doc = xmlparse.parseString(data)
        distance = doc.getElementsByTagName('distance')[0]
        value = remax.getnodetext(distance.getElementsByTagName('text')[0])
        return str(value)
    except Exception as e:
        print("Error: {0}".format(e))
        raise

Beispiel #5

0

Datei anzeigen

Datei: distance.py Projekt: leishen/MlsTools

def calculate_distance(x, y):
    """Look up the distance text for the pair ``x``, ``y``.

    Spaces in both arguments are turned into ``+`` before the values are
    substituted positionally into ``URLRE``. The fetched document is parsed
    as XML; the result is taken from the first ``<text>`` element nested in
    the first ``<distance>`` element.

    Args:
        x: First string placed into ``URLRE``.
        y: Second string placed into ``URLRE``.

    Returns:
        ``str`` of the value extracted by ``remax.getnodetext``.

    Raises:
        Exception: Any failure is printed as ``Error: ...`` and re-raised
            unchanged.
    """
    try:
        # str.replace yields a bare "+"; the former re.sub replacement
        # r"\+" left a stray backslash in the query because unknown escapes
        # in a replacement template are passed through untouched.
        start = x.replace(" ", "+")
        end = y.replace(" ", "+")
        request_url = URLRE.format(start, end)
        (_, _, payload) = dl.retrieve_url(request_url)
        tree = xmlparse.parseString(payload)
        distance = tree.getElementsByTagName("distance")[0]
        label = distance.getElementsByTagName("text")[0]
        return str(remax.getnodetext(label))
    except Exception as e:
        print("Error: {0}".format(e))
        raise

Beispiel #6

0

Datei anzeigen

Datei: remax.py Projekt: leishen/MlsTools

                    #print("Matched on {url}".format(url=m.group(1)))
                    url = "http://www.remax.com/property/{rest}".format(rest=m.group(1))
                    break
        del b
        del c
    except KeyboardInterrupt:
        raise
    except Exception, e:
        raise NotFoundException("Retrieval error: {0}".format(e))

    if url is None:
        raise NotFoundException("Error retrieving {0}: Could not determine url".format(mls))

    try:
        # Get the house details page itself 
        (a,b,c) = dl.retrieve_url(url)
        xml = ''
        m = re.search("g_strXML = '(.*)'", str(c))
        if m is not None:
            xml = str(m.group(1))
            # Works better (read: at all) if we format it utf-8 instead of utf-16
            xml = re.sub("utf-16", "utf-8", xml)
        return xml
    except KeyboardInterrupt:
        raise
    except Exception, e:
        raise RetrievalError("Could not get details for {0}: {1}".format(mls, e))

def parse_house(xmlstr):
    x = xmlparse.parseString(xmlstr)
    try:

Beispiel #7

0

Datei anzeigen

                        rest=m.group(1))
                    break
        del b
        del c
    except KeyboardInterrupt:
        raise
    except Exception, e:
        raise NotFoundException("Retrieval error: {0}".format(e))

    if url is None:
        raise NotFoundException(
            "Error retrieving {0}: Could not determine url".format(mls))

    try:
        # Get the house details page itself
        (a, b, c) = dl.retrieve_url(url)
        xml = ''
        m = re.search("g_strXML = '(.*)'", str(c))
        if m is not None:
            xml = str(m.group(1))
            # Works better (read: at all) if we format it utf-8 instead of utf-16
            xml = re.sub("utf-16", "utf-8", xml)
        return xml
    except KeyboardInterrupt:
        raise
    except Exception, e:
        raise RetrievalError("Could not get details for {0}: {1}".format(
            mls, e))


def parse_house(xmlstr):