Python retrieve_url 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: downloader

메소드/함수: retrieve_url

hotexamples.com에서의 예제들: 7

Python retrieve_url - 7개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python downloader.retrieve_url의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: redfin.py 프로젝트: leishen/MlsTools

def retrieve(mls, state="md"):
    """Fetch the Redfin listing page for an MLS number.

    Two requests are made through ``dl.retrieve_url``: first the URL built
    from ``REDFINRE``, whose response is searched for a ``"URL":"..."``
    entry, then ``REDFINURL`` joined with the path found there.

    Args:
        mls: Value substituted into ``REDFINRE`` as ``mls``.
        state: Not used by this function.
            NOTE(review): presumably kept so the signature matches the
            other ``retrieve`` helpers -- confirm.

    Returns:
        The third element of the tuple returned by the second
        ``dl.retrieve_url`` call (presumably the page body -- confirm
        against ``dl``).

    Raises:
        NotFoundException: If building or fetching the first URL fails.
        RetrievalError: If no ``"URL"`` entry is found in the first
            response or the second fetch fails.
    """
    # Get the first page for the redirection to the list of matches
    try:
        url = REDFINRE.format(mls=mls)
        # The third element contains meta data, but not all of it
        _, _, page = dl.retrieve_url(url)
    except Exception:
        # ``except Exception`` (not a bare ``except``) so Ctrl-C and
        # SystemExit are not turned into NotFoundException.
        raise NotFoundException("Error retrieving {0}".format(mls))

    try:
        # Raw string; matches letters, digits, '/', '-' and '_' exactly as
        # the original non-raw pattern did.
        path = re.search(r'"URL":"([a-zA-Z0-9/\-_]+)",', page).group(1)
        _, _, listing = dl.retrieve_url(REDFINURL + path)
        return listing
    except Exception:
        # Also covers the AttributeError raised when the search finds
        # nothing (``None.group``).
        raise RetrievalError("Failed!")

예제 #2

파일 보기

def retrieve(mls, state="md"):
    """Resolve the RE/MAX property URL for an MLS number.

    Step 1 fetches the URL built from ``REMAXRE`` and looks in its
    ``<script>`` tags for a ``window.location = '...'`` redirect.
    Step 2 fetches the resulting URL and takes the first ``<a href>``
    containing ``/property/``.

    Args:
        mls: Value substituted into ``REMAXRE`` as ``mls``.
        state: Value substituted into ``REMAXRE`` as ``state``.

    Returns:
        Nothing in the lines shown here; the resolved property URL (or
        ``None`` when no link matched) is left in the local ``url``.
        NOTE(review): this looks like an excerpt of a longer function --
        confirm against the full file.

    Raises:
        NotFoundException: If either fetch/parse step fails.
    """
    url = None
    # Get the first page for the redirection to the list of matches
    try:
        url = REMAXRE.format(mls=mls, state=state)
        _, _, page = dl.retrieve_url(url)
        soup = bs.BeautifulSoup(page)
        for script in soup.findAll('script'):
            m = re.search(r"window.location = '(.*)'", str(script))
            if m is not None:
                # No break: the last matching <script> wins.
                url = m.group(1)
        # Release the parsed page before the next download.
        del soup
        del page
    except Exception:
        # ``except Exception`` (not a bare ``except``) so Ctrl-C and
        # SystemExit propagate, matching the second step below.
        raise NotFoundException("Error retrieving {0}".format(mls))

    # NOTE(review): ``url`` is always set by the time we get here (it falls
    # back to the search URL when no redirect is found), so this guard
    # cannot fire as written -- confirm the intended behaviour.
    if url is None:
        raise NotFoundException("Error retrieving {0}".format(mls))

    # Get the second page for the list of matches, and grab the first match
    try:
        _, _, page = dl.retrieve_url(url)
        url = None
        soup = bs.BeautifulSoup(page)
        for link in soup.findAll('a'):
            href = link.get('href')
            if href is None:
                continue
            m = re.search(r"/property/(.*)", str(href))
            if m is not None:
                url = "http://www.remax.com/property/{rest}".format(
                    rest=m.group(1))
                break
        del soup
        del page
    except Exception as e:
        # KeyboardInterrupt is not an ``Exception`` subclass, so it
        # propagates without a dedicated re-raise clause.
        raise NotFoundException("Retrieval error: {0}".format(e))

예제 #3

파일 보기

파일: remax.py 프로젝트: leishen/MlsTools

def retrieve(mls, state="md"):
    """Look up the RE/MAX property page URL for an MLS number.

    The search URL built from ``REMAXRE`` is fetched first and its
    ``<script>`` elements are scanned for a ``window.location = '...'``
    redirect. The target page is then fetched and the first anchor whose
    ``href`` contains ``/property/`` gives the property URL.

    Args:
        mls: Value substituted into ``REMAXRE`` as ``mls``.
        state: Value substituted into ``REMAXRE`` as ``state``.

    Returns:
        Nothing in the lines shown here; the property URL (``None`` when no
        anchor matched) remains in the local ``url``.
        NOTE(review): this appears to be an excerpt of a longer function --
        confirm against the full file.

    Raises:
        NotFoundException: If either of the two fetch/parse steps fails.
    """
    url = None
    # Get the first page for the redirection to the list of matches
    try:
        url = REMAXRE.format(mls=mls, state=state)
        _, _, search_page = dl.retrieve_url(url)
        search_soup = bs.BeautifulSoup(search_page)
        for script_tag in search_soup.findAll('script'):
            redirect = re.search(r"window.location = '(.*)'", str(script_tag))
            if redirect is not None:
                # Deliberately no break: a later match overrides an
                # earlier one, as in the original loop.
                url = redirect.group(1)
        # Drop the parsed page before fetching the next one.
        del search_soup
        del search_page
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # are not reported as NotFoundException.
        raise NotFoundException("Error retrieving {0}".format(mls))

    # NOTE(review): after a successful first step ``url`` holds at least
    # the search URL, so this check looks unreachable -- confirm intent.
    if url is None:
        raise NotFoundException("Error retrieving {0}".format(mls))

    # Get the second page for the list of matches, and grab the first match
    try:
        _, _, results_page = dl.retrieve_url(url)
        url = None
        results_soup = bs.BeautifulSoup(results_page)
        for anchor in results_soup.findAll('a'):
            href = anchor.get('href')
            if href is not None:
                prop = re.search(r"/property/(.*)", str(href))
                if prop is not None:
                    url = "http://www.remax.com/property/{rest}".format(rest=prop.group(1))
                    break
        del results_soup
        del results_page
    except Exception as e:
        # ``except Exception`` already lets KeyboardInterrupt through, so
        # no separate re-raise clause is needed.
        raise NotFoundException("Retrieval error: {0}".format(e))

예제 #4

파일 보기

def calculate_distance(x, y):
    """Return the distance text reported for the pair of locations x, y.

    Spaces in both arguments are replaced by ``+``, the results are
    substituted positionally into ``URLRE``, and the response is parsed as
    XML. The value comes from the first ``<text>`` element under the first
    ``<distance>`` element.

    Args:
        x: First location string.
        y: Second location string.

    Returns:
        ``str`` of whatever ``remax.getnodetext`` returns for that
        ``<text>`` element (presumably its text content -- confirm).

    Raises:
        Exception: Any failure is printed and then re-raised unchanged.
    """
    try:
        # The previous ``re.sub(r' ', r'\+', ...)`` produced a literal
        # backslash ("a\+b"): unknown escapes in a replacement template are
        # kept as-is. A plain replace yields the intended "a+b".
        x_query = x.replace(' ', '+')
        y_query = y.replace(' ', '+')
        request_url = URLRE.format(x_query, y_query)
        # Only the third element of the returned tuple is needed.
        _, _, data = dl.retrieve_url(request_url)
        doc = xmlparse.parseString(data)
        distance = doc.getElementsByTagName('distance')[0]
        value = remax.getnodetext(distance.getElementsByTagName('text')[0])
        return str(value)
    except Exception as e:
        print("Error: {0}".format(e))
        raise

예제 #5

파일 보기

파일: distance.py 프로젝트: leishen/MlsTools

def calculate_distance(x, y):
    """Look up the distance text between two locations.

    Both arguments have their spaces turned into ``+`` and are formatted
    positionally into ``URLRE``. The response is parsed as XML and the
    first ``<text>`` element inside the first ``<distance>`` element is
    read.

    Args:
        x: First location string.
        y: Second location string.

    Returns:
        ``str`` of the value ``remax.getnodetext`` yields for that
        ``<text>`` node (presumably its text content -- confirm).

    Raises:
        Exception: Any error is printed and then re-raised as-is.
    """
    try:
        # ``re.sub(r" ", r"\+", ...)`` kept the backslash in its output
        # ("a\+b"), because unknown escapes in a replacement template are
        # left alone. ``str.replace`` gives the intended "a+b".
        first = x.replace(" ", "+")
        second = y.replace(" ", "+")
        request_url = URLRE.format(first, second)
        # Only the third element of the returned tuple is used here.
        _, _, payload = dl.retrieve_url(request_url)
        document = xmlparse.parseString(payload)
        distance = document.getElementsByTagName("distance")[0]
        text_node = distance.getElementsByTagName("text")[0]
        return str(remax.getnodetext(text_node))
    except Exception as e:
        print("Error: {0}".format(e))
        raise

예제 #6

파일 보기

파일: remax.py 프로젝트: leishen/MlsTools

                    #print("Matched on {url}".format(url=m.group(1)))
                    url = "http://www.remax.com/property/{rest}".format(rest=m.group(1))
                    break
        del b
        del c
    except KeyboardInterrupt:
        raise
    except Exception, e:
        raise NotFoundException("Retrieval error: {0}".format(e))

    if url is None:
        raise NotFoundException("Error retrieving {0}: Could not determine url".format(mls))

    try:
        # Get the house details page itself 
        (a,b,c) = dl.retrieve_url(url)
        xml = ''
        m = re.search("g_strXML = '(.*)'", str(c))
        if m is not None:
            xml = str(m.group(1))
            # Works better (read: at all) if we format it utf-8 instead of utf-16
            xml = re.sub("utf-16", "utf-8", xml)
        return xml
    except KeyboardInterrupt:
        raise
    except Exception, e:
        raise RetrievalError("Could not get details for {0}: {1}".format(mls, e))

def parse_house(xmlstr):
    x = xmlparse.parseString(xmlstr)
    try:

예제 #7

파일 보기

                        rest=m.group(1))
                    break
        del b
        del c
    except KeyboardInterrupt:
        raise
    except Exception, e:
        raise NotFoundException("Retrieval error: {0}".format(e))

    if url is None:
        raise NotFoundException(
            "Error retrieving {0}: Could not determine url".format(mls))

    try:
        # Get the house details page itself
        (a, b, c) = dl.retrieve_url(url)
        xml = ''
        m = re.search("g_strXML = '(.*)'", str(c))
        if m is not None:
            xml = str(m.group(1))
            # Works better (read: at all) if we format it utf-8 instead of utf-16
            xml = re.sub("utf-16", "utf-8", xml)
        return xml
    except KeyboardInterrupt:
        raise
    except Exception, e:
        raise RetrievalError("Could not get details for {0}: {1}".format(
            mls, e))


def parse_house(xmlstr):