Ejemplo n.º 1
0
def _retrieve_yp_business(name,cityOrZip,state,surrounding,categoryOrName):
    """Fetch a business search page and parse it with m411.businessSearch.

    Parameters:
        name           -- search term; spaces are replaced with "+" before it
                          is URL-quoted.
        cityOrZip      -- second value substituted into the URL template.
        state          -- third value substituted into the URL template.
        surrounding    -- the string "Yes" selects the "YPsa" URL templates;
                          any other value selects the plain templates.
        categoryOrName -- "Name" or "Category"; selects which pair of URL
                          templates is used.

    Returns a (result_code, data) tuple:
        * (RETRIEVE_FAILED, None) when categoryOrName is not one of the two
          recognized values, or when getHttp returns None;
        * otherwise whatever m411.businessSearch returns for the page.

    When the parser reports UNKNOWN_FORMAT the page is passed to
    logParsingFailure before the result is returned.
    """
    # NOTE(review): urllib.quote percent-encodes "+" (as %2B), so the "+"
    # inserted here does not reach the server as a literal plus sign. Kept
    # as-is because the server-side expectation is not visible here --
    # confirm whether urllib.quote_plus on the raw name was intended.
    name = name.replace(" ","+")

    if categoryOrName == "Name":
        if surrounding == "Yes":
            urlTemplate = ypServerUrlBusinessSearchYPsa
        else:
            urlTemplate = ypServerUrlBusinessSearch
    elif categoryOrName == "Category":
        if surrounding == "Yes":
            urlTemplate = ypServerUrlBusinessSearchCategoryYPsa
        else:
            urlTemplate = ypServerUrlBusinessSearchCategory
    else:
        # Unknown search kind: there is no URL to request. Previously the
        # code fell through with an empty URL and still called getHttp on it.
        return (RETRIEVE_FAILED, None)

    url = urlTemplate % (urllib.quote(name),urllib.quote(cityOrZip),urllib.quote(state))

    htmlText = getHttp(url, retryCount=3)
    if htmlText is None:
        return (RETRIEVE_FAILED, None)
    res, data = m411.businessSearch(htmlText)
    if res == UNKNOWN_FORMAT:
        # Record the inputs, the page and the URL so the parser can be fixed.
        logParsingFailure("411-Business-Search", name+","+cityOrZip+","+state+","+surrounding+","+categoryOrName, htmlText, url)
    return res, data
Ejemplo n.º 2
0
def retrieveBusinessSearchByUrl(urlIn):
    """Fetch a business search page identified by urlIn and parse it.

    The prefix of urlIn decides both the full URL and the parser:
        "yplist.php"                 -> wy_com_address + "/" + urlIn,
                                        parsed by m411.businessSearch
        "/servlet"                   -> "http://www.dexonline.com" + urlIn,
                                        parsed by m411_by411.businessSearchDex
        "http://www.switchboard.com" -> urlIn unchanged, parsed by
                                        m411_by411.parseSwitchboardBusiness

    Returns a (result_code, data) tuple. It is (RETRIEVE_FAILED, None) when
    urlIn matches none of the prefixes or when getHttp returns None;
    otherwise it is the pair returned by the selected parser. A result of
    UNKNOWN_FORMAT is reported through logParsingFailure before returning.
    """
    # Which server does this URL belong to?
    if urlIn.startswith("yplist.php"):
        fullUrl = wy_com_address+"/%s" % urlIn
        source = "yp"
    elif urlIn.startswith("/servlet"):
        fullUrl = "http://www.dexonline.com%s" % urlIn
        source = "dex"
    elif urlIn.startswith("http://www.switchboard.com"):
        fullUrl = urlIn
        source = "switch"
    else:
        # Unrecognized URL: nothing is fetched.
        return (RETRIEVE_FAILED, None)

    # Retrieve the page; every source is fetched the same way.
    page = getHttp(fullUrl, retryCount=3)
    if page is None:
        return (RETRIEVE_FAILED, None)

    # Parse with the parser matching the source.
    if source == "yp":
        outcome, parsed = m411.businessSearch(page)
    elif source == "dex":
        outcome, parsed = m411_by411.businessSearchDex(page)
    else:
        outcome, parsed = m411_by411.parseSwitchboardBusiness(page)

    # Report pages the parser could not understand.
    if outcome == UNKNOWN_FORMAT:
        logParsingFailure("411-Business-Search-By-Url", urlIn , page, fullUrl)
    return outcome, parsed