Example #1
0
def main():
    """Replay a saved query dump through the matching 411 parser.

    Expects exactly one command-line argument (otherwise usageAndExit() is
    called): the path of a file whose first three lines hold the query
    name, the query argument and the URL, followed by the HTML to parse.
    The three header fields are echoed, then the HTML is handed to the
    parser selected by the query name (and, for "411-Business-Search", by
    the URL) and whatever the parser returns is printed.
    """
    if len(sys.argv) != 2:
        usageAndExit()
    fileName = sys.argv[1]

    fo = open(fileName, "rb")
    try:
        # Header lines are stripped; the remainder of the file is kept raw.
        query = fo.readline().strip()
        queryArg = fo.readline().strip()
        queryUrl = fo.readline().strip()
        queryHtml = fo.read()
    finally:
        # Close the handle even when one of the reads above raises.
        fo.close()

    # Single-argument print(...) behaves exactly like the print statement
    # on Python 2, so the output is unchanged.
    print("query: " + query)
    print("arg: " + queryArg)
    print("url:" + queryUrl)

    if query == "411-Reverse-Phone":
        res = m411_by411.reversePhoneLookup(queryHtml)
        print(res)
    elif query == "411-Business-Search":
        # The URL the page was fetched from decides which parser applies.
        if -1 != queryUrl.find("switchboard.com"):
            res = m411_by411.parseSwitchboardBusiness(queryHtml)
            print(res)
        elif -1 != queryUrl.find("65.116.24.186"):
            res = m411_by411.businessSearchDex(queryHtml)
            print(res)
        else:
            print("Unknown 411-Business-Search source")
def retrieveBusinessSearchByUrl(urlIn):
    """Fetch a business-search page identified by urlIn and parse it.

    The prefix of urlIn selects both the full URL to request and the parser
    to run on the response:

      "yplist.php..."                 -> wy_com_address + "/" + urlIn, m411.businessSearch
      "/servlet..."                   -> http://www.dexonline.com + urlIn, m411_by411.businessSearchDex
      "http://www.switchboard.com..." -> urlIn as is, m411_by411.parseSwitchboardBusiness

    Returns a (status, data) pair.  The pair is (RETRIEVE_FAILED, None) when
    urlIn matches none of the prefixes or getHttp() returns None; otherwise
    it is whatever the selected parser returned.  A status of UNKNOWN_FORMAT
    is additionally reported through logParsingFailure().
    """
    # which server?
    if urlIn.startswith("yplist.php"):
        source = "yp"
        fullUrl = wy_com_address+"/%s" % urlIn
    elif urlIn.startswith("/servlet"):
        source = "dex"
        fullUrl = "http://www.dexonline.com%s" % urlIn
    elif urlIn.startswith("http://www.switchboard.com"):
        source = "switch"
        fullUrl = urlIn
    else:
        # unrecognised prefix: nothing is requested
        return (RETRIEVE_FAILED, None)

    # retrieve (every known source is fetched the same way)
    page = getHttp(fullUrl, retryCount=3)
    if page is None:
        return (RETRIEVE_FAILED, None)

    # parse with the parser that belongs to the source
    if source == "yp":
        status, payload = m411.businessSearch(page)
    elif source == "dex":
        status, payload = m411_by411.businessSearchDex(page)
    else:
        status, payload = m411_by411.parseSwitchboardBusiness(page)

    # keep a record of pages the parser could not understand
    if status == UNKNOWN_FORMAT:
        logParsingFailure("411-Business-Search-By-Url", urlIn, page, fullUrl)
    return status, payload
def _retrieve_dex_business(name, cityOrZip, state, surrounding, categoryOrName):
    """Run a business search against www.dexonline.com and parse the result.

    name           -- search text, URL-quoted into the request
    cityOrZip      -- city; a 5-digit zip code is not accepted and is rejected
    state          -- state, URL-quoted into the request
    surrounding    -- "Yes" sends "true" in the request, anything else "false"
    categoryOrName -- "Name" or "Category"; selects the URL template
                      (dexServerUrlBusinessSearch or
                      dexServerUrlBusinessSearchCategory)

    Returns a (status, data) pair.  The pair is (RETRIEVE_FAILED, None) when
    the arguments are unsupported or getHttp() returns None; otherwise it is
    whatever m411_by411.businessSearchDex() returned.  A status of
    UNKNOWN_FORMAT is additionally reported through logParsingFailure().
    """
    ## from www.dexonline.com
    ## no zip accepted:
    if cityOrZip.isdigit() and len(cityOrZip) == 5:
        log(SEV_EXC, "_retrieve_dex_business doesn't support cityOrZip='%s'" % cityOrZip)
        return RETRIEVE_FAILED, None

    if surrounding == "Yes":
        sur = "true"
    else:
        sur = "false"

    # Note the two templates take their arguments in a different order.
    if categoryOrName == "Name":
        url = dexServerUrlBusinessSearch % (urllib.quote(cityOrZip), urllib.quote(state), sur, urllib.quote(name))
    elif categoryOrName == "Category":
        url = dexServerUrlBusinessSearchCategory % (sur, urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))
    else:
        # Without a matching template there is no URL to request; fail the
        # same way as for an unsupported cityOrZip instead of fetching "".
        log(SEV_EXC, "_retrieve_dex_business doesn't support categoryOrName='%s'" % (categoryOrName,))
        return RETRIEVE_FAILED, None

    htmlText = getHttp(url)
    if htmlText is None:
        return (RETRIEVE_FAILED, None)

    res, data = m411_by411.businessSearchDex(htmlText)
    if res == UNKNOWN_FORMAT:
        # Record the full set of search arguments with the unparsed page.
        queryArgs = ",".join([name, cityOrZip, state, surrounding, categoryOrName])
        logParsingFailure("411-Business-Search", queryArgs, htmlText, url)
    return res, data