Example #1
0
def retrieveFullSearch(flyNo, airlines, airportFrom, airportTo, dateUp, timeUp):
    """Run a Travelocity flight search and return the parsed result.

    dateUp is split on "/" into month, day and year; timeUp is split on ":"
    as 24-hour "hh:mm" and converted to a 12-hour "h:mm(am|pm)" string
    before it is placed in the request url.

    Returns (MODULE_DOWN, None) when getHttp yields None, otherwise the
    (resultType, resultBody) pair produced by parseTravelocity.  An
    UNKNOWN_FORMAT result is additionally reported via logParsingFailure.
    """
    global g_airlinesTable
    # Snapshot of the arguments as received, taken before any is rewritten;
    # it is handed to the parser unchanged.
    originalQuery = [flyNo, airlines, airportFrom, airportTo, dateUp, timeUp]

    month, day, year = dateUp.split("/")
    hourText, minuteText = timeUp.split(":")

    # 24-hour -> 12-hour clock: 0 becomes 12am, 12 stays 12pm,
    # anything above 12 is reduced by 12 and marked pm.
    hour = int(hourText)
    if hour < 12:
        meridiem = "am"
        if hour == 0:
            hour = 12
    else:
        meridiem = "pm"
        if hour != 12:
            hour -= 12
    timeUp = "%d:%s%s" % (hour, minuteText.zfill(2), meridiem)

    # "select airlines" is looked up as the empty airline name.
    if airlines == "select airlines":
        airlines = ""

    airportFrom = airportFrom.strip().replace(" ", "+")
    airportTo = airportTo.strip().replace(" ", "+")
    airlines = g_airlinesTable[airlines]

    urlTemplate = "http://dps1.travelocity.com/dparflifo.ctl?CMonth=%s&CDayOfMonth=%s&CYear=%s&LANG=EN&last_pgd_page=dparrqst.pgd&dep_arp_name=%s&arr_arp_name=%s&dep_dt_mn_1=%s&dep_dt_dy_1=%s&dep_tm_1=%s&aln_name=%s&flt_num=%s&CDayOfMonth=%s&x=66&y=10"
    urlArgs = (
        month, day, year,
        airportFrom, airportTo,
        monthFromNumber(month), day, urllib.quote(timeUp),
        airlines, flyNo, day,
    )
    url = urlTemplate % urlArgs

    page = getHttp(url)
    if page is None:
        return MODULE_DOWN, None

    resultType, resultBody = parseTravelocity(page, "s+flights:toc;" + url, originalQuery)
    if UNKNOWN_FORMAT == resultType:
        # Log the rewritten values (12-hour time, mapped airline, "+"-joined airports).
        loggedQuery = ";".join([flyNo, airlines, airportFrom, airportTo, dateUp, timeUp])
        logParsingFailure("flights", loggedQuery, page, url)
    return resultType, resultBody
def _parse_currency(htmlText, function, url):
    try:
        res = function(htmlText)
        return res
    except:
        logParsingFailure("Get-Currency-Conversion", "", htmlText, url)
        raise
Example #3
0
def _retrieveRandomJoke():
    """Fetch a random joke, retrying because some joke pages come back empty.

    Up to six randomly numbered joke pages are requested.

    Returns:
        (MODULE_DOWN, None) as soon as getHttp returns None;
        (JOKE_DATA, body) for the first page _parseRandomJoke accepts;
        (JOKE_DATA, placeholder) with a "No joke" message when every
        attempt failed.

    When the last attempt ended in UNKNOWN_FORMAT the page is reported via
    logParsingFailure, at most five times per process
    (g_randomJokesParseFailed is the counter).
    """
    global g_randomJokesParseFailed
    res, body = UNKNOWN_FORMAT, None
    for _attempt in range(6):
        randomJokeNumber = random.randint(1, 11073)
        url = "http://jokes.comedycentral.com/index_joke.asp?initRand=true&sql=12&rand_id=%s" % str(
            randomJokeNumber)
        htmlTxt = getHttp(url)
        if htmlTxt is None:
            return MODULE_DOWN, None
        res, body = _parseRandomJoke(htmlTxt)
        if JOKE_DATA == res:
            return res, body

    # Every attempt failed.  Report the parse failure *before* building the
    # placeholder: the previous code overwrote `res` with the placeholder
    # payload first, so this check could never be true.
    if UNKNOWN_FORMAT == res and g_randomJokesParseFailed < 5:
        logParsingFailure("Get-Joke", "random", htmlTxt, url)
        g_randomJokesParseFailed += 1

    udf = [[
        "No joke",
        "Please try again. It may be module problem. If you seen this screen more than 3 times, please wait some time (one day) before you press 'Random joke' button."
    ]]
    # The placeholder goes in the body slot with a JOKE_DATA result type;
    # it used to be stored in the result-type slot, which no caller can
    # compare against the result constants.
    return JOKE_DATA, universalDataFormatReplaceEntities(udf)
Example #4
0
def _parse_currency(htmlText, function, url):
    try:
        res = function(htmlText)
        return res
    except:
        logParsingFailure("Get-Currency-Conversion", "", htmlText, url)
        raise
Example #5
0
def _retrieveRandomJoke():
    """Fetch a random joke, retrying because some joke pages come back empty.

    Up to six randomly numbered joke pages are requested.

    Returns:
        (MODULE_DOWN, None) as soon as getHttp returns None;
        (JOKE_DATA, body) for the first page _parseRandomJoke accepts;
        (JOKE_DATA, placeholder) with a "No joke" message when every
        attempt failed.

    When the last attempt ended in UNKNOWN_FORMAT the page is reported via
    logParsingFailure, at most five times per process
    (g_randomJokesParseFailed is the counter).
    """
    global g_randomJokesParseFailed
    res, body = UNKNOWN_FORMAT, None
    for _attempt in range(6):
        randomJokeNumber = random.randint(1, 11073)
        url = "http://jokes.comedycentral.com/index_joke.asp?initRand=true&sql=12&rand_id=%s" % str(randomJokeNumber)
        htmlTxt = getHttp(url)
        if htmlTxt is None:
            return MODULE_DOWN, None
        res, body = _parseRandomJoke(htmlTxt)
        if JOKE_DATA == res:
            return res, body

    # Every attempt failed.  Report the parse failure *before* building the
    # placeholder: the previous code overwrote `res` with the placeholder
    # payload first, so this check could never be true.
    if UNKNOWN_FORMAT == res and g_randomJokesParseFailed < 5:
        logParsingFailure("Get-Joke", "random", htmlTxt, url)
        g_randomJokesParseFailed += 1

    udf = [
        [
            "No joke",
            "Please try again. It may be module problem. If you seen this screen more than 3 times, please wait some time (one day) before you press 'Random joke' button.",
        ]
    ]
    # The placeholder goes in the body slot with a JOKE_DATA result type;
    # it used to be stored in the result-type slot, which no caller can
    # compare against the result constants.
    return JOKE_DATA, universalDataFormatReplaceEntities(udf)
Example #6
0
def retrieveSwitchboardBusiness(name,cityOrZip,state,surrounding,categoryOrName):
    """Run a Switchboard business search and return the parsed result.

    categoryOrName selects the url template family ("Name" or "Category");
    a cityOrZip made of exactly five digits selects the ZIP variant.  Any
    other categoryOrName leaves the url empty.  surrounding is only used in
    the failure log.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from parseSwitchboardBusiness; UNKNOWN_FORMAT is also logged.
    """
    searchByZip = cityOrZip.isdigit() and len(cityOrZip) == 5

    url = ""
    if categoryOrName == "Name":
        if searchByZip:
            template = switchboardServerUrlBusinessSearchZip
        else:
            template = switchboardServerUrlBusinessSearch
        url = template % (urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))
    elif categoryOrName == "Category":
        if searchByZip:
            template = switchboardServerUrlBusinessSearchCategoryZip
        else:
            template = switchboardServerUrlBusinessSearchCategory
        url = template % (urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))

    # getHttpCached(url) can be substituted here while testing.
    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = parseSwitchboardBusiness(page)
    if UNKNOWN_FORMAT == status:
        request = ",".join([name, cityOrZip, state, surrounding, categoryOrName])
        logParsingFailure("411-Business-Search", request, page, url)
    return status, payload
Example #7
0
def retrieveRandom(modulesInfo):
    """Request random quotes from quotationspage.com and parse the reply.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    (resultType, resultBody) pair from parseRandomQuotes; an UNKNOWN_FORMAT
    result is also reported via logParsingFailure.
    """
    url = "http://www.quotationspage.com/random.php3"
    # NOTE(review): a dict literal keeps only the last value of a repeated
    # key, so this evaluates to {"number": "4", "collection[]": "contrib"}
    # and the other ten collections are never posted.  Sending all of them
    # needs a multi-value form encoding - confirm what getHttp accepts for
    # postData before changing this.
    formFields = {
        "number": "4",
        "collection[]": "mgm",
        "collection[]": "motivate",
        "collection[]": "classic",
        "collection[]": "coles",
        "collection[]": "lindsly",
        "collection[]": "poorc",
        "collection[]": "altq",
        "collection[]": "20thcent",
        "collection[]": "bywomen",
        "collection[]": "devils",
        "collection[]": "contrib"
    }
    page = getHttp(url, postData=formFields)
    if page is None:
        return RETRIEVE_FAILED, None

    resultType, resultBody = parseRandomQuotes(page, modulesInfo)
    if UNKNOWN_FORMAT != resultType:
        return resultType, resultBody
    logParsingFailure("quotes", "random", page, url)
    return resultType, resultBody
Example #8
0
def retrieveRandom(modulesInfo):
    """Post the random-quotes form to quotationspage.com and parse the reply.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    (resultType, resultBody) pair from parseRandomQuotes; an UNKNOWN_FORMAT
    result is also reported via logParsingFailure.
    """
    url = "http://www.quotationspage.com/random.php3"
    # NOTE(review): repeated keys in a dict literal overwrite each other, so
    # only "number" and the final "collection[]" value ("contrib") survive.
    # Posting every collection requires a multi-value encoding - confirm
    # what getHttp accepts for postData before changing this.
    formFields = {
        "number":"4",
        "collection[]":"mgm",
        "collection[]":"motivate",
        "collection[]":"classic",
        "collection[]":"coles",
        "collection[]":"lindsly",
        "collection[]":"poorc",
        "collection[]":"altq",
        "collection[]":"20thcent",
        "collection[]":"bywomen",
        "collection[]":"devils",
        "collection[]":"contrib"
        }
    page = getHttp(url, postData=formFields)
    if page is None:
        return RETRIEVE_FAILED, None

    resultType, resultBody = parseRandomQuotes(page, modulesInfo)
    if UNKNOWN_FORMAT != resultType:
        return resultType, resultBody
    logParsingFailure("quotes", "random", page, url)
    return resultType, resultBody
Example #9
0
def retrieveListsOfBestsSearch(fieldValue, modulesInfo):
    """Search listsofbests.com.

    fieldValue must consist of exactly three ";"-separated parts:
    keywords, media ("Everything", "Books", "Movies" or "Music") and the
    field to match ("Both", "Title" or "Creator").

    Returns:
        (INVALID_REQUEST, None) for a malformed fieldValue or an unknown
        media/field name; (MODULE_DOWN, None) when getHttp yields None;
        otherwise the pair from parseListsOfBestsSearch.  UNKNOWN_FORMAT
        is also reported via logParsingFailure.
    """
    parts = fieldValue.split(";")
    if 3 != len(parts):
        return INVALID_REQUEST, None
    keywords, mediaName, titleName = parts

    mediaTable = {"Everything": "0", "Books": "1", "Movies": "2", "Music": "3"}
    titleTable = {"Both": "0", "Title": "1", "Creator": "2"}
    try:
        media = mediaTable[mediaName]
        title = titleTable[titleName]
    except KeyError:
        # Only an unknown name can fail here, so catch just that.
        return INVALID_REQUEST, None

    postData = {
        "full_qry": keywords,
        "media": media,
        "which": title,
        "srch": "Search",
        "": "Clear"
    }
    url = "http://listsofbests.com/?"
    htmlTxt = getHttp(url, postData=postData)
    if htmlTxt is None:
        return MODULE_DOWN, None

    resultType, resultBody = parseListsOfBestsSearch(htmlTxt, keywords,
                                                     mediaName, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        # Page text first, url last - the order used by the other
        # logParsingFailure callers (these two were swapped here).
        logParsingFailure("listsofbestssearch", fieldValue, htmlTxt, url)
    return resultType, resultBody
Example #10
0
def retrieveUrlToc(url):
    """Download a Travelocity page by url and parse it.

    Returns (MODULE_DOWN, None) when getHttp yields None, otherwise the
    pair from parseTravelocity (called without a query); an UNKNOWN_FORMAT
    result is also reported via logParsingFailure.
    """
    page = getHttp(url)
    if page is None:
        return MODULE_DOWN, None

    outcome, payload = parseTravelocity(page, "s+flights:toc;" + url, None)
    if outcome == UNKNOWN_FORMAT:
        logParsingFailure("flights", "toc;" + url, page, url)
    return outcome, payload
Example #11
0
def getJoke(fieldValue):
    """Download and parse a single joke page.

    Every "/results/detail.asp" in fieldValue is replaced with the absolute
    index_joke.asp address to form the url.

    Returns (MODULE_DOWN, None) when getHttp yields None, otherwise the
    pair from _parseRandomJoke; UNKNOWN_FORMAT is also logged.
    """
    jokePage = "http://jokes.comedycentral.com/index_joke.asp"
    url = fieldValue.replace("/results/detail.asp", jokePage)

    page = getHttp(url)
    if page is None:
        return MODULE_DOWN, None

    status, joke = _parseRandomJoke(page)
    if status == UNKNOWN_FORMAT:
        logParsingFailure("Get-Joke", fieldValue, page, url)
    return status, joke
Example #12
0
def retrieveUrlToc(url):
    """Fetch the given Travelocity url and return the parsed result.

    Returns (MODULE_DOWN, None) when getHttp yields None, otherwise the
    pair from parseTravelocity (called without a query); an UNKNOWN_FORMAT
    result is also reported via logParsingFailure.
    """
    page = getHttp(url)
    if page is None:
        return MODULE_DOWN, None

    tocKey = "s+flights:toc;" + url
    outcome, payload = parseTravelocity(page, tocKey, None)
    if outcome == UNKNOWN_FORMAT:
        logParsingFailure("flights", "toc;" + url, page, url)
    return outcome, payload
Example #13
0
def retrieveDaily(modulesInfo):
    """Return the parsed daily-quote feeds, using a module-level cache.

    The two RSS pages are taken from g_quotesDataCached while it exists and
    quotesCacheExpired() is false; otherwise both are downloaded again.

    Returns:
        (RETRIEVE_FAILED, None) when neither page is available; otherwise
        the pair from parseDailyQuotes.  If the fresh result is not
        QUOTES_DATA built from two pages and a cached pair exists, the
        cached pair is parsed and returned instead.

    Fixes relative to the previous version:
      * an expired cache now triggers a new download (before, nothing was
        fetched once a cache existed, so every later call failed);
      * the UNKNOWN_FORMAT log no longer raises TypeError when one page is
        None;
      * a cache hit no longer rewrites the cache timestamp.
    """
    global g_quotesDataCached, g_quotesDataWhenCached, g_quotesCacheExpiration
    url1 = "http://www.quotationspage.com/data/qotd.rss"
    url2 = "http://www.quotationspage.com/data/mqotd.rss"

    # quotesCacheExpired() is only consulted when something is cached.
    fromCache = g_quotesDataCached is not None and not quotesCacheExpired()
    if fromCache:
        htmlTxt1, htmlTxt2 = g_quotesDataCached
        print("from cache")
    else:
        htmlTxt1 = getHttp(url1)
        htmlTxt2 = getHttp(url2)

    if htmlTxt1 is None and htmlTxt2 is None:
        return RETRIEVE_FAILED, None
    if htmlTxt1 is None or htmlTxt2 is None:
        # One feed is missing; parsing still goes ahead with the other.
        print("One is down")

    resultType, resultBody = parseDailyQuotes(htmlTxt1, htmlTxt2, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        # Either page may be None here, so substitute "" before joining.
        bothPages = (htmlTxt1 or "") + "\n----\n" + (htmlTxt2 or "")
        logParsingFailure("quotes", "daily", bothPages, url1)

    if QUOTES_DATA == resultType and htmlTxt1 is not None and htmlTxt2 is not None:
        # Cache only freshly downloaded data, and only when both pages are good.
        if not fromCache:
            g_quotesDataWhenCached = time.time()
            g_quotesDataCached = (htmlTxt1, htmlTxt2)
    elif g_quotesDataCached is not None:
        # No complete fresh data: fall back to the (possibly older) cached pages.
        resultType, resultBody = parseDailyQuotes(g_quotesDataCached[0],
                                                  g_quotesDataCached[1],
                                                  modulesInfo)
        logParsingFailure("quotes", "daily", None,
                          "Using old cache - retrieve failed")
    return resultType, resultBody
Example #14
0
def _retrieve_dreammoods(keyword):
    """Look up keyword on dreammoods.com and parse the reply.

    Returns (RETRIEVE_FAILED, None) when the retrieval helper yields None,
    otherwise the pair from dreams.parseDream; an UNKNOWN_FORMAT result is
    also reported via logParsingFailure.
    """
    url = "http://dreammoods.com/cgibin/searchcsv.pl?search=%s&method=exact&header=symbol" % urllib.quote(keyword)
    page = retrieveHttpResponseWithRedirectionHandleException(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = dreams.parseDream(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("Get-Dream", keyword, page, url)
    return status, payload
Example #15
0
def _retrieve_411_international(code):
    """Query 411.com's Find_Intl_Code page for the given country id.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from m411_by411.internationalCodeSearch; UNKNOWN_FORMAT is also
    reported via logParsingFailure.
    """
    url = "http://www.411.com/search/Find_Intl_Code?country_id=%s" % code
    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411_by411.internationalCodeSearch(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-International-Code", code, page, url)
    return status, payload
Example #16
0
def _retrieve_411_reverseZipCode(code):
    """Query 411.com's Reverse_Zip page for the given ZIP code.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from m411_by411.reverseZIPCodeLookup; UNKNOWN_FORMAT is also
    reported via logParsingFailure.
    """
    url = "http://www.411.com/search/Reverse_Zip?zip=%s" % code
    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411_by411.reverseZIPCodeLookup(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Zip", code, page, url)
    return status, payload
Example #17
0
def _retrieve_yp_reversePhone(xxx,yyy,zzzz):
    """Reverse phone lookup against the wy_com_address server.

    xxx, yyy and zzzz are sent as npa, np3 and np4; the phone parameter is
    built from yyy and zzzz only.

    Returns (RETRIEVE_FAILED, None) when getHttp (three retries) yields
    None, otherwise the pair from m411.reversePhoneLookup; UNKNOWN_FORMAT
    is also reported via logParsingFailure.
    """
    template = wy_com_address + "/wp-p-results.php?npa=%s&np3=%s&np4=%s&client=1482&ver=1.2&type=p&phone=%s%s"
    url = template % (xxx, yyy, zzzz, yyy, zzzz)
    page = getHttp(url, retryCount=3)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411.reversePhoneLookup(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Phone", "-".join([xxx, yyy, zzzz]), page, url)
    return status, payload
Example #18
0
def _retrieve_411_areaCodeByCity(city, state):
    """Query 411.com's Find_Areacode page for a city and state.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from m411_by411.areaCodeByCity; UNKNOWN_FORMAT is also reported
    via logParsingFailure.
    """
    template = "http://www.411.com/search/Find_Areacode?city=%s&state_id=%s"
    url = template % (urllib.quote(city), urllib.quote(state))
    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411_by411.areaCodeByCity(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Area-Code-By-City", ",".join([city, state]), page, url)
    return status, payload
Example #19
0
def _retrieve_whitepages_reversePhone(xxx,yyy,zzzz):
    """Reverse phone lookup on yp.whitepages.com.

    The three parts are concatenated without separators in the url.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from m411.reversePhoneLookupWhitepages; UNKNOWN_FORMAT is also
    reported via logParsingFailure.
    """
    url = "http://yp.whitepages.com/1048/search/Reverse_Phone?phone=%s%s%s" % (xxx, yyy, zzzz)
    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411.reversePhoneLookupWhitepages(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Phone", "-".join([xxx, yyy, zzzz]), page, url)
    return status, payload
Example #20
0
def getJoke(fieldValue):
    """Fetch one joke page and return the parsed joke.

    The url is fieldValue with each "/results/detail.asp" replaced by the
    absolute index_joke.asp address.

    Returns (MODULE_DOWN, None) when getHttp yields None, otherwise the
    pair from _parseRandomJoke; UNKNOWN_FORMAT is also logged.
    """
    relativePath = "/results/detail.asp"
    absolutePath = "http://jokes.comedycentral.com/index_joke.asp"
    url = fieldValue.replace(relativePath, absolutePath)

    page = getHttp(url)
    if page is None:
        return MODULE_DOWN, None

    status, joke = _parseRandomJoke(page)
    if status == UNKNOWN_FORMAT:
        logParsingFailure("Get-Joke", fieldValue, page, url)
    return status, joke
Example #21
0
def _retrieve_411_person(firstName,lastName,cityOrZip,state):
    """Person search on 411.com (Find_Person).

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from m411_by411.personSearch; UNKNOWN_FORMAT is also reported via
    logParsingFailure.
    """
    template = "http://www.411.com/search/Find_Person?firstname_begins_with=1&firstname=%s&name_begins_with=1&name=%s&city_zip=%s&state_id=%s"
    url = template % (
        urllib.quote(firstName),
        urllib.quote(lastName),
        urllib.quote(cityOrZip),
        urllib.quote(state),
    )
    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411_by411.personSearch(page)
    if UNKNOWN_FORMAT == status:
        request = ";".join([firstName, lastName, cityOrZip, state])
        logParsingFailure("411-Person-Search", request, page, url)
    return status, payload
Example #22
0
def _retrieve_411_reverseAreaCode(code):
    """Query 411.com's Reverse_Areacode page (alphabetical sort) for code.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from m411_by411.reverseAreaCodeLookup; UNKNOWN_FORMAT is also
    reported via logParsingFailure.
    """
    url = "http://www.411.com/log_feature/sort/search/Reverse_Areacode?npa=%s&sort=alpha" % code
    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411_by411.reverseAreaCodeLookup(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Area-Code", code, page, url)
    return status, payload
Example #23
0
def _retrieve_yp_person(firstName,lastName,cityOrZip,state):
    """Person search against the wy_com_address server.

    Returns (RETRIEVE_FAILED, None) when getHttp (three retries) yields
    None, otherwise the pair from m411.personSearch; UNKNOWN_FORMAT is also
    reported via logParsingFailure.
    """
    template = wy_com_address + "/white-pages-results.php?f=%s&firstname_begins_with=1&l=%s&name_begins_with=1&c=%s&s=%s&client=&ver=1.4&type=r"
    url = template % (
        urllib.quote(firstName),
        urllib.quote(lastName),
        urllib.quote(cityOrZip),
        urllib.quote(state),
    )
    page = getHttp(url, retryCount=3)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411.personSearch(page)
    if UNKNOWN_FORMAT == status:
        request = ";".join([firstName, lastName, cityOrZip, state])
        logParsingFailure("411-Person-Search", request, page, url)
    return status, payload
Example #24
0
def _retrieve_yp_zipCodeByCity(city, state):
    """Query yp.whitepages.com's Find_Zip page for a city and state.

    Returns (RETRIEVE_FAILED, None) when getHttp (three retries) yields
    None, otherwise the pair from m411.ZIPCodeByCity; UNKNOWN_FORMAT is
    also reported via logParsingFailure.
    """
    template = "http://yp.whitepages.com/search/Find_Zip?city_zip=%s&state_id=%s"
    url = template % (urllib.quote(city), urllib.quote(state))
    page = getHttp(url, retryCount=3)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411.ZIPCodeByCity(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Zip-By-City", ",".join([city, state]), page, url)
    return status, payload
Example #25
0
def _retrieve_wordiq(keyword):
    """Look up keyword in wordiq.com's dream pages and parse the reply.

    Returns (RETRIEVE_FAILED, None) when the retrieval helper yields None,
    otherwise the pair from dreams.parseDream2; an UNKNOWN_FORMAT result is
    also reported via logParsingFailure.
    """
    url = "http://www.wordiq.com/dream/%s" % urllib.quote(keyword)
    page = retrieveHttpResponseWithRedirectionHandleException(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = dreams.parseDream2(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("Get-Dream", keyword, page, url)
    return status, payload
Example #26
0
def retrieveListsOfBestsItem(fieldValue, modulesInfo):
    """Fetch and parse one listsofbests.com item page.

    fieldValue must be "id;categorySymbol"; the symbol is translated with
    _symbolToCategory.  Any other number of ";"-separated parts raises
    ValueError from the unpacking.

    Returns (MODULE_DOWN, None) when getHttp yields None, otherwise the
    pair from parseListsOfBests; UNKNOWN_FORMAT is also reported via
    logParsingFailure.
    """
    # itemId rather than `id`, to avoid shadowing the builtin.
    itemId, categorySymbol = fieldValue.split(";")
    category = _symbolToCategory(categorySymbol)
    url = "http://listsofbests.com/details.cgi?id=%s" % itemId
    htmlTxt = getHttp(url)
    if htmlTxt is None:
        return MODULE_DOWN, None
    resultType, resultBody = parseListsOfBests(htmlTxt, category, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        # Page text first, url last - the order used by the other
        # logParsingFailure callers (these two were swapped here).
        logParsingFailure("listsofbestsitem", fieldValue, htmlTxt, url)
    return resultType, resultBody
Example #27
0
def retrieve411ReversePhone(xxx, yyy, zzzz):
    """Reverse phone lookup on 411.com for the number xxx-yyy-zzzz.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from parse411ReversePhoneLookup; UNKNOWN_FORMAT is also reported
    via logParsingFailure.
    """
    url = "http://www.411.com/search/Reverse_Phone?phone=%s-%s-%s" % (xxx, yyy, zzzz)
    # getHttpCached(url) can be substituted here while testing.
    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = parse411ReversePhoneLookup(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Phone", "-".join([xxx, yyy, zzzz]), page, url)
    return status, payload
Example #28
0
def retrieve411ReversePhone(xxx, yyy, zzzz):
    """Look up the owner of phone number xxx-yyy-zzzz on 411.com.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from parse411ReversePhoneLookup; UNKNOWN_FORMAT is also reported
    via logParsingFailure.
    """
    phoneParts = (xxx, yyy, zzzz)
    url = "http://www.411.com/search/Reverse_Phone?phone=%s-%s-%s" % phoneParts
    # getHttpCached(url) can be substituted here while testing.
    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = parse411ReversePhoneLookup(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Phone", "-".join(phoneParts), page, url)
    return status, payload
Example #29
0
def retrieveYpReverseAreaCode(zipCode):
    """Query yp.whitepages.com's Reverse_Areacode page.

    zipCode is sent as the "npa" url parameter.

    Returns (RETRIEVE_FAILED, None) when getHttp (three retries) yields
    None, otherwise the pair from parseYpReverseAreaCode; UNKNOWN_FORMAT is
    also reported via logParsingFailure.
    """
    url = "http://yp.whitepages.com/log_feature/sort/search/Reverse_Areacode?npa=%s&sort=alpha" % zipCode
    # Trace output; the parenthesised form prints the same text as the
    # print statement.
    print("retrieveYpReverseAreaCode")
    # getHttpCached(url, retryCount=3) can be substituted here while testing.
    page = getHttp(url, retryCount=3)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = parseYpReverseAreaCode(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Area-Code", zipCode, page, url)
    return status, payload
Example #30
0
def _zap2it_retrieve_providers(jar, zipCode):
    """Read the zap2it providers page for zipCode and parse it.

    The response comes from _zap2it_get_providers_stage and is always
    closed, whether reading or parsing succeeds or not.

    Returns the value of _zap2it_parse_providers, or None when the parser
    raises (the failure is then reported via logParsingFailure).
    """
    # The opener returned alongside the response is not needed here.
    response, opener = _zap2it_get_providers_stage(jar, zipCode)
    try:
        htmlText = response.read()
        try:
            return _zap2it_parse_providers(htmlText)
        except Exception:
            # Page text goes in the third slot and the source tag in the
            # last one, matching the other logParsingFailure callers (the
            # two were swapped here).
            logParsingFailure(Fields.getTvListingsProviders, zipCode, htmlText, 'zap2it_providers')
            return None
    finally:
        response.close()
Example #31
0
def retrieveDaily(modulesInfo):
    """Return the parsed daily-quote feeds, using a module-level cache.

    The two RSS pages are taken from g_quotesDataCached while it exists and
    quotesCacheExpired() is false; otherwise both are downloaded again.

    Returns:
        (RETRIEVE_FAILED, None) when neither page is available; otherwise
        the pair from parseDailyQuotes.  If the fresh result is not
        QUOTES_DATA built from two pages and a cached pair exists, the
        cached pair is parsed and returned instead.

    Fixes relative to the previous version:
      * an expired cache now triggers a new download (before, nothing was
        fetched once a cache existed, so every later call failed);
      * the UNKNOWN_FORMAT log no longer raises TypeError when one page is
        None;
      * a cache hit no longer rewrites the cache timestamp.
    """
    global g_quotesDataCached, g_quotesDataWhenCached, g_quotesCacheExpiration
    url1 = "http://www.quotationspage.com/data/qotd.rss"
    url2 = "http://www.quotationspage.com/data/mqotd.rss"

    # quotesCacheExpired() is only consulted when something is cached.
    fromCache = g_quotesDataCached is not None and not quotesCacheExpired()
    if fromCache:
        htmlTxt1, htmlTxt2 = g_quotesDataCached
        print("from cache")
    else:
        htmlTxt1 = getHttp(url1)
        htmlTxt2 = getHttp(url2)

    if htmlTxt1 is None and htmlTxt2 is None:
        return RETRIEVE_FAILED, None
    if htmlTxt1 is None or htmlTxt2 is None:
        # One feed is missing; parsing still goes ahead with the other.
        print("One is down")

    resultType, resultBody = parseDailyQuotes(htmlTxt1, htmlTxt2, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        # Either page may be None here, so substitute "" before joining.
        bothPages = (htmlTxt1 or "") + "\n----\n" + (htmlTxt2 or "")
        logParsingFailure("quotes", "daily", bothPages, url1)

    if QUOTES_DATA == resultType and htmlTxt1 is not None and htmlTxt2 is not None:
        # Cache only freshly downloaded data, and only when both pages are good.
        if not fromCache:
            g_quotesDataWhenCached = time.time()
            g_quotesDataCached = (htmlTxt1, htmlTxt2)
    elif g_quotesDataCached is not None:
        # No complete fresh data: fall back to the (possibly older) cached pages.
        resultType, resultBody = parseDailyQuotes(g_quotesDataCached[0], g_quotesDataCached[1], modulesInfo)
        logParsingFailure("quotes", "daily", None, "Using old cache - retrieve failed")
    return resultType, resultBody
Example #32
0
def retrieveSwitchboardBusiness(name, cityOrZip, state, surrounding, categoryOrName):
    """Search Switchboard for businesses by name or by category.

    categoryOrName ("Name" or "Category") picks the url template family and
    a cityOrZip of exactly five digits picks its ZIP variant; any other
    categoryOrName leaves the url empty.  surrounding only appears in the
    failure log.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from parseSwitchboardBusiness; UNKNOWN_FORMAT is also logged.
    """
    searchByZip = cityOrZip.isdigit() and len(cityOrZip) == 5

    url = ""
    if categoryOrName == "Name":
        if searchByZip:
            template = switchboardServerUrlBusinessSearchZip
        else:
            template = switchboardServerUrlBusinessSearch
        url = template % (urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))
    elif categoryOrName == "Category":
        if searchByZip:
            template = switchboardServerUrlBusinessSearchCategoryZip
        else:
            template = switchboardServerUrlBusinessSearchCategory
        url = template % (urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))

    # getHttpCached(url) can be substituted here while testing.
    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = parseSwitchboardBusiness(page)
    if UNKNOWN_FORMAT == status:
        request = ",".join([name, cityOrZip, state, surrounding, categoryOrName])
        logParsingFailure("411-Business-Search", request, page, url)
    return status, payload
Example #33
0
def _retrieve_yp_business(name,cityOrZip,state,surrounding,categoryOrName):
    """Business search against the yellow-pages server.

    categoryOrName ("Name" or "Category") picks the url template family and
    surrounding == "Yes" picks its "YPsa" variant; any other categoryOrName
    leaves the url empty.

    Returns (RETRIEVE_FAILED, None) when getHttp (three retries) yields
    None, otherwise the pair from m411.businessSearch; UNKNOWN_FORMAT is
    also reported via logParsingFailure.
    """
    # NOTE(review): spaces become "+" here and the result then goes through
    # urllib.quote, which percent-encodes "+" - confirm the server expects
    # that rather than a plain "+"/space.
    name = name.replace(" ", "+")

    url = ""
    if categoryOrName == "Name":
        if surrounding == "Yes":
            template = ypServerUrlBusinessSearchYPsa
        else:
            template = ypServerUrlBusinessSearch
        url = template % (urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))
    elif categoryOrName == "Category":
        if surrounding == "Yes":
            template = ypServerUrlBusinessSearchCategoryYPsa
        else:
            template = ypServerUrlBusinessSearchCategory
        url = template % (urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))

    page = getHttp(url, retryCount=3)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411.businessSearch(page)
    if UNKNOWN_FORMAT == status:
        # The logged name is the "+"-joined form, not the caller's original.
        request = ",".join([name, cityOrZip, state, surrounding, categoryOrName])
        logParsingFailure("411-Business-Search", request, page, url)
    return status, payload
Example #34
0
def retrieveBusinessSearchByUrl(urlIn):
    """Fetch a business-search page by url and parse it for its server.

    The prefix of urlIn decides the server: "yplist.php" (relative to
    wy_com_address), "/servlet" (relative to www.dexonline.com) or an
    absolute "http://www.switchboard.com" address.

    Returns (RETRIEVE_FAILED, None) for an unrecognised urlIn (nothing is
    fetched) or when getHttp (three retries) yields None, otherwise the
    pair from the matching parser; UNKNOWN_FORMAT is also reported via
    logParsingFailure.
    """
    # Which server does the url belong to?
    server = None
    url = ""
    if urlIn.startswith("yplist.php"):
        url = wy_com_address + ("/%s" % urlIn)
        server = "yp"
    elif urlIn.startswith("/servlet"):
        url = "http://www.dexonline.com%s" % urlIn
        server = "dex"
    elif urlIn.startswith("http://www.switchboard.com"):
        url = urlIn
        server = "switch"

    if server is None:
        return RETRIEVE_FAILED, None

    # All three servers are fetched the same way.
    page = getHttp(url, retryCount=3)
    if page is None:
        return RETRIEVE_FAILED, None

    if server == "yp":
        status, payload = m411.businessSearch(page)
    elif server == "dex":
        status, payload = m411_by411.businessSearchDex(page)
    else:
        status, payload = m411_by411.parseSwitchboardBusiness(page)

    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Business-Search-By-Url", urlIn, page, url)
    return status, payload
Example #35
0
def retrieveFullSearch(flyNo, airlines, airportFrom, airportTo, dateUp,
                       timeUp):
    """Search Travelocity for a flight and return the parsed result.

    dateUp is split on "/" into month, day and year; timeUp is split on ":"
    as 24-hour "hh:mm" and rewritten as 12-hour "h:mm(am|pm)" before it
    goes into the request url.

    Returns (MODULE_DOWN, None) when getHttp yields None, otherwise the
    (resultType, resultBody) pair from parseTravelocity.  An UNKNOWN_FORMAT
    result is additionally reported via logParsingFailure.
    """
    global g_airlinesTable
    # Arguments exactly as received; passed to the parser untouched.
    originalQuery = [flyNo, airlines, airportFrom, airportTo, dateUp, timeUp]

    month, day, year = dateUp.split("/")
    hourText, minuteText = timeUp.split(":")

    # 24-hour -> 12-hour clock: 0 becomes 12am, 12 stays 12pm,
    # anything above 12 is reduced by 12 and marked pm.
    hour = int(hourText)
    if hour < 12:
        meridiem = "am"
        if hour == 0:
            hour = 12
    else:
        meridiem = "pm"
        if hour != 12:
            hour -= 12
    timeUp = "%d:%s%s" % (hour, minuteText.zfill(2), meridiem)

    # "select airlines" is looked up as the empty airline name.
    if airlines == "select airlines":
        airlines = ""

    airportFrom = airportFrom.strip().replace(" ", "+")
    airportTo = airportTo.strip().replace(" ", "+")
    airlines = g_airlinesTable[airlines]

    urlTemplate = "http://dps1.travelocity.com/dparflifo.ctl?CMonth=%s&CDayOfMonth=%s&CYear=%s&LANG=EN&last_pgd_page=dparrqst.pgd&dep_arp_name=%s&arr_arp_name=%s&dep_dt_mn_1=%s&dep_dt_dy_1=%s&dep_tm_1=%s&aln_name=%s&flt_num=%s&CDayOfMonth=%s&x=66&y=10"
    urlArgs = (
        month, day, year,
        airportFrom, airportTo,
        monthFromNumber(month), day, urllib.quote(timeUp),
        airlines, flyNo, day,
    )
    url = urlTemplate % urlArgs

    page = getHttp(url)
    if page is None:
        return MODULE_DOWN, None

    resultType, resultBody = parseTravelocity(page, "s+flights:toc;" + url,
                                              originalQuery)
    if UNKNOWN_FORMAT == resultType:
        # Log the rewritten values (12-hour time, mapped airline, "+"-joined airports).
        loggedQuery = ";".join(
            [flyNo, airlines, airportFrom, airportTo, dateUp, timeUp])
        logParsingFailure("flights", loggedQuery, page, url)
    return resultType, resultBody
Example #36
0
def retrieveListsOfBestsBrowse(fieldValue, modulesInfo):
    """Browse listsofbests.com by top-level section or by list id.

    fieldValue is either "books", "movies" or "music" (case-insensitive),
    which is parsed with the category "Books", or "id;categorySymbol" for a
    specific list, whose symbol is translated with _symbolToCategory.  A
    value matching neither form raises ValueError from the unpacking.

    Returns (MODULE_DOWN, None) when getHttp yields None, otherwise the
    pair from parseListsOfBests; UNKNOWN_FORMAT is also reported via
    logParsingFailure.
    """
    translate = {
        "books": "http://listsofbests.com/lists/1/",
        "movies": "http://listsofbests.com/lists/2/",
        "music": "http://listsofbests.com/lists/3/"
    }
    category = "Books"
    try:
        url = translate[fieldValue.lower()]
    except KeyError:
        # Not a top-level section: treat the value as "id;categorySymbol".
        # listId rather than `id`, to avoid shadowing the builtin.
        listId, categorySymbol = fieldValue.split(";")
        category = _symbolToCategory(categorySymbol)
        url = "http://listsofbests.com/list/%s/" % listId

    htmlTxt = getHttp(url)
    if htmlTxt is None:
        return MODULE_DOWN, None
    resultType, resultBody = parseListsOfBests(htmlTxt, category, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        # Page text first, url last - the order used by the other
        # logParsingFailure callers (these two were swapped here).
        logParsingFailure("listsofbestsbrowse", fieldValue, htmlTxt, url)
    return resultType, resultBody
Example #37
0
def _retrieve_dex_business(name,cityOrZip,state,surrounding,categoryOrName):
    """Business search on www.dexonline.com.

    A five-digit cityOrZip is rejected: the refusal is logged with SEV_EXC
    and (RETRIEVE_FAILED, None) is returned without any request.
    categoryOrName ("Name" or "Category") picks the url template; any other
    value leaves the url empty.

    Returns (RETRIEVE_FAILED, None) when getHttp yields None, otherwise the
    pair from m411_by411.businessSearchDex; UNKNOWN_FORMAT is also reported
    via logParsingFailure.
    """
    # ZIP codes are not accepted by this search.
    if cityOrZip.isdigit() and len(cityOrZip) == 5:
        log(SEV_EXC, "_retrieve_dex_business doesn't support cityOrZip='%s'" % cityOrZip)
        return RETRIEVE_FAILED, None

    # The url carries the "surrounding" choice as a lowercase boolean word.
    if surrounding == "Yes":
        surroundingFlag = "true"
    else:
        surroundingFlag = "false"

    url = ""
    if categoryOrName == "Name":
        url = dexServerUrlBusinessSearch % (
            urllib.quote(cityOrZip), urllib.quote(state), surroundingFlag, urllib.quote(name))
    elif categoryOrName == "Category":
        url = dexServerUrlBusinessSearchCategory % (
            surroundingFlag, urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))

    page = getHttp(url)
    if page is None:
        return RETRIEVE_FAILED, None

    status, payload = m411_by411.businessSearchDex(page)
    if UNKNOWN_FORMAT == status:
        request = ",".join([name, cityOrZip, state, surrounding, categoryOrName])
        logParsingFailure("411-Business-Search", request, page, url)
    return status, payload
Example #38
0
def getJokesList(fieldValue):
    """Search jokes.comedycentral.com and return the parsed result list.

    fieldValue is ";"-separated:
        rating;order;explicitness;types;categories;keyword
    where rating is one digit, order is "rating" or "rank", the three list
    fields are space-separated names, and keyword is the remainder (it may
    itself contain ";").

    Returns:
        (INVALID_REQUEST, None) for a malformed request, including an
        unknown explicitness, type or category name (this used to raise
        ValueError, because list.index never returns -1);
        (MODULE_DOWN, None) when the page cannot be retrieved;
        otherwise the pair from _parseList.  UNKNOWN_FORMAT is also
        reported via logParsingFailure.
    """
    def _codes(userItems, knownItems, offset):
        # Translate names into the comma-joined numeric codes used in the
        # url; returns None as soon as a name is not in knownItems.
        codes = []
        for item in userItems:
            if len(item) <= 2:
                # Tokens of two characters or fewer (e.g. the empty strings
                # left by splitting) are skipped, as before.
                continue
            if item not in knownItems:
                return None
            codes.append(str(knownItems.index(item) + offset))
        return ",".join(codes)

    parts = fieldValue.split(";")
    if len(parts) < 6:
        return INVALID_REQUEST, None

    userRating = parts[0].strip()
    userOrder = parts[1].strip()
    userExplicitnessList = parts[2].strip().split(" ")
    userTypesList = parts[3].strip().split(" ")
    userCategoriesList = parts[4].strip().split(" ")
    keyword = ";".join(parts[5:]).strip()

    if userOrder not in ("rating", "rank"):
        return INVALID_REQUEST, None
    # The rating must be exactly one character in "0".."9".
    if 1 != len(userRating) or "0" > userRating or "9" < userRating:
        return INVALID_REQUEST, None
    rating = str(int(userRating) * 2)

    # "rank" ordering is only requested when there is a keyword.
    order = ""
    if "rating" == userOrder:
        order = "rtd"
    elif keyword:
        order = "rd"

    explicitness = _codes(userExplicitnessList, ["Clean", "Tame", "Racy"], 0)
    if explicitness is None:
        return INVALID_REQUEST, None

    categoriesList = [
        "Blonde", "Entertainment", "Men/Women", "Insults", "Yo-Mama", "Lawyer",
        "News&Politics", "Redneck", "Barroom", "Gross", "Sports", "Foreign",
        "Whatever", "Medical", "Sexuality", "Animals", "Children", "Anti-Joke",
        "Bush", "College", "Farm", "Business", "Religious", "Tech"
    ]
    # categories are in <14;37> range
    categories = _codes(userCategoriesList, categoriesList, 14)
    if categories is None:
        return INVALID_REQUEST, None

    typesList = [
        "Articles", "One-Liners", "QandA", "Sketches", "Stories", "Lists"
    ]
    types = _codes(userTypesList, typesList, 0)
    if types is None:
        return INVALID_REQUEST, None

    jokesUrl = "http://jokes.comedycentral.com/search/results_output.asp?p=1&c=%s&e=%s&t=%s&r=%s&o=%s&k=%s"
    url = jokesUrl % (categories, explicitness, types, rating, order,
                      urllib.quote(keyword))
    htmlTxt = retrieveHttpResponseWithRedirectionHandleException(url)
    if htmlTxt is None:
        return MODULE_DOWN, None

    resultType, resultBody = _parseList(htmlTxt)

    if resultType == UNKNOWN_FORMAT:
        # NOTE(review): fieldName is not defined in this function; it has to
        # be a module-level name or this line raises NameError - confirm.
        logParsingFailure(fieldName, fieldValue, htmlTxt, url)
    return resultType, resultBody
Example #39
0
def getJokesList(fieldValue):
    """Search jokes.comedycentral.com and return the parsed result list.

    fieldValue is ";"-separated:
        rating;order;explicitness;types;categories;keyword
    where rating is one digit, order is "rating" or "rank", the three list
    fields are space-separated names, and keyword is the remainder (it may
    itself contain ";").

    Returns:
        (INVALID_REQUEST, None) for a malformed request, including an
        unknown explicitness, type or category name (this used to raise
        ValueError, because list.index never returns -1);
        (MODULE_DOWN, None) when the page cannot be retrieved;
        otherwise the pair from _parseList.  UNKNOWN_FORMAT is also
        reported via logParsingFailure.
    """
    def _codes(userItems, knownItems, offset):
        # Translate names into the comma-joined numeric codes used in the
        # url; returns None as soon as a name is not in knownItems.
        codes = []
        for item in userItems:
            if len(item) <= 2:
                # Tokens of two characters or fewer (e.g. the empty strings
                # left by splitting) are skipped, as before.
                continue
            if item not in knownItems:
                return None
            codes.append(str(knownItems.index(item) + offset))
        return ",".join(codes)

    parts = fieldValue.split(";")
    if len(parts) < 6:
        return INVALID_REQUEST, None

    userRating = parts[0].strip()
    userOrder = parts[1].strip()
    userExplicitnessList = parts[2].strip().split(" ")
    userTypesList = parts[3].strip().split(" ")
    userCategoriesList = parts[4].strip().split(" ")
    keyword = ";".join(parts[5:]).strip()

    if userOrder not in ("rating", "rank"):
        return INVALID_REQUEST, None
    # The rating must be exactly one character in "0".."9".
    if 1 != len(userRating) or "0" > userRating or "9" < userRating:
        return INVALID_REQUEST, None
    rating = str(int(userRating) * 2)

    # "rank" ordering is only requested when there is a keyword.
    order = ""
    if "rating" == userOrder:
        order = "rtd"
    elif keyword:
        order = "rd"

    explicitness = _codes(userExplicitnessList, ["Clean", "Tame", "Racy"], 0)
    if explicitness is None:
        return INVALID_REQUEST, None

    categoriesList = [
        "Blonde",
        "Entertainment",
        "Men/Women",
        "Insults",
        "Yo-Mama",
        "Lawyer",
        "News&Politics",
        "Redneck",
        "Barroom",
        "Gross",
        "Sports",
        "Foreign",
        "Whatever",
        "Medical",
        "Sexuality",
        "Animals",
        "Children",
        "Anti-Joke",
        "Bush",
        "College",
        "Farm",
        "Business",
        "Religious",
        "Tech",
    ]
    # categories are in <14;37> range
    categories = _codes(userCategoriesList, categoriesList, 14)
    if categories is None:
        return INVALID_REQUEST, None

    typesList = ["Articles", "One-Liners", "QandA", "Sketches", "Stories", "Lists"]
    types = _codes(userTypesList, typesList, 0)
    if types is None:
        return INVALID_REQUEST, None

    jokesUrl = "http://jokes.comedycentral.com/search/results_output.asp?p=1&c=%s&e=%s&t=%s&r=%s&o=%s&k=%s"
    url = jokesUrl % (categories, explicitness, types, rating, order, urllib.quote(keyword))
    htmlTxt = retrieveHttpResponseWithRedirectionHandleException(url)
    if htmlTxt is None:
        return MODULE_DOWN, None

    resultType, resultBody = _parseList(htmlTxt)

    if resultType == UNKNOWN_FORMAT:
        # NOTE(review): fieldName is not defined in this function; it has to
        # be a module-level name or this line raises NameError - confirm.
        logParsingFailure(fieldName, fieldValue, htmlTxt, url)
    return resultType, resultBody