def retrieveFullSearch(flyNo, airlines, airportFrom, airportTo, dateUp, timeUp):
    """Search Travelocity for a flight and parse the returned page.

    dateUp is split on "/" into month, day, year and timeUp on ":" into
    24-hour hour and minutes.

    Returns (MODULE_DOWN, None) when getHttp returns None, otherwise the
    (resultType, resultBody) pair produced by parseTravelocity.
    """
    # The parser receives the values exactly as the caller passed them.
    query = [flyNo, airlines, airportFrom, airportTo, dateUp, timeUp]

    month, day, year = dateUp.split("/")
    hourText, minutes = timeUp.split(":")

    # Turn the 24-hour value into the "h:mmam" / "h:mmpm" text used in the URL.
    hour = int(hourText)
    if hour == 0:
        hour, suffix = 12, "am"
    elif hour < 12:
        suffix = "am"
    elif hour == 12:
        suffix = "pm"
    else:
        hour, suffix = hour - 12, "pm"
    timeUp = "%d:%s%s" % (hour, minutes.zfill(2), suffix)

    # "select airlines" is looked up in the table under the empty key.
    if airlines == "select airlines":
        airlines = ""
    airportFrom = airportFrom.strip().replace(" ", "+")
    airportTo = airportTo.strip().replace(" ", "+")
    airlines = g_airlinesTable[airlines]

    monthName = monthFromNumber(month)
    quotedTime = urllib.quote(timeUp)
    urlArgs = (month, day, year, airportFrom, airportTo, monthName, day,
               quotedTime, airlines, flyNo, day)
    url = ("http://dps1.travelocity.com/dparflifo.ctl?CMonth=%s&CDayOfMonth=%s&CYear=%s"
           "&LANG=EN&last_pgd_page=dparrqst.pgd&dep_arp_name=%s&arr_arp_name=%s"
           "&dep_dt_mn_1=%s&dep_dt_dy_1=%s&dep_tm_1=%s&aln_name=%s&flt_num=%s"
           "&CDayOfMonth=%s&x=66&y=10") % urlArgs

    htmlTxt = getHttp(url)
    if htmlTxt is None:
        return MODULE_DOWN, None

    resultType, resultBody = parseTravelocity(htmlTxt, "s+flights:toc;" + url, query)
    if resultType == UNKNOWN_FORMAT:
        # Logged with the converted values (table airline, "+"-joined airports, 12-hour time).
        logged = ";".join([flyNo, airlines, airportFrom, airportTo, dateUp, timeUp])
        logParsingFailure("flights", logged, htmlTxt, url)
    return resultType, resultBody
def _parse_currency(htmlText, function, url): try: res = function(htmlText) return res except: logParsingFailure("Get-Currency-Conversion", "", htmlText, url) raise
def _retrieveRandomJoke():
    """Fetch a random joke, retrying with new random ids up to six times.

    Returns (MODULE_DOWN, None) as soon as getHttp returns None and the
    parser's (JOKE_DATA, body) on the first successful parse. When all six
    attempts fail to produce JOKE_DATA, a "No joke" placeholder is returned
    as the body.

    Fix: the fallback used to assign the placeholder data to the status
    variable, so callers received (data, body) instead of (status, body),
    and the UNKNOWN_FORMAT logging that followed could never run.
    """
    global g_randomJokesParseFailed
    res, body = UNKNOWN_FORMAT, None
    htmlTxt, url = None, None
    # for some reason some jokes are empty, so several random ids are tried
    for _attempt in range(6):
        randomJokeNumber = random.randint(1, 11073)
        url = "http://jokes.comedycentral.com/index_joke.asp?initRand=true&sql=12&rand_id=%s" % str(randomJokeNumber)
        htmlTxt = getHttp(url)
        if htmlTxt is None:
            return MODULE_DOWN, None
        res, body = _parseRandomJoke(htmlTxt)
        if JOKE_DATA == res:
            return res, body

    # Every attempt failed. Record the last page if the parser did not
    # recognise it; at most five such reports are written per process.
    if UNKNOWN_FORMAT == res and g_randomJokesParseFailed < 5:
        logParsingFailure("Get-Joke", "random", htmlTxt, url)
        g_randomJokesParseFailed += 1

    udf = [[
        "No joke",
        "Please try again. It may be module problem. If you seen this screen more than 3 times, please wait some time (one day) before you press 'Random joke' button."
    ]]
    # NOTE(review): the placeholder is returned as a JOKE_DATA body on the
    # assumption that callers render it like a parsed joke - confirm.
    return JOKE_DATA, universalDataFormatReplaceEntities(udf)
def _retrieveRandomJoke():
    """Fetch a random joke, retrying with new random ids up to six times.

    Returns (MODULE_DOWN, None) as soon as getHttp returns None and the
    parser's (JOKE_DATA, body) on the first successful parse. When all six
    attempts fail to produce JOKE_DATA, a "No joke" placeholder is returned
    as the body.

    Fix: the fallback used to assign the placeholder data to the status
    variable, so callers received (data, body) instead of (status, body),
    and the UNKNOWN_FORMAT logging that followed could never run.
    """
    global g_randomJokesParseFailed
    res, body = UNKNOWN_FORMAT, None
    htmlTxt, url = None, None
    # for some reason some jokes are empty, so several random ids are tried
    for _attempt in range(6):
        randomJokeNumber = random.randint(1, 11073)
        url = "http://jokes.comedycentral.com/index_joke.asp?initRand=true&sql=12&rand_id=%s" % str(randomJokeNumber)
        htmlTxt = getHttp(url)
        if htmlTxt is None:
            return MODULE_DOWN, None
        res, body = _parseRandomJoke(htmlTxt)
        if JOKE_DATA == res:
            return res, body

    # Every attempt failed. Record the last page if the parser did not
    # recognise it; at most five such reports are written per process.
    if UNKNOWN_FORMAT == res and g_randomJokesParseFailed < 5:
        logParsingFailure("Get-Joke", "random", htmlTxt, url)
        g_randomJokesParseFailed += 1

    udf = [[
        "No joke",
        "Please try again. It may be module problem. If you seen this screen more than 3 times, please wait some time (one day) before you press 'Random joke' button."
    ]]
    # NOTE(review): the placeholder is returned as a JOKE_DATA body on the
    # assumption that callers render it like a parsed joke - confirm.
    return JOKE_DATA, universalDataFormatReplaceEntities(udf)
def retrieveSwitchboardBusiness(name, cityOrZip, state, surrounding, categoryOrName):
    """Run a Switchboard business search and parse the result page.

    categoryOrName selects a "Name" or "Category" search; a cityOrZip made
    of exactly five digits selects the ZIP variant of the URL template.
    `surrounding` is only used in the parse-failure log entry.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from parseSwitchboardBusiness.
    """
    isZip = cityOrZip.isdigit() and len(cityOrZip) == 5

    # Pick the URL template; any other categoryOrName leaves the url empty.
    template = None
    if categoryOrName == "Name":
        if isZip:
            template = switchboardServerUrlBusinessSearchZip
        else:
            template = switchboardServerUrlBusinessSearch
    elif categoryOrName == "Category":
        if isZip:
            template = switchboardServerUrlBusinessSearchCategoryZip
        else:
            template = switchboardServerUrlBusinessSearchCategory

    url = ""
    if template is not None:
        url = template % (urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))

    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    res, data = parseSwitchboardBusiness(page)
    if UNKNOWN_FORMAT == res:
        queryText = ",".join([name, cityOrZip, state, surrounding, categoryOrName])
        logParsingFailure("411-Business-Search", queryText, page, url)
    return res, data
def retrieveRandom(modulesInfo):
    """Post a random-quotes request to quotationspage.com and parse the reply.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (resultType, resultBody) pair from parseRandomQuotes.
    """
    url = "http://www.quotationspage.com/random.php3"
    # The previous dict literal repeated the "collection[]" key for mgm,
    # motivate, classic, coles, lindsly, poorc, altq, 20thcent, bywomen,
    # devils and contrib. A dict keeps only the last value of a repeated
    # key, so "contrib" was the only collection ever posted. The payload
    # below is exactly what was sent before, without the dead entries.
    # NOTE(review): requesting every collection needs getHttp to accept
    # repeated keys (e.g. a list of pairs) - confirm before widening this.
    postData = {
        "number": "4",
        "collection[]": "contrib",
    }
    htmlTxt = getHttp(url, postData=postData)
    if htmlTxt is None:
        return RETRIEVE_FAILED, None

    resultType, resultBody = parseRandomQuotes(htmlTxt, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        logParsingFailure("quotes", "random", htmlTxt, url)
    return resultType, resultBody
def retrieveRandom(modulesInfo):
    """Post a random-quotes request to quotationspage.com and parse the reply.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (resultType, resultBody) pair from parseRandomQuotes.
    """
    url = "http://www.quotationspage.com/random.php3"
    # The previous dict literal repeated the "collection[]" key for mgm,
    # motivate, classic, coles, lindsly, poorc, altq, 20thcent, bywomen,
    # devils and contrib. A dict keeps only the last value of a repeated
    # key, so "contrib" was the only collection ever posted. The payload
    # below is exactly what was sent before, without the dead entries.
    # NOTE(review): requesting every collection needs getHttp to accept
    # repeated keys (e.g. a list of pairs) - confirm before widening this.
    postData = {
        "number": "4",
        "collection[]": "contrib",
    }
    htmlTxt = getHttp(url, postData=postData)
    if htmlTxt is None:
        return RETRIEVE_FAILED, None

    resultType, resultBody = parseRandomQuotes(htmlTxt, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        logParsingFailure("quotes", "random", htmlTxt, url)
    return resultType, resultBody
def retrieveListsOfBestsSearch(fieldValue, modulesInfo):
    """Search listsofbests.com.

    fieldValue has the form "keywords;media;field", where media is one of
    Everything/Books/Movies/Music and field is one of Both/Title/Creator.

    Returns (INVALID_REQUEST, None) for a malformed fieldValue,
    (MODULE_DOWN, None) when getHttp returns None, otherwise the
    (resultType, resultBody) pair from parseListsOfBestsSearch.
    """
    parts = fieldValue.split(";")
    if 3 != len(parts):
        return INVALID_REQUEST, None
    keywords, mediaName, titleName = parts

    mediaTable = {"Everything": "0", "Books": "1", "Movies": "2", "Music": "3"}
    titleTable = {"Both": "0", "Title": "1", "Creator": "2"}
    # Explicit membership tests replace a bare "except:" that hid every
    # error raised while looking the names up.
    if mediaName not in mediaTable or titleName not in titleTable:
        return INVALID_REQUEST, None

    postData = {
        "full_qry": keywords,
        "media": mediaTable[mediaName],
        "which": titleTable[titleName],
        "srch": "Search",
        "": "Clear"
    }
    url = "http://listsofbests.com/?"
    htmlTxt = getHttp(url, postData=postData)
    if htmlTxt is None:
        return MODULE_DOWN, None

    resultType, resultBody = parseListsOfBestsSearch(htmlTxt, keywords, mediaName, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        # Page text third, url fourth, as at the other call sites (the two
        # were swapped here).
        logParsingFailure("listsofbestssearch", fieldValue, htmlTxt, url)
    return resultType, resultBody
def retrieveUrlToc(url):
    """Fetch a Travelocity page by url and parse it.

    Returns (MODULE_DOWN, None) when getHttp returns None, otherwise the
    (resultType, resultBody) pair from parseTravelocity.
    """
    page = getHttp(url)
    if page is None:
        return MODULE_DOWN, None

    # No query list is available for a plain url request, hence the None.
    outcome, body = parseTravelocity(page, "s+flights:toc;" + url, None)
    if outcome == UNKNOWN_FORMAT:
        logParsingFailure("flights", "toc;" + url, page, url)
    return outcome, body
def getJoke(fieldValue):
    """Fetch and parse a single joke page.

    The "/results/detail.asp" part of fieldValue is replaced by the
    index_joke.asp address on jokes.comedycentral.com to form the url.

    Returns (MODULE_DOWN, None) when getHttp returns None, otherwise the
    (res, body) pair from _parseRandomJoke.
    """
    jokePage = "http://jokes.comedycentral.com/index_joke.asp"
    url = fieldValue.replace("/results/detail.asp", jokePage)

    page = getHttp(url)
    if page is None:
        return MODULE_DOWN, None

    res, body = _parseRandomJoke(page)
    if res == UNKNOWN_FORMAT:
        logParsingFailure("Get-Joke", fieldValue, page, url)
    return res, body
def retrieveUrlToc(url):
    """Fetch a Travelocity page by url and parse it.

    Returns (MODULE_DOWN, None) when getHttp returns None, otherwise the
    (resultType, resultBody) pair from parseTravelocity.
    """
    page = getHttp(url)
    if page is None:
        return MODULE_DOWN, None

    # No query list is available for a plain url request, hence the None.
    outcome, body = parseTravelocity(page, "s+flights:toc;" + url, None)
    if outcome == UNKNOWN_FORMAT:
        logParsingFailure("flights", "toc;" + url, page, url)
    return outcome, body
def retrieveDaily(modulesInfo):
    """Return the parsed daily quotes, using the module-level cache.

    The two RSS feeds are taken from g_quotesDataCached while
    quotesCacheExpired() is false and downloaded otherwise.

    Returns (RETRIEVE_FAILED, None) when neither feed is available,
    otherwise the (resultType, resultBody) pair from parseDailyQuotes.
    If the new data is incomplete or does not parse as QUOTES_DATA and
    previously cached pages exist, the result comes from those instead.

    Fixes:
    - cached pages are used only when present and not expired; every
      other case downloads (the old nested if/else skipped the download
      in one of those cases);
    - a cache hit no longer re-stamps g_quotesDataWhenCached;
    - the parse-failure log no longer raises TypeError when one of the
      two feeds is None.
    """
    global g_quotesDataCached, g_quotesDataWhenCached
    url1 = "http://www.quotationspage.com/data/qotd.rss"
    url2 = "http://www.quotationspage.com/data/mqotd.rss"

    cached = g_quotesDataCached
    fromCache = cached is not None and not quotesCacheExpired()
    if fromCache:
        htmlTxt1, htmlTxt2 = cached
        print("from cache")
    else:
        htmlTxt1 = getHttp(url1)
        htmlTxt2 = getHttp(url2)

    if htmlTxt1 is None and htmlTxt2 is None:
        return RETRIEVE_FAILED, None
    bothPresent = htmlTxt1 is not None and htmlTxt2 is not None
    if not bothPresent:
        # one feed is down; the parser still gets the other one
        print("One is down")

    resultType, resultBody = parseDailyQuotes(htmlTxt1, htmlTxt2, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        # A missing feed is logged as an empty section.
        combined = (htmlTxt1 or "") + "\n----\n" + (htmlTxt2 or "")
        logParsingFailure("quotes", "daily", combined, url1)

    if QUOTES_DATA == resultType and bothPresent:
        if not fromCache:
            # Cache only freshly downloaded data, and only when both pages
            # are good. NOTE(review): presumably quotesCacheExpired() reads
            # g_quotesDataWhenCached; re-stamping on a hit would then keep
            # the cache from ever expiring - confirm.
            g_quotesDataWhenCached = time.time()
            g_quotesDataCached = (htmlTxt1, htmlTxt2)
    elif g_quotesDataCached is not None:
        # no usable new data, so fall back to the older cached pages
        oldTxt1, oldTxt2 = g_quotesDataCached[0], g_quotesDataCached[1]
        resultType, resultBody = parseDailyQuotes(oldTxt1, oldTxt2, modulesInfo)
        logParsingFailure("quotes", "daily", None, "Using old cache - retrieve failed")
    return resultType, resultBody
def _retrieve_dreammoods(keyword):
    """Look a dream symbol up on dreammoods.com and parse the page.

    Returns (RETRIEVE_FAILED, None) when the download yields None,
    otherwise the (res, data) pair from dreams.parseDream.
    """
    template = "http://dreammoods.com/cgibin/searchcsv.pl?search=%s&method=exact&header=symbol"
    url = template % urllib.quote(keyword)

    page = retrieveHttpResponseWithRedirectionHandleException(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = dreams.parseDream(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("Get-Dream", keyword, page, url)
    return status, payload
def _retrieve_411_international(code):
    """Query 411.com's Find_Intl_Code page for `code` and parse the reply.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411_by411.internationalCodeSearch.
    """
    # `code` goes into the query string as given (no URL quoting).
    url = "http://www.411.com/search/Find_Intl_Code?country_id=%s" % code

    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = m411_by411.internationalCodeSearch(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-International-Code", code, page, url)
    return status, payload
def _retrieve_411_reverseZipCode(code):
    """Query 411.com's Reverse_Zip page for `code` and parse the reply.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411_by411.reverseZIPCodeLookup.
    """
    # `code` goes into the query string as given (no URL quoting).
    url = "http://www.411.com/search/Reverse_Zip?zip=%s" % code

    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = m411_by411.reverseZIPCodeLookup(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Zip", code, page, url)
    return status, payload
def _retrieve_yp_reversePhone(xxx,yyy,zzzz):
    """Reverse phone lookup against the server at wy_com_address.

    The three arguments are the parts of the number; it is logged as
    "xxx-yyy-zzzz". The download is attempted with retryCount=3.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411.reversePhoneLookup.
    """
    template = wy_com_address + "/wp-p-results.php?npa=%s&np3=%s&np4=%s&client=1482&ver=1.2&type=p&phone=%s%s"
    # The trailing "phone" parameter carries only the last two parts.
    url = template % (xxx, yyy, zzzz, yyy, zzzz)

    page = getHttp(url, retryCount=3)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = m411.reversePhoneLookup(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Phone", "-".join([xxx, yyy, zzzz]), page, url)
    return status, payload
def _retrieve_411_areaCodeByCity(city, state):
    """Query 411.com's Find_Areacode page for a city/state and parse the reply.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411_by411.areaCodeByCity.
    """
    cityArg = urllib.quote(city)
    stateArg = urllib.quote(state)
    url = "http://www.411.com/search/Find_Areacode?city=%s&state_id=%s" % (cityArg, stateArg)

    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = m411_by411.areaCodeByCity(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Area-Code-By-City", ",".join([city, state]), page, url)
    return status, payload
def _retrieve_whitepages_reversePhone(xxx,yyy,zzzz):
    """Reverse phone lookup on yp.whitepages.com.

    The three arguments are the parts of the number; they are joined
    without separators in the url and with "-" in the log entry.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411.reversePhoneLookupWhitepages.
    """
    url = "http://yp.whitepages.com/1048/search/Reverse_Phone?phone=%s%s%s" % (xxx, yyy, zzzz)

    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = m411.reversePhoneLookupWhitepages(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Phone", "-".join([xxx, yyy, zzzz]), page, url)
    return status, payload
def _retrieve_411_person(firstName,lastName,cityOrZip,state):
    """Person search on 411.com (Find_Person).

    All four values are URL-quoted into the query string.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411_by411.personSearch.
    """
    template = "http://www.411.com/search/Find_Person?firstname_begins_with=1&firstname=%s&name_begins_with=1&name=%s&city_zip=%s&state_id=%s"
    quoted = (
        urllib.quote(firstName),
        urllib.quote(lastName),
        urllib.quote(cityOrZip),
        urllib.quote(state),
    )
    url = template % quoted

    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = m411_by411.personSearch(page)
    if UNKNOWN_FORMAT == status:
        queryText = ";".join([firstName, lastName, cityOrZip, state])
        logParsingFailure("411-Person-Search", queryText, page, url)
    return status, payload
def _retrieve_411_reverseAreaCode(code):
    """Query 411.com's Reverse_Areacode page for `code` and parse the reply.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411_by411.reverseAreaCodeLookup.
    """
    # `code` goes into the query string as given (no URL quoting).
    url = "http://www.411.com/log_feature/sort/search/Reverse_Areacode?npa=%s&sort=alpha" % code

    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = m411_by411.reverseAreaCodeLookup(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Area-Code", code, page, url)
    return status, payload
def _retrieve_yp_person(firstName,lastName,cityOrZip,state):
    """Person search against the server at wy_com_address.

    All four values are URL-quoted into the query string and the download
    is attempted with retryCount=3.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411.personSearch.
    """
    template = wy_com_address + "/white-pages-results.php?f=%s&firstname_begins_with=1&l=%s&name_begins_with=1&c=%s&s=%s&client=&ver=1.4&type=r"
    quoted = (
        urllib.quote(firstName),
        urllib.quote(lastName),
        urllib.quote(cityOrZip),
        urllib.quote(state),
    )
    url = template % quoted

    page = getHttp(url, retryCount=3)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = m411.personSearch(page)
    if UNKNOWN_FORMAT == status:
        queryText = ";".join([firstName, lastName, cityOrZip, state])
        logParsingFailure("411-Person-Search", queryText, page, url)
    return status, payload
def _retrieve_yp_zipCodeByCity(city, state):
    """Query yp.whitepages.com's Find_Zip page for a city/state.

    The download is attempted with retryCount=3.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411.ZIPCodeByCity.
    """
    cityArg = urllib.quote(city)
    stateArg = urllib.quote(state)
    url = "http://yp.whitepages.com/search/Find_Zip?city_zip=%s&state_id=%s" % (cityArg, stateArg)

    page = getHttp(url, retryCount=3)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = m411.ZIPCodeByCity(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Zip-By-City", ",".join([city, state]), page, url)
    return status, payload
def _retrieve_wordiq(keyword):
    """Look a dream symbol up on wordiq.com and parse the page.

    Returns (RETRIEVE_FAILED, None) when the download yields None,
    otherwise the (res, data) pair from dreams.parseDream2.
    """
    url = "http://www.wordiq.com/dream/%s" % urllib.quote(keyword)

    page = retrieveHttpResponseWithRedirectionHandleException(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = dreams.parseDream2(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("Get-Dream", keyword, page, url)
    return status, payload
def retrieveListsOfBestsItem(fieldValue, modulesInfo):
    """Fetch and parse the details page of one listsofbests.com item.

    fieldValue has the form "id;categorySymbol"; any other number of
    ";"-separated parts raises ValueError, as before.

    Returns (MODULE_DOWN, None) when getHttp returns None, otherwise the
    (resultType, resultBody) pair from parseListsOfBests.
    """
    # "itemId" instead of "id", so the builtin is not shadowed.
    (itemId, category) = fieldValue.split(";")
    category = _symbolToCategory(category)
    url = "http://listsofbests.com/details.cgi?id=%s" % itemId

    htmlTxt = getHttp(url)
    if htmlTxt is None:
        return MODULE_DOWN, None

    resultType, resultBody = parseListsOfBests(htmlTxt, category, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        # Page text third, url fourth, as at the other call sites (the two
        # were swapped here).
        logParsingFailure("listsofbestsitem", fieldValue, htmlTxt, url)
    return resultType, resultBody
def retrieve411ReversePhone(xxx, yyy, zzzz):
    """Reverse phone lookup on 411.com.

    The three arguments are the parts of the number, joined with "-" both
    in the url and in the log entry.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from parse411ReversePhoneLookup.
    """
    numberParts = (xxx, yyy, zzzz)
    url = "http://www.411.com/search/Reverse_Phone?phone=%s-%s-%s" % numberParts

    # getHttpCached(url) can be substituted here while testing
    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = parse411ReversePhoneLookup(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Phone", "-".join([xxx, yyy, zzzz]), page, url)
    return status, payload
def retrieve411ReversePhone(xxx, yyy, zzzz):
    """Reverse phone lookup on 411.com.

    The three arguments are the parts of the number, joined with "-" both
    in the url and in the log entry.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from parse411ReversePhoneLookup.
    """
    numberParts = (xxx, yyy, zzzz)
    url = "http://www.411.com/search/Reverse_Phone?phone=%s-%s-%s" % numberParts

    # getHttpCached(url) can be substituted here while testing
    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = parse411ReversePhoneLookup(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Phone", "-".join([xxx, yyy, zzzz]), page, url)
    return status, payload
def retrieveYpReverseAreaCode(zipCode):
    """Query yp.whitepages.com's Reverse_Areacode page and parse the reply.

    zipCode is placed in the "npa" query parameter as given. The download
    is attempted with retryCount=3.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from parseYpReverseAreaCode.
    """
    url = "http://yp.whitepages.com/log_feature/sort/search/Reverse_Areacode?npa=%s&sort=alpha" % zipCode
    # trace output, printed on every call
    print("retrieveYpReverseAreaCode")

    # getHttpCached(url, retryCount=3) can be substituted here while testing
    page = getHttp(url, retryCount=3)
    if page is None:
        return (RETRIEVE_FAILED, None)

    status, payload = parseYpReverseAreaCode(page)
    if UNKNOWN_FORMAT == status:
        logParsingFailure("411-Reverse-Area-Code", zipCode, page, url)
    return status, payload
def _zap2it_retrieve_providers(jar, zipCode):
    """Read the zap2it providers page for zipCode and parse it.

    Returns whatever _zap2it_parse_providers produces, or None when the
    parser raises. The response is closed whether or not reading succeeds.
    """
    response, _opener = _zap2it_get_providers_stage(jar, zipCode)
    try:
        htmlText = response.read()
    finally:
        response.close()

    try:
        return _zap2it_parse_providers(htmlText)
    except Exception:
        # Page text third, source label fourth: the other logParsingFailure
        # call sites pass (name, value, html, url) and this one had the
        # last two reversed.
        logParsingFailure(Fields.getTvListingsProviders, zipCode, htmlText, 'zap2it_providers')
        return None
def retrieveDaily(modulesInfo):
    """Return the parsed daily quotes, using the module-level cache.

    The two RSS feeds are taken from g_quotesDataCached while
    quotesCacheExpired() is false and downloaded otherwise.

    Returns (RETRIEVE_FAILED, None) when neither feed is available,
    otherwise the (resultType, resultBody) pair from parseDailyQuotes.
    If the new data is incomplete or does not parse as QUOTES_DATA and
    previously cached pages exist, the result comes from those instead.

    Fixes:
    - cached pages are used only when present and not expired; every
      other case downloads (the old nested if/else skipped the download
      in one of those cases);
    - a cache hit no longer re-stamps g_quotesDataWhenCached;
    - the parse-failure log no longer raises TypeError when one of the
      two feeds is None.
    """
    global g_quotesDataCached, g_quotesDataWhenCached
    url1 = "http://www.quotationspage.com/data/qotd.rss"
    url2 = "http://www.quotationspage.com/data/mqotd.rss"

    cached = g_quotesDataCached
    fromCache = cached is not None and not quotesCacheExpired()
    if fromCache:
        htmlTxt1, htmlTxt2 = cached
        print("from cache")
    else:
        htmlTxt1 = getHttp(url1)
        htmlTxt2 = getHttp(url2)

    if htmlTxt1 is None and htmlTxt2 is None:
        return RETRIEVE_FAILED, None
    bothPresent = htmlTxt1 is not None and htmlTxt2 is not None
    if not bothPresent:
        # one feed is down; the parser still gets the other one
        print("One is down")

    resultType, resultBody = parseDailyQuotes(htmlTxt1, htmlTxt2, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        # A missing feed is logged as an empty section.
        combined = (htmlTxt1 or "") + "\n----\n" + (htmlTxt2 or "")
        logParsingFailure("quotes", "daily", combined, url1)

    if QUOTES_DATA == resultType and bothPresent:
        if not fromCache:
            # Cache only freshly downloaded data, and only when both pages
            # are good. NOTE(review): presumably quotesCacheExpired() reads
            # g_quotesDataWhenCached; re-stamping on a hit would then keep
            # the cache from ever expiring - confirm.
            g_quotesDataWhenCached = time.time()
            g_quotesDataCached = (htmlTxt1, htmlTxt2)
    elif g_quotesDataCached is not None:
        # no usable new data, so fall back to the older cached pages
        oldTxt1, oldTxt2 = g_quotesDataCached[0], g_quotesDataCached[1]
        resultType, resultBody = parseDailyQuotes(oldTxt1, oldTxt2, modulesInfo)
        logParsingFailure("quotes", "daily", None, "Using old cache - retrieve failed")
    return resultType, resultBody
def retrieveSwitchboardBusiness(name, cityOrZip, state, surrounding, categoryOrName):
    """Run a Switchboard business search and parse the result page.

    categoryOrName selects a "Name" or "Category" search; a cityOrZip made
    of exactly five digits selects the ZIP variant of the URL template.
    `surrounding` is only used in the parse-failure log entry.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from parseSwitchboardBusiness.
    """
    isZip = cityOrZip.isdigit() and len(cityOrZip) == 5

    # Pick the URL template; any other categoryOrName leaves the url empty.
    template = None
    if categoryOrName == "Name":
        if isZip:
            template = switchboardServerUrlBusinessSearchZip
        else:
            template = switchboardServerUrlBusinessSearch
    elif categoryOrName == "Category":
        if isZip:
            template = switchboardServerUrlBusinessSearchCategoryZip
        else:
            template = switchboardServerUrlBusinessSearchCategory

    url = ""
    if template is not None:
        url = template % (urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))

    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    res, data = parseSwitchboardBusiness(page)
    if UNKNOWN_FORMAT == res:
        queryText = ",".join([name, cityOrZip, state, surrounding, categoryOrName])
        logParsingFailure("411-Business-Search", queryText, page, url)
    return res, data
def _retrieve_yp_business(name,cityOrZip,state,surrounding,categoryOrName):
    """Business search against the yellow-pages server URL templates.

    categoryOrName selects a "Name" or "Category" search and
    surrounding == "Yes" selects the "YPsa" template variant. The download
    is attempted with retryCount=3.

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411.businessSearch.
    """
    # Spaces become "+" before quoting; the "+" form is also what gets logged.
    # NOTE(review): urllib.quote then encodes each "+" as "%2B" - confirm
    # that the server expects this and not quote_plus-style spaces.
    name = name.replace(" ", "+")
    wantSurrounding = surrounding == "Yes"

    # Pick the URL template; any other categoryOrName leaves the url empty.
    template = None
    if categoryOrName == "Name":
        if wantSurrounding:
            template = ypServerUrlBusinessSearchYPsa
        else:
            template = ypServerUrlBusinessSearch
    elif categoryOrName == "Category":
        if wantSurrounding:
            template = ypServerUrlBusinessSearchCategoryYPsa
        else:
            template = ypServerUrlBusinessSearchCategory

    url = ""
    if template is not None:
        url = template % (urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))

    page = getHttp(url, retryCount=3)
    if page is None:
        return (RETRIEVE_FAILED, None)

    res, data = m411.businessSearch(page)
    if UNKNOWN_FORMAT == res:
        queryText = ",".join([name, cityOrZip, state, surrounding, categoryOrName])
        logParsingFailure("411-Business-Search", queryText, page, url)
    return res, data
def retrieveBusinessSearchByUrl(urlIn):
    """Follow a business-search link and parse it with the matching parser.

    The prefix of urlIn decides both the server and the parser:
    "yplist.php" (wy_com_address), "/servlet" (dexonline.com) or
    "http://www.switchboard.com". The download uses retryCount=3.

    Returns (RETRIEVE_FAILED, None) for an unrecognised link or when
    getHttp returns None, otherwise the (res, data) pair from the parser.
    """
    # which server does this link belong to?
    if urlIn.startswith("yplist.php"):
        source = "yp"
        url = wy_com_address + "/%s" % urlIn
    elif urlIn.startswith("/servlet"):
        source = "dex"
        url = "http://www.dexonline.com%s" % urlIn
    elif urlIn.startswith("http://www.switchboard.com"):
        source = "switch"
        url = urlIn
    else:
        # unrecognised link: nothing is fetched
        return (RETRIEVE_FAILED, None)

    page = getHttp(url, retryCount=3)
    if page is None:
        return (RETRIEVE_FAILED, None)

    if source == "yp":
        res, data = m411.businessSearch(page)
    elif source == "dex":
        res, data = m411_by411.businessSearchDex(page)
    else:
        res, data = m411_by411.parseSwitchboardBusiness(page)

    if UNKNOWN_FORMAT == res:
        logParsingFailure("411-Business-Search-By-Url", urlIn, page, url)
    return res, data
def retrieveFullSearch(flyNo, airlines, airportFrom, airportTo, dateUp, timeUp):
    """Search Travelocity for a flight and parse the returned page.

    dateUp is split on "/" into month, day, year and timeUp on ":" into
    24-hour hour and minutes.

    Returns (MODULE_DOWN, None) when getHttp returns None, otherwise the
    (resultType, resultBody) pair produced by parseTravelocity.
    """
    # The parser receives the values exactly as the caller passed them.
    query = [flyNo, airlines, airportFrom, airportTo, dateUp, timeUp]

    month, day, year = dateUp.split("/")
    hourText, minutes = timeUp.split(":")

    # Turn the 24-hour value into the "h:mmam" / "h:mmpm" text used in the URL.
    hour = int(hourText)
    if hour == 0:
        hour, suffix = 12, "am"
    elif hour < 12:
        suffix = "am"
    elif hour == 12:
        suffix = "pm"
    else:
        hour, suffix = hour - 12, "pm"
    timeUp = "%d:%s%s" % (hour, minutes.zfill(2), suffix)

    # "select airlines" is looked up in the table under the empty key.
    if airlines == "select airlines":
        airlines = ""
    airportFrom = airportFrom.strip().replace(" ", "+")
    airportTo = airportTo.strip().replace(" ", "+")
    airlines = g_airlinesTable[airlines]

    monthName = monthFromNumber(month)
    quotedTime = urllib.quote(timeUp)
    urlArgs = (month, day, year, airportFrom, airportTo, monthName, day,
               quotedTime, airlines, flyNo, day)
    url = ("http://dps1.travelocity.com/dparflifo.ctl?CMonth=%s&CDayOfMonth=%s&CYear=%s"
           "&LANG=EN&last_pgd_page=dparrqst.pgd&dep_arp_name=%s&arr_arp_name=%s"
           "&dep_dt_mn_1=%s&dep_dt_dy_1=%s&dep_tm_1=%s&aln_name=%s&flt_num=%s"
           "&CDayOfMonth=%s&x=66&y=10") % urlArgs

    htmlTxt = getHttp(url)
    if htmlTxt is None:
        return MODULE_DOWN, None

    resultType, resultBody = parseTravelocity(htmlTxt, "s+flights:toc;" + url, query)
    if resultType == UNKNOWN_FORMAT:
        # Logged with the converted values (table airline, "+"-joined airports, 12-hour time).
        logged = ";".join([flyNo, airlines, airportFrom, airportTo, dateUp, timeUp])
        logParsingFailure("flights", logged, htmlTxt, url)
    return resultType, resultBody
def retrieveListsOfBestsBrowse(fieldValue, modulesInfo):
    """Browse listsofbests.com by top-level medium or by list id.

    fieldValue is either "books", "movies" or "music" (any letter case)
    or has the form "id;categorySymbol". A value that is neither raises
    ValueError from the unpacking, as before.

    Returns (MODULE_DOWN, None) when getHttp returns None, otherwise the
    (resultType, resultBody) pair from parseListsOfBests.
    """
    translate = {
        "books": "http://listsofbests.com/lists/1/",
        "movies": "http://listsofbests.com/lists/2/",
        "music": "http://listsofbests.com/lists/3/"
    }
    # A plain lookup replaces the bare "except:" that was used to detect a
    # missing key and would also have hidden unrelated errors.
    url = translate.get(fieldValue.lower())
    if url is not None:
        # the top-level listings are all parsed with the "Books" category
        category = "Books"
    else:
        # "listId" instead of "id", so the builtin is not shadowed
        (listId, category) = fieldValue.split(";")
        category = _symbolToCategory(category)
        url = "http://listsofbests.com/list/%s/" % listId

    htmlTxt = getHttp(url)
    if htmlTxt is None:
        return MODULE_DOWN, None

    resultType, resultBody = parseListsOfBests(htmlTxt, category, modulesInfo)
    if UNKNOWN_FORMAT == resultType:
        # Page text third, url fourth, as at the other call sites (the two
        # were swapped here).
        logParsingFailure("listsofbestsbrowse", fieldValue, htmlTxt, url)
    return resultType, resultBody
def _retrieve_dex_business(name,cityOrZip,state,surrounding,categoryOrName):
    """Business search on www.dexonline.com.

    ZIP codes are not accepted: a five-digit cityOrZip is logged and
    answered with (RETRIEVE_FAILED, None) without any request being made.
    categoryOrName selects a "Name" or "Category" search and
    surrounding == "Yes" sets the surrounding flag to "true".

    Returns (RETRIEVE_FAILED, None) when getHttp returns None, otherwise
    the (res, data) pair from m411_by411.businessSearchDex.
    """
    looksLikeZip = cityOrZip.isdigit() and len(cityOrZip) == 5
    if looksLikeZip:
        log(SEV_EXC, "_retrieve_dex_business doesn't support cityOrZip='%s'" % cityOrZip)
        return RETRIEVE_FAILED, None

    if surrounding == "Yes":
        surroundingFlag = "true"
    else:
        surroundingFlag = "false"

    # The two templates take their arguments in different orders; any other
    # categoryOrName leaves the url empty.
    url = ""
    if categoryOrName == "Name":
        nameArgs = (urllib.quote(cityOrZip), urllib.quote(state), surroundingFlag, urllib.quote(name))
        url = dexServerUrlBusinessSearch % nameArgs
    elif categoryOrName == "Category":
        categoryArgs = (surroundingFlag, urllib.quote(name), urllib.quote(cityOrZip), urllib.quote(state))
        url = dexServerUrlBusinessSearchCategory % categoryArgs

    page = getHttp(url)
    if page is None:
        return (RETRIEVE_FAILED, None)

    res, data = m411_by411.businessSearchDex(page)
    if UNKNOWN_FORMAT == res:
        queryText = ",".join([name, cityOrZip, state, surrounding, categoryOrName])
        logParsingFailure("411-Business-Search", queryText, page, url)
    return res, data
def getJokesList(fieldValue):
    """Search jokes.comedycentral.com and parse the list of results.

    fieldValue has the form
    "rating;order;explicitness;types;categories;keyword": rating is one
    digit, order is "rating" or "rank", the three lists are
    space-separated names, and the keyword may itself contain ";".

    Returns (INVALID_REQUEST, None) for a malformed fieldValue,
    (MODULE_DOWN, None) when the download yields None, otherwise the
    (resultType, resultBody) pair from _parseList.

    Fix: names were looked up with list.index(), which raises ValueError
    for an unknown name, so the INVALID_REQUEST branches could never run
    and bad input crashed. Unknown names now return INVALID_REQUEST.
    """
    def codesFor(requested, known, offset):
        # str(position + offset) for every requested name, or None as soon
        # as a name is not in `known`. Items shorter than three characters
        # (such as the "" left by splitting an empty field) are skipped.
        codes = []
        for item in requested:
            if len(item) < 3:
                continue
            if item not in known:
                return None
            codes.append(str(known.index(item) + offset))
        return codes

    parts = fieldValue.split(";")
    if len(parts) < 6:
        return INVALID_REQUEST, None
    userRating = parts[0].strip()
    userOrder = parts[1].strip()
    userExplicitnessList = parts[2].strip().split(" ")
    userTypesList = parts[3].strip().split(" ")
    userCategoriesList = parts[4].strip().split(" ")
    # everything after the fifth ";" belongs to the keyword
    keyword = ";".join(parts[5:]).strip()

    if userOrder not in ("rating", "rank"):
        return INVALID_REQUEST, None
    if 1 != len(userRating) or userRating < "0" or userRating > "9":
        return INVALID_REQUEST, None

    # the url carries the rating doubled
    rating = str(int(userRating) * 2)

    # sort order; "rank" is only sent when there is a keyword
    order = ""
    if "rating" == userOrder:
        order = "rtd"
    elif 0 < len(keyword):
        order = "rd"

    explicitnessOut = codesFor(userExplicitnessList, ["Clean", "Tame", "Racy"], 0)
    if explicitnessOut is None:
        return INVALID_REQUEST, None

    categoriesList = [
        "Blonde", "Entertainment", "Men/Women", "Insults", "Yo-Mama", "Lawyer",
        "News&Politics", "Redneck", "Barroom", "Gross", "Sports", "Foreign",
        "Whatever", "Medical", "Sexuality", "Animals", "Children", "Anti-Joke",
        "Bush", "College", "Farm", "Business", "Religious", "Tech"
    ]
    # categories are in <14;37> range
    categoriesOut = codesFor(userCategoriesList, categoriesList, 14)
    if categoriesOut is None:
        return INVALID_REQUEST, None

    typesList = ["Articles", "One-Liners", "QandA", "Sketches", "Stories", "Lists"]
    typesOut = codesFor(userTypesList, typesList, 0)
    if typesOut is None:
        return INVALID_REQUEST, None

    jokesUrl = "http://jokes.comedycentral.com/search/results_output.asp?p=1&c=%s&e=%s&t=%s&r=%s&o=%s&k=%s"
    url = jokesUrl % (",".join(categoriesOut), ",".join(explicitnessOut),
                      ",".join(typesOut), rating, order, urllib.quote(keyword))

    htmlTxt = retrieveHttpResponseWithRedirectionHandleException(url)
    if htmlTxt is None:
        return MODULE_DOWN, None

    resultType, resultBody = _parseList(htmlTxt)
    if resultType == UNKNOWN_FORMAT:
        # NOTE(review): fieldName is not defined in this function; it has
        # to exist at module level or this line raises NameError - confirm.
        logParsingFailure(fieldName, fieldValue, htmlTxt, url)
    return resultType, resultBody
def getJokesList(fieldValue):
    """Search jokes.comedycentral.com and parse the list of results.

    fieldValue has the form
    "rating;order;explicitness;types;categories;keyword": rating is one
    digit, order is "rating" or "rank", the three lists are
    space-separated names, and the keyword may itself contain ";".

    Returns (INVALID_REQUEST, None) for a malformed fieldValue,
    (MODULE_DOWN, None) when the download yields None, otherwise the
    (resultType, resultBody) pair from _parseList.

    Fix: names were looked up with list.index(), which raises ValueError
    for an unknown name, so the INVALID_REQUEST branches could never run
    and bad input crashed. Unknown names now return INVALID_REQUEST.
    """
    def codesFor(requested, known, offset):
        # str(position + offset) for every requested name, or None as soon
        # as a name is not in `known`. Items shorter than three characters
        # (such as the "" left by splitting an empty field) are skipped.
        codes = []
        for item in requested:
            if len(item) < 3:
                continue
            if item not in known:
                return None
            codes.append(str(known.index(item) + offset))
        return codes

    parts = fieldValue.split(";")
    if len(parts) < 6:
        return INVALID_REQUEST, None
    userRating = parts[0].strip()
    userOrder = parts[1].strip()
    userExplicitnessList = parts[2].strip().split(" ")
    userTypesList = parts[3].strip().split(" ")
    userCategoriesList = parts[4].strip().split(" ")
    # everything after the fifth ";" belongs to the keyword
    keyword = ";".join(parts[5:]).strip()

    if userOrder not in ("rating", "rank"):
        return INVALID_REQUEST, None
    if 1 != len(userRating) or userRating < "0" or userRating > "9":
        return INVALID_REQUEST, None

    # the url carries the rating doubled
    rating = str(int(userRating) * 2)

    # sort order; "rank" is only sent when there is a keyword
    order = ""
    if "rating" == userOrder:
        order = "rtd"
    elif 0 < len(keyword):
        order = "rd"

    explicitnessOut = codesFor(userExplicitnessList, ["Clean", "Tame", "Racy"], 0)
    if explicitnessOut is None:
        return INVALID_REQUEST, None

    categoriesList = [
        "Blonde", "Entertainment", "Men/Women", "Insults", "Yo-Mama", "Lawyer",
        "News&Politics", "Redneck", "Barroom", "Gross", "Sports", "Foreign",
        "Whatever", "Medical", "Sexuality", "Animals", "Children", "Anti-Joke",
        "Bush", "College", "Farm", "Business", "Religious", "Tech"
    ]
    # categories are in <14;37> range
    categoriesOut = codesFor(userCategoriesList, categoriesList, 14)
    if categoriesOut is None:
        return INVALID_REQUEST, None

    typesList = ["Articles", "One-Liners", "QandA", "Sketches", "Stories", "Lists"]
    typesOut = codesFor(userTypesList, typesList, 0)
    if typesOut is None:
        return INVALID_REQUEST, None

    jokesUrl = "http://jokes.comedycentral.com/search/results_output.asp?p=1&c=%s&e=%s&t=%s&r=%s&o=%s&k=%s"
    url = jokesUrl % (",".join(categoriesOut), ",".join(explicitnessOut),
                      ",".join(typesOut), rating, order, urllib.quote(keyword))

    htmlTxt = retrieveHttpResponseWithRedirectionHandleException(url)
    if htmlTxt is None:
        return MODULE_DOWN, None

    resultType, resultBody = _parseList(htmlTxt)
    if resultType == UNKNOWN_FORMAT:
        # NOTE(review): fieldName is not defined in this function; it has
        # to exist at module level or this line raises NameError - confirm.
        logParsingFailure(fieldName, fieldValue, htmlTxt, url)
    return resultType, resultBody