def jinni_search(query):
    """Run a normal Jinni discovery search for *query*.

    Fetches ``discovery.html`` with the query in the URL, looks for the
    inline ``<script>`` element whose text mentions ``obj_collageEntry``,
    parses that script with ``parse_js`` and converts it with ``convert``.

    :param query: search text; it is encoded to UTF-8 before being put
        into the URL.
    :returns: the values of the converted JavaScript tree, or an empty
        list when no inline script containing the search results is found
        (or an ``IndexError`` occurs while parsing/converting it).
    """
    logging.info(u'Doing a normal search for "{0}"'.format(query))

    # Passing a non-ASCII unicode value straight to urlencode() fails:
    #
    # File "/usr/lib/python2.6/urllib.py", line 1269, in urlencode
    #   v = quote_plus(str(v))
    # UnicodeEncodeError: 'ascii' codec can't encode character u'\xe9' in
    # position 1: ordinal not in range(128)
    #
    # See: http://mail.python.org/pipermail/baypiggies/2007-April/002102.html
    url = "http://www.jinni.com/discovery.html?{0}".format(
        urllib.urlencode({"query": query.encode("utf-8")}))

    request = urllib2.Request(url)
    response = open_url(request)
    content = response.read()
    document = lxml.html.soupparser.fromstring(content)

    # Find the script tag that contains the search results and parse it
    try:
        # An empty inline <script> has text None; skip it instead of letting
        # the `in` test raise TypeError.
        script_text = [script.text
                       for script in document.xpath('//script[not(@src)]')
                       if script.text and "obj_collageEntry" in script.text][0]

        # PyNarcissus doesn't handle unicode properly:
        #
        # File "jsparser.py", line 197, in __init__
        #   self.source = str(s)
        # UnicodeEncodeError: 'ascii' codec can't encode characters in
        # position 31704-31706: ordinal not in range(128)
        #
        # So encoding to UTF-8 first
        js_tree = parse_js(script_text.encode("utf-8"))
        results = convert(js_tree).values()
    except IndexError:
        # No search results available
        results = []

    return results
def jinni_findSuggestionsWithFilters(query):
    """Run a Jinni suggestion search for *query* through the DWR endpoint.

    POSTs a ``findSuggestionsWithFilters`` call to the ``AjaxController``
    DWR URL, parses the response body with ``parse_js``, converts it with
    ``convert`` and passes both to ``evaluate``.

    :param query: search text; it is encoded to UTF-8 before being sent.
    :returns: the ``"s1"`` entry of the converted response tree.
    :raises KeyError: if the converted response has no ``"s1"`` entry.
    """
    logging.info(u'Doing a suggestion search for "{0}"...'.format(query))

    url = "http://www.jinni.com/dwr/call/plaincall/AjaxController.findSuggestionsWithFilters.dwr"

    # Use the session cookie when the jar has one.  When it does not (e.g.
    # no request has been made yet), fall back to an empty value rather than
    # failing with an IndexError: the field has to be present, but the
    # server does not validate it.
    http_session_id = next(
        (cookie.value for cookie in cj if cookie.name == "JSESSIONID"), "")

    values = {
        # Both the httpSessionId and scriptSessionId need to be submitted
        # or the server will respond with a "HTTP Error 501: Not Implemented".
        # However, they are not validated.
        # FIXME: when logged in for some reason you do need to send along a
        # valid httpSessionId
        "httpSessionId": http_session_id,
        "scriptSessionId": "",  # i.e. 3C675DDBB02222BE8CB51E2415259E99878
        "callCount": "1",
        "page": "/discovery.html",
        "c0-scriptName": "AjaxController",
        "c0-methodName": "findSuggestionsWithFilters",
        "c0-id": "0",
        "c0-param0": "string:{0}".format(query.encode("utf-8")),
        "c0-e1": "null:null",
        "c0-e2": "boolean:false",
        "c0-e3": "boolean:false",
        "c0-e4": "boolean:false",
        "c0-e5": "Array:[]",
        "c0-param1": "Object_Object:{contentTypeFilter:reference:c0-e1, "
                     "onlineContentFilter:reference:c0-e2, "
                     "dvdContentFilter:reference:c0-e3, "
                     "theaterContentFilter:reference:c0-e4, "
                     "contentAffiliates:reference:c0-e5}",
        "batchId": "2",
    }

    data = urllib.urlencode(values)
    request = urllib2.Request(url, data)
    response = open_url(request)
    content = response.read()

    js_tree = parse_js(content)
    tree = convert(js_tree)
    # NOTE(review): evaluate() presumably fills in `tree` in place from the
    # parsed script before "s1" is read -- confirm against its definition.
    evaluate(js_tree, tree)

    results = tree["s1"]
    return results