def _url_decode_impl(pair_iter, charset, keep_blank_values, errors):
    """Yield an unquoted ``(key, value)`` tuple for each raw pair.

    Empty pairs are skipped.  A pair that contains no ``=`` is treated as a
    key with an empty value when `keep_blank_values` is true, and is dropped
    otherwise.  Pairs are split on the first ``=`` only.

    Note that only the value is unquoted with `charset` and `errors`; the
    key is passed to ``unquote_plus`` without them.

    :param pair_iter: iterable of raw ``key=value`` chunks
    :param charset: passed through to ``unquote_plus`` for the value
    :param keep_blank_values: whether pairs without ``=`` are yielded
    :param errors: passed through to ``unquote_plus`` for the value
    """
    separator = b'='
    for raw_pair in pair_iter:
        if not raw_pair:
            continue
        if separator not in raw_pair:
            if not keep_blank_values:
                continue
            raw_key, raw_value = raw_pair, b''
        else:
            raw_key, raw_value = raw_pair.split(separator, 1)
        yield (unquote_plus(safe_str(raw_key)),
               unquote_plus(safe_str(raw_value), charset, errors))
def parse_multipart_headers(iterable):
    """Parse multipart headers from an iterable of newline-terminated lines.

    Lines are consumed until the first empty line (the end of the header
    section), so the iterable can be consumed further by the caller
    afterwards.  A line that starts with a space or tab continues the
    previous header: it is appended to that header's value after ``'\\n '``
    with its first character removed.  Lines without a ``:`` that are not
    continuations are ignored.

    :param iterable: iterable of strings that are newline terminated
    :raises ValueError: if a line does not end in ``\\r\\n``, ``\\r`` or
                        ``\\n``
    :return: a ``HeadersDict`` built from the ``(name, value)`` pairs
    """
    def _strip_eol(raw):
        """Return ``(line_without_eol, had_eol)`` for one raw line."""
        if raw[-2:] == '\r\n':
            return raw[:-2], True
        if raw[-1:] in ('\r', '\n'):
            return raw[:-1], True
        return raw, False

    headers = []
    for raw_line in iterable:
        text, terminated = _strip_eol(safe_str(raw_line))
        if not terminated:
            raise ValueError('unexpected end of line in multipart header.')
        if not text:
            # Blank line: end of the header section.
            break
        if text[0] in ' \t' and headers:
            # Folded continuation of the previous header.
            name, previous = headers[-1]
            headers[-1] = (name, previous + '\n ' + text[1:])
            continue
        pieces = text.split(':', 1)
        if len(pieces) == 2:
            headers.append((pieces[0].strip(), pieces[1].strip()))
    return HeadersDict(headers)
def assign_merge_keys():
    """Private helper for dedup(): attach merge metadata to each result.

    For every entry in ``self.results`` this sets ``merge_key``,
    ``url_sig``, ``t_startdate``, ``month_day`` and resets ``merged_list``
    and ``merged_debug`` to empty lists.
    """
    for result in self.results:
        # A merge key is 'M' + md5(title + snippet).  The 'M' prefix
        # distinguishes it from the stable IDs, which are a bare md5 of
        # other fields.  The location is not part of the hash (a variant
        # that also hashed safe_str(res.location) was disabled).
        digest_input = safe_str(result.title) + safe_str(result.snippet)
        result.merge_key = 'M' + hashlib.md5(digest_input).hexdigest()
        result.url_sig = utils.signature(result.url + result.merge_key)

        # Merged results are sorted and de-duped by start date, so keep
        # the start date around as a time tuple.
        result.t_startdate = result.startdate.timetuple()
        start = result.t_startdate

        # month_day is used by django: full month name plus the day of
        # the month without a leading zero.
        month_name = time.strftime("%B", start)
        day_number = int(time.strftime("%d", start))
        result.month_day = month_name + " " + str(day_number)

        # Holders for any results that get merged into this one.
        result.merged_list = []
        result.merged_debug = []
def translate(apiServers, apiKey, msg):
    '''
    Translate `msg` into the target language (`_TARGET_LANGUAGE`).

    Returns a 3-tuple:
      (original-language-code, original-language-fullname, translated-msg)

    Special values of `original-language-code`:
      - "[INDETERMINATE]": the language of `msg` could not be determined;
        `translated-msg` is None.
      - "[TRANSLATION_FAIL]": an exception was raised along the way. In this
        case `original-language-fullname` holds the exception message and
        `translated-msg` is None.

    If `msg` is already in the target language it is returned unchanged and
    no translate request is made.
    '''
    try:
        if not _languages:
            _load_languages(apiServers, apiKey)

        # Only a prefix of the message is sent for detection: we pay per
        # character, and the #characters-to-accuracy curve is probably
        # logarithmic. Truncating also means the max request size is not a
        # concern here.
        sample = msg[:200]
        detection = _make_request(apiServers, apiKey, 'detect', {'q': sample})
        lang_code = detection['data']['detections'][0][0]['language']

        if lang_code not in _languages:
            # Codes such as 'zh-CN' can be detected but are not in
            # _languages, so retry with just the primary subtag.
            lang_code = lang_code.split('-')[0]
            if lang_code not in _languages:
                # This probably means that the detection failed.
                return ('[INDETERMINATE]', 'Language could not be determined', None)

        # Skip the translate call when msg is already in the target language.
        translated = (msg if lang_code == _TARGET_LANGUAGE
                      else _translate_request_helper(apiServers, apiKey,
                                                     lang_code, msg))
        return (lang_code, _languages[lang_code], translated)

    except Exception as e:
        return ('[TRANSLATION_FAIL]', utils.safe_str(e), None)
def _make_request(apiServers, apiKey, action, params=None):
    '''
    Make the specified request, employing server failover if necessary.

    `apiServers` is a list of API host names to try in order; if it is
    empty, 'www.googleapis.com' is used and no failover takes place.
    `action` must be one of ['languages', 'detect', 'translate'].
    `params` must be None or a dict of query parameters.

    Neither `apiServers` nor `params` is modified; the function works on
    copies of both.

    Returns the decoded JSON body of the first successful response and
    remembers the server that produced it in `_lastGoodApiServer`.

    Throws exception on error: the last failure seen if every server
    failed, or immediately on an unexpected (non-connection) error.
    '''
    global _lastGoodApiServer

    assert action in ('languages', 'detect', 'translate')

    # 'translate' is the API's default action, so it has no path suffix.
    path_suffix = '' if action == 'translate' else '/' + action

    # Copy so the caller's dict is not polluted with our extra keys.
    params = dict(params) if params else {}
    params['key'] = apiKey
    params['target'] = _TARGET_LANGUAGE
    # Without this, the input is assumed to be HTML and newlines get stripped.
    params['format'] = 'text'

    # If `apiServers` is empty, we are not doing failover. Copy otherwise so
    # that the reordering below does not mutate the caller's list.
    servers = list(apiServers) if apiServers else ['www.googleapis.com']

    # If we have a _lastGoodApiServer, then move it to the front of the
    # failover list.
    if _lastGoodApiServer in servers:
        servers.remove(_lastGoodApiServer)
        servers.insert(0, _lastGoodApiServer)

    # We have to set the Host header so that Google will accept requests to
    # "server1.googleapis.com", etc.
    headers = {'Host': 'www.googleapis.com'}

    # This header is required to make a POST request.
    # See https://developers.google.com/translate/v2/using_rest
    if _USE_POST_REQUEST:
        headers['X-HTTP-Method-Override'] = 'GET'

    # The most recent failure. It is kept under its own name rather than
    # being bound by `except ... as`, because Python 3 unbinds the `as`
    # target when the except clause ends, which would make it unavailable
    # after the loop.
    last_error = None

    # Fail over between available servers
    for apiServer in servers:
        url = 'https://%s/language/translate/v2%s' % (apiServer, path_suffix)

        try:
            if _USE_POST_REQUEST:
                req = requests.post(url, headers=headers, data=params)
            else:
                req = requests.get(url, headers=headers, params=params)

            # NOTE(review): _extra_fail_check presumably returns False when
            # the response is acceptable and a description otherwise; the
            # `!= False` comparison is kept exactly as it was.
            extra_fail = _extra_fail_check(action, params, req)

            if extra_fail != False:
                err = 'translate request not ok; failing over: %s; %d; %s' \
                        % (apiServer, req.status_code, extra_fail)
                # Deliberately not logged -- not useful.
                last_error = Exception(err)
            elif req.ok:
                _lastGoodApiServer = apiServer
                break
            else:
                err = 'translate request not ok; failing over: %s; %d; %s; %s' \
                        % (apiServer, req.status_code, req.reason, req.text)
                # Deliberately not logged -- not useful.
                last_error = Exception(err)

        # These exceptions are the ones we've seen when the API server is
        # being flaky.
        except (requests.ConnectionError, requests.Timeout) as e:
            # Deliberately not logged -- not useful.
            last_error = e
        except Exception as e:
            # Unexpected error. Not going to fail over.
            logger.error('%s.py: request error: %s' % (__name__, utils.safe_str(e)))
            raise
    else:
        # We failed over through all our servers with no luck. Re-raise the
        # last exception.
        raise last_error if last_error else Exception('translation fail')

    return req.json()