Beispiel #1
0
 def _url_decode_impl(pair_iter, charset, keep_blank_values, errors):
     """Yield decoded ``(key, value)`` tuples for the segments in `pair_iter`.

     Empty segments are skipped.  A segment is split on its first ``b'='``;
     a segment with no ``b'='`` is treated as a key with an empty value and
     is only yielded when `keep_blank_values` is true.

     :param pair_iter: iterable of raw segments (each is tested against the
                       bytes separator ``b'='``)
     :param charset: forwarded to ``unquote_plus`` when decoding the value
     :param keep_blank_values: whether segments without ``=`` are kept
     :param errors: forwarded to ``unquote_plus`` when decoding the value
     """
     separator = b'='
     for segment in pair_iter:
         if not segment:
             continue
         if separator in segment:
             raw_key, raw_value = segment.split(separator, 1)
         elif keep_blank_values:
             raw_key, raw_value = segment, b''
         else:
             continue
         # Only the value receives `charset` and `errors`; the key is decoded
         # with whatever defaults ``unquote_plus`` applies.
         decoded_key = unquote_plus(safe_str(raw_key))
         decoded_value = unquote_plus(safe_str(raw_value), charset, errors)
         yield decoded_key, decoded_value
Beispiel #2
0
    def parse_multipart_headers(iterable):
        """Collect multipart headers from an iterable of newline-terminated lines.

        Consumption stops at the first empty line (the end of the headers), so
        the iterable can be consumed further by the caller afterwards.

        A line starting with a space or tab is folded into the previous header
        value, joined with ``'\\n '``.  Lines that contain no ``':'`` are
        ignored.

        :param iterable: iterable of strings that are newline terminated
        :return: a ``HeadersDict`` built from the ``(name, value)`` pairs
        :raises ValueError: if a line does not end in ``\\r\\n``, ``\\r`` or
                            ``\\n``
        """
        def _split_terminator(raw):
            """Return ``(line_without_ending, had_line_ending)`` for `raw`."""
            if raw[-2:] == '\r\n':
                return raw[:-2], True
            if raw[-1:] in ('\r', '\n'):
                return raw[:-1], True
            return raw, False

        headers = []
        for raw_line in iterable:
            text, terminated = _split_terminator(safe_str(raw_line))
            if not terminated:
                raise ValueError('unexpected end of line in multipart header.')
            if not text:
                # Blank line: the header section is finished.
                break
            if text[0] in ' \t' and headers:
                # Continuation line: append it to the most recent header.
                name, previous = headers[-1]
                headers[-1] = (name, previous + '\n ' + text[1:])
                continue
            name, colon, remainder = text.partition(':')
            if colon:
                headers.append((name.strip(), remainder.strip()))

        return HeadersDict(headers)
Beispiel #3
0
 def assign_merge_keys():
   """Private helper function for dedup().

   Annotates every entry of ``self.results`` (taken from the enclosing
   scope) with a merge key, a URL signature, start-date fields and empty
   merge bookkeeping lists.
   """
   for res in self.results:
     # Merge keys are 'M' + md5 hex digest of title and snippet. The prefix
     # distinguishes them from the stable IDs, which are a bare md5 hash of
     # other fields. An earlier variant, now disabled, also hashed
     # res.location.
     key_material = safe_str(res.title) + safe_str(res.snippet)
     res.merge_key = 'M' + hashlib.md5(key_material).hexdigest()
     res.url_sig = utils.signature(res.url + res.merge_key)

     # The merged results get sorted and de-duped by start date, so keep
     # the start date around as a time tuple.
     start_tuple = res.startdate.timetuple()
     res.t_startdate = start_tuple

     # month_day used by django: full month name plus the day of the month
     # without a leading zero.
     month_name = time.strftime("%B", start_tuple)
     day_of_month = int(time.strftime("%d", start_tuple))
     res.month_day = month_name + " " + str(day_of_month)

     # Filled in later with any results merged into this one.
     res.merged_list = []
     res.merged_debug = []
def translate(apiServers, apiKey, msg):
    '''
    Translates msg to English. Returns a tuple of:
      (original-language-code, original-language-fullname, translated-msg)

    Special values: `original-language-code` may have the values:
      - "[INDETERMINATE]": If the language of `msg` can't be determined.
        `translated-msg` is None in this case.
      - "[TRANSLATION_FAIL]": If the translation process threw an exception.
        In this case, `original-language-fullname` will have the exception
        message and `translated-msg` is None.

    If `msg` is detected as already being in the target language, it is
    returned unchanged as `translated-msg`.
    '''

    try:
        # The language table is loaded on first use.
        if not _languages:
            _load_languages(apiServers, apiKey)

        # Detect the language from a prefix only: we pay per character, and
        # the #characters-to-accuracy curve is probably logarithmic. The
        # truncation also means the max request size is not a concern here.
        detect_params = {'q': msg[:200]}
        detection = _make_request(apiServers, apiKey, 'detect', detect_params)
        lang_code = detection['data']['detections'][0][0]['language']

        # A regional code such as 'zh-CN' can be detected without being in
        # the _languages set, so fall back to the part before the hyphen.
        if lang_code not in _languages:
            lang_code = lang_code.split('-')[0]

        if lang_code not in _languages:
            # This probably means that the detection failed
            return ('[INDETERMINATE]',
                    'Language could not be determined',
                    None)

        if lang_code == _TARGET_LANGUAGE:
            # msg is already in the target language
            result_msg = msg
        else:
            result_msg = _translate_request_helper(apiServers, apiKey,
                                                   lang_code, msg)

        return (lang_code, _languages[lang_code], result_msg)
    except Exception as err:
        return ('[TRANSLATION_FAIL]', utils.safe_str(err), None)
def _make_request(apiServers, apiKey, action, params=None):
    '''
    Make the specified request, employing server failover if necessary.

    `apiServers` is the list of API hosts to try in order. If it is empty or
    None, only 'www.googleapis.com' is used (no failover). The caller's list
    is never modified.
    `apiKey` is sent as the `key` request parameter.
    `action` must be one of ['languages', 'detect', 'translate'].
    `params` must be None or a dict of query parameters. The caller's dict is
    never modified.

    Returns the decoded JSON body of the first acceptable response, and
    remembers that server in `_lastGoodApiServer` so it is tried first next
    time.

    Throws exception on error: the last failover error once every server has
    been tried, or immediately for any unexpected exception.
    '''

    global _lastGoodApiServer

    assert(action in ('languages', 'detect', 'translate'))

    original_action = action
    if action == 'translate':
        # 'translate' is the API's default action.
        action = ''
    else:
        action = '/' + action

    # Work on a copy so the caller's dict does not pick up our extra keys.
    params = dict(params) if params else {}

    params['key'] = apiKey
    params['target'] = _TARGET_LANGUAGE

    # Without this, the input is assumed to be HTML and newlines get stripped.
    params['format'] = 'text'

    # If `apiServers` is empty, we're not doing failover. Otherwise copy it,
    # so that the reordering below does not leak back to the caller.
    if not apiServers:
        apiServers = ['www.googleapis.com']
    else:
        apiServers = list(apiServers)

    # If we have a _lastGoodApiServer, then move it to the front of the
    # failover list.
    if _lastGoodApiServer in apiServers:
        apiServers.remove(_lastGoodApiServer)
        apiServers.insert(0, _lastGoodApiServer)

    # We have to set the Host header so that Google will accept requests to
    # "server1.googleapis.com", etc.
    headers = {'Host': 'www.googleapis.com'}

    # This header is required to make a POST request.
    # See https://developers.google.com/translate/v2/using_rest
    if _USE_POST_REQUEST:
        headers['X-HTTP-Method-Override'] = 'GET'

    # The most recent failover-worthy error; re-raised if every server fails.
    last_error = None
    success = False

    # Fail over between available servers
    for apiServer in apiServers:
        success = True

        url = 'https://%s/language/translate/v2%s' % (apiServer, action)

        try:
            if _USE_POST_REQUEST:
                req = requests.post(url, headers=headers, data=params)
            else:
                req = requests.get(url, headers=headers, params=params)

            extra_fail = _extra_fail_check(original_action, params, req)

            # `extra_fail` is False when the response passed the extra check;
            # any other value is included in the error message below.
            if extra_fail != False:
                success = False
                err = 'translate request not ok; failing over: %s; %d; %s' \
                        % (apiServer, req.status_code, extra_fail)
                # Deliberately not logged -- not useful.
                last_error = Exception(err)
            elif req.ok:
                _lastGoodApiServer = apiServer
                break
            else:
                success = False
                err = 'translate request not ok; failing over: %s; %d; %s; %s' \
                        % (apiServer, req.status_code, req.reason, req.text)
                # Deliberately not logged -- not useful.
                last_error = Exception(err)

        # These exceptions are the ones we've seen when the API server is
        # being flaky.
        except (requests.ConnectionError, requests.Timeout) as conn_err:
            success = False
            # Python 3 unbinds the `as` name when the except clause ends, so
            # keep the exception under a name that survives until the final
            # re-raise. Deliberately not logged -- not useful.
            last_error = conn_err
        except Exception as unexpected:
            # Unexpected error. Not going to fail over.
            logger.error('%s.py: request error: %s' % (__name__, utils.safe_str(unexpected)))
            raise

    if not success:
        # We failed over through all our servers with no luck. Re-raise the
        # last exception.
        if last_error is not None:
            raise last_error
        raise Exception('translation fail')

    return req.json()