コード例 #1
0
def search():
    """Run the filtered query and return the rows as a ``text/csv`` response.

    The response headers carry the total and filtered row counts
    (``Query-Count-Total`` / ``Query-Count-Filtered``). When more filtered
    rows exist than the current page covers, a ``Query-Next`` header holds
    the URL of the next page, built from the current paging/ordering
    settings plus the active filters.
    """
    meta = get_meta()
    filters = get_filters()

    # get counts
    count_total = query(count_all=True)
    count_filtered = query(filters=filters, count_filtered=True)

    # build response
    results = query(meta=meta, filters=filters)

    headers = {
        "Query-Count-Total": str(count_total),
        "Query-Count-Filtered": str(count_filtered),
        "Cache-Control": "max-age=259200",
        "Access-Control-Expose-Headers": "Query-Count-Total, Query-Count-Filtered, Query-Next"
    }

    # Only advertise a next page when rows remain beyond the current one.
    if count_filtered > meta["page"] * meta["limit"]:
        next_url_params = {
            'page': str(meta["page"] + 1),
            'page_size': str(meta["limit"]),
            'order_by': meta["order_by"],
            'order_by_direction': meta["order_by_direction"]
        }
        next_url_params.update(filters)
        next_request = PreparedRequest()
        next_request.prepare_url(request.base_url, next_url_params)
        headers['Query-Next'] = next_request.url

    # dict.items() exists on Python 2 and 3; iteritems() is Python 2 only
    # and raises AttributeError on Python 3.
    return Response(result_generator(results), headers=list(headers.items()),
                    mimetype='text/csv')
コード例 #2
0
ファイル: common.py プロジェクト: czajowaty/curry-bot
def is_valid_url(url):
    """Return True when ``prepare_url`` accepts ``url``, False on any error."""
    candidate = PreparedRequest()
    try:
        candidate.prepare_url(url, None)
    except Exception:
        return False
    else:
        return True
コード例 #3
0
ファイル: confirmSignUp.py プロジェクト: sappana2003/CFF
def confirm_sign_up():
    """Confirm a Cognito sign-up and redirect the user with a 302 response.

    Reads ``code`` and ``username`` from the current request's query
    parameters, looks the user up, and confirms the sign-up. The redirect
    target is the user's ``website`` attribute when present, otherwise the
    default URL below. When confirmation fails with a service error, the
    redirect URL additionally carries ``confirmSignupErrorMessage`` and
    ``confirmSignupErrorCode`` query parameters.

    Raises:
        Exception: any failure that does not carry a service error payload
            (``response["Error"]``) is re-raised unchanged.
    """
    from chalicelib.main import app, USER_POOL_ID, COGNITO_CLIENT_ID

    confirmation_code = app.current_request.query_params["code"]
    username = app.current_request.query_params["username"]
    user = cognito_idp_client.admin_get_user(UserPoolId=USER_POOL_ID,
                                             Username=username)
    redirect_url = "http://chinmayamission.com"
    for attribute in user["UserAttributes"]:
        if attribute["Name"] == "website":
            redirect_url = attribute["Value"]
    try:
        # Exceptions: https://docs.aws.amazon.com/cognito-user-identity-pools/latest/APIReference/API_ConfirmSignUp.html
        cognito_idp_client.confirm_sign_up(
            ClientId=COGNITO_CLIENT_ID,
            Username=username,
            ConfirmationCode=confirmation_code,
        )
    except Exception as e:
        response = getattr(e, "response", None)
        error = response.get("Error") if isinstance(response, dict) else None
        if not error:
            # Not a service error with a payload: propagate the original
            # exception instead of masking it with an AttributeError here.
            raise
        params = {
            "confirmSignupErrorMessage": error.get("Message", str(e)),
            "confirmSignupErrorCode": error.get("Code", type(e).__name__),
        }
        # Use PreparedRequest so it preserves the existing query string in redirect_url
        req = PreparedRequest()
        req.prepare_url(redirect_url, params)
        return Response(status_code=302,
                        body="",
                        headers={"Location": req.url})

    return Response(status_code=302,
                    body="",
                    headers={"Location": redirect_url})
コード例 #4
0
ファイル: client.py プロジェクト: ifooth/iam-python-sdk
    def _call_api(self, http_func, host, path, data, headers, timeout=None):
        """Call ``http_func`` on ``host + path`` and unwrap the API envelope.

        Any ``self._extra_url_params`` are appended to the URL's query string.
        Returns a ``(success, message, data)`` tuple: ``(False, <reason>, None)``
        when the HTTP call fails or the reply's ``code`` is not 0, otherwise
        ``(True, "ok", <reply "data" field>)``.
        """
        url = "{host}{path}".format(host=host, path=path)

        begin = time.time()

        # add extra params in url if not empty
        extra_params = self._extra_url_params
        if extra_params:
            prepared = PreparedRequest()
            prepared.prepare_url(url, extra_params)
            url = prepared.url

        ok, message, payload = http_func(
            url, data, headers=headers, timeout=timeout)

        debug_enabled = logger.isEnabledFor(logging.DEBUG)
        if debug_enabled:
            logger.debug("do http request: method=`%s`, url=`%s`, data=`%s`",
                         http_func.__name__, url, json.dumps(data))
            logger.debug(
                "http request result: ok=`%s`, message=`%s`, _data=`%s`", ok,
                message, json.dumps(payload))
            elapsed_ms = int((time.time() - begin) * 1000)
            logger.debug("http request took %s ms", elapsed_ms)

        # Transport-level failure reported by http_func.
        if not ok:
            failure = message or "verify from iam server fail"
            return False, failure, None

        # Application-level failure reported inside the reply body.
        if payload.get("code") != 0:
            failure = payload.get("message") or "iam api fail"
            return False, failure, None

        return True, "ok", payload.get("data")
コード例 #5
0
ファイル: utils.py プロジェクト: tehnotcpu/CirnoBot
def check_url(url):
    """Return True if requests can prepare ``url``, False otherwise.

    ``prepare_url`` signals a URL without a scheme with ``MissingSchema``
    and an unparsable or host-less URL with ``InvalidURL``; both mean the
    URL is unusable, so both yield False instead of escaping the validator.
    """
    prepared_request = PreparedRequest()
    try:
        prepared_request.prepare_url(url, None)
    except (requests.exceptions.MissingSchema,
            requests.exceptions.InvalidURL):
        return False
    return True
コード例 #6
0
    def _call_api(self, method, path, params=None, data=None):
        """Execute an API request.

        Arguments:
            method (str): HTTP request type
            path (str): API request path
            params (dict): Optional query parameters appended to the URL
            data (dict): Data to send in dictionary format

        Returns:
            Response: The requests response object

        Raises:
            Exception: whatever ``response.raise_for_status()`` raises is
                propagated unchanged.
        """
        url = 'https://{}/api/{}'.format(self._host, path)
        if params:
            req = PreparedRequest()
            req.prepare_url(url, params)
            url = req.url
        # Connect on first use.
        if self._session is None:
            self.connect()
        response = self._session.request(method=method,
                                         url=url,
                                         json=data,
                                         verify=self._verify_ssl,
                                         timeout=self._timeout)
        # TODO CHECK FOR TOKEN EXPIRATION
        # No try/except here: catching only to re-raise the same exception
        # added nothing, so errors simply propagate to the caller.
        response.raise_for_status()
        return response
コード例 #7
0
ファイル: youtube.py プロジェクト: Exide-PC/bot_exide
def _search(query: str):
    """Search YouTube for ``query`` and return the videos/playlists found.

    Returns a list of dicts with the keys ``id``, ``title`` (HTML-unescaped)
    and ``isPlaylist``. Entries that are not playlists additionally get a
    ``duration`` key taken from ``get_video_durations``. Items that carry
    neither a video id nor a playlist id are skipped.
    """
    url = "https://www.googleapis.com/youtube/v3/search"

    params = {'key': token, 'part': 'snippet', 'q': query, 'maxResults': 15}
    req = PreparedRequest()
    req.prepare_url(url, params)

    # Named `payload` so the standard `json` module is not shadowed.
    payload = requests.get(req.url).json()

    results = []
    video_ids = []
    for item in payload['items']:
        video_id = item['id'].get('videoId')
        playlist_id = item['id'].get('playlistId')
        if not video_id and not playlist_id:
            continue
        results.append({
            'id': video_id if video_id is not None else playlist_id,
            'title': html.unescape(item['snippet']['title']),
            'isPlaylist': playlist_id is not None
        })
        if video_id:
            video_ids.append(video_id)

    # One lookup for all collected videos, then attach the durations.
    durations = get_video_durations(video_ids)

    for result in results:
        if not result['isPlaylist']:
            result['duration'] = durations[result['id']]

    return results
コード例 #8
0
ファイル: string_lib.py プロジェクト: RahulShah9191/excel
def drop_params_from_url(url,
                         params,
                         drop_keys,
                         assert_param_is_present=False):
    """Return ``url`` merged with ``params`` but without the ``drop_keys``.

    ``params`` are first appended to ``url``; the resulting query string is
    parsed, every key listed in ``drop_keys`` is removed, and the URL is
    rebuilt from the remaining parameters. The URL's path parameters and
    fragment are dropped as well. On any other failure the error is printed
    and an empty string is returned.

    Raises:
        AssertionError: if ``assert_param_is_present`` is true and some of
            ``drop_keys`` are not present in the merged URL.
    """
    out_url = ""
    try:
        pre_req = PreparedRequest()
        pre_req.prepare_url(url, params)
        u = urlparse(pre_req.url)
        current_params = parse_qs(u.query)

        if assert_param_is_present:
            keys_diff = set(drop_keys) - set(current_params)
            if keys_diff:
                # Raised explicitly rather than via `assert`, which is
                # stripped when Python runs with -O.
                raise AssertionError(f"Keys not present : {keys_diff}")

        final_params = {
            k: v
            for k, v in current_params.items() if k not in drop_keys
        }
        current_url = u._replace(params="", query="", fragment="").geturl()
        pre_req.prepare_url(current_url, final_params)
        out_url = pre_req.url
    except AssertionError:
        # The opt-in presence check must reach the caller; previously the
        # generic handler below swallowed it and it could never fail.
        raise
    except Exception as e:
        print(f"\n Exception occured: {e}!! \n URL: {url} \n PARAMS: {params}")
    return out_url
コード例 #9
0
    def get_redirect_url(self, *args, **kwargs):
        """
        Return the URL the user is sent to when logging out.

        When third-party auth is enabled, a backend name is configured, the
        session holds an ``id_token`` and the matching enabled provider has a
        ``logout_url`` in its ``other_settings``, the provider's logout URL is
        returned with ``id_token_hint`` and ``post_logout_redirect_uri``
        appended. In every other case the default ``logout`` route is used.
        """
        backend_name = getattr(settings, 'COLARAZ_AUTH_PROVIDER_BACKEND_NAME',
                               None)
        if not (third_party_auth.is_enabled() and backend_name
                and self.request.session.has_key('id_token')):
            return reverse('logout')

        matching_providers = [
            candidate
            for candidate in third_party_auth.provider.Registry.enabled()
            if candidate.backend_name == backend_name
        ]
        if not matching_providers:
            return reverse('logout')

        provider_settings = json.loads(
            getattr(matching_providers[0], 'other_settings', '{}'))
        logout_url = provider_settings.get('logout_url')
        if not logout_url:
            return reverse('logout')

        # Send the user back to the referring page, or the site base URL.
        redirect_to = self.request.META.get(
            'HTTP_REFERER') or get_site_base_url(self.request)
        logout_request = PreparedRequest()
        logout_request.prepare_url(logout_url, {
            'id_token_hint': self.request.session['id_token'],
            'post_logout_redirect_uri': redirect_to
        })
        return logout_request.url
コード例 #10
0
 def _get_response(self, url, data):
     """GET ``url`` with ``data`` as query parameters and log the body.

     The prepared URL is unquoted before the request is sent. The response
     content is logged at WARNING level; the method always returns True.
     """
     req = PreparedRequest()
     req.prepare_url(url, data)
     result = requests.get(unquote(req.url)).content
     # logging.warn is a deprecated alias; logging.warning is the supported name.
     logging.warning('-getresponse')
     logging.warning(result)
     return True
コード例 #11
0
def getAthleteName(name):
    """Return the underscore-separated name and its Wikipedia article URL.

    Spaces in ``name`` are replaced with underscores; the URL is that value
    appended to ``https://en.wikipedia.org/wiki/``.

    Returns:
        tuple: ``(page_name, url)``.
    """
    # The original also built a PreparedRequest whose result was never used;
    # that dead code has been removed.
    name = name.replace(" ", "_")
    url_link = "https://en.wikipedia.org/wiki/" + name
    return name, url_link
コード例 #12
0
ファイル: utils.py プロジェクト: Junohea/CirnoBot
def check_url(url):
    """Return True if requests can prepare ``url``, False otherwise.

    ``prepare_url`` signals a URL without a scheme with ``MissingSchema``
    and an unparsable or host-less URL with ``InvalidURL``; both mean the
    URL is unusable, so both yield False instead of escaping the validator.
    """
    prepared_request = PreparedRequest()
    try:
        prepared_request.prepare_url(url, None)
    except (requests.exceptions.MissingSchema,
            requests.exceptions.InvalidURL):
        return False
    return True
コード例 #13
0
ファイル: scrape.py プロジェクト: wsbuck/fanfoo-scraper
def scrapeYahoo(season, week, position):
    """Scrape per-player weekly stats from the Yahoo fantasy players pages.

    Requests the players listing in pages of 25 (offsets 0 to 275), takes the
    second HTML table of each page, drops the unused columns, renames the
    rest to ``col_names`` and cleans the ``name`` column with
    ``name_cleanup``. Scraping stops at the first empty page.

    Args:
        season: Not used by this function; kept so callers need not change.
        week: Week identifier inserted into the ``stat1`` query parameter.
        position: Value of the ``pos`` query parameter.

    Returns:
        pandas.DataFrame: all scraped rows with the ``col_names`` columns
        (an empty frame with those columns when nothing was scraped).
    """
    url = 'https://football.fantasysports.yahoo.com/f1/47241/players'
    drop_cols = [
        0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 17, 19, 20, 24, 25, 30, 31, 32, 33,
        34, 35, 36, 37
    ]
    col_names = [
        'name', 'pass_att', 'pass_comp', 'pass_yds', 'pass_td', 'pass_int',
        'pass_sack', 'rush_att', 'rush_yds', 'rush_td', 'rec_tgt', 'rec_rec',
        'rec_yds', 'rec_td'
    ]
    pages = []

    for count in range(0, 300, 25):
        querystring = {
            'sort': 'PTS',
            'sdir': '1',
            'status': 'A',
            'pos': '{}'.format(position),
            'stat1': 'S_PW_{}'.format(week),
            'jsenabled': '0',
            'count': '{}'.format(count)
        }
        req = PreparedRequest()
        req.prepare_url(url, querystring)
        tables = pd.read_html(req.url)
        players = tables[1]
        players.drop(players.columns[drop_cols], axis=1, inplace=True)
        players.columns = col_names
        players['name'] = players['name'].apply(name_cleanup)
        if players.empty:
            break
        pages.append(players)

    # DataFrame.append was removed in pandas 2.0; collect the pages and
    # concatenate once instead of growing a frame inside the loop.
    if not pages:
        return pd.DataFrame(columns=col_names)
    return pd.concat(pages, ignore_index=True)
コード例 #14
0
ファイル: __init__.py プロジェクト: yusufusta/eksipy
    def addParamsToUrl(self, url: str, params: dict) -> str:
        """
        Append the given parameters to the URL and return the result.
        """

        prepared = PreparedRequest()
        prepared.prepare_url(url, params)
        url_with_params = prepared.url
        return url_with_params
コード例 #15
0
def apipara(a):
    """Call the ``assignjob`` endpoint for ID ``a`` and return its verdict.

    Returns:
        tuple: the ``success`` and ``msg`` fields of the JSON reply.
    """
    req = PreparedRequest()
    url = "http://integra-net4/BMJ_ICE_Test/assignjob"
    params = {'ID': a}
    req.prepare_url(url, params)
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(req.url) as response:
        content = response.read()
    data = json.loads(content)
    return data['success'], data['msg']
コード例 #16
0
def append_params_to_url(DO_WE_ADD_PARAMS_REUPLOAD, url):
    """Optionally append one random query parameter to ``url``.

    Used by the reposting module: a random 5-character key/value pair gives
    a unique variation of the URL. When the flag is falsy the URL is
    returned unchanged.
    """
    if not DO_WE_ADD_PARAMS_REUPLOAD:
        return url

    random_query = {random_char(5): random_char(5)}
    prepared = PreparedRequest()
    prepared.prepare_url(url, random_query)
    return prepared.url
コード例 #17
0
ファイル: webdriver.py プロジェクト: ws1993/superset
    def get_screenshot(
        self,
        url: str,
        element_name: str,
        user: "******",
    ) -> Optional[bytes]:
        """Load ``url`` in an authenticated browser and screenshot an element.

        The ``standalone`` report-mode parameter is appended to ``url``. After
        the configured head-start sleep, the method waits for the element
        with class ``element_name``, for ``loading`` elements to disappear and
        for ``slice_container`` elements to become visible, sleeps for the
        configured animation wait and captures the element as PNG.

        Returns:
            The PNG bytes, or None when no screenshot could be taken. If a
            wait times out after the element was located, a screenshot of
            the element is still attempted.
        """
        params = {"standalone": DashboardStandaloneMode.REPORT.value}
        req = PreparedRequest()
        req.prepare_url(url, params)
        url = req.url or ""

        driver = self.auth(user)
        driver.set_window_size(*self._window)
        driver.get(url)
        img: Optional[bytes] = None
        # Stays None until the element has been located, so the timeout
        # handler below can tell whether there is anything to capture.
        element = None
        selenium_headstart = current_app.config[
            "SCREENSHOT_SELENIUM_HEADSTART"]
        logger.debug("Sleeping for %i seconds", selenium_headstart)
        sleep(selenium_headstart)

        try:
            logger.debug("Wait for the presence of %s", element_name)
            element = WebDriverWait(driver,
                                    self._screenshot_locate_wait).until(
                                        EC.presence_of_element_located(
                                            (By.CLASS_NAME, element_name)))
            logger.debug("Wait for .loading to be done")
            WebDriverWait(driver, self._screenshot_load_wait).until_not(
                EC.presence_of_all_elements_located(
                    (By.CLASS_NAME, "loading")))
            logger.debug("Wait for chart to have content")
            WebDriverWait(driver, self._screenshot_locate_wait).until(
                EC.visibility_of_all_elements_located(
                    (By.CLASS_NAME, "slice_container")))
            selenium_animation_wait = current_app.config[
                "SCREENSHOT_SELENIUM_ANIMATION_WAIT"]
            logger.debug("Wait %i seconds for chart animation",
                         selenium_animation_wait)
            sleep(selenium_animation_wait)
            logger.info("Taking a PNG screenshot of url %s", url)
            img = element.screenshot_as_png
        except TimeoutException:
            logger.warning("Selenium timed out requesting url %s",
                           url,
                           exc_info=True)
            # If the very first wait timed out, `element` was never bound;
            # touching it used to raise UnboundLocalError from this handler.
            if element is not None:
                img = element.screenshot_as_png
        except StaleElementReferenceException:
            logger.error(
                "Selenium got a stale element while requesting url %s",
                url,
                exc_info=True,
            )
        except WebDriverException as ex:
            logger.error(ex, exc_info=True)
        finally:
            self.destroy(driver,
                         current_app.config["SCREENSHOT_SELENIUM_RETRIES"])
        return img
コード例 #18
0
def apipara(a):
    """Fetch the JSON reply for ID ``a`` and print its ``success`` and ``msg``."""
    req = PreparedRequest()
    url = "any url"
    params = {'ID':a}
    req.prepare_url(url, params)
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(req.url) as response:
        content = response.read()
    data = json.loads(content)
    print(data['success'])  # extracted from the JSON content
    print(data['msg'])  # extracted from the JSON content
コード例 #19
0
def check_url(url):
    """Return True if requests can prepare ``url``, False on any error."""
    # create an instance of the PreparedRequest class of the requests library
    prepared_request = PreparedRequest()
    try:
        # checking that the url is well-formed
        prepared_request.prepare_url(url, None)
        return True
    except Exception:
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit.
        return False
コード例 #20
0
ファイル: string_lib.py プロジェクト: RahulShah9191/excel
def add_params_to_url(url, params):
    """Return ``url`` with ``params`` appended to its query string.

    If building the URL fails, the error is printed together with the
    inputs and an empty string is returned.
    """
    try:
        builder = PreparedRequest()
        builder.prepare_url(url, params)
        return builder.url
    except Exception as e:
        print(f"\n Exception occured: {e}!! \n URL: {url} \n PARAMS: {params}")
        return ""
コード例 #21
0
def validate_url(text: str):
    """
    Validate a URL.

    Returns True when requests can prepare ``text`` as a URL and False when
    it has no scheme (``MissingSchema``) or cannot be parsed / has no host
    (``InvalidURL``).
    """
    # Imported locally so the block brings the extra exception into scope
    # itself; requests is already a dependency of this code.
    from requests.exceptions import InvalidURL

    prepared_request = PreparedRequest()
    try:
        prepared_request.prepare_url(text, None)
    except (MissingSchema, InvalidURL):
        return False
    return True
コード例 #22
0
def session_check(session_id):
    """Verify that the session cookie survived and redirect to ``follow``.

    When the session's ``uuid`` matches ``session_id`` the session is marked
    valid and the client is redirected (307) to the ``follow`` URL. Otherwise
    ``cookies_disabled=1`` is appended to that URL, the ``_permanent`` session
    key is removed and the client is redirected there.
    """
    # NOTE(review): the redirect target comes straight from the `follow`
    # query argument without validation -- confirm this is acceptable.
    session_matches = 'uuid' in session and session['uuid'] == session_id
    if not session_matches:
        target = PreparedRequest()
        target.prepare_url(request.args.get('follow'), {'cookies_disabled': 1})
        session.pop('_permanent', None)
        return redirect(target.url, code=307)

    session['valid'] = True
    return redirect(request.args.get('follow'), code=307)
コード例 #23
0
    def get_order_requirements(
        did: str,
        service_endpoint: str,
        consumer_address: str,
        service_id: Union[str, int],
        service_type: str,
        token_address: str,
        userdata: Optional[Dict] = None,
    ) -> Optional[OrderRequirements]:
        """Query the provider's initialize endpoint for the order requirements.

        :param did: sent as the ``documentId`` query parameter
        :param service_endpoint: URL of the initialize endpoint
        :param consumer_address: hex str the ethereum account address of the consumer
        :param service_id: sent as the ``serviceId`` query parameter
        :param service_type: sent as the ``serviceType`` query parameter
        :param token_address: sent as the ``dataToken`` query parameter
        :param userdata: optional dict, sent JSON-encoded as ``userdata``
        :return: OrderRequirements built from the reply's ``numTokens``
            (converted to wei), ``dataToken``, ``to``, ``nonce`` and optional
            ``computeAddress`` fields, or None when the endpoint does not
            answer with HTTP 200
        """

        req = PreparedRequest()
        params = {
            "documentId": did,
            "serviceId": service_id,
            "serviceType": service_type,
            "dataToken": token_address,
            "consumerAddress": consumer_address,
        }

        if userdata:
            params["userdata"] = json.dumps(userdata)

        req.prepare_url(service_endpoint, params)
        initialize_url = req.url

        logger.info(f"invoke the initialize endpoint with this url: {initialize_url}")
        response = DataServiceProvider._http_method("get", initialize_url)
        # The returned json should contain information about the required number of tokens
        # to consume `service_id`. If service is not available there will be an error or
        # the returned json is empty.
        if response.status_code != 200:
            return None
        order = dict(response.json())

        # numTokens comes as a float. Decimal(float) would carry the binary
        # representation error (Decimal(0.1) != Decimal("0.1")) into the wei
        # amount, so go through the float's shortest string form instead.
        num_tokens = Decimal(str(order["numTokens"]))

        return OrderRequirements(
            to_wei(num_tokens),
            order["dataToken"],
            order["to"],
            int(order["nonce"]),
            order.get("computeAddress"),
        )
コード例 #24
0
    def get_checked_url(self):
        """
        Return ``self.url`` as prepared by requests.

        Raises ``requests.HTTPError`` when the URL cannot be prepared.
        """

        checker = PreparedRequest()
        try:
            checker.prepare_url(self.url, None)
        except Exception:
            raise requests.HTTPError
        return checker.url
コード例 #25
0
 def replace_parameter_value(self, url, parameter_name, value):
     """
     Return ``url`` with query parameter ``parameter_name`` set to ``value``.

     The URL is rebuilt from its scheme, netloc and path plus the updated
     query parameters.
     """
     parsed = urlparse(url)
     query = dict(parse_qsl(parsed.query))
     query[parameter_name] = value
     base_url = "".join([parsed.scheme, "://", parsed.netloc, parsed.path])
     rebuilt = PreparedRequest()
     rebuilt.prepare_url(base_url, query)
     return rebuilt.url
コード例 #26
0
def params_consumer(params_q, complete_q, access_stats, stop):
    """Worker loop that tests which query parameters of a URL matter.

    Items on ``params_q`` are ``(entry, rid, orig_url)`` tuples where
    ``entry`` has the keys ``url``, ``params`` (parameters still to test)
    and ``text`` (the reference page text). For each item one random
    parameter is removed and the page is fetched without it:

    * if the page differs from ``text``, the parameter is kept by baking it
      into ``url``;
    * otherwise it is dropped.

    The item is then re-queued with the remaining parameters. Once no
    parameters remain, ``(url, rid, orig_url)`` is put on ``complete_q``.
    Domains that ``safe_to_access`` rejects are re-queued untouched.

    ``stop`` is accepted but not consulted: the loop never exits on its own.
    """
    while True:
        try:
            entry, rid, orig_url = params_q.get(True, 0.01)
        except Exception:
            # Nothing available within the timeout; poll again.
            continue
        url = entry["url"]
        params = entry["params"]
        text = entry["text"]

        if not params:
            # Every parameter has been tested; the URL is final.
            complete_q.put((url, rid, orig_url))
            continue

        o = urlparse(url)
        domain = o.netloc
        can_hit = safe_to_access(domain, access_stats)

        if can_hit:
            # randomly pick a parameter to ignore
            # see if it changes things
            param_to_remove = random.choice(list(params.keys()))
            param_to_remove_val = params.pop(param_to_remove)

            req = PreparedRequest()
            req.prepare_url(url, params)
            new_url = req.url  # url, plus the parameters still under test

            try:
                access_stats[domain] = time.time_ns()
                r = requests.get(new_url)

                changed = webpages_different(text, r.text)
                if changed:  # a change was detected
                    req.prepare_url(url, {param_to_remove: param_to_remove_val})
                    # param_to_remove has been identified as necessary:
                    # the URL now contains it as a permanent fixture
                    params_q.put(({"params": params, "url": req.url, "text": text}, rid, orig_url))
                else:
                    params_q.put(({"params": params, "url": url, "text": text}, rid, orig_url))
            except Exception:
                # The parameter was never actually tested: put it back so the
                # retry starts from the same state, and re-queue a full
                # 3-tuple (a 2-tuple fails to unpack above and was dropped).
                params[param_to_remove] = param_to_remove_val
                params_q.put((entry, rid, orig_url))
        else:
            # Domain not accessible right now: retry later, keeping orig_url
            # so the item unpacks correctly when it is read again.
            params_q.put((entry, rid, orig_url))

        time.sleep(0.05)
    print("params_consumer done")
コード例 #27
0
 def _build_url(self, **params):
     """Return the ``/query`` endpoint URL with default and given parameters.

     The defaults (``function``, ``outputsize``, ``apikey``) are overridden
     by any keyword argument of the same name.
     """
     endpoint = f'{self.base_url}/query'
     defaults = {
         'function': 'TIME_SERIES_INTRADAY',
         'outputsize': 'full',
         'apikey': self.token,
     }
     query = {**defaults, **params}
     builder = PreparedRequest()
     builder.prepare_url(endpoint, query)
     return builder.url
コード例 #28
0
def build_get_features_url(base_url, layername):
    """Return the unquoted WFS 1.0.0 ``GetFeature`` URL for ``layername``.

    The request asks for ``application/json`` output.
    """
    wfs_query = dict(
        service="WFS",
        version="1.0.0",
        request="GetFeature",
        typeName=layername,
        outputFormat="application/json",
    )
    prepared = PreparedRequest()
    prepared.prepare_url(base_url, wfs_query)
    return unquote(prepared.url)
コード例 #29
0
ファイル: utils.py プロジェクト: ysf002/seahub
def get_cad_dict(request, username, repo_id, file_path):
    """Return the CAD viewer settings for a file.

    The dict holds the mobile and PC size limits plus ``doc_url``: the
    ``CadApiFileContentView`` URL on ``CAD_HOST`` with ``repo_id`` and
    ``file_path`` as query parameters. ``request`` and ``username`` are not
    used here.
    """
    doc_request = PreparedRequest()
    doc_request.prepare_url(
        CAD_HOST + reverse('CadApiFileContentView'),
        {'repo_id': repo_id, 'file_path': file_path},
    )

    return {
        'cad_mobile_size_limit': CAD_MOBILE_SIZE_LIMIT,
        'cad_pc_size_limit': CAD_PC_SIZE_LIMIT,
        'doc_url': doc_request.url,
    }
コード例 #30
0
    def update_query_string_param(self, url, param, value):
        """Set query parameter ``param`` to ``value`` if the URL already has it.

        The URL is rebuilt from its scheme, netloc and path plus the
        (possibly updated) query parameters.
        """
        parsed = urlparse(url)
        query = parse_qs(parsed.query)

        # Only an existing parameter is replaced; a missing one is not added.
        if param in query:
            query[param] = value

        base_url = "".join([parsed.scheme, "://", parsed.netloc, parsed.path])
        rebuilt = PreparedRequest()
        rebuilt.prepare_url(base_url, query)
        return rebuilt.url
コード例 #31
0
ファイル: youtube.py プロジェクト: Exide-PC/bot_exide
def get_video_title(videoId: str) -> str:
    """Return the title of the YouTube video with id ``videoId``.

    Raises ``ExecutionException`` when the API reply contains no items.
    """
    endpoint = "https://www.googleapis.com/youtube/v3/videos"
    query = {'key': token, 'id': videoId, 'part': 'contentDetails,snippet'}

    prepared = PreparedRequest()
    prepared.prepare_url(endpoint, query)

    payload = requests.get(prepared.url).json()
    items = payload['items']
    if len(items) == 0:
        raise ExecutionException('Video not found')

    first_item = items[0]
    return first_item['snippet']['title']
コード例 #32
0
    def oauth_access_token_url(self):
        """Build the OAuth access token URL from the stored credentials."""

        token_url = self._access_token_url.format(base_url=self.base_url)

        query = [
            ('client_id', self.credentials.api_key),
            ('client_secret', self.credentials.secret),
            ('code', self.credentials.code),
        ]

        prepared = PreparedRequest()
        prepared.prepare_url(url=token_url, params=query)
        return prepared.url
コード例 #33
0
    def oauth_authorize_url(self, redirect_to=None):
        """Build the OAuth authorize URL.

        :param redirect_to: URL shopify will redirect to once authorized.
        """

        authorize_url = self._authorize_url.format(base_url=self.base_url)

        # The scope list is sent as a single comma-separated value.
        query = [
            ('client_id', self.credentials.api_key),
            ('scope', ",".join(self.credentials.scope)),
            ('redirect_uri', redirect_to),
        ]

        prepared = PreparedRequest()
        prepared.prepare_url(url=authorize_url, params=query)
        return prepared.url
コード例 #34
0
ファイル: responseloader.py プロジェクト: ikreymer/pywb
    def load_resource(self, cdx, params):
        """Fetch the upstream resource for ``cdx`` and describe it as a WARC response.

        Returns ``None`` when ``cdx`` has no ``load_url``, when the requested
        ``content_type`` is ``VideoLoader.CONTENT_TYPE``, or when
        ``memento_url`` is set but the upstream reply has no
        ``Memento-Datetime`` header.

        When the upstream reply's ``Warcserver-Type`` header is ``warc``,
        ``cdx['source']`` is set and ``(None, upstream headers, upstream_res)``
        is returned. Otherwise the result is
        ``(warc_headers, http_headers_buff, upstream_res)`` where
        ``warc_headers`` is a ``StatusAndHeaders`` WARC/1.0 record header and
        ``http_headers_buff`` holds the reconstructed HTTP status line and
        headers.

        Raises ``LiveResourceException`` when ``load_url`` cannot be prepared
        as a URL.
        """
        load_url = cdx.get('load_url')
        if not load_url:
            return None

        if params.get('content_type') == VideoLoader.CONTENT_TYPE:
            return None

        # Non-live loads are routed through the forward proxy prefix, if set.
        if self.forward_proxy_prefix and not cdx.get('is_live'):
            load_url = self.forward_proxy_prefix + load_url

        input_req = params['_input_req']

        req_headers = input_req.get_req_headers()

        dt = timestamp_to_datetime(cdx['timestamp'])

        if cdx.get('memento_url'):
            req_headers['Accept-Datetime'] = datetime_to_http_date(dt)

        method = input_req.get_req_method()
        data = input_req.get_req_body()

        # Prepare the URL (and any Authorization header derived from it)
        # through requests' PreparedRequest.
        p = PreparedRequest()
        try:
            p.prepare_url(load_url, None)
        except:
            raise LiveResourceException(load_url)
        p.prepare_headers(None)
        p.prepare_auth(None, load_url)

        auth = p.headers.get('Authorization')
        if auth:
            req_headers['Authorization'] = auth

        load_url = p.url

        # host is set to the actual host for live loading
        # ensure it is set to the load_url host
        if not cdx.get('is_live'):
            #req_headers.pop('Host', '')
            req_headers['Host'] = urlsplit(p.url).netloc

            referrer = cdx.get('set_referrer')
            if referrer:
                req_headers['Referer'] = referrer

        upstream_res = self._do_request_with_redir_check(method, load_url,
                                                         data, req_headers,
                                                         params, cdx)

        # Prefer the datetime reported by the upstream reply, if any.
        memento_dt = upstream_res.headers.get('Memento-Datetime')
        if memento_dt:
            dt = http_date_to_datetime(memento_dt)
            cdx['timestamp'] = datetime_to_timestamp(dt)
        elif cdx.get('memento_url'):
        # if 'memento_url' set and no Memento-Datetime header present
        # then its an error
            return None

        agg_type = upstream_res.headers.get('Warcserver-Type')
        if agg_type == 'warc':
            cdx['source'] = unquote(upstream_res.headers.get('Warcserver-Source-Coll'))
            return None, upstream_res.headers, upstream_res

        if upstream_res.version == 11:
            version = '1.1'
        else:
            version = '1.0'

        # Rebuild the HTTP status line, then append the response headers.
        status = 'HTTP/{version} {status} {reason}\r\n'
        status = status.format(version=version,
                               status=upstream_res.status,
                               reason=upstream_res.reason)

        http_headers_buff = status

        orig_resp = upstream_res._original_response

        # The PY 3 path is tried first; any exception in it falls back to
        # the PY 2 path below.
        try:  #pragma: no cover
        #PY 3
            resp_headers = orig_resp.headers._headers
            for n, v in resp_headers:
                nl = n.lower()
                if nl in self.SKIP_HEADERS:
                    continue

                if nl in self.UNREWRITE_HEADERS:
                    v = self.unrewrite_header(cdx, v)

                http_headers_buff += n + ': ' + v + '\r\n'

            http_headers_buff += '\r\n'

            try:
                # http headers could be encoded as utf-8 (though non-standard)
                # first try utf-8 encoding
                http_headers_buff = http_headers_buff.encode('utf-8')
            except:
                # then, fall back to latin-1
                http_headers_buff = http_headers_buff.encode('latin-1')

        except:  #pragma: no cover
        #PY 2
            resp_headers = orig_resp.msg.headers

            for line in resp_headers:
                n, v = line.split(':', 1)
                n = n.lower()
                v = v.strip()

                if n in self.SKIP_HEADERS:
                    continue

                # Keep the original raw line unless the value was rewritten.
                new_v = v
                if n in self.UNREWRITE_HEADERS:
                    new_v = self.unrewrite_header(cdx, v)

                if new_v != v:
                    http_headers_buff += n + ': ' + new_v + '\r\n'
                else:
                    http_headers_buff += line

            # if python2, already byte headers, so leave as is
            http_headers_buff += '\r\n'

        # Best effort: the peer address is read from private socket
        # attributes and left as None when they are unavailable.
        try:
            fp = upstream_res._fp.fp
            if hasattr(fp, 'raw'):  #pragma: no cover
                fp = fp.raw
            remote_ip = fp._sock.getpeername()[0]
        except:  #pragma: no cover
            remote_ip = None

        warc_headers = {}

        warc_headers['WARC-Type'] = 'response'
        warc_headers['WARC-Record-ID'] = self._make_warc_id()
        warc_headers['WARC-Target-URI'] = cdx['url']
        warc_headers['WARC-Date'] = datetime_to_iso_date(dt)

        # Non-live records also note where and when they were loaded from.
        if not cdx.get('is_live'):
            now = datetime.datetime.utcnow()
            warc_headers['WARC-Source-URI'] = cdx.get('load_url')
            warc_headers['WARC-Creation-Date'] = datetime_to_iso_date(now)

        if remote_ip:
            warc_headers['WARC-IP-Address'] = remote_ip

        ct = upstream_res.headers.get('Content-Type')
        if ct:
            metadata = self.get_custom_metadata(ct, dt)
            if metadata:
                warc_headers['WARC-JSON-Metadata'] = json.dumps(metadata)

        warc_headers['Content-Type'] = 'application/http; msgtype=response'

        # A HEAD reply has no body; otherwise use the upstream
        # Content-Length, with -1 when the header is absent.
        if method == 'HEAD':
            content_len = 0
        else:
            content_len = upstream_res.headers.get('Content-Length', -1)

        self._set_content_len(content_len,
                              warc_headers,
                              len(http_headers_buff))

        warc_headers = StatusAndHeaders('WARC/1.0', warc_headers.items())
        return (warc_headers, http_headers_buff, upstream_res)
コード例 #35
0
    def load_resource(self, cdx, params):
        """Fetch the upstream resource for ``cdx`` and describe it as a WARC response.

        Returns ``None`` when ``cdx`` has no ``load_url``, when the requested
        ``content_type`` is ``VideoLoader.CONTENT_TYPE``, or when
        ``memento_url`` is set but the upstream reply has no
        ``Memento-Datetime`` header.

        When the upstream reply's ``WebAgg-Type`` header is ``warc``,
        ``cdx['source']`` is set and ``(None, upstream headers, upstream_res)``
        is returned. Otherwise the result is
        ``(warc_headers, http_headers_buff, upstream_res)`` where
        ``warc_headers`` is a ``StatusAndHeaders`` WARC/1.0 record header and
        ``http_headers_buff`` is the latin-1 encoded HTTP status line and
        headers.

        Raises ``LiveResourceException`` when the upstream request fails.
        """
        load_url = cdx.get('load_url')
        if not load_url:
            return None

        if params.get('content_type') == VideoLoader.CONTENT_TYPE:
            return None

        input_req = params['_input_req']

        req_headers = input_req.get_req_headers()

        dt = timestamp_to_datetime(cdx['timestamp'])

        if cdx.get('memento_url'):
            req_headers['Accept-Datetime'] = datetime_to_http_date(dt)

        method = input_req.get_req_method()
        data = input_req.get_req_body()

        # Prepare the URL (and any Authorization header derived from it)
        # through requests' PreparedRequest.
        p = PreparedRequest()
        p.prepare_url(load_url, None)
        p.prepare_headers(None)
        p.prepare_auth(None, load_url)

        auth = p.headers.get('Authorization')
        if auth:
            req_headers['Authorization'] = auth

        load_url = p.url

        # The request is sent without following redirects and without
        # preloading or decoding the body.
        try:
            upstream_res = self.pool.urlopen(method=method,
                                             url=load_url,
                                             body=data,
                                             headers=req_headers,
                                             redirect=False,
                                             assert_same_host=False,
                                             preload_content=False,
                                             decode_content=False,
                                             retries=self.num_retries,
                                             timeout=params.get('_timeout'))

        except Exception as e:
            raise LiveResourceException(load_url)

        # Prefer the datetime reported by the upstream reply, if any.
        memento_dt = upstream_res.headers.get('Memento-Datetime')
        if memento_dt:
            dt = http_date_to_datetime(memento_dt)
            cdx['timestamp'] = datetime_to_timestamp(dt)
        elif cdx.get('memento_url'):
        # if 'memento_url' set and no Memento-Datetime header present
        # then its an error
            return None

        agg_type = upstream_res.headers.get('WebAgg-Type')
        if agg_type == 'warc':
            cdx['source'] = unquote(upstream_res.headers.get('WebAgg-Source-Coll'))
            return None, upstream_res.headers, upstream_res

        self.raise_on_self_redirect(params, cdx,
                                    str(upstream_res.status),
                                    upstream_res.headers.get('Location'))


        if upstream_res.version == 11:
            version = '1.1'
        else:
            version = '1.0'

        # Rebuild the HTTP status line, then append the response headers.
        status = 'HTTP/{version} {status} {reason}\r\n'
        status = status.format(version=version,
                               status=upstream_res.status,
                               reason=upstream_res.reason)

        http_headers_buff = status

        orig_resp = upstream_res._original_response

        # The PY 3 path is tried first; any exception in it falls back to
        # the PY 2 path below.
        try:  #pragma: no cover
        #PY 3
            resp_headers = orig_resp.headers._headers
            for n, v in resp_headers:
                if n.lower() in self.SKIP_HEADERS:
                    continue

                http_headers_buff += n + ': ' + v + '\r\n'
        except:  #pragma: no cover
        #PY 2
            resp_headers = orig_resp.msg.headers
            # NOTE(review): `n` is an element of orig_resp.getheaders(); if
            # those are (name, value) pairs this membership test may never
            # match SKIP_HEADERS -- confirm.
            for n, v in zip(orig_resp.getheaders(), resp_headers):
                if n in self.SKIP_HEADERS:
                    continue

                http_headers_buff += v

        http_headers_buff += '\r\n'
        http_headers_buff = http_headers_buff.encode('latin-1')

        # Best effort: the peer address is read from private socket
        # attributes and left as None when they are unavailable.
        try:
            fp = upstream_res._fp.fp
            if hasattr(fp, 'raw'):  #pragma: no cover
                fp = fp.raw
            remote_ip = fp._sock.getpeername()[0]
        except:  #pragma: no cover
            remote_ip = None

        warc_headers = {}

        warc_headers['WARC-Type'] = 'response'
        warc_headers['WARC-Record-ID'] = self._make_warc_id()
        warc_headers['WARC-Target-URI'] = cdx['url']
        warc_headers['WARC-Date'] = datetime_to_iso_date(dt)
        if remote_ip:
            warc_headers['WARC-IP-Address'] = remote_ip

        warc_headers['Content-Type'] = 'application/http; msgtype=response'

        # -1 is passed when the upstream reply has no Content-Length header.
        self._set_content_len(upstream_res.headers.get('Content-Length', -1),
                              warc_headers,
                              len(http_headers_buff))

        warc_headers = StatusAndHeaders('WARC/1.0', warc_headers.items())
        return (warc_headers, http_headers_buff, upstream_res)
コード例 #36
0
ファイル: api.py プロジェクト: qq40660/ChinaAPI
 def _request_url(cls, url, params):
     """Return ``url`` with ``params`` encoded into its query string."""
     prepared = PreparedRequest()
     prepared.prepare_url(url, params)
     full_url = prepared.url
     return full_url