Example #1
0
 def _get_response(self, url, data):
     """Fetch ``url`` with ``data`` merged into its query string and log the body.

     The URL is built with ``PreparedRequest.prepare_url`` and unquoted
     before the GET request is sent.  The raw response content is logged at
     WARNING level.

     Returns:
         bool: Always ``True``.
     """
     req = PreparedRequest()
     req.prepare_url(url, data)
     result = requests.get(unquote(req.url)).content
     # ``logging.warn`` is a deprecated alias of ``logging.warning`` and was
     # removed in Python 3.13.
     logging.warning('-getresponse')
     logging.warning(result)
     return True
Example #2
0
    def _call_api(self, method, path, params=None, data=None):
        """Execute an API request.

        Arguments:
            method (str): HTTP request type
            path (str): API request path, appended to ``https://<host>/api/``
            params (dict): Optional query-string parameters merged into the URL
            data (dict): Data to send in dictionary format (sent as JSON)

        Returns:
            Response: The requests response object

        Raises:
            Whatever ``response.raise_for_status()`` raises for an error
            status; the exception propagates to the caller unchanged.
        """
        url = 'https://{}/api/{}'.format(self._host, path)
        if params:
            req = PreparedRequest()
            req.prepare_url(url, params)
            url = req.url
        # Connect lazily the first time a request is made.
        if self._session is None:
            self.connect()
        response = self._session.request(method=method,
                                         url=url,
                                         json=data,
                                         verify=self._verify_ssl,
                                         timeout=self._timeout)
        # TODO CHECK FOR TOKEN EXPIRATION
        response.raise_for_status()
        return response
Example #3
0
def confirm_sign_up():
    """Confirm a Cognito sign-up using the ``code``/``username`` query params.

    Always answers with a 302 redirect.  The target is the user's
    ``website`` attribute when present, otherwise a default URL.  When the
    confirmation call fails, the error message and code are appended to the
    redirect URL's query string.

    Returns:
        Response: A 302 response whose ``Location`` header is the redirect URL.
    """
    from chalicelib.main import app, USER_POOL_ID, COGNITO_CLIENT_ID

    confirmation_code = app.current_request.query_params["code"]
    username = app.current_request.query_params["username"]
    user = cognito_idp_client.admin_get_user(UserPoolId=USER_POOL_ID,
                                             Username=username)
    redirect_url = "http://chinmayamission.com"
    for attribute in user["UserAttributes"]:
        if attribute["Name"] == "website":
            redirect_url = attribute["Value"]
    try:
        # Exceptions: https://docs.aws.amazon.com/cognito-user-identity-pools/latest/APIReference/API_ConfirmSignUp.html
        cognito_idp_client.confirm_sign_up(
            ClientId=COGNITO_CLIENT_ID,
            Username=username,
            ConfirmationCode=confirmation_code,
        )
    except Exception as e:
        # Not every exception carries a ``response`` dict; fall back to the
        # exception's own text and type so the handler itself cannot raise.
        error = (getattr(e, "response", None) or {}).get("Error", {})
        params = {
            "confirmSignupErrorMessage": error.get("Message", str(e)),
            "confirmSignupErrorCode": error.get("Code", type(e).__name__),
        }
        # Use PreparedRequest so it preserves the existing query string in redirect_url
        req = PreparedRequest()
        req.prepare_url(redirect_url, params)
        return Response(status_code=302,
                        body="",
                        headers={"Location": req.url})

    return Response(status_code=302,
                    body="",
                    headers={"Location": redirect_url})
Example #4
0
    def rebuild_auth(self, prepared_request: PreparedRequest,
                     response: Response) -> None:
        """
        Override Session.rebuild_auth for redirects.

        The Authorization header is removed when the redirect goes to a
        different hostname that is not an Earthdata Login (EDL) host. When no
        credentials were supplied at instantiation (``self.auth`` is None) and
        ``trust_env`` is set, .netrc is consulted for the new host.

        Args:
            prepared_request: Object for the redirection destination.
            response: Object for where we just came from.
        """
        request_headers = prepared_request.headers
        target_host = cast(str, urlparse(prepared_request.url).hostname)
        source_host = cast(str, urlparse(response.request.url).hostname)

        if ('Authorization' in request_headers
                and source_host != target_host
                and not _is_edl_hostname(target_host)):
            del request_headers['Authorization']

        # Explicit credentials, or an untrusted environment, mean no .netrc lookup.
        if self.auth is not None or not self.trust_env:
            return

        # .netrc might have more auth for us on our new host.
        netrc_auth = get_netrc_auth(prepared_request.url)
        if netrc_auth is not None:
            prepared_request.prepare_auth(netrc_auth)
def getAthleteName(name):
    """Return the underscore-joined name and its Wikipedia article URL.

    Spaces in ``name`` are replaced with underscores and the result is
    appended to ``https://en.wikipedia.org/wiki/``.

    Args:
        name (str): Athlete name, e.g. ``"Usain Bolt"``.

    Returns:
        tuple: ``(name_with_underscores, url)``.
    """
    # The original also built a PreparedRequest whose result was never used;
    # that dead code is removed.
    name = name.replace(" ", "_")
    url_link = "https://en.wikipedia.org/wiki/" + name
    return name, url_link
Example #6
0
    def get_redirect_url(self, *args, **kwargs):
        """
        Redirects user to relative social auth provider for logout process.
        In-case no auth provider is found or the logout_url is missing in provider's configurations
        the user is redirected to edX's default logout page '/logout'

        Returns:
            str: The provider's ``logout_url`` with ``id_token_hint`` and
            ``post_logout_redirect_uri`` query parameters, or the reversed
            ``'logout'`` URL as the fallback.
        """
        backend_name = getattr(settings, 'COLARAZ_AUTH_PROVIDER_BACKEND_NAME',
                               None)
        # ``in`` replaces the legacy ``has_key`` call.
        if not (third_party_auth.is_enabled() and backend_name
                and 'id_token' in self.request.session):
            return reverse('logout')

        provider = next(
            (enabled
             for enabled in third_party_auth.provider.Registry.enabled()
             if enabled.backend_name == backend_name),
            None,
        )
        if provider is None:
            return reverse('logout')

        # A missing or blank ``other_settings`` is treated as an empty JSON
        # object instead of making ``json.loads`` fail.
        other_settings = getattr(provider, 'other_settings', None) or '{}'
        logout_url = json.loads(other_settings).get('logout_url')
        if not logout_url:
            return reverse('logout')

        redirect_to = self.request.META.get(
            'HTTP_REFERER') or get_site_base_url(self.request)
        params = {
            'id_token_hint': self.request.session['id_token'],
            'post_logout_redirect_uri': redirect_to
        }
        req = PreparedRequest()
        req.prepare_url(logout_url, params)
        return req.url
Example #7
0
def deserialize_prepared_request(serialized):
    """Rebuild a PreparedRequest from its serialized dictionary form.

    Reads the ``body``, ``headers``, ``method`` and ``uri`` keys of
    ``serialized``; headers are wrapped in a ``CaseInsensitiveDict``.
    """
    prepared = PreparedRequest()
    prepared.body = serialized['body']
    header_map = CaseInsensitiveDict(serialized['headers'])
    prepared.headers = header_map
    prepared.method = serialized['method']
    prepared.url = serialized['uri']
    return prepared
Example #8
0
def send_verify_email(user, token):
    """Email ``user`` an account-verification link carrying ``token``.

    Returns the verification URL when the app config has ``TESTING`` set
    (no email is sent) or ``DEBUG`` set (after sending); otherwise the
    function falls through and returns ``None``.
    """
    link_request = PreparedRequest()
    host = request.host_url
    query = {'user': user.public_id, 'auth': token}
    link_request.prepare(url=f'{host}verify', params=query)

    if current_app.config['TESTING']:
        return link_request.url

    # The verification mail reuses the password-reset HTML template.
    template_path = rel_path('../util/password-reset.html', __file__)
    with open(template_path) as template_file:
        html_template = Template(template_file.read())

    rendered = html_template.render(
        link=link_request.url,
        host=host,
        header='Verify Your Account',
        name=user.name_first,
        body="Tap the button below to verify your account, the link expires within 30 minutes.",
        button_text="Verify Account",
    )
    message = MIMEText(rendered, 'html')

    send_email(user.email, message, "Verify your account")

    if current_app.config['DEBUG']:
        return link_request.url
Example #9
0
def scrapeYahoo(season, week, position):
    """Scrape weekly Yahoo fantasy-football player stats for one position.

    Requests the player list at offsets 0, 25, ..., 275 (the ``count`` query
    parameter), keeps the stat columns named in ``col_names`` and stops at
    the first page that yields no players.

    Args:
        season: Not used by this function; kept so existing callers still work.
        week: Week number, interpolated into the ``stat1`` query parameter.
        position: Position filter sent as the ``pos`` query parameter.

    Returns:
        pandas.DataFrame: One row per player with the columns in
        ``col_names``; empty (but with those columns) if nothing was scraped.
    """
    url = 'https://football.fantasysports.yahoo.com/f1/47241/players'
    drop_cols = [
        0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 17, 19, 20, 24, 25, 30, 31, 32, 33,
        34, 35, 36, 37
    ]
    col_names = [
        'name', 'pass_att', 'pass_comp', 'pass_yds', 'pass_td', 'pass_int',
        'pass_sack', 'rush_att', 'rush_yds', 'rush_td', 'rec_tgt', 'rec_rec',
        'rec_yds', 'rec_td'
    ]

    # Pages are collected and concatenated once at the end:
    # ``DataFrame.append`` was removed in pandas 2.0.
    pages = []
    for count in range(0, 300, 25):
        querystring = {
            'sort': 'PTS',
            'sdir': '1',
            'status': 'A',
            'pos': '{}'.format(position),
            'stat1': 'S_PW_{}'.format(week),
            'jsenabled': '0',
            'count': '{}'.format(count)
        }
        req = PreparedRequest()
        req.prepare_url(url, querystring)
        tables = pd.read_html(req.url)
        players = tables[1]
        players.drop(players.columns[drop_cols], axis=1, inplace=True)
        players.columns = col_names
        players['name'] = players['name'].apply(name_cleanup)
        if players.empty:
            break
        pages.append(players)

    if not pages:
        return pd.DataFrame(columns=col_names)
    return pd.concat(pages, ignore_index=True)
Example #10
0
    def _call_api(self, http_func, host, path, data, headers, timeout=None):
        """Call ``http_func`` on ``host + path`` and unwrap the result.

        ``self._extra_url_params`` are merged into the URL when non-empty.
        ``http_func`` is expected to return ``(ok, message, payload)``.

        Returns:
            tuple: ``(False, message, None)`` when the call fails or the
            payload's ``code`` is not 0; otherwise ``(True, "ok", data)``
            with the payload's ``data`` entry.
        """
        url = "{host}{path}".format(host=host, path=path)

        begin = time.time()

        # add extra params in url if not empty
        extra_params = self._extra_url_params
        if extra_params:
            prepared = PreparedRequest()
            prepared.prepare_url(url, extra_params)
            url = prepared.url

        ok, message, payload = http_func(url,
                                         data,
                                         headers=headers,
                                         timeout=timeout)

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("do http request: method=`%s`, url=`%s`, data=`%s`",
                         http_func.__name__, url, json.dumps(data))
            logger.debug(
                "http request result: ok=`%s`, message=`%s`, _data=`%s`", ok,
                message, json.dumps(payload))
            logger.debug("http request took %s ms",
                         int((time.time() - begin) * 1000))

        if not ok:
            return False, message or "verify from iam server fail", None

        if payload.get("code") != 0:
            return False, payload.get("message") or "iam api fail", None

        return True, "ok", payload.get("data")
Example #11
0
def test_prepared_copy(kwargs):
    """A copied PreparedRequest matches the original attribute by attribute."""
    original = PreparedRequest()
    if kwargs:
        original.prepare(**kwargs)
    duplicate = original.copy()
    compared_attrs = ('method', 'url', 'headers', '_cookies', 'body', 'hooks')
    for name in compared_attrs:
        assert getattr(original, name) == getattr(duplicate, name)
Example #12
0
    def f(
        method="GET",
        url="http://mockapi.com",
        headers=None,
        files=None,
        data=None,
        params=None,
        auth=None,
        cookies=None,
        hooks=None,
        json=None,
    ) -> PreparedRequest:
        """Build a PreparedRequest from the given arguments.

        Every argument is forwarded to ``PreparedRequest.prepare`` under the
        same keyword name.
        """
        settings = {
            "method": method,
            "url": url,
            "headers": headers,
            "files": files,
            "data": data,
            "params": params,
            "auth": auth,
            "cookies": cookies,
            "hooks": hooks,
            "json": json,
        }
        prepared = PreparedRequest()
        prepared.prepare(**settings)
        return prepared
Example #13
0
def test_prepared_copy(kwargs):
    """Check that ``PreparedRequest.copy`` preserves the compared attributes."""
    source = PreparedRequest()
    if kwargs:
        source.prepare(**kwargs)
    clone = source.copy()
    attr_names = ('method', 'url', 'headers', '_cookies', 'body', 'hooks')
    for attr_name in attr_names:
        assert getattr(source, attr_name) == getattr(clone, attr_name)
Example #14
0
def test_prepared_request_no_cookies_copy():
    """Copying a request prepared without cookies yields an equal copy."""
    prepare_args = {
        'method': 'GET',
        'url': 'http://www.example.com',
        'data': 'foo=bar',
        'hooks': default_hooks(),
    }
    prepared = PreparedRequest()
    prepared.prepare(**prepare_args)
    assert_copy(prepared, prepared.copy())
Example #15
0
def is_valid_url(url):
    """Return True when ``PreparedRequest.prepare_url`` accepts ``url``.

    Any exception raised while preparing the URL counts as invalid.
    """
    candidate = PreparedRequest()
    try:
        candidate.prepare_url(url, None)
    except Exception:
        return False
    return True
Example #16
0
def _search(query: str):
    """Search YouTube for ``query`` and summarise the videos and playlists found.

    Calls the YouTube Data API v3 search endpoint (up to 15 results), then
    looks up durations for the videos via ``get_video_durations``.

    Args:
        query: Free-text search string.

    Returns:
        list[dict]: One dict per result with ``id``, ``title`` and
        ``isPlaylist``; video entries also carry ``duration``.
    """
    url = "https://www.googleapis.com/youtube/v3/search"
    params = {'key': token, 'part': 'snippet', 'q': query, 'maxResults': 15}
    req = PreparedRequest()
    req.prepare_url(url, params)

    # Named ``payload`` so a module-level ``json`` import is not shadowed.
    payload = requests.get(req.url).json()

    results = []
    video_ids = []
    for item in payload['items']:
        video_id = item['id'].get('videoId')
        playlist_id = item['id'].get('playlistId')
        # Skip results that are neither a video nor a playlist.
        if not video_id and not playlist_id:
            continue
        results.append({
            'id': video_id if video_id is not None else playlist_id,
            'title': html.unescape(item['snippet']['title']),
            'isPlaylist': playlist_id is not None,
        })
        if video_id:
            video_ids.append(video_id)

    durations = get_video_durations(video_ids)

    for result in results:
        if not result['isPlaylist']:
            result['duration'] = durations[result['id']]

    return results
Example #17
0
def check_url(url):
    """Return True when ``url`` can be prepared by requests, else False.

    ``prepare_url`` signals a bad URL with ``MissingSchema`` (no scheme) or
    ``InvalidURL`` (e.g. no host, unparsable URL).  Both are reported as
    ``False`` so the check never raises for a malformed URL.
    """
    prepared_request = PreparedRequest()
    try:
        prepared_request.prepare_url(url, None)
    except (requests.exceptions.MissingSchema,
            requests.exceptions.InvalidURL):
        return False
    return True
Example #18
0
def check_url(url):
    """Return True when ``url`` can be prepared by requests, else False.

    Both failure modes of ``prepare_url`` are handled: ``MissingSchema``
    (no scheme) and ``InvalidURL`` (e.g. no host, unparsable URL), so a
    malformed URL yields ``False`` instead of an exception.
    """
    prepared_request = PreparedRequest()
    try:
        prepared_request.prepare_url(url, None)
    except (requests.exceptions.MissingSchema,
            requests.exceptions.InvalidURL):
        return False
    return True
Example #19
0
def search():
    """Run the filtered query and stream the results as CSV.

    The response headers carry the total and filtered row counts and, when
    more filtered rows remain beyond the current page, a ``Query-Next`` URL
    pointing at the next page with the same ordering and filters.

    Returns:
        Response: A ``text/csv`` response built from ``result_generator``.
    """
    meta = get_meta()
    filters = get_filters()

    # get counts
    count_total = query(count_all=True)
    count_filtered = query(filters=filters, count_filtered=True)

    # build response
    results = query(meta=meta, filters=filters)

    headers = {
        "Query-Count-Total": str(count_total),
        "Query-Count-Filtered": str(count_filtered),
        "Cache-Control": "max-age=259200",
        "Access-Control-Expose-Headers": "Query-Count-Total, Query-Count-Filtered, Query-Next"
    }

    if count_filtered > meta["page"] * meta["limit"]:
        next_url_params = {
            'page': str(meta["page"] + 1),
            'page_size': str(meta["limit"]),
            'order_by': meta["order_by"],
            'order_by_direction': meta["order_by_direction"]
        }
        next_url_params.update(filters)
        next_request = PreparedRequest()
        next_request.prepare_url(request.base_url, next_url_params)
        headers['Query-Next'] = next_request.url

    # ``dict.iteritems()`` exists only on Python 2; ``items()`` works on both.
    return Response(result_generator(results), headers=list(headers.items()),
                    mimetype='text/csv')
Example #20
0
def send_reset_email(user, token):
    """Email ``user`` a password-reset link carrying ``token``.

    Returns the reset URL when the app config has ``TESTING`` set (no email
    is sent) or ``DEBUG`` set (after sending); otherwise the function falls
    through and returns ``None``.
    """
    link_request = PreparedRequest()
    host = request.host_url
    query = {'user': user.public_id, 'auth': token}
    link_request.prepare(url=f'{host}password_reset', params=query)

    if current_app.config['TESTING']:
        return link_request.url

    template_path = rel_path('../util/password-reset.html', __file__)
    with open(template_path) as template_file:
        html_template = Template(template_file.read())

    rendered = html_template.render(
        link=link_request.url,
        host=host,
        header='Reset Your Password',
        name=user.name_first,
        body="Tap the button below to reset your password. "
        "If you didn't request a new password, you can safely delete this email.",
        button_text="Reset Password",
    )
    message = MIMEText(rendered, 'html')

    send_email(user.email, message, "Reset your password")

    if current_app.config['DEBUG']:
        return link_request.url
def apipara(a):
    """Call the assign-job endpoint for ``a`` and return its outcome.

    Args:
        a: Value sent as the ``ID`` query parameter.

    Returns:
        tuple: The ``success`` and ``msg`` entries of the JSON response.
    """
    req = PreparedRequest()
    url = "http://integra-net4/BMJ_ICE_Test/assignjob"
    params = {'ID': a}
    req.prepare_url(url, params)
    # The context manager closes the HTTP response even if reading fails;
    # the original left it open.
    with urllib.request.urlopen(req.url) as response:
        content = response.read()
    data = json.loads(content)
    return data['success'], data['msg']
 def test_no_keep_alive_by_default(self):
     """A plainly prepared GET request carries no ``Connection`` header."""
     prepare_args = {
         'method': 'GET',
         'url': 'http://www.example.com',
         'hooks': default_hooks(),
     }
     prepared = PreparedRequest()
     prepared.prepare(**prepare_args)
     assert 'Connection' not in prepared.headers
Example #23
0
    def prepare(self,
            method=None, url=None, headers=None, files=None, data=None,
            params=None, auth=None, cookies=None, hooks=None, json=None):
        """Prepare the request via the base class, then run ``adapt_prepare``.

        All arguments are handed to ``_PreparedRequest.prepare`` positionally,
        in the order they are declared here.
        """
        base_args = (method, url, headers, files, data,
                     params, auth, cookies, hooks, json)
        _PreparedRequest.prepare(self, *base_args)
        self.adapt_prepare()
Example #24
0
    def addParamsToUrl(self, url: str, params: dict) -> str:
        """
        Append the given parameters to the URL's query string.
        """
        prepared = PreparedRequest()
        prepared.prepare_url(url, params)
        result_url = prepared.url
        return result_url
Example #25
0
def append_params_to_url(DO_WE_ADD_PARAMS_REUPLOAD, url):
    """Optionally append one random query parameter to ``url``.

    Used for appending random strings as query parameters to URLs in the
    reposting module, which gives a unique variation of the URL.  When
    ``DO_WE_ADD_PARAMS_REUPLOAD`` is falsy the URL is returned unchanged.
    """
    if not DO_WE_ADD_PARAMS_REUPLOAD:
        return url

    random_params = {random_char(5): random_char(5)}
    prepared = PreparedRequest()
    prepared.prepare_url(url, random_params)
    return prepared.url
Example #26
0
    def get_screenshot(
        self,
        url: str,
        element_name: str,
        user: "******",
    ) -> Optional[bytes]:
        """Load ``url`` in Selenium and return a PNG screenshot of an element.

        The ``standalone`` report-mode query parameter is added to the URL.
        After a configurable head start the method waits for the element with
        class ``element_name``, for ``.loading`` elements to disappear and for
        ``.slice_container`` elements to become visible, then waits for chart
        animation before taking the screenshot.

        Args:
            url: Page to load.
            element_name: CSS class name of the element to capture.
            user: User passed to ``self.auth`` to obtain the driver.

        Returns:
            The PNG bytes, or ``None`` if no screenshot could be taken.  On a
            timeout the element is still captured if it had been located.
        """
        params = {"standalone": DashboardStandaloneMode.REPORT.value}
        req = PreparedRequest()
        req.prepare_url(url, params)
        url = req.url or ""

        driver = self.auth(user)
        driver.set_window_size(*self._window)
        driver.get(url)
        img: Optional[bytes] = None
        selenium_headstart = current_app.config[
            "SCREENSHOT_SELENIUM_HEADSTART"]
        logger.debug("Sleeping for %i seconds", selenium_headstart)
        sleep(selenium_headstart)

        # Bound before the try block so the timeout handler can tell whether
        # the element was ever located.
        element = None
        try:
            logger.debug("Wait for the presence of %s", element_name)
            element = WebDriverWait(driver,
                                    self._screenshot_locate_wait).until(
                                        EC.presence_of_element_located(
                                            (By.CLASS_NAME, element_name)))
            logger.debug("Wait for .loading to be done")
            WebDriverWait(driver, self._screenshot_load_wait).until_not(
                EC.presence_of_all_elements_located(
                    (By.CLASS_NAME, "loading")))
            logger.debug("Wait for chart to have content")
            WebDriverWait(driver, self._screenshot_locate_wait).until(
                EC.visibility_of_all_elements_located(
                    (By.CLASS_NAME, "slice_container")))
            selenium_animation_wait = current_app.config[
                "SCREENSHOT_SELENIUM_ANIMATION_WAIT"]
            logger.debug("Wait %i seconds for chart animation",
                         selenium_animation_wait)
            sleep(selenium_animation_wait)
            logger.info("Taking a PNG screenshot of url %s", url)
            img = element.screenshot_as_png
        except TimeoutException:
            logger.warning("Selenium timed out requesting url %s",
                           url,
                           exc_info=True)
            # If the very first wait timed out there is no element to capture;
            # the original raised UnboundLocalError here.
            if element is not None:
                img = element.screenshot_as_png
        except StaleElementReferenceException:
            logger.error(
                "Selenium got a stale element while requesting url %s",
                url,
                exc_info=True,
            )
        except WebDriverException as ex:
            logger.error(ex, exc_info=True)
        finally:
            self.destroy(driver,
                         current_app.config["SCREENSHOT_SELENIUM_RETRIES"])
        return img
Example #27
0
def test_data_argument_accepts_tuples(list_of_tuples):
    """
    Each entry of ``list_of_tuples`` is accepted as ``data`` and the prepared
    body equals its url-encoded form.
    """
    for pairs in list_of_tuples:
        prepared = PreparedRequest()
        prepared.prepare(
            method="GET",
            url="http://www.example.com",
            data=pairs,
            hooks=default_hooks(),
        )
        expected_body = urlencode(pairs)
        assert prepared.body == expected_body
def test_prepared_request_no_cookies_copy():
    """A request prepared without cookies survives ``copy()`` unchanged."""
    prepare_args = dict(
        method='GET',
        url='http://www.example.com',
        data='foo=bar',
        hooks=default_hooks(),
    )
    prepared = PreparedRequest()
    prepared.prepare(**prepare_args)
    assert_copy(prepared, prepared.copy())
Example #29
0
def apipara(a):
    """Fetch the JSON document for ``a`` and print its outcome fields.

    Args:
        a: Value sent as the ``ID`` query parameter.

    The ``success`` and ``msg`` entries of the JSON response are printed;
    nothing is returned.
    """
    req = PreparedRequest()
    url = "any url"
    params = {'ID':a}
    req.prepare_url(url, params)
    # The context manager closes the HTTP response even if reading fails;
    # the original left it open.
    with urllib.request.urlopen(req.url) as response:
        content = response.read()
    data = json.loads(content)
    print(data['success'])  # extracted JSON content
    print(data['msg'])  # extracted JSON content
Example #30
0
def add_params_to_url(url, params):
    """Return ``url`` with ``params`` merged into its query string.

    Any exception is printed and an empty string is returned instead.
    """
    try:
        prepared = PreparedRequest()
        prepared.prepare_url(url, params)
    except Exception as e:
        print(f"\n Exception occured: {e}!! \n URL: {url} \n PARAMS: {params}")
        return ""
    return prepared.url
Example #31
0
def check_url(url):
    """Return True when requests can prepare ``url``, False on any error."""
    # PreparedRequest is the requests class that parses and validates URLs.
    prepared_request = PreparedRequest()
    try:
        prepared_request.prepare_url(url, None)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not swallowed.
        return False
    return True
Example #32
0
def validate_url(text: str):
    """Validate a URL.

    Returns True when ``PreparedRequest.prepare_url`` accepts ``text``.  Both
    of its failure modes are reported as False: ``MissingSchema`` (no scheme)
    and ``InvalidURL`` (e.g. no host, unparsable URL).
    """
    # Imported locally because only MissingSchema is known to be imported at
    # file level.
    from requests.exceptions import InvalidURL

    prepared_request = PreparedRequest()
    try:
        prepared_request.prepare_url(text, None)
    except (MissingSchema, InvalidURL):
        return False
    return True
Example #33
0
def session_check(session_id):
    """Redirect to the ``follow`` URL, flagging whether the session cookie stuck.

    When the session's ``uuid`` matches ``session_id`` the session is marked
    valid and the client is redirected as-is.  Otherwise ``cookies_disabled=1``
    is added to the target's query string and ``_permanent`` is dropped from
    the session before redirecting.  Both redirects use status 307.
    """
    # NOTE(review): ``follow`` comes straight from the query string and is
    # used as a redirect target without validation — confirm this is intended.
    follow_url = request.args.get('follow')

    if 'uuid' in session and session['uuid'] == session_id:
        session['valid'] = True
        return redirect(follow_url, code=307)

    prepared = PreparedRequest()
    prepared.prepare_url(follow_url, {'cookies_disabled': 1})
    session.pop('_permanent', None)
    return redirect(prepared.url, code=307)
def test_data_argument_accepts_tuples(data):
    """The ``data`` argument accepts tuples of strings and the prepared body
    equals their url-encoded form.
    """
    prepare_args = {
        'method': 'GET',
        'url': 'http://www.example.com',
        'data': data,
        'hooks': default_hooks(),
    }
    prepared = PreparedRequest()
    prepared.prepare(**prepare_args)
    assert prepared.body == urlencode(data)
Example #35
0
    def __init__(self):
        """Initialise the base request, then reset this subclass's extra fields."""
        _PreparedRequest.__init__(self)
        self.headers = default_headers()

        # Every additional attribute starts out unset.
        self.ssl_options = None
        self.host = self.port = self.af = None
        self.decompress = None
        self.start_line = None
Example #36
0
    def get_order_requirements(
        did: str,
        service_endpoint: str,
        consumer_address: str,
        service_id: Union[str, int],
        service_type: str,
        token_address: str,
        userdata: Optional[Dict] = None,
    ) -> Optional[OrderRequirements]:
        """Query the initialize endpoint for what an order of a service requires.

        :param did: sent as the ``documentId`` query parameter
        :param service_endpoint: URL of the initialize endpoint
        :param consumer_address: hex str the ethereum account address of the consumer
        :param service_id: sent as the ``serviceId`` query parameter
        :param service_type: sent as the ``serviceType`` query parameter
        :param token_address: sent as the ``dataToken`` query parameter
        :param userdata: optional dict, JSON-encoded into the ``userdata`` parameter
        :return: OrderRequirements built from the response's ``numTokens``
            (converted to wei), ``dataToken``, ``to``, ``nonce`` and optional
            ``computeAddress``; ``None`` when the status code is not 200
        """
        query = {
            "documentId": did,
            "serviceId": service_id,
            "serviceType": service_type,
            "dataToken": token_address,
            "consumerAddress": consumer_address,
        }
        if userdata:
            query["userdata"] = json.dumps(userdata)

        prepared = PreparedRequest()
        prepared.prepare_url(service_endpoint, query)
        initialize_url = prepared.url

        logger.info(f"invoke the initialize endpoint with this url: {initialize_url}")
        response = DataServiceProvider._http_method("get", initialize_url)
        # The returned json should contain information about the required number of tokens
        # to consume `service_id`. If service is not available there will be an error or
        # the returned json is empty.
        if response.status_code != 200:
            return None

        order = dict(response.json())
        # numTokens comes as float, needs to be converted
        num_tokens = to_wei(Decimal(order["numTokens"]))
        return OrderRequirements(
            num_tokens,
            order["dataToken"],
            order["to"],
            int(order["nonce"]),
            order.get("computeAddress"),
        )
Example #37
0
def test_data_argument_accepts_tuples(data):
    """Tuples of strings passed as ``data`` end up url-encoded in the body."""
    prepared = PreparedRequest()
    prepared.prepare(method='GET', url='http://www.example.com', data=data,
                     hooks=default_hooks())
    expected_body = urlencode(data)
    assert prepared.body == expected_body
Example #38
0
    def oauth_access_token_url(self):
        """Generate the OAuth access token url.

        The URL template is filled with ``base_url`` and the credentials'
        ``api_key``, ``secret`` and ``code`` are added as query parameters.
        """
        base = self._access_token_url.format(base_url=self.base_url)

        query = [
            ('client_id', self.credentials.api_key),
            ('client_secret', self.credentials.secret),
            ('code', self.credentials.code),
        ]

        prepared = PreparedRequest()
        prepared.prepare_url(url=base, params=query)
        return prepared.url
Example #39
0
    def _format_args(self):
        """Return the params (or data) encoded as a suffix to append to the URL.

        The suffix starts with ``&`` when ``self.url`` already contains a
        ``?``, otherwise with ``?``.  ``self.params`` takes precedence over
        ``self.data``.

        Returns:
            str: The encoded suffix, or ``''`` when ``record_params`` is
            falsy, when there is nothing to encode, or when encoding fails.
        """
        if not self.record_params:
            return ''

        try:
            sign = "&" if '?' in self.url else "?"
            if self.params:
                return sign + PreparedRequest._encode_params(self.params)
            if self.data:
                return sign + PreparedRequest._encode_params(self.data)
        except Exception:
            # Best effort: a failure to encode yields no suffix.  ``Exception``
            # rather than a bare ``except`` so KeyboardInterrupt and
            # SystemExit still propagate.
            return ""

        return ""
Example #40
0
    def oauth_authorize_url(self, redirect_to=None):
        """Generates the oauth authorize url.

        The URL template is filled with ``base_url``; the API key, the
        comma-joined scopes and the redirect URI are added as query parameters.

        :param redirect_to: URL shopify will redirect to once authorized.
        """
        base = self._authorize_url.format(base_url=self.base_url)

        scope = ",".join(self.credentials.scope)
        query = [
            ('client_id', self.credentials.api_key),
            ('scope', scope),
            ('redirect_uri', redirect_to),
        ]

        prepared = PreparedRequest()
        prepared.prepare_url(url=base, params=query)
        return prepared.url
Example #41
0
    def prepare_request(self, request):
        """Constructs a :class:`PreparedRequest <PreparedRequest>` for
        transmission and returns it. The :class:`PreparedRequest` has settings
        merged from the :class:`Request <Request>` instance and those of the
        :class:`Session`.

        :param request: :class:`Request` instance to prepare with this
                        session's settings.
        """
        # Bootstrap CookieJar.
        request_cookies = request.cookies or {}
        if not isinstance(request_cookies, cookielib.CookieJar):
            request_cookies = cookiejar_from_dict(request_cookies)

        # Merge with session cookies; the request's cookies are applied after
        # the session's.
        jar = RequestsCookieJar()
        jar.update(self.cookies)
        jar.update(request_cookies)

        # Set environment's basic authentication if not explicitly set.
        auth = request.auth
        if self.trust_env and not auth and not self.auth:
            auth = get_netrc_auth(request.url)

        merged_headers = merge_setting(
            request.headers, self.headers, dict_class=CaseInsensitiveDict)
        merged_params = merge_setting(request.params, self.params)
        merged_auth = merge_setting(auth, self.auth)
        merged_hooks = merge_hooks(request.hooks, self.hooks)

        prepared = PreparedRequest()
        prepared.prepare(
            method=request.method.upper(),
            url=request.url,
            files=request.files,
            data=request.data,
            json=request.json,
            headers=merged_headers,
            params=merged_params,
            auth=merged_auth,
            cookies=jar,
            hooks=merged_hooks,
        )
        return prepared
Example #42
0
def copy_request(request):
    """Copy a Requests PreparedRequest.

    ``method``, ``url``, ``body`` and ``hooks`` are assigned as-is; only the
    headers are duplicated via ``headers.copy()``.
    """
    duplicate = PreparedRequest()

    for attr in ('method', 'url', 'body', 'hooks'):
        setattr(duplicate, attr, getattr(request, attr))
    duplicate.headers = request.headers.copy()

    return duplicate
Example #43
0
File: util.py Project: bboe/betamax
def deserialize_prepared_request(serialized):
    """Rebuild a PreparedRequest from its serialized dictionary form.

    A dict body is read from its ``string`` entry, falling back to the
    base64-decoded ``base64_string`` entry when ``string`` is missing or
    empty; any other body is used as-is.  Header values go through
    ``from_list`` before being stored in a ``CaseInsensitiveDict``.
    """
    prepared = PreparedRequest()
    prepared._cookies = RequestsCookieJar()

    raw_body = serialized['body']
    if not isinstance(raw_body, dict):
        prepared.body = raw_body
    else:
        text_body = raw_body.get('string')
        if text_body:
            prepared.body = text_body
        else:
            encoded = raw_body.get('base64_string', '').encode()
            prepared.body = base64.b64decode(encoded)

    header_pairs = [(name, from_list(value))
                    for name, value in serialized['headers'].items()]
    prepared.headers = CaseInsensitiveDict(header_pairs)
    prepared.method = serialized['method']
    prepared.url = serialized['uri']
    return prepared
Example #44
0
    def load_resource(self, cdx, params):
        """Fetch ``cdx['load_url']`` upstream and wrap the response as a WARC record.

        Returns ``None`` when there is no ``load_url``, when the requested
        content type is ``VideoLoader.CONTENT_TYPE``, or when ``memento_url``
        is set but the upstream response has no ``Memento-Datetime`` header.
        When the upstream ``Warcserver-Type`` header is ``'warc'`` the result
        is ``(None, upstream headers, upstream response)``.  Otherwise it is
        ``(warc_headers, http_headers_buff, upstream_res)`` where
        ``warc_headers`` is a ``StatusAndHeaders`` and ``http_headers_buff``
        holds the rebuilt HTTP status line and headers.

        Raises ``LiveResourceException`` if the load URL cannot be prepared.
        ``cdx`` is updated in place (``timestamp``, ``source``).
        """
        load_url = cdx.get('load_url')
        if not load_url:
            return None

        if params.get('content_type') == VideoLoader.CONTENT_TYPE:
            return None

        # Non-live loads go through the forward proxy prefix when one is set.
        if self.forward_proxy_prefix and not cdx.get('is_live'):
            load_url = self.forward_proxy_prefix + load_url

        input_req = params['_input_req']

        req_headers = input_req.get_req_headers()

        dt = timestamp_to_datetime(cdx['timestamp'])

        if cdx.get('memento_url'):
            req_headers['Accept-Datetime'] = datetime_to_http_date(dt)

        method = input_req.get_req_method()
        data = input_req.get_req_body()

        # PreparedRequest is used only to normalise the URL and to derive an
        # Authorization header from it; the request itself is sent below.
        p = PreparedRequest()
        try:
            p.prepare_url(load_url, None)
        except:
            raise LiveResourceException(load_url)
        p.prepare_headers(None)
        p.prepare_auth(None, load_url)

        auth = p.headers.get('Authorization')
        if auth:
            req_headers['Authorization'] = auth

        load_url = p.url

        # host is set to the actual host for live loading
        # ensure it is set to the load_url host
        if not cdx.get('is_live'):
            #req_headers.pop('Host', '')
            req_headers['Host'] = urlsplit(p.url).netloc

            referrer = cdx.get('set_referrer')
            if referrer:
                req_headers['Referer'] = referrer

        upstream_res = self._do_request_with_redir_check(method, load_url,
                                                         data, req_headers,
                                                         params, cdx)

        memento_dt = upstream_res.headers.get('Memento-Datetime')
        if memento_dt:
            # The upstream's Memento-Datetime replaces the cdx timestamp.
            dt = http_date_to_datetime(memento_dt)
            cdx['timestamp'] = datetime_to_timestamp(dt)
        elif cdx.get('memento_url'):
        # if 'memento_url' set and no Memento-Datetime header present
        # then its an error
            return None

        agg_type = upstream_res.headers.get('Warcserver-Type')
        if agg_type == 'warc':
            # Upstream already returned a WARC record: pass it through.
            cdx['source'] = unquote(upstream_res.headers.get('Warcserver-Source-Coll'))
            return None, upstream_res.headers, upstream_res

        if upstream_res.version == 11:
            version = '1.1'
        else:
            version = '1.0'

        status = 'HTTP/{version} {status} {reason}\r\n'
        status = status.format(version=version,
                               status=upstream_res.status,
                               reason=upstream_res.reason)

        # Rebuild the HTTP status line and headers block of the response.
        http_headers_buff = status

        orig_resp = upstream_res._original_response

        # The Python 3 header access is tried first; any failure falls back
        # to the Python 2 branch below.
        try:  #pragma: no cover
        #PY 3
            resp_headers = orig_resp.headers._headers
            for n, v in resp_headers:
                nl = n.lower()
                if nl in self.SKIP_HEADERS:
                    continue

                if nl in self.UNREWRITE_HEADERS:
                    v = self.unrewrite_header(cdx, v)

                http_headers_buff += n + ': ' + v + '\r\n'

            http_headers_buff += '\r\n'

            try:
                # http headers could be encoded as utf-8 (though non-standard)
                # first try utf-8 encoding
                http_headers_buff = http_headers_buff.encode('utf-8')
            except:
                # then, fall back to latin-1
                http_headers_buff = http_headers_buff.encode('latin-1')

        except:  #pragma: no cover
        #PY 2
            resp_headers = orig_resp.msg.headers

            for line in resp_headers:
                n, v = line.split(':', 1)
                n = n.lower()
                v = v.strip()

                if n in self.SKIP_HEADERS:
                    continue

                new_v = v
                if n in self.UNREWRITE_HEADERS:
                    new_v = self.unrewrite_header(cdx, v)

                # Only rewritten headers are re-serialised; the rest keep
                # their original raw line.
                if new_v != v:
                    http_headers_buff += n + ': ' + new_v + '\r\n'
                else:
                    http_headers_buff += line

            # if python2, already byte headers, so leave as is
            http_headers_buff += '\r\n'

        # Best-effort lookup of the peer IP from the underlying socket.
        try:
            fp = upstream_res._fp.fp
            if hasattr(fp, 'raw'):  #pragma: no cover
                fp = fp.raw
            remote_ip = fp._sock.getpeername()[0]
        except:  #pragma: no cover
            remote_ip = None

        warc_headers = {}

        warc_headers['WARC-Type'] = 'response'
        warc_headers['WARC-Record-ID'] = self._make_warc_id()
        warc_headers['WARC-Target-URI'] = cdx['url']
        warc_headers['WARC-Date'] = datetime_to_iso_date(dt)

        if not cdx.get('is_live'):
            now = datetime.datetime.utcnow()
            warc_headers['WARC-Source-URI'] = cdx.get('load_url')
            warc_headers['WARC-Creation-Date'] = datetime_to_iso_date(now)

        if remote_ip:
            warc_headers['WARC-IP-Address'] = remote_ip

        ct = upstream_res.headers.get('Content-Type')
        if ct:
            metadata = self.get_custom_metadata(ct, dt)
            if metadata:
                warc_headers['WARC-JSON-Metadata'] = json.dumps(metadata)

        warc_headers['Content-Type'] = 'application/http; msgtype=response'

        # HEAD responses have no body; otherwise -1 stands for "no
        # Content-Length header upstream".
        if method == 'HEAD':
            content_len = 0
        else:
            content_len = upstream_res.headers.get('Content-Length', -1)

        self._set_content_len(content_len,
                              warc_headers,
                              len(http_headers_buff))

        warc_headers = StatusAndHeaders('WARC/1.0', warc_headers.items())
        return (warc_headers, http_headers_buff, upstream_res)
Example #45
0
def test_prepared_request_complete_copy():
    """Run ``assert_copy`` on a fully prepared request and its ``copy()``."""
    original = PreparedRequest()
    original.prepare(
        method="GET",
        url="http://www.example.com",
        data="foo=bar",
        hooks=default_hooks(),
        cookies={"foo": "bar"},
    )
    duplicate = original.copy()
    assert_copy(original, duplicate)
Example #46
0
def test_prepared_request_empty_copy():
    """Run ``assert_copy`` on a never-prepared request and its ``copy()``."""
    blank = PreparedRequest()
    duplicate = blank.copy()
    assert_copy(blank, duplicate)
Example #47
0
 def _request_url(cls, url, params):
     """Return the url produced by ``PreparedRequest.prepare_url(url, params)``."""
     prepared = PreparedRequest()
     prepared.prepare_url(url, params)
     final_url = prepared.url
     return final_url
    def load_resource(self, cdx, params):
        """Fetch ``cdx['load_url']`` upstream and describe the reply as a WARC response.

        Arguments:
            cdx: mapping for the capture. ``load_url``, ``timestamp``, ``url``
                and ``memento_url`` are read; ``timestamp`` and ``source`` may
                be overwritten (see below).
            params: request parameters. ``_input_req`` supplies the method,
                headers and body to forward; ``content_type`` and ``_timeout``
                are also consulted.

        Returns:
            ``None`` when there is no ``load_url``, when ``content_type`` is
            ``VideoLoader.CONTENT_TYPE``, or when ``memento_url`` is set but
            the upstream reply carries no ``Memento-Datetime`` header.

            ``(None, upstream_res.headers, upstream_res)`` when the upstream
            reply is marked ``WebAgg-Type: warc``.

            Otherwise ``(warc_headers, http_headers_buff, upstream_res)`` where
            ``warc_headers`` is a ``StatusAndHeaders`` for a WARC/1.0 response
            record and ``http_headers_buff`` holds the serialized HTTP status
            line and headers, terminated by a blank line.

        Raises:
            LiveResourceException: if the upstream request raises.
        """
        load_url = cdx.get('load_url')
        if not load_url:
            return None

        if params.get('content_type') == VideoLoader.CONTENT_TYPE:
            return None

        input_req = params['_input_req']

        req_headers = input_req.get_req_headers()

        dt = timestamp_to_datetime(cdx['timestamp'])

        if cdx.get('memento_url'):
            req_headers['Accept-Datetime'] = datetime_to_http_date(dt)

        method = input_req.get_req_method()
        data = input_req.get_req_body()

        # Let requests prepare the url; if that preparation yields an
        # Authorization header (NOTE(review): presumably from credentials
        # embedded in the url -- confirm against requests docs), forward it.
        p = PreparedRequest()
        p.prepare_url(load_url, None)
        p.prepare_headers(None)
        p.prepare_auth(None, load_url)

        auth = p.headers.get('Authorization')
        if auth:
            req_headers['Authorization'] = auth

        load_url = p.url

        try:
            upstream_res = self.pool.urlopen(method=method,
                                             url=load_url,
                                             body=data,
                                             headers=req_headers,
                                             redirect=False,
                                             assert_same_host=False,
                                             preload_content=False,
                                             decode_content=False,
                                             retries=self.num_retries,
                                             timeout=params.get('_timeout'))

        except Exception:
            raise LiveResourceException(load_url)

        memento_dt = upstream_res.headers.get('Memento-Datetime')
        if memento_dt:
            dt = http_date_to_datetime(memento_dt)
            cdx['timestamp'] = datetime_to_timestamp(dt)
        elif cdx.get('memento_url'):
            # if 'memento_url' set and no Memento-Datetime header present
            # then its an error
            return None

        agg_type = upstream_res.headers.get('WebAgg-Type')
        if agg_type == 'warc':
            cdx['source'] = unquote(upstream_res.headers.get('WebAgg-Source-Coll'))
            return None, upstream_res.headers, upstream_res

        self.raise_on_self_redirect(params, cdx,
                                    str(upstream_res.status),
                                    upstream_res.headers.get('Location'))

        if upstream_res.version == 11:
            version = '1.1'
        else:
            version = '1.0'

        status = 'HTTP/{version} {status} {reason}\r\n'
        status = status.format(version=version,
                               status=upstream_res.status,
                               reason=upstream_res.reason)

        orig_resp = upstream_res._original_response

        # Header lines are collected separately and only joined once one of
        # the two strategies has fully succeeded, so a failure part-way
        # through the PY 3 path cannot leave duplicated headers behind.
        try:  #pragma: no cover
        #PY 3
            header_lines = [n + ': ' + v + '\r\n'
                            for n, v in orig_resp.headers._headers
                            if n.lower() not in self.SKIP_HEADERS]
        except Exception:  #pragma: no cover
        #PY 2
            header_lines = []
            skipping = False
            for line in orig_resp.msg.headers:
                # Raw lines are kept verbatim. A line starting with
                # whitespace is a folded continuation and shares the fate
                # of the header it continues.
                if line[:1] not in (' ', '\t'):
                    name = line.split(':', 1)[0].strip().lower()
                    skipping = name in self.SKIP_HEADERS

                if not skipping:
                    header_lines.append(line)

        http_headers_buff = status + ''.join(header_lines) + '\r\n'

        # Text must be encoded; a buffer that is already bytes (PY 2 raw
        # header lines) is left as is, since re-encoding it would fail on
        # non-ASCII header bytes.
        if not isinstance(http_headers_buff, bytes):
            http_headers_buff = http_headers_buff.encode('latin-1')

        # Best effort only: this reaches into private attributes of the
        # response to find the peer address, so any failure just means the
        # WARC-IP-Address header is omitted.
        try:
            fp = upstream_res._fp.fp
            if hasattr(fp, 'raw'):  #pragma: no cover
                fp = fp.raw
            remote_ip = fp._sock.getpeername()[0]
        except Exception:  #pragma: no cover
            remote_ip = None

        warc_headers = {}

        warc_headers['WARC-Type'] = 'response'
        warc_headers['WARC-Record-ID'] = self._make_warc_id()
        warc_headers['WARC-Target-URI'] = cdx['url']
        warc_headers['WARC-Date'] = datetime_to_iso_date(dt)
        if remote_ip:
            warc_headers['WARC-IP-Address'] = remote_ip

        warc_headers['Content-Type'] = 'application/http; msgtype=response'

        # -1 is passed on when upstream sent no Content-Length.
        self._set_content_len(upstream_res.headers.get('Content-Length', -1),
                              warc_headers,
                              len(http_headers_buff))

        warc_headers = StatusAndHeaders('WARC/1.0', warc_headers.items())
        return (warc_headers, http_headers_buff, upstream_res)