def do_open(self, http_class, request):
    """Return the response object for the given request.

    Overrides the HTTPHandler method of the same name to return a
    FakeResponse instead of creating any network connections.

    Args:
        http_class: The http protocol being used.
        request: A urllib2.Request object.
    Returns:
        A FakeResponse object.
    Raises:
        Error: If a response map is set but the request is not in it.
        Exception: The canned exception, when self._response is one.
    """
    self.__class__.request = request  # Store the most recent request object
    if self._response_map:
        key = create_response_key(
            request.get_method(), request.get_full_url(), request.headers)
        if str(key) in self._response_map:
            (code, body, response_headers) = self._response_map[str(key)]
            return FakeResponse(code, body, response_headers)
        else:
            # list() so the message shows the actual keys; pformat(iter(...))
            # only printed an iterator repr.
            raise Error('Unknown request %s %s'
                        '\nrequest:%s\nresponse_map:%s' % (
                        request.get_method(), request.get_full_url(),
                        str(key), pformat(list(self._response_map.keys()))))
    elif isinstance(self._response, Exception):
        # Raise the canned exception itself; the original raised `self`
        # (the handler instance), which is not an exception.
        raise self._response
    else:
        return self._response
def glsrequest(uri, method, data=None):
    '''
    Returns xml node tree as Element instance.

    'uri' may be absolute or relative to _BASEURI.
    'method' in ('GET', 'POST', 'PUT')
    'data' can be a string or Element instance

    Raises GlslibException for an unsupported method; HTTP/URL errors are
    logged and re-raised.
    '''
    if method not in {'GET', 'POST', 'PUT'}:
        raise GlslibException(MSGUNSUPPORTEDMETHOD % method)
    if not uri.startswith(_BASEURI):
        uri = _BASEURI.rstrip('/') + '/' + uri.lstrip('/')
    request = urllib.request.Request(uri)
    request.add_header("Authorization", "Basic %s" % _AUTHSTR)
    if etree.iselement(data):
        # tostring generates bytestring (as required for data)
        data = etree.tostring(data)
        request.add_header('Content-Type', 'application/xml')
    elif isinstance(data, str):
        # urlopen requires a bytes body in Python 3.
        data = data.encode('utf-8')
    # Request.add_data() was removed in Python 3; assign the attribute.
    request.data = data
    request.get_method = lambda: method
    msg = '%s %s\n%s\n%s' % (request.get_method(),
                             request.get_full_url(),
                             request.headers,
                             data.decode('utf-8') if data else '')
    logger.debug(msg)
    try:
        r = urllib.request.urlopen(request)
        return etree.XML(r.read())
    except urllib.error.HTTPError as httperr:
        logger.error(httperr.read())
        raise
    except urllib.error.URLError as urlerr:
        logger.error(request.get_full_url())
        raise
Example #3
0
 def default_open(self, request):
     """Serve a GET from the cache when a cached copy exists; otherwise fetch."""
     url = request.get_full_url()
     if request.get_method() == "GET" and CachedResponse.ExistsInCache(self.cacheLocation, url):
         # Cache hit: replay the stored response, flagged as cached.
         return CachedResponse(self.cacheLocation, url, setCacheHeader=True)
     # Cache miss or non-GET: fetch the URL directly.
     return urllib.request.urlopen(url)
    def _send_request(self, url, data=None):
        """Make the rpc. Creates a request object from a base url contatenated
        with the additional url information provided by the argument. Adds
        headers, such as content type, length, ..., as well as the OAuth2.0
        header. For PUT calls, converts the data object to JSON and encodes.

        Arguments:
        url (str): Specific url details to add to the base url for the request.
        data (dict): Dictionary with details required for the request.
        """
        base_url = "https://api.tdameritrade.com/v1/"
        url = base_url + url
        request = urllib.request.Request(url)
        request.add_header("Authorization",
                           "Bearer {}".format(self.oauth_hash).encode("utf-8"))
        if data is None:
            self._logger.info("URL: %s", request.get_full_url())
            self._logger.debug("headers: %s", request.headers)
            response = urllib.request.urlopen(request)
            self.message = json.loads(response.read().decode("utf-8"))
        else:
            # BUG FIX: add_header takes (name, value); the original passed a
            # single triple-quoted string, which raised TypeError.
            request.add_header("Content-Type",
                               "application/json; charset=utf-8")
            data = json.dumps(data).encode("utf-8")
            request.add_header("Content-Length", len(data))
            self._logger.info("URL: %s", request.get_full_url())
            self._logger.debug("headers: %s", request.headers)
            self._logger.debug("data: %s", data)
            response = urllib.request.urlopen(request, data=data)
        status = response.getcode()
        if (status == 200 or status == 201):
            self._logger.info("response: %s", self.message)
        else:
            self._logger.error("response: %s", self.message)
Example #5
0
 def http_response(self, request, response):
     """Cache GET responses; pass every other response straight through."""
     if request.get_method() != "GET":
         return response
     url = request.get_full_url()
     if 'd-cache' in response.info():
         # Already served from the cache: just replay it with the cache flag.
         return CachedResponse(self.cacheLocation, url, setCacheHeader=True)
     # First sighting: store it, then replay from cache without the flag.
     CachedResponse.StoreInCache(self.cacheLocation, url, response)
     return CachedResponse(self.cacheLocation, url, setCacheHeader=False)
Example #6
0
 def default_open(self, request):
     '''Respond to the request by first checking if there is a cached response otherwise defer to http handler'''
     if request.get_method() != "GET":
         return None  # only GETs are cached; let the next handler run
     url = request.get_full_url()
     if not CachedResponse.ExistsInCache(self.cacheLocation, url):
         return None  # nothing cached; let the next handler fetch it
     # Cache hit: replay the stored response, flagged as cached.
     return CachedResponse(self.cacheLocation, url, setCacheHeader=True)
Example #7
0
 def queryApi(urlSuffix, isPretty = False):
     """Query the OpenShift API at urlSuffix and return the raw response bytes.

     isPretty appends ?pretty=true for human-readable JSON. Failures are
     logged at critical level and re-raised.
     """
     prettyPrintParam = '?pretty=true' if isPretty else ''
     request = urllib.request.Request(OpenShiftQuery.API_URL + urlSuffix + prettyPrintParam,
         headers = {'Authorization': 'Bearer ' + OpenShiftQuery.getToken(), 'Accept': 'application/json'})
     logger.debug('query for: "%s"', request.get_full_url())
     try:
         return urllib.request.urlopen(request, cafile = OpenShiftQuery.CERT_FILE_PATH).read()
     except Exception:
         # Narrowed from a bare except so KeyboardInterrupt/SystemExit pass through.
         logger.critical('Cannot query OpenShift API for "%s"', request.get_full_url())
         raise
    def oauth(self, step):
        """Run the TD Ameritrade OAuth2 flow.

        step 1: open the browser for authorization, read the returned code
        from stdin, then fall through to step 2.
        step 2: exchange self._code for refresh/access tokens and persist
        them to configTDA.ini.
        """
        if step == 1:
            import webbrowser
            authorize_url = "https://auth.tdameritrade.com/auth?response_type=code&redirect_uri=http%3a%2f%2flocalhost%3a8080&client_id=sware%40AMER.OAUTHAP"
            webbrowser.open(authorize_url, new=1, autoraise=True)
            # copy resulting string from after code=. url decode. let that become self._code
            print("type what is after 'code=' in the URI:")
            self._code = urllib.parse.unquote(input())
            step = 2
        if step == 2:
            step2url = "https://api.tdameritrade.com/v1/oauth2/token?"
            step2args = {
                'grant_type': 'authorization_code',
                'refresh_token': '',
                'access_type': 'offline',
                'code': self._code,
                'client_id': self._config['tda_api_key'],
                'redirect_uri': 'http://localhost:8080'
            }
            args = urllib.parse.urlencode(step2args).encode("utf-8")

            headers = {"Content-Type": 'application/x-www-form-urlencoded'}

            request = urllib.request.Request(step2url,
                                             data=args,
                                             headers=headers,
                                             method='POST')

            pprint(request.get_full_url())
            pprint(request.headers)

            try:
                response = urllib.request.urlopen(request)
            except urllib.error.HTTPError as e:
                print("v" * 20)
                print("Error occurred fetching {0}".format(
                    request.get_full_url()))
                print(e)
                print("^" * 20)
            else:
                html = response.read()
                d = json.loads(html)
                pprint(d)

                # TODO: abstract config saving out. This is duplicate code from self.refreshToken()
                # BUG FIX: configTDA was used without ever being created (its
                # constructor call was commented out), raising NameError here.
                import configparser
                configTDA = configparser.ConfigParser()
                configTDA.read('configTDA.ini')
                if 'OAUTH' not in configTDA.sections():
                    configTDA.add_section('OAUTH')
                configTDA.set('OAUTH', 'refresh_token', d['refresh_token'])
                configTDA.set('OAUTH', 'access_token', d['access_token'])
                with open('configTDA.ini', 'w') as configfile:
                    configTDA.write(configfile)
Example #9
0
    def http_request(self, request):
        """Log the outgoing request (URL, request line, headers) and pass it on."""
        full_url = request.get_full_url()
        host = request.host
        # Strip scheme and host to recover just the path portion.
        url_path = full_url[full_url.find(host) + len(host):]
        log_url(self.log, "Requesting: ", full_url, TRACE_LEVEL)
        self.log.log(self.log_level,
                     "%s %s" % (request.get_method(), url_path))

        for name, value in request.header_items():
            self.log.log(self.log_level, " . %s: %s" % (name, value))

        # Handlers must return the (possibly modified) request.
        return request
Example #10
0
def URLOpen(request):
    if sys.version_info.major > 2:
        try:
            return urllib.request.urlopen(request)
        except:
            print('Could not open URL: {0}'.format(request.get_full_url()))
            return None
    else:
        try:
            return urllib2.urlopen(request)
        except:
            print('Could not open URL: {0}'.format(request.get_full_url()))
            return None
Example #11
0
    def http_request(self, request):
        """Dump the outgoing request (URL, request line, headers) to self.httpout.

        Dumping happens only when __debug__ is true (i.e. not under -O).
        Returns the request unchanged so the handler chain continues.
        """
        if __debug__:
            # BUG FIX: Request.get_host() was removed in Python 3.4; the
            # equivalent attribute is Request.host.
            host, full_url = request.host, request.get_full_url()
            # Strip scheme and host to recover just the path portion.
            url_path = full_url[full_url.find(host) + len(host):]
            self.httpout.write("%s\n" % request.get_full_url())
            self.httpout.write("\n")
            self.httpout.write("%s %s\n" % (request.get_method(), url_path))

            for header in request.header_items():
                self.httpout.write("%s: %s\n" % header[:])

            self.httpout.write("\n")

        return request
Example #12
0
 def __getRepositoryItems__(self, item, state='all', page='0', direction='desc'):
     """Fetch (and memoize) a repository collection such as issues or pulls.

     Returns (response, parsed_json). Results are cached per method:URL in
     self.__data_caching__, so repeated calls do not hit the network again.
     Raises Exception when no repository has been selected.
     """
     if not self.repository:
         raise Exception("Github", "No repository set")
     request = self.__getRequest__("repos/" + self.repository + "/" + item + "?" +
                                   "page=" + page +
                                   ("&state=" + state if state else "") +
                                   ("&direction=" + direction if direction else ""))
     key = request.get_method() + ":" + request.get_full_url()
     if key not in self.__data_caching__:
         response = urllib.request.urlopen(request)
         # Reuse `key` instead of rebuilding the method:URL string.
         self.__data_caching__[key] = {
             "response": response,
             "content": json.loads(response.read().decode("utf-8"))
         }
     return self.__data_caching__[key]["response"], self.__data_caching__[key]["content"]
Example #13
0
    def default_open(self, request):
        """Handles GET requests, if the response is cached it returns it.

        Non-GET requests and cache misses return None so the next handler
        in the opener chain runs.
        """
        # BUG FIX: the original compared with `is not "GET"` — identity
        # comparison against a string literal is implementation-dependent;
        # use value inequality.
        if request.get_method() != "GET":
            return None  # let the next handler try to handle the request

        if exists_in_cache(
            self.cache_location, request.get_full_url(), self.max_age
        ):
            return CachedResponse(
                self.cache_location,
                request.get_full_url(),
                set_cache_header = True
            )
        else:
            return None
Example #14
0
def load_baidu():
    """Fetch www.baidu.com with custom request headers and print request info."""
    url = 'http://www.baidu.com'
    # Static headers supplied at construction time.
    headers = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        # 'user-name':'Justin'
    }

    req = urllib.request.Request(url, headers=headers)
    # Headers can also be added dynamically after construction.
    req.add_header('user-name', 'Justin')
    resp = urllib.request.urlopen(req)
    print(resp)
    data = resp.read().decode("utf-8")
    # print(data)

    # Response headers:
    # print(resp.headers)
    # All request headers that were set:
    print(req.headers)

    # The fully-qualified request URL.
    print(req.get_full_url())
Example #15
0
 def _send_data(self, request, data, filename, content_type, schema,
                urlencoded):
     """Send a request body (from *data* or *filename*), validate the reply,
     and return the wrapped response.

     content_type and urlencoded are mutually exclusive; urlencoded quotes
     the payload and sends it as application/x-www-form-urlencoded.
     HTTPError is re-raised as Urllib2HTTPError.
     """
     self._logger.info(request.get_full_url())
     f = None
     if content_type and urlencoded:
         raise ValueError('content_type and urlencoded are mutually exclusive')
     # Pick the Content-type header: explicit > urlencoded > raw bytes.
     if content_type:
         chosen = content_type
     elif urlencoded:
         chosen = 'application/x-www-form-urlencoded'
     else:
         chosen = 'application/octet-stream'
     request.add_header('Content-type', chosen)
     try:
         if filename:
             f = self._send_file(request, filename, urlencoded)
         else:
             if urlencoded:
                 data = urllib.parse.quote_plus(data)
             f = urllib.request.urlopen(request, data)
     except urllib.error.HTTPError as e:
         raise Urllib2HTTPError(e)
     f = self._new_response(f)
     self._validate_response(f, schema)
     return f
Example #16
0
 def test_instagram_oembed_return_values(self, urlopen):
     """find_embed returns the oEmbed payload and authenticates against the
     v9.0 Graph API endpoint."""
     urlopen.return_value = self.dummy_response
     result = InstagramOEmbedFinder(
         app_id='123',
         app_secret='abc').find_embed("https://instagr.am/p/CHeRxmnDSYe/")
     self.assertEqual(
         result, {
             'type': 'something',
             'title': 'test_title',
             'author_name': 'test_author',
             'provider_name': 'Instagram',
             'thumbnail_url': 'test_thumbail_url',
             'width': 'test_width',
             'height': 'test_height',
             'html':
             '<blockquote class="instagram-media">Content</blockquote>'
         })
     # check that a request was made with the expected URL / authentication
     # (the original extracted call_args twice; the duplicate was removed)
     request = urlopen.call_args[0][0]
     self.assertEqual(
         request.get_full_url(),
         "https://graph.facebook.com/v9.0/instagram_oembed?url=https%3A%2F%2Finstagr.am%2Fp%2FCHeRxmnDSYe%2F&format=json"
     )
     self.assertEqual(request.get_header('Authorization'), "Bearer 123|abc")
Example #17
0
def login():
    """Log in to qiushibaike.com via a cookie-aware opener and print the reply."""
    url = 'http://www.qiushibaike.com/session.js'
    form = {
        'login': '******',
        'password': '******',
        'remember_me': 'checked',
        'duration': '-1'
    }
    # Install a global opener that keeps session cookies.
    jar = http.cookiejar.LWPCookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(jar),
        urllib.request.HTTPHandler)
    urllib.request.install_opener(opener)

    headers = HttpHeaders.headers
    headers['Accept'] = 'application/json, text/javascript, */*; q=0.01'
    headers['Referer'] = 'http://www.qiushibaike.com/'
    headers['Origin'] = 'http://www.qiushibaike.com'
    headers['Host'] = 'www.qiushibaike.com'

    body = urllib.parse.urlencode(form).encode("utf-8")
    req = urllib.request.Request(url=url,
                                 data=body,
                                 headers=headers,
                                 method='POST')
    print(req.get_full_url())
    try:
        response = urllib.request.urlopen(req)
    except urllib.error.HTTPError as e:
        print(e.reason, e.code, e.msg)
        print('出现异常,%s已停止运行' % MASTER_NAME)
        return

    print(response.read())
    print(MASTER_NAME + "登录了糗百网页")
def fetchAndParse(request):
    """Open *request*, decode the JSON body, and return its 'data' field.

    Raises HTTPError (code 400) when the service reports status == 'error'.
    """
    raw = urllib.request.urlopen(request).read().decode('utf-8')
    payload = json.loads(raw)
    if payload['status'] == 'error':
        raise HTTPError(request.get_full_url(), 400, payload['data'], None, None)
    return payload['data']
Example #19
0
    def _get_parameters_from_request(request):
        if request.get_method() == 'GET':
            pieces_str = urlparse(request.get_full_url()).query
        else:
            pieces_str = request.data.decode('ascii')

        return parse_qs(pieces_str)
def load_data():
    """Fetch www.baidu.com with a randomly chosen User-Agent and save the page."""
    url = "https://www.baidu.com/"  # the https form works once a UA is set
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    ]

    # Build the request, then attach a randomly picked UA dynamically so the
    # server is less likely to flag us as a crawler.
    req = urllib.request.Request(url)
    req.add_header("User-Agent", random.choice(user_agent_list))
    resp = urllib.request.urlopen(req)  # the request carries the URL info
    data = resp.read().decode("utf-8")

    print(2, req.get_full_url())
    # print(resp.headers)
    print(req.headers)
    # get_header needs "User-agent": first letter capitalized, the rest
    # lowercase — otherwise it returns None.
    print(req.get_header("User-agent"))
    with open("02_headers.html", "w", encoding="utf-8") as f:
        f.write(data)
 def test_make_request(self):
     """v1 requests have correct URL and no Auth header."""
     getter = GetMazaDataAPI1('foo', 'bar')
     req = getter.make_request()
     expected = 'https://uccs.landscape.canonical.com/api/1/foo/bar'
     self.assertEqual(expected, req.get_full_url())
     self.assertIsNone(req.headers.get('Authorization'))
Example #22
0
 def test_instagram_oembed_return_values(self, urlopen):
     """find_embed returns the oEmbed payload and calls the v11.0 Graph API
     with the app token."""
     urlopen.return_value = self.dummy_response
     finder = InstagramOEmbedFinder(app_id="123", app_secret="abc")
     result = finder.find_embed("https://instagr.am/p/CHeRxmnDSYe/")
     expected = {
         "type": "something",
         "title": "test_title",
         "author_name": "test_author",
         "provider_name": "Instagram",
         "thumbnail_url": "test_thumbail_url",
         "width": "test_width",
         "height": "test_height",
         "html": '<blockquote class="instagram-media">Content</blockquote>',
     }
     self.assertEqual(result, expected)
     # The mocked urlopen captured the outgoing request; verify URL and auth.
     sent = urlopen.call_args[0][0]
     self.assertEqual(
         sent.get_full_url(),
         "https://graph.facebook.com/v11.0/instagram_oembed?url=https%3A%2F%2Finstagr.am%2Fp%2FCHeRxmnDSYe%2F&format=json",
     )
     self.assertEqual(sent.get_header("Authorization"), "Bearer 123|abc")
Example #23
0
def get_search_result_modify_ua():
    """Search Baidu with a randomly chosen User-Agent and print request info."""
    # Pool of User-Agent strings; one is picked per request so the server
    # does not take us for a crawler.
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    ]

    header = {
        # One random UA plus demo custom headers.
        'User-Agent': random.choice(user_agent_list),
        'h_key': 'key',
        'h_val': 'value'
    }

    url = "http://www.baidu.com/s?wd=美女"
    # quote() the URL: the non-ASCII query would otherwise raise
    # UnicodeEncodeError ('ascii' codec can't encode characters ...).
    request_url = urllib.parse.quote(url, safe=string.printable)
    req = urllib.request.Request(request_url, headers=header)
    resp = urllib.request.urlopen(req)
    # Response headers:
    print(resp.headers)
    # Page body (unused beyond the demo read):
    data = resp.read().decode()
    #print(data)

    print('完整url = ' + req.get_full_url())
    # Full set of request headers that were sent.
    print(req.headers)
Example #24
0
 def test_facebook_oembed_return_values(self, urlopen):
     """find_embed returns the oEmbed payload and calls the v11.0 Graph API
     with the app token."""
     urlopen.return_value = self.dummy_response
     finder = FacebookOEmbedFinder(app_id="123", app_secret="abc")
     result = finder.find_embed("https://fb.watch/ABC123eew/")
     expected = {
         "type": "something",
         "title": "test_title",
         "author_name": "test_author",
         "provider_name": "Facebook",
         "thumbnail_url": None,
         "width": "test_width",
         "height": "test_height",
         "html": '<blockquote class="facebook-media">Content</blockquote>',
     }
     self.assertEqual(result, expected)
     # The mocked urlopen captured the outgoing request; verify URL and auth.
     sent = urlopen.call_args[0][0]
     self.assertEqual(
         sent.get_full_url(),
         "https://graph.facebook.com/v11.0/oembed_video?url=https%3A%2F%2Ffb.watch%2FABC123eew%2F&format=json",
     )
     self.assertEqual(sent.get_header("Authorization"), "Bearer 123|abc")
Example #25
0
def load_baidu():
    """Fetch www.baidu.com with a UA header and save the page to hesders.html."""
    url = "http://www.baidu.com"

    header = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36"
    }
    # BUG FIX: the original immediately overwrote this request with a bare
    # Request(url), silently discarding the UA header (dead store removed).
    request = urllib.request.Request(url, headers=header)
    # Dynamic alternative:
    #request.add_header("User-agent","Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36")

    respons = urllib.request.urlopen(request)
    data = respons.read().decode("utf-8")
    with open("hesders.html", "w", encoding="utf-8") as f:
        f.write(data)

    # Response headers:
    #print(respons.headers)
    # get_header requires "User-agent": capital first letter, rest lowercase.
    request_header = request.get_header("User-agent")
    print(request_header)
    # The complete request URL.
    final_url = request.get_full_url()
    print(final_url)
Example #26
0
    def https_open(self, request):
        """
        Send an HTTPS request after checking the target host's robots.txt.

        One RobotsCache is kept per netloc in self.robots so robots.txt is
        fetched at most once per host.

        Args:
            request - instance of urllib.request.Request
        Raises:
            RuntimeError: when robots.txt forbids the URL for our agent.
        """
        full_url = request.get_full_url()
        url_parts = parse.urlsplit(full_url)
        if url_parts.netloc in self.robots:
            robo = self.robots[url_parts.netloc]
        else:
            # First request to this host: fetch and cache its robots.txt.
            host = url_parts[1]
            rurl = parse.urlunparse(("http", host, "/robots.txt", "", ""))
            robo = reppy.cache.RobotsCache()
            robo.fetch(rurl, self.agent_name)
            self.robots[url_parts.netloc] = robo

        # Is url allowed for the crawler in robots.txt?
        if robo.allowed(full_url, self.agent_name):
            # BUG FIX: the original called request.HTTPHandler.https_open —
            # but `request` is the Request instance, which has no such
            # attribute. Delegate to the base HTTPSHandler instead.
            from urllib.request import HTTPSHandler
            return HTTPSHandler.https_open(self, request)
        else:
            raise RuntimeError('Forbidden by robots.txt')
Example #27
0
def load_baidu():
    """Fetch www.baidu.com with a dynamically added UA; save to 02header.html."""
    url = "https://www.baidu.com"
    # Build the request, then attach the UA header dynamically. (An unused
    # `header` dict was removed: it was never passed to Request.)
    request = urllib.request.Request(url)
    request.add_header("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36")
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode("utf-8")

    # The complete request URL.
    final_url = request.get_full_url()
    print(final_url)
    # Response headers:
    # print(response.headers)
    # request_headers = request.headers
    # print(request_headers)
    # Second way to read a header: name must be "User-agent" (capital first
    # letter, rest lowercase) because add_header capitalizes keys.
    request_headers = request.get_header("User-agent")
    print(request_headers)
    with open("02header.html", "w") as f:
        f.write(data)
Example #28
0
def load_baidu():
    """Fetch www.baidu.com with a UA header and save the page to 02header.html."""
    url = "http://www.baidu.com"

    # Basic browser identification.
    header = {
        "User-Agent":
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }

    req = urllib.request.Request(url, headers=header)
    resp = urllib.request.urlopen(req)
    print(resp)
    data = resp.read().decode('utf-8')

    # The complete request URL (kept for illustration).
    final_url = req.get_full_url()

    # Response headers:
    #print(resp.headers)

    # get_header needs "User-agent": capital first letter, rest lowercase.
    print(req.get_header('User-agent'))
    with open("02header.html", "w", encoding='utf-8') as f:
        f.write(data)
Example #29
0
def load_baidu():
    """Fetch www.baidu.com through a Request carrying a UA header; save the page."""
    url = "http://www.baidu.com"
    request = urllib.request.Request(url)
    # BUG FIX: the header name is "User-Agent", not "User_Agent" — the
    # underscore produced a bogus header the server would ignore.
    request.add_header(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36"
    )
    # BUG FIX: open the prepared request — the original called urlopen(url)
    # before the request was even built, so the header was never sent.
    response = urllib.request.urlopen(request)
    data = response.read().decode("utf-8")
    with open("data.html", "w", encoding="utf-8") as f:
        f.write(data)
    # Response headers:
    # print(response.headers)
    # add_header stores keys via str.capitalize, so look up "User-agent".
    ret = request.get_header("User-agent")
    # The complete request URL.
    final_url = request.get_full_url()

    print(final_url)
    # All request headers that were set:
    request_headers = request.headers
def load_baidu():
    """Fetch www.baidu.com with a mobile User-Agent and save the page."""
    url = "https://www.baidu.com"
    # Mobile-browser identification string.
    header = {
        "User-Agent":
        "Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36"
    }

    # Way one: pass headers= to the constructor.
    #request = urllib.request.Request(url, headers=header)
    # Way two (used here): add the header after construction.
    req = urllib.request.Request(url)
    req.add_header("User-Agent", header["User-Agent"])

    print(req.get_full_url())
    resp = urllib.request.urlopen(req)
    data = resp.read().decode("utf-8")

    # First way to read the request headers:
    request_headers1 = req.headers
    # print(request_headers1)

    # Second way (name must be "User-agent": capital first letter only):
    request_headers2 = req.get_header("User-agent")
    # print(request_headers2)

    # Text pages are written as str; use "wb" for audio/video payloads.
    with open("load_baidu.html", "w", encoding="utf-8") as f:
        f.write(data)
def load_baidu():
    """Fetch www.baidu.com with a desktop UA and save the page to 02header.html."""
    url = "https://www.baidu.com"
    # Tell the server we are a real browser.
    ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
    header = {"User-Agent": ua}

    req = urllib.request.Request(url, headers=header)
    # print(req)

    # Headers can also be added dynamically after construction.
    req.add_header("User-Agent", ua)

    # Print every request header that was set...
    print(req.headers)

    # ...or just one (name must be "User-agent": capital first letter only).
    print(req.get_header("User-agent"))

    # The complete request URL.
    print(req.get_full_url())

    # Send the request (headers must already be attached at this point).
    response = urllib.request.urlopen(req)
    print(response)
    data = response.read().decode("utf-8")
    with open("02header.html", "w", encoding="utf-8") as f:
        f.write(data)
Example #32
0
 def test_endpoint_with_format_param(self, loads, urlopen):
     """The request hits the expected endpoint (query string aside)."""
     urlopen.return_value = self.dummy_response
     loads.return_value = {'type': 'video',
                           'url': 'http://www.example.com'}
     result = OEmbedFinder().find_embed("https://vimeo.com/217403396")
     self.assertEqual(result['type'], 'video')
     sent = urlopen.call_args[0][0]
     endpoint = sent.get_full_url().split('?')[0]
     self.assertEqual(endpoint, "http://www.vimeo.com/api/oembed.json")
Example #33
0
 def test_endpoint_with_format_param(self, loads, urlopen):
     """The request hits the expected https endpoint (query string aside)."""
     urlopen.return_value = self.dummy_response
     loads.return_value = {'type': 'video', 'url': 'http://www.example.com'}
     result = OEmbedFinder().find_embed("https://vimeo.com/217403396")
     self.assertEqual(result['type'], 'video')
     sent = urlopen.call_args[0][0]
     endpoint = sent.get_full_url().split('?')[0]
     self.assertEqual(endpoint, "https://www.vimeo.com/api/oembed.json")
Example #34
0
 def get_cached_response(self, request: urllib.request.Request) -> dict:
     """
     Look up the cached row for this request's URL in the response table.
     Returns the row, or None when the URL has never been cached.
     """
     cursor = self._cx.cursor()
     cursor.execute("SELECT * FROM response WHERE url = ?",
                    (request.get_full_url(), ))
     return cursor.fetchone()
Example #35
0
    def testOnlyConneg(self):
        # see issue #82
        query = "prefix whatever: <http://example.org/blah#> ASK { ?s ?p ?o }"
        # _returnFormatSetting is private to SPARQLWrapper, so its possible
        # values are copied here.
        format_params = ["format", "output", "results"]

        # With conneg disabled, the format parameters must appear in the URL.
        self.wrapper.setOnlyConneg(False)
        self.wrapper.setQuery(query)
        request = self._get_request(self.wrapper)
        params = dict(parse_qsl(urlparse(request.get_full_url()).query))
        for name in format_params:
            self.assertTrue(name in params, "URL parameter '%s' was not sent, and it was expected" % name)

        # With ONLY content negotiation, none of them may appear.
        self.wrapper.resetQuery()
        self.wrapper.setOnlyConneg(True)
        self.wrapper.setQuery(query)
        request = self._get_request(self.wrapper)
        params = dict(parse_qsl(urlparse(request.get_full_url()).query))
        for name in format_params:
            self.assertFalse(name in params, "URL parameter '%s' was sent, and it was not expected (only Content Negotiation)" % name)
Example #36
0
 def __init__(self, request):
     """Fake response whose body comes from canned_response, keyed by URL."""
     url = request.get_full_url()
     if url not in canned_response:
         raise Exception("%s not in canned response!" % url)
     # Mimic the minimal urllib response surface: code, msg, read().
     self.request = request
     self.code = 200
     self.msg = "OK"
     self.content = io.StringIO(canned_response[url])
     self.read = self.content.read
Example #37
0
 def __init__(self, request):
     # Serve a canned body keyed by the request URL; always 200 OK.
     full_url = request.get_full_url()
     if full_url not in canned_response:
         raise Exception("%s not in canned response!" % full_url)
     body = canned_response[full_url]
     self.request = request
     self.code = 200
     self.msg = "OK"
     # read() is delegated to an in-memory buffer over the canned body.
     self.content = io.StringIO(body)
     self.read = self.content.read
 def test_prepare_curl(self):
     """prepare_curl sets URL and auth header."""
     fake = FakeCurl()
     req = self.make_request()
     getter = PycURLGetter(fake)
     getter.prepare_curl(req)
     self.assertEqual(req.get_full_url(), fake.options[pycurl.URL])
     expected_headers = ['Authorization: Basic cGV0ZTpwYXNz\n']
     self.assertEqual(expected_headers, fake.options[pycurl.HTTPHEADER])
Example #39
0
 def http_request(self, request):
     """Attach HTTP Basic auth (from the credentials manager) when missing.

     Returns the request unchanged if it already carries the auth header or
     no credentials are known for its URL.
     """
     if not request.has_header(self.AUTH_HEADER):
         url = request.get_full_url()
         user, password = self._creds_mgr.get_credentials(url)
         if user is not None and password is not None:
             # BUG FIX: b64encode needs bytes and returns bytes; encode the
             # credentials and decode the digest so the header value is str.
             creds = base64.b64encode(
                 ("%s:%s" % (user, password)).encode("utf-8")).decode("ascii")
             auth = "Basic %s" % creds
             request.add_unredirected_header(self.AUTH_HEADER, auth)
     return request
Example #40
0
 def _send_request(self, method, path, apiurl, schema, **query):
     """Build, log, and send a request; validate and return the wrapped response.

     urllib HTTPErrors are re-raised as Urllib2HTTPError.
     """
     req = self._build_request(method, path, apiurl, **query)
     self._logger.info(req.get_full_url())
     try:
         raw = urllib.request.urlopen(req)
     except urllib.error.HTTPError as e:
         raise Urllib2HTTPError(e)
     response = self._new_response(raw)
     self._validate_response(response, schema)
     return response
 def Get(self, request, timeout=None):
   """Accepts an Http request and returns a precanned response."""
   url = request.get_full_url()
   if url == utils.METADATA_URL_PREFIX:
     return 'v1/'
   elif url.startswith(utils.METADATA_V1_URL_PREFIX):
     url = url.replace(utils.METADATA_V1_URL_PREFIX, '')
     if url == 'instance/?recursive=true':
       return self._instance_response
   raise urllib.error.HTTPError
Example #42
0
        def http_request(self, request):
            """Gate outgoing HTTP(S) requests on the target host's robots.txt.

            Returns the request unchanged when allowed; raises
            RobotExclusionError (with 403 semantics) when robots.txt forbids
            the URL for our user agent.
            NOTE(review): uses Request.get_type/get_selector/get_host, which
            were removed in Python 3.4 — this block appears to target
            Python 2 / mechanize-style Request objects; confirm before reuse.
            """
            scheme = request.get_type()
            if scheme not in ["http", "https"]:
                # robots exclusion only applies to HTTP
                return request

            if request.get_selector() == "/robots.txt":
                # /robots.txt is always OK to fetch
                return request

            host = request.get_host()

            # robots.txt requests don't need to be allowed by robots.txt :-)
            origin_req = getattr(request, "_origin_req", None)
            if (origin_req is not None and
                origin_req.get_selector() == "/robots.txt" and
                origin_req.get_host() == host
                ):
                return request

            # New host: fetch and parse its robots.txt before deciding.
            # (self.rfp is a robots-file-parser instance cached per host.)
            if host != self._host:
                self.rfp = self.rfp_class()
                try:
                    self.rfp.set_opener(self.parent)
                except AttributeError:
                    debug("%r instance does not support set_opener" %
                          self.rfp.__class__)
                self.rfp.set_url(scheme+"://"+host+"/robots.txt")
                self.rfp.read()
                self._host = host

            ua = request.get_header("User-agent", "")
            if self.rfp.can_fetch(ua, request.get_full_url()):
                return request
            else:
                # XXX This should really have raised URLError.  Too late now...
                msg = "request disallowed by robots.txt"
                raise RobotExclusionError(
                    request,
                    request.get_full_url(),
                    403, msg,
                    self.http_response_class(StringIO()), StringIO(msg))
Example #43
0
def fetch(opener, request):
    """Open *request* with *opener* and return up to FETCH_MAX_LENGTH bytes
    of the body decoded as UTF-8 (undecodable bytes replaced).

    Raises FetchError when the final URL differs from the requested one
    (i.e. a redirect happened) or the status code is not 200.
    """
    response = opener.open(request, timeout=FETCH_TIMEOUT)

    same_url = response.geturl() == request.get_full_url()
    if not same_url or response.getcode() != 200:
        raise FetchError('invalid url or invalid code')

    return response.read(FETCH_MAX_LENGTH).decode('utf-8', 'replace')
Example #44
0
def requestB(opener, url, headers, data, method='POST'):
    """Send an HTTP request through *opener* and return (body, status_code).

    *data* is a mapping that gets urlencoded: appended to the URL for GET,
    sent as the request body for POST.  The status code is None when the
    request failed at the URL level (URLError).
    """
    answer = ''
    retcode = None
    additionalInfo = '[None]'
    contentLength = None

    if ImportCookie:
        headers = processingCookies(headers)
    data = urllib.parse.urlencode(data)

    if method == 'GET':
        if data:
            url = url + '?' + data
        data = None
    elif method == 'POST':
        headers['Content-Length'] = len(data)
        data = data.encode()

    request = urllib.request.Request(url, data, headers)
    try:
        printRequest(request.get_method(), request.get_full_url())
        if data:
            printData(data)

        f = opener.open(request)

        headers = f.getheaders()
        code = f.code
        retcode = code
        answer = f.read()
        m = hashlib.md5()
        m.update(answer)
        for h in headers:
            if h[0].lower() == 'content-length':
                contentLength = h[1]
                additionalInfo = '[' + str(h[1]) + ']'

        printAnswer(code, additionalInfo)
        printHeaders(headers, 'Set-Cookie')

    except urllib.error.HTTPError as error:
        # Bug fix: the original for/else never broke out of the loop, so the
        # '[-1]' fallback was printed even when Content-Length was present.
        for h in error.headers:
            if h.lower() == 'content-length':
                printAnswer(str(error.code), ' [' + str(error.headers[h]) + ']')
                break
        else:
            printAnswer(str(error.code), ' [-1]')
        retcode = error.code
        answer = error.read()
    except urllib.error.URLError as error:
        printAnswer(str(error))
    return answer, retcode
Example #45
0
    def http_response(self, request, response):
        """Post-process a response, caching successful GET responses.

        A GET response with a 2xx status is stored via store_in_cache unless
        the 'x-local-cache' header says it already came from the cache, and a
        CachedResponse backed by the cache is returned in its place.  Any
        other response passes through unchanged.
        """
        cacheable = (request.get_method() == "GET"
                     and str(response.code).startswith("2"))
        if not cacheable:
            return response

        url = request.get_full_url()
        if 'x-local-cache' in response.info():
            # Already served from the cache; keep the marker header.
            set_cache_header = True
        else:
            set_cache_header = store_in_cache(self.cache_location, url,
                                              response)

        return CachedResponse(self.cache_location, url,
                              set_cache_header=set_cache_header)
Example #46
0
 def _SendRawRequest(request, dump_path=None):
   """Send *request* and return the decoded response body.

   When *dump_path* is given, the body is also written to a '.raw' file
   named after the request path (slashes replaced with underscores).
   """
   logging.debug('Sending request to %s with data %s',
       request.get_full_url(), request.data)
   response = urllib.request.urlopen(request)
   # NOTE(review): reads Content-Encoding as a charset; that header normally
   # carries gzip/deflate, not an encoding name — confirm against the server.
   charset = response.info().get('Content-Encoding', 'utf-8')
   decoded_raw = response.read().decode(charset)
   logging.debug('urlopen returned \n%s\n', decoded_raw)
   if dump_path:
     stem = request.selector[1:request.selector.find('?')].replace('/', '_')
     with open(os.path.join(dump_path, stem + '.raw'), 'w') as dump:
       dump.write(decoded_raw)
   return decoded_raw
Example #47
0
def urlopen(request, data=None):
    """Offline stand-in for urllib.request.urlopen used by the test suite.

    Serves files from DATAPATH, honours 'redirect', 'status' and 'header'
    query parameters, and records the last call on the function object.
    Only the hosts 'unittest' and 'proxy.xri.net' are accepted.
    """
    if isinstance(request, str):
        request = urllib.request.Request(request)
    if data is not None and type(data) != bytes:
        raise TypeError('POST data must be bytes')
    # Remember the last call so tests can inspect it.
    urlopen.request = request
    urlopen.data = data

    full_url = request.get_full_url()
    parsed = urllib.parse.urlparse(full_url)

    hostname = parsed.netloc.split(':')[0]
    if hostname not in ['unittest', 'proxy.xri.net']:
        raise urllib.error.URLError('Wrong host: %s' % parsed.netloc)

    params = urllib.parse.parse_qs(parsed.query)

    if 'redirect' in params:
        return urlopen(params['redirect'][0])

    rel_path = parsed.path.lstrip('/') or '200.txt'
    if parsed.netloc == 'proxy.xri.net':
        # XRI lookups map onto sanitized .xri fixture files.
        rel_path = rel_path.replace('=', '_').replace('*', '_') + '.xri'
    try:
        with open(os.path.join(DATAPATH, rel_path), 'rb') as fh:
            body = fh.read()
    except FileNotFoundError:
        raise urllib.error.HTTPError(full_url, 404, '%s not found' % rel_path,
                                     {}, io.BytesIO())

    status = int(params.get('status', ['200'])[0])
    if 300 <= status < 400:
        raise ValueError("Can't return 3xx status", full_url)
    if 400 <= status:
        raise urllib.error.HTTPError(full_url, status,
                                     'Requested status: %s' % status,
                                     {}, io.BytesIO(body))

    response_headers = {
        'Server': 'Urlopen-Mock',
        'Date': 'Mon, 21 Jul 2014 19:52:42 GMT',
        'Content-type': TYPES.get(os.path.splitext(rel_path)[1], 'text/plain'),
        'Content-length': len(body),
    }
    for extra in params.get('header', []):
        name, value = extra.split(': ', 1)
        response_headers[name] = value

    return HTTPResponse(full_url, status, response_headers, body)
Example #48
0
def statuses_mentions(query=None):
    """Call Twitter's statuses/mentions endpoint with an OAuth-signed request.

    :param query: Optional list of extra [name, value] parameter pairs; they
                  are appended to the URL and included in the OAuth signature.
    :return: The open urllib response object.
    """
    base_url = "http://api.twitter.com/1/statuses/mentions.json"
    parameters = generate_base_data()
    query_string = ""
    # Idiom fix: `is not None` instead of `not query == None`.
    if query is not None:
        query_string = add_params(query)
        parameters.extend(query)

    request = urllib.request.Request(base_url + query_string)
    # Sign over the bare base_url (without the query string), matching the
    # signature base the server reconstructs.
    signature = myoauth.oauth_sign(
        request.get_method(), base_url, parameters, OAUTH_CONSUMER_SECRET, OAUTH_TOKEN_SECRET
    )
    header_string = generate_header_string(parameters, [["oauth_signature", signature]])
    request.add_header("Authorization", header_string)

    print(request.get_header("Authorization"))
    print(request.get_method())
    print(request.get_full_url())
    return urllib.request.urlopen(request)
Example #49
0
    def _request(self, path, parameters):
        """GET *path* with the given query *parameters* and parse the reply.

        :param path: Path appended to ``self.url_base``.
        :param parameters: Mapping of query parameters; falsy values are
                           dropped before encoding.
        :return: The parsed XML root as an ElementTree Element.
        """
        # Keep only parameters with truthy values (drops None/empty entries).
        # (The old comment claimed "not None", but the filter is on truthiness.)
        parameters = {k: v for k, v in parameters.items() if v}

        query_str = urlencode(parameters)

        request = Request("%s%s?%s" % (self.url_base, path, query_str))
        _log.debug("requesting: %s", request.get_full_url())

        data = None
        try:
            response = self.opener.open(request)
            data = response.read()
            response.close()
        except HTTPError as httperror:
            # Error bodies still carry a parseable payload; use it.
            data = httperror.read()
            httperror.close()

        # Replace the HTML ellipsis entity the API embeds before XML parsing.
        return ET.fromstring(data.strip().replace("&hellip;", "..."))
Example #50
0
    def _request_raw(self, path, parameters,method="GET", request_body=""):
        """Issue an HTTP request against the API and return the stripped body.

        For POST, *parameters* is passed directly as the request payload;
        otherwise falsy parameters are dropped and the rest urlencoded into
        the query string.

        NOTE(review): in the non-POST branch *request_body* is passed as the
        Request data argument; a non-empty value would turn the request into
        a POST — confirm this is intended.
        NOTE(review): response.read() returns bytes on Python 3, so the
        str-based .replace() calls below look like a Python 2 leftover —
        verify against the runtime actually used.
        """

        if method == "POST":
            request = Request("%s%s" % (self.url_base, path),parameters)
        else:    
            # Throw out parameters whose value is falsy (None/empty).
            parameters = dict([(k,v) for k,v in parameters.items() if v])
            query_str = urlencode(parameters)
            request = Request("%s%s?%s" % (self.url_base, path, query_str), request_body)
        
        _log.debug("requesting (%s): %s", method,request.get_full_url())
        data = None
        try:
            response = self.opener.open(request)
            # Sanitise entities/characters that break downstream parsing.
            data = response.read().replace("&hellip;", "...").replace("d'éveil", "d éveil")
            response.close()
        except HTTPError as httperror:
            # Error responses still carry a usable body.
            data = httperror.read()
            httperror.close()
        _log.debug("response: %s", data.strip())
        return data.strip()
Example #51
0
 def _SendRequest(request, dump_path=None):
   """Send *request* and return the 'contents' field of its JSON 'reply'.

   When *dump_path* is given, the raw JSON is also written to a '.json'
   file named after the request path.  Raises NessusError when the reply
   status is not 'OK'.
   """
   logging.debug('Sending request to %s with data %s',
       request.get_full_url(), request.data)
   response = urllib.request.urlopen(request)
   # NOTE(review): treats Content-Encoding as a charset (mirrors the sibling
   # _SendRawRequest helper) — usually that header is gzip/deflate.
   charset = response.info().get('Content-Encoding', 'utf-8')
   raw_json = response.read().decode(charset)
   logging.debug('urlopen returned \n%s\n', raw_json)
   if dump_path:
     stem = request.selector[1:request.selector.find('?')].replace('/', '_')
     with open(os.path.join(dump_path, stem + '.json'), 'w') as dump:
       dump.write(raw_json)
   json_resp = json.loads(raw_json)['reply']
   status = json_resp.get('status', '')
   if status != 'OK':
     raise NessusError('Status was not OK: %s: %s' % (status, json_resp['contents']))
   if 'contents' not in json_resp:
     return ''
   return json_resp['contents']
def main(argv=None):
    '''
    Process the command line arguments and create the JSON dump.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # NOTE(review): despite the docstring, this function transfers projects
    # and repositories from GitLab to Stash; the "JSON dump" wording looks
    # copied from another tool — confirm and update.
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Transfer all projects/repositories from GitLab to Stash. \
                     Note: This script assumes you have your SSH key \
                     registered with both GitLab and Stash.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('gitlab_url',
                        help='The full URL to your GitLab instance.')
    parser.add_argument('stash_url',
                        help='The full URL to your Stash instance.')
    parser.add_argument('-p', '--password',
                        help='The password to use to authenticate if token is \
                              not specified. If password and token are both \
                              unspecified, you will be prompted to enter a \
                              password.')
    parser.add_argument('-P', '--page_size',
                        help='When retrieving result from GitLab, how many \
                              results should be included in a given page?.',
                        type=int, default=20)
    parser.add_argument('-s', '--verify_ssl',
                        help='Enable SSL certificate verification',
                        action='store_true')
    parser.add_argument('-S', '--skip_existing',
                        help='Do not update existing repositories and just \
                              skip them.',
                        action='store_true')
    parser.add_argument('-t', '--token',
                        help='The private GitLab API token to use for \
                              authentication. Either this or username and \
                              password must be set.')
    parser.add_argument('-u', '--username',
                        help='The username to use for authentication, if token\
                              is unspecified.')
    parser.add_argument('-v', '--verbose',
                        help='Print more status information. For every ' +
                             'additional time this flag is specified, ' +
                             'output gets more verbose.',
                        default=0, action='count')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    # NOTE(review): max() forces a page size of at least 100 even though the
    # advertised default is 20 — confirm min() was not intended here.
    args.page_size = max(100, args.page_size)

    # Convert verbose flag to actually logging level
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'), level=log_level)

    # Setup authenticated GitLab and Stash instances
    if args.token:
        git = GitLab(args.gitlab_url, token=args.token,
                            verify_ssl=args.verify_ssl)
    else:
        git = None
    # NOTE(review): the following four lines are corrupted (credential prompts
    # were redacted to '******' by whatever produced this file) and cannot run
    # as written; preserved verbatim pending recovery of the original source.
    if not args.username:
        print('Username: '******'').strip()
    if not args.password:
        args.password = getpass.getpass('Password: '******'Retrieving existing Stash projects...', end="", file=sys.stderr)
    sys.stderr.flush()
    # Snapshot existing Stash project keys/names so we can detect collisions.
    key_set = {proj['key'] for proj in stash.projects}
    stash_project_names = {proj['name'] for proj in stash.projects}
    names_to_keys = {proj['name']: proj['key'] for proj in stash.projects}
    stash_project_names = stash_project_names.union({'~' + user['slug'] for user in stash.admin.users.list()})
    # Initialize users private repositories
    for user in stash.admin.users.list():
        request = urllib.request.Request(args.stash_url+ "users/" + user['slug'] + "/repos?start=0&limit=50")
        print(request.get_full_url())
        base64string = base64.b64encode((args.username + ":" + args.password).encode('ascii'))
        request.add_header("Authorization", "Basic %s" % base64string)
        request.add_header("Referer", args.stash_url + "users/" + user['slug'])
        request.add_header("Accept", "application/json, text/javascript, */*; q=0.01")
        # NOTE(review): the urlopen call below is commented out, so this try
        # never performs the request and the except branch is unreachable.
        try:
#            result =  urllib.request.urlopen(request)
            print("Personnal repository for : " + user['slug'] + " may be not initialized \n")
        except urllib.error.HTTPError:
            print("repository initialized")
    
    stash_users = {user['slug'] for user in stash.admin.users.list()}
    print('done', file=sys.stderr)
    sys.stderr.flush()
    updated_projects = set()
    repo_to_slugs = {}
    failed_to_clone = set()
    cwd = os.getcwd()
    transfer_count = 0
    skipped_count = 0
    for project in stash_project_names:
        print ('detected projetct : %s' % project, file=sys.stderr)
    print('Processing GitLab projects...', file=sys.stderr)
    sys.stderr.flush()
    # Walk every GitLab project, creating matching Stash projects/repos and
    # mirroring the git data across.
    for project in gen_all_results(git.getprojectsall,
                                   per_page=args.page_size):
        print('\n' + ('=' * 80) + '\n', file=sys.stderr)
        sys.stderr.flush()
        proj_name = project['namespace']['name']
        if proj_name in stash_users:
            proj_name = "~" + proj_name
        # Create Stash project if it doesn't already exist
        if proj_name not in stash_project_names:
            # Create Stash project key
            key = proj_name
            if key.islower():                key = key.title()
            key = re.sub(r'[^A-Z]', '', key)
            if len(key) < 2:
                key = re.sub(r'[^A-Za-z]', '', proj_name)[0:2].upper()
            added = False
            suffix = 65
            # NOTE(review): `added` is never set to True, so key collisions are
            # resolved by appending 'A' repeatedly and the chr(suffix) branch
            # is unreachable — confirm whether that was intended.
            while key in key_set:
                if not added:
                    key += 'A'
                else:
                    suffix += 1
                    key = key[:-1] + chr(suffix)
            key_set.add(key)

            # Actually add the project to Stash
            print('Creating Stash project "%s" with key %s...' %
                  (proj_name, key), end="", file=sys.stderr)
            sys.stderr.flush()
            stash.projects.create(key, proj_name)
            names_to_keys[proj_name] = key
            stash_project_names.add(proj_name)
            print('done', file=sys.stderr)
            sys.stderr.flush()
        else:
            if proj_name[0:1] != "~":
                key = names_to_keys[proj_name]
            else:
                key = proj_name

        stash_project = stash.projects[key]

        # Initialize maping from repository names to slugs for later
        if key not in repo_to_slugs:
            repo_to_slugs[key] = {repo['name']: repo['slug'] for repo in
                                  stash_project.repos}

        # Create Stash-compatible name for repository
        # Repository names are limited to 128 characters.
        # They must start with a letter or number and may contain spaces,
        # hyphens, underscores and periods
        repo_name = project['name']
        if not repo_name[0].isalnum():
            repo_name = 'A ' + repo_name
        repo_name = re.sub(r'[^A-Za-z0-9 _.-]', ' ', repo_name)
        if len(repo_name) > 128:
            repo_name = repo_name[0:128]

        # Add repository to Stash project if it's not already there
        if repo_name not in repo_to_slugs[key]:
            print('Creating Stash repository "%s" in project "%s"...' %
                  (repo_name, proj_name), end="", file=sys.stderr)
            sys.stderr.flush()
            stash_repo = stash_project.repos.create(repo_name)
            repo_to_slugs[key][repo_name] = stash_repo['slug']
            print('done', file=sys.stderr)
            sys.stderr.flush()
        elif args.skip_existing:
            print('Skipping existing Stash repository "%s" in project "%s"' %
                  (repo_name, proj_name), file=sys.stderr)
            sys.stderr.flush()
            skipped_count += 1
            continue
        else:
            print('Updating existing Stash repository "%s" in project "%s"' %
                  (repo_name, proj_name), file=sys.stderr)
            sys.stderr.flush()
            repo_slug = repo_to_slugs[key][repo_name]
            stash_repo = stash_project.repos[repo_slug].get()

        for clone_link in stash_repo['links']['clone']:
            if clone_link['name'] == 'ssh':
                stash_repo_url = clone_link['href']
                break

        with tempfile.TemporaryDirectory() as temp_dir:
            # Clone repository to temporary directory
            print('\nCloning GitLab repository...', file=sys.stderr)
            sys.stderr.flush()
            try:
                subprocess.check_call(['git', 'clone', '--mirror',
                                       project['ssh_url_to_repo'],
                                       temp_dir])
            except subprocess.CalledProcessError:
                print('Failed to clone GitLab repository. This usually when ' +
                      'it does not exist.', file=sys.stderr)
                failed_to_clone.add(project['name_with_namespace'])
                skipped_count += 1
                continue
            os.chdir(temp_dir)

            # Check that repository is not empty
            try:
                subprocess.check_call(['git', 'log', '--format=oneline', '-1'],
                                      stdout=subprocess.DEVNULL,
                                      stderr=subprocess.DEVNULL)
            except subprocess.CalledProcessError:
                print('Repository is empty, so skipping push to Stash.',
                      file=sys.stderr)
                skipped_count += 1
            else:
                # Change remote to Stash and push
                print('\nPushing repository to Stash...', file=sys.stderr)
                sys.stderr.flush()
                subprocess.check_call(['git', 'remote', 'set-url', 'origin',
                                       stash_repo_url])
                subprocess.check_call(['git', 'push', '--mirror'])
                transfer_count += 1

            os.chdir(cwd)

        updated_projects.add(proj_name)


    print('\n' + ('=' * 35) + 'SUMMARY' + ('=' * 35), file=sys.stderr)
    print('{} repositories transferred.\n'.format(transfer_count),
          file=sys.stderr)
    print('{} repositories skipped.\n'.format(skipped_count),
          file=sys.stderr)
    print('Projects created/updated:', file=sys.stderr)
    for proj in sorted(updated_projects):
        print('\t' + proj, file=sys.stderr)
    print('Repositories that we could not clone:', file=sys.stderr)
    for repo_name in sorted(failed_to_clone):
        print('\t' + repo_name, file=sys.stderr)
 def open(self, request, timeout=None):
     # Test double: verify the opener was invoked with the expected
     # module-level `url`, then deliberately fail with MyException so the
     # caller's error-handling path is exercised.
     assert request.get_full_url() == url
     raise MyException
Example #54
0
    def request(self, path, parameters=None, data=None, method=None, auto_login=True, json_answer=True, filename=None):
        """
        Send a request to the Nuclos server.

        :param path: The path to open.
        :param parameters: A dictionary of parameters to add to the request URL.
        :param data: The data to add. If this is given the request will automatically be a POST request.
        :param method: The HTTP method to use. If not set this will be GET or POST, depending on the data.
        :param auto_login: Try to log in automatically in case of a 401 error.
        :param json_answer: Parse the servers answer as JSON.
        :param filename: A file to save the downloaded data in.
        :return: The answer of the server. None in case of an error.
        :raise: URLError in case of an HTTP error. Returns None instead if the 'handle_http_errors' option is set.
        """
        if not self.session_id and auto_login:
            self.login()

        url = path
        if not url.startswith("http"):
            url = self._build_url(path, parameters)
        request = urllib.request.Request(url)
        if json_answer:
            request.add_header("Accept", "application/json")
        if data:
            request.data = json.dumps(data).encode("utf-8")
            request.add_header("Content-Type", "application/json")
        if method:
            request.method = method
        if method and request.data and method not in ["POST", "PUT"]:
            logging.warning("Overriding the POST method while sending data!")
        if self.session_id:
            request.add_header("Cookie", "JSESSIONID=" + str(self.session_id))

        logging.debug("Sending {} request to {}.".format(request.get_method(), request.get_full_url()))
        if request.data:
            logging.debug("Sending data {}.".format(request.data))

        try:
            result = urllib.request.urlopen(request)

            if filename is not None:
                # Stream the raw response straight into the target file.
                with open(filename, "wb") as f:
                    shutil.copyfileobj(result, f)
                    return None

            answer = result.read().decode()
            if answer:
                logging.debug("Received answer {}".format(answer))
            if not json_answer:
                return answer
            try:
                return json.loads(answer)
            except ValueError:
                logging.error("Invalid JSON in '{}'.".format(answer))
                return None
        # Fix: catch HTTPError via its canonical module (urllib.error, as the
        # rest of this file does) and re-send the *complete* original request
        # after re-login — the retry previously dropped `parameters` and
        # `filename`.
        except urllib.error.HTTPError as e:
            if e.code == 401 and auto_login:
                logging.info("Unauthorized. Trying to log in again.")
                self.session_id = None
                self.login()
                return self.request(path, parameters=parameters, data=data, method=method,
                                    auto_login=False, json_answer=json_answer, filename=filename)
            elif e.code == 403:
                raise NuclosAuthenticationException()
            else:
                logging.error("HTTP Error {}: {}".format(e.code, e.reason))
                raise NuclosHTTPException(e)
 def default_open(self, request):
     """Trace the outgoing URL and stamp the request with its start time."""
     if config.TRACE_API_CALLS:
         logger.info("%s" % (request.get_full_url(),))
     # Response handlers use start_time to compute per-request latency.
     request.start_time = time.time()
     # Returning None lets the next handler in the chain open the URL.
     return None
Example #56
0
def encodeASP(text):
    """URL-safe base64 encode *text* in the (ASP).NET style.

    The trailing '=' padding characters are stripped and their count is
    appended as a digit.  Returns bytes when given bytes, str otherwise.
    """
    isbytes = isinstance(text, bytes)
    raw = text if isbytes else text.encode()
    encoded = base64.urlsafe_b64encode(raw)

    pad_count = len(re.findall(b'=', encoded))
    # Drop the padding and record how many characters were removed.
    encoded = encoded[:len(encoded) - pad_count] + str(pad_count).encode()

    return encoded if isbytes else encoded.decode()
class SmartRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that logs each hop via the module's print* helpers."""

    def http_error_301(self, req, fp, code, msg, headers):
        self.preProcessingRedirection(req, fp, code, msg, headers)
        result = super(SmartRedirectHandler, self).http_error_301(req, fp, code, msg, headers)
        self.postProcessingRedirection(result)
        return result

    def http_error_302(self, req, fp, code, msg, headers):
        self.preProcessingRedirection(req, fp, code, msg, headers)
        result = super(SmartRedirectHandler, self).http_error_302(req, fp, code, msg, headers)
        self.postProcessingRedirection(result)
        return result

    def preProcessingRedirection(self, req, fp, code, msg, headers):
        # Find the redirect target and retarget the Host header at it.
        location = ''
        for name, value in headers._headers:
            if name == 'Location':
                location = value.strip()
        req.add_header('Host', urlparse(location).netloc)
        printAnswer(code, str(msg) + " " + location)
        printHeaders(headers._headers, 'Set-Cookie')

    def postProcessingRedirection(self, result):
        printRequest("GET", result.geturl())
def stringToHexCSV(s):
    """Return *s* as space-separated two-digit hex byte values.

    Bug fix: the original used the Python 2 only ``s.encode('hex')`` codec,
    which raises LookupError on Python 3; ``bytes.hex()`` is the modern
    equivalent.
    """
    hexs = s.encode().hex()
    ret = ' '.join(hexs[i:i + 2] for i in range(0, len(hexs), 2))
    return ret
def defaultCreateOpener(withCookieJar=True, withBurpProxy=True):
    """Build the module's default urllib opener.

    Optionally wires in a shared cookie jar (stored in the module-global
    ``cookieJar``) and/or a Burp proxy on 127.0.0.1:8080.  Returns None when
    both features are disabled.
    """
    global cookieJar
    if withCookieJar:
        cookieJar = urllib.request.HTTPCookieProcessor(http.cookiejar.CookieJar())

    if not (withCookieJar or withBurpProxy):
        return None

    # Assemble the handler chain in the same order as before:
    # proxy first (if any), then the redirect logger, then cookies (if any).
    handlers = []
    if withBurpProxy:
        handlers.append(urllib.request.ProxyHandler(
            {'https': 'https://127.0.0.1:8080/', 'http': 'http://127.0.0.1:8080/'}))
    handlers.append(SmartRedirectHandler())
    if withCookieJar:
        handlers.append(cookieJar)
    return urllib.request.build_opener(*handlers)
def processingCookies(headers):
    """Flatten a dict-valued 'Cookie' header into a single cookie string.

    :param headers: Header mapping; its 'Cookie' entry is either a plain
                    string (left untouched) or a dict of name -> value pairs.
    :return: The (possibly modified) headers mapping.
    """
    cookies = headers['Cookie']
    if isinstance(cookies, str):
        # Already serialized — nothing to do.  (Bug fix: this used to `return`
        # None here, which made callers doing `headers = processingCookies(...)`
        # lose their whole headers dict.)
        return headers
    final = ''
    for name in cookies.keys():
        final += " " + name + "=" + cookies[name] + ";"
    headers['Cookie'] = final
    return headers
# Module-level hook: scripts may rebind this to supply a custom opener factory.
createOpener = defaultCreateOpener
def requestC(opener, url, headers, data, method='POST'):
    """Like requestB, but discard the status code and return only the body."""
    body, _status = requestB(opener, url, headers, data, method)
    return body
def requestB(opener, url, headers, data, method='POST'):
    """Send an HTTP request through *opener* and return (body, status_code).

    *data* is a mapping that gets urlencoded: appended to the URL for GET,
    sent as the request body for POST.  The status code is None when the
    request failed at the URL level (URLError).
    """
    answer = ''
    retcode = None
    additionalInfo = '[None]'
    contentLength = None

    if ImportCookie:
        headers = processingCookies(headers)
    data = urllib.parse.urlencode(data)

    if method == 'GET':
        if data:
            url = url + '?' + data
        data = None
    elif method == 'POST':
        headers['Content-Length'] = len(data)
        data = data.encode()

    request = urllib.request.Request(url, data, headers)
    try:
        printRequest(request.get_method(), request.get_full_url())
        if data:
            printData(data)

        f = opener.open(request)

        headers = f.getheaders()
        code = f.code
        retcode = code
        answer = f.read()
        m = hashlib.md5()
        m.update(answer)
        for h in headers:
            if h[0].lower() == 'content-length':
                contentLength = h[1]
                additionalInfo = '[' + str(h[1]) + ']'

        printAnswer(code, additionalInfo)
        printHeaders(headers, 'Set-Cookie')

    except urllib.error.HTTPError as error:
        # Bug fix: the original for/else never broke out of the loop, so the
        # '[-1]' fallback was printed even when Content-Length was present.
        for h in error.headers:
            if h.lower() == 'content-length':
                printAnswer(str(error.code), ' [' + str(error.headers[h]) + ']')
                break
        else:
            printAnswer(str(error.code), ' [-1]')
        retcode = error.code
        answer = error.read()
    except urllib.error.URLError as error:
        printAnswer(str(error))
    return answer, retcode
def parseBurpData(fileName):
    """Parse a raw HTTP request saved from Burp into request components.

    Reads the request line, the header block and (for POST) the body from
    `fileName`. May set the module-level `Protocol` global from the request
    URI when it is not configured.

    Args:
        fileName: path to the saved Burp request file.
    Returns:
        (url, headers, data, method) where `data` is None, a field->value
        dict, or a parsed XML document depending on the content type.
    Exits the process on unreadable file or undeterminable scheme.
    """
    global Protocol
    url = ''
    host = ''
    data = None
    contentType = 'None'
    headers = {}
    indata = None
    try:
        indata = open(fileName, "r")
    except IOError as error:
        print(str(error))
        sys.exit(1)
    # Request line: "<METHOD> <URI> HTTP/x.y"
    line = indata.readline()
    res = line.partition(' ')
    method = res[0]
    printDebug('method ' + method)
    res = res[2].rpartition(' ')
    uri = res[0]
    printDebug('URI: ' + str(uri))
    if Protocol is None:
        rulo = urlparse(uri)
        printOut('Scheme not given, trying to guess it from burp request.')
        # Bug fix: the original tested `scheme != 'http' or scheme != 'https'`,
        # which is always true, so guessing could never succeed.
        if rulo.scheme not in ('http', 'https'):
            printOut('** Could not determine the scheme from the HTTP request, please configure one **')
            sys.exit(1)
        else:
            Protocol = rulo.scheme
    # Header block: one "Name: value" line until the blank separator.
    line = indata.readline()
    while line.strip():
        res = line.partition(':')
        if res[0] == 'Host':
            host = res[2].strip()
        if res[0] == 'Content-Type':
            contentType = res[2].strip()
        if res[0] == 'Cookie':
            if ImportCookie:
                # Store cookies as a name -> value dict under 'Cookie';
                # requestB flattens it back via processingCookies.
                cookies = res[2].split(';')
                for c in cookies:
                    tm = c.strip().partition('=')
                    if res[0] not in headers:
                        headers[res[0]] = {tm[0]: tm[2]}
                    else:
                        headers[res[0]][tm[0]] = tm[2]
                line = indata.readline()
                continue
        headers[res[0]] = res[2].strip()
        line = indata.readline()
    if method == 'POST':
        url = Protocol + '://' + host + uri
        data = indata.read().strip()
        if len(data) == 0:
            data = None
        else:
            urlencodedcontenttype = re.compile(r'application/x-www-form-urlencoded')
            if urlencodedcontenttype.match(contentType):
                data = urllib.parse.parse_qs(data)
                for d in list(data.keys()):
                    if len(data[d]) > 1:
                        printOut("Multiple value for the same field. Odd... taking the first one")
                    data[d] = data[d][0]
            elif contentType == 'text/xml; charset=UTF-8':
                data = parseString(data)
            else:
                printOut('Unknown Content type: ' + str(contentType))
    elif method == 'GET':
        res = uri.rpartition('?')
        # Bug fix: the original tested `len(res) == 3`, which rpartition
        # always satisfies; test the separator to detect a real query string.
        if res[1]:
            uri = res[0]
            data = urllib.parse.parse_qs(res[2])
            for d in list(data.keys()):
                if len(data[d]) > 1:
                    printOut("Multiple value for the same field. Odd... taking the first one")
                data[d] = data[d][0]
        url = Protocol + '://' + host + uri
    # Close unconditionally (the original's placement only covered GET).
    indata.close()
    return url, headers, data, method
Example #57
0
    def __write_capture(self, request, response):
        """Append one request/response pair to the capture file as XML.

        Builds a <capture> element (a <request> and a <response> section) in
        an in-memory buffer, then writes it UTF-8-encoded to self.ofhandle.
        Headers/bodies containing non-printable characters are base64-encoded
        and flagged with encoding="base64". Rotates the output file every
        self.cut_count writes.

        Args:
            request: urllib-style request object exposing get_method()/
                get_full_url() plus either get_host()/get_data() or
                .host/.data attributes.
            response: the matching response object (read()/getcode()/info()).
        Returns:
            The response body bytes consumed here (b'' when the status
            implies no body), so the caller can still use them.
        Raises:
            Re-raises an IncompleteRead captured while reading the body,
            after the (partial) capture has been written.
        """
        ohandle = io.StringIO()
        response_body = b''
        saved_exception = None
        try:
            ohandle.write('<capture>\n')
            ohandle.write('<request>\n')
            method = request.get_method()
            url = request.get_full_url()
            # NOTE(review): module-style urlparse.urlsplit and the
            # urllib2.IncompleteRead below suggest Python-2-era import
            # aliases — confirm the file-level imports provide them.
            parsed = urlparse.urlsplit(url)
            relative_url = parsed.path
            if parsed.query:
                relative_url += '?' + parsed.query
            if parsed.fragment:
                # TODO: will this ever happen?
                relative_url += '#' + parsed.fragment

            host = None
            request_body = None

            # Older Request objects expose get_host()/has_data()/get_data();
            # newer ones expose .host/.data directly.
            if hasattr(request, 'get_host'):
                host = request.get_host()
                # support 3.3
                if request.has_data():
                    request_body = request.get_data()
            else:
                host = request.host
                request_body = request.data

            ohandle.write('<method>%s</method>\n' % escape(method))
            ohandle.write('<url>%s</url>\n' % escape(url))
            ohandle.write('<host>%s</host>\n' % escape(host))
            try:
                # ghetto: best-effort peer-IP lookup by reaching into the
                # response's underlying socket; failures are ignored.
                addr = response.fp.raw._sock.getpeername()
                if addr:
                    ohandle.write('<hostip>%s</hostip>\n' % escape(addr[0]))
            except Exception as error:
                pass
            ohandle.write('<datetime>%s</datetime>\n' % escape(time.asctime(time.gmtime())+' GMT')) # TODO: can we calculate request time and elapsed?
            # Reconstruct a request-line + header block; continuation values
            # are folded with CRLF + tab.
            request_headers = '%s %s HTTP/1.1\r\n' % (method, relative_url) # TODO: is there access to the HTTP version?
            for item in request.header_items():
                request_headers += item[0] + ': ' + '\r\n\t'.join(item[1:]) + '\r\n'

            if self.re_nonprintable_str.search(request_headers):
                ohandle.write('<headers encoding="base64">%s</headers>\n' % base64.b64encode(request_headers.encode('utf-8')).decode('ascii'))
            else:
                ohandle.write('<headers>%s</headers>\n' % escape(request_headers))
            if request_body is not None:
                if self.re_nonprintable.search(request_body):
                    ohandle.write('<body encoding="base64">%s</body>\n' % base64.b64encode(request_body).decode('ascii'))
                else:
                    ohandle.write('<body>%s</body>\n' % escape(request_body.decode('ascii')))
            ohandle.write('</request>\n')
            ohandle.write('<response>\n')
            status = int(response.getcode())
            ohandle.write('<status>%d</status>\n' % status)
            headers = response.info()
            # HEAD responses and 1xx/204/304 statuses carry no message body
            # per HTTP/1.1, so skip the read.
            if 'HEAD' == method or status < 200 or status in (204, 304,):
                response_body = b''
            else:
                try:
                    response_body = response.read()
                except urllib2.IncompleteRead as e:
                    # Keep the partial body; re-raise after the capture is
                    # safely written (see end of method).
                    saved_exception = e
            response_headers = 'HTTP/1.1 %d %s\r\n' % (status, response.msg) # TODO: is there access to the HTTP version?
            response_headers += headers.as_string()
            content_type = headers.get('Content-Type')
            content_length = headers.get('Content-Length')

            if content_type:
                ohandle.write('<content_type>%s</content_type>\n' % escape(content_type))
            if content_length:
                ohandle.write('<content_length>%d</content_length>\n' % int(content_length))

            if self.re_nonprintable_str.search(response_headers):
                ohandle.write('<headers encoding="base64">%s</headers>\n' % base64.b64encode(response_headers.encode('utf-8')).decode('ascii'))
            else:
                ohandle.write('<headers>%s</headers>\n' % escape(response_headers))
            if response_body:
                if self.re_nonprintable.search(response_body):
                    ohandle.write('<body encoding="base64">%s</body>\n' % base64.b64encode(response_body).decode('ascii'))
                else:
                    ohandle.write('<body>%s</body>\n' % escape(response_body.decode('ascii')))

            ohandle.write('</response>\n')
            ohandle.write('</capture>\n')

            # Flush the whole fragment in one encoded write.
            self.ofhandle.write(ohandle.getvalue().encode('utf-8'))
            ohandle.close()

            # Rotate the capture file every cut_count writes.
            self.write_count += 1
            if 0 == (self.write_count % self.cut_count):
                self.close()
                self.open_file()

        except Exception as e:
            # Capture failures must not break the proxied exchange; log only.
            sys.stderr.write('*** unhandled error in RaftCaptureProcessor: %s\n' % (e))

        if saved_exception:
            raise(saved_exception)

        return response_body