def __init__(self, base_url, username=None, password=None,
                 default_prefix='/rest/v1', biospassword=None, sessionkey=None):
        """Initialization of the base class RestClientBase

        :param base_url: The url of the remote system
        :type base_url: str
        :param username: The username used for authentication
        :type username: str
        :param password: The password used for authentication
        :type password: str
        :param default_prefix: The default root path
        :type default_prefix: str
        :param biospassword: biospassword for base_url if needed
        :type biospassword: str
        :param sessionkey: sessionkey for the current login of base_url
        :type sessionkey: str

        """

        self.__base_url = base_url
        self.__username = username
        self.__password = password
        self.__biospassword = biospassword
        self.__url = urlparse2.urlparse(base_url)
        self.__session_key = sessionkey
        self.__authorization_key = None
        self.__session_location = None
        self._conn = None
        self._conn_count = 0
        self.login_url = None
        self.default_prefix = default_prefix

        self.__init_connection()
        self.get_root_object()
        self.__destroy_connection()
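A minimal usage sketch for the constructor above (hypothetical host and credentials; RestClientBase stands in for whatever concrete class supplies __init_connection, get_root_object, and __destroy_connection):

# Hypothetical usage; the address and credentials are placeholders.
client = RestClientBase('https://10.0.0.42',
                        username='admin',
                        password='password',
                        default_prefix='/rest/v1')
# By the time the constructor returns, the service root has been
# fetched via get_root_object() and the bootstrap connection closed.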
Example #2
    def fillDetails(self):
        sep = urlparse(self.accused)
        #protocol
        if sep.scheme != '':
            self.details["protocol"] = sep.scheme

        # hostname
        if re.search(r'\d+\.\d+\.\d+\.\d+', self.without(sep.netloc)):
            self.containsIp = 1
            self.details["ipaddress"] = sep.netloc
        else:
            self.details['hostname'] = sep.netloc
            self.length['hostnameLength'] = len(sep.netloc)
            self.details['topLevelDomain'] = get_tld(self.without(sep.netloc),
                                                     fix_protocol=True)
            self.length['topLevelDomainLength'] = len(
                self.details['topLevelDomain'])
            self.details['primaryDomain'] = get_fld(self.without(sep.netloc),
                                                    fix_protocol=True)
            self.length['primaryDomainLength'] = len(
                self.details['primaryDomain'])
            try:
                self.details["ipaddress"] = socket.gethostbyname(sep.netloc)
            except socket.error:
                # DNS resolution failed; leave the IP address unset
                pass
        # path & query
        self.details['path'] = sep.path
        self.details['query'] = sep.query
        self.details['noOfQuery'] = len(sep.query.split('&')) if sep.query else 0
        self.length['pathLength'] = len(self.details['path']) + len(
            self.details['query'])
        self.tokens()
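For reference, a standalone sketch of the URL decomposition fillDetails relies on (Python 2 urlparse, as used above; get_tld and get_fld come from the tld package):

from urlparse import urlparse  # urllib.parse on Python 3

sep = urlparse('https://sub.example.co.uk/a/b?x=1&y=2')
print(sep.scheme)                 # 'https'
print(sep.netloc)                 # 'sub.example.co.uk'
print(sep.path)                   # '/a/b'
print(sep.query)                  # 'x=1&y=2'
print(len(sep.query.split('&')))  # 2 query parameters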
Example #3
    def __init__(self, base_url, username=None, password=None,
                                default_prefix='/redfish/v1/', sessionkey=None):
        """Initialization of the base class RestClientBase

        :param base_url: The URL of the remote system
        :type base_url: str
        :param username: The user name used for authentication
        :type username: str
        :param password: The password used for authentication
        :type password: str
        :param default_prefix: The default root path
        :type default_prefix: str
        :param sessionkey: session key for the current login of base_url
        :type sessionkey: str

        """

        self.__base_url = base_url
        self.__username = username
        self.__password = password
        self.__url = urlparse2.urlparse(base_url)
        self.__session_key = sessionkey
        self.__authorization_key = None
        self.__session_location = None
        self._conn = None
        self._conn_count = 0
        self.login_url = None
        self.default_prefix = default_prefix

        self.__init_connection()
        self.get_root_object()
        self.__destroy_connection()
Example #4
    def _extract_url(self):
        u"""根据网址进行链接解析.

        Args:
            url: 待分析网页网址

        Returns:
            本链接网页内的同域名网址列表
        """
        while int(self.client.get('image_max_num')) != 0:
            if self.client.llen('web_url_goto') == 0:
                time.sleep(1)
            else:
                url = self.client.rpop('web_url_goto')
                try:
                    html = urllib2.urlopen(url).read()
                except Exception:
                    logging.warning("cannot open url: %s", url)
                    continue
                domain = urlparse2.urlparse(url).netloc
                web_url_list = self._extract_web_url(html, url, domain)
                image_url_list = self._extract_img_url(html, domain)
                for web_url in web_url_list:
                    if not self.client.sismember('web_url_visited', web_url):
                        self.client.sadd('web_url_visited', web_url)
                        self.client.lpush('web_url_goto', web_url)
                for image_url in image_url_list:
                    if not self.client.sismember('image_url_visited', image_url):
                        self.client.sadd('image_url_visited', image_url)
                        self.client.lpush('image_url_goto', image_url)
                        logging.info("%s--->%s" % (url, image_url))
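The crawler above is a breadth-first crawl built from Redis lists (work queues) and Redis sets (deduplication). A minimal sketch of that queue/set pattern with the redis-py client, using the same key names:

import redis

client = redis.StrictRedis()

def enqueue_once(web_url):
    # sadd returns 1 only on first insertion, so the visited set
    # doubles as the deduplication check before queueing.
    if client.sadd('web_url_visited', web_url):
        client.lpush('web_url_goto', web_url)

enqueue_once('http://example.com/')
next_url = client.rpop('web_url_goto')  # lpush + rpop gives FIFO order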
Example #5
 def _download_request(self, request, spider):
     proxies = {}
     proxy = request.meta.get('proxy', '')
     if proxy:
         for p in self.proxies:
             if proxy in p:
                 scheme = urlparse(p).scheme
                 proxies[scheme] = p
                 break
     timeout = request.meta.get('download_timeout', self.timeout)
     url = request.url
     method = request.method
     headers = headers_scrapy2dict(request.headers)
     data = request.body
     session = self._session or requests.sessions.Session()
     st = time.time()
     requests_response = session.request(method, url, headers=headers, data=data, timeout=timeout, proxies=proxies)
     et = time.time()
     cost = et - st
     request.meta['download_latency'] = cost
     headers = Headers(dict(requests_response.headers))
     respcls = responsetypes.from_args(headers=headers,
                                       url=requests_response.url,
                                       body=requests_response.content)
     response_url = requests_response.url.encode(requests_response.encoding or 'utf-8')
     response = respcls(url=response_url,
                        status=requests_response.status_code,
                        headers=headers,
                        body=requests_response.content, )
     return response
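The proxies dict built above follows the scheme-to-URL mapping that requests expects. For illustration (hypothetical proxy address):

import requests

proxies = {'http': 'http://127.0.0.1:8080',
           'https': 'http://127.0.0.1:8080'}
resp = requests.get('http://example.com/', proxies=proxies, timeout=30)
print(resp.status_code)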
Example #6
    def __init__(self, url, nr_sockets, counter):

        super(Striker, self).__init__()

        self.counter = counter
        self.nr_socks = nr_sockets

        parsedUrl = urlparse2.urlparse(url)

        self.ssl = False
        if parsedUrl.scheme == 'https':
            self.ssl = True

        self.host = parsedUrl.netloc.split(':')[0]
        self.url = parsedUrl.path

        self.port = parsedUrl.port

        if not self.port:
            self.port = 80 if not self.ssl else 443

        self.referers = [
            'http://www.google.com/', 'http://www.bing.com/',
            'http://www.baidu.com/', 'http://www.yandex.com/',
            'http://' + self.host + '/'
        ]
Example #7
def get_json(url, wjson):
    parsed_u = urlparse(url)
    if wjson:
        # it's a specific .json? link
        url = "{}://{}{}.json?{}".format(parsed_u.scheme, parsed_u.netloc,
                                         parsed_u.path, parsed_u.query)
    print("Requesting {}".format(url))
    response = requests.get(url, headers=REQUEST_HEADERS)
    data = json.loads(response.content)
    return data
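A hypothetical call; REQUEST_HEADERS is assumed to be a module-level dict of request headers defined elsewhere:

REQUEST_HEADERS = {'User-Agent': 'example-client/1.0'}  # assumed module-level constant
listing = get_json('https://www.reddit.com/r/learnpython/', wjson=True)
print(listing.keys())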
Example #8
def parseFromSite(address):
        print(address)
        req = urllib2.Request(address, headers={'User-Agent' : "Magic Browser"}) 
        url = urllib2.urlopen(req).read()
        soup = BeautifulSoup(url, "lxml")
        for line in soup.find_all('a'):
                o = urlparse(address)
                href = line.get('href')
                try:
                        if href and (o.hostname in href or '.' not in href):
                                if o.hostname in href:
                                        parseFromPage(href)
                                else:
                                        parseFromPage(address + href)
                except TypeError:
                        pass
Example #9
    def _extract_web_url(self, html, url, domain):
        u"""从html内容中解析出同域名网址列表.

        Args:
            html: 待解析的内容
            url: 爬取页面的地址
            domain: 当前网站域名

        Return:
            html内容中的同域名网址

        """

        url_list = []
        content = BeautifulSoup(html).findAll('a')
        for item in content:
            href = item.get('href')
            ans = urlparse2.urljoin(url, href)
            ans_netloc = urlparse2.urlparse(ans).netloc
            if domain == ans_netloc:
                url_list.append(ans)
        return url_list
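The key step above is urljoin, which resolves relative hrefs against the page URL before the same-domain check. The standard library behaves the same way (urlparse2 mirrors this API):

from urlparse import urljoin, urlparse  # Python 2 stdlib

base = 'http://example.com/news/index.html'
print(urljoin(base, '/about'))       # http://example.com/about
print(urljoin(base, 'item?id=3'))    # http://example.com/news/item?id=3
print(urlparse(urljoin(base, '//cdn.example.net/x')).netloc)  # cdn.example.net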
Example #10
 def _get_msg_id_from_url(self, url):
     msg_id = ''
     try:
         msg_id = os.path.split(urlparse(url).path)[1]
     except (AttributeError, TypeError):
         # keep the empty id when the url cannot be parsed
         pass
     return msg_id
Example #11
def getID(url):

    pUrl = urlparse2.urlparse(url)
    return urlparse2.parse_qs(pUrl.query)['id'][0]
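parse_qs maps each query parameter to a list of values, hence the ['id'][0] indexing:

print(getID('http://example.com/watch?id=42&lang=en'))  # prints: 42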
Example #12
    def _rest_request(self, path, method='GET', args=None, body=None, \
                                                                headers=None):
        """Rest request main function

        :param path: path within tree
        :type path: str
        :param method: method to be implemented
        :type method: str
        :param args: the arguments for method
        :type args: dict
        :param body: body payload for the rest call
        :type body: dict
        :param headers: provide additional headers
        :type headers: dict
        :returns: a RestResponse object

        """
        headers = self._get_req_headers(headers)
        reqpath = path.replace('//', '/')

        if body is not None:
            if isinstance(body, dict) or isinstance(body, list):
                headers['Content-Type'] = u'application/json'
                body = json.dumps(body)
            else:
                headers['Content-Type'] = u'application/x-www-form-urlencoded'
                body = urllib.urlencode(body)

            if method == 'PUT':
                resp = self._rest_request(path=path)

                try:
                    if resp.getheader('content-encoding') == 'gzip':
                        buf = StringIO()
                        gfile = gzip.GzipFile(mode='wb', fileobj=buf)

                        try:
                            gfile.write(str(body))
                        finally:
                            gfile.close()

                        compresseddata = buf.getvalue()
                        if compresseddata:
                            data = bytearray()
                            data.extend(buffer(compresseddata))
                            body = data
                except BaseException as excp:
                    LOGGER.error('Error occurred while compressing body: %s', excp)
                    raise

            headers['Content-Length'] = len(body)

        if args:
            if method == 'GET':
                reqpath += '?' + urllib.urlencode(args)
            elif method == 'PUT' or method == 'POST' or method == 'PATCH':
                headers['Content-Type'] = u'application/x-www-form-urlencoded'
                body = urllib.urlencode(args)

        restreq = RestRequest(reqpath, method=method, body=body)

        attempts = 0
        while attempts < self.MAX_RETRY:
            if logging.getLogger().isEnabledFor(logging.DEBUG):
                try:
                    LOGGER.debug('HTTP REQUEST: %s\n\tPATH: %s\n\tBODY: %s'% \
                                (restreq.method, restreq.path, restreq.body))
                except Exception:
                    LOGGER.debug('HTTP REQUEST: %s\n\tPATH: %s\n\tBODY: %s'% \
                                (restreq.method, restreq.path, 'binary body'))
            attempts = attempts + 1
            LOGGER.info('Attempt %s of %s', attempts, path)

            try:
                while True:
                    if self._conn is None:
                        self.__init_connection()

                    self._conn.request(method.upper(), reqpath, body=body, \
                                                                headers=headers)
                    self._conn_count += 1

                    inittime = time.clock()
                    resp = self._conn.getresponse()
                    endtime = time.clock()
                    LOGGER.info('Response Time to %s: %s seconds.'% \
                                        (restreq.path, str(endtime-inittime)))

                    if resp.getheader('Connection') == 'close':
                        self.__destroy_connection()
                    if resp.status not in range(300, 399) or \
                                                            resp.status == 304:
                        break

                    newloc = resp.getheader('location')
                    newurl = urlparse2.urlparse(newloc)

                    reqpath = newurl.path
                    self.__init_connection(newurl)

                restresp = RestResponse(restreq, resp)

                try:
                    if restresp.getheader('content-encoding') == "gzip":
                        compressedfile = StringIO(restresp.text)
                        decompressedfile = gzip.GzipFile(fileobj=compressedfile)
                        restresp.text = decompressedfile.read()
                except Exception as excp:
                    LOGGER.error('Error occurred while decompressing body: %s', \
                                                                        excp)
                    raise DecompressResponseError()
            except Exception as excp:
                if isinstance(excp, DecompressResponseError):
                    raise

                LOGGER.info('Retrying %s [%s]'% (path, excp))
                time.sleep(1)

                self.__init_connection()
                continue
            else:
                break

        self.__destroy_connection()
        if attempts < self.MAX_RETRY:
            if logging.getLogger().isEnabledFor(logging.DEBUG):
                headerstr = ''

                for header in restresp._http_response.msg.headers:
                    headerstr += '\t' + header.rstrip() + '\n'

                try:
                    LOGGER.debug('HTTP RESPONSE for %s:\nCode: %s\nHeaders:\n' \
                             '%s\nBody Response of %s: %s'%\
                             (restresp.request.path,\
                            str(restresp._http_response.status)+ ' ' + \
                            restresp._http_response.reason, \
                            headerstr, restresp.request.path, restresp.read))
                except Exception:
                    LOGGER.debug('HTTP RESPONSE:\nCode:%s', (restresp))

            return restresp
        else:
            raise RetriesExhaustedError()
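The PUT branch in the method above probes the resource with a GET and, if the server responded gzip-encoded, compresses the outgoing body to match. A standalone sketch of that compression step (Python 2 idioms as in the snippet; StringIO and buffer are Python 2 constructs):

import gzip
from StringIO import StringIO

def gzip_body(body):
    # In-memory gzip compression of a request body, mirroring the
    # PUT branch above.
    buf = StringIO()
    gfile = gzip.GzipFile(mode='wb', fileobj=buf)
    try:
        gfile.write(str(body))
    finally:
        gfile.close()
    data = bytearray()
    data.extend(buffer(buf.getvalue()))
    return data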
Example #13
    def _rest_request(self, path, method='GET', args=None, body=None,
                      headers=None, optionalpassword=None,
                      providerheader=None):
        """Rest request main function

        :param path: path within tree
        :type path: str
        :param method: method to be implemented
        :type method: str
        :param args: the arguments for method
        :type args: dict
        :param body: body payload for the rest call
        :type body: dict
        :param headers: provide additional headers
        :type headers: dict
        :param optionalpassword: provide password for authentication
        :type optionalpassword: str
        :param providerheader: provider id for the header
        :type providerheader: str

        """
        headers = self._get_req_headers(headers, providerheader, \
                                                            optionalpassword)
        reqpath = path.replace('//', '/')

        if body:
            if isinstance(body, dict) or isinstance(body, list):
                headers['Content-Type'] = u'application/json'
                body = json.dumps(body)
            else:
                headers['Content-Type'] = u'application/x-www-form-urlencoded'
                body = urllib.urlencode(body)

            if method == 'PUT':
                resp = self._rest_request(path=path)
                try:
                    if resp.getheader('content-encoding') == 'gzip':
                        buf = StringIO()
                        gfile = gzip.GzipFile(mode='wb', fileobj=buf)

                        try:
                            gfile.write(str(body))
                        finally:
                            gfile.close()

                        compresseddata = buf.getvalue()
                        if compresseddata:
                            data = bytearray()
                            data.extend(buffer(compresseddata))
                            body = data
                except BaseException as excp:
                    LOGGER.error('Error occurred while compressing body: %s', excp)
                    raise

            headers['Content-Length'] = len(body)

        if args:
            if method == 'GET':
                reqpath += '?' + urllib.urlencode(args)
            elif method == 'PUT' or method == 'POST' or method == 'PATCH':
                headers['Content-Type'] = u'application/x-www-form-urlencoded'
                body = urllib.urlencode(args)

        restreq = RestRequest(reqpath, method=method, body=body)

        attempts = 0
        while attempts < self.MAX_RETRY:

            if logging.getLogger().isEnabledFor(logging.DEBUG):
                LOGGER.debug('REQ %s', (restreq))

            attempts = attempts + 1

            try:
                while True:
                    if self._conn is None:
                        self.__init_connection()

                    self._conn.request(method.upper(), reqpath, body=body, \
                                                                headers=headers)
                    self._conn_count += 1
                    resp = self._conn.getresponse()

                    if resp.getheader('Connection') == 'close':
                        self.__destroy_connection()
                    if resp.status not in range(300, 399):
                        break

                    newloc = resp.getheader('location')
                    newurl = urlparse2.urlparse(newloc)
                    reqpath = newurl.path
                    self.__init_connection(newurl)

                restresp = RestResponse(restreq, resp)

                try:
                    if restresp.getheader('content-encoding') == "gzip":
                        compressedfile = StringIO(restresp.text)
                        decompressedfile = gzip.GzipFile(fileobj=compressedfile)
                        restresp.text = decompressedfile.read()
                except Exception as excp:
                    LOGGER.error('Error occurred while decompressing body: %s', \
                                                                        excp)
                    raise DecompressResponseError()
            except Exception as excp:
                if isinstance(excp, DecompressResponseError):
                    raise

                LOGGER.info('Retrying [%s]', excp)
                time.sleep(1)

                self.__init_connection()
                continue
            else:
                break

        self.__destroy_connection()
        if attempts < self.MAX_RETRY:

            if logging.getLogger().isEnabledFor(logging.DEBUG):
                LOGGER.debug('RESP %s', (restresp))

            return restresp
        else:
            raise RetriesExhaustedError()
Example #14
    def _rest_request(self, path='', method="GET", args=None, body=None,
                      headers=None, optionalpassword=None,
                      providerheader=None):
        """Rest request for blob store client

        :param path: path within tree
        :type path: str
        :param method: method to be implemented
        :type method: str
        :param args: the arguments for method
        :type args: dict
        :param body: body payload for the rest call
        :type body: dict
        :param headers: provide additional headers
        :type headers: dict
        :param optionalpassword: provide password for authentication
        :type optionalpassword: str
        :param providerheader: provider id for the header
        :type providerheader: str

        """
        headers = self._get_req_headers(headers, providerheader,
                                        optionalpassword)

        if (not self.is_redfish and
                self.default_prefix in path and path[-1] == '/'):
            path = path[0:-1]
        elif (self.is_redfish and
              self.default_prefix in path and path[-1] != '/'):
            path = path + '/'
        else:
            pass

        reqpath = path.replace('//', '/')

        if body:
            if isinstance(body, dict) or isinstance(body, list):
                headers['Content-Type'] = u'application/json'
                body = json.dumps(body)
            else:
                headers['Content-Type'] = u'application/x-www-form-urlencoded'
                body = urllib.urlencode(body)

            if method == 'PUT':
                resp = self._rest_request(path=path)

                try:
                    if resp.getheader('content-encoding') == 'gzip':
                        buf = StringIO()
                        gfile = gzip.GzipFile(mode='wb', fileobj=buf)

                        try:
                            gfile.write(str(body))
                        finally:
                            gfile.close()

                        compresseddata = buf.getvalue()
                        if compresseddata:
                            data = bytearray()
                            data.extend(buffer(compresseddata))
                            body = data
                except BaseException as excp:
                    LOGGER.error('Error occurred while compressing body: %s', excp)
                    raise

            headers['Content-Length'] = len(body)

        self._method = method
        str1 = '%s %s %s\r\n' % (method, reqpath,\
                                Blobstore2RestClient._http_vsn_str)

        str1 += 'Host: \r\n'
        str1 += 'Accept-Encoding: identity\r\n'
        for header, value in headers.iteritems():
            str1 += '%s: %s\r\n' % (header, value)

        str1 += '\r\n'

        if body and len(body) > 0:
            if isinstance(body, bytearray):
                str1 = str1.encode("ASCII") + body
            else:
                str1 += body

        bs2 = BlobStore2()
        if not isinstance(str1, bytearray):
            str1 = str1.encode("ASCII")

        resp_txt = bs2.rest_immediate(str1)

        #Dummy response to support a bad host response
        if len(resp_txt) == 0:
            resp_txt = "HTTP/1.1 500 Not Found\r\nAllow: " \
            "GET\r\nCache-Control: no-cache\r\nContent-length: " \
            "0\r\nContent-type: text/html\r\nDate: Tues, 1 Apr 2025 " \
            "00:00:01 GMT\r\nServer: " \
            "HP-iLO-Server/1.30\r\nX_HP-CHRP-Service-Version: 1.0.3\r\n\r\n\r\n"

        restreq = RestRequest(reqpath, method=method, body=body)
        rest_response = RisRestResponse(restreq, resp_txt)

        try:
            if rest_response.getheader('content-encoding') == 'gzip':
                compressedfile = StringIO(rest_response.text)
                decompressedfile = gzip.GzipFile(fileobj=compressedfile)
                rest_response.text = decompressedfile.read()
        except StandardError:
            pass

        if rest_response.status in range(300, 399):
            newloc = rest_response.getheader("location")
            newurl = urlparse2.urlparse(newloc)

            rest_response = self._rest_request(newurl.path, \
                               method, args, body, headers, \
                               optionalpassword, providerheader)

        return rest_response
Example #15
    def _load(self, path, skipcrawl=False, originaluri=None, includelogs=False,\
                        skipinit=False, loadtype='href', loadcomplete=False):
        """Helper function to main load function.

        :param path: path to start load from.
        :type path: str.
        :param skipcrawl: flag to determine if load should traverse found links.
        :type skipcrawl: boolean.
        :param originaluri: variable to assist in determining originating path.
        :type originaluri: str.
        :param includelogs: flag to determine if logs should be downloaded also.
        :type includelogs: boolean.
        :param skipinit: flag to determine if first run of load.
        :type skipinit: boolean.
        :param loadtype: flag to determine if load is meant for only href items.
        :type loadtype: str.
        :param loadcomplete: flag to download the entire monolith
        :type loadcomplete: boolean

        """
        if path.endswith("?page=1"):
            return
        elif not includelogs:
            if "/Logs/" in path:
                return

        #TODO: need to find a better way to support non ascii characters
        path = path.replace("|", "%7C")

        #remove fragments
        newpath = urlparse2.urlparse(path)
        newpath.fragment = ''
        path = urlparse2.urlunparse(newpath)

        LOGGER.debug(u'_loading %s', path)

        if not self.reload:
            if path.lower() in self._visited_urls:
                return

        resp = self._client.get(path)

        if resp.status != 200:
            path = path + '/'
            resp = self._client.get(path)

            if resp.status == 401:
                raise SessionExpiredRis("Invalid session. Please logout and "\
                                        "log back in or include credentials.")
            elif resp.status != 200:
                return

        self.queue.put((resp, path, skipinit, self))

        if loadtype == 'href':
            #follow all the href attributes
            jsonpath_expr = jsonpath_rw.parse(u"$..'@odata.id'")
            matches = jsonpath_expr.find(resp.dict)

            if 'links' in resp.dict and 'NextPage' in resp.dict['links']:
                if originaluri:
                    next_link_uri = originaluri + '?page=' + \
                                    str(resp.dict['links']['NextPage']['page'])
                    href = u'%s' % next_link_uri

                    self._load(href, originaluri=originaluri, \
                               includelogs=includelogs, skipcrawl=skipcrawl, \
                               skipinit=skipinit)
                else:
                    next_link_uri = path + '?page=' + \
                                    str(resp.dict['links']['NextPage']['page'])

                    href = u'%s' % next_link_uri
                    self._load(href, originaluri=path, includelogs=includelogs,\
                                        skipcrawl=skipcrawl, skipinit=skipinit)

            if not skipcrawl:
                for match in matches:
                    if str(match.full_path) == "*****@*****.**" or \
                            str(match.full_path) == "*****@*****.**":
                        continue

                    if match.value == path:
                        continue

                    href = u'%s' % match.value
                    self._load(href, skipcrawl=skipcrawl, \
                           originaluri=originaluri, includelogs=includelogs, \
                           skipinit=skipinit)

            if loadcomplete:
                for match in matches:
                    self._load(match.value, skipcrawl=skipcrawl, originaluri=\
                       originaluri, includelogs=includelogs, skipinit=skipinit)
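The crawl above uses jsonpath_rw to collect every '@odata.id' reference from a response body before following it. A minimal sketch of that matching step:

import jsonpath_rw

doc = {'Members': [{'@odata.id': '/redfish/v1/Systems/1'},
                   {'@odata.id': '/redfish/v1/Systems/2'}]}
for match in jsonpath_rw.parse(u"$..'@odata.id'").find(doc):
    print(match.value)      # each href found in the body
    print(match.full_path)  # where in the document it appeared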
Example #16
    def _rest_request(self, path='', method="GET", args=None, body=None,
                      headers=None, optionalpassword=None, providerheader=None):
        """Rest request for blob store client

        :param path: path within tree
        :type path: str
        :param method: method to be implemented
        :type method: str
        :param args: the arguments for method
        :type args: dict
        :param body: body payload for the rest call
        :type body: dict
        :param headers: provide additional headers
        :type headers: dict
        :param optionalpassword: provide password for authentication
        :type optionalpassword: str
        :param providerheader: provider id for the header
        :type providerheader: str
        :return: returns a RestResponse object

        """
        headers = self._get_req_headers(headers, providerheader, \
                                                            optionalpassword)

        if not self.is_redfish and self.default_prefix in path and \
                                                                path[-1] == '/':
            path = path[0:-1]
        elif self.is_redfish and self.default_prefix in path and \
                                                                path[-1] != '/':
            path = path + '/'
        else:
            pass

        reqpath = path.replace('//', '/')

        if body is not None:
            if isinstance(body, dict) or isinstance(body, list):
                headers['Content-Type'] = u'application/json'
                body = json.dumps(body)
            else:
                headers['Content-Type'] = u'application/x-www-form-urlencoded'
                body = urllib.urlencode(body)

            if method == 'PUT':
                resp = self._rest_request(path=path)

                try:
                    if resp.getheader('content-encoding') == 'gzip':
                        buf = StringIO()
                        gfile = gzip.GzipFile(mode='wb', fileobj=buf)

                        try:
                            gfile.write(str(body))
                        finally:
                            gfile.close()

                        compresseddata = buf.getvalue()
                        if compresseddata:
                            data = bytearray()
                            data.extend(buffer(compresseddata))
                            body = data
                except BaseException as excp:
                    LOGGER.error('Error occurred while compressing body: %s', excp)
                    raise

            headers['Content-Length'] = len(body)

        if args:
            if method == 'GET':
                reqpath += '?' + urllib.urlencode(args)
            elif method == 'PUT' or method == 'POST' or method == 'PATCH':
                headers['Content-Type'] = u'application/x-www-form-urlencoded'
                body = urllib.urlencode(args)

        str1 = '%s %s %s\r\n' % (method, reqpath, \
                                            Blobstore2RestClient._http_vsn_str)

        str1 += 'Host: \r\n'
        str1 += 'Accept-Encoding: identity\r\n'
        for header, value in headers.iteritems():
            str1 += '%s: %s\r\n' % (header, value)

        str1 += '\r\n'

        if body and len(body) > 0:
            if isinstance(body, bytearray):
                str1 = str1.encode("ASCII") + body
            else:
                str1 += body

        bs2 = BlobStore2()
        if not isinstance(str1, bytearray):
            str1 = str1.encode("ASCII")
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            LOGGER.debug('Blobstore REQUEST: %s\n\tPATH: %s\n\tBODY: %s'% \
                         (method, path, body))

        inittime = time.clock()
        resp_txt = bs2.rest_immediate(str1)
        endtime = time.clock()

        LOGGER.info("iLO Response Time to %s: %s secs."% \
                                                (path, str(endtime-inittime)))
        #Dummy response to support a bad host response
        if len(resp_txt) == 0:
            resp_txt = "HTTP/1.1 500 Not Found\r\nAllow: " \
            "GET\r\nCache-Control: no-cache\r\nContent-length: " \
            "0\r\nContent-type: text/html\r\nDate: Tues, 1 Apr 2025 " \
            "00:00:01 GMT\r\nServer: " \
            "HP-iLO-Server/1.30\r\nX_HP-CHRP-Service-Version: 1.0.3\r\n\r\n\r\n"

        restreq = RestRequest(reqpath, method=method, body=body)
        rest_response = RisRestResponse(restreq, resp_txt)

        if rest_response.status in range(300, 399) and \
                                                    rest_response.status != 304:
            newloc = rest_response.getheader("location")
            newurl = urlparse2.urlparse(newloc)

            rest_response = self._rest_request(newurl.path, method, args, \
                               body, headers, optionalpassword, providerheader)

        try:
            if rest_response.getheader('content-encoding') == 'gzip':
                compressedfile = StringIO(rest_response.text)
                decompressedfile = gzip.GzipFile(fileobj=compressedfile)
                rest_response.text = decompressedfile.read()
        except StandardError:
            pass
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            headerstr = ''
            for header in rest_response._http_response.msg.headers:
                headerstr += '\t' + header.rstrip() + '\n'
            try:
                LOGGER.debug('Blobstore RESPONSE for %s:\nCode: %s\nHeaders:\n%s'\
                         '\nBody of %s: %s'%\
                         (rest_response.request.path,\
                        str(rest_response._http_response.status)+ ' ' + \
                        rest_response._http_response.reason, \
                        headerstr, rest_response.request.path, rest_response.read))
            except Exception:
                LOGGER.debug('Blobstore RESPONSE for %s:\nCode:%s'% \
                             (rest_response.request.path, rest_response))
        return rest_response
Example #17
def run_test_case(s3_client, my_test_case):
    # -----------------------
    # Download test case query
    # -----------------------
    with tempfile.NamedTemporaryFile(mode='w+b', delete=True) as test_case_object:
        logger.info("Downloading test case from: " + 's3://' + s3_testcases_bucket + '/' + s3_testcases_path + '/' +
                    my_test_case['query'])
        try:
            s3_client.download_fileobj(s3_testcases_bucket, s3_testcases_path + '/' + my_test_case['query'],
                                   test_case_object)
        except Exception as e:
            logger.error("Failed to download S3 file object " + my_test_case['query'] + " because of error: %s" % e)
            raise e

        test_case_object.seek(0)
        test_query = test_case_object.read().replace('\n', '')
        logger.debug('Downloaded test case using temp file: ' + test_case_object.name)
    logger.debug('Test query: ' + test_query)

    # -----------------------
    # Download test fixture
    # -----------------------
    with tempfile.NamedTemporaryFile(mode='w+b', delete=True) as test_fixture_object:
        logger.info("Downloading test result from: " + 's3://' + s3_testcases_bucket + '/' + s3_testcases_path + '/' +
                    my_test_case['fixture'])

        try:
            s3_client.download_fileobj(s3_testcases_bucket, s3_testcases_path + '/' + my_test_case['fixture'],
                                   test_fixture_object)
        except Exception as e:
            logger.error("Failed to download S3 file object " + my_test_case['fixture'] + " because of error: %s" % e)
            raise e

        test_fixture_object.seek(0)
        test_fixture = test_fixture_object.read()
        logger.debug('Downloaded test fixture using temp file: ' + test_fixture_object.name)
    logger.debug('Test fixture: ' + test_fixture)

    # -----------------------
    # Open Athena Connection
    # -----------------------
    try:
        logger.debug("Attempting to open connection to Athena")
        athena_client = boto3.client('athena')
        logger.debug("Connection to Athena successfully opened")
    except ClientError as e:
        logger.error("Failed to connect to Athena because of error: %s" % e)
        raise e
    except Exception as e:
        logger.error("Failed to connect to Athena because of error: %s" % e)
        raise e

    # -----------------------
    # Execute Athena query
    # -----------------------
    try:
        logger.debug("Attempting to submit query to Athena")
        response = athena_client.start_query_execution(
            QueryString=test_query,
            ResultConfiguration={'OutputLocation': 's3://' + s3_testruns_bucket + '/' + s3_testruns_path}
        )
        logger.debug("Query submitted to Athena successfully")
    except ClientError as e:
        if e.response['Error']['Code'] == 'InternalServerException':
            logger.error("Query failed submission to Athena due to an InternalServerException")
            raise e
        else:
            logger.error("Query failed submission to Athena due to an unexpected error: %s" % e)
            raise e
    # Check status and log progress; failed submissions were re-raised above
    query_id = response['QueryExecutionId']
    if response['ResponseMetadata']['HTTPStatusCode'] != 200:
        logger.error("HTTP error response code: " + str(response['ResponseMetadata']['HTTPStatusCode']))
    logger.info("Query execution id: " + query_id)

    # -----------------------
    # Lookup Athena query information
    # to get exact output_location
    # -----------------------
    try:
        logger.debug("Attempting to query information about query: " + query_id)
        response = athena_client.get_query_execution(
            QueryExecutionId=query_id
        )
        logger.debug("Retrieved information about query: " + query_id)
        # Check status and log progress
        # if response['ResponseMetadata']['HTTPStatusCode'] != 200:
        #     logger.error("HTTP error response code: " + str(response['ResponseMetadata']['HTTPStatusCode']))
        output_location = response['QueryExecution']['ResultConfiguration']['OutputLocation']
        logger.debug("Athena query output location: " + output_location)
        output_url = urlparse(output_location)
        output_bucket = output_url.netloc
        output_object = output_url.path.strip("/")
        logger.debug("Parsed Athena output: Bucket=" + output_bucket + " Object=" + output_object)
    except ClientError as e:
        if e.response['Error']['Code'] == 'InternalServerException':
            logger.error("Failed to retrieve information about query: " + query_id + "due to InternalServerException")
            raise e
        else:
            logger.error("Failed to retrieve information about query: " + query_id + "due to unexpected error: %s" % e)
            raise e

    # -----------------------
    # Wait for Query Execution in S3
    # -----------------------
    logger.info("Begin waiting for Bucket=" + output_bucket + " Object=" + output_object)
    try:
        logger.debug('Creating S3 Waiter client object')
        waiter = s3_client.get_waiter('object_exists')
        logger.debug('Done Creating S3 Waiter client object')
    except ClientError as e:
        logger.error("Failed to create waiter client because of error: %s" % e)
        raise e
    except Exception as e:
        logger.error("Failed to create waiter client because of error: %s" % e)
        raise e

    try:
        logger.debug(
            "Creating waiter for S3 Object: Bucket=" + output_bucket + " Object=" + output_object)
        waiter.wait(
            Bucket=output_bucket,
            Key=output_object,
            WaiterConfig={
                'Delay': float(waiter_delay),
                'MaxAttempts': waiter_attempts
            }
        )
    except ClientError as e:
        logger.error("Failed while waiting for S3 object because of error: %s" % e)
        raise e
    except Exception as e:
        logger.error("Failed while waiting for S3 object because of error: %s" % e)
        raise e

    logger.debug(
        "Finished waiting for S3 Object: Bucket=" + output_bucket + " Object=" + output_object)

    # -----------------------
    # Download test result
    # -----------------------
    with tempfile.NamedTemporaryFile(mode='w+b', delete=True) as test_result_object:
        logger.debug(
            "Downloading test result from:  Bucket=" + output_bucket + " Object=" + output_object)
        try:
            s3_client.download_fileobj(output_bucket, output_object, test_result_object)
            test_result_object.seek(0)
            test_result = test_result_object.read()
        except ClientError as e:
            logger.error("Failed to download S3 file object because of error: %s" % e)
            raise e
        except Exception as e:
            logger.error("Failed to download S3 file object because of error: %s" % e)
            raise e
        logger.debug('Downloaded test result using temp file: ' + test_result_object.name)
    logger.debug('Test results: ' + test_result)

    # Determine Diffs
    if test_fixture == test_result:
        logger.log(STATUS, 'Query \"' + my_test_case['query'] + "\" with fixture \"" + my_test_case[
            'fixture'] + '\" test passes validation')
        return 0
    else:
        logger.error('Query \"' + my_test_case['query'] + "\" with fixture \"" + my_test_case[
            'fixture'] + '\" test fails validation')
        deepdiff = DeepDiff(test_fixture, test_result)
        print ("------------------------  Begin Diff  ------------------------")
        print (deepdiff["values_changed"]["root"]["diff"].encode('utf-8'))
        print ("-------------------------  End Diff  -------------------------")
        return 1
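As an alternative to waiting for the result object to appear in S3, the query state can be polled through Athena itself. A sketch of that approach (boto3; query_id as obtained above):

import time
import boto3

def wait_for_query(athena_client, query_id, delay=5, max_attempts=60):
    # Poll until the query leaves the QUEUED/RUNNING states.
    for _ in range(max_attempts):
        state = athena_client.get_query_execution(
            QueryExecutionId=query_id
        )['QueryExecution']['Status']['State']
        if state in ('SUCCEEDED', 'FAILED', 'CANCELLED'):
            return state
        time.sleep(delay)
    raise RuntimeError('query %s did not finish in time' % query_id)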
Example #18
 def get_cache_dirname(self):
     """The rest client's current base url converted to path"""
     parts = urlparse2.urlparse(self.get_base_url())
     pathstr = '%s/%s' % (parts.netloc, parts.path)
     return pathstr.replace('//', '/')
Example #19
def parse_domain(url):
    parsed = urlparse(url)
    if parsed.scheme:
        return parsed.hostname
Example #20
def _url2path(url):
    """ Function to convert given url to path """
    parts = urlparse2.urlparse(url)
    pathstr = '%s/%s' % (parts.netloc, parts.path)
    return pathstr.replace('//', '/')
Example #21
    def _load(self, path, skipcrawl=False, originaluri=None, includelogs=False,\
                        skipinit=False, loadtype='href', loadcomplete=False):
        """Helper function to main load function.

        :param path: path to start load from.
        :type path: str.
        :param skipcrawl: flag to determine if load should traverse found links.
        :type skipcrawl: boolean.
        :param originaluri: variable to assist in determining originating path.
        :type originaluri: str.
        :param includelogs: flag to determine if logs should be downloaded also.
        :type includelogs: boolean.
        :param skipinit: flag to determine if first run of load.
        :type skipinit: boolean.
        :param loadtype: flag to determine if load is meant for only href items.
        :type loadtype: str.
        :param loadcomplete: flag to download the entire monolith
        :type loadcomplete: boolean

        """
        if path.endswith("?page=1"):
            return
        elif not includelogs:
            if "/Logs/" in path:
                return

        #TODO: need to find a better way to support non ascii characters
        path = path.replace("|", "%7C")
        #remove fragments
        newpath = urlparse2.urlparse(path)
        newpath.fragment = ''
        path = urlparse2.urlunparse(newpath)

        LOGGER.debug(u'_loading %s', path)

        if not self.reload:
            if path.lower() in self._visited_urls:
                return

        resp = self._client.get(path)

        if resp.status != 200 and path.lower() == self._client.typepath.defs.\
                                                                    biospath:
            raise BiosUnregisteredError()
        elif resp.status != 200:
            path = path + '/'
            resp = self._client.get(path)

            if resp.status == 401:
                raise SessionExpiredRis("Invalid session. Please logout and "\
                                        "log back in or include credentials.")
            elif resp.status != 200:
                return

        if loadtype == "ref":
            self.parse_schema(resp)

        self.queue.put((resp, path, skipinit, self))

        if loadtype == 'href':
            #follow all the href attributes
            if self.is_redfish:
                jsonpath_expr = jsonpath_rw.parse(u"$..'@odata.id'")
            else:
                jsonpath_expr = jsonpath_rw.parse(u'$..href')
            matches = jsonpath_expr.find(resp.dict)

            if 'links' in resp.dict and 'NextPage' in resp.dict['links']:
                if originaluri:
                    next_link_uri = originaluri + '?page=' + \
                                    str(resp.dict['links']['NextPage']['page'])
                    href = u'%s' % next_link_uri

                    self._load(href, originaluri=originaluri, \
                               includelogs=includelogs, skipcrawl=skipcrawl, \
                               skipinit=skipinit)
                else:
                    next_link_uri = path + '?page=' + \
                                    str(resp.dict['links']['NextPage']['page'])

                    href = u'%s' % next_link_uri
                    self._load(href, originaluri=path, includelogs=includelogs,\
                                        skipcrawl=skipcrawl, skipinit=skipinit)

            (newversion, dirmatch) = self.check_for_directory(matches)
            if not newversion and not skipcrawl:
                for match in matches:
                    if path == "/rest/v1":
                        if str(match.full_path) == "links.Schemas.href" or \
                                str(match.full_path) == "links.Registries.href":
                            continue
                    else:
                        if str(match.full_path) == "*****@*****.**" or \
                                str(match.full_path) == "*****@*****.**":
                            continue

                    if match.value == path:
                        continue

                    href = u'%s' % match.value
                    self._load(href, skipcrawl=skipcrawl, \
                           originaluri=originaluri, includelogs=includelogs, \
                           skipinit=skipinit)
            elif not skipcrawl:
                href = u'%s' % dirmatch.value
                self._load(href, skipcrawl=skipcrawl, originaluri=originaluri, \
                                    includelogs=includelogs, skipinit=skipinit)
            if loadcomplete:
                for match in matches:
                    self._load(match.value, skipcrawl=skipcrawl, originaluri=\
                       originaluri, includelogs=includelogs, skipinit=skipinit)
Example #22
    def parse_list_page(self, response):
        multi_xpath = '/html/body/div[@id and @class="c"]'
        html5_response = response_html5parse(response)
        hxs = HtmlXPathSelector(html5_response)
        multi_hxs = hxs.select(multi_xpath)
        list_url = response.url
        query = response.meta.get('query')
        for hxs in multi_hxs:
            nick = ''.join(hxs.select('./div[1]/a//text()').extract())
            user_url = ''.join(hxs.select('./div[1]/a/@href').extract())
            user_url = urllib.unquote(user_url).strip()
            user_url_up = urlparse(user_url)
            user_url_up = user_url_up._replace(query='')
            user_url = urlunparse(user_url_up)
            div3 = hxs.select('./div[3]')
            if div3:
                content = ''.join(div3.select('.//text()').extract()[1:-10])
            else:
                content = ''.join(hxs.select('./div[1]/span//text()').extract())
            misc1 = hxs.select('.//a//text()')
            zan_count, zhuanfa_count, pinglun_count = self._ana_misc1(misc1)
            misc2 = hxs.select('.//span[@class="ct"]//text()')
            time, from_info = self._ana_misc2(misc2)
            misc3 = hxs.select('.//a[@class="cc"]/@href')
            own_msg_id, forward_msg_id = self._get_msg_id(misc3)
            own_user_id, forward_user_id = self._get_user_id(misc3)
            if forward_msg_id and forward_user_id:
                is_forward = True
                forward_msg_url1 = 'http://weibo.com/%s/%s' % (forward_user_id, forward_msg_id)
                forward_msg_url2 = 'http://weibo.cn/%s/%s' % (forward_user_id, forward_msg_id)
            else:
                is_forward = False
                forward_msg_url1 = ''
                forward_msg_url2 = ''
            doc = {
                'data_source': '新浪微博搜索',  # "Sina Weibo search"
                'nick': nick,
                'user_url': user_url,
                'content': content,
                'zan_count': zan_count,
                'zhuanfa_count': zhuanfa_count,
                'pinglun_count': pinglun_count,
                'time': time,
                'from_info': from_info,
                'own_user_id': own_user_id,
                'own_msg_id': own_msg_id,
                'own_msg_url1': 'http://weibo.com/%s/%s' % (own_user_id, own_msg_id),
                'own_msg_url2': 'http://weibo.cn/%s/%s' % (own_user_id, own_msg_id),
                'forward_user_id': forward_user_id,
                'forward_msg_id': forward_msg_id,
                'forward_msg_url1': forward_msg_url1,
                'forward_msg_url2': forward_msg_url2,
                'is_forward': is_forward,
                'sort': self.sort,
            }
            # weibo user homepage avatars are not processed for now
            # user_homepage = user_url
            # if not user_homepage:
            #     next_request = None
            # else:
            #     next_request = Request(user_homepage, callback=self.parse_user_homepage)
            item = WeiboItem(doc=doc,
                             next_request=None, list_url=list_url, query=query)
            yield self.item_or_request(item)

    # weibo user homepage avatars are not processed for now
    # def parse_user_homepage(self, response):
    #     item = response.meta['item']
    #     item['doc']['detail'] = response.body_as_unicode()
    #     yield self.item_or_request(item)
Example #23
    def _rest_request(self, path='', method="GET", args=None, body=None,
                      headers=None, optionalpassword=None, providerheader=None):
        """Rest request for blob store client

        :param path: path within tree
        :type path: str
        :param method: method to be implemented
        :type method: str
        :param args: the arguments for method
        :type args: dict
        :param body: body payload for the rest call
        :type body: dict
        :param headers: provide additional headers
        :type headers: dict
        :param optionalpassword: provide password for authentication
        :type optionalpassword: str
        :param providerheader: provider id for the header
        :type providerheader: str
        :return: returns a RestResponse object

        """
        headers = self._get_req_headers(headers, providerheader, \
                                                            optionalpassword)

        reqpath = path.replace('//', '/')

        oribody = body
        if body is not None:
            if isinstance(body, dict) or isinstance(body, list):
                headers['Content-Type'] = u'application/json'
                body = json.dumps(body)
            else:
                headers['Content-Type'] = u'application/x-www-form-urlencoded'
                body = urllib.urlencode(body)

            if method == 'PUT':
                resp = self._rest_request(path=path)

                try:
                    if resp.getheader('content-encoding') == 'gzip':
                        buf = StringIO()
                        gfile = gzip.GzipFile(mode='wb', fileobj=buf)

                        try:
                            gfile.write(str(body))
                        finally:
                            gfile.close()

                        compresseddata = buf.getvalue()
                        if compresseddata:
                            data = bytearray()
                            data.extend(buffer(compresseddata))
                            body = data
                except BaseException as excp:
                    LOGGER.error('Error occurred while compressing body: %s', excp)
                    raise

            headers['Content-Length'] = len(body)

        if args:
            if method == 'GET':
                reqpath += '?' + urllib.urlencode(args)
            elif method == 'PUT' or method == 'POST' or method == 'PATCH':
                headers['Content-Type'] = u'application/x-www-form-urlencoded'
                body = urllib.urlencode(args)

        str1 = '%s %s %s\r\n' % (method, reqpath, \
                                            Blobstore2RestClient._http_vsn_str)

        str1 += 'Host: \r\n'
        str1 += 'Accept-Encoding: identity\r\n'
        for header, value in headers.iteritems():
            str1 += '%s: %s\r\n' % (header, value)

        str1 += '\r\n'

        if body and len(body) > 0:
            if isinstance(body, bytearray):
                str1 = str1.encode("ASCII") + body
            else:
                str1 += body

        bs2 = BlobStore2()
        if not isinstance(str1, bytearray):
            str1 = str1.encode("ASCII")
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            try:
                LOGGER.debug('Blobstore REQUEST: %s\n\tPATH: %s\n\tBODY: %s'% \
                         (method, path, body))
            except Exception:
                LOGGER.debug('Blobstore REQUEST: %s\n\tPATH: %s\n\tBODY: %s'% \
                         (method, path, 'binary body'))

        inittime = time.clock()
        resp_txt = bs2.rest_immediate(str1)
        endtime = time.clock()

        bs2.channel.close()

        LOGGER.info("iLO Response Time to %s: %s secs."% \
                                                (path, str(endtime-inittime)))
        #Dummy response to support a bad host response
        if len(resp_txt) == 0:
            resp_txt = "HTTP/1.1 500 Not Found\r\nAllow: " \
            "GET\r\nCache-Control: no-cache\r\nContent-length: " \
            "0\r\nContent-type: text/html\r\nDate: Tues, 1 Apr 2025 " \
            "00:00:01 GMT\r\nServer: " \
            "HP-iLO-Server/1.30\r\nX_HP-CHRP-Service-Version: 1.0.3\r\n\r\n\r\n"

        restreq = RestRequest(reqpath, method=method, body=body)
        rest_response = RisRestResponse(restreq, resp_txt)

        if rest_response.status in range(300, 399) and \
                                                    rest_response.status != 304:
            newloc = rest_response.getheader("location")
            newurl = urlparse2.urlparse(newloc)

            rest_response = self._rest_request(newurl.path, method, args, \
                               oribody, headers, optionalpassword, providerheader)

        try:
            if rest_response.getheader('content-encoding') == 'gzip':
                compressedfile = StringIO(rest_response.text)
                decompressedfile = gzip.GzipFile(fileobj=compressedfile)
                rest_response.text = decompressedfile.read()
        except StandardError:
            pass
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            headerstr = ''
            for header in rest_response._http_response.msg.headers:
                headerstr += '\t' + header.rstrip() + '\n'
            try:
                LOGGER.debug('Blobstore RESPONSE for %s:\nCode: %s\nHeaders:\n%s'\
                         '\nBody of %s: %s'%\
                         (rest_response.request.path,\
                        str(rest_response._http_response.status)+ ' ' + \
                        rest_response._http_response.reason, \
                        headerstr, rest_response.request.path, rest_response.read))
            except Exception:
                LOGGER.debug('Blobstore RESPONSE for %s:\nCode:%s'% \
                             (rest_response.request.path, rest_response))
        return rest_response
Example #24
 def get_cache_dirname(self):
     """The rest client's current base URL converted to path"""
     parts = urlparse2.urlparse(self.get_base_url())
     pathstr = '%s/%s' % (parts.netloc, parts.path)
     return pathstr.replace('//', '/')
Example #25
def parse_url(url):
    parsed = urlparse(url)
    if parsed.scheme:
        return url
    return None  # no usable scheme in the given url