Esempio n. 1
0
    def preprocess_link(self, referrer, url):
        """Normalize a candidate link and decide whether it should be crawled.

        Resolves *url* against *referrer*, strips the trailing slash and any
        in-page fragment, and rejects non-HTTP(S) schemes.  Returns the
        normalized absolute URL, or None when the link is empty, not a web
        URL, or already present in ``self.url_set`` under either scheme.
        """
        if not url:
            return None

        # Resolve relative links against the referring page, then normalize.
        parts = urlsplit(urljoin(referrer, url))._asdict()
        parts['path'] = re.sub(r'/$', '', parts['path'])  # drop trailing "/"
        parts['fragment'] = ''  # an anchor points into the same document
        normalized = SplitResult(**parts)

        if normalized.scheme not in ('http', 'https'):
            # Only web URLs are crawlable.
            return None

        canonical = normalized.geturl()
        if normalized.scheme == 'http':
            http_variant = canonical
            https_variant = canonical.replace('http:', 'https:', 1)
        else:
            https_variant = canonical
            http_variant = https_variant.replace('https:', 'http:', 1)

        # Treat http:// and https:// forms of the same page as one URL.
        if http_variant in self.url_set or https_variant in self.url_set:
            return None
        return canonical
    def build_url(url):
        """Reassemble a URL string from a dict of parsed components.

        *url* maps ``UrlParser.*`` keys (SCHEME, HOSTNAME, PORT, USERNAME,
        PASSWORD, NETLOC, PATH, FILENAME, QUERY) to their values.  SCHEME is
        mandatory; a pre-built NETLOC is reused only when it already embeds
        the username, otherwise the netloc is rebuilt from its parts.

        Returns the joined URL via ``SplitResult.geturl()``.
        Raises Exception when the dict is empty or lacks a scheme.
        """
        url_result = {UrlParser.QUERY: "", UrlParser.FRAGMENT: ""}
        if not url or UrlParser.SCHEME not in url or not url[UrlParser.SCHEME]:
            raise Exception("UrlParser:build_url", "Url dictionary is empty or missing key values")

        url_result[UrlParser.SCHEME] = url[UrlParser.SCHEME]

        # Reuse the pre-built netloc only when it already carries the username.
        if UrlParser.NETLOC in url and url[UrlParser.NETLOC]:
            if (
                UrlParser.USERNAME in url
                and url[UrlParser.USERNAME]
                and url[UrlParser.USERNAME] in url[UrlParser.NETLOC]
            ):
                url_result[UrlParser.NETLOC] = url[UrlParser.NETLOC]
        if UrlParser.NETLOC not in url_result:
            # Rebuild netloc as [user[:password]@]host[:port].
            url_result[UrlParser.NETLOC] = url[UrlParser.HOSTNAME]
            if UrlParser.PORT in url and url[UrlParser.PORT]:
                # BUG FIX: the port must be separated from the host by ":"
                # (RFC 3986 authority), not concatenated directly.
                url_result[UrlParser.NETLOC] += ":" + str(url[UrlParser.PORT])
            if UrlParser.USERNAME in url and url[UrlParser.USERNAME]:
                credentials = "{}@".format(url[UrlParser.USERNAME])
                if UrlParser.PASSWORD in url and url[UrlParser.PASSWORD]:
                    credentials = "{}:{}@".format(url[UrlParser.USERNAME], url[UrlParser.PASSWORD])
                url_result[UrlParser.NETLOC] = credentials + url_result[UrlParser.NETLOC]

        # Path = directory part + "/" + filename, with duplicate slashes folded.
        url_result[UrlParser.PATH] = url[UrlParser.FILENAME]
        if UrlParser.PATH in url and url[UrlParser.PATH]:
            url_result[UrlParser.PATH] = url[UrlParser.PATH] + "/" + url_result[UrlParser.PATH]
            url_result[UrlParser.PATH] = re.sub("//+", "/", url_result[UrlParser.PATH])

        if UrlParser.QUERY in url and url[UrlParser.QUERY]:
            url_result[UrlParser.QUERY] = url[UrlParser.QUERY]

        # url_result's keys must match SplitResult's field names exactly.
        result = SplitResult(**url_result)
        return result.geturl()
    def build_url(url):
        """Reassemble a URL string from a dict of parsed components.

        *url* maps ``UrlParser.*`` keys (SCHEME, HOSTNAME, PORT, USERNAME,
        PASSWORD, NETLOC, PATH, FILENAME, QUERY) to their values; SCHEME is
        mandatory.  Returns the joined URL via ``SplitResult.geturl()``.

        Raises Exception when the dict is empty or lacks a scheme.
        """
        url_result = {UrlParser.QUERY: '', UrlParser.FRAGMENT: ''}
        if not url or UrlParser.SCHEME not in url or not url[UrlParser.SCHEME]:
            raise Exception('UrlParser:build_url', 'Url dictionary is empty or missing key values')

        url_result[UrlParser.SCHEME] = url[UrlParser.SCHEME]

        # Reuse the pre-built netloc only when it already carries the username.
        if UrlParser.NETLOC in url and url[UrlParser.NETLOC]:
            if UrlParser.USERNAME in url \
                    and url[UrlParser.USERNAME] \
                    and url[UrlParser.USERNAME] in url[UrlParser.NETLOC]:
                url_result[UrlParser.NETLOC] = url[UrlParser.NETLOC]
        if UrlParser.NETLOC not in url_result:
            # Rebuild netloc as [user[:password]@]host[:port].
            url_result[UrlParser.NETLOC] = url[UrlParser.HOSTNAME]
            if UrlParser.PORT in url and url[UrlParser.PORT]:
                # BUG FIX: the port must be separated from the host by ":"
                # (RFC 3986 authority), not concatenated directly.
                url_result[UrlParser.NETLOC] += ':' + str(url[UrlParser.PORT])
            if UrlParser.USERNAME in url and url[UrlParser.USERNAME]:
                credentials = '{}@'.format(url[UrlParser.USERNAME])
                if UrlParser.PASSWORD in url and url[UrlParser.PASSWORD]:
                    credentials = '{}:{}@'.format(url[UrlParser.USERNAME], url[UrlParser.PASSWORD])
                url_result[UrlParser.NETLOC] = credentials + url_result[UrlParser.NETLOC]

        # Path = directory part + "/" + filename, with duplicate slashes folded.
        url_result[UrlParser.PATH] = url[UrlParser.FILENAME]
        if UrlParser.PATH in url and url[UrlParser.PATH]:
            url_result[UrlParser.PATH] = url[UrlParser.PATH] + '/' + url_result[UrlParser.PATH]
            url_result[UrlParser.PATH] = re.sub('//+', '/', url_result[UrlParser.PATH])

        if UrlParser.QUERY in url and url[UrlParser.QUERY]:
            url_result[UrlParser.QUERY] = url[UrlParser.QUERY]

        # url_result's keys must match SplitResult's field names exactly.
        result = SplitResult(**url_result)
        return result.geturl()
Esempio n. 4
0
	def _parseurl(self, url):
		"""Split *url*, stash its credentials, and store it back without them.

		Sets ``self.username`` / ``self.password`` from the URL's userinfo
		(None when absent) and ``self.url`` to the URL rebuilt without the
		``user:pass@`` part, preserving any explicit port.

		Fixes the Python-2-only ``<>`` operator and the direct
		``__str__()`` call of the original.
		"""
		parts = urlsplit(url)
		self.username = parts.username
		self.password = parts.password
		if parts.port is not None:
			netloc = "%s:%s" % (parts.hostname, parts.port)
		else:
			netloc = parts.hostname
		rebuilt = SplitResult(parts.scheme, netloc, parts.path, parts.query, parts.fragment)
		self.url = rebuilt.geturl()
Esempio n. 5
0
def clean_link(url):
    """Return *url* with its fragment removed and utm_* tracking parameters
    stripped from the query string.

    Returns None when the scheme is not in ALLOWED_URL_SCHEMES.
    """
    parts = urlsplit(url)
    if parts.scheme.lower() not in ALLOWED_URL_SCHEMES:
        return None
    # Drop the fragment up front; then peel tracking params until none match.
    cleaned = SplitResult(parts.scheme, parts.netloc, parts.path, parts.query, '')
    while cleaned.query and __utm_matcher.search(cleaned.query):
        stripped = __utm_matcher.sub('', cleaned.query)
        cleaned = cleaned._replace(query=stripped)
    return cleaned.geturl()
Esempio n. 6
0
    def urlsplit(url, scheme="", allow_fragments=True):
        """Parse a URL into 5 components:
        <scheme>://<netloc>/<path>?<query>#<fragment>
        Return a 5-tuple: (scheme, netloc, path, query, fragment).
        Note that we don't break the components up in smaller bits
        (e.g. netloc is a single string) and we don't expand % escapes."""
        allow_fragments = bool(allow_fragments)
        # Cache key includes the argument *types* so str and bytes inputs
        # never collide in the cache.
        key = url, scheme, allow_fragments, type(url), type(scheme)
        cached = _parse_cache.get(key, None)
        if cached:
            return cached
        if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
            clear_cache()
        netloc = query = fragment = ""
        i = url.find(":")
        if i > 0:
            if url[:i] == "http":  # optimize the common case
                scheme = url[:i].lower()
                url = url[i + 1 :]
                if url[:2] == "//":
                    netloc, url = _splitnetloc(url, 2)
                    # An IPv6 literal like [::1] must have balanced brackets.
                    if ("[" in netloc and "]" not in netloc) or (
                        "]" in netloc and "[" not in netloc
                    ):
                        raise ValueError("Invalid IPv6 URL")
                # Fragment is split off before the query ("#" ends the query).
                if allow_fragments and "#" in url:
                    url, fragment = url.split("#", 1)
                if "?" in url:
                    url, query = url.split("?", 1)
                v = SplitResult(scheme, netloc, url, query, fragment)
                _parse_cache[key] = v
                return v
            # General case: the prefix is a scheme only if every character
            # before ":" is a valid scheme character (for-else: no break).
            for c in url[:i]:
                if c not in scheme_chars:
                    break
            else:
                # make sure "url" is not actually a port number (in which case
                # "scheme" is really part of the path)
                rest = url[i + 1 :]
                if not rest or any(c not in "0123456789" for c in rest):
                    # not a port number
                    scheme, url = url[:i].lower(), rest

        if url[:2] == "//":
            netloc, url = _splitnetloc(url, 2)
            if ("[" in netloc and "]" not in netloc) or (
                "]" in netloc and "[" not in netloc
            ):
                raise ValueError("Invalid IPv6 URL")
        if allow_fragments and "#" in url:
            url, fragment = url.split("#", 1)
        if "?" in url:
            url, query = url.split("?", 1)
        v = SplitResult(scheme, netloc, url, query, fragment)
        _parse_cache[key] = v
        return v
Esempio n. 7
0
def with_port(url_str):
    """Return *url_str* with the configured ``settings.PORT`` appended to its
    netloc, unless the URL already names a port or has no netloc.

    A missing PORT setting or the default port 80 leaves the URL unchanged
    (apart from split/unsplit normalization).
    """
    port = getattr(settings, 'PORT', None)
    if port == 80:
        port = None  # the default HTTP port never needs to be explicit
    parts = urlsplit(url_str)
    if port and parts.netloc and not parts.port:
        parts = SplitResult(parts.scheme, '%s:%s' % (parts.netloc, port),
                            parts.path, parts.query, parts.fragment)
    return parts.geturl()
Esempio n. 8
0
def with_port(url_str):
    """Append the port configured in settings to *url_str*'s netloc.

    Does nothing when PORT is unset, falsy, the default 80, when the URL
    already carries an explicit port, or when it has no netloc at all.
    """
    try:
        configured = settings.PORT
    except AttributeError:
        configured = None
    # Port 80 is implied by http, so it is never written out.
    needs_port = bool(configured) and configured != 80
    pieces = urlsplit(url_str)
    if needs_port and not pieces.port and pieces.netloc:
        scheme, netloc, path, query, fragment = pieces
        pieces = SplitResult(scheme, "{}:{}".format(netloc, configured),
                             path, query, fragment)
    return pieces.geturl()
Esempio n. 9
0
    def assertApiUrlEqual(self, *args, **kwargs):
        """
        Assert that two or more URLs are equal regardless of API version.

        Each positional argument is split with urlsplit and, when its path
        does not already start with '/api/vX' (X from the `version` keyword
        argument, defaulting to settings.API_CURRENT_VERSION), that prefix
        is prepended before the comparison.

        Example usage:

        url = '/api/v1/apps/app/bastacorp/'
        self.assertApiUrlEqual(url, '/apps/app/bastacorp1/')

        # settings.API_CURRENT_VERSION = 2
        url = '/api/v1/apps/app/bastacorp/'
        self.assertApiUrlEqual(url, '/apps/app/bastacorp/', version=1)
        """
        PATH = 2  # index of the path component in urlsplit's tuple
        version = kwargs.get('version', settings.API_CURRENT_VERSION)
        prefix = '/api/v%d' % version
        normalized = []
        for url in args:
            pieces = list(urlsplit(url))
            if not pieces[PATH].startswith(prefix):
                pieces[PATH] = prefix + pieces[PATH]
            normalized.append(SplitResult(*pieces))
        eq_(*normalized)
def _urlsplit(url, scheme="", allow_fragments=True):
    """
    Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.
    """
    # _coerce_args is the urllib.parse internal that normalizes str/bytes
    # arguments to one type and returns a matching result decoder; it may
    # be absent (falsy), in which case coercion is skipped entirely.
    if _coerce_args:
        url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    netloc = query = fragment = ""
    i = url.find(":")
    if i > 0:
        # The prefix counts as a scheme only if every character before ":"
        # is a valid scheme character (for-else: loop finished without break).
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            start = i + 1
            scheme, url = url[:i].lower(), url[start:]

    if url[:2] == "//":
        netloc, url = _splitnetloc(url, 2)
        # An IPv6 literal such as [::1] must have balanced brackets.
        if ("[" in netloc and "]" not in netloc) or (
            "]" in netloc and "[" not in netloc
        ):
            raise ValueError("Invalid IPv6 URL")
    # The fragment is split off before the query: "#" terminates the query.
    if allow_fragments and "#" in url:
        url, fragment = url.split("#", 1)
    if "?" in url:
        url, query = url.split("?", 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    # Decode back to bytes when the inputs were bytes.
    return _coerce_result(v) if _coerce_args else v
Esempio n. 11
0
def _request(scheme=None, netloc=None, path=None, query=None, fragment=None):
    """Build a URL string from its five split components.

    Each argument maps directly onto the corresponding SplitResult field;
    None components are rendered as empty by urlunsplit.

    BUG FIX: the original hard-coded ``fragment=None`` in the SplitResult,
    silently discarding the ``fragment`` argument.
    """
    split = SplitResult(scheme=scheme,
                        netloc=netloc,
                        path=path,
                        query=query,
                        fragment=fragment)
    return urlunsplit(split)
Esempio n. 12
0
def _urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    # _coerce_args is the urllib.parse internal that normalizes str/bytes
    # arguments to one type and returns a matching decoder; when it is
    # falsy/absent, coercion is skipped.
    if _coerce_args:
        url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        # Accept the prefix as a scheme only if every character before ':'
        # is a valid scheme character (for-else: no break hit).
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            scheme, url = url[:i].lower(), url[i + 1:]

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # IPv6 literals like [::1] must have balanced brackets.
        if (('[' in netloc and ']' not in netloc)
                or (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    # Fragment is split off before the query: '#' terminates the query part.
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    # Decode back to bytes when the inputs were bytes.
    return _coerce_result(v) if _coerce_args else v
Esempio n. 13
0
def wrapped_url(api_url):
    """Map an API page URL onto the web page wrapper view.

    Strips '.json', 'api/' and 'pages/' from the path, resolves the
    remaining slug through the 'web_page_wrapper' route, and re-attaches
    the original query string and fragment.  Falsy input is returned
    unchanged.
    """
    if not api_url:
        return api_url
    parsed = urlsplit(api_url)
    slug = parsed.path
    # Same replacement order as chained .replace() calls.
    for junk in ('.json', 'api/', 'pages/'):
        slug = slug.replace(junk, '')
    slug = slug.strip('/')
    wrapper_path = reverse("web_page_wrapper", args=[slug])
    return SplitResult('', '', wrapper_path, parsed.query, parsed.fragment).geturl()
Esempio n. 14
0
 def build(self):
     """Assemble and return the URL from the stored components.

     Joins ``self.params`` into a query string and combines it with
     ``self.scheme``, ``self.netloc`` and ``self.path`` via urlunsplit.

     BUG FIX: the original used a *set* comprehension for the query pairs,
     which made the parameter ordering nondeterministic between runs; a
     plain generator preserves dict insertion order.
     """
     query = "&".join("{}={}".format(k, v) for k, v in self.params.items())
     return urlunsplit(SplitResult(
         scheme=self.scheme,
         netloc=self.netloc,
         path=self.path,
         query=query,
         fragment=""))
Esempio n. 15
0
File: page.py Progetto: dskard/chx
    def _populate_urlsplit_tuple(self,
                                 scheme='',
                                 netloc='',
                                 path='',
                                 query='',
                                 fragment=''):
        """Build a SplitResult from the given URL components.

        Every component defaults to the empty string, so calling with no
        arguments yields an all-empty split tuple.
        """
        components = (scheme, netloc, path, query, fragment)
        return SplitResult(*components)
Esempio n. 16
0
def find_next_indexes(soup):
    '''
    next page for an album index or an album page

    Scans the parsed page for pagination anchors (class "pix-navi-page"),
    takes the highest numeric page label, and returns one URL per page
    1..max built by rewriting the "p" query parameter of the first
    anchor's href.  Returns an empty list when no pagination links exist.

    NOTE(review): this is Python 2 code — `result.query.encode('ascii')`
    is fed to parse_qs as a byte string on purpose (see the linked bug);
    on Python 3 the bytes/str mix would behave differently.  Verify before
    porting.
    '''
    indexes = soup.findAll('a', 'pix-navi-page')
    urls = []
    if indexes:
        # Highest numeric label among the pagination links.
        max_p = max([int(tag.string) for tag in indexes if tag.string.isdigit()])
        result = urlsplit(httplib.html_unescape(indexes[0]['href']))
        #i don't want patch urllib.unquote. bug description: http://bugs.python.org/issue1712522
        #quick fix is convert to ascii.
        query_dict = parse_qs(result.query.encode('ascii'))
        for p in range(1, max_p + 1):
            query_dict['p'] = p  # overwrite the page-number parameter
            result = SplitResult(result.scheme, result.netloc, result.path,
                                 urlencode(query_dict, doseq=True), result.fragment)
            urls.append(result.geturl())
    return urls
Esempio n. 17
0
    def __init__(self, url):
        """Normalize *url* into a MIB source location.

        Defaults the scheme ('file' for bare paths, 'http' when a host is
        present), anchors relative file paths to the current directory,
        and records the normalized URL, scheme, path and file name.

        Raises ValueError when the URL carries no path at all.
        """
        pieces = urlsplit(url)
        if not pieces.path:
            raise ValueError("Invalid argument for MIB source: %s" % url)

        # Bare paths are local files; host-only URLs default to HTTP.
        scheme = pieces.scheme or ("http" if pieces.netloc else "file")

        path = pieces.path
        if scheme == "file" and not path.startswith("/"):
            # Anchor relative file paths to the current working directory.
            path = os.path.abspath("./" + path)

        normalized = SplitResult(scheme, pieces.netloc, path, pieces.query, pieces.fragment)
        self._url = normalized.geturl()
        self._scheme = scheme
        self._path = normalized.path
        self._filename = os.path.split(normalized.path)[-1]
Esempio n. 18
0
    def __init__(self, url):
        """Parse *url* into a normalized MIB source description.

        The scheme falls back to 'file' when neither scheme nor netloc is
        given, and to 'http' when only a netloc is present.  Relative file
        paths are made absolute.  Stores the rebuilt URL plus its scheme,
        path and trailing file name on the instance.

        Raises ValueError for URLs without a path component.
        """
        raw = urlsplit(url)

        if not raw.path:
            raise ValueError("Invalid argument for MIB source: %s" % url)

        if raw.scheme:
            scheme = raw.scheme
        elif raw.netloc:
            scheme = "http"
        else:
            scheme = "file"

        path = raw.path
        if scheme == "file" and not path.startswith("/"):
            # Make relative file paths absolute against the current dir.
            path = os.path.abspath("./" + path)

        cooked = SplitResult(scheme, raw.netloc, path, raw.query, raw.fragment)
        self._url = cooked.geturl()
        self._scheme = scheme
        self._path = cooked.path
        self._filename = os.path.split(cooked.path)[-1]
Esempio n. 19
0
def hide_thunder(url):
    """Zero the Thunder-display digit in *url*'s ``pcmd`` query parameter.

    The display settings live in a positional digit string; slot 9 (the
    10th character) controls the Thunder overlay.  Returns the rebuilt URL
    with the scheme and netloc deliberately dropped, i.e. a relative URL.
    """
    parts = urlsplit(url)
    query_args = parse_qs(parts.query)
    # Flip the 10th pcmd digit to "0" (disable Thunder display).
    pcmd = list(query_args["pcmd"][0])
    pcmd[9] = "0"
    query_args["pcmd"] = "".join(pcmd)
    rebuilt = SplitResult(scheme="",
                          netloc="",
                          path=parts.path,
                          query=urlencode(query_args, True),
                          fragment="")
    return urlunsplit(rebuilt)
Esempio n. 20
0
def get_scrape_url(tracker_url, info_hash):
    """Derive a BitTorrent scrape URL from a tracker announce URL.

    Replaces 'announce' with 'scrape' in the URL path (or appends
    '/scrape' when 'announce' is absent) and attaches the percent-encoded
    info hash as a query parameter.

    :param tracker_url: the tracker's announce URL.
    :param info_hash: the torrent info hash as a hex string.
    """
    if 'announce' in tracker_url:
        v = urlsplit(tracker_url)
        sr = SplitResult(v.scheme, v.netloc,
                         v.path.replace('announce', 'scrape'), v.query,
                         v.fragment)
        result = urlunsplit(sr)
    else:
        log.debug(
            '`announce` not contained in tracker url, guessing scrape address.'
        )
        result = tracker_url + '/scrape'

    result += '&' if '?' in result else '?'
    # Percent-encode the raw digest bytes.  BUG FIX: `str.decode('hex')`
    # is Python 2 only; `bytes.fromhex` is the Python 3 equivalent.
    result += 'info_hash=%s' % quote(bytes.fromhex(info_hash))
    return result
Esempio n. 21
0
 def _login(self):
     """Authenticate against the management interface via URL credentials.

     Alert-based HTTP authentication
     (``driver.switch_to.alert.authenticate``) is not supported by current
     geckodriver/chromedriver builds, so the login and password are
     embedded directly into the URL's netloc instead
     (``user:pass@host``), and the browser is pointed at that URL.
     """
     login_url = urljoin(self.base_url, '/manage_main')
     scheme, netloc, path, query, fragment = urlsplit(login_url)
     auth_netloc = '%s:%s@%s' % (self.login, self.password, netloc)
     authed = SplitResult(scheme, auth_netloc, path, query, fragment)
     self.driver.get(urlunsplit(authed))
Esempio n. 22
0
    def assertApiUrlEqual(self, *args, **kwargs):
        """
        Assert that two or more URLs are equal regardless of API version.

        Each positional argument is split with urlsplit; when its path does
        not already start with '/api/vX' (X from the `version` keyword
        argument, defaulting to settings.API_CURRENT_VERSION) that prefix
        is prepended.  Optional 'scheme' and 'netloc' keyword arguments
        fill in missing components so absolute URLs can be compared too.

        Example usage:

        url = '/api/v1/apps/app/bastacorp/'
        self.assertApiUrlEqual(url, '/apps/app/bastacorp1/')

        # settings.API_CURRENT_VERSION = 2
        url = '/api/v1/apps/app/bastacorp/'
        self.assertApiUrlEqual(url, '/apps/app/bastacorp/', version=1)
        """
        # urlsplit tuple indices, named for readability.
        SCHEME, NETLOC, PATH = 0, 1, 2

        version = kwargs.get('version', settings.API_CURRENT_VERSION)
        default_scheme = kwargs.get('scheme', None)
        default_netloc = kwargs.get('netloc', None)
        prefix = '/api/v%d' % version
        normalized = []
        for url in args:
            pieces = list(urlsplit(url))
            if not pieces[PATH].startswith(prefix):
                pieces[PATH] = prefix + pieces[PATH]
            if default_scheme and not pieces[SCHEME]:
                pieces[SCHEME] = default_scheme
            if default_netloc and not pieces[NETLOC]:
                pieces[NETLOC] = default_netloc
            normalized.append(SplitResult(*pieces))
        eq_(*normalized)
Esempio n. 23
0
    def pdf_echo_loopback(self):
        '''
        Adjunct endpoint used with above PDF test echo page that proxies the
        generated PDF back to the test page

        NOTE(review): Python 2 code (urlparse module, urllib.urlencode);
        relies on cherrypy/splunk globals configured elsewhere.
        '''

        # set default PDF server URI
        pdfServerUri = '%s://%s/services/pdfserver/renderpdf' % (splunk.getDefault('protocol'), cherrypy.config.get('mgmtHostPort'))

        # get alternate PDF server URI; values seem to be varied so we normalize
        alertSettings = en.getEntity('configs/conf-alert_actions', 'email', namespace='search')
        if alertSettings.get('reportServerURL') and alertSettings['reportServerURL'].strip():
            pdfServerUri = alertSettings['reportServerURL'].strip()
            url = urlsplit(pdfServerUri)
            if len(url.path)<2:
                # URL is protocol://host:port only; fill in the default
                # render endpoint path before reassembling.
                url = url._asdict()
                url['path'] = '/services/pdfserver/renderpdf'
                pdfServerUri = urlunsplit(SplitResult(**url))

        # determine the external address that is most likely accessible
        urlparts = urlparse.urlparse(pdfServerUri)

        ai = socket.getaddrinfo(urlparts.hostname, int(urlparts.port or 80), socket.AF_UNSPEC, socket.SOCK_STREAM, 0, socket.AI_PASSIVE)[0]
        af, socktype, proto, canonname, hostport = ai

        appserverHost = alertSettings.get('hostname') and alertSettings['hostname'].strip()
        if appserverHost:
            logger.info('using configured appserver hostname "%s"' % appserverHost)
        else:
            # No configured hostname: connect a throwaway socket toward the
            # PDF server and use whichever local address was chosen.
            s = socket.socket(af, socktype, proto)
            s.connect(hostport)
            sockname = s.getsockname()
            logger.info('most promising interface looks like %s' % sockname[0])
            appserverHost = sockname[0]

        appserverProtocol = 'https' if splunk.util.normalizeBoolean(cherrypy.config.get('enableSplunkWebSSL', False)) else 'http'

        # create a fake sso-bypass session utilizing the user's current sessionKey
        active_session = cherrypy.serving.session
        session_args = ('timeout', 'clean_freq', 'storage_path', 'servers')
        args = dict([ (arg_name, getattr(active_session, arg_name)) for arg_name in session_args if hasattr(active_session, arg_name)])
        fake_session = cherrypy.serving.session.__class__(**args)
        fake_session['sessionKey'] = cherrypy.session['sessionKey']
        fake_session['SSO_DISABLE'] = 1
        fake_session.save()
        fake_session.release_lock()

        # set GET args; IPv6 hosts must be bracketed inside a URL authority
        args = {
            'target': '%s://%s:%s%s/debug/pdf_echo' % (
                appserverProtocol, 
                appserverHost if af == socket.AF_INET else '[%s]' % appserverHost, 
                cherrypy.config['httpport'],
                cherrypy.request.script_name
                ),
            'mode': 'default',
            'session': fake_session.id
        }

        # fetch the SSL certificate, if any
        cert = cherrypy.request.app.root.report.get_cert()
        if cert:
            args['cert'] = cert
        
        logger.info('Testing PDF server=%s on URI=%s' % (pdfServerUri, args['target']))
        
        # make a request to the registered PDF server for the echo page
        timeout = 20
        h = httplib2.Http(timeout=timeout, disable_ssl_certificate_validation=True)
        start = time.time()
        try:
            serverResponse, serverContent = h.request(pdfServerUri, method='POST', body=urllib.urlencode(args))
        except:
            # Distinguish a timeout (reported as plain text) from any other
            # transport error, which is re-raised.
            if time.time() - start > (timeout-1):
                cherrypy.response.headers['content-type'] = 'text/plain'
                return "Timed out while waiting for a response"
            raise
                
        cherrypy.response.headers['content-type'] = 'application/pdf'
        return serverContent
Esempio n. 24
0
    def requestPDF(self, **kw):
        """
        Expects a valid splunk session key to be passed in along with the url to be rendered to PDF
        Complete parameter list:
        session_key (required)
        request_path (required)
        paperSize - 'a4', 'letter', etc or dimensions in mm '200x400' - default 'letter'
        orientation - 'portrait' or 'landscape' - default 'portrait'
        title - Title of report - default 'Splunk Report'
        override_disposition
        owner

        NOTE(review): Python 2 code (`except Exception, e`, `basestring`);
        depends on cherrypy/splunk module state configured elsewhere.
        """

        request_path = kw.get('request_path')
        if not request_path:
            raise SimpleError(400, "Invalid request_path supplied")

        print_session_key = kw.get('session_key')
        if not print_session_key:
            if cherrypy.config.get('debug_report_server'):
                # Debug-only fallback: log in with the default credentials
                # instead of requiring a caller-supplied session key.
                logger.warn('Using debug user for report server')
                print_session_key = splunk.auth.getSessionKey(
                    'admin', 'changeme', hostPath=self.splunkd_urlhost)
            else:
                raise SimpleError(400, "Invalid session key supplied")

        settings = en.getEntity(ALERT_ACTIONS_ENTITY,
                                'email',
                                namespace='system',
                                sessionKey=print_session_key,
                                owner='nobody')

        enabled = splunk.util.normalizeBoolean(
            settings.get('reportServerEnabled'))
        if not enabled:
            raise SimpleError(400, 'PDF server is not enabled')

        # Normalize reportServerURL: it may be a bare protocol://host:port
        # (path filled in with the default), None (full default), or invalid.
        report_server_url = settings.get('reportServerURL')
        if isinstance(report_server_url, basestring):
            report_server_url = report_server_url.strip()
            url = urlsplit(report_server_url)
            if url.netloc and len(url.path) < 2:
                # user has specified the protocol://host:port only
                url = url._asdict()
                url['path'] = DEFAULT_SERVICES_URL
                report_server_url = urlunsplit(SplitResult(**url))
        elif report_server_url is None:
            report_server_url = DEFAULT_SERVICES_URL
        else:
            raise SimpleError(500, "reportServerURL is invalid")
        # An empty string survives the branches above; default it too.
        if not report_server_url:
            report_server_url = DEFAULT_SERVICES_URL

        papersize = kw.get('papersize')
        if not papersize:
            papersize = settings.get('reportPaperSize', 'letter')

        orientation = kw.get('orientation')
        if not orientation:
            orientation = settings.get('reportPaperOrientation', 'portrait')

        title = kw.get('title')
        if not title:
            title = settings.get('reportTitle', _('Splunk Report'))

        owner = kw.get('owner', 'nobody')
        print_session = self.build_session(owner, print_session_key)

        try:
            # POST payload handed to the PDF render service.
            data = {
                'session':
                print_session.id,
                'target':
                request_path,
                'papersize':
                papersize,
                'orientation':
                orientation,
                'title':
                title,
                'footer_right':
                _('Generated by Splunk at %(time)s') % dict(time='&D'),
                'mode':
                'splunk'
            }

            # see if splunkweb is running in SSL mode; if so pass the certificate to the pdf server
            cert = self.get_cert()
            if cert:
                data['cert'] = cert

            try:
                logger.info("Appserver dispatching report request to '%s'" %
                            report_server_url)
                server_response, server_content = splunk.rest.simpleRequest(
                    report_server_url, postargs=data, rawResult=True)
            except Exception, e:
                logger.error(
                    "Appserver failed to dispatch report request to %s: %s" %
                    (report_server_url, e))
                raise SimpleError(
                    500,
                    "Appserver failed to dispatch report request to %s: %s" %
                    (report_server_url, e))

            if server_response.status == 404:
                logger.error(
                    "Appserver got a 404 response while contacting the PDF server at %s - Check that the PDF Server app is installed and that reportServerURL is correct"
                    % report_server_url)
                raise SimpleError(
                    500,
                    "Appserver got a 404 response while contacting the PDF server at %s - Check that the PDF Server app is installed and that reportServerURL is correct"
                    % report_server_url)
            elif server_response.status != 200:
                # A body beginning with '>' carries the PDF server's own
                # error text; surface it verbatim (minus the marker).
                if server_content and server_content[0] == '>':
                    logger.error(
                        "Appserver received error from PDF server at %s: %s" %
                        (report_server_url, server_content[1:]))
                    raise SimpleError(
                        server_response.status,
                        "PDF server at %s returned error: %s" %
                        (report_server_url, server_content[1:]))
                logger.error(
                    "Appserver failed to dispatch report request to %s: %s - %s"
                    % (report_server_url, server_response.status,
                       server_response.reason))
                raise SimpleError(
                    500,
                    "Appserver failed to dispatch report request to %s: %s %s"
                    % (report_server_url, server_response.status,
                       server_response.reason))

            # relay the response through to the requester
            cherrypy.response.headers['content-type'] = server_response[
                'content-type']
            cherrypy.response.headers['content-length'] = server_response[
                'content-length']
            if kw.get('override_disposition'):
                cherrypy.response.headers['content-disposition'] = kw[
                    'override_disposition']
            elif 'content-disposition' in server_response:
                cherrypy.response.headers[
                    'content-disposition'] = server_response[
                        'content-disposition']
            cherrypy.response.body = server_content
            return cherrypy.response.body
Esempio n. 25
0
from circonus.collectd.df import get_df_graph_data
from circonus.collectd.graph import get_collectd_graph_data
from circonus.collectd.memory import get_memory_graph_data
from circonus.collectd.interface import get_interface_graph_data
from circonus.tag import get_tags_with, get_telemetry_tag, is_taggable
from requests import codes as status_codes
from requests.exceptions import HTTPError

import requests

# Circonus API endpoint configuration.
API_PROTOCOL = "https"
API_LOCATION = "api.circonus.com"
API_VERSION = 2
# Pre-built split tuple for the API root; urlunsplit() renders it into the
# canonical base URL (https://api.circonus.com/v2).
API_BASE_SPLIT = SplitResult(scheme=API_PROTOCOL,
                             netloc=API_LOCATION,
                             path="/v%d" % API_VERSION,
                             query="",
                             fragment="")
API_BASE_URL = urlunsplit(API_BASE_SPLIT)

# Module-level logger named after this module, per logging convention.
log = logging.getLogger(__name__)


def get_api_url(resource_type_or_cid):
    """Get a valid fully qualified Circonus API URL for the given resource type or ``cid``.

    :param str resource_type_or_cid: The resource type or ``cid`` representing a specific resource.
    :return: The API URL.
    :rtype: :py:class:`str`

    """
    # NOTE(review): the implementation body appears to have been lost in this
    # copy of the file (only the docstring remains) — presumably it joined
    # API_BASE_URL with the given resource type/cid; restore from upstream.
Esempio n. 26
0
    def handle_distrib(self, message):
        """React to a file dispatch message.

        ``message`` is expected to be two space-separated pathnames:
        the dispatched file and its destination. The filename is matched
        against several known naming schemes (HMF, EOS PDS, NPP/JPSS RDR,
        Metop EPS) to build a metadata dict ("swath") describing the pass,
        seeded from ``self._received_passes`` when the pass was seen before.

        :param message: "pathname1 pathname2" dispatch message string.
        :return: The swath metadata dict, or None when the filename does
            not match any known naming scheme (or an RDR name is too
            nonstandard to parse).
        """
        pathname1, pathname2 = message.split(" ")
        dummy, filename = os.path.split(pathname1)
        # TODO: Should not make any assumptions on filename formats, should
        # load a description of it from a config file instead.
        if pathname1.endswith(".hmf"):
            # HMF files: "<YYYYmmddHHMMSS>_<satellite>.hmf"
            risestr, satellite = filename[:-4].split("_", 1)
            risetime = datetime.strptime(risestr, "%Y%m%d%H%M%S")
            pname = pass_name(risetime, satellite)
            swath = self._received_passes.get(pname, {
                "satellite": satellite,
                "start_time": risetime
            })
            swath["type"] = "binary"
            if satellite == "FENGYUN_1D":
                swath["format"] = "CHRPT"
            else:
                swath["format"] = "HRPT"
                swath["instrument"] = ("avhrr/3", "mhs", "amsu")
            swath["level"] = "0"

        elif filename.startswith("P042") or filename.startswith("P154"):
            # EOS PDS files: fixed-position fields encode APIDs, time, etc.
            pds = {}
            pds["format"] = filename[0]
            pds["apid1"] = filename[1:8]
            pds["apid2"] = filename[8:15]
            pds["apid3"] = filename[15:22]
            pds["time"] = datetime.strptime(filename[22:33], "%y%j%H%M%S")
            pds["nid"] = filename[33]
            pds["ufn"] = filename[34:36]
            pds["extension"] = filename[36:40]
            # Spacecraft ID is the first three digits of the first APID.
            if pds["apid1"][:3] == "042":
                satellite = "TERRA"
            elif pds["apid1"][:3] == "154":
                satellite = "AQUA"
            else:
                raise ValueError("Unrecognized satellite ID: " +
                                 pds["apid1"][:3])
            risetime = pds["time"]
            pname = pass_name(risetime, satellite)
            swath = self._received_passes.get(pname, {
                "satellite": satellite,
                "start_time": risetime
            })
            # Map the APID suffix to an instrument name; fall back to the
            # raw suffix for unknown APIDs.
            instruments = {
                "0064": "modis",
                "0141": "ceres+y",
                "0157": "ceres-y",
                "0261": "amsu-a1",
                "0262": "amsu-a1",
                "0290": "amsu-a2",
                "0342": "hsb",
                "0402": "amsr-e",
                "0404": "airs",
                "0405": "airs",
                "0406": "airs",
                "0407": "airs",
                "0414": "airs",
                "0415": "airs",
                "0419": "airs",
                "0957": "gbad",
            }
            swath["instrument"] = instruments.get(pds["apid1"][3:],
                                                  pds["apid1"][3:])
            swath["format"] = "PDS"
            swath["type"] = "binary"
            swath["level"] = "0"
            swath["number"] = int(pds["ufn"])

        # NPP RDRs
        elif filename.startswith("R") and filename.endswith(".h5"):
            # Occassionaly RT-STPS produce files with a nonstandard file
            # naming, lacking the 'RNSCA' field. We will try to deal with this
            # below (Adam - 2013-06-04):
            mda = {}
            idx_start = 0
            mda["format"] = filename[0]
            if filename.startswith("RATMS-RNSCA"):
                mda["instrument"] = "atms"
            elif filename.startswith("RCRIS-RNSCA"):
                mda["instrument"] = "cris"
            elif filename.startswith("RNSCA-RVIRS"):
                mda["instrument"] = "viirs"
            else:
                if filename.startswith("RATMS_npp"):
                    mda["instrument"] = "atms"
                elif filename.startswith("RCRIS_npp"):
                    mda["instrument"] = "cris"
                else:
                    logger.warning("Seems to be a NPP/JPSS RDR " +
                                   "file but name is not standard!")
                    logger.warning("filename = " + filename)
                    return None
                # Shorter prefix (missing 'RNSCA' field): shift all the
                # fixed-position slices below accordingly.
                idx_start = -6

            mda["start_time"] = datetime.strptime(
                filename[idx_start + 16:idx_start + 33], "d%Y%m%d_t%H%M%S")
            # End time combines the start date with the 'e' end-time field.
            end_time = datetime.strptime(
                filename[idx_start + 16:idx_start + 25] + " " +
                filename[idx_start + 35:idx_start + 42], "d%Y%m%d e%H%M%S")
            mda["orbit"] = filename[idx_start + 45:idx_start + 50]
            # FIXME: swath start and end time is granule dependent.
            # Get the end time as well! - Adam 2013-06-03:
            satellite = "NPP"
            start_time = mda["start_time"]
            pname = pass_name(start_time, satellite)

            swath = self._received_passes.get(pname, {
                "satellite": satellite,
                "start_time": start_time
            })
            swath['end_time'] = end_time
            swath["instrument"] = mda["instrument"]
            swath["format"] = "RDR"
            swath["type"] = "HDF5"
            swath["level"] = "0"

        # metop
        elif filename[4:12] == "_HRP_00_":
            # Map the 4-char instrument code prefix to an instrument name.
            # BUGFIX: the original dict literal repeated the "AVHR" and
            # "ASCA" keys; Python silently keeps only the last value, so the
            # earlier entries ("avhrr", duplicate "ascat") were dead code.
            # The surviving values are kept here, so behavior is unchanged.
            instruments = {
                "AVHR": "avhrr/3",
                "ASCA": "ascat",
                "AMSA": "amsu-a",
                "ATOV": "atovs",
                "GOME": "gome",
                "GRAS": "gras",
                "HIRS": "hirs/4",
                "IASI": "iasi",
                "MHSx": "mhs",
                "SEMx": "sem",
                "ADCS": "adcs",
                "SBUV": "sbuv",
                "HKTM": "vcdu34"
            }

            satellites = {"M02": "METOP-A", "M01": "METOP-B"}

            # KeyError here means an unknown Metop spacecraft code.
            satellite = satellites[filename[12:15]]
            risetime = datetime.strptime(filename[16:31], "%Y%m%d%H%M%SZ")

            pname = pass_name(risetime, satellite)
            swath = self._received_passes.get(pname, {
                "satellite": satellite,
                "start_time": risetime
            })
            swath["instrument"] = instruments[filename[:4]]
            swath["format"] = "EPS"
            swath["type"] = "binary"
            swath["level"] = "0"
        else:
            # Unknown filename scheme: nothing we can do.
            return None

        # pathname2 may be either the full destination path or just the
        # destination directory.
        if pathname2.endswith(filename):
            uri = pathname2
        else:
            uri = os.path.join(pathname2, filename)

        # Normalize local/file and ftp URIs to ssh URIs pointing at the
        # emitting host (resp. the ftp host).
        url = urlsplit(uri)
        if url.scheme in ["", "file"]:
            scheme = "ssh"
            netloc = self._emitter
            uri = urlunsplit(
                SplitResult(scheme, netloc, url.path, url.query, url.fragment))
        elif url.scheme == "ftp":
            scheme = "ssh"
            netloc = url.hostname
            uri = urlunsplit(
                SplitResult(scheme, netloc, url.path, url.query, url.fragment))
        swath["filename"] = os.path.split(url.path)[1]
        swath["uri"] = uri
        return swath
Esempio n. 27
0
def main(argv=None):
    """Load data into a Cheshire3 database based on parameters in argv."""
    global argparser, session, server, db
    if argv is None:
        args = argparser.parse_args()
    else:
        args = argparser.parse_args(argv)
    if irods is None:
        raise MissingDependencyException('icheshire3-load script',
                                         'irods (PyRods)'
                                         )
    session = Session()
    server = SimpleServer(session, args.serverconfig)
    if args.database is None:
        try:
            dbid = identify_database(session, os.getcwd())
        except EnvironmentError as e:
            server.log_critical(session, e.message)
            return 1
        server.log_debug(
            session,
            "database identifier not specified, discovered: {0}".format(dbid))
    else:
        dbid = args.database

    try:
        db = server.get_object(session, dbid)
    except ObjectDoesNotExistException:
        msg = """Cheshire3 database {0} does not exist.
Please provide a different database identifier using the --database option.
""".format(dbid)
        server.log_critical(session, msg)
        return 2
    else:
        # Allow for multiple data arguments
        docFac = db.get_object(session, 'defaultDocumentFactory')
        for dataArg in args.data:
            if dataArg.startswith('irods://'):
                parsed = urlsplit(dataArg)
            else:
                # Examine current environment
                status, myEnv = irods.getRodsEnv()
                try:
                    host = myEnv.getRodsHost()
                except AttributeError:
                    host = myEnv.rodsHost
                # Port
                try:
                    myEnv.getRodsPort()
                except AttributeError:
                    port = myEnv.rodsPort
                # User
                try:
                    username = myEnv.getRodsUserName()
                except AttributeError:
                    username = myEnv.rodsUserName
                netloc = '{0}@{1}:{2}'.format(username, host, port)
                try:
                    cqm = myEnv.getRodsCwd()
                except AttributeError:
                    cwd = myEnv.rodsCwd
                path = '/'.join([cwd, dataArg])
                parsed = SplitResult('irods', netloc, path, None, None)
                dataArg = urlunsplit(parsed)
            server.log_debug(session, dataArg)
            if args.format is None or not args.format.startswith('i'):
                fmt = 'irods'
            else:
                fmt = args.format
            server.log_debug(session, fmt)
            try:
                docFac.load(session, dataArg,
                            args.cache, fmt, args.tagname, args.codec)
            except MissingDependencyException as e:
                server.log_critical(session, e.reason)
                missingDependencies =  e.dependencies
                raise MissingDependencyException('cheshire3-load script',
                                                 missingDependencies)
            wf = db.get_object(session, 'buildIndexWorkflow')
            wf.process(session, docFac)