Example #1
File: __init__.py Project: ICCV/chaos
    def process_response(self, request, response, spider):
        if request.meta.get('dont_redirect', False):
            return response

        if request.method == 'HEAD':
            if response.status in [301, 302, 303, 307] and 'Location' in response.headers:
                redirected_url = urljoin(request.url, response.headers['location'])
                redirected = request.replace(url=redirected_url)
                return self._redirect(redirected, request, spider, response.status)
            else:
                return response

        referer = request.url

        if response.status in [302, 303] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = self._redirect_request_using_get(request, redirected_url)
            redirected = self._redirect(redirected, request, spider, response.status)
            redirected.headers['Referer'] = referer
            return redirected

        if response.status in [301, 307] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = request.replace(url=redirected_url)
            redirected = self._redirect(redirected, request, spider, response.status)
            redirected.headers['Referer'] = referer
            return redirected

        return response
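The middleware above uses urljoin to resolve the Location header, which may be relative, against the URL of the request being redirected. A minimal standalone sketch of that resolution step (the URLs are invented for illustration):

from urllib.parse import urljoin  # Python 3; the example above may target Python 2 (urlparse.urljoin)

request_url = 'http://www.example.com/catalog/page.html'

# An absolute-path Location replaces the whole path of the request URL.
urljoin(request_url, '/login')             # 'http://www.example.com/login'
# A relative Location resolves against the directory of the request URL.
urljoin(request_url, 'page2.html')         # 'http://www.example.com/catalog/page2.html'
# A fully qualified Location is returned unchanged.
urljoin(request_url, 'https://other.example.org/x')  # 'https://other.example.org/x'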
Example #2
def buildDiscover(base_url, out_dir):
    """Convert all files in a directory to apache mod_asis files in
    another directory."""
    test_data = discoverdata.readTests(discoverdata.default_test_file)

    def writeTestFile(test_name):
        template = test_data[test_name]

        data = discoverdata.fillTemplate(
            test_name, template, base_url, discoverdata.example_xrds)

        out_file_name = os.path.join(out_dir, test_name)
        with open(out_file_name, 'w') as out_file:
            out_file.write(data)

    manifest = [manifest_header]
    for success, input_name, id_name, result_name in discoverdata.testlist:
        if not success:
            continue
        writeTestFile(input_name)

        input_url = urljoin(base_url, input_name)
        id_url = urljoin(base_url, id_name)
        result_url = urljoin(base_url, result_name)

        manifest.append('\t'.join((input_url, id_url, result_url)))
        manifest.append('\n')

    manifest_file_name = os.path.join(out_dir, 'manifest.txt')
    manifest_file = open(manifest_file_name, 'w')
    for chunk in manifest:
        manifest_file.write(chunk)
    manifest_file.close()
Example #3
    def _legacy_interact(self, location, error_info):
        visit_url = urljoin(location, error_info.info.visit_url)
        wait_url = urljoin(location, error_info.info.wait_url)
        method_urls = {
            "interactive": visit_url
        }
        if (len(self._interaction_methods) > 1 or
                self._interaction_methods[0].kind() !=
                WEB_BROWSER_INTERACTION_KIND):
            # We have several possible methods or we only support a non-window
            # method, so we need to fetch the possible methods supported by
            # the discharger.
            method_urls = _legacy_get_interaction_methods(visit_url)
        for interactor in self._interaction_methods:
            kind = interactor.kind()
            if kind == WEB_BROWSER_INTERACTION_KIND:
                # This is the old name for browser-window interaction.
                kind = "interactive"

            if not isinstance(interactor, LegacyInteractor):
                # Legacy interaction mode isn't supported.
                continue

            visit_url = method_urls.get(kind)
            if visit_url is None:
                continue

            visit_url = urljoin(location, visit_url)
            interactor.legacy_interact(self, location, visit_url)
            return _wait_for_macaroon(wait_url)

        raise InteractionError('no methods supported; supported [{}]; provided [{}]'.format(
            ' '.join([x.kind() for x in self._interaction_methods]),
            ' '.join(method_urls.keys()),
        ))
Example #4
    def _send_batch(self, destination, events):
        ''' Makes a single batch API request with the given list of events. The
        `destination` argument contains the write key, API host and dataset
        name used to build the request.'''
        start = time.time()
        status_code = 0
        try:
            url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                          destination.dataset)
            payload = []
            for ev in events:
                event_time = ev.created_at.isoformat()
                if ev.created_at.tzinfo is None:
                    event_time += "Z"
                payload.append({
                    "time": event_time,
                    "samplerate": ev.sample_rate,
                    "data": ev.fields()})

            self.log("firing batch, size = %d", len(payload))
            resp = self.session.post(
                url,
                headers={"X-Honeycomb-Team": destination.writekey, "Content-Type": "application/json"},
                data=json.dumps(payload, default=json_default_handler),
                timeout=10.0,
            )
            status_code = resp.status_code
            resp.raise_for_status()
            statuses = [{"status": d.get("status"), "error": d.get("error")} for d in resp.json()]
            for ev, status in zip(events, statuses):
                self._enqueue_response(status.get("status"), "", status.get("error"), start, ev.metadata)

        except Exception as e:
            # Catch all exceptions and hand them to the responses queue.
            self._enqueue_errors(status_code, e, start, events)
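The nested urljoin above builds the batch endpoint in two steps, and the trailing slashes are what make it work: urljoin drops the last path segment of a base that does not end in '/'. A small sketch with a hypothetical api_host and dataset name:

from urllib.parse import urljoin

api_host = 'https://api.honeycomb.io'   # hypothetical destination.api_host
dataset = 'my-dataset'                  # hypothetical destination.dataset

# '/1/batch/' is an absolute path, so it replaces whatever path api_host carries.
batch_root = urljoin(api_host, '/1/batch/')   # 'https://api.honeycomb.io/1/batch/'
# The dataset is appended because batch_root ends with '/'.
url = urljoin(batch_root, dataset)            # 'https://api.honeycomb.io/1/batch/my-dataset'

# Without the trailing slash the dataset would replace the last segment instead:
urljoin('https://api.honeycomb.io/1/batch', dataset)  # 'https://api.honeycomb.io/1/my-dataset'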
Example #5
def get_subscriptions(address):
    url = urljoin(MAILMAN_INSTANCE,
                  "3.1/members/find?subscriber={}".format(address))
    response = requests.get(url, auth=MAILMAN_AUTH)
    if response.status_code >= 300:
        log.error("Could not get URL %s: %d %s",
                  url, response.status_code, response.reason)
        return []
    result = response.json()
    subscriptions = []
    for entry in result.get("entries", []):
        subscription = {
            "list_id": entry["list_id"],
            "role": entry["role"],
            "delivery_mode": entry["delivery_mode"],
        }
        # Get the subscription's preferences
        member_id = entry["member_id"]
        pref_url = urljoin(MAILMAN_INSTANCE,
                           "3.1/members/{}/preferences".format(member_id))
        pref_response = requests.get(pref_url, auth=MAILMAN_AUTH)
        pref_result = pref_response.json()
        if pref_response.status_code >= 300:
            log.error("Could not get URL %s: %d %s",
                      pref_url, pref_response.status_code,
                      pref_response.reason)
        else:
            subscription["preferences"] = dict([
                (key, pref_result[key]) for key in pref_result
                if key not in ("http_etag", "self_link")
            ])
        subscriptions.append(subscription)
    return subscriptions
Example #6
    def test_href_template(self):
        self.headers = {
            'Client-ID': uuidutils.generate_uuid(),
            'X-Project-ID': '8383830383'
        }
        body = self.simulate_get(self.url_prefix, headers=self.headers)
        self.assertEqual(falcon.HTTP_200, self.srmock.status)
        resp = jsonutils.loads(body[0])
        queue_href_template = resp['resources']['rel/queue']['href-template']
        path_1 = 'https://zaqar.example.com' + self.url_prefix
        path_2 = 'https://zaqar.example.com' + self.url_prefix + '/'

        # Verify all the href templates start with the correct version prefix
        def get_href_or_template(resource):
            return resource.get('href-template', '') or resource['href']

        for resource in list(resp['resources']):
            self.assertTrue(
                get_href_or_template(resp['resources'][resource]).
                startswith(self.url_prefix))

        url = urlparse.urljoin(path_1, queue_href_template)
        expected = ('https://zaqar.example.com' + self.url_prefix +
                    '/queues/foo')
        self.assertEqual(expected, url.format(queue_name='foo'))

        url = urlparse.urljoin(path_2, queue_href_template)
        self.assertEqual(expected, url.format(queue_name='foo'))
Example #7
    def rel_to_abs(self, base_url):
        """
        Converts relative links from html contents to absolute links
        """
        # Delete target attributes
        strip_attributes(self.tree, "target")

        # Absolute links
        self.tree.rewrite_links(
            lambda link: urljoin(base_url, link) if not link.startswith(self.rel_to_abs_excluded_prefixes) else link
        )

        # Extra attributes
        onclick_elements = self.tree.xpath("//*[@onclick]")

        for element in onclick_elements:
            # Replace attribute with absolute URL
            element.set(
                "onclick",
                self.javascript_open_re.sub(
                    lambda match: "%s%s%s"
                    % (match.group("opening"), urljoin(base_url, match.group("url")), match.group("ending")),
                    element.get("onclick"),
                ),
            )
Example #8
    def __init__(self, username=None, serverloc=None, userapikey="nokey"):
        # This logic is based on ContinuumModelsClient.__init__ and
        # mpl.PlotClient.__init__.  There is some merged functionality here
        # since a Session is meant to capture the little bit of lower-level
        # logic in PlotClient (i.e. avoiding handling of things like
        # _newxyplot()), but also build in the functionality of the
        # ContinuumModelsClient.

        self.username = username
        self.root_url = serverloc
        self.http_session = requests.session()
        self.http_session.headers.update(
            {"content-type": "application/json", "BOKEHUSER-API-KEY": userapikey, "BOKEHUSER": username}
        )

        if self.root_url:
            url = urljoin(self.root_url, "/bokeh/userinfo/")
            self.userinfo = utils.get_json(self.http_session.get(url, verify=False))
        else:
            logger.info("Not using a server, plots will only work in embedded mode")
            self.userinfo = None

        self.docid = None
        self.plotcontext = None
        self.apikey = None
        self.bbclient = None  # reference to a ContinuumModelsClient
        self.base_url = urljoin(self.root_url, "/bokeh/bb/")
        self.raw_js_objs = []
        super(PlotServerSession, self).__init__()
Example #9
    def export(self, ds, requestor, notify):
        """
        This function exports data as FITS files. To do this, the function binds metadata (keywords) to images (arrays) to create FITS files and then serves the FITS files at jsoc.stanford.edu.
        Written by Monica Bobra and Art Amezcua
        19 July 2016

        Parameters
        ----------
        requestor: string
            Username of requestor.
        notify   : string
            E-mail address of requestor.
        ds       : string
            Name of the data series.

        Returns
        -------
        supath : list
            List containing paths to all the requested FITS files.
		"""
		# test to see if the user's e-mail address is registered with jsoc.stanford.edu
        test_email_query = 'http://jsoc.stanford.edu/cgi-bin/ajax/checkAddress.sh?address='+quote_plus(notify)+'&checkonly=1'
        response = urlopen(test_email_query)
        data = json.loads(response.read())
        if (data['status'] == 4):
		    raise RuntimeError('User e-mail address is not registered with jsoc.stanford.edu')
        query = '?' + urlencode({'op': 'exp_request', 'protocol': 'fits', 'format': 'json', 'method': 'url', 'requestor': requestor, 'notify': notify, 'ds': ds})
        req = self._json_request(self._url_jsoc_fetch + query)
        # waiting for the request to be ready
        if (int(req.data['status']) == 1 or int(req.data['status']) == 2):
            if 'requestid' in req.data:
                query = '?' + urlencode({'op': 'exp_status', 'requestid': req.data['requestid']})
                supath = []
                print('Waiting for the request to be ready. Please allow at least 20 seconds.')
                time.sleep(15)
                while True :  
                    req = self._json_request(self._url_jsoc_fetch + query)
                    if (int(req.data['status']) == 1 or int(req.data['status']) == 2 or int(req.data['status']) == 6):
                        time.sleep(5)
                    elif (int(req.data['status']) == 0):
                        dir = req.data['dir']
                        for dataobj in (req.data['data']):
                            supath.append(urljoin(self.baseurl,os.path.join(req.data['dir'],dataobj['filename'])))
                        break
                    else:
                        print(type(req.data['status']))
                        if (req.data['status'] == 3):
                            raise RuntimeError('DRMS Query failed, request size is too large, status=%s' % req.data['status'])
                        if (req.data['status'] == 4):
                            raise RuntimeError('DRMS Query failed, request not formed correctly, status=%s' % req.data['status'])
                        if (req.data['status'] == 5):
                            raise RuntimeError('DRMS Query failed, export request expired, status=%s' % req.data['status'])
                            
            else:
                raise RuntimeError('DRMS Query failed, there is no requestid, status=%s' % req.data['status'])
        else:
            raise RuntimeError('DRMS Query failed, series is not a valid series, status=%s' % req.data['status'])
        print("All the data are available at:")
        print(str(urljoin(self.baseurl,req.data['dir'])))
        return supath
Example #10
File: httpd.py Project: jaraco/pycoreutils
def list_directory(urlpath, filepath):
    """Helper to produce a directory listing (absent index.html).

    Return value is either a file object, or None (indicating a
    wsgierror).  In either case, the headers are sent, making the
    interface the same as for send_head().
    """
    path = urlpath.rstrip('/') + '/'
    listdir = os.listdir(filepath)
    dirlist = []
    filelist = []

    for file in listdir:
        if os.path.isdir(os.path.join(filepath, file)):
            dirlist.append(file)
        else:
            filelist.append(file)

    dirlist.sort()
    filelist.sort()

    res = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">\n'
    res += '<html><head><title>{0}</title></head><body>\n'.format(path)
    res += '<big><strong>Listing %s</strong></big><br>\n' % (path)
    if path != '/':
        item = '..'
        res += 'D <a href=%s>%s</a><br/>\n' % (urljoin(path, item), item)
    for item in dirlist:
        res += 'D <a href=%s>%s</a><br/>\n' % (urljoin(path, item), item)
    for item in filelist:
        res += 'F <a href=%s>%s</a><br/>\n' % (urljoin(path, item), item)
    res += '</body></html>'
    return str(res)
Example #11
File: deactivate.py Project: Pike/elmo
 def handleSection(self, section, items):
     locales = items['locales']
     if locales == 'all':
         inipath = '/'.join((
             items['repo'], items['mozilla'],
             'raw-file', 'default',
             items['l10n.ini']
         ))
         ini = ConfigParser()
         ini.readfp(urlopen(inipath))
         allpath = urljoin(
             urljoin(inipath, ini.get('general', 'depth')),
             ini.get('general', 'all'))
         locales = urlopen(allpath).read()
     locales = locales.split()
     obs = (Active.objects
            .filter(run__tree__code=section)
            .exclude(run__locale__code__in=locales)
            .order_by('run__locale__code'))
     obslocs = ' '.join(obs.values_list('run__locale__code', flat=True))
     if not obslocs:
         self.stdout.write(' OK\n')
         return
     s = input('Remove %s? [Y/n] ' % obslocs)
     if s.lower() == 'y' or s == '':
         obs.delete()
Example #12
    def process_response(self, request, response, spider):
        if (request.meta.get('dont_redirect', False) or
               response.status in getattr(spider, 'handle_httpstatus_list', []) or
               response.status in request.meta.get('handle_httpstatus_list', []) or
               request.meta.get('handle_httpstatus_all', False)):
            return response

        if request.method == 'HEAD':
            if response.status in [301, 302, 303, 307] and 'Location' in response.headers:
                redirected_url = urljoin(request.url, response.headers['location'])
                redirected = request.replace(url=redirected_url)
                return self._redirect(redirected, request, spider, response.status)
            else:
                return response

        if response.status in [302, 303] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = self._redirect_request_using_get(request, redirected_url)
            return self._redirect(redirected, request, spider, response.status)

        if response.status in [301, 307] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = request.replace(url=redirected_url)
            return self._redirect(redirected, request, spider, response.status)

        return response
Example #13
File: form.py Project: ArturGaspar/scrapy
def _get_form_url(form, url):
    if url is None:
        action = form.get('action')
        if action is None:
            return form.base_url
        return urljoin(form.base_url, strip_html5_whitespace(action))
    return urljoin(form.base_url, url)
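A short usage sketch of the helper above, assuming a hypothetical form whose base_url is the page it was parsed from; the interesting part is how each kind of action string resolves:

from urllib.parse import urljoin

base_url = 'https://example.com/accounts/login'   # hypothetical form.base_url

urljoin(base_url, 'check')        # 'https://example.com/accounts/check'  (relative action)
urljoin(base_url, '/sessions')    # 'https://example.com/sessions'        (absolute-path action)
urljoin(base_url, 'https://auth.example.com/login')   # an explicit url overrides the page entirely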
Example #14
    def __init__(self):

        self.verify_https = os.environ.get('OAUTHLIB_INSECURE_TRANSPORT', '') == ""
        if self.verify_https and os.environ.get("REQUESTS_CA_BUNDLE", "").strip() != "":
            self.verify_https = os.environ["REQUESTS_CA_BUNDLE"].strip()

        self.jwt_enable = six.text_type(os.environ.get('CKAN_OAUTH2_JWT_ENABLE', toolkit.config.get('ckan.oauth2.jwt.enable',''))).strip().lower() in ("true", "1", "on")

        self.legacy_idm = six.text_type(os.environ.get('CKAN_OAUTH2_LEGACY_IDM', toolkit.config.get('ckan.oauth2.legacy_idm', ''))).strip().lower() in ("true", "1", "on")
        self.authorization_endpoint = six.text_type(os.environ.get('CKAN_OAUTH2_AUTHORIZATION_ENDPOINT', toolkit.config.get('ckan.oauth2.authorization_endpoint', ''))).strip()
        self.token_endpoint = six.text_type(os.environ.get('CKAN_OAUTH2_TOKEN_ENDPOINT', toolkit.config.get('ckan.oauth2.token_endpoint', ''))).strip()
        self.profile_api_url = six.text_type(os.environ.get('CKAN_OAUTH2_PROFILE_API_URL', toolkit.config.get('ckan.oauth2.profile_api_url', ''))).strip()
        self.client_id = six.text_type(os.environ.get('CKAN_OAUTH2_CLIENT_ID', toolkit.config.get('ckan.oauth2.client_id', ''))).strip()
        self.client_secret = six.text_type(os.environ.get('CKAN_OAUTH2_CLIENT_SECRET', toolkit.config.get('ckan.oauth2.client_secret', ''))).strip()
        self.scope = six.text_type(os.environ.get('CKAN_OAUTH2_SCOPE', toolkit.config.get('ckan.oauth2.scope', ''))).strip()
        self.rememberer_name = six.text_type(os.environ.get('CKAN_OAUTH2_REMEMBER_NAME', toolkit.config.get('ckan.oauth2.rememberer_name', 'auth_tkt'))).strip()
        self.profile_api_user_field = six.text_type(os.environ.get('CKAN_OAUTH2_PROFILE_API_USER_FIELD', toolkit.config.get('ckan.oauth2.profile_api_user_field', ''))).strip()
        self.profile_api_fullname_field = six.text_type(os.environ.get('CKAN_OAUTH2_PROFILE_API_FULLNAME_FIELD', toolkit.config.get('ckan.oauth2.profile_api_fullname_field', ''))).strip()
        self.profile_api_mail_field = six.text_type(os.environ.get('CKAN_OAUTH2_PROFILE_API_MAIL_FIELD', toolkit.config.get('ckan.oauth2.profile_api_mail_field', ''))).strip()
        self.profile_api_groupmembership_field = six.text_type(os.environ.get('CKAN_OAUTH2_PROFILE_API_GROUPMEMBERSHIP_FIELD', toolkit.config.get('ckan.oauth2.profile_api_groupmembership_field', ''))).strip()
        self.sysadmin_group_name = six.text_type(os.environ.get('CKAN_OAUTH2_SYSADMIN_GROUP_NAME', toolkit.config.get('ckan.oauth2.sysadmin_group_name', ''))).strip()

        self.redirect_uri = urljoin(urljoin(toolkit.config.get('ckan.site_url', 'http://localhost:5000'), toolkit.config.get('ckan.root_path')), constants.REDIRECT_URL)

        # Init db
        db.init_db(model)

        missing = [key for key in REQUIRED_CONF if getattr(self, key, "") == ""]
        if missing:
            raise ValueError("Missing required oauth2 conf: %s" % ", ".join(missing))
        elif self.scope == "":
            self.scope = None
Example #15
File: rdfxml.py Project: drewp/rdflib
 def startElementNS(self, name, qname, attrs):
     stack = self.stack
     stack.append(ElementHandler())
     current = self.current
     parent = self.parent
     base = attrs.get(BASE, None)
     if base is not None:
         base, frag = urldefrag(base)
         if parent and parent.base:
             base = urljoin(parent.base, base)
         else:
             systemId = self.locator.getPublicId() \
                 or self.locator.getSystemId()
             if systemId:
                 base = urljoin(systemId, base)
     else:
         if parent:
             base = parent.base
         if base is None:
             systemId = self.locator.getPublicId() \
                 or self.locator.getSystemId()
             if systemId:
                 base, frag = urldefrag(systemId)
     current.base = base
     language = attrs.get(LANG, None)
     if language is None:
         if parent:
             language = parent.language
     current.language = language
     current.start(name, qname, attrs)
Example #16
def fix_auth_url_version(auth_url):
    """Fix up the auth url if an invalid or no version prefix was given.

    People still give a v2 auth_url even when they specify that they want v3
    authentication. Fix the URL to say v3 in this case and add version if it is
    missing entirely. This should be smarter and use discovery.
    """

    # Check for empty path component in endpoint URL and add keystone version
    # to endpoint: as of Kilo, the identity URLs returned by Keystone might no
    # longer contain API versions, leaving the version choice up to the user.
    if urlparse.urlparse(auth_url).path.rstrip('/') == '':
        if get_keystone_version() >= 3:
            auth_url = urlparse.urljoin(auth_url, 'v3')
        else:
            auth_url = urlparse.urljoin(auth_url, 'v2.0')

    if get_keystone_version() >= 3:
        if has_in_url_path(auth_url, "/v2.0"):
            LOG.warning("The settings.py file points to a v2.0 keystone "
                        "endpoint, but v3 is specified as the API version "
                        "to use. Using v3 endpoint for authentication.")
            auth_url = url_path_replace(auth_url, "/v2.0", "/v3", 1)

    return auth_url
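The version segment is only appended when the endpoint has an empty path, which is exactly the case where urljoin behaves like a plain append; with a non-empty path it would replace the last segment instead. A sketch with made-up Keystone endpoints:

from urllib.parse import urljoin

urljoin('http://keystone.example.com:5000', 'v3')
# 'http://keystone.example.com:5000/v3'           (empty path: version is appended)

urljoin('http://keystone.example.com:5000/identity', 'v3')
# 'http://keystone.example.com:5000/v3'           (no trailing slash: last segment replaced)

urljoin('http://keystone.example.com:5000/identity/', 'v3')
# 'http://keystone.example.com:5000/identity/v3'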
Example #17
File: lxmlhtml.py Project: cjzswust/test
 def _extract_links(self, selector, response_url, response_encoding, base_url):
     '''
     Pretty much the same function, just added 'ignore' to to_native_str()
     '''
     links = []
     # hacky way to get the underlying lxml parsed document
     for el, attr, attr_val in self._iter_links(selector.root):
         # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
         try:
             attr_val = urljoin(base_url, attr_val)
         except ValueError:
             continue # skipping bogus links
         else:
             url = self.process_attr(attr_val)
             if url is None:
                 continue
         # added 'ignore' to encoding errors
         url = to_native_str(url, encoding=response_encoding,
                             errors='ignore')
         # to fix relative links after process_value
         url = urljoin(response_url, url)
         link = Link(url, _collect_string_content(el) or u'',
                     nofollow=rel_has_nofollow(el.get('rel')))
         links.append(link)
     return self._deduplicate_if_needed(links)
Example #18
def open_in_browser(config_obj, jql_query, query_log_path, new_tab=False):
    """
    Open browser in JIRA with the retrieved keys from Stash as url-params

    :param config_obj:
    :param jql_query:
    :return:
    """

    jira_url = config_obj.jira_url
    if len(jql_query) < OPEN_IN_BROWSER_BELOW:
        params = {
            'jql': jql_query
        }
        b_url = urljoin(urljoin(jira_url, 'issues/'), '?' + urlencode(params))
        if new_tab:
            webbrowser.open(b_url, new=2)
        else:
            webbrowser.open(b_url, new=0)
    else:
        click.echo("Too much data to open in browser.")

    click.echo("Query saved to " + query_log_path)
    with open(query_log_path, 'a') as f:
        f.write(jql_query + "\n\n")
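The double urljoin above first anchors the path at 'issues/' and then attaches the encoded query; a reference starting with '?' keeps the base path and only replaces the query string. A sketch with a hypothetical JIRA base URL:

from urllib.parse import urljoin, urlencode

jira_url = 'https://jira.example.com/'        # hypothetical config_obj.jira_url
issues_url = urljoin(jira_url, 'issues/')     # 'https://jira.example.com/issues/'
params = urlencode({'jql': 'project = FOO'})
urljoin(issues_url, '?' + params)             # 'https://jira.example.com/issues/?jql=project+%3D+FOO'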
Example #19
def request_raw(method, path, params=None, body=None, headers=None,
                handle_errors=True, auto_retry=True):
    kwargs = {
        'params': params,
        'data': body,
        'headers': headers,
        'verify': analyzere.tls_verify,
    }

    username = analyzere.username
    password = analyzere.password
    if username and password:
        kwargs['auth'] = (username, password)

    resp = requests.request(method, urljoin(analyzere.base_url, path),
                            **kwargs)

    # Handle HTTP 503 with the Retry-After header by automatically retrying
    # request after sleeping for the recommended amount of time
    retry_after = resp.headers.get('Retry-After')
    while auto_retry and (resp.status_code == 503 and retry_after):
        time.sleep(float(retry_after))
        # Repeat original request after Retry-After time has elapsed.
        resp = requests.request(method, urljoin(analyzere.base_url, path),
                                **kwargs)
        retry_after = resp.headers.get('Retry-After')

    if handle_errors and (not 200 <= resp.status_code < 300):
        handle_api_error(resp, resp.status_code)

    return resp
Example #20
def get_journal_about_page_url(about_page_id=0, auth=True):
    """
    Return url to journal about page.
    If auth=True, the url will redirect through the journals service log in page
    which will prevent the "purchase now" button being shown.
    If auth=False, the url will point to Journal About Page with purchase button shown

    Arguments:
        about_page_id (int): id of Journal About Page as found in Discovery
        auth (boolean): authorization flag, if true will force login to journal service
        and redirect to last visited page in Journal after login. If false, this method
        will return direct url to journal about page.

    Returns:
        url (str): url pointing to Journals Service login, w/ a redirect to last visited journal page
        or url pointing directly to journal about page.
    """
    if not auth:
        return urljoin(get_journals_frontend_url(), '{id}/about'.format(id=about_page_id))

    # by providing just the about_page_id in the url, the user will be redirected
    # to the last page viewed after logging in
    about_page_url = urljoin(get_journals_frontend_url(), '{id}'.format(id=about_page_id))
    login_url = urljoin(get_journals_root_url(), 'require_auth')
    query = 'forward={next_url}'.format(next_url=about_page_url)

    split_url = urlsplit(login_url)
    url = urlunsplit((
        split_url.scheme,
        split_url.netloc,
        split_url.path,
        query,
        split_url.fragment,
    ))
    return url
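A usage sketch of the URL shapes this helper produces, with hypothetical values standing in for get_journals_frontend_url() and get_journals_root_url():

from urllib.parse import urljoin, urlsplit, urlunsplit

frontend_url = 'https://journals.example.com/'    # hypothetical get_journals_frontend_url()
root_url = 'https://journals.example.com/api/'    # hypothetical get_journals_root_url()

# auth=False: link straight to the about page.
urljoin(frontend_url, '{id}/about'.format(id=42))  # 'https://journals.example.com/42/about'

# auth=True: point at the login endpoint and carry the about page in the forward query.
about_page_url = urljoin(frontend_url, '42')
login_url = urljoin(root_url, 'require_auth')
parts = urlsplit(login_url)
urlunsplit((parts.scheme, parts.netloc, parts.path,
            'forward={}'.format(about_page_url), parts.fragment))
# 'https://journals.example.com/api/require_auth?forward=https://journals.example.com/42'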
Example #21
 def open(self, filename=None):
     if filename is None:
         filename = self._base_uri
     else:
         if self._file_type == 's3':
             filename = urljoin(self._base_uri.replace(
                 's3://', 'http://'), filename.replace('\\', '/')).replace('http://', 's3://')
         elif self._file_type == 'http':
             filename = urljoin(self._base_uri, filename.replace('\\', '/'))
         else:
             filename = os.path.abspath(os.path.join(os.path.dirname(
                 self._base_uri.replace('\\', '/')), filename.replace('\\', '/')))
     f = None
     if self._file_type == 's3':
         uri_header, uri_body = filename.split('://', 1)
         us = uri_body.split('/')
         bucketname = us.pop(0)
         key = '/'.join(us)
         logger.info('Opening {}'.format(key))
         f = StringIO(self._s3_bucket.Object(key).get()['Body'].read())
     elif self._file_type == 'http':
         f = request.urlopen(filename)
     else:
         f = open(filename, 'rb')
     yield f
     f.close()
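The s3 branch above swaps the scheme to http:// before joining and back afterwards because urljoin only resolves relative references for schemes listed in urllib.parse.uses_relative, and 's3' is not among them. A minimal sketch of the difference (bucket and file names are made up):

from urllib.parse import urljoin

base = 's3://my-bucket/datasets/index.csv'

# urljoin does not treat 's3' as a relative-capable scheme, so the reference comes back untouched.
urljoin(base, 'train.csv')                                  # 'train.csv'

# Swapping to http:// first gives the expected join; the scheme is then restored.
joined = urljoin(base.replace('s3://', 'http://'), 'train.csv')
joined.replace('http://', 's3://')                          # 's3://my-bucket/datasets/train.csv'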
Example #22
    def process_response(self, request, response, spider):
        if request.meta.get('dont_redirect', False):
            return response

        if request.method == 'HEAD':
            if response.status in [301, 302, 303, 307] and 'Location' in response.headers:
                redirected_url = urljoin(request.url, response.headers['location'])
                redirected = request.replace(url=redirected_url)
                return self._redirect(redirected, request, spider, response.status)
            else:
                return response

        if response.status in [302, 303] and 'Location' in response.headers:
            if (response.headers['Location'] == "http://store.steampowered.com/") or \
               (response.headers['Location'] == "http://store.steampowered.com") or \
               ('video' in response.headers['Location']):
                # log.msg("Ignored home page / video redirect!")
                raise IgnoreRequest()
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = self._redirect_request_using_get(request, redirected_url)
            return self._redirect(redirected, request, spider, response.status)

        if response.status in [301, 307] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = request.replace(url=redirected_url)
            return self._redirect(redirected, request, spider, response.status)

        return response
Example #23
File: models.py Project: Mognom/wirecloud
def add_absolute_urls(results, request=None):

    for hit in results:
        base_url = get_template_url(hit['vendor'], hit['name'], hit['version'], hit['template_uri'], request=request)
        hit['uri'] = "/".join((hit['vendor'], hit['name'], hit['version']))
        hit['image'] = urljoin(base_url, hit['image'])
        hit['smartphoneimage'] = urljoin(base_url, hit['smartphoneimage'])
Example #24
    def _extract_links(self, selector, response_url, response_encoding, base_url):
        '''
        Pretty much the same function, just added 'ignore' to url.encode
        '''
        links = []
        # hacky way to get the underlying lxml parsed document
        for el, attr, attr_val in self._iter_links(selector.root):
            # pseudo lxml.html.HtmlElement.make_links_absolute(base_url)
            try:
                attr_val = urljoin(base_url, attr_val)
            except ValueError:
                continue  # skipping bogus links
            else:
                url = self.process_attr(attr_val)
                if url is None:
                    continue
            if isinstance(url, unicode):
                # add 'ignore' to encoding errors
                url = url.encode(response_encoding, 'ignore')
            # to fix relative links after process_value
            url = urljoin(response_url, url)
            link = Link(url, _collect_string_content(el) or u'',
                        nofollow=True if el.get('rel') == 'nofollow' else False)
            links.append(link)

        return unique_list(links, key=lambda link: link.url) \
                if self.unique else links
Example #25
 def _post_request(self, route, data, description):
     url = urlparse.urljoin(self.base_url, route)
     extra_logger.info('[%s] %s: POST %s: %s', self.label, description, url, json.dumps(data))
     resp = self.session.post(urlparse.urljoin(self.base_url, route),
                              data=json.dumps(data), auth=(self.api_key, ''),
                              timeout=self.request_timeout,
     )
     return self._handle_resp(resp)
Example #26
def ci_artifacts(job):
    url = urljoin(urljoin(APPVEYOR_API_JOB_URL, job['jobId'] + '/'),
                  'artifacts/')
    response = urlopen(url)
    files = json.loads(response.read().decode('utf-8'))  # py3 compat
    response.close()
    for file_ in files:
        file_['url'] = urljoin(url, file_['fileName'])
    return files
Example #27
 def absolutize(self, uri, defrag=1):
     base = urljoin("file:", pathname2url(os.getcwd()))
     result = urljoin("%s/" % base, uri, allow_fragments=not defrag)
     if defrag:
         result = urldefrag(result)[0]
     if not defrag:
         if uri and uri[-1] == "#" and result[-1] != "#":
             result = "%s#" % result
     return URIRef(result)
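The method above turns the current working directory into a file: base URI and resolves uri against it. A small sketch of the same two-step join, assuming a POSIX-style path purely for illustration:

from urllib.parse import urljoin, urldefrag
from urllib.request import pathname2url

cwd = '/home/user/project'                     # pretend os.getcwd() returned this
base = urljoin('file:', pathname2url(cwd))     # 'file:///home/user/project'
result = urljoin('%s/' % base, 'data.ttl')     # 'file:///home/user/project/data.ttl'
urldefrag(result)[0]                           # fragment (if any) stripped when defrag is set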
Example #28
def browse_directory(dir_url):
    name_dict = {}
    try:
        with closing(urlopen(dir_url)) as urlpath:
            string_from_url = urlpath.read()
    except HTTPError:
        logger.exception("Skipping: %r", dir_url)
        return None

    rhevm_pattern = re.compile(r'<a href="?\'?([^"\']*(?:rhevm\.ova|ovirt)[^"\'>]*)')
    rhevm_image_name = rhevm_pattern.findall(string_from_url)
    rhos_pattern = re.compile(r'<a href="?\'?([^"\']*(?:rhos|openstack|rhelosp)[^"\'>]*)')
    rhos_image_name = rhos_pattern.findall(string_from_url)
    scvmm_pattern = re.compile(r'<a href="?\'?([^"\']*hyperv[^"\'>]*)')
    scvmm_image_name = scvmm_pattern.findall(string_from_url)
    vsphere_pattern = re.compile(r'<a href="?\'?([^"\']*vsphere[^"\'>]*)')
    vsphere_image_name = vsphere_pattern.findall(string_from_url)
    google_pattern = re.compile(r'<a href="?\'?([^"\']*gce[^"\'>]*)')
    google_image_name = google_pattern.findall(string_from_url)
    ec2_pattern = re.compile(r'<a href="?\'?([^"\']*ec2[^"\'>]*)')
    ec2_image_name = ec2_pattern.findall(string_from_url)
    openshift_pattern = re.compile(r'<a href="?\'?(openshift-pods/*)')
    openshift_image_name = openshift_pattern.findall(string_from_url)

    if len(rhevm_image_name) != 0:
        name_dict['template_upload_rhevm'] = rhevm_image_name[0]
    if len(rhos_image_name) != 0:
        name_dict['template_upload_rhos'] = rhos_image_name[0]
    if len(scvmm_image_name) != 0:
        name_dict['template_upload_scvmm'] = scvmm_image_name[0]
    if len(vsphere_image_name) != 0:
        name_dict['template_upload_vsphere'] = vsphere_image_name[0]
    if len(google_image_name) != 0:
        name_dict['template_upload_gce'] = google_image_name[0]
    if len(ec2_image_name) != 0:
        name_dict['template_upload_ec2'] = ec2_image_name[0]
    if len(openshift_image_name) != 0:
        name_dict['template_upload_openshift'] = openshift_image_name[0]

    for key, val in name_dict.items():
        name_dict[key] = urljoin(dir_url, val)

    for key in name_dict.keys():
        if key == 'template_upload_openshift':
            # this is necessary because headers don't contain last-modified date for folders
            #  cfme-template is disposed in templates everywhere except 'latest' in 5.9
            # todo: remove this along with refactoring script
            if '5.8' in name_dict[key] or ('5.9' in name_dict[key] and 'latest' in name_dict[key]):
                url = urljoin(name_dict[key], 'cfme-template.yaml')
            else:
                url = urljoin(name_dict[key], 'templates/cfme-template.yaml')
        else:
            url = name_dict[key]
        date = urlopen(url).info().getdate('last-modified')
        name_dict[key + "_date"] = "%02d" % date[1] + "%02d" % date[2]

    return name_dict
Example #29
 def interact(self, ctx, location, ir_err):
     '''Implement Interactor.interact by opening the browser window
     and waiting for the discharge token'''
     p = ir_err.interaction_method(self.kind(), WebBrowserInteractionInfo)
     if not location.endswith('/'):
         location += '/'
     visit_url = urljoin(location, p.visit_url)
     wait_token_url = urljoin(location, p.wait_token_url)
     self._open_web_browser(visit_url)
     return self._wait_for_token(ctx, wait_token_url)
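The explicit trailing slash added to location above is what makes the subsequent urljoin append the visit/wait paths instead of replacing the final path segment; a small demonstration with a made-up discharge location:

from urllib.parse import urljoin

urljoin('https://idm.example.com/discharge', 'visit')    # 'https://idm.example.com/visit'
urljoin('https://idm.example.com/discharge/', 'visit')   # 'https://idm.example.com/discharge/visit'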
Example #30
File: regex.py Project: 0326/scrapy
    def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
        if base_url is None:
            base_url = urljoin(response_url, self.base_url) if self.base_url else response_url

        clean_url = lambda u: urljoin(base_url, replace_entities(clean_link(u.decode(response_encoding))))
        clean_text = lambda t: replace_escape_chars(remove_tags(t.decode(response_encoding))).strip()

        links_text = linkre.findall(response_text)
        return [Link(clean_url(url).encode(response_encoding),
                     clean_text(text))
                for url, _, text in links_text]
Example #31
def absolute_uri(url=None):
    if not url:
        return options.get('system.url-prefix')
    return urljoin(
        options.get('system.url-prefix').rstrip('/') + '/', url.lstrip('/'))
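The rstrip('/') / lstrip('/') pair above normalizes both sides so that exactly one slash separates the configured prefix from the path, and so that a leading '/' on url cannot wipe out the prefix path. A sketch with a hypothetical url-prefix option:

from urllib.parse import urljoin

prefix = 'https://sentry.example.com/base'    # hypothetical options.get('system.url-prefix')
url = '/organizations/acme/issues/'

urljoin(prefix.rstrip('/') + '/', url.lstrip('/'))
# 'https://sentry.example.com/base/organizations/acme/issues/'

# Without the normalization the absolute-path url would discard the prefix path:
urljoin(prefix + '/', url)                    # 'https://sentry.example.com/organizations/acme/issues/'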
Example #32
    def expand_frames(self, frames, release):
        last_token = None
        token = None

        cache = self.cache
        sourcemaps = self.sourcemaps
        all_errors = []
        sourcemap_applied = False

        for frame in frames:
            errors = cache.get_errors(frame.abs_path)
            if errors:
                all_errors.extend(errors)

            # can't fetch source if there's no filename present
            if not frame.abs_path:
                continue

            source = self.get_source(frame.abs_path, release)
            if source is None:
                logger.debug('No source found for %s', frame.abs_path)
                continue

            sourcemap_url, sourcemap_view = sourcemaps.get_link(frame.abs_path)
            if sourcemap_view and frame.colno is None:
                all_errors.append({
                    'type': EventError.JS_NO_COLUMN,
                    'url': expose_url(frame.abs_path),
                })
            elif sourcemap_view:
                last_token = token

                if is_data_uri(sourcemap_url):
                    sourcemap_label = frame.abs_path
                else:
                    sourcemap_label = sourcemap_url

                sourcemap_label = expose_url(sourcemap_label)

                try:
                    # Errors are 1-indexed in the frames, so we need to -1 to get
                    # zero-indexed value from tokens.
                    assert frame.lineno > 0, "line numbers are 1-indexed"
                    token = sourcemap_view.lookup_token(
                        frame.lineno - 1, frame.colno)
                except Exception:
                    token = None
                    all_errors.append({
                        'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                        'column': frame.colno,
                        'row': frame.lineno,
                        'source': frame.abs_path,
                        'sourcemap': sourcemap_label,
                    })

                # Store original data in annotation
                # HACK(dcramer): we stuff things into raw which gets popped off
                # later when adding the raw_stacktrace attribute.
                raw_frame = frame.to_json()
                frame.data = {
                    'raw': raw_frame,
                    'sourcemap': sourcemap_label,
                }

                sourcemap_applied = True

                if token is not None:
                    abs_path = urljoin(sourcemap_url, token.src)

                    logger.debug(
                        'Mapping compressed source %r to mapping in %r',
                        frame.abs_path, abs_path)
                    source = self.get_source(abs_path, release)

                if not source:
                    errors = cache.get_errors(abs_path)
                    if errors:
                        all_errors.extend(errors)
                    else:
                        all_errors.append({
                            'type': EventError.JS_MISSING_SOURCE,
                            'url': expose_url(abs_path),
                        })

                if token is not None:
                    # Token's return zero-indexed lineno's
                    frame.lineno = token.src_line + 1
                    frame.colno = token.src_col
                    # The offending function is always the previous function in the stack
                    # Honestly, no idea what the bottom most frame is, so we're ignoring that atm
                    if last_token:
                        frame.function = last_token.name or frame.function
                    else:
                        frame.function = token.name or frame.function

                    filename = token.src
                    # special case webpack support
                    # abs_path will always be the full path with webpack:/// prefix.
                    # filename will be relative to that
                    if abs_path.startswith('webpack:'):
                        filename = abs_path
                        # webpack seems to use ~ to imply "relative to resolver root"
                        # which is generally seen for third party deps
                        # (i.e. node_modules)
                        if '/~/' in filename:
                            filename = '~/' + abs_path.split('/~/', 1)[-1]
                        else:
                            filename = filename.split('webpack:///', 1)[-1]

                        # As noted above, '~/' means they're coming from node_modules,
                        # so these are not app dependencies
                        if filename.startswith('~/'):
                            frame.in_app = False
                        # And conversely, local dependencies start with './'
                        elif filename.startswith('./'):
                            frame.in_app = True

                        # Update 'raw' copy to have same in_app status
                        raw_frame['in_app'] = frame.in_app

                        # We want to explicitly generate a webpack module name
                        frame.module = generate_module(filename)

                    frame.abs_path = abs_path
                    frame.filename = filename
                    if not frame.module and abs_path.startswith(
                        ('http:', 'https:', 'webpack:')):
                        frame.module = generate_module(abs_path)

            elif sourcemap_url:
                frame.data = {
                    'sourcemap': expose_url(sourcemap_url),
                }

            # TODO: theoretically a minified source could point to another mapped, minified source
            frame.pre_context, frame.context_line, frame.post_context = get_source_context(
                source=source, lineno=frame.lineno, colno=frame.colno or 0)

            if not frame.context_line and source:
                all_errors.append({
                    'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                    'column': frame.colno,
                    'row': frame.lineno,
                    'source': frame.abs_path,
                })
        return all_errors, sourcemap_applied
Example #33
def _url(target, action):
    return urljoin(target['url'], action)
Example #34
def discover_sourcemap(result):
    """
    Given a UrlResult object, attempt to discover a sourcemap.
    """
    # When coercing the headers returned by urllib to a dict
    # all keys become lowercase so they're normalized
    sourcemap = result.headers.get('sourcemap',
                                   result.headers.get('x-sourcemap'))

    if not sourcemap:
        parsed_body = result.body.split('\n')
        # Source maps are only going to exist at either the top or bottom of the document.
        # Technically, there isn't anything indicating *where* it should exist, so we
        # are generous and assume it's somewhere either in the first or last 5 lines.
        # If it's somewhere else in the document, you're probably doing it wrong.
        if len(parsed_body) > 10:
            possibilities = parsed_body[:5] + parsed_body[-5:]
        else:
            possibilities = parsed_body

        # We want to scan each line sequentially, and the last one found wins
        # This behavior is undocumented, but matches what Chrome and Firefox do.
        for line in possibilities:
            if line[:21] in ('//# sourceMappingURL=', '//@ sourceMappingURL='):
                # We want everything AFTER the indicator, which is 21 chars long
                sourcemap = line[21:].rstrip()

        # If we still haven't found anything, check end of last line AFTER source code.
        # This is not the literal interpretation of the spec, but browsers support it.
        # e.g. {code}//# sourceMappingURL={url}
        if not sourcemap:
            # Only look at last 300 characters to keep search space reasonable (minified
            # JS on a single line could be tens of thousands of chars). This is a totally
            # arbitrary number / best guess; most sourceMappingURLs are relative and
            # not very long.
            search_space = possibilities[-1][-300:].rstrip()
            match = SOURCE_MAPPING_URL_RE.search(search_space)
            if match:
                sourcemap = match.group(1)

    if sourcemap:
        # react-native shoves a comment at the end of the
        # sourceMappingURL line.
        # For example:
        #  sourceMappingURL=app.js.map/*ascii:...*/
        # This comment is completely out of spec and no browser
        # would support this, but we need to strip it to make
        # people happy.
        if '/*' in sourcemap and sourcemap[-2:] == '*/':
            index = sourcemap.index('/*')
            # comment definitely shouldn't be the first character,
            # so let's just make sure of that.
            if index == 0:
                raise AssertionError(
                    'react-native comment found at bad location: %d, %r' %
                    (index, sourcemap))
            sourcemap = sourcemap[:index]
        # fix url so its absolute
        sourcemap = urljoin(result.url, sourcemap)

    return sourcemap
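The final urljoin(result.url, sourcemap) turns the usually relative sourceMappingURL into an absolute URL next to the JavaScript file it was found in. A small sketch with made-up asset URLs:

from urllib.parse import urljoin

js_url = 'https://cdn.example.com/assets/app.min.js'

urljoin(js_url, 'app.min.js.map')          # 'https://cdn.example.com/assets/app.min.js.map'
urljoin(js_url, '/maps/app.min.js.map')    # 'https://cdn.example.com/maps/app.min.js.map'
urljoin(js_url, 'https://other.example.net/app.js.map')   # already absolute: returned unchanged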
Example #35
def UploadFile(request):
    """上传文件"""
    if not request.method == "POST":
        return HttpResponse(json.dumps(u"{'state:'ERROR'}"),
                            content_type="application/javascript")

    state = "SUCCESS"
    action = request.GET.get("action")
    # Upload a file
    upload_field_name = {
        "uploadfile": "fileFieldName",
        "uploadimage": "imageFieldName",
        "uploadscrawl": "scrawlFieldName",
        "catchimage": "catcherFieldName",
        "uploadvideo": "videoFieldName",
    }
    UploadFieldName = request.GET.get(
        upload_field_name[action],
        USettings.UEditorUploadSettings.get(action, "upfile"))

    # Scrawl (doodle) uploads arrive base64-encoded and need special handling
    if action == "uploadscrawl":
        upload_file_name = "scrawl.png"
        upload_file_size = 0
    else:
        # Get the uploaded file
        file = request.FILES.get(UploadFieldName, None)
        if file is None:
            return HttpResponse(json.dumps(u"{'state:'ERROR'}"),
                                content_type="application/javascript")
        upload_file_name = file.name
        upload_file_size = file.size

    # Get the original name of the uploaded file
    upload_original_name, upload_original_ext = os.path.splitext(
        upload_file_name)

    # Validate the file type
    upload_allow_type = {
        "uploadfile": "fileAllowFiles",
        "uploadimage": "imageAllowFiles",
        "uploadvideo": "videoAllowFiles"
    }
    if action in upload_allow_type:
        allow_type = list(
            request.GET.get(
                upload_allow_type[action],
                USettings.UEditorUploadSettings.get(upload_allow_type[action],
                                                    "")))
        if not upload_original_ext in allow_type:
            state = u"服务器不允许上传%s类型的文件。" % upload_original_ext

    # Validate the file size
    upload_max_size = {
        "uploadfile": "filwMaxSize",
        "uploadimage": "imageMaxSize",
        "uploadscrawl": "scrawlMaxSize",
        "uploadvideo": "videoMaxSize"
    }
    max_size = long(
        request.GET.get(
            upload_max_size[action],
            USettings.UEditorUploadSettings.get(upload_max_size[action], 0)))
    if max_size != 0:
        from .utils import FileSize
        MF = FileSize(max_size)
        if upload_file_size > MF.size:
            state = u"上传文件大小不允许超过%s。" % MF.FriendValue

    # Check whether the save path exists; create it if it does not
    upload_path_format = {
        "uploadfile": "filePathFormat",
        "uploadimage": "imagePathFormat",
        "uploadscrawl": "scrawlPathFormat",
        "uploadvideo": "videoPathFormat"
    }

    path_format_var = get_path_format_vars()
    path_format_var.update({
        "basename": upload_original_name,
        "extname": upload_original_ext[1:],
        "filename": upload_file_name,
    })
    # Get the output file path
    OutputPathFormat, OutputPath, OutputFile = get_output_path(
        request, upload_path_format[action], path_format_var)

    # Write the file once all checks have passed
    if state == "SUCCESS":
        if action == "uploadscrawl":
            state = save_scrawl_file(request,
                                     os.path.join(OutputPath, OutputFile))
        else:
            # Save to a file; return ERROR if saving fails
            upload_module_name = USettings.UEditorUploadSettings.get(
                "upload_module", None)
            if upload_module_name:
                mod = import_module(upload_module_name)
                state = mod.upload(file, OutputPathFormat)
            else:
                state = save_upload_file(file,
                                         os.path.join(OutputPath, OutputFile))

    # Build the response data
    return_info = {
        # name of the saved file
        'url': urljoin(USettings.gSettings.MEDIA_URL, OutputPathFormat),
        'original': upload_file_name,  # original file name
        'type': upload_original_ext,
        'state': state,  # upload state; SUCCESS on success, any other value is echoed back to the upload widget
        'size': upload_file_size
    }
    return HttpResponse(json.dumps(return_info, ensure_ascii=False),
                        content_type="application/javascript")
Example #36
def catcher_remote_image(request):
    """远程抓图,当catchRemoteImageEnable:true时,
        如果前端插入图片地址与当前web不在同一个域,则由本函数从远程下载图片到本地
    """
    if not request.method == "POST":
        return HttpResponse(json.dumps(u"{'state:'ERROR'}"),
                            content_type="application/javascript")

    state = "SUCCESS"

    allow_type = list(
        request.GET.get(
            "catcherAllowFiles",
            USettings.UEditorUploadSettings.get("catcherAllowFiles", "")))
    max_size = long(
        request.GET.get(
            "catcherMaxSize",
            USettings.UEditorUploadSettings.get("catcherMaxSize", 0)))

    remote_urls = request.POST.getlist("source[]", [])
    catcher_infos = []
    path_format_var = get_path_format_vars()

    for remote_url in remote_urls:
        # Get the original name of the remote file
        remote_file_name = os.path.basename(remote_url)
        remote_original_name, remote_original_ext = os.path.splitext(
            remote_file_name)
        # Validate the file type
        if remote_original_ext in allow_type:
            path_format_var.update({
                "basename": remote_original_name,
                "extname": remote_original_ext[1:],
                "filename": remote_original_name
            })
            # Compute the file name to save under
            o_path_format, o_path, o_file = get_output_path(
                request, "catcherPathFormat", path_format_var)
            o_filename = os.path.join(o_path, o_file).replace("\\", "/")
            # Read the remote image file
            try:
                remote_image = urlopen(remote_url)
                # Write the fetched image to a file
                try:
                    f = open(o_filename, 'wb')
                    f.write(remote_image.read())
                    f.close()
                    state = "SUCCESS"
                except Exception as E:
                    state = u"写入抓取图片文件错误:%s" % E.message
            except Exception as E:
                state = u"抓取图片错误:%s" % E.message

            catcher_infos.append({
                "state":
                state,
                "url":
                urljoin(USettings.gSettings.MEDIA_URL, o_path_format),
                "size":
                os.path.getsize(o_filename),
                "title":
                os.path.basename(o_file),
                "original":
                remote_file_name,
                "source":
                remote_url
            })

    return_info = {
        "state": "SUCCESS" if len(catcher_infos) > 0 else "ERROR",
        "list": catcher_infos
    }

    return HttpResponse(json.dumps(return_info, ensure_ascii=False),
                        content_type="application/javascript")
Example #37
	def contents(owner, repo):
		r = requests.get(urljoin(GitHubAPI.API_URL, 'repos/{}/{}/contents'.format(owner, repo)))
		return r.json()
Example #38
 def __init__(self):
     self.name = "darknet"
     self.source_url = _urlparse.urljoin(MODELS_URL_ROOT, "darknet.mlmodel")
     self.source_md5 = "a06761976a0472cf0553b64ecc15b0fe"
Example #39
 def __init__(self):
     self.source_url = _urlparse.urljoin(
         MODELS_URL_ROOT,
         "drawing_classifier_pre_trained_model_245_classes_v0.mlmodel",
     )
     self.source_md5 = "fc1c04126728514c47991a62b9e66715"
Example #40
File: form.py Project: zvin/scrapy
def _get_form_url(form, url):
    if url is None:
        return urljoin(form.base_url, form.action)
    return urljoin(form.base_url, url)
Example #41
import client
import dom_parser2, os,re
from bs4 import BeautifulSoup
buildDirectory = utils.buildDir #CODE BY NEMZZY AND ECHO
dialog = xbmcgui.Dialog()
translatePath = xbmc.translatePath if PY2 else xbmcvfs.translatePath
filename     = os.path.basename(__file__).split('.')[0]
base_domain  = 'https://eporner.com'
base_name    = base_domain.replace('www.',''); base_name = re.findall('(?:\/\/|\.)([^.]+)\.',base_name)[0].title()
type         = 'video'
menu_mode    = 200
content_mode = 201
player_mode  = 801

search_tag   = 1
search_base  = urljoin(base_domain,'search/%s')#.replace(' ','-')

@utils.url_dispatcher.register('%s' % menu_mode)
def menu():
    
	lover.checkupdates()

	try:
		url = urljoin(base_domain, 'categories/')
		c = client.request(url)
		soup = BeautifulSoup(c, 'html5lib')
		content = soup.find_all('div', class_={'ctbinner'})
		if ( not content ):
			log_utils.log('Scraping Error in %s:: Content of request: %s' % (base_name.title(),str(c)), log_utils.LOGERROR)
			kodi.notify(msg='Scraping Error: Info Added To Log File', duration=6000, sound=True)
			quit()
Example #42
# Run tasks in-process, without sending them to the queue (i.e., synchronously).
CELERY_ALWAYS_EAGER = True
# END CELERY

# Use production settings for asset compression so that asset compilation can be tested on the CI server.
COMPRESS_ENABLED = True
COMPRESS_OFFLINE = True

# Comprehensive theme settings for testing environment
COMPREHENSIVE_THEME_DIRS = [
    Path(DJANGO_ROOT + "/tests/themes"),
    Path(DJANGO_ROOT + "/tests/themes-dir-2"),
]

DEFAULT_SITE_THEME = "test-theme"

ENTERPRISE_API_URL = urljoin(ENTERPRISE_SERVICE_URL, 'api/v1/')

# Don't bother sending fake events to Segment. Doing so creates unnecessary threads.
SEND_SEGMENT_EVENTS = False

# SPEED
DEBUG = False
TEMPLATE_DEBUG = False
CELERY_EAGER_PROPAGATES_EXCEPTIONS = True
BROKER_BACKEND = 'memory'

#SAILTHRU settings
SAILTHRU_KEY = 'abc123'
SAILTHRU_SECRET = 'top_secret'
Example #43
    def process_frame(self, processable_frame, processing_task):
        frame = processable_frame.frame
        token = None

        cache = self.cache
        sourcemaps = self.sourcemaps
        all_errors = []
        sourcemap_applied = False

        # can't fetch source if there's no filename present or no line
        if not frame.get('abs_path') or not frame.get('lineno'):
            return

        # can't fetch if this is internal node module as well
        # therefore we only process user-land frames (starting with /)
        # or those created by bundle/webpack internals
        if self.data.get('platform') == 'node' and \
                not frame.get('abs_path').startswith(('/', 'app:', 'webpack:')):
            return

        errors = cache.get_errors(frame['abs_path'])
        if errors:
            all_errors.extend(errors)

        # This might fail but that's okay, we try with a different path a
        # bit later down the road.
        source = self.get_sourceview(frame['abs_path'])

        in_app = None
        new_frame = dict(frame)
        raw_frame = dict(frame)

        sourcemap_url, sourcemap_view = sourcemaps.get_link(frame['abs_path'])
        self.sourcemaps_touched.add(sourcemap_url)
        if sourcemap_view and frame.get('colno') is None:
            all_errors.append({
                'type': EventError.JS_NO_COLUMN,
                'url': http.expose_url(frame['abs_path']),
            })
        elif sourcemap_view:
            if is_data_uri(sourcemap_url):
                sourcemap_label = frame['abs_path']
            else:
                sourcemap_label = sourcemap_url

            sourcemap_label = http.expose_url(sourcemap_label)

            if frame.get('function'):
                minified_function_name = frame['function']
                minified_source = self.get_sourceview(frame['abs_path'])
            else:
                minified_function_name = minified_source = None

            try:
                # Errors are 1-indexed in the frames, so we need to -1 to get
                # zero-indexed value from tokens.
                assert frame['lineno'] > 0, "line numbers are 1-indexed"
                token = sourcemap_view.lookup(frame['lineno'] - 1,
                                              frame['colno'] - 1,
                                              minified_function_name,
                                              minified_source)
            except Exception:
                token = None
                all_errors.append({
                    'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                    'column': frame.get('colno'),
                    'row': frame.get('lineno'),
                    'source': frame['abs_path'],
                    'sourcemap': sourcemap_label,
                })

            # persist the token so that we can find it later
            processable_frame.data['token'] = token

            # Store original data in annotation
            new_frame['data'] = dict(frame.get('data') or {},
                                     sourcemap=sourcemap_label)

            sourcemap_applied = True

            if token is not None:
                abs_path = urljoin(sourcemap_url, token.src)

                logger.debug('Mapping compressed source %r to mapping in %r',
                             frame['abs_path'], abs_path)
                source = self.get_sourceview(abs_path)

            if source is None:
                errors = cache.get_errors(abs_path)
                if errors:
                    all_errors.extend(errors)
                else:
                    all_errors.append({
                        'type': EventError.JS_MISSING_SOURCE,
                        'url': http.expose_url(abs_path),
                    })

            if token is not None:
                # the tokens are zero indexed, so offset correctly
                new_frame['lineno'] = token.src_line + 1
                new_frame['colno'] = token.src_col + 1

                # Try to use the function name we got from symbolic
                original_function_name = token.function_name

                # In the ideal case we can use the function name from the
                # frame and the location to resolve the original name
                # through the heuristics in our sourcemap library.
                if original_function_name is None:
                    last_token = None

                    # Find the previous token for function name handling as a
                    # fallback.
                    if processable_frame.previous_frame and \
                       processable_frame.previous_frame.processor is self:
                        last_token = processable_frame.previous_frame.data.get(
                            'token')
                        if last_token:
                            original_function_name = last_token.name

                if original_function_name is not None:
                    new_frame['function'] = original_function_name

                filename = token.src
                # special case webpack support
                # abs_path will always be the full path with webpack:/// prefix.
                # filename will be relative to that
                if abs_path.startswith('webpack:'):
                    filename = abs_path
                    # webpack seems to use ~ to imply "relative to resolver root"
                    # which is generally seen for third party deps
                    # (i.e. node_modules)
                    if '/~/' in filename:
                        filename = '~/' + abs_path.split('/~/', 1)[-1]
                    else:
                        filename = filename.split('webpack:///', 1)[-1]

                    # As noted above:
                    # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
                    # * [node] same goes for `./node_modules/`, which is used when bundling node apps
                    # * [node] and webpack, which includes its own code to bootstrap all modules and its internals
                    #   eg. webpack:///webpack/bootstrap, webpack:///external
                    if filename.startswith('~/') or \
                            filename.startswith('./node_modules/') or \
                            not filename.startswith('./'):
                        in_app = False
                    # And conversely, local dependencies start with './'
                    elif filename.startswith('./'):
                        in_app = True

                    # We want to explicitly generate a webpack module name
                    new_frame['module'] = generate_module(filename)

                if abs_path.startswith('app:'):
                    if filename and NODE_MODULES_RE.search(filename):
                        in_app = False
                    else:
                        in_app = True

                new_frame['abs_path'] = abs_path
                new_frame['filename'] = filename
                if not frame.get('module') and abs_path.startswith(
                    ('http:', 'https:', 'webpack:', 'app:')):
                    new_frame['module'] = generate_module(abs_path)

        elif sourcemap_url:
            new_frame['data'] = dict(new_frame.get('data') or {},
                                     sourcemap=http.expose_url(sourcemap_url))

        # TODO: theoretically a minified source could point to
        # another mapped, minified source
        changed_frame = self.expand_frame(new_frame, source=source)

        # If we did not manage to match but we do have a line or column
        # we want to report an error here.
        if not new_frame.get('context_line') \
           and source and \
           new_frame.get('colno') is not None:
            all_errors.append({
                'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                'column': new_frame['colno'],
                'row': new_frame['lineno'],
                'source': new_frame['abs_path'],
            })

        changed_raw = sourcemap_applied and self.expand_frame(raw_frame)
        if sourcemap_applied or all_errors or changed_frame or \
           changed_raw:
            if in_app is not None:
                new_frame['in_app'] = in_app
                raw_frame['in_app'] = in_app
            return [new_frame
                    ], [raw_frame] if changed_raw else None, all_errors
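In the branch above, abs_path is built with urljoin(sourcemap_url, token.src). A small sketch of that resolution with invented URLs; note that a token source that already carries its own scheme (such as webpack:///) is returned unchanged by urljoin:

from urllib.parse import urljoin

sourcemap_url = 'https://cdn.example.com/assets/js/app.min.js.map'

# Relative token sources resolve against the sourcemap's directory.
print(urljoin(sourcemap_url, '../src/app.js'))
# -> https://cdn.example.com/assets/src/app.js

# Sources with their own scheme pass through untouched.
print(urljoin(sourcemap_url, 'webpack:///./src/app.js'))
# -> webpack:///./src/app.js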
Example #44
0
def skeletonize(packages, output_dir=".", version=None, recursive=False,
                all_urls=False, pypi_url='https://pypi.io/pypi/', noprompt=False,
                version_compare=False, python_version=default_python, manual_url=False,
                all_extras=False, noarch_python=False, config=None, setup_options=None,
                extra_specs=[],
                pin_numpy=False):
    package_dicts = {}

    if not setup_options:
        setup_options = []

    if isinstance(setup_options, string_types):
        setup_options = [setup_options]

    if not config:
        config = Config()

    created_recipes = []
    while packages:
        package = packages.pop()
        created_recipes.append(package)

        is_url = ':' in package

        if is_url:
            package_pypi_url = ''
        else:
            package_pypi_url = urljoin(pypi_url, '/'.join((package, 'json')))

        if not is_url:
            dir_path = join(output_dir, package.lower())
            if exists(dir_path) and not version_compare:
                raise RuntimeError("directory already exists: %s" % dir_path)
        d = package_dicts.setdefault(package,
            {
                'packagename': package.lower(),
                'run_depends': '',
                'build_depends': '',
                'entry_points': '',
                'test_commands': '',
                'tests_require': '',
            })
        if is_url:
            del d['packagename']

        if is_url:
            d['version'] = 'UNKNOWN'
            # Make sure there is always something to pass in for this
            pypi_data = {}
        else:
            sort_by_version = lambda l: sorted(l, key=parse_version)

            pypi_resp = requests.get(package_pypi_url)

            if pypi_resp.status_code != 200:
                sys.exit("Request to fetch %s failed with status: %d"
                        % (package_pypi_url, pypi_resp.status_code))

            pypi_data = pypi_resp.json()

            versions = sort_by_version(pypi_data['releases'].keys())

            if version_compare:
                version_compare(versions)
            if version:
                if version not in versions:
                    sys.exit("Error: Version %s of %s is not available on PyPI."
                             % (version, package))
                d['version'] = version
            else:
                # select the most visible version from PyPI.
                if not versions:
                    sys.exit("Error: Could not find any versions of package %s" % package)
                if len(versions) > 1:
                    print("Warning, the following versions were found for %s" %
                          package)
                    for ver in versions:
                        print(ver)
                    print("Using %s" % versions[-1])
                    print("Use --version to specify a different version.")
                d['version'] = versions[-1]

        data, d['pypiurl'], d['filename'], d['digest'] = get_download_data(pypi_data,
                                                                           package,
                                                                           d['version'],
                                                                           is_url, all_urls,
                                                                           noprompt, manual_url)

        d['import_tests'] = ''

        # Get summary and description directly from the metadata returned
        # from PyPI. summary will be pulled from package information in
        # get_package_metadata or a default value set if it turns out that
        # data['summary'] is empty.
        d['summary'] = data.get('summary', '')
        d['description'] = data.get('description', '')
        get_package_metadata(package, d, data, output_dir, python_version,
                             all_extras, recursive, created_recipes, noarch_python,
                             noprompt, packages, extra_specs, config=config,
                             setup_options=setup_options)

        # Set these *after* get_package_metadata so that the preferred hash
        # can be calculated from the downloaded file, if necessary.
        d['hash_type'] = d['digest'][0]
        d['hash_value'] = d['digest'][1]

        # Change requirements to use format that guarantees the numpy
        # version will be pinned when the recipe is built and that
        # the version is included in the build string.
        if pin_numpy:
            for depends in ['build_depends', 'run_depends']:
                deps = d[depends]
                numpy_dep = [idx for idx, dep in enumerate(deps)
                             if 'numpy' in dep]
                if numpy_dep:
                    # Turns out this needs to be inserted before the rest
                    # of the numpy spec.
                    deps.insert(numpy_dep[0], 'numpy x.x')
                    d[depends] = deps

    for package in package_dicts:
        d = package_dicts[package]
        name = d['packagename']
        makedirs(join(output_dir, name))
        print("Writing recipe for %s" % package.lower())
        with open(join(output_dir, name, 'meta.yaml'), 'w') as f:
            rendered_recipe = PYPI_META_HEADER.format(**d)

            ordered_recipe = ruamel_yaml.comments.CommentedMap()
            # Create all keys in the expected order
            for key in EXPECTED_SECTION_ORDER:
                try:
                    ordered_recipe[key] = PYPI_META_STATIC[key]
                except KeyError:
                    ordered_recipe[key] = ruamel_yaml.comments.CommentedMap()

            if d['entry_points']:
                ordered_recipe['build']['entry_points'] = d['entry_points']

            if noarch_python:
                ordered_recipe['build']['noarch'] = 'python'

            ordered_recipe['build']['script'] = 'python setup.py install ' + ' '.join(setup_options)
            if any(re.match(r'^setuptools(?:\s|$)', req) for req in d['build_depends']):
                ordered_recipe['build']['script'] += ('--single-version-externally-managed '
                                                      '--record=record.txt')

            # Always require python as a dependency
            ordered_recipe['requirements'] = ruamel_yaml.comments.CommentedMap()
            ordered_recipe['requirements']['build'] = ['python'] + ensure_list(d['build_depends'])
            ordered_recipe['requirements']['run'] = ['python'] + ensure_list(d['run_depends'])

            if d['import_tests']:
                ordered_recipe['test']['imports'] = d['import_tests']

            if d['test_commands']:
                ordered_recipe['test']['commands'] = d['test_commands']

            if d['tests_require']:
                ordered_recipe['test']['requires'] = d['tests_require']

            ordered_recipe['about'] = ruamel_yaml.comments.CommentedMap()

            for key in ABOUT_ORDER:
                try:
                    ordered_recipe['about'][key] = d[key]
                except KeyError:
                    ordered_recipe['about'][key] = ''
            ordered_recipe['extra']['recipe-maintainers'] = ''

            # Prune any top-level sections that are empty
            for key in EXPECTED_SECTION_ORDER:
                if not ordered_recipe[key]:
                    del ordered_recipe[key]
                else:
                    rendered_recipe += ruamel_yaml.dump({key: ordered_recipe[key]},
                                                Dumper=ruamel_yaml.RoundTripDumper,
                                                default_flow_style=False,
                                                width=200)
                    rendered_recipe += '\n'
            # make sure that recipe ends with one newline, by god.
            rendered_recipe = rendered_recipe.rstrip() + '\n'

            # This hackery is necessary because
            #  - the default indentation of lists is not what we would like.
            #    Ideally we'd contact the ruamel.yaml author to find the right
            #    way to do this. See this PR thread for more:
            #    https://github.com/conda/conda-build/pull/2205#issuecomment-315803714
            #    Brute force fix below.

            # Fix the indents
            recipe_lines = []
            for line in rendered_recipe.splitlines():
                match = re.search(r'^\s+(-) ', line, flags=re.MULTILINE)
                if match:
                    pre, sep, post = line.partition('-')
                    sep = '  ' + sep
                    line = pre + sep + post
                recipe_lines.append(line)
            rendered_recipe = '\n'.join(recipe_lines)

            f.write(rendered_recipe)
Example #45
0
def album_url(albumid):
    return urljoin(BASE_URL, 'release/' + albumid)
Example #46
0
def track_url(trackid):
    return urljoin(BASE_URL, 'recording/' + trackid)
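Both helpers assume that BASE_URL ends with a trailing slash; the value used below is a hypothetical stand-in, not the project's real constant:

from urllib.parse import urljoin

BASE_URL = 'https://musicbrainz.org/ws/2/'  # assumed for illustration

print(urljoin(BASE_URL, 'release/' + 'some-album-id'))
# -> https://musicbrainz.org/ws/2/release/some-album-id

print(urljoin('https://musicbrainz.org/ws/2', 'release/some-album-id'))
# -> https://musicbrainz.org/ws/release/some-album-id  (no trailing slash: '2' is dropped)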
Example #47
0
File: web.py Project: nvarini1/spack
def _spider(url, visited, root, depth, max_depth, raise_on_error):
    """Fetches URL and any pages it links to up to max_depth.

       depth should initially be zero, and max_depth is the max depth of
       links to follow from the root.

       Prints out a warning only if the root can't be fetched; it ignores
       errors with pages that the root links to.

       Returns a tuple of:
       - pages: dict of pages visited (URL) mapped to their full text.
       - links: set of links encountered while visiting the pages.
    """
    pages = {}     # dict from page URL -> text content.
    links = set()  # set of all links seen on visited pages.

    # root may end with index.html -- chop that off.
    if root.endswith('/index.html'):
        root = re.sub('/index.html$', '', root)

    try:
        context = None
        if sys.version_info < (2, 7, 9) or \
                ((3,) < sys.version_info < (3, 4, 3)):
            if not spack.insecure:
                tty.warn("Spack will not check SSL certificates. You need to "
                         "update your Python to enable certificate "
                         "verification.")
        else:
            # We explicitly create default context to avoid error described in
            # https://blog.sucuri.net/2016/03/beware-unverified-tls-certificates-php-python.html
            context = ssl._create_unverified_context() \
                if spack.insecure \
                else ssl.create_default_context()

        # Make a HEAD request first to check the content type.  This lets
        # us ignore tarballs and gigantic files.
        # It would be nice to do this with the HTTP Accept header to avoid
        # one round-trip.  However, most servers seem to ignore the header
        # if you ask for a tarball with Accept: text/html.
        req = Request(url)
        req.get_method = lambda: "HEAD"
        resp = _urlopen(req, timeout=_timeout, context=context)

        if "Content-type" not in resp.headers:
            tty.debug("ignoring page " + url)
            return pages, links

        if not resp.headers["Content-type"].startswith('text/html'):
            tty.debug("ignoring page " + url + " with content type " +
                      resp.headers["Content-type"])
            return pages, links

        # Do the real GET request when we know it's just HTML.
        req.get_method = lambda: "GET"
        response = _urlopen(req, timeout=_timeout, context=context)
        response_url = response.geturl()

        # Read the page and stick it in the map we'll return
        page = response.read().decode('utf-8')
        pages[response_url] = page

        # Parse out the links in the page
        link_parser = LinkParser()
        subcalls = []
        link_parser.feed(page)

        while link_parser.links:
            raw_link = link_parser.links.pop()
            abs_link = urljoin(response_url, raw_link.strip())

            links.add(abs_link)

            # Skip stuff that looks like an archive
            if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES):
                continue

            # Skip things outside the root directory
            if not abs_link.startswith(root):
                continue

            # Skip already-visited links
            if abs_link in visited:
                continue

            # If we're not at max depth, follow links.
            if depth < max_depth:
                subcalls.append((abs_link, visited, root,
                                 depth + 1, max_depth, raise_on_error))
                visited.add(abs_link)

        if subcalls:
            pool = NonDaemonPool(processes=len(subcalls))
            try:
                results = pool.map(_spider_wrapper, subcalls)

                for sub_pages, sub_links in results:
                    pages.update(sub_pages)
                    links.update(sub_links)

            finally:
                pool.terminate()
                pool.join()

    except URLError as e:
        tty.debug(e)

        if hasattr(e, 'reason') and isinstance(e.reason, ssl.SSLError):
            tty.warn("Spack was unable to fetch url list due to a certificate "
                     "verification problem. You can try running spack -k, "
                     "which will not check SSL certificates. Use this at your "
                     "own risk.")

        if raise_on_error:
            raise NoNetworkConnectionError(str(e), url)

    except HTMLParseError as e:
        # This error indicates that Python's HTML parser sucks.
        msg = "Got an error parsing HTML."

        # Pre-2.7.3 Pythons in particular have rather prickly HTML parsing.
        if sys.version_info[:3] < (2, 7, 3):
            msg += " Use Python 2.7.3 or newer for better HTML parsing."

        tty.warn(msg, url, "HTMLParseError: " + str(e))

    except Exception as e:
        # Other types of errors are completely ignored, except in debug mode.
        tty.debug("Error in _spider: %s:%s" % (type(e), e),
                  traceback.format_exc())

    return pages, links
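A short sketch of the link normalisation done with urljoin(response_url, raw_link) above; the page and link values are invented:

from urllib.parse import urljoin

response_url = 'https://example.com/downloads/index.html'

print(urljoin(response_url, 'foo-1.0.tar.gz'))   # -> https://example.com/downloads/foo-1.0.tar.gz
print(urljoin(response_url, '../other/'))        # -> https://example.com/other/

# Absolute links come back unchanged, which is why the startswith(root) check
# is enough to keep the spider inside the site.
print(urljoin(response_url, 'https://mirror.example.org/pkg.tar.gz'))
# -> https://mirror.example.org/pkg.tar.gz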
Example #48
0
def setup_injection_workflow(workflow,
                             output_dir=None,
                             inj_section_name='injections',
                             exttrig_file=None,
                             tags=None):
    """
    This function is the gateway for setting up injection-generation jobs in a
    workflow. It should be possible for this function to support a number
    of different ways/codes that could be used for doing this, however as this
    will presumably stay as a single call to a single code (which need not be
    inspinj) there are currently no subfunctions in this module.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which injection files will be stored.
    inj_section_name : string (optional, default='injections')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given to
        the code at run time.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.

    Returns
    --------
    inj_files : pycbc.workflow.core.FileList
        The list of injection files created by this call.
    inj_tags : list of strings
        The tag corresponding to each injection file and used to uniquely
        identify them. The FileList class contains functions to search
        based on tags.
    """
    if tags is None:
        tags = []
    logging.info("Entering injection module.")
    make_analysis_dir(output_dir)

    # Get full analysis segment for output file naming
    full_segment = workflow.analysis_time
    ifos = workflow.ifos

    # Identify which injections to do by presence of sub-sections in
    # the configuration file
    inj_tags = []
    inj_files = FileList([])

    for section in workflow.cp.get_subsections(inj_section_name):
        inj_tag = section.upper()
        curr_tags = tags + [inj_tag]

        # Parse for options in ini file
        injection_method = workflow.cp.get_opt_tags("workflow-injections",
                                                    "injections-method",
                                                    curr_tags)

        if injection_method in ["IN_WORKFLOW", "AT_RUNTIME"]:
            # FIXME: Add ability to specify different exes
            inj_job = LalappsInspinjExecutable(workflow.cp,
                                               inj_section_name,
                                               out_dir=output_dir,
                                               ifos='HL',
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment)
            if injection_method == "AT_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]
            inj_files.append(inj_file)
        elif injection_method == "PREGENERATED":
            injectionFilePath = workflow.cp.get_opt_tags(
                "workflow-injections", "injections-pregenerated-file",
                curr_tags)
            injectionFilePath = resolve_url(injectionFilePath)
            file_url = urljoin('file:', pathname2url(injectionFilePath))
            inj_file = File('HL',
                            'PREGEN_inj_file',
                            full_segment,
                            file_url,
                            tags=curr_tags)
            inj_file.PFN(injectionFilePath, site='local')
            inj_files.append(inj_file)
        elif injection_method in ["IN_COH_PTF_WORKFLOW", "AT_COH_PTF_RUNTIME"]:
            inj_job = LalappsInspinjExecutable(workflow.cp,
                                               inj_section_name,
                                               out_dir=output_dir,
                                               ifos=ifos,
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment, exttrig_file)
            if injection_method == "AT_COH_PTF_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections", "em-bright-only"):
                em_filter_job = PycbcDarkVsBrightInjectionsExecutable(
                    workflow.cp,
                    'em_bright_filter',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = em_filter_job.create_node(inj_file, full_segment,
                                                 curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "do-jitter-skyloc"):
                jitter_job = LigolwCBCJitterSkylocExecutable(
                    workflow.cp,
                    'jitter_skyloc',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = jitter_job.create_node(inj_file, full_segment,
                                              curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "do-align-total-spin"):
                align_job = LigolwCBCAlignTotalSpinExecutable(
                    workflow.cp,
                    'align_total_spin',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = align_job.create_node(inj_file, full_segment, curr_tags)

                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            inj_files.append(inj_file)
        else:
            err = "Injection method must be one of IN_WORKFLOW, "
            err += "AT_RUNTIME or PREGENERATED. Got %s." % (injection_method)
            raise ValueError(err)

        inj_tags.append(inj_tag)

    logging.info("Leaving injection module.")
    return inj_files, inj_tags
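The PREGENERATED branch turns a local file path into a file:// URL with urljoin('file:', pathname2url(path)). A minimal sketch of that conversion on a POSIX path; the path itself is made up:

from urllib.parse import urljoin
from urllib.request import pathname2url

injection_file_path = '/home/user/analysis/injections.xml'
print(urljoin('file:', pathname2url(injection_file_path)))
# -> file:///home/user/analysis/injections.xml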
Example #49
0
    def _get_system_stats(self, server, name, instance, tags):
        url = urljoin(server, "/_node/{0}/_system".format(name))

        # Fetch _system (Erlang) stats.
        return self.agent_check.get(url, instance, tags)
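Because the joined path starts with '/', urljoin keeps only the scheme and host of server and discards any path the base may already have. A small sketch with a hypothetical server value:

from urllib.parse import urljoin

server = 'https://couchdb.example.com:5984/some/prefix'
print(urljoin(server, "/_node/{0}/_system".format('couchdb@node1')))
# -> https://couchdb.example.com:5984/_node/couchdb@node1/_system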
Example #50
0
    def check(self, instance):
        kubelet_conn_info = get_connection_info()
        endpoint = kubelet_conn_info.get('url')
        if endpoint is None:
            raise CheckException(
                "Unable to detect the kubelet URL automatically.")

        self.kube_health_url = urljoin(endpoint, KUBELET_HEALTH_PATH)
        self.node_spec_url = urljoin(endpoint, NODE_SPEC_PATH)
        self.pod_list_url = urljoin(endpoint, POD_LIST_PATH)
        self.stats_url = urljoin(endpoint, STATS_PATH)
        self.instance_tags = instance.get('tags', [])
        self.kubelet_credentials = KubeletCredentials(kubelet_conn_info)

        # Test the kubelet health ASAP
        self._perform_kubelet_check(self.instance_tags)

        if 'cadvisor_metrics_endpoint' in instance:
            self.cadvisor_scraper_config['prometheus_url'] = instance.get(
                'cadvisor_metrics_endpoint',
                urljoin(endpoint, CADVISOR_METRICS_PATH))
        else:
            self.cadvisor_scraper_config['prometheus_url'] = instance.get(
                'metrics_endpoint', urljoin(endpoint, CADVISOR_METRICS_PATH))

        if 'metrics_endpoint' in instance:
            self.log.warning(
                'metrics_endpoint is deprecated, please specify cadvisor_metrics_endpoint instead.'
            )

        self.kubelet_scraper_config['prometheus_url'] = instance.get(
            'kubelet_metrics_endpoint', urljoin(endpoint,
                                                KUBELET_METRICS_PATH))

        # Kubelet credentials handling
        self.kubelet_credentials.configure_scraper(
            self.cadvisor_scraper_config)
        self.kubelet_credentials.configure_scraper(self.kubelet_scraper_config)

        # Legacy cadvisor support
        try:
            self.cadvisor_legacy_url = self.detect_cadvisor(
                endpoint, self.cadvisor_legacy_port)
        except Exception as e:
            self.log.debug(
                'cAdvisor not found, running in prometheus mode: %s', e)

        self.pod_list = self.retrieve_pod_list()
        self.pod_list_utils = PodListUtils(self.pod_list)

        self._report_node_metrics(self.instance_tags)
        self._report_pods_running(self.pod_list, self.instance_tags)
        self._report_container_spec_metrics(self.pod_list, self.instance_tags)
        self._report_container_state_metrics(self.pod_list, self.instance_tags)

        self.stats = self._retrieve_stats()
        self._report_ephemeral_storage_usage(self.pod_list, self.stats,
                                             self.instance_tags)
        self._report_system_container_metrics(self.stats, self.instance_tags)

        if self.cadvisor_legacy_url:  # Legacy cAdvisor
            self.log.debug('processing legacy cadvisor metrics')
            self.process_cadvisor(instance, self.cadvisor_legacy_url,
                                  self.pod_list, self.pod_list_utils)
        elif self.cadvisor_scraper_config['prometheus_url']:  # Prometheus
            self.log.debug('processing cadvisor metrics')
            self.process(self.cadvisor_scraper_config,
                         metric_transformers=self.transformers)

        if self.kubelet_scraper_config['prometheus_url']:  # Prometheus
            self.log.debug('processing kubelet metrics')
            self.process(self.kubelet_scraper_config,
                         metric_transformers=self.transformers)

        # Free up memory
        self.pod_list = None
        self.pod_list_utils = None
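The *_PATH constants used above are defined elsewhere in the check, so the values below are assumptions; the sketch only shows how the per-endpoint URLs are derived from the detected kubelet base URL:

from urllib.parse import urljoin

endpoint = 'https://10.0.0.1:10250'   # hypothetical kubelet URL

KUBELET_HEALTH_PATH = '/healthz'      # assumed value
POD_LIST_PATH = '/pods'               # assumed value

print(urljoin(endpoint, KUBELET_HEALTH_PATH))  # -> https://10.0.0.1:10250/healthz
print(urljoin(endpoint, POD_LIST_PATH))        # -> https://10.0.0.1:10250/pods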