Beispiel #1
0
def validate_url(url):
    """
    Validate an HTTP(S) URL as a link for a user's profile.

    Helper for use with Colander that validates a URL provided by a user as a
    link for their profile. A URL given without a scheme is re-parsed with
    "http://" prepended.

    :param url: the URL string provided by the user
    :returns: the normalized URL if it was successfully parsed
    :raises ValueError: if the scheme is not exactly "http" or "https", or if
        the URL has no network location (domain name)
    """

    # Minimal URL validation with urlparse. This is extremely lenient, we might
    # want to use something like https://github.com/kvesteri/validators instead.
    parsed_url = urlparse.urlparse(url)

    if not parsed_url.scheme:
        parsed_url = urlparse.urlparse("http://" + url)

    # Compare against the exact scheme names. A prefix match such as
    # re.match("https?", scheme) would also accept e.g. "httpx" or "https+x".
    if parsed_url.scheme not in ("http", "https"):
        raise ValueError('Links must have an "http" or "https" prefix')

    if not parsed_url.netloc:
        raise ValueError("Links must include a domain name")

    return parsed_url.geturl()
Beispiel #2
0
def validate_url(url):
    """
    Validate an HTTP(S) URL as a link for a user's profile.

    Helper for use with Colander that validates a URL provided by a user as a
    link for their profile. A URL given without a scheme is re-parsed with
    'http://' prepended.

    :param url: the URL string provided by the user
    :returns: the normalized URL if it was successfully parsed
    :raises ValueError: if the scheme is not exactly "http" or "https", or if
        the URL has no network location (domain name)
    """

    # Minimal URL validation with urlparse. This is extremely lenient, we might
    # want to use something like https://github.com/kvesteri/validators instead.
    parsed_url = urlparse.urlparse(url)

    if not parsed_url.scheme:
        parsed_url = urlparse.urlparse('http://' + url)

    # Only the exact schemes are acceptable. The previous prefix match
    # (re.match('https?', ...)) let schemes like "httpx" through.
    if parsed_url.scheme not in ('http', 'https'):
        raise ValueError('Links must have an "http" or "https" prefix')

    if not parsed_url.netloc:
        raise ValueError('Links must include a domain name')

    return parsed_url.geturl()
Beispiel #3
0
def chrome_manifest(request):
    """
    Render the Chrome extension's manifest.json template for this request.

    The ``version`` and ``version_name`` template values are derived from
    ``h.__version__`` and ``src`` from the URL of the request's context.
    """
    full_version = h.__version__

    # Chrome is strict about the format of the version string, so a
    # "<tag>+<distance>.<commit>" version is split into a dotted version
    # number and a separate version name.
    if '+' not in full_version:
        version, version_name = full_version, 'Official Build'
    else:
        tag, detail = full_version.split('+')
        distance, version_name = detail.split('.', 1)
        version = '{}.{}'.format(tag, distance)

    src = request.resource_url(request.context)

    # We need to use only the host and port for the CSP script-src when
    # developing. If we provide a path such as /assets the CSP check fails.
    # See:
    #
    #   https://developer.chrome.com/extensions/contentSecurityPolicy#relaxing-remote-script
    hostname = urlparse.urlparse(src).hostname
    if hostname not in ('localhost', '127.0.0.1'):
        src = urlparse.urljoin(src, request.webassets_env.url)

    return render(
        'h:browser/chrome/manifest.json.jinja2',
        {'src': src, 'version': version, 'version_name': version_name},
        request=request)
Beispiel #4
0
    def _find_http_or_https_uri(self, document):
        for docuri in document.document_uris:
            uri = urlparse.urlparse(docuri.uri)
            if uri.scheme in ['http', 'https']:
                return uri

        return None
Beispiel #5
0
def wildcard_uri_is_valid(wildcard_uri):
    """
    Return True if uri contains wildcards in appropriate places, return False otherwise.

    *'s and _'s are not permitted in the scheme or netloc aka:
        scheme://netloc/path;parameters?query#fragment.

    If a wildcard is near the beginning of a url, elasticsearch will find a large portion of the
    annotations because it is based on lucene which searches from left to right. In order to
    avoid the performance implications of having such a large initial search space, wildcards are
    not allowed in the beginning of the url.

    A uri that contains no wildcard at all is also reported as invalid.
    """
    wildcards = ("*", "_")

    if not any(char in wildcard_uri for char in wildcards):
        return False

    # Note: according to the URL spec _'s are allowed in the domain so this may be
    # something that needs to be supported at a later date.
    parts = urlparse.urlparse(wildcard_uri)
    if not parts.scheme:
        return False

    return not any(char in parts.netloc for char in wildcards)
Beispiel #6
0
    def link_text(self):
        """
        Return some link text for this document.

        The result is a text representation of this document suitable for
        use as the link text in a link like <a ...>{link_text}</a>.

        The escaped title is returned unless it starts with http:// or
        https:// (compared case-insensitively); in that case the scheme is
        dropped and the unquoted netloc + path is returned instead.

        The link text is escaped and safe for rendering.

        If it contains escaped characters the returned value will be a
        Markup object so it doesn't get double-escaped.

        """
        title = jinja2.escape(self.title)

        # Sometimes self.title is the annotated document's URI (if the document
        # has no title). In those cases we want to remove the http(s):// from
        # the front and unquote it for link text.
        if not title.lower().startswith(('http://', 'https://')):
            return title

        parts = urlparse.urlparse(title)
        return url_unquote(parts.netloc + parts.path)
Beispiel #7
0
Datei: query.py Projekt: kael/h
def wildcard_uri_is_valid(wildcard_uri):
    """
    Return True if uri contains wildcards in appropriate places, return False otherwise.

    Wildcards ("*" and "?") are not permitted in the scheme or netloc of the
    uri. A uri without any wildcard is reported as invalid too.
    """
    if not ("*" in wildcard_uri or "?" in wildcard_uri):
        return False

    try:
        stripped = wildcard_uri.replace("*", "").replace("?", "")
        parsed = urlparse.urlparse(stripped)

        # Keep only the scheme and netloc, with a substitute path value "p" so
        # that uri's that only have a scheme and path are still valid. The
        # trailing "p" is then cut off again.
        rebuilt = urlparse.urlunparse(
            (parsed.scheme, parsed.netloc, "p", "", "", ""))
        prefix = rebuilt[:-1]
    except ValueError:
        return False

    # If a wildcard was in the scheme the prefix may come back as "" (a falsey
    # value). Otherwise the original uri must start with the wildcard-free
    # prefix, i.e. no wildcard was removed from the scheme or netloc.
    return bool(prefix) and wildcard_uri.startswith(prefix)
Beispiel #8
0
    def hostname_or_filename(self):
        """
        Return the hostname or filename of this document.

        When ``self.filename`` is set, the unquoted filename is returned.
        Otherwise the hostname part of ``self.uri`` is returned, e.g.
        'www.example.com' for 'http://www.example.com/example.html', or an
        empty string if the URI has no hostname.

        The returned hostname or filename is escaped and safe to be rendered.

        If it contains escaped characters the returned value will be a Markup
        object so that it doesn't get double-escaped.

        """
        if self.filename:
            return jinja2.escape(url_unquote(self.filename))

        # urlparse()'s .hostname is sometimes None, so fall back to ''.
        return jinja2.escape(urlparse.urlparse(self.uri).hostname or '')
Beispiel #9
0
    def link_text(self):
        """
        Return some link text for this document.

        Produces text suitable for use as the link text in a link like
        <a ...>{link_text}</a>: the escaped title, or, when the title is
        itself an http(s) URL, the unquoted netloc + path of that URL.

        The link text is escaped and safe for rendering.

        If it contains escaped characters the returned value will be a
        Markup object so it doesn't get double-escaped.

        """
        title = jinja2.escape(self.title)
        lowered = title.lower()

        # Sometimes self.title is the annotated document's URI (if the document
        # has no title). In those cases we want to remove the http(s):// from
        # the front and unquote it for link text.
        title_is_url = (lowered.startswith('http://')
                        or lowered.startswith('https://'))
        if not title_is_url:
            return title

        parsed_title = urlparse.urlparse(title)
        return url_unquote(parsed_title.netloc + parsed_title.path)
Beispiel #10
0
    def hostname_or_filename(self):
        """
        Return the hostname or filename of this document.

        If ``self.filename`` is truthy the unquoted filename is used.
        Otherwise the hostname part of the document's URI is used, e.g.
        'www.example.com' for 'http://www.example.com/example.html'.

        The returned hostname or filename is escaped and safe to be rendered.

        If it contains escaped characters the returned value will be a Markup
        object so that it doesn't get double-escaped.

        """
        if not self.filename:
            parsed_uri = urlparse.urlparse(self.uri)

            # urlparse()'s .hostname is sometimes None.
            text = parsed_uri.hostname or ''
        else:
            text = url_unquote(self.filename)

        return jinja2.escape(text)
Beispiel #11
0
    def _render_web_message_response(self, redirect_uri):
        print("_render_web_message_response", redirect_uri)

        request_filtred = self.request.url
        try:
            foundscheme = re.search('origin=(.+?)%', request_filtred).group(1)
            foundnetloc = re.search('%3A%2F%2F(.+?)&response_mode',
                                    request_filtred).group(1)
        except AttributeError:
            # AAA, ZZZ not found in the original string
            foundscheme = ''  # apply your error handling
            foundnetloc = ''
        print("found", foundscheme)
        print("found", foundnetloc)

        location = urlparse.urlparse(redirect_uri)
        print(location)
        params = urlparse.parse_qs(location.query)
        if (foundscheme == ''):
            origin = '{l.scheme}://{l.netloc}'.format(l=location)
        else:
            origin = foundscheme + '://' + foundnetloc
        print(origin)
        state = None
        states = params.get('state', [])
        if states:
            state = states[0]

        return {
            'code': params.get('code', [])[0],
            'origin': origin,
            'state': state,
        }
Beispiel #12
0
def chrome_manifest(request):
    """
    Render the Chrome extension manifest template for this request.

    Template values: ``src`` (from the request context's resource URL),
    ``version`` and ``version_name`` (both derived from ``h.__version__``).
    """
    src = request.resource_url(request.context)

    # We need to use only the host and port for the CSP script-src when
    # developing. If we provide a path such as /assets the CSP check fails.
    # See:
    #
    #   https://developer.chrome.com/extensions/contentSecurityPolicy#relaxing-remote-script
    is_local = urlparse.urlparse(src).hostname in ('localhost', '127.0.0.1')
    if not is_local:
        src = urlparse.urljoin(src, request.webassets_env.url)

    # Chrome is strict about the format of the version string
    if '+' in h.__version__:
        tag, detail = h.__version__.split('+')
        distance, commit = detail.split('.', 1)
        value = {
            'src': src,
            'version': '{}.{}'.format(tag, distance),
            'version_name': commit
        }
    else:
        value = {
            'src': src,
            'version': h.__version__,
            'version_name': 'Official Build'
        }

    return render('h:browser/chrome/manifest.json.jinja2', value,
                  request=request)
Beispiel #13
0
    def _find_http_or_https_uri(self, document):
        for docuri in document.document_uris:
            uri = urlparse.urlparse(docuri.uri)
            if uri.scheme in ['http', 'https']:
                return uri

        return None
 def first_http_url(type_=None):
     """
     Return the first http(s) URL of the given type from self.document_uris.

     ``self`` comes from the enclosing scope. If ``type_`` is None the type
     is not checked. Returns None when nothing matches.
     """
     for document_uri in self.document_uris:
         uri = document_uri.uri
         wrong_type = type_ is not None and document_uri.type != type_
         if wrong_type:
             continue
         if urlparse.urlparse(uri).scheme in ["http", "https"]:
             return document_uri.uri
     return None
 def first_http_url(type_=None):
     """
     Return the first matching http(s) URL found in self.document_uris.

     ``self`` is taken from the enclosing scope. Entries whose ``type``
     differs from ``type_`` are skipped unless ``type_`` is None. The result
     is None if no entry matches.
     """
     http_schemes = ["http", "https"]
     for document_uri in self.document_uris:
         uri = document_uri.uri
         type_matches = type_ is None or document_uri.type == type_
         if type_matches and urlparse.urlparse(uri).scheme in http_schemes:
             return document_uri.uri
Beispiel #16
0
def wildcard_uri_is_valid(wildcard_uri):
    """
    Return True if uri contains wildcards in appropriate places, return False otherwise.

    *'s are not permitted in the scheme or netloc. ?'s are not permitted in the scheme.
    A uri that contains neither wildcard is reported as invalid.
    """
    if not ("*" in wildcard_uri or "?" in wildcard_uri):
        return False

    # Parse with the "?" wildcards removed so they are not taken for the start
    # of the query string, then check the "*" placement.
    without_question_marks = urlparse.urlparse(wildcard_uri.replace("?", ""))
    if not without_question_marks.scheme:
        return False
    if "*" in without_question_marks.netloc:
        return False

    # Parse again with only the "*" wildcards removed: a "?" inside the scheme
    # leaves the parsed scheme empty.
    without_asterisks = urlparse.urlparse(wildcard_uri.replace("*", ""))
    return bool(without_asterisks.scheme)
    def test_get_redirects_to_login_when_not_authenticated(self, controller, pyramid_request, view_name):
        """Calling the view raises a redirect to /login carrying next and for_oauth."""
        with pytest.raises(httpexceptions.HTTPFound) as exc:
            getattr(controller, view_name)()

        redirect = urlparse.urlparse(exc.value.location)
        expected_query = {'next': [pyramid_request.url],
                          'for_oauth': ['True']}

        assert redirect.path == '/login'
        assert urlparse.parse_qs(redirect.query) == expected_query
Beispiel #18
0
def bad_csrf_token_html(context, request):
    """
    Return the values for the response to a request with a bad CSRF token.

    Sets the response status code to 403 and returns a dict whose
    'login_path' is the login route with a ``next`` query parameter. ``next``
    is the referer's path when the referer's hostname equals
    ``request.domain``, and '/' otherwise.
    """
    request.response.status_code = 403

    referer = urlparse.urlparse(request.referer or '')
    same_host = referer.hostname == request.domain
    next_path = referer.path if same_host else '/'

    return {
        'login_path': request.route_path('login', _query={'next': next_path}),
    }
Beispiel #19
0
def bad_csrf_token_html(context, request):
    """
    Respond with status 403 and a login path for a bad CSRF token.

    The returned dict's 'login_path' points at the login route. Its ``next``
    query parameter is the path of the referring page if that page's hostname
    is ``request.domain``, else '/'.
    """
    request.response.status_code = 403

    parsed_referer = urlparse.urlparse(request.referer or '')
    if parsed_referer.hostname != request.domain:
        # Unknown or foreign referer: send the user to the front page.
        query = {'next': '/'}
    else:
        query = {'next': parsed_referer.path}

    return {'login_path': request.route_path('login', _query=query)}
Beispiel #20
0
def bad_csrf_token_html(context, request):
    """
    Return the values for the response to a request with a bad CSRF token.

    The response status code is set to 403. The returned "login_path" is the
    login route with a ``next`` parameter: the referer's path when the
    referer is on ``request.domain``, otherwise "/".
    """
    request.response.status_code = 403

    referer = urlparse.urlparse(request.referer or "")
    on_this_domain = referer.hostname == request.domain
    next_path = referer.path if on_this_domain else "/"

    return {
        "login_path": request.route_path("login", _query={"next": next_path})
    }
Beispiel #21
0
def bad_csrf_token_html(context, request):
    """
    Mark the response as 403 and return a login path for a bad CSRF token.

    ``next`` in the login path's query defaults to "/" and is replaced by the
    referer's path only if the referer's hostname matches ``request.domain``.
    """
    request.response.status_code = 403

    parsed_referer = urlparse.urlparse(request.referer or "")
    if parsed_referer.hostname == request.domain:
        query = {"next": parsed_referer.path}
    else:
        query = {"next": "/"}

    login_path = request.route_path("login", _query=query)
    return {"login_path": login_path}
Beispiel #22
0
def build_type_from_api_url(api_url):
    """
    Return the default build type ('production', 'staging' or 'dev')
    when building an extension that communicates with the given service.

    The decision is made on the exact netloc of ``api_url``; any netloc
    other than the two known hosts yields 'dev'.
    """
    build_types = {
        'hypothes.is': 'production',
        'stage.hypothes.is': 'staging',
    }
    return build_types.get(urlparse.urlparse(api_url).netloc, 'dev')
Beispiel #23
0
def build_type_from_api_url(api_url):
    """
    Return the default build type ('production', 'staging' or 'dev')
    when building an extension that communicates with the given service.

    Only the netloc of ``api_url`` is inspected, and it must match a known
    host exactly. Everything else is treated as a 'dev' build.
    """
    netloc = urlparse.urlparse(api_url).netloc

    if netloc == 'hypothes.is':
        return 'production'
    if netloc == 'stage.hypothes.is':
        return 'staging'
    return 'dev'
Beispiel #24
0
    def test_get_redirects_to_login_when_not_authenticated(
        self, controller, pyramid_request, view_name
    ):
        """Calling the view raises a redirect to /login carrying next and for_oauth."""
        with pytest.raises(httpexceptions.HTTPFound) as exc:
            getattr(controller, view_name)()

        redirect = urlparse.urlparse(exc.value.location)
        expected_query = {
            "next": [pyramid_request.url],
            "for_oauth": ["True"],
        }

        assert redirect.path == "/login"
        assert urlparse.parse_qs(redirect.query) == expected_query
Beispiel #25
0
def pretty_link(url):
    """
    Return a nicely formatted version of a URL.

    This strips off 'visual noise' from the URL including common schemes
    (HTTP, HTTPS), domain prefixes ('www.') and query strings. URLs with any
    other scheme are returned unchanged.
    """
    parsed = urlparse.urlparse(url)

    if parsed.scheme in ('http', 'https'):
        netloc = parsed.netloc
        host = netloc[4:] if netloc.startswith('www.') else netloc
        # Only netloc + path are kept, which drops the query and fragment.
        return url_unquote(host + parsed.path)

    return url
Beispiel #26
0
def tag_uri_for_annotation(annotation, annotation_url):
    """Return a tag URI (unique identifier) for the given annotation.

    Suitable for use as the value of the <id> element of an <entry> in an
    Atom feed, or the <guid> element of an <item> in an RSS feed.

    :param annotation: the annotation; its ``id`` becomes part of the tag URI
    :param annotation_url: callable that is given the annotation and returns
        its URL; the hostname of that URL is used as the tag's domain

    :returns: A tag URI (RFC 4151) for use as the ID for the Atom entry.
    :rtype: string

    """
    url = annotation_url(annotation)
    hostname = urlparse.urlparse(url).hostname
    return u"tag:{domain},{date}:{id_}".format(
        domain=hostname, date=FEED_TAG_DATE, id_=annotation.id)
Beispiel #27
0
def pretty_link(url):
    """
    Return a nicely formatted version of a URL.

    For http and https URLs the scheme, a leading 'www.' on the domain, and
    the query string are stripped as 'visual noise', and the rest is
    unquoted. Any other URL is handed back as given.
    """
    parts = urlparse.urlparse(url)
    if parts.scheme != 'http' and parts.scheme != 'https':
        return url

    www_prefix = 'www.'
    domain = parts.netloc
    if domain.startswith(www_prefix):
        domain = domain[len(www_prefix):]

    return url_unquote(domain + parts.path)
Beispiel #28
0
def tag_uri_for_annotation(annotation, annotation_url):
    """Return a tag URI (unique identifier) for the given annotation.

    Suitable for use as the value of the <id> element of an <entry> in an
    Atom feed, or the <guid> element of an <item> in an RSS feed.

    The tag is built from the hostname of ``annotation_url(annotation)``,
    the module's FEED_TAG_DATE and the annotation's id.

    :returns: A tag URI (RFC 4151) for use as the ID for the Atom entry.
    :rtype: string

    """
    parsed_url = urlparse.urlparse(annotation_url(annotation))
    tag_parts = {
        'domain': parsed_url.hostname,
        'date': FEED_TAG_DATE,
        'id_': annotation.id,
    }
    return u"tag:{domain},{date}:{id_}".format(**tag_parts)
Beispiel #29
0
    def _render_web_message_response(self, redirect_uri):
        location = urlparse.urlparse(redirect_uri)
        params = urlparse.parse_qs(location.query)
        origin = '{url.scheme}://{url.netloc}'.format(url=location)

        state = None
        states = params.get('state', [])
        if states:
            state = states[0]

        return {
            'code': params.get('code', [])[0],
            'origin': origin,
            'state': state,
        }
Beispiel #30
0
    def _render_web_message_response(self, redirect_uri):
        location = urlparse.urlparse(redirect_uri)
        params = urlparse.parse_qs(location.query)
        origin = '{l.scheme}://{l.netloc}'.format(l=location)

        state = None
        states = params.get('state', [])
        if states:
            state = states[0]

        return {
            'code': params.get('code', [])[0],
            'origin': origin,
            'state': state,
        }
Beispiel #31
0
    def _render_web_message_response(self, redirect_uri):
        location = urlparse.urlparse(redirect_uri)
        params = urlparse.parse_qs(location.query)
        origin = "{url.scheme}://{url.netloc}".format(url=location)

        state = None
        states = params.get("state", [])
        if states:
            state = states[0]

        return {
            "code": params.get("code", [])[0],
            "origin": origin,
            "state": state
        }
Beispiel #32
0
    def __init__(self, document, annotations=None):
        """
        Initialise from a document and, optionally, some annotations.

        ``uri`` and ``domain`` come from the document's ``web_uri`` when it
        has one. Without a web URI ``uri`` stays None and ``domain`` is the
        translated "Local file" label. Any given annotations are passed on
        to ``self.update()``.
        """
        self.annotations = []
        self.tags = set()
        self.users = set()
        self.uri = None

        self.title = document.title

        if not document.web_uri:
            self.domain = _('Local file')
        else:
            parsed_web_uri = urlparse.urlparse(document.web_uri)
            self.uri = parsed_web_uri.geturl()
            self.domain = parsed_web_uri.netloc

        if annotations:
            self.update(annotations)
Beispiel #33
0
        def first_http_url(type_=None):
            """
            Return this document's first http(s) URL of the given type.

            The document URIs are scanned in order. Entries of another type
            (when a type is given) and entries whose scheme is neither http
            nor https are skipped.

            Return None if this document doesn't have any http(s) URLs of the
            given type, or any http(s) URLs at all when no type is given.

            """
            for document_uri in self.document_uris:
                uri = document_uri.uri
                wrong_type = type_ is not None and document_uri.type != type_
                if wrong_type:
                    continue
                if urlparse.urlparse(uri).scheme in ['http', 'https']:
                    return document_uri.uri
            return None
Beispiel #34
0
def back_link(context, request):
    """
    A link which takes the user back to the previous page on the site.

    Returns a dict with "back_label", chosen from the path of the request's
    referrer (None when the referrer is neither the current user's
    activity search page nor a group page), and "back_location", the
    referrer itself.
    """
    referrer = request.referrer
    referrer_path = urlparse.urlparse(referrer or "").path
    profile_path = request.route_path(
        "activity.user_search", username=request.user.username
    )

    back_label = None
    if referrer_path == profile_path:
        back_label = _("Back to your profile page")
    elif _matches_route(referrer_path, request, "group_read"):
        back_label = _("Back to group overview page")

    return {"back_label": back_label, "back_location": referrer}
Beispiel #35
0
        def first_http_url(type_=None):
            """
            Return this document's first http(s) URL of the given type.

            If no type is given just return this document's first http(s)
            URL.

            Return None if this document doesn't have any matching http(s)
            URL.

            """
            http_schemes = ['http', 'https']
            for document_uri in self.document_uris:
                uri = document_uri.uri
                type_matches = type_ is None or document_uri.type == type_
                if not type_matches:
                    continue
                if urlparse.urlparse(uri).scheme in http_schemes:
                    return document_uri.uri
Beispiel #36
0
    def __init__(self, document, annotations=None):
        """
        Initialise from a document and, optionally, some annotations.

        The document is wrapped in a DocumentHTMLPresenter to obtain its web
        URI. With a web URI, ``uri`` and ``domain`` are taken from it;
        without one, ``uri`` stays None and ``domain`` is the translated
        "Local file" label. Any given annotations are passed to
        ``self.update()``.
        """
        self.annotations = []
        self.tags = set()
        self.users = set()
        self.uri = None

        self.title = document.title

        presented_document = presenters.DocumentHTMLPresenter(document)

        if not presented_document.web_uri:
            self.domain = _("Local file")
        else:
            parsed_web_uri = urlparse.urlparse(presented_document.web_uri)
            self.uri = parsed_web_uri.geturl()
            self.domain = parsed_web_uri.netloc

        if annotations:
            self.update(annotations)
Beispiel #37
0
    def __init__(self, document, annotations=None):
        """
        Set up empty collections and the document's title, URI and domain.

        The web URI is read through a DocumentHTMLPresenter for the
        document. When there is none, ``uri`` is left as None and ``domain``
        becomes the translated "Local file" label. If annotations are given
        they are handed to ``self.update()`` at the end.
        """
        self.annotations, self.tags, self.users = [], set(), set()
        self.uri = None

        self.title = document.title

        presenter = presenters.DocumentHTMLPresenter(document)

        if presenter.web_uri:
            web_uri_parts = urlparse.urlparse(presenter.web_uri)
            self.uri = web_uri_parts.geturl()
            self.domain = web_uri_parts.netloc
        else:
            self.domain = _("Local file")

        if annotations:
            self.update(annotations)
Beispiel #38
0
def _app_html_context(assets_env, api_url, service_url, ga_tracking_id,
                      sentry_public_dsn, websocket_url):
    """
    Return a dict of asset URLs and contents used by the sidebar app
    HTML template.

    :param assets_env: object whose ``urls(bundle_name)`` gives asset URLs
    :param api_url: value for the app config's 'apiUrl'
    :param service_url: service URL; passed through ``url_with_path`` and
        then used for 'serviceUrl' and as the base of 'register_url'
    :param ga_tracking_id: passed through as 'ga_tracking_id'
    :param sentry_public_dsn: if truthy, added to the app config as the
        'raven' DSN together with the release version
    :param websocket_url: if truthy, added to the app config as 'websocketUrl'
    :returns: dict of template values; 'app_config' is a JSON string and
        'angular_templates' is a list
    """

    if urlparse.urlparse(service_url).hostname == 'localhost':
        ga_cookie_domain = 'none'
    else:
        ga_cookie_domain = 'auto'

    # the serviceUrl parameter must contain a path element
    service_url = url_with_path(service_url)

    app_config = {
        'apiUrl': api_url,
        'serviceUrl': service_url,
    }

    if websocket_url:
        app_config['websocketUrl'] = websocket_url

    if sentry_public_dsn:
        app_config['raven'] = {
            'dsn': sentry_public_dsn,
            'release': __version__
        }

    # Build a real list rather than using map(): on Python 3 map() returns a
    # one-shot iterator, which would be empty on a second iteration.
    angular_templates = [_angular_template_context(name)
                         for name in ANGULAR_DIRECTIVE_TEMPLATES]

    return {
        'app_config': json.dumps(app_config),
        'angular_templates': angular_templates,
        'app_css_urls': assets_env.urls('app_css'),
        'app_js_urls': assets_env.urls('app_js'),
        'ga_tracking_id': ga_tracking_id,
        'ga_cookie_domain': ga_cookie_domain,
        'register_url': service_url + 'register',
    }
Beispiel #39
0
def back_link(context, request):
    """
    A link which takes the user back to the previous page on the site.

    The label depends on the path of the request's referrer: one label for
    the authenticated user's activity search page, another for a group page,
    and None for anything else. The referrer itself is returned as
    'back_location'.
    """
    referrer = request.referrer
    referrer_path = urlparse.urlparse(referrer or '').path
    profile_path = request.route_path(
        'activity.user_search',
        username=request.authenticated_user.username)

    back_label = None
    if referrer_path == profile_path:
        back_label = _('Back to your profile page')
    elif _matches_route(referrer_path, request, 'group_read'):
        back_label = _('Back to group overview page')

    return {'back_label': back_label, 'back_location': referrer}
Beispiel #40
0
def back_link(context, request):
    """
    A link which takes the user back to the previous page on the site.

    Returns 'back_label' (picked from the referrer's path, or None if the
    referrer is neither the current user's activity search page nor a group
    page) and 'back_location' (the referrer as given on the request).
    """
    referrer_path = urlparse.urlparse(request.referrer or '').path
    own_search_path = request.route_path('activity.user_search',
                                         username=request.user.username)

    if referrer_path == own_search_path:
        label = _('Back to your profile page')
    elif _matches_route(referrer_path, request, 'group_read'):
        label = _('Back to group overview page')
    else:
        label = None

    result = {'back_label': label}
    result['back_location'] = request.referrer
    return result
Beispiel #41
0
def wildcard_uri_is_valid(wildcard_uri):
    """
    Return True if uri contains wildcards in appropriate places, return False otherwise.

    *'s and _'s are not permitted in the scheme or netloc aka:
        scheme://netloc/path;parameters?query#fragment.

    If a wildcard is near the beginning of a url, elasticsearch will find a large portion of the
    annotations because it is based on lucene which searches from left to right. In order to
    avoid the performance implications of having such a large initial search space, wildcards are
    not allowed in the beginning of the url.
    """
    has_wildcard = "*" in wildcard_uri or "_" in wildcard_uri
    if not has_wildcard:
        return False

    # Note: according to the URL spec _'s are allowed in the domain so this may be
    # something that needs to be supported at a later date.
    parsed = urlparse.urlparse(wildcard_uri)
    netloc = parsed.netloc

    if not parsed.scheme:
        return False
    if "*" in netloc or "_" in netloc:
        return False
    return True
def _document_web_uri(document):
    for docuri in document.document_uris:
        uri = urlparse.urlparse(docuri.uri)
        if uri.scheme in ['http', 'https']:
            return docuri.uri
Beispiel #43
0
 def domain(user):
     """Return the netloc of the user's URI, or None if the user has no URI."""
     uri = user.uri
     return urlparse.urlparse(uri).netloc if uri else None
Beispiel #44
0
# -*- coding: utf-8 -*-
import os
from h._compat import urlparse

# Smart detect heroku stack and assume a trusted proxy.
# This is a convenience that should hopefully not be too surprising.
if 'heroku' in os.environ.get('LD_LIBRARY_PATH', ''):
    forwarded_allow_ips = '*'

# Take the statsd host from the STATSD_PORT_8125_UDP URL when it is set.
# The check is on the variable that is actually read: testing for STATSD_PORT
# and then indexing STATSD_PORT_8125_UDP raised KeyError whenever only the
# former was present in the environment.
if 'STATSD_PORT_8125_UDP' in os.environ:
    statsd_host = urlparse.urlparse(os.environ['STATSD_PORT_8125_UDP']).netloc


def post_fork(server, worker):
    """
    Apply gevent-related patches for the given server and worker.

    Sets ``gevent.hub.PYGTE279`` when ``_ssl`` has ``SSLContext`` but no
    ``_sslwrap``, and patches psycopg2 via psycogreen when the server's
    worker class has a truthy ``use_psycogreen`` attribute.

    NOTE(review): presumably registered as the server's post-fork hook and
    run in each worker process. Confirm against the server configuration.
    """
    # Support back-ported SSL changes on Debian / Ubuntu
    import _ssl
    import gevent.hub

    has_ssl_context = hasattr(_ssl, 'SSLContext')
    has_sslwrap = hasattr(_ssl, '_sslwrap')
    if has_ssl_context and not has_sslwrap:
        gevent.hub.PYGTE279 = True

    # Patch psycopg2 if we're asked to by the worker class
    if not getattr(server.worker_class, 'use_psycogreen', False):
        return

    import psycogreen.gevent
    psycogreen.gevent.patch_psycopg()
    worker.log.info("Made psycopg green")
Beispiel #45
0
def execute(request, query, page_size):
    """
    Run the search and return an ActivityResults with presented annotations.

    The search result's annotations are loaded from the database, bucketed
    into ``result.timeframes`` and decorated with their group and links.
    In addition the annotations are regrouped per document title into
    ``result.docs``.

    :param request: the request; provides ``request.db`` and is passed on to
        the search and link helpers
    :param query: passed through to ``_execute_search``
    :param page_size: passed through to ``_execute_search``
    :returns: the ActivityResults object; when the search total is 0 it is
        returned with empty timeframes and without a ``docs`` attribute
    """
    search_result = _execute_search(request, query, page_size)

    result = ActivityResults(total=search_result.total,
                             aggregations=search_result.aggregations,
                             timeframes=[])

    # Nothing found: skip all database work.
    if result.total == 0:
        return result

    # Load all referenced annotations from the database, bucket them, and add
    # the buckets to result.timeframes.
    anns = fetch_annotations(request.db, search_result.annotation_ids)
    result.timeframes.extend(bucketing.bucket(anns))

    # Fetch all groups
    group_pubids = set([a.groupid
                        for t in result.timeframes
                        for b in t.document_buckets.values()
                        for a in b.annotations])
    groups = {g.pubid: g for g in _fetch_groups(request.db, group_pubids)}

    # Add group information to buckets and present annotations
    # result.docs maps a document title to a stand-in "document" object, so
    # annotations of documents that share a title end up in the same entry.
    result.docs = dict()
    for timeframe in result.timeframes:
        for bucket in timeframe.document_buckets.values():
            bucket.presented_annotations = []
            for annotation in bucket.annotations:
                bucket.presented_annotations.append({
                    'annotation': presenters.AnnotationHTMLPresenter(annotation),
                    'group': groups.get(annotation.groupid),
                    'html_link': links.html_link(request, annotation),
                    'incontext_link': links.incontext_link(request, annotation)
                })
                # XXX: redo the bucketing, fake a bucket
                if annotation.document.title not in result.docs:
                    # A function object is used as a plain attribute holder:
                    # the lambda is never called, attributes are just set on it.
                    result.docs[annotation.document.title] = doc = lambda: None
                    doc.title = annotation.document.title
                    doc.annotations = []
                    doc.tags = set()
                    doc.users = set()
                    doc.annotations_count = 0
                    doc.incontext_link = types.MethodType(_incontext_link, doc)

                    presented_document = presenters.DocumentHTMLPresenter(annotation.document)

                    # NOTE(review): doc.uri is only set in the first branch, so
                    # a document without a web URI has no ``uri`` attribute.
                    if presented_document.web_uri:
                        parsed = urlparse.urlparse(presented_document.web_uri)
                        doc.uri = parsed.geturl()
                        doc.domain = parsed.netloc
                    else:
                        doc.domain = _('Local file')

                doc = result.docs[annotation.document.title]
                doc.annotations.append(annotation)
                doc.tags.update(set(annotation.tags))
                doc.users.add(annotation.userid)
                doc.annotations_count += 1
    # Second pass: order each document's annotations and present them.
    for key in result.docs:
        doc = result.docs[key]
        # NOTE: the ``cmp`` keyword of sorted() only exists on Python 2.
        doc.annotations = sorted(doc.annotations, cmp=_sort_by_position)
        doc.presented_annotations = []
        for annotation in doc.annotations:
            doc.presented_annotations.append({
                'annotation': presenters.AnnotationHTMLPresenter(annotation),
                'group': groups.get(annotation.groupid),
                'html_link': links.html_link(request, annotation),
                'incontext_link': links.incontext_link(request, annotation)
            })

    return result
Beispiel #46
0
def build_chrome(args):
    """
    Build the Chrome extension into the ``build/chrome`` directory.

    ``args.service_url`` is the base URL of the h installation the extension
    will communicate with, for example "http://localhost:5000" when
    developing locally or "https://hypothes.is" to talk to the production
    Hypothesis application. A trailing slash is added when it is missing.

    Besides ``service_url`` the function reads ``debug``,
    ``bundle_sidebar``, ``sentry_public_dsn`` and, when the sidebar is
    bundled, ``websocket_url`` from ``args``.
    """
    base_url = args.service_url
    service_url = base_url if base_url.endswith('/') else '{}/'.format(base_url)
    api_url = '{}api/'.format(service_url)

    webassets_env = get_webassets_env(base_dir='./build/chrome/public',
                                      assets_url='/public',
                                      debug=args.debug)

    # Start from an empty build directory.
    clean('build/chrome')
    os.makedirs('build/chrome')
    content_dir = webassets_env.directory
    os.makedirs(content_dir)

    # Copy the static extension assets into the build tree.
    asset_trees = (
        ('h/browser/chrome/content', 'build/chrome/content'),
        ('h/browser/chrome/help', 'build/chrome/help'),
        ('h/browser/chrome/images', 'build/chrome/images'),
        ('h/static/images', 'build/chrome/public/images'),
    )
    for source_dir, target_dir in asset_trees:
        copytree(source_dir, target_dir)

    os.makedirs('build/chrome/lib')

    # Bundle the extension's JavaScript entry point with browserify.
    bundle_cmd = ['node_modules/.bin/browserify',
                  'h/browser/chrome/lib/extension.js', '--outfile',
                  'build/chrome/lib/extension-bundle.js']
    if args.debug:
        bundle_cmd.append('--debug')
    subprocess.call(bundle_cmd)

    # Build the shared extension files and, if requested, render the
    # sidebar html into the build.
    if not args.bundle_sidebar:
        build_extension_common(webassets_env, service_url)
    else:
        build_extension_common(webassets_env, service_url, bundle_app=True)
        with codecs.open(content_dir + '/app.html', 'w', 'utf-8') as app_file:
            app_html = client.render_app_html(
                api_url=api_url,
                service_url=service_url,

                # Google Analytics tracking is currently not enabled
                # for the extension
                ga_tracking_id=None,
                webassets_env=webassets_env,
                websocket_url=args.websocket_url,
                sentry_public_dsn=args.sentry_public_dsn)
            app_file.write(app_html)

    # Render the manifest. The script host is only passed on when the assets
    # URL is absolute (has both a scheme and a network location).
    with codecs.open('build/chrome/manifest.json', 'w',
                     'utf-8') as manifest_file:
        assets_url = urlparse.urlparse(webassets_env.url)
        script_host_url = None
        if assets_url.scheme and assets_url.netloc:
            script_host_url = '{}://{}'.format(assets_url.scheme,
                                               assets_url.netloc)
        manifest_file.write(chrome_manifest(script_host_url))

    # Write build settings to a JSON file
    with codecs.open('build/chrome/settings-data.js', 'w',
                     'utf-8') as settings_file:
        build_settings = settings_dict(service_url, api_url,
                                       args.sentry_public_dsn)
        settings_file.write(
            'window.EXTENSION_CONFIG = ' + json.dumps(build_settings))
Beispiel #47
0
def includeme(config):
    """
    Set up the application on the given Pyramid configurator.

    Registers the root factory, event subscribers, tweens, the CSV renderer
    and the Jinja2 extensions, stores the default Content Security Policy
    under ``settings['csp']`` and includes the add-ons, the ``memex`` API
    and the ``h`` packages. The order of the calls is significant.
    """
    settings = config.registry.settings

    # `h.models` must be included before pretty much everything else, to
    # rule out that one of the includes below directly or indirectly imports
    # `memex.models` first. See the comment at the top of `h.models` for
    # details.
    #
    # FIXME: h modules should not access `memex.models`, even indirectly,
    # except through `h.models`.
    config.include('h.models')

    config.set_root_factory('h.resources:Root')

    subscribers = (
        ('h.subscribers.add_renderer_globals',
         'pyramid.events.BeforeRender'),
        ('h.subscribers.publish_annotation_event',
         'memex.events.AnnotationEvent'),
        ('h.subscribers.send_reply_notifications',
         'memex.events.AnnotationEvent'),
    )
    for subscriber, event in subscribers:
        config.add_subscriber(subscriber, event)

    config.add_tween('h.tweens.conditional_http_tween_factory', under=EXCVIEW)
    for tween in ('h.tweens.redirect_tween_factory',
                  'h.tweens.csrf_tween_factory',
                  'h.tweens.auth_token',
                  'h.tweens.security_header_tween_factory'):
        config.add_tween(tween)

    config.add_renderer('csv', 'h.renderers.CSV')
    config.add_request_method(in_debug_mode, 'debug', reify=True)

    config.include('pyramid_jinja2')
    for extension in ('h.jinja_extensions.Filters',
                      'h.jinja_extensions.SvgIcon'):
        config.add_jinja2_extension(extension)
    # The assets environment is set up by a deferred action which runs when
    # the configuration is committed.
    config.action(None, configure_jinja2_assets, args=(config, ))

    # Pyramid layouts: reusable components ('panels') shared by multiple
    # pages.
    config.include('pyramid_layout')

    config.registry.settings.setdefault(
        'mail.default_sender', '"Annotation Daemon" <no-reply@localhost>')
    config.include('pyramid_mailer')

    # Pyramid service layer: infrastructure for registering and retrieving
    # services bound to the request.
    config.include('pyramid_services')

    # Let the transaction manager retry retryable exceptions and create a
    # new transaction manager for every request.
    tm_settings = {
        "tm.attempts": 3,
        "tm.manager_hook": lambda request: transaction.TransactionManager(),
        "tm.annotate_user": False,
    }
    config.add_settings(tm_settings)
    config.include('pyramid_tm')

    # Define the global default Content Security Policy
    client_host = urlparse.urlparse(
        settings.get('h.client_url', DEFAULT_CLIENT_URL)).netloc
    csp = {
        "font-src": ["'self'", "fonts.gstatic.com", client_host],
        "script-src": ["'self'", client_host, "www.google-analytics.com"],

        # Inline styles stay allowed until
        # https://github.com/hypothesis/client/issues/293 is resolved, as
        # otherwise our own tool would break on the site, including on
        # /docs/help.
        "style-src": ["'self'", "fonts.googleapis.com", client_host,
                      "'unsafe-inline'"],
    }
    settings['csp'] = csp
    if 'csp.report_uri' in settings:
        csp['report-uri'] = [settings['csp.report_uri']]

    # API module
    #
    # This is included ahead of the site modules below so that:
    # - configuration directives provided by modules in `memex` are available
    #   to the rest of the application at startup.
    # - we can override behaviour from `memex` if necessary.
    config.include('memex', route_prefix='/api')

    # Override memex group service
    config.register_service_factory(
        'h.services.groupfinder.groupfinder_service_factory',
        iface='memex.interfaces.IGroupService')

    core_modules = (
        'h.assets',
        'h.auth',
        'h.authz',
        'h.db',
        'h.features',
        'h.form',
        'h.indexer',
        'h.panels',
        'h.realtime',
        'h.routes',
        'h.sentry',
        'h.services',
        'h.session',
        'h.stats',
        'h.views',
        'h.viewderivers',
    )
    site_modules = (
        'h.accounts',
        'h.admin',
        'h.groups',
        'h.links',
        'h.nipsa',
        'h.notification',
    )
    # Core site modules first, then the site modules.
    for module in core_modules + site_modules:
        config.include(module)

    # Debugging assistance
    if asbool(config.registry.settings.get('h.debug')):
        for module in ('pyramid_debugtoolbar', 'h.debug'):
            config.include(module)
Beispiel #48
0
Datei: app.py Projekt: rowhit/h
def includeme(config):
    """
    Set up the application on the given Pyramid configurator.

    Registers the root factory, event subscribers, tweens and the Jinja2
    extensions, stores the default Content Security Policy under
    ``settings['csp']`` and includes the add-ons and the ``h`` packages.
    The order of the calls is significant.
    """
    settings = config.registry.settings

    config.set_root_factory('h.resources:Root')

    subscribers = (
        ('h.subscribers.add_renderer_globals',
         'pyramid.events.BeforeRender'),
        ('h.subscribers.publish_annotation_event',
         'h.events.AnnotationEvent'),
        ('h.subscribers.send_reply_notifications',
         'h.events.AnnotationEvent'),
    )
    for subscriber, event in subscribers:
        config.add_subscriber(subscriber, event)

    config.add_tween('h.tweens.conditional_http_tween_factory', under=EXCVIEW)
    for tween in ('h.tweens.redirect_tween_factory',
                  'h.tweens.csrf_tween_factory',
                  'h.tweens.auth_token',
                  'h.tweens.security_header_tween_factory'):
        config.add_tween(tween)

    config.add_request_method(in_debug_mode, 'debug', reify=True)

    config.include('pyramid_jinja2')
    for extension in ('h.jinja_extensions.Filters',
                      'h.jinja_extensions.SvgIcon'):
        config.add_jinja2_extension(extension)
    # The assets environment is set up by a deferred action which runs when
    # the configuration is committed.
    config.action(None, configure_jinja2_assets, args=(config, ))

    # Pyramid layouts: reusable components ('panels') shared by multiple
    # pages.
    config.include('pyramid_layout')

    config.registry.settings.setdefault(
        'mail.default_sender', '"Annotation Daemon" <no-reply@localhost>')
    # With `h.debug` enabled the debug variant of the mailer is included.
    debug_mail = asbool(config.registry.settings.get('h.debug'))
    config.include('pyramid_mailer.debug' if debug_mail else 'pyramid_mailer')

    # Pyramid service layer: infrastructure for registering and retrieving
    # services bound to the request.
    config.include('pyramid_services')

    # Let the transaction manager retry retryable exceptions and create a
    # new transaction manager for every request.
    tm_settings = {
        "tm.attempts": 3,
        "tm.manager_hook": lambda request: transaction.TransactionManager(),
        "tm.annotate_user": False,
    }
    config.add_settings(tm_settings)
    config.include('pyramid_tm')

    # Define the global default Content Security Policy
    client_host = urlparse.urlparse(
        settings.get('h.client_url', DEFAULT_CLIENT_URL)).netloc
    csp = {
        "font-src": ["'self'", "fonts.gstatic.com", client_host],
        "script-src": ["'self'", client_host, "www.google-analytics.com"],

        # Inline styles stay allowed until
        # https://github.com/hypothesis/client/issues/293 is resolved, as
        # otherwise our own tool would break on the site, including on
        # /docs/help.
        "style-src": ["'self'", "fonts.googleapis.com", client_host,
                      "'unsafe-inline'"],
    }
    settings['csp'] = csp
    if 'csp.report_uri' in settings:
        csp['report-uri'] = [settings['csp.report_uri']]

    core_modules = (
        'h.assets',
        'h.auth',
        'h.authz',
        'h.db',
        'h.eventqueue',
        'h.form',
        'h.indexer',
        'h.panels',
        'h.realtime',
        'h.routes',
        'h.search',
        'h.sentry',
        'h.services',
        'h.session',
        'h.stats',
        'h.viewderivers',
        'h.viewpredicates',
        'h.views',
    )
    site_modules = (
        'h.accounts',
        'h.groups',
        'h.links',
        'h.nipsa',
        'h.notification',
    )
    # Core site modules first, then the site modules.
    for module in core_modules + site_modules:
        config.include(module)

    # Debugging assistance
    if asbool(config.registry.settings.get('h.debug')):
        config.include('pyramid_debugtoolbar')
Beispiel #49
0
def check_sentry_dsn_is_public(dsn):
    """
    Return ``dsn`` unchanged if its URL carries no password component.

    Raises ``argparse.ArgumentTypeError`` when the parsed DSN has a
    non-empty password, i.e. when it contains a secret key.
    """
    secret_key = urlparse.urlparse(dsn).password
    if not secret_key:
        return dsn
    raise argparse.ArgumentTypeError(
        "Must be a public Sentry DSN which does not contain a secret key.")
Beispiel #50
0
def check_sentry_dsn_is_public(dsn):
    """
    Check that a Sentry DSN does not include a secret key.

    The DSN is parsed as a URL; a non-empty password part is treated as a
    secret key and rejected with ``argparse.ArgumentTypeError``. Otherwise
    the DSN is handed back as given.
    """
    has_secret = bool(urlparse.urlparse(dsn).password)
    if has_secret:
        message = (
            "Must be a public Sentry DSN which does not contain a secret key.")
        raise argparse.ArgumentTypeError(message)
    return dsn
def _document_web_uri(document):
    for docuri in document.document_uris:
        uri = urlparse.urlparse(docuri.uri)
        if uri.scheme in ["http", "https"]:
            return docuri.uri
Beispiel #52
0
def _setup_statsd(settings):
    if 'STATSD_PORT' in os.environ:
        statsd_host = urlparse.urlparse(os.environ['STATSD_PORT_8125_UDP'])
        settings['statsd.host'] = statsd_host.hostname
        settings['statsd.port'] = statsd_host.port
Beispiel #53
0
 def domain(user):
     """
     Return the network location part of ``user.uri``.

     Gives ``None`` when the user has no (or an empty) ``uri``.
     """
     return urlparse.urlparse(user.uri).netloc if user.uri else None
Beispiel #54
0
def includeme(config):
    """
    Set up the application on the given Pyramid configurator.

    Registers the root factory, event subscribers, tweens and the Jinja2
    extensions, stores the default Content Security Policy under
    ``settings['csp']`` and includes the add-ons and the ``h`` packages.
    The order of the calls is significant.
    """
    settings = config.registry.settings

    config.set_root_factory('h.traversal:Root')

    subscribers = (
        ('h.subscribers.add_renderer_globals',
         'pyramid.events.BeforeRender'),
        ('h.subscribers.publish_annotation_event',
         'h.events.AnnotationEvent'),
        ('h.subscribers.send_reply_notifications',
         'h.events.AnnotationEvent'),
    )
    for subscriber, event in subscribers:
        config.add_subscriber(subscriber, event)

    config.add_tween('h.tweens.conditional_http_tween_factory', under=EXCVIEW)
    for tween in ('h.tweens.redirect_tween_factory',
                  'h.tweens.csrf_tween_factory',
                  'h.tweens.invalid_path_tween_factory',
                  'h.tweens.security_header_tween_factory',
                  'h.tweens.cache_header_tween_factory',
                  'h.tweens.encode_headers_tween_factory'):
        config.add_tween(tween)

    config.add_request_method(in_debug_mode, 'debug', reify=True)

    config.include('pyramid_jinja2')
    for extension in ('h.jinja_extensions.Filters',
                      'h.jinja_extensions.SvgIcon'):
        config.add_jinja2_extension(extension)
    # The assets environment is set up by a deferred action which runs when
    # the configuration is committed.
    config.action(None, configure_jinja2_assets, args=(config,))

    # Pyramid layouts: reusable components ('panels') shared by multiple
    # pages.
    config.include('pyramid_layout')

    config.registry.settings.setdefault(
        'mail.default_sender', '"Annotation Daemon" <no-reply@localhost>')
    # With `h.debug` enabled the debug variant of the mailer is included.
    debug_mail = asbool(config.registry.settings.get('h.debug'))
    config.include('pyramid_mailer.debug' if debug_mail else 'pyramid_mailer')

    # Pyramid service layer: infrastructure for registering and retrieving
    # services bound to the request.
    config.include('pyramid_services')

    # Let the transaction manager retry retryable exceptions and create a
    # new transaction manager for every request.
    tm_settings = {
        "tm.attempts": 3,
        "tm.manager_hook": lambda request: transaction.TransactionManager(),
        "tm.annotate_user": False,
    }
    config.add_settings(tm_settings)
    config.include('pyramid_tm')

    # Define the global default Content Security Policy
    client_host = urlparse.urlparse(
        settings.get('h.client_url', DEFAULT_CLIENT_URL)).netloc
    csp = {
        "font-src": ["'self'", "fonts.gstatic.com", client_host],
        "script-src": ["'self'", client_host, "www.google-analytics.com"],

        # Inline styles stay allowed until
        # https://github.com/hypothesis/client/issues/293 is resolved, as
        # otherwise our own tool would break on the site, including on
        # /docs/help.
        "style-src": ["'self'", "fonts.googleapis.com", client_host,
                      "'unsafe-inline'"],
    }
    settings['csp'] = csp
    if 'csp.report_uri' in settings:
        csp['report-uri'] = [settings['csp.report_uri']]

    core_modules = (
        'h.assets',
        'h.auth',
        'h.authz',
        'h.db',
        'h.eventqueue',
        'h.form',
        'h.indexer',
        'h.panels',
        'h.realtime',
        'h.renderers',
        'h.routes',
        'h.search',
        'h.sentry',
        'h.services',
        'h.session',
        'h.stats',
        'h.viewderivers',
        'h.viewpredicates',
        'h.views',
    )
    site_modules = (
        'h.accounts',
        'h.groups',
        'h.links',
        'h.nipsa',
        'h.notification',
    )
    # Core site modules first, then the site modules.
    for module in core_modules + site_modules:
        config.include(module)

    # Debugging assistance
    if asbool(config.registry.settings.get('h.debug')):
        config.include('pyramid_debugtoolbar')
Beispiel #55
0
def url_with_path(url):
    """
    Return ``url`` with a root path ("/") added if it has no path at all.

    URLs that already have a path are returned unchanged. When the URL has
    no path but does have a query string or fragment, the slash is inserted
    in front of them (``http://host?q=1`` -> ``http://host/?q=1``) instead of
    being appended to the end of the string.

    :param url: the URL to normalize, as a string.
    :returns: the URL, guaranteed to have a non-empty path component.
    """
    parsed = urlparse.urlparse(url)
    if parsed.path != '':
        return url

    if parsed.query or parsed.fragment:
        # Appending to the raw string would put the slash after the query or
        # fragment, so rebuild the URL around the new path instead.
        return parsed._replace(path='/').geturl()

    return '{}/'.format(url)