Beispiel #1
0
def webathena_kerberos_login(
    request: HttpRequest,
    user_profile: UserProfile,
    cred: Optional[str] = REQ(default=None)
) -> HttpResponse:
    """Accept a Webathena Kerberos credential and forward it to the mirror host.

    The JSON credential in ``cred`` is parsed, its principal name is checked
    against the local part of the user's email address, and a credential
    cache is built from it.  The username, the user's API key and the
    base64-encoded ccache are then passed to ``process_ccache`` over ssh on
    ``settings.PERSONAL_ZMIRROR_SERVER``.

    Returns ``json_success()`` when the remote command succeeds, and a
    ``json_error`` response when the credential is missing, Webathena is
    disabled for the realm, the credential fails validation, or the remote
    command exits with a non-zero status.
    """
    if cred is None:
        return json_error(_("Could not find Kerberos credential"))
    if not user_profile.realm.webathena_enabled:
        return json_error(_("Webathena login not enabled"))

    try:
        parsed_cred = orjson.loads(cred)
        user = parsed_cred["cname"]["nameString"][0]
        if user in kerberos_alter_egos:
            user = kerberos_alter_egos[user]
        # These checks are security-relevant, so they are written as explicit
        # raises rather than `assert` statements: asserts are removed when
        # Python runs with -O, which would silently disable the validation.
        if user != user_profile.email.split("@")[0]:
            raise ValueError("Kerberos principal does not match the user's email")
        # Limit characters in usernames to valid MIT usernames
        # This is important for security since DNS is not secure.
        # fullmatch (unlike match with a trailing '$') also rejects a
        # trailing newline.
        if re.fullmatch(r'[a-z0-9_.-]+', user) is None:
            raise ValueError("Kerberos principal contains disallowed characters")
        ccache = make_ccache(parsed_cred)

        # 'user' has been verified to contain only benign characters that won't
        # help with shell injection.
        user = mark_sanitized(user)

        # 'ccache' is only written to disk by the script and used as a kerberos
        # credential cache file.
        ccache = mark_sanitized(ccache)
    except Exception:
        # Any parsing or validation failure is reported to the client as an
        # invalid credential.
        return json_error(_("Invalid Kerberos cache"))

    # TODO: Send these data via (say) rabbitmq
    try:
        api_key = get_api_key(user_profile)
        command = [
            "/home/zulip/python-zulip-api/zulip/integrations/zephyr/process_ccache",
            user,
            api_key,
            base64.b64encode(ccache).decode("utf-8"),
        ]
        # ssh hands the remote side a single command string, so every
        # argument is shell-quoted before being joined.
        subprocess.check_call([
            "ssh", settings.PERSONAL_ZMIRROR_SERVER, "--",
            " ".join(map(shlex.quote, command))
        ])
    except subprocess.CalledProcessError:
        logging.exception("Error updating the user's ccache", stack_info=True)
        return json_error(_("We were unable to setup mirroring for you"))

    return json_success()
Beispiel #2
0
    def _handle_consume_exception(self, events: List[Dict[str, Any]],
                                  exception: Exception) -> None:
        """Log a failure to consume ``events`` and record them on disk.

        ``InterruptConsumeException`` is treated as "nothing to do" and
        returns immediately.  For any other exception the events and queue
        name are attached to the Sentry scope, the exception is logged, and
        one timestamped line containing the JSON-encoded events is appended to
        ``<QUEUE_ERROR_DIR>/<queue_name>.errors`` under a lock file.  Finally
        ``check_and_send_restart_signal()`` is called.
        """
        if isinstance(exception, InterruptConsumeException):
            # The exception signals that no further error handling
            # is needed and the worker can proceed.
            return

        with configure_scope() as scope:
            scope.set_context(
                "events",
                {
                    "data": events,
                    "queue_name": self.queue_name,
                },
            )
            if isinstance(exception, WorkerTimeoutException):
                # Timeouts get their own fingerprint, keyed on the queue name.
                with sentry_sdk.push_scope() as timeout_scope:
                    timeout_scope.fingerprint = ["worker-timeout", self.queue_name]
                    logging.exception(exception, stack_info=True)
            else:
                logging.exception("Problem handling data on queue %s",
                                  self.queue_name,
                                  stack_info=True)
        # exist_ok avoids the check-then-create race: another worker may
        # create the directory between an exists() test and mkdir().
        os.makedirs(settings.QUEUE_ERROR_DIR, exist_ok=True)
        # Use 'mark_sanitized' to prevent Pysa from detecting this false positive
        # flow. 'queue_name' is always a constant string.
        fname = mark_sanitized(f"{self.queue_name}.errors")
        fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
        line = f"{time.asctime()}\t{orjson.dumps(events).decode()}\n"
        lock_fn = fn + ".lock"
        with lockfile(lock_fn):
            # Write bytes with an explicit UTF-8 encoding: the JSON may hold
            # non-ASCII text, and a text-mode file would use the locale's
            # encoding, which may be unable to represent it.
            with open(fn, "ab") as f:
                f.write(line.encode("utf-8"))
        check_and_send_restart_signal()
Beispiel #3
0
def get_link_embed_data(
        url: str,
        maxwidth: Optional[int] = 640,
        maxheight: Optional[int] = 480) -> Optional[Dict[str, Any]]:
    """Collect preview metadata (title, description, image) for ``url``.

    Returns ``None`` when ``url`` is not a link or does not have a valid
    content type.  Otherwise returns a dict: the oEmbed data as-is when it
    carries a truthy ``'oembed'`` entry, else the oEmbed data (possibly
    empty) with missing ``title``/``description``/``image`` keys filled in
    from the page's Open Graph tags first and generic tags second.
    """
    if not is_link(url):
        return None

    if not valid_content_type(url):
        return None

    # We are using two different mechanisms to get the embed data
    # 1. Use OEmbed data, if found, for photo and video "type" sites
    # 2. Otherwise, use a combination of Open Graph tags and Meta tags
    data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}
    if data.get('oembed'):
        return data

    # With stream=True the connection is only released once the body has been
    # fully read or the response is closed.  The context manager guarantees
    # the close even when the response is not OK and the body is never read.
    with requests.get(mark_sanitized(url),
                      stream=True,
                      headers=HEADERS,
                      timeout=TIMEOUT) as response:
        if response.ok:
            page = response.text
            # Earlier parsers win: a key is only filled while still missing.
            for parser_class in (OpenGraphParser, GenericParser):
                extracted = parser_class(page).extract_data() or {}
                for key in ('title', 'description', 'image'):
                    if not data.get(key) and extracted.get(key):
                        data[key] = extracted[key]
    return data
Beispiel #4
0
 def _handle_consume_exception(self, events: List[Dict[str, Any]],
                               exception: Exception) -> None:
     """Log a failure to consume ``events`` and record them on disk.

     The events and queue name are attached to the Sentry scope and the
     exception is logged (with a dedicated fingerprint for
     ``WorkerTimeoutException``).  One timestamped line containing the
     JSON-encoded events is then appended to
     ``<QUEUE_ERROR_DIR>/<queue_name>.errors`` under a lock file, and
     ``check_and_send_restart_signal()`` is called.
     """
     with configure_scope() as scope:
         scope.set_context("events", {
             "data": events,
             "queue_name": self.queue_name,
         })
         if isinstance(exception, WorkerTimeoutException):
             # Timeouts get their own fingerprint, keyed on the queue name.
             with sentry_sdk.push_scope() as timeout_scope:
                 timeout_scope.fingerprint = ['worker-timeout', self.queue_name]
                 logging.exception("%s in queue %s",
                                   str(exception),
                                   self.queue_name,
                                   stack_info=True)
         else:
             logging.exception("Problem handling data on queue %s",
                               self.queue_name,
                               stack_info=True)
     # exist_ok avoids the check-then-create race: another worker may
     # create the directory between an exists() test and mkdir().
     os.makedirs(settings.QUEUE_ERROR_DIR, exist_ok=True)
     # Use 'mark_sanitized' to prevent Pysa from detecting this false positive
     # flow. 'queue_name' is always a constant string.
     fname = mark_sanitized(f'{self.queue_name}.errors')
     fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
     line = f'{time.asctime()}\t{orjson.dumps(events).decode()}\n'
     lock_fn = fn + '.lock'
     with lockfile(lock_fn):
         # Write bytes with an explicit UTF-8 encoding: the JSON may hold
         # non-ASCII text, and a text-mode file would use the locale's
         # encoding, which may be unable to represent it.
         with open(fn, 'ab') as f:
             f.write(line.encode('utf-8'))
     check_and_send_restart_signal()
    def _index_for(self, minified_src: str) -> sourcemap.SourceMapDecoder:
        '''Return the source map index for minified_src, loading it if not
           already loaded.

           The first "<minified_src>.map" file found in self._dirs is loaded
           and cached in self._indices.  Raises AssertionError when
           minified_src contains ".." or "/".  If no map file exists in any
           directory, the final lookup in self._indices fails (a KeyError
           for a plain dict).'''

        # Prevent path traversal.  This is raised explicitly instead of using
        # an `assert` statement so the check is still enforced when Python
        # runs with -O; the exception type is unchanged for callers.
        if ".." in minified_src or "/" in minified_src:
            raise AssertionError("minified_src must not contain '..' or '/'")

        if minified_src not in self._indices:
            for source_dir in self._dirs:
                filename = os.path.join(source_dir, minified_src + '.map')
                if os.path.isfile(filename):
                    # Use 'mark_sanitized' to force Pysa to ignore the fact that
                    # 'filename' is user controlled. While putting user
                    # controlled data into a filesystem operation is bad, in
                    # this case it's benign because 'filename' can't traverse
                    # directories outside of the pre-configured 'sourcemap_dirs'
                    # (due to the above check) and will always end in
                    # '.map'. Additionally, the result of this function is used
                    # for error logging and not returned to the user, so
                    # controlling the loaded file would not be useful to an
                    # attacker.
                    with open(mark_sanitized(filename)) as fp:
                        self._indices[minified_src] = sourcemap.load(fp)
                        break

        return self._indices[minified_src]
Beispiel #6
0
def check_add_realm_emoji(realm: Realm, name: str, author: UserProfile,
                          image_file: IO[bytes]) -> RealmEmoji:
    """Create a custom emoji row for ``realm`` and upload its image.

    The ``RealmEmoji`` row is validated and saved first so that its id can be
    used to derive the stored file name.  If the upload then fails for any
    reason the row is deleted again and the error propagates.  On success the
    file name and animation flag are saved, the realm is notified, and the
    emoji is returned.

    Raises ``JsonableError`` when saving the row hits an ``IntegrityError``.
    """
    try:
        realm_emoji = RealmEmoji(realm=realm, name=name, author=author)
        realm_emoji.full_clean()
        realm_emoji.save()
    except django.db.utils.IntegrityError:
        # Match the string in upload_emoji.
        raise JsonableError(_("A custom emoji with this name already exists."))

    # The only user-controlled portion of the file name is an extension,
    # which can not contain '..' or '/' or '\', making it difficult to exploit
    stored_name = mark_sanitized(
        get_emoji_file_name(image_file.name, realm_emoji.id))

    try:
        animated = upload_emoji_image(image_file, stored_name, author)
    except BaseException:
        # Don't leave behind a database row without an uploaded image.
        realm_emoji.delete()
        raise

    realm_emoji.file_name = stored_name
    realm_emoji.is_animated = animated
    realm_emoji.save(update_fields=["file_name", "is_animated"])
    notify_realm_emoji(realm_emoji.realm)
    return realm_emoji
Beispiel #7
0
def get_link_embed_data(url: str,
                        maxwidth: int = 640,
                        maxheight: int = 480) -> Optional[Dict[str, Any]]:
    """Collect preview metadata (title, description, image) for ``url``.

    Returns ``None`` for anything that is not a link or has no valid content
    type.  Otherwise returns a dict: the oEmbed data unchanged when it has a
    truthy ``"oembed"`` entry, else the oEmbed data (possibly empty) with any
    missing ``title``/``description``/``image`` filled from the page's Open
    Graph data first and the generic parser's data second.
    """
    if not is_link(url) or not valid_content_type(url):
        return None

    # Two mechanisms are used to get the embed data:
    # 1. OEmbed data, if found, for photo and video "type" sites.
    # 2. Otherwise, a combination of Open Graph tags and Meta tags.
    data = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight) or {}
    if data.get("oembed"):
        return data

    response = PreviewSession().get(mark_sanitized(url), stream=True)
    if not response.ok:
        return data

    def fill_gaps(extracted: Dict[str, Any]) -> None:
        # Copy each field only while it is still missing from `data`.
        for field in ("title", "description", "image"):
            if data.get(field) or not extracted.get(field):
                continue
            data[field] = extracted[field]

    body = response.content
    content_type = response.headers.get("Content-Type")
    fill_gaps(OpenGraphParser(body, content_type).extract_data())
    fill_gaps(GenericParser(body, content_type).extract_data() or {})
    return data
Beispiel #8
0
def get_link_embed_data(url: str,
                        maxwidth: int = 640,
                        maxheight: int = 480) -> Optional[UrlEmbedData]:
    """Build the embed data for ``url``.

    Returns ``None`` when ``url`` is not a link, does not have a valid
    content type, or the page request is not OK.  If the oEmbed lookup yields
    a ``UrlOEmbedData`` it is returned unchanged; otherwise its result (or a
    fresh ``UrlEmbedData``) is merged with what the Open Graph and generic
    parsers extract from the page, and any image is resolved against the
    final response URL.
    """
    if not is_link(url) or not valid_content_type(url):
        return None

    # The oembed data may be complete enough to return as-is; if not, it
    # serves as the base that the simpler fallback parsers are merged into.
    oembed = get_oembed_data(url, maxwidth=maxwidth, maxheight=maxheight)
    if isinstance(oembed, UrlOEmbedData):
        return oembed

    response = PreviewSession().get(mark_sanitized(url), stream=True)
    if not response.ok:
        return None

    embed = oembed if oembed is not None else UrlEmbedData()

    body = response.content
    content_type = response.headers.get("Content-Type")
    for parser_class in (OpenGraphParser, GenericParser):
        embed.merge(parser_class(body, content_type).extract_data())

    if embed.image:
        # The extracted image may be a relative reference.
        embed.image = urljoin(response.url, embed.image)
    return embed
Beispiel #9
0
def get_safe_redirect_to(url: str, redirect_host: str) -> str:
    """Return a redirect target that stays on ``redirect_host``.

    ``url`` is joined onto ``redirect_host`` when
    ``url_has_allowed_host_and_scheme`` (called with ``allowed_hosts=None``)
    accepts it; otherwise ``redirect_host`` itself is returned.
    """
    if not url_has_allowed_host_and_scheme(url=url, allowed_hosts=None):
        return redirect_host

    # Mark as safe to prevent Pysa from surfacing false positives for open
    # redirects: the URL has already passed the check above.
    return urllib.parse.urljoin(redirect_host, mark_sanitized(url))
Beispiel #10
0
def get_zoom_sid(request: HttpRequest) -> str:
    """Return the session-bound identifier used in the Zoom OAuth flow.

    The value guards the flow against CSRF: it should be unpredictable and
    tied to the session, without exposing the main CSRF token to the Zoom
    server.  It is the salted HMAC hex digest of the request's CSRF cookie,
    or the empty string when the request has ``_dont_enforce_csrf_checks``
    set.
    """
    # Called before request.META["CSRF_COOKIE"] is read below.
    csrf.get_token(request)

    if getattr(request, "_dont_enforce_csrf_checks", False):
        sid = ""
    else:
        sid = salted_hmac("Zulip Zoom sid", request.META["CSRF_COOKIE"]).hexdigest()

    # Use 'mark_sanitized' to cause Pysa to ignore the flow of user controlled
    # data out of this function. 'request.META' is indeed user controlled, but
    # post-HMAC output is no longer meaningfully controllable.
    return mark_sanitized(sid)
Beispiel #11
0
 def _handle_consume_exception(self, events: List[Dict[str, Any]]) -> None:
     """Log a failure to consume ``events`` and record them on disk.

     After ``self._log_problem()``, one timestamped line containing the
     JSON-encoded events is appended (as UTF-8) to
     ``<QUEUE_ERROR_DIR>/<queue_name>.errors`` under a lock file, and
     ``check_and_send_restart_signal()`` is called.
     """
     self._log_problem()
     # exist_ok avoids the check-then-create race: another worker may
     # create the directory between an exists() test and mkdir().
     os.makedirs(settings.QUEUE_ERROR_DIR, exist_ok=True)
     # Use 'mark_sanitized' to prevent Pysa from detecting this false positive
     # flow. 'queue_name' is always a constant string.
     fname = mark_sanitized(f'{self.queue_name}.errors')
     fn = os.path.join(settings.QUEUE_ERROR_DIR, fname)
     line = f'{time.asctime()}\t{orjson.dumps(events).decode()}\n'
     lock_fn = fn + '.lock'
     with lockfile(lock_fn):
         with open(fn, 'ab') as f:
             f.write(line.encode('utf-8'))
     check_and_send_restart_signal()
Beispiel #12
0
def login_and_go_to_home(request: HttpRequest, user_profile: UserProfile) -> HttpResponse:
    """Finish a pending mobile/desktop flow, or log in and redirect home.

    Both registration-flow OTP values are read from the session and removed
    from it.  A mobile OTP takes precedence over a desktop one; when neither
    is present the user is logged in and redirected to their realm's home
    page.
    """
    session = request.session
    mobile_flow_otp = get_expirable_session_var(
        session, 'registration_mobile_flow_otp', delete=True)
    desktop_flow_otp = get_expirable_session_var(
        session, 'registration_desktop_flow_otp', delete=True)

    if mobile_flow_otp is not None:
        return finish_mobile_flow(request, user_profile, mobile_flow_otp)
    if desktop_flow_otp is not None:
        return finish_desktop_flow(request, user_profile, desktop_flow_otp)

    do_login(request, user_profile)
    # Using 'mark_sanitized' to work around false positive where Pysa thinks
    # that 'user_profile' is user-controlled
    home_url = mark_sanitized(user_profile.realm.uri) + reverse('home')
    return HttpResponseRedirect(home_url)
Beispiel #13
0
def add_query_arg_to_redirect_url(original_url: str, query_arg: str) -> str:
    """Append ``query_arg`` to ``original_url`` with an ``&`` separator.

    ``original_url`` must already contain a ``?``; this is asserted.
    """
    assert '?' in original_url
    # Using 'mark_sanitized' because user-controlled data after the '?' is
    # not relevant for open redirects
    return "&".join((original_url, mark_sanitized(query_arg)))
Beispiel #14
0
def maybe_send_to_registration(
        request: HttpRequest,
        email: str,
        full_name: str = '',
        mobile_flow_otp: Optional[str] = None,
        desktop_flow_otp: Optional[str] = None,
        is_signup: bool = False,
        password_required: bool = True,
        multiuse_object_key: str = '',
        full_name_validated: bool = False) -> HttpResponse:
    """Given a successful authentication for an email address (i.e. we've
    confirmed the user controls the email address) that does not
    currently have a Zulip account in the target realm, send them to
    the registration flow or the "continue to registration" flow,
    depending on is_signup, whether the email address can join the
    organization (checked in HomepageForm), and similar details.

    Parameters:
    * mobile_flow_otp / desktop_flow_otp: at most one may be set (this
      is asserted); whichever is set is stored in the session for one
      hour.
    * multiuse_object_key: when non-empty, the matching Confirmation's
      content object supplies the realm, the invited_as value and the
      streams for the PreregistrationUser; otherwise the realm is
      looked up from the request's subdomain and may be None.
    * password_required, full_name, full_name_validated: applied to
      the PreregistrationUser, whether it is fetched or newly created
      (full_name and full_name_validated only overwrite an existing
      record when full_name is non-empty).

    Returns, when the HomepageForm is valid, a redirect to the
    confirmation link if is_signup is true, or else the rendered
    'zerver/confirm_continue_registration.html' page.  When the form
    is invalid, returns the rendered 'zerver/accounts_home.html' page
    carrying the invalid form.
    """

    # In the desktop and mobile registration flows, the sign up
    # happens in the browser so the user can use their
    # already-logged-in social accounts.  Then at the end, with the
    # user account created, we pass the appropriate data to the app
    # via e.g. a `zulip://` redirect.  We store the OTP keys for the
    # mobile/desktop flow in the session with 1-hour expiry, because
    # we want this configuration of having a successful authentication
    # result in being logged into the app to persist if the user makes
    # mistakes while trying to authenticate (E.g. clicks the wrong
    # Google account, hits back, etc.) during a given browser session,
    # rather than just logging into the webapp in the target browser.
    #
    # We can't use our usual pre-account-creation state storage
    # approach of putting something in PreregistrationUser, because
    # that would apply to future registration attempts on other
    # devices, e.g. just creating an account on the web on their laptop.
    assert not (mobile_flow_otp and desktop_flow_otp)
    if mobile_flow_otp:
        set_expirable_session_var(request.session,
                                  'registration_mobile_flow_otp',
                                  mobile_flow_otp,
                                  expiry_seconds=3600)
    elif desktop_flow_otp:
        set_expirable_session_var(request.session,
                                  'registration_desktop_flow_otp',
                                  desktop_flow_otp,
                                  expiry_seconds=3600)

    # Determine the target realm and role: from the multiuse invite if
    # one was supplied, otherwise from the request's subdomain.
    if multiuse_object_key:
        from_multiuse_invite = True
        multiuse_obj = Confirmation.objects.get(
            confirmation_key=multiuse_object_key).content_object
        realm = multiuse_obj.realm
        invited_as = multiuse_obj.invited_as
    else:
        from_multiuse_invite = False
        multiuse_obj = None
        try:
            realm = get_realm(get_subdomain(request))
        except Realm.DoesNotExist:
            # No realm for this subdomain; 'realm' stays None and the
            # host is taken from the request further down.
            realm = None
        invited_as = PreregistrationUser.INVITE_AS['MEMBER']

    form = HomepageForm({'email': email},
                        realm=realm,
                        from_multiuse_invite=from_multiuse_invite)
    if form.is_valid():
        # If the email address is allowed to sign up for an account in
        # this organization, construct a PreregistrationUser and
        # Confirmation objects, and then send the user to account
        # creation or confirm-continue-registration depending on
        # is_signup.
        try:
            # Reuse the most recently invited PreregistrationUser for
            # this email (case-insensitive match) and realm, if any.
            prereg_user = PreregistrationUser.objects.filter(
                email__iexact=email, realm=realm).latest("invited_at")

            # password_required and full_name data passed here as argument should take precedence
            # over the defaults with which the existing PreregistrationUser that we've just fetched
            # was created.
            prereg_user.password_required = password_required
            update_fields = ["password_required"]
            if full_name:
                prereg_user.full_name = full_name
                prereg_user.full_name_validated = full_name_validated
                update_fields.extend(["full_name", "full_name_validated"])
            prereg_user.save(update_fields=update_fields)
        except PreregistrationUser.DoesNotExist:
            prereg_user = create_preregistration_user(
                email,
                request,
                password_required=password_required,
                full_name=full_name,
                full_name_validated=full_name_validated,
            )

        if multiuse_obj is not None:
            # Carry the multiuse invite's streams and role over to the
            # PreregistrationUser.
            request.session.modified = True
            streams_to_subscribe = list(multiuse_obj.streams.all())
            prereg_user.streams.set(streams_to_subscribe)
            prereg_user.invited_as = invited_as
            prereg_user.save()

        # We want to create a confirmation link to create an account
        # in the current realm, i.e. one with a hostname of
        # realm.host.  For the Apache REMOTE_USER_SSO auth code path,
        # this is preferable over realm.get_host() because the latter
        # contains the port number of the Apache instance and we want
        # to send the user back to nginx.  But if we're in the realm
        # creation code path, there might not be a realm yet, so we
        # have to use request.get_host().
        if realm is not None:
            host = realm.host
        else:
            host = request.get_host()
        # Mark 'host' as safe for use in a redirect. It's pulled from the
        # current request or realm, both of which only allow a limited set of
        # trusted hosts.
        confirmation_link = create_confirmation_link(
            prereg_user, mark_sanitized(host), Confirmation.USER_REGISTRATION)
        if is_signup:
            return redirect(confirmation_link)

        context = {
            'email': email,
            'continue_link': confirmation_link,
            'full_name': full_name
        }
        return render(request,
                      'zerver/confirm_continue_registration.html',
                      context=context)

    # This email address is not allowed to join this organization, so
    # just send the user back to the registration page.
    url = reverse('register')
    context = login_context(request)
    extra_context: Mapping[str, Any] = {
        'form': form,
        'current_url': lambda: url,
        'from_multiuse_invite': from_multiuse_invite,
        'multiuse_object_key': multiuse_object_key,
        'mobile_flow_otp': mobile_flow_otp,
        'desktop_flow_otp': desktop_flow_otp,
    }
    context.update(extra_context)
    return render(request, 'zerver/accounts_home.html', context=context)