Example #1
def timestamp_to_datetime(timestamp):
    """
    Convert a string timestamp to a datetime object.

    :param str timestamp: a generic or ISO-8601 timestamp
    :return: datetime object of the timestamp
    :rtype: datetime.datetime
    :raises ValueError: if the timestamp is an unsupported or invalid format
    """
    log.debug('Trying to parse the timestamp "{0}"'.format(timestamp))
    error_msg = 'The timestamp "{0}" is an invalid format'.format(timestamp)
    combinations = (
        (r'^(?P<datetime>\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2})(?:\.\d+)?$',
         '%Y-%m-%d %H:%M:%S'),
        (r'^(?P<datetime>\d{4}-\d{1,2}-\d{1,2})$', '%Y-%m-%d'),
        # ISO 8601 format
        (r'^(?P<datetime>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(?:\.\d+)?(?:Z|[-+]00(?::00)?)?$',
         '%Y-%m-%dT%H:%M:%S'))

    for combination in combinations:
        regex_match = re.match(combination[0], timestamp)
        if regex_match:
            try:
                return datetime.strptime(regex_match.group('datetime'),
                                         combination[1])
            except ValueError:
                # In case the user asked for an unrealistic date like "2020-99-99"
                raise ValueError(error_msg)

    raise ValueError(error_msg)
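
A minimal scaffold for running the snippet above on its own; the function body relies on module-level re, datetime, and log objects that the listing doesn't show, so these imports and the logger are assumptions:

import logging
import re
from datetime import datetime

log = logging.getLogger(__name__)

# ... paste timestamp_to_datetime() from above here ...
print(timestamp_to_datetime('2019-04-02 19:12:52.305538'))  # 2019-04-02 19:12:52
print(timestamp_to_datetime('2019-04-02'))                  # 2019-04-02 00:00:00
print(timestamp_to_datetime('2019-04-02T19:12:52Z'))        # 2019-04-02 19:12:52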
Example #2
    def get_connection(self, db_name, force_new=False, retry=None):
        """
        Return an existing psycopg2 connection and establish it if needed.

        :param str db_name: the database name to get a connection to
        :kwarg bool force_new: forces a new database connection even if one
            already exists
        :kwarg int retry: the number of times to retry a failed connection. If this
            is not set, then the Teiid connection attempt will be repeated until
            it is successful.
        :return: a connection to Teiid
        :rtype: psycopg2 connection
        """
        if not force_new and db_name in self._connections:
            return self._connections[db_name]
        if retry is not None and retry < 1:
            raise ValueError(
                'The retry keyword must contain a value greater than 0')

        log.debug('Connecting to Teiid host {0}:{1}'.format(
            self.host, self.port))
        attempts = 0
        while True:
            attempts += 1
            try:
                conn = psycopg2.connect(database=db_name,
                                        host=self.host,
                                        port=str(self.port),
                                        user=self.username,
                                        password=self.password,
                                        connect_timeout=300)
                break
            except psycopg2.OperationalError as e:
                if retry and attempts > retry:
                    raise
                else:
                    log.exception(e)
                    log.warning(
                        'The Teiid connection failed on attempt {0}. Sleeping for 60 '
                        'seconds.'.format(attempts))
                    sleep(60)

        # Teiid does not support setting this value at all, and unless we
        # specify ISOLATION_LEVEL_AUTOCOMMIT (zero), psycopg2 will send a
        # SET command that the Teiid server doesn't understand.
        conn.set_isolation_level(0)

        self._connections[db_name] = conn
        return conn
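
A usage sketch; the wrapper class name and its constructor arguments are assumptions, since the listing only shows the method body:

# Hypothetical wrapper class; only get_connection() is shown above.
teiid = TeiidConnector(host='teiid.example.com', port=5432,
                       username='estuary', password='s3cr3t')
conn = teiid.get_connection('public', retry=3)  # give up after 3 failed attempts
cursor = conn.cursor()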
Example #3
    def run(self, since=None, until=None):
        """
        Run the dist-git scraper.

        :param str since: a datetime to start scraping data from
        :param str until: a datetime to scrape data until
        """
        log.info('Starting initial load of dist-git commits')
        if since is None:
            start_date = self.default_since
        else:
            start_date = timestamp_to_date(since)

        if until is None:
            end_date = self.default_until
        else:
            end_date = timestamp_to_date(until)
        results = self.get_distgit_data(start_date, end_date)
        total_results = len(results)
        log.info('Successfully fetched {0} results from Teiid'.format(
            total_results))
        # Overwrite results with the formatted results so we don't have to store both in RAM
        results = list(self._get_result_chunks(results))
        # Upload the results to Neo4j using multiprocessing to process chunks of results. We don't
        # use a pool so that processes aren't reused and their RAM is returned to the OS when they
        # exit. This works around a memory leak from one of the libraries used that couldn't be
        # tracked down.
        procs = []
        concurrent_procs = 2
        for i, result in enumerate(results):
            # Only start checking the process limit once enough processes have been started to reach it
            if i >= concurrent_procs:
                active_procs = [_proc for _proc in procs if _proc.is_alive()]
                if len(active_procs) >= concurrent_procs:
                    log.debug(
                        'There are already {0} processes running. Will wait until one of '
                        'them completes.'.format(len(active_procs)))
                    active_procs[0].join()
            proc = Process(target=self._update_neo4j,
                           args=(neomodel_config.DATABASE_URL, total_results,
                                 result))
            proc.start()
            procs.append(proc)

        for proc in procs:
            # Wait for all the processes to finish
            proc.join()
        log.info('Initial load of dist-git commits complete!')
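
The process-throttling idiom above, isolated into a runnable sketch; the worker function and chunk data are placeholders:

from multiprocessing import Process

def worker(chunk):
    pass  # stand-in for self._update_neo4j

def run_throttled(chunks, concurrent_procs=2):
    procs = []
    for i, chunk in enumerate(chunks):
        # Only start checking the limit once enough processes have been started
        if i >= concurrent_procs:
            active_procs = [p for p in procs if p.is_alive()]
            if len(active_procs) >= concurrent_procs:
                active_procs[0].join()  # block until one worker finishes
        proc = Process(target=worker, args=(chunk,))
        proc.start()
        procs.append(proc)
    for proc in procs:
        proc.join()

if __name__ == '__main__':
    run_throttled([[1, 2], [3, 4], [5, 6]])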
Example #4
def is_user_authorized(username, employee_type):
    """
    Verify the user is authorized to access the application.

    :param str username: the username from the user's token
    :param str employee_type: the employee type from the user's token
    :return: a boolean that determines if the user is authorized
    :rtype: bool
    """
    employee_types = current_app.config.get('EMPLOYEE_TYPES', [])
    if employee_type in employee_types:
        log.debug('The user %s is an employee', username)
        return True

    ldap_group_dn = current_app.config.get('LDAP_EXCEPTIONS_GROUP_DN')
    if ldap_group_dn and username in _get_exception_users():
        log.debug(
            'The user %s is not considered an employee but is an exception',
            username)
        return True

    return False
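
A sketch of driving this check from a Flask application context; the config values are made up:

from flask import Flask

app = Flask(__name__)
app.config['EMPLOYEE_TYPES'] = ['Employee', 'Contractor']  # hypothetical values

with app.app_context():
    print(is_user_authorized('alice', 'Employee'))  # True
    # With no LDAP_EXCEPTIONS_GROUP_DN configured, non-employees are rejected
    print(is_user_authorized('mallory', 'Intern'))  # False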
Example #5
    def query_api_and_update_neo4j(self):
        """
        Scrape the Freshmaker API and upload the data to Neo4j.
        """
        # Initialize session and url
        session = retry_session()
        fm_url = self.freshmaker_url
        while True:
            log.debug('Querying {0}'.format(fm_url))
            try:
                rv_json = session.get(fm_url, timeout=60).json()
            except ConnectionError:
                # TODO: Remove this once FACTORY-3955 is resolved
                log.error(
                    'The connection to Freshmaker at %s failed. Skipping the rest of the scraper.',
                    fm_url,
                )
                break

            for fm_event in rv_json['items']:
                try:
                    int(fm_event['search_key'])
                except ValueError:
                    # Skip Freshmaker Events that don't have the search_key as the Advisory ID
                    continue
                log.debug('Creating FreshmakerEvent {0}'.format(fm_event['id']))
                event_params = dict(
                    id_=fm_event['id'],
                    event_type_id=fm_event['event_type_id'],
                    message_id=fm_event['message_id'],
                    state=fm_event['state'],
                    state_name=fm_event['state_name'],
                    state_reason=fm_event['state_reason'],
                    url=fm_event['url']
                )
                if fm_event.get('time_created'):
                    event_params['time_created'] = timestamp_to_datetime(fm_event['time_created'])
                if fm_event.get('time_done'):
                    event_params['time_done'] = timestamp_to_datetime(fm_event['time_done'])
                event = FreshmakerEvent.create_or_update(event_params)[0]

                log.debug('Creating Advisory {0}'.format(fm_event['search_key']))
                advisory = Advisory.get_or_create(dict(
                    id_=fm_event['search_key']
                ))[0]

                event.conditional_connect(event.triggered_by_advisory, advisory)

                for build_dict in fm_event['builds']:
                    # To handle a faulty container build in Freshmaker
                    if build_dict['build_id'] and int(build_dict['build_id']) < 0:
                        continue
                    log.debug('Creating FreshmakerBuild {0}'.format(build_dict['build_id']))
                    fb_params = dict(
                        id_=build_dict['id'],
                        dep_on=build_dict['dep_on'],
                        name=build_dict['name'],
                        original_nvr=build_dict['original_nvr'],
                        rebuilt_nvr=build_dict['rebuilt_nvr'],
                        state=build_dict['state'],
                        state_name=build_dict['state_name'],
                        state_reason=build_dict['state_reason'],
                        time_submitted=timestamp_to_datetime(build_dict['time_submitted']),
                        type_=build_dict['type'],
                        type_name=build_dict['type_name'],
                        url=build_dict['url']
                    )
                    if build_dict['time_completed']:
                        fb_params['time_completed'] = timestamp_to_datetime(
                            build_dict['time_completed'])
                    if build_dict['build_id']:
                        fb_params['build_id'] = build_dict['build_id']
                    fb = FreshmakerBuild.create_or_update(fb_params)[0]
                    event.requested_builds.connect(fb)

                    # The build ID obtained from Freshmaker API is actually a Koji task ID
                    task_result = None
                    if build_dict['build_id']:
                        task_result = self.get_koji_task_result(build_dict['build_id'])

                    if not task_result:
                        continue

                    # Extract the build ID from a task result
                    xml_root = ET.fromstring(task_result)
                    # TODO: Change this if a task can trigger multiple builds
                    try:
                        build_id = xml_root.find(".//*[name='koji_builds'].//string").text
                    except AttributeError:
                        build_id = None

                    if not build_id:
                        continue

                    log.debug('Creating ContainerKojiBuild {0}'.format(build_id))
                    build_params = {
                        'id_': build_id,
                        'original_nvr': build_dict['original_nvr']
                    }
                    try:
                        build = ContainerKojiBuild.create_or_update(build_params)[0]
                    except neomodel.exceptions.ConstraintValidationFailed:
                        # This must have errantly been created as a KojiBuild instead of a
                        # ContainerKojiBuild, so let's fix that.
                        build = KojiBuild.nodes.get_or_none(id_=build_id)
                        if not build:
                            # If there was a constraint validation failure and the build isn't just
                            # the wrong label, then we can't recover.
                            raise
                        build.add_label(ContainerKojiBuild.__label__)
                        build = ContainerKojiBuild.create_or_update(build_params)[0]

                    event.successful_koji_builds.connect(build)

            if rv_json['meta'].get('next'):
                fm_url = rv_json['meta']['next']
            else:
                break
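
The build-ID extraction in the middle of this method, demonstrated against a hypothetical Koji task result; the XML-RPC-style document shape is an assumption:

import xml.etree.ElementTree as ET

task_result = """
<value><struct><member>
  <name>koji_builds</name>
  <value><array><data>
    <value><string>123456</string></value>
  </data></array></value>
</member></struct></value>
"""
xml_root = ET.fromstring(task_result)
try:
    build_id = xml_root.find(".//*[name='koji_builds'].//string").text
except AttributeError:
    build_id = None
print(build_id)  # 123456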
Example #6
    def query(self, sql, db='public', retry=None):
        """
        Send the SQL query to Teiid and return the rows as a list.

        :param str sql: the SQL query to send to the database
        :kwarg str db: the database name to query on
        :kwarg int retry: the number of times to retry a failed query. If this
            is not set, then the Teiid query will be repeated until it is
            successful.
        :return: a list of rows from Teiid. Each row is a dictionary with the
            column headers as the keys.
        :rtype: list
        """
        con = self.get_connection(db)
        cursor = con.cursor()
        if retry is not None and retry < 1:
            raise ValueError(
                'The retry keyword must contain a value greater than 0')

        if self._last_query_dt:
            now = datetime.utcnow()
            now_and_last_diff = now - self._last_query_dt
            if now_and_last_diff < timedelta(seconds=0.5):
                # Throttle queries to at most one every 0.5 seconds
                sleep((timedelta(seconds=0.5) - now_and_last_diff).total_seconds())

        log.debug('Querying Teiid DB "{0}" with SQL:\n{1}'.format(db, sql))

        fifteen_mins = 15 * 60
        backoff = 30
        attempts = 0
        while True:
            attempts += 1
            try:
                if attempts > 1:
                    # Restart the database connection after failed queries
                    con = self.get_connection(db, force_new=True)
                    cursor = con.cursor()
                cursor.execute(sql)
                self._last_query_dt = datetime.utcnow()
                break
            except psycopg2.OperationalError as e:
                if retry and attempts > retry:
                    raise
                else:
                    log.exception(e)
                    # Double the backoff time, capped at 15 minutes
                    backoff = min(backoff * 2, fifteen_mins)
                    log.warning(
                        'The Teiid query failed on attempt {0}. Sleeping for {1} seconds.'
                        .format(attempts, backoff))
                    sleep(backoff)

        data = cursor.fetchall()
        # column header names
        cols = [t[0] for t in cursor.description or []]
        log.debug('Found the following columns: {}'.format(cols))
        log.debug('Received {} rows from Teiid'.format(len(data)))
        # build a return array with all columns
        return [dict(zip(cols, row)) for row in data]
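
A usage sketch, assuming teiid is an instance of the class this method belongs to; the SQL and table name are made up:

rows = teiid.query('SELECT id, name FROM packages', db='public', retry=3)
for row in rows:
    print(row['id'], row['name'])  # each row is a dict keyed by column name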
Example #7
    def _update_neo4j(neo4j_url, total_results, counter_and_results):
        """
        Update Neo4j results via mapping with multiprocessing.

        :param str neo4j_url: database url for Neo4j
        :param int total_results: the total number of results that will be
            processed. This is used for a logging statement about progress.
        :param tuple counter_and_results: a tuple where the first index is the
            current counter and the second index is a list of dictionaries
            representing results from Teiid
        """
        try:
            previous_total = counter_and_results[0]
            results = counter_and_results[1]
            # Since _update_neo4j will be run in a separate process, we must configure the database
            # URL every time the method is run.
            neomodel_config.DATABASE_URL = neo4j_url
            # Create a thread pool with 4 threads to speed up queries to cgit
            pool = ThreadPool(4)
            counter = 0
            for result in results:
                if counter % 200 == 0:
                    until = counter + 200
                    if until > len(results):
                        until = len(results)
                    # Because of the joins in the SQL query, we end up with several rows with the
                    # same commit hash and we only want to query cgit once per commit
                    unique_commits = set([(c['module'], c['sha'])
                                          for c in results[counter:until]])
                    log.debug(
                        'Getting the author email addresses from cgit in parallel '
                        'for results {0} to {1}'.format(counter, until))
                    repos_info = {
                        r['commit']: r
                        for r in pool.map(DistGitScraper._get_repo_info,
                                          unique_commits)
                    }
                    # This is no longer needed so it can be cleared to save RAM
                    del unique_commits
                counter += 1
                log.info('Processing commit entry {0}/{1}'.format(
                    previous_total + counter, total_results))
                repo_info = repos_info[result['sha']]
                if not repo_info.get('namespace'):
                    log.info(
                        'Skipping nodes creation with commit ID {0}'.format(
                            result['commit_id']))
                    continue

                log.debug(
                    'Creating nodes associated with commit ID {0}'.format(
                        result['commit_id']))
                repo = DistGitRepo.get_or_create({
                    'namespace': repo_info['namespace'],
                    'name': result['module']
                })[0]
                commit = DistGitCommit.create_or_update({
                    'author_date': result['author_date'],
                    'commit_date': result['commit_date'],
                    'hash_': result['sha'],
                    # In case we get unicode characters in Python 2
                    'log_message': bytes(result['log_message'], 'utf-8').decode()
                })[0]
                bug = BugzillaBug.get_or_create({'id_': result['bugzilla_id']})[0]

                log.debug(
                    'Creating the user nodes associated with commit ID {0}'.format(
                        result['commit_id']))
                author = User.create_or_update({
                    'username': repo_info['author_username'],
                    'email': repo_info['author_email']
                })[0]

                log.debug(
                    'Creating the relationships associated with commit ID {0}'.format(
                        result['commit_id']))
                repo.commits.connect(commit)

                commit.conditional_connect(commit.author, author)

                if result['bugzilla_type'] == 'related':
                    commit.related_bugs.connect(bug)
                elif result['bugzilla_type'] == 'resolves':
                    commit.resolved_bugs.connect(bug)
                elif result['bugzilla_type'] == 'reverted':
                    commit.reverted_bugs.connect(bug)
                # This is no longer needed so it can be cleared to save RAM
                del repo_info
        finally:
            # Close the DB connection after this is done processing
            db.driver.close()
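
Example #3 feeds this method tuples produced by self._get_result_chunks, which isn't shown in the listing. A plausible sketch of a helper that satisfies the (previous_total, results) contract, with a hypothetical name and chunk size:

def get_result_chunks(results, chunk_size=1000):
    """Yield (offset, chunk) tuples so each worker can log overall progress."""
    for offset in range(0, len(results), chunk_size):
        yield (offset, results[offset:offset + chunk_size])

print(list(get_result_chunks(list(range(5)), chunk_size=2)))
# [(0, [0, 1]), (2, [2, 3]), (4, [4])]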
Example #8
    def _get_repo_info(repo_and_commit):
        """
        Query cgit for the namespace, username and email of the author.

        :param tuple repo_and_commit: contains the repo and commit to query for
        :return: a dictionary with the keys namespace, author_username,
            author_email, and the commit
        :rtype: dict
        """
        repo, commit = repo_and_commit
        log.debug(
            'Attempting to find the cgit URL for the commit "{0}" in repo "{1}"'
            .format(commit, repo))
        session = retry_session()
        rv = {'commit': commit}
        cgit_result = None
        # The tuple of namespaces to try when determining which namespace this git module belongs
        # to since this information isn't stored in GitBZ yet
        namespaces = ('rpms', 'containers', 'modules', 'tests')
        cgit_url = getenv('ESTUARY_CGIT_URL',
                          'http://pkgs.devel.redhat.com/cgit/')
        for namespace in namespaces:
            url = '{0}{1}/{2}/commit/?id={3}&dt=2'.format(
                cgit_url, namespace, repo, commit)
            log.debug('Trying the URL "{0}"'.format(url))
            try:
                cgit_result = session.get(url, timeout=15)
            except ConnectionError:
                log.error('The connection to "{0}" failed'.format(url))
                continue

            if cgit_result.status_code == 200:
                # If the repo is empty, cgit oddly returns a 200 status code, so let's correct the
                # status code so that the remainder of the code knows it's a bad request
                if 'Repository seems to be empty' in cgit_result.text:
                    cgit_result.status_code = 404
                else:
                    # If the repo is populated and a 200 status code is returned, then we can
                    # assume we found the correct repo
                    break

        if not cgit_result or cgit_result.status_code != 200:
            log.error(
                'Couldn\'t find the commit "{0}" for the repo "{1}" in the namespaces: {2}'
                .format(commit, repo, ', '.join(namespaces)))
            return rv

        log.debug(
            'Found the cgit URL "{0}" for the commit "{1}" in repo "{2}"'.format(
                url, commit, repo))
        rv['namespace'] = namespace

        # Start parsing the cgit content
        soup = BeautifulSoup(cgit_result.text, 'html.parser')
        # Workaround for BS4 in EL7, since `soup.find('th', string='author')` doesn't work in
        # that environment
        th_tags = soup.find_all('th')
        data_found = {'author': False}
        for th_tag in th_tags:
            if th_tag.string in ('author',):
                data_found[th_tag.string] = True
                username_key = '{0}_username'.format(th_tag.string)
                email_key = '{0}_email'.format(th_tag.string)
                rv[username_key], rv[email_key] = (
                    DistGitScraper._parse_username_email_from_cgit(
                        th_tag, commit, namespace, repo))

            # If all the "th" elements we're interested in were parsed, then break from the loop
            # early
            if all(data_found.values()):
                break

        soup.decompose()
        return rv
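
The th-scanning workaround near the end, demonstrated on a hypothetical cgit commit-page fragment:

from bs4 import BeautifulSoup

html = """
<table>
  <tr><th>author</th><td>Alice &lt;alice@example.com&gt;</td></tr>
  <tr><th>committer</th><td>Bob &lt;bob@example.com&gt;</td></tr>
</table>
"""
soup = BeautifulSoup(html, 'html.parser')
for th_tag in soup.find_all('th'):
    if th_tag.string in ('author',):
        print(th_tag.find_next_sibling('td').text)  # Alice <alice@example.com>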
Example #9
    def query_api_and_update_neo4j(self):
        """
        Scrape the Freshmaker API and upload the data to Neo4j.
        """
        # Initialize session and url
        session = retry_session()
        fm_url = self.freshmaker_url
        while True:
            log.debug('Querying {0}'.format(fm_url))
            rv_json = session.get(fm_url, timeout=15).json()
            for fm_event in rv_json['items']:
                try:
                    int(fm_event['search_key'])
                except ValueError:
                    # Skip Freshmaker Events that don't have the search_key as the Advisory ID
                    continue
                event = FreshmakerEvent.create_or_update(dict(
                    id_=fm_event['id'],
                    event_type_id=fm_event['event_type_id'],
                    message_id=fm_event['message_id'],
                    state=fm_event['state'],
                    state_name=fm_event['state_name'],
                    state_reason=fm_event['state_reason'],
                    url=fm_event['url']
                ))[0]

                advisory = Advisory.get_or_create(dict(
                    id_=fm_event['search_key']
                ))[0]

                event.conditional_connect(event.triggered_by_advisory, advisory)

                for build_dict in fm_event['builds']:
                    # To handle a faulty container build in Freshmaker
                    if not build_dict['build_id'] or int(build_dict['build_id']) < 0:
                        continue

                    # The build ID obtained from Freshmaker API is actually a Koji task ID
                    task_result = self.get_koji_task_result(build_dict['build_id'])
                    if not task_result:
                        continue

                    # Extract the build ID from a task result
                    xml_root = ET.fromstring(task_result)
                    # TODO: Change this if a task can trigger multiple builds
                    try:
                        build_id = xml_root.find(".//*[name='koji_builds'].//string").text
                    except AttributeError:
                        build_id = None

                    if build_id:
                        build = ContainerKojiBuild.get_or_create(dict(
                            id_=build_id,
                            original_nvr=build_dict['original_nvr']
                        ))[0]
                        event.triggered_container_builds.connect(build)

            if rv_json['meta'].get('next'):
                fm_url = rv_json['meta']['next']
            else:
                break
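
The pagination idiom that drives this loop and the one in Example #5, isolated; the endpoint URL is hypothetical:

import requests

url = 'https://freshmaker.example.com/api/1/events/'
while url:
    rv_json = requests.get(url, timeout=15).json()
    for fm_event in rv_json['items']:
        ...  # process one event
    # Follow the meta.next link until the API stops providing one
    url = rv_json['meta'].get('next')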
Example #10
    def update_neo4j(self, results):
        """
        Update Neo4j with the dist-git commit and push information from Teiid.

        :param list results: a list of dictionaries
        """
        pool = Pool(processes=8)
        counter = 0
        for result in results:
            if counter % 200 == 0:
                until = counter + 200
                if until > len(results):
                    until = len(results)
                # Because of the joins in the SQL query, we end up with several rows with the same
                # commit hash and we only want to query cgit once per commit
                unique_commits = set([(c['module'], c['sha'])
                                      for c in results[counter:until]])
                log.debug(
                    'Getting the author and committer email addresses from cgit in parallel '
                    'for results {0} to {1}'.format(counter, until))
                repos_info = {}
                for _r in pool.map(DistGitScraper._get_repo_info,
                                   unique_commits):
                    r = json.loads(_r)
                    repos_info[r['commit']] = r
                # This is no longer needed so it can be cleared to save RAM
                del unique_commits
                # A lot of RAM was allocated or used up, so let's call gc.collect() to ensure it
                # is removed
                gc.collect()
            counter += 1
            log.info('Processing commit and push entry {0}/{1}'.format(
                str(counter), str(len(results))))
            repo_info = repos_info[result['sha']]
            if not repo_info.get('namespace'):
                log.info(
                    'Skipping nodes creation with commit ID {0} and push ID {1}'
                    .format(result['commit_id'], result['push_id']))
                continue

            log.debug(
                'Creating nodes associated with commit ID {0} and push ID {1}'.format(
                    result['commit_id'], result['push_id']))
            repo = DistGitRepo.get_or_create({
                'namespace': repo_info['namespace'],
                'name': result['module']
            })[0]
            branch_name = result['ref'].rsplit('/', 1)[1]
            branch = DistGitBranch.get_or_create({
                'name': branch_name,
                'repo_namespace': repo_info['namespace'],
                'repo_name': result['module']
            })[0]
            commit = DistGitCommit.create_or_update({
                'author_date': result['author_date'],
                'commit_date': result['commit_date'],
                'hash_': result['sha'],
                # In case we get unicode characters in Python 2
                'log_message': bytes(result['log_message'], 'utf-8').decode()
            })[0]
            push = DistGitPush.get_or_create({
                'id_': result['push_id'],
                'push_date': result['push_date'],
                'push_ip': result['push_ip']
            })[0]
            bug = BugzillaBug.get_or_create({'id_': result['bugzilla_id']})[0]

            log.debug(
                'Creating the user nodes associated with commit ID {0} and push ID {1}'
                .format(result['commit_id'], result['push_id']))
            author = User.create_or_update({
                'username': repo_info['author_username'],
                'email': repo_info['author_email']
            })[0]
            committer = User.create_or_update({
                'username': repo_info['committer_username'],
                'email': repo_info['committer_email']
            })[0]
            pusher = User.get_or_create({'username': result['pusher']})[0]

            log.debug(
                'Creating the relationships associated with commit ID {0} and push ID {1}'
                .format(result['commit_id'], result['push_id']))
            repo.contributors.connect(author)
            repo.contributors.connect(committer)
            repo.contributors.connect(pusher)
            repo.commits.connect(commit)
            repo.pushes.connect(push)
            repo.branches.connect(branch)

            branch.contributors.connect(author)
            branch.contributors.connect(committer)
            branch.contributors.connect(pusher)
            branch.commits.connect(commit)
            branch.pushes.connect(push)

            push.conditional_connect(push.pusher, pusher)
            push.commits.connect(commit)

            commit.conditional_connect(commit.author, author)
            commit.conditional_connect(commit.committer, committer)

            if repo_info['parent']:
                parent_commit = DistGitCommit.get_or_create(
                    {'hash_': repo_info['parent']})[0]
                commit.conditional_connect(commit.parent, parent_commit)

            if result['bugzilla_type'] == 'related':
                commit.related_bugs.connect(bug)
            elif result['bugzilla_type'] == 'resolves':
                commit.resolved_bugs.connect(bug)
            elif result['bugzilla_type'] == 'reverted':
                commit.reverted_bugs.connect(bug)
            # This is no longer needed so it can be cleared to save RAM
            del repo_info
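
The per-window dedup at the top of the loop, shown standalone with made-up rows (set ordering may vary):

results = [
    {'module': 'kernel', 'sha': 'abc123'},
    {'module': 'kernel', 'sha': 'abc123'},  # duplicate row from the SQL joins
    {'module': 'bash', 'sha': 'def456'},
]
for counter in range(0, len(results), 200):
    until = min(counter + 200, len(results))
    unique_commits = set((c['module'], c['sha']) for c in results[counter:until])
    print(unique_commits)  # {('kernel', 'abc123'), ('bash', 'def456')}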
Example #11
def _get_exception_users():
    """
    Get the list of users that are explicitly whitelisted.

    If the LDAP search fails, an empty set is returned.

    :return: a set of usernames
    :rtype: set
    :raise InternalServerError: if a required configuration value is not set or the connection to
        the LDAP server fails
    """
    # Import this here so it's not required for deployments with auth disabled
    import ldap3

    base_error = '%s is not set in the server configuration'
    ldap_uri = current_app.config.get('LDAP_URI')
    if not ldap_uri:
        log.error(base_error, 'LDAP_URI')
        raise InternalServerError()

    ldap_group_dn = current_app.config.get('LDAP_EXCEPTIONS_GROUP_DN')
    if not ldap_group_dn:
        log.error(base_error, 'LDAP_EXCEPTIONS_GROUP_DN')
        raise InternalServerError()

    if ldap_uri.startswith('ldaps://'):
        ca = current_app.config['LDAP_CA_CERTIFICATE']
        log.debug('Connecting to %s using SSL and the CA %s', ldap_uri, ca)
        tls = ldap3.Tls(ca_certs_file=ca, validate=ssl.CERT_REQUIRED)
        server = ldap3.Server(ldap_uri, use_ssl=True, tls=tls)
    else:
        log.debug('Connecting to %s without SSL', ldap_uri)
        server = ldap3.Server(ldap_uri)

    connection = ldap3.Connection(server)
    try:
        connection.open()
    except ldap3.core.exceptions.LDAPSocketOpenError:
        log.exception('The connection to %s failed', ldap_uri)
        raise InternalServerError()

    membership_attr = current_app.config['LDAP_GROUP_MEMBERSHIP_ATTRIBUTE']
    log.debug('Searching for the attribute %s on %s', membership_attr,
              ldap_group_dn)
    # Set the scope to base so only the group from LDAP_EXCEPTIONS_GROUP_DN is returned
    success = connection.search(ldap_group_dn,
                                '(cn=*)',
                                search_scope=ldap3.BASE,
                                attributes=[membership_attr])
    if not success:
        log.error(
            'The user exceptions list could not be determined because the search for the attribute '
            '%s on %s failed with %r',
            membership_attr,
            ldap_group_dn,
            connection.response,
        )
        return set()

    return set([
        dn.split('=')[1].split(',')[0]
        for dn in connection.response[0]['attributes'][membership_attr]
    ])
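
The DN-to-username extraction in the final return statement, applied to hypothetical group members:

members = [
    'uid=alice,ou=users,dc=example,dc=com',
    'uid=bob,ou=users,dc=example,dc=com',
]
print(set(dn.split('=')[1].split(',')[0] for dn in members))  # {'alice', 'bob'}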